Example #1
0
    def getLocalFileInfo(self, fileName, checksumType="default", date=None):
        """Check that a local file exists and record its size.

        :param fileName: path of the local file to inspect.
        :param checksumType: checksum type label; the code visible here only
            stores it under ``outputRet["checksumType"]``.
        :param date: accepted for interface compatibility; the code visible
            here never uses it.
        :return: ``(PilotErrors.<error code>, outputRet)`` when the file is
            missing or its size cannot be read. ``outputRet`` is a dict with
            the keys ``errorLog``, ``report``, ``size``, ``checksum`` and
            ``checksumType``.

        NOTE(review): no success-path return is visible in this file, so the
        method returns None when the size lookup succeeds -- confirm whether a
        checksum step and a final return were lost.
        """
        outputRet = {
            "errorLog": "",
            "report": {"clientState": None},
            "size": 0,
            "checksum": "",
            "checksumType": checksumType,
        }

        self.log("Getting local File(%s) info." % fileName)

        # does the file exist?
        if not os.path.isfile(fileName):
            in_existing_dir = os.path.exists(os.path.dirname(fileName))
            if fileName.find("DBRelease") >= 0 and in_existing_dir:
                # The original message referenced a misspelled variable, which
                # raised NameError instead of reporting the missing file.
                errorLog = "DBRelease file missing: %s" % (fileName)
                outputRet["errorLog"] = errorLog
                self.log("!!WARNING!!2999!! %s" % (errorLog))
                return PilotErrors.ERR_MISSDBREL, outputRet

            errorLog = "No such file or directory: %s" % (fileName)
            outputRet["errorLog"] = errorLog
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSINGLOCALFILE, outputRet

        # NOTE(review): the original had a modification-time lookup
        # (SiteMover.getModTime) placed after the two returns above, where it
        # could never run; it only rebound the local name `date`, so it could
        # not have reached the caller either. Removed as dead code -- restore
        # with a real output channel if the date is needed.

        # get the file size
        try:
            self.log("Executing getsize() for file: %s" % (fileName))
            outputRet["size"] = str(os.path.getsize(fileName))
        except OSError as e:
            errorLog = "Could not get file size: %s" % str(e)
            outputRet["errorLog"] = errorLog
            # use the same logger as the rest of this method
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_FAILEDSIZELOCAL, outputRet
Example #2
0
    def getLocalFileInfo(self, fileName, checksumType="default", date=None):
        """Check that a local file exists and record its size.

        :param fileName: path of the local file to inspect.
        :param checksumType: checksum type label; the code visible here only
            stores it under ``outputRet["checksumType"]``.
        :param date: accepted for interface compatibility; the code visible
            here never uses it.
        :return: ``(PilotErrors.<error code>, outputRet)`` when the file is
            missing or its size cannot be read. ``outputRet`` is a dict with
            the keys ``errorLog``, ``report``, ``size``, ``checksum`` and
            ``checksumType``.

        NOTE(review): no success-path return is visible in this file, so the
        method returns None when the size lookup succeeds -- confirm whether a
        checksum step and a final return were lost.
        """
        outputRet = {
            "errorLog": "",
            "report": {"clientState": None},
            "size": 0,
            "checksum": "",
            "checksumType": checksumType,
        }

        self.log("Getting local File(%s) info." % fileName)

        # does the file exist?
        if not os.path.isfile(fileName):
            in_existing_dir = os.path.exists(os.path.dirname(fileName))
            if fileName.find("DBRelease") >= 0 and in_existing_dir:
                # The original message referenced a misspelled variable, which
                # raised NameError instead of reporting the missing file.
                errorLog = "DBRelease file missing: %s" % (fileName)
                outputRet["errorLog"] = errorLog
                self.log("!!WARNING!!2999!! %s" % (errorLog))
                return PilotErrors.ERR_MISSDBREL, outputRet

            errorLog = "No such file or directory: %s" % (fileName)
            outputRet["errorLog"] = errorLog
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSINGLOCALFILE, outputRet

        # NOTE(review): the original had a modification-time lookup
        # (SiteMover.getModTime) placed after the two returns above, where it
        # could never run; it only rebound the local name `date`, so it could
        # not have reached the caller either. Removed as dead code -- restore
        # with a real output channel if the date is needed.

        # get the file size
        try:
            self.log("Executing getsize() for file: %s" % (fileName))
            outputRet["size"] = str(os.path.getsize(fileName))
        except OSError as e:
            errorLog = "Could not get file size: %s" % str(e)
            outputRet["errorLog"] = errorLog
            # use the same logger as the rest of this method
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_FAILEDSIZELOCAL, outputRet
    def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
        """Copy an output file from local disk to the local SE using curl.

        The upload URL is assembled from the first protocol entry returned by
        the Rucio client for ``<self.site_name>_<token>`` plus the path derived
        from scope and file name. The file is uploaded with ``curl -T``; then
        the output of a replica query (and, for /pnfs/ SURLs without such
        output, the dCache checksum) is compared with the local checksum.

        :param source: path of the local file to upload.
        :param destination: not used by this implementation.
        :param fsize: local file size; obtained from getLocalFileInfo() when 0.
        :param fchecksum: local checksum; obtained from getLocalFileInfo()
            when 0.
        :param pdict: optional settings read here: lfn, guid, token, scope,
            dsname, testLevel, experiment, analJob, prodSourceLabel, report.
        :return: the value of ``self.put_data_retfail(...)`` when a failure is
            detected.

        NOTE(review): no success-path return is visible in this file; the
        method ends after the checksum verification and returns None.
        """
        # function is based on dCacheSiteMover put function

        # `time` is called as a function in this method, so `time.sleep` is
        # not reachable through that name; import sleep explicitly.
        from time import sleep

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        lfn = pdict.get('lfn', '')
        guid = pdict.get('guid', '')
        token = pdict.get('token', '')
        scope = pdict.get('scope', '')
        dsname = pdict.get('dsname', '')
        testLevel = pdict.get('testLevel', '0')
        experiment = pdict.get('experiment', '')
        analysisJob = pdict.get('analJob', False)
        prodSourceLabel = pdict.get('prodSourceLabel', '')

        # get the site information object
        si = getSiteInformation(experiment)

        tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
        if prodSourceLabel == 'ddm' and analysisJob:
            tolog(
                "Treating PanDA Mover job as a production job during stage-out"
            )
            analysisJob = False

        # get the DQ2 tracing report; fall back to a scratch dict when the
        # caller did not supply one
        try:
            report = pdict['report']
        except KeyError:
            report = {}
        else:
            # set the proper protocol
            report['protocol'] = 'curl'
            # mark the relative start
            report['catStart'] = time()
            # the current file
            report['filename'] = lfn
            # guid
            report['guid'] = guid.replace('-', '')

        # preparing variables
        if fsize == 0 or fchecksum == 0:
            ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(
                source, csumtype="adler32")
            if ec != 0:
                self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
                return self.put_data_retfail(ec, pilotErrorDiag)

        # now that the file size is known, add it to the tracing report
        report['filesize'] = fsize

        # get the checksum type
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get a proper envsetup (the value is not used by the curl commands
        # below; the call is kept as in the original)
        envsetup = self.getEnvsetup()

        tolog("Proxy verification turned off")

        filename = os.path.basename(source)

        # get all the proper paths
        ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
            error,
            analysisJob,
            token,
            prodSourceLabel,
            dsname,
            filename,
            scope=scope,
            sitemover=self)  # quick workaround
        if ec != 0:
            self.prepareReport(tracer_error, report)
            return self.put_data_retfail(ec, pilotErrorDiag)

        # here begins the new magic... from Vincenzo Lavorini
        sitemover = SiteMover.SiteMover()
        v_path = sitemover.getPathFromScope(scope, filename)
        rucio_c = Client()
        if "ATLAS" in token:
            # drop the first five characters of the token
            token_ok = token[+5:]
        else:
            token_ok = token
        local_se_token = self.site_name + "_" + token_ok

        # Query the protocol description once (the original asked the Rucio
        # client three times for the same data) and use the first entry.
        first_protocol = list(rucio_c.get_protocols(local_se_token))[0]
        v_address = "https://%s:%s%s" % (first_protocol['hostname'],
                                         first_protocol['port'],
                                         first_protocol['prefix'])
        tolog("prova1 address is %s" % (v_address))
        # avoid a doubled "rucio" component when both the prefix and the
        # scope path contain it
        if "rucio/" in v_address and "/rucio" in v_path:
            v_address = v_address[:-7]
            tolog("prova2 address is %s" % (v_address))
        elif "rucio" in v_address and "rucio" in v_path:
            v_address = v_address[:-6]
            tolog("prova3 address is %s" % (v_address))
        full_http_surl = v_address + v_path
        tolog("prova3 full_http__surl is %s" % (full_http_surl))

        full_surl = surl
        if full_surl[:len('token:')] == 'token:':
            # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
            full_surl = full_surl[full_surl.index('srm://'):]

        # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
        #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
        #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz

        if testLevel == "1":
            source = "thisisjustatest"

        # NOTE(review): --cert, --key and --cacert all receive self.sslKey;
        # confirm this is intended (e.g. a single combined proxy file).
        _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (
            self.sslKey, self.sslKey, self.sslKey, self.sslCertDir,
            full_http_surl, source)

        tolog("Executing command: %s" % (_cmd_str))
        t0 = os.times()
        _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
        _cmd_out, _cmd_stderr = _cmd.communicate()
        report['relativeStart'] = time()
        report['transferStart'] = time()
        report['validateStart'] = time()
        t1 = os.times()
        t = t1[4] - t0[4]
        tolog("Curl command output = %s" % (_cmd_out))
        tolog("Command finished after %f s" % (t))
        if "bytes uploaded" not in _cmd_out:
            # NOTE(review): a failed upload is only logged; execution
            # continues with the checksum verification below.
            tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))

        verified = False

        # getting the remote checksum from Rucio:
        # NOTE(review): 'token_fle' looks like a misspelling of 'token_file',
        # but it is a runtime path -- confirm against whatever writes it.
        with open('token_fle', 'r') as token_file:
            token_rucio = token_file.readline()
        pos2print = token_rucio.find("CN")
        # never write the real token to the log
        token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
        tolog("Token I am using: %s" % (token_rucio2print))
        httpredirector = readpar('httpredirector')

        # Build the replica query once; only the token differs between the
        # command that is executed and the one that is logged.
        if not httpredirector:
            cmd_template = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip "
            cmd = cmd_template % (token_rucio, scope, filename)
            cmd2print = cmd_template % (token_rucio2print, scope, filename)
        else:
            tolog("HTTP redirector I am using: %s" % (httpredirector))
            if "http" in httpredirector:
                redirector_url = httpredirector
            else:
                redirector_url = "https://%s" % (httpredirector)
            # The original referenced an undefined `reps` list and a
            # misspelled token variable in these branches; scope and filename
            # are the values used by the default branch above.
            cmd_template = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml'  --cacert cabundle.pem %s/replicas/%s/%s?select=geoip "
            cmd = cmd_template % (token_rucio, redirector_url, scope, filename)
            cmd2print = cmd_template % (token_rucio2print, redirector_url,
                                        scope, filename)

        # NOTE(review): the loop retries only while the command output equals
        # the literal "none", and the raw curl output is later compared with
        # the local checksum as-is. A commented-out awk pipeline in the
        # original suggests the <hash> value was meant to be extracted from
        # the metalink reply first -- confirm the intended parsing.
        trial_n = 1
        remote_checksum = "none"
        while (remote_checksum == "none" and trial_n < 8):
            trial_n += 1
            tolog("Getting remote checksum: command to be executed: %s" %
                  (cmd2print))
            checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
            remote_checksum, stderr = checksum_cmd.communicate()
            tolog("Remote checksum as given by rucio %s" % (remote_checksum))
            if (remote_checksum == "none"):
                tolog("In checking checksum: command std error: %s" % (stderr))
                pilotErrorDiag = "Cannot get the checksum of file on SE"
                tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
                tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
                sleep(3)

        # compare the checksums if the remote checksum was extracted
        tolog("Remote checksum: %s" % str(remote_checksum))
        tolog("Local checksum: %s" % (fchecksum))

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
                tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
        else:
            tolog(
                "Skipped primary checksum verification (remote checksum not known)"
            )

        # if lcg-ls could not be used
        if "/pnfs/" in surl and not remote_checksum:
            # for dCache systems we can test the checksum with the use method
            tolog(
                "Detected dCache system: will verify local checksum with the local SE checksum"
            )
            # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
            path = surl[surl.find('/pnfs/'):]
            # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....#
            tolog("File path: %s" % (path))

            _filename = os.path.basename(path)
            _dir = os.path.dirname(path)

            # get the remote checksum
            tolog("Local checksum: %s" % (fchecksum))
            try:
                remote_checksum = self.getdCacheChecksum(_dir, _filename)
            except Exception as e:
                # best effort: a failed lookup only skips this verification
                pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(
                    e)
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            else:
                if remote_checksum == "NOSUCHFILE":
                    pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                    tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                    self.prepareReport('NOSUCHFILE', report)
                    return self.put_data_retfail(error.ERR_NOSUCHFILE,
                                                 pilotErrorDiag,
                                                 surl=full_surl)
                elif remote_checksum:
                    tolog("Remote checksum: %s" % (remote_checksum))
                else:
                    tolog("Could not get remote checksum")

            if remote_checksum:
                if remote_checksum != fchecksum:
                    pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                     (csumtype, _filename, remote_checksum, fchecksum)
                    if csumtype == "adler32":
                        self.prepareReport('AD_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTADMISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                    else:
                        self.prepareReport('MD5_MISMATCH', report)
                        return self.put_data_retfail(error.ERR_PUTMD5MISMATCH,
                                                     pilotErrorDiag,
                                                     surl=full_surl)
                else:
                    tolog("Remote and local checksums verified")
                    verified = True
Example #4
0
        # get the DQ2 site name from ToA
        try:
            _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn)
        except Exception, e:
            tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
        else:
            report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
            tolog("DQ2 site name: %s" % (_dq2SiteName))

        try:
            self.mkdirWperm(dst_loc_sedir)
        except IOError, e:
            pilotErrorDiag = "put_data could not create dir: %s" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.prepareReport('MKDIR_FAIL', report)
            return SiteMover.put_data_retfail(error.ERR_MKDIR, pilotErrorDiag, surl=dst_gpfn)

        cmd = '%srfcp %s %s' % (_setup_str, source, dst_loc_sedir)
        tolog("Executing command: %s" % cmd)
        report['transferStart'] = time()
        s, o = commands.getstatusoutput(cmd)
        report['validateStart'] = time()
        if s != 0:
            o = o.replace('\n', ' ')
            check_syserr(s, o)
            pilotErrorDiag = "Error in copying: %s" % (o)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag, surl=dst_gpfn)
        try:
            os.chmod(dst_loc_pfn, self.permissions_FILE)
    def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname,
                       filename, **pdict):
        """Get proper paths (SURL and LFC paths).

        In the current implementation this function depends completely on the
        site mover, so it needs to be either reimplemented or moved outside
        SiteInformation.

        :param error: object providing the ERR_STAGEOUTFAILED error code.
        :param analyJob: forwarded to the site mover path helpers.
        :param token: space token, forwarded to the site mover helpers.
        :param prodSourceLabel: forwarded to the site mover path helpers.
        :param dsname: dataset name, forwarded to getFinalLCGPaths().
        :param filename: file name, forwarded to the site mover path helpers.
        :param pdict: optional settings: ``alt`` (default False), ``scope``
            (default None) and ``sitemover`` (site mover instance to use; a
            new SiteMover.SiteMover() is created when absent or None).
        :return: tuple ``(ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir,
            surl)``; ``ec`` is 0 on success.
        """

        ec = 0
        pilotErrorDiag = ""
        tracer_error = ""
        dst_gpfn = ""
        lfcdir = ""
        surl = ""

        alt = pdict.get('alt', False)
        scope = pdict.get('scope', None)

        # Get the proper endpoint.
        # quick workaround HACK: to be properly implemented later.
        # Build the default site mover only when the caller did not supply
        # one; passing the constructor call as the dict.get() default created
        # a throw-away instance on every call.
        sitemover = pdict.get('sitemover')
        if sitemover is None:
            sitemover = SiteMover.SiteMover()

        se = sitemover.getProperSE(token, alt=alt)

        # For production jobs, the SE path is stored in seprodpath
        # For analysis jobs, the SE path is stored in sepath

        destination = sitemover.getPreDestination(analyJob,
                                                  token,
                                                  prodSourceLabel,
                                                  alt=alt)
        if destination == '':
            pilotErrorDiag = "put_data destination path in SE not defined"
            tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            tracer_error = 'PUT_DEST_PATH_UNDEF'
            ec = error.ERR_STAGEOUTFAILED
            return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl

        tolog("Going to store job output at: %s" % (destination))
        # /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/

        # rucio path:
        # SE + destination + SiteMover.getPathFromScope(scope,lfn)

        # Get the LFC path
        lfcpath, pilotErrorDiag = sitemover.getLFCPath(analyJob, alt=alt)
        if lfcpath == "":
            tracer_error = 'LFC_PATH_EMPTY'
            ec = error.ERR_STAGEOUTFAILED
            return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl

        tolog("LFC path = %s" % (lfcpath))
        # /grid/atlas/users/pathena

        ec, pilotErrorDiag, dst_gpfn, lfcdir = sitemover.getFinalLCGPaths(
            analyJob,
            destination,
            dsname,
            filename,
            lfcpath,
            token,
            prodSourceLabel,
            scope=scope,
            alt=alt)
        if ec != 0:
            tracer_error = 'UNKNOWN_DSN_FORMAT'
            return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl

        # srm://f-dpm001.grid.sinica.edu.tw:8446/srm/managerv2?SFN=/dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/rucio/data12_8TeV/55/bc/NTUP_SUSYSKIM.01161650._000003.root.1
        # surl = srm://f-dpm001.grid.sinica.edu.tw:8446/srm/managerv2?SFN=/dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/user/apetrid/0328091854/user.apetrid.0328091854.805485.lib._011669/user.apetrid.0328091854.805485.lib._011669.lib.tgz

        # Define the SURL
        if "/rucio" in destination:
            surl = sitemover.getFullPath(scope,
                                         token,
                                         filename,
                                         analyJob,
                                         prodSourceLabel,
                                         alt=alt)
        else:
            surl = "%s%s" % (se, dst_gpfn)
        tolog("SURL = %s" % (surl))
        tolog("dst_gpfn = %s" % (dst_gpfn))
        tolog("lfcdir = %s" % (lfcdir))

        return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl
    def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname,
                       filename, **pdict):
        """Work out the SURL and LFC paths for a file that is to be staged out.

        Optional keyword settings read from ``pdict``: ``alt`` (default False)
        and ``scope`` (default None).

        Returns the tuple ``(ec, pilotErrorDiag, tracer_error, dst_gpfn,
        lfcdir, surl)``. On the early failure exits the fields that have not
        been determined yet are empty strings.
        """

        use_alt = pdict.get('alt', False)
        scope = pdict.get('scope', None)

        # The site mover supplies the endpoint and every path helper used here
        mover = SiteMover.SiteMover()
        endpoint = mover.getProperSE(token, alt=use_alt)

        # Production jobs keep the SE path in seprodpath, analysis jobs in sepath
        pre_destination = mover.getPreDestination(
            analyJob, token, prodSourceLabel, alt=use_alt)
        if pre_destination == '':
            diagnostics = "put_data destination path in SE not defined"
            tolog('!!WARNING!!2990!! %s' % (diagnostics))
            return (error.ERR_STAGEOUTFAILED, diagnostics,
                    'PUT_DEST_PATH_UNDEF', "", "", "")

        tolog("Going to store job output at: %s" % (pre_destination))
        # e.g. /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/
        # (rucio path: SE + destination + SiteMover.getPathFromScope(scope,lfn))

        # LFC path, e.g. /grid/atlas/users/pathena
        lfc_root, diagnostics = mover.getLFCPath(analyJob, alt=use_alt)
        if lfc_root == "":
            return (error.ERR_STAGEOUTFAILED, diagnostics,
                    'LFC_PATH_EMPTY', "", "", "")

        tolog("LFC path = %s" % (lfc_root))

        ec, diagnostics, dst_gpfn, lfcdir = mover.getFinalLCGPaths(
            analyJob, destination=pre_destination, dsname=dsname,
            filename=filename, lfcpath=lfc_root, token=token,
            prodSourceLabel=prodSourceLabel, scope=scope, alt=use_alt
        ) if False else mover.getFinalLCGPaths(
            analyJob, pre_destination, dsname, filename, lfc_root, token,
            prodSourceLabel, scope=scope, alt=use_alt)
        if ec != 0:
            return ec, diagnostics, 'UNKNOWN_DSN_FORMAT', dst_gpfn, lfcdir, ""

        # Example SURLs:
        # srm://f-dpm001.grid.sinica.edu.tw:8446/srm/managerv2?SFN=/dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/rucio/data12_8TeV/55/bc/NTUP_SUSYSKIM.01161650._000003.root.1
        # srm://f-dpm001.grid.sinica.edu.tw:8446/srm/managerv2?SFN=/dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/user/apetrid/0328091854/user.apetrid.0328091854.805485.lib._011669/user.apetrid.0328091854.805485.lib._011669.lib.tgz

        if "/rucio" in pre_destination:
            surl = mover.getFullPath(
                scope, token, filename, analyJob, prodSourceLabel, alt=use_alt)
        else:
            surl = "%s%s" % (endpoint, dst_gpfn)

            # The SURL may start with something like
            # 'token:ATLASMCTAPE:srm://srm-atlas.cern.ch:8443/srm/man/..';
            # in that case strip the space token that precedes the srm info
            if surl.startswith('token'):
                tolog("Removing space token part from SURL")
                _ignored, surl = mover.extractSE(surl)

        for message in ("SURL = %s" % (surl),
                        "dst_gpfn = %s" % (dst_gpfn),
                        "lfcdir = %s" % (lfcdir)):
            tolog(message)

        return ec, diagnostics, "", dst_gpfn, lfcdir, surl
    def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname, filename, **pdict):
        """ Get proper paths (SURL and LFC paths) """

        ec = 0
        pilotErrorDiag = ""
        tracer_error = ""
        dst_gpfn = ""
        lfcdir = ""
        surl = ""

        alt = pdict.get('alt', False)
        scope = pdict.get('scope', None)
        
        # Get the proper endpoint
        sitemover = SiteMover.SiteMover()
        se = sitemover.getProperSE(token, alt=alt)

        # For production jobs, the SE path is stored in seprodpath
        # For analysis jobs, the SE path is stored in sepath

        destination = sitemover.getPreDestination(analyJob, token, prodSourceLabel, alt=alt)
        if destination == '':
            pilotErrorDiag = "put_data destination path in SE not defined"
            tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
            tracer_error = 'PUT_DEST_PATH_UNDEF'
            ec = error.ERR_STAGEOUTFAILED
            return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl
        else:
            tolog("Going to store job output at: %s" % (destination))
            # /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/
        
        se_path = ''
        sw_path = ''
        prod_name = ''
        prodSlt = ''
        TMPMDSTFILE = ''
        TMPHISTFILE = ''
        EVTDUMPFILE = ''
        MERGEDMDSTFILE = ''
        MERGEDHISTFILE = ''
        MERGEDDUMPFILE = ''
        PRODSOFT = ''
        
        if not ".log.tgz" in filename:
            try:
                # always use this filename as the new jobDef module name
                import newJobDef
                
                job = Job.Job()
                job.setJobDef(newJobDef.job)
                
                tolog("job.payload: %s" % (job.payload))
                tolog("job.trf: %s" % (job.trf))
                tolog("filename: %s" % (filename))
                
                tolog("Trying to get sw path, name and hist filename from job definition.")
                sw_prefix, sw_path, prod_name, prodSlt, TMPMDSTFILE, TMPHISTFILE, EVTDUMPFILE, MERGEDMDSTFILE, MERGEDHISTFILE, MERGEDDUMPFILE, PRODSOFT = self.getSWPathAndNameAndFilename(job.jobPars)
                tolog("sw_prefix: %s" % (sw_prefix))
                tolog("sw_path: %s" % (sw_path))
                tolog("prod_name: %s" % (prod_name))
                tolog("prodSlt: %s" % (prodSlt))
                tolog("TMPMDSTFILE: %s" % (TMPMDSTFILE))
                tolog("TMPHISTFILE: %s" % (TMPHISTFILE))
                tolog("EVTDUMPFILE: %s" % (EVTDUMPFILE))
                tolog("MERGEDMDSTFILE: %s" % (MERGEDMDSTFILE))
                tolog("MERGEDHISTFILE: %s" % (MERGEDHISTFILE))
                tolog("MERGEDDUMPFILE: %s" % (MERGEDDUMPFILE))
                tolog("PRODSOFT: %s" % (PRODSOFT))
                
                # prod
                if filename == TMPMDSTFILE :
                    se_path = sw_prefix + sw_path + PRODSOFT + '/mDST.chunks'
                if filename == TMPHISTFILE:
                    se_path = sw_prefix + sw_path + PRODSOFT + '/TRAFDIC'
                if filename == "testevtdump.raw":
                    se_path = sw_prefix + sw_path + PRODSOFT + '/evtdump/slot' + prodSlt
                    filename = EVTDUMPFILE
                if filename == "payload_stdout.txt":
                    se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                    filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stdout')
                if filename == "payload_stderr.txt":
                    se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                    filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stderr')
                # Gzipped payload logs (.out.gz) will replace the plain .txt ones; for backward compatibility both are handled
                if filename == "payload_stdout.out.gz":
                    se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                    filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stdout.gz')
                if filename == "payload_stderr.out.gz":
                    se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                    filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stderr.gz')
                
                # merge
                if filename == MERGEDMDSTFILE :
                    se_path = sw_prefix + sw_path + PRODSOFT + '/mDST'
                if filename == MERGEDHISTFILE:
                    se_path = sw_prefix + sw_path + PRODSOFT + '/histos'
                if filename == MERGEDDUMPFILE:
                    se_path = sw_prefix + sw_path + PRODSOFT + '/mergedDump/slot' + prodSlt
                
                destination = se_path
                
            except Exception, errorMsg:
                error = PilotErrors()
                pilotErrorDiag = "Exception caught in COMPASSSiteInformation: %s" % str(errorMsg)
                
                if 'format_exc' in traceback.__all__:
                    pilotErrorDiag += ", " + traceback.format_exc()    

                try:
                    tolog("!!FAILED!!3001!! %s" % (pilotErrorDiag))
                except Exception, e:
                    if len(pilotErrorDiag) > 10000:
                        pilotErrorDiag = pilotErrorDiag[:10000]
                        tolog("!!FAILED!!3001!! Truncated (%s): %s" % (e, pilotErrorDiag))
                    else:
                        pilotErrorDiag = "Exception caught in runJob: %s" % (e)
                        tolog("!!FAILED!!3001!! %s" % (pilotErrorDiag))
            tolog(
                "Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)"
                % str(e))
        else:
            report['localSite'], report['remoteSite'] = (_dq2SiteName,
                                                         _dq2SiteName)
            tolog("DQ2 site name: %s" % (_dq2SiteName))

        try:
            self.mkdirWperm(dst_loc_sedir)
        except IOError, e:
            pilotErrorDiag = "put_data could not create dir: %s" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.prepareReport('MKDIR_FAIL', report)
            return SiteMover.put_data_retfail(error.ERR_MKDIR,
                                              pilotErrorDiag,
                                              surl=dst_gpfn)

        cmd = '%srfcp %s %s' % (_setup_str, source, dst_loc_sedir)
        tolog("Executing command: %s" % cmd)
        report['transferStart'] = time()
        s, o = commands.getstatusoutput(cmd)
        report['validateStart'] = time()
        if s != 0:
            o = o.replace('\n', ' ')
            check_syserr(s, o)
            pilotErrorDiag = "Error in copying: %s" % (o)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.prepareReport('RFCP_FAIL', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED,
                                         pilotErrorDiag,