def getLocalFileInfo(self, fileName, checksumType="default", date=None):
    """Return (exit code, info dict) for a local file.

    The info dict carries the file size (as a string), checksum fields and
    an error log; on success the exit code is 0.

    :param fileName: path of the local file to inspect
    :param checksumType: checksum flavour recorded in the output dict
    :param date: if set, replaced by the file modification time string
    :return: (exit code, dict with keys errorLog/report/size/checksum/checksumType)

    NOTE(review): no checksum is actually computed in this block even though
    the output dict reserves a slot for it -- possibly truncated; confirm
    against the full file.
    """
    # note that date is mutable

    statusRet = 0
    outputRet = {}
    outputRet["errorLog"] = ""
    outputRet["report"] = {}
    outputRet["report"]["clientState"] = None
    outputRet["size"] = 0
    outputRet["checksum"] = ""
    outputRet["checksumType"] = checksumType

    self.log("Getting local File(%s) info." % fileName)

    # does the file exist?
    if not os.path.isfile(fileName):
        if fileName.find("DBRelease") >= 0 and os.path.exists(os.path.dirname(fileName)):
            # BUGFIX: was 'fileNameame' (NameError on this branch)
            outputRet["errorLog"] = errorLog = "DBRelease file missing: %s" % (fileName)
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSDBREL, outputRet
        else:
            outputRet["errorLog"] = errorLog = "No such file or directory: %s" % (fileName)
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSINGLOCALFILE, outputRet

    # get the modification time if needed and store it in the mutable object
    if date:
        date = SiteMover.getModTime(os.path.dirname(fileName), os.path.basename(fileName))

    # get the file size
    try:
        self.log("Executing getsize() for file: %s" % (fileName))
        outputRet["size"] = fsize = str(os.path.getsize(fileName))
    except OSError as e:
        outputRet["errorLog"] = errorLog = "Could not get file size: %s" % str(e)
        # consistency fix: was tolog(); every other message here uses self.log()
        self.log("!!WARNING!!2999!! %s" % (errorLog))
        return PilotErrors.ERR_FAILEDSIZELOCAL, outputRet

    # BUGFIX: the success path previously fell off the end (returned None),
    # contradicting the documented "exit code (0 if OK)" contract
    return statusRet, outputRet
def getLocalFileInfo(self, fileName, checksumType="default", date=None):
    """Return (exit code, info dict) for a local file.

    The info dict carries the file size (as a string), checksum fields and
    an error log; on success the exit code is 0.

    :param fileName: path of the local file to inspect
    :param checksumType: checksum flavour recorded in the output dict
    :param date: if set, replaced by the file modification time string
    :return: (exit code, dict with keys errorLog/report/size/checksum/checksumType)

    NOTE(review): no checksum is actually computed in this block even though
    the output dict reserves a slot for it -- possibly truncated; confirm
    against the full file.
    """
    # note that date is mutable

    statusRet = 0
    outputRet = {}
    outputRet["errorLog"] = ""
    outputRet["report"] = {}
    outputRet["report"]["clientState"] = None
    outputRet["size"] = 0
    outputRet["checksum"] = ""
    outputRet["checksumType"] = checksumType

    self.log("Getting local File(%s) info." % fileName)

    # does the file exist?
    if not os.path.isfile(fileName):
        if fileName.find("DBRelease") >= 0 and os.path.exists(os.path.dirname(fileName)):
            # BUGFIX: was 'fileNameame' (NameError on this branch)
            outputRet["errorLog"] = errorLog = "DBRelease file missing: %s" % (fileName)
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSDBREL, outputRet
        else:
            outputRet["errorLog"] = errorLog = "No such file or directory: %s" % (fileName)
            self.log("!!WARNING!!2999!! %s" % (errorLog))
            return PilotErrors.ERR_MISSINGLOCALFILE, outputRet

    # get the modification time if needed and store it in the mutable object
    if date:
        date = SiteMover.getModTime(os.path.dirname(fileName), os.path.basename(fileName))

    # get the file size
    try:
        self.log("Executing getsize() for file: %s" % (fileName))
        outputRet["size"] = fsize = str(os.path.getsize(fileName))
    except OSError as e:
        outputRet["errorLog"] = errorLog = "Could not get file size: %s" % str(e)
        # consistency fix: was tolog(); every other message here uses self.log()
        self.log("!!WARNING!!2999!! %s" % (errorLog))
        return PilotErrors.ERR_FAILEDSIZELOCAL, outputRet

    # BUGFIX: the success path previously fell off the end (returned None),
    # contradicting the documented "exit code (0 if OK)" contract
    return statusRet, outputRet
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """Copy an output file from the local disk to the local SE via HTTPS (curl).

    Based on the dCacheSiteMover put function. The upload URL is derived from
    the Rucio protocol description of the local space token; after the
    transfer, the local checksum is compared with the one reported by Rucio
    (or, for dCache storage, the one obtained directly from the SE).

    :param source: local path of the file to upload
    :param destination: kept for mover-interface compatibility (the actual
        destination is derived from Rucio / getProperPaths)
    :param fsize: local file size; looked up when 0
    :param fchecksum: local checksum; looked up when 0
    :param pdict: optional parameters (lfn, guid, token, scope, dsname, ...)
    :return: put_data_retfail() tuple on failure. NOTE(review): no success
        return statement is visible in this block -- presumably truncated,
        confirm against the full file.
    """
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report; it is only initialized when the caller
    # actually provided one (the else-clause runs on the no-exception path)
    try:
        report = pdict['report']
    except KeyError:  # narrowed from a bare except: only a missing key is expected
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # look up size/checksum if the caller did not supply them
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    # proxy verification is deliberately disabled here
    tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(
        error, analysisJob, token, prodSourceLabel, dsname, filename,
        scope=scope, sitemover=self)  # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # here begins the new magic... from Vincenzo Lavorini
    sitemover = SiteMover.SiteMover()
    v_path = sitemover.getPathFromScope(scope, filename)
    rucio_c = Client()

    # strip the leading "ATLAS" (5 chars) from the space token when present
    if "ATLAS" in token:
        token_ok = token[5:]
    else:
        token_ok = token
    local_se_token = self.site_name + "_" + token_ok

    # single protocol lookup (the Rucio server was previously queried three
    # times for the same endpoint)
    protocols = rucio_c.get_protocols(local_se_token)
    v_hostname = [j['hostname'] for j in protocols]
    v_port = [j['port'] for j in protocols]
    v_prefix = [j['prefix'] for j in protocols]
    v_address = "https://%s:%s%s" % (v_hostname[0], v_port[0], v_prefix[0])
    tolog("prova1 address is %s" % (v_address))

    # avoid a doubled "rucio" segment when both the protocol prefix and the
    # scope path carry one (trailing "/rucio/" is 7 chars, "/rucio" is 6)
    if "rucio/" in v_address and "/rucio" in v_path:
        v_address = v_address[:-7]
        tolog("prova2 address is %s" % (v_address))
    elif "rucio" in v_address and "rucio" in v_path:
        v_address = v_address[:-6]
        tolog("prova3 address is %s" % (v_address))
    full_http_surl = v_address + v_path
    tolog("prova3 full_http__surl is %s" % (full_http_surl))

    full_surl = surl
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    if testLevel == "1":
        source = "thisisjustatest"

    # determine which timeout option to use
    timeout_option = "--connect-timeout 300"

    sslCert = self.sslCert
    sslKey = self.sslKey
    sslCertDir = self.sslCertDir

    # upload with curl
    # BUGFIX: --cert and --cacert previously received self.sslKey; the local
    # variables above were assigned but never used
    _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % \
               (sslCert, sslKey, sslCert, sslCertDir, full_http_surl, source)

    tolog("Executing command: %s" % (_cmd_str))
    t0 = os.times()
    _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
    _cmd_out, _cmd_stderr = _cmd.communicate()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    report['validateStart'] = time()
    t1 = os.times()
    t = t1[4] - t0[4]
    tolog("Curl command output = %s" % (_cmd_out))
    tolog("Command finished after %f s" % (t))
    if "bytes uploaded" not in _cmd_out:
        tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))

    verified = False

    # getting the remote checksum from Rucio:
    token_file = open('token_fle', 'r')
    token_rucio = token_file.readline()
    token_file.close()  # BUGFIX: the file handle was never closed
    pos2print = token_rucio.find("CN")
    token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
    tolog("Token I am using: %s" % (token_rucio2print))

    httpredirector = readpar('httpredirector')

    # poll Rucio for the remote checksum, up to 7 attempts
    trial_n = 1
    remote_checksum = "none"
    while (remote_checksum == "none" and trial_n < 8):
        trial_n += 1
        if not httpredirector:
            cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio, scope, filename)
            cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio2print, scope, filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                # BUGFIX: was 'token_rucioi2print' (NameError on this branch)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)
            else:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                # BUGFIX: was reps[0].scope / reps[0].filename -- 'reps' is
                # undefined here; use scope/filename like the sibling branches
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)

        tolog("Getting remote checksum: command to be executed: %s" % (cmd2print))
        checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        remote_checksum, stderr = checksum_cmd.communicate()
        tolog("Remote checksum as given by rucio %s" % (remote_checksum))
        if (remote_checksum == "none"):
            tolog("In checking checksum: command std error: %s" % (stderr))
            pilotErrorDiag = "Cannot get the checksum of file on SE"
            tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
            tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
            # NOTE(review): 'time.sleep' assumes the time module is in scope,
            # while the time() calls above suggest 'from time import time' --
            # confirm the file-level imports provide both
            time.sleep(3)

    # compare the checksums if the remote checksum was extracted
    tolog("Remote checksum: %s" % str(remote_checksum))
    tolog("Local checksum: %s" % (fchecksum))
    if remote_checksum:
        if remote_checksum != fchecksum:
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
            tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
            if csumtype == "adler32":
                self.prepareReport('AD_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                self.prepareReport('MD5_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
        else:
            tolog("Remote and local checksums verified")
            verified = True
    else:
        tolog("Skipped primary checksum verification (remote checksum not known)")

    # if lcg-ls could not be used
    if "/pnfs/" in surl and not remote_checksum:
        # for dCache systems we can test the checksum with the use method
        tolog("Detected dCache system: will verify local checksum with the local SE checksum")
        # e.g. surl = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/...
        path = surl[surl.find('/pnfs/'):]
        _filename = os.path.basename(path)
        _dir = os.path.dirname(path)

        # get the remote checksum
        tolog("Local checksum: %s" % (fchecksum))
        try:
            remote_checksum = self.getdCacheChecksum(_dir, _filename)
        except Exception as e:
            pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
        else:
            if remote_checksum == "NOSUCHFILE":
                pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                self.prepareReport('NOSUCHFILE', report)
                return self.put_data_retfail(error.ERR_NOSUCHFILE, pilotErrorDiag, surl=full_surl)
            elif remote_checksum:
                tolog("Remote checksum: %s" % (remote_checksum))
            else:
                tolog("Could not get remote checksum")

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, _filename, remote_checksum, fchecksum)
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
# get the DQ2 site name from ToA try: _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn) except Exception, e: tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e)) else: report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName) tolog("DQ2 site name: %s" % (_dq2SiteName)) try: self.mkdirWperm(dst_loc_sedir) except IOError, e: pilotErrorDiag = "put_data could not create dir: %s" % str(e) tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag)) self.prepareReport('MKDIR_FAIL', report) return SiteMover.put_data_retfail(error.ERR_MKDIR, pilotErrorDiag, surl=dst_gpfn) cmd = '%srfcp %s %s' % (_setup_str, source, dst_loc_sedir) tolog("Executing command: %s" % cmd) report['transferStart'] = time() s, o = commands.getstatusoutput(cmd) report['validateStart'] = time() if s != 0: o = o.replace('\n', ' ') check_syserr(s, o) pilotErrorDiag = "Error in copying: %s" % (o) tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag)) self.prepareReport('RFCP_FAIL', report) return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag, surl=dst_gpfn) try: os.chmod(dst_loc_pfn, self.permissions_FILE)
def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname, filename, **pdict):
    """ Get proper paths (SURL and LFC paths) """
    # NOTE: in the current implementation this function completely depends on
    # the site mover, so it needs to be either reimplemented or moved outside
    # SiteInformation

    # defaults for the six-element return tuple
    ec = 0
    pilotErrorDiag = ""
    tracer_error = ""
    dst_gpfn = ""
    lfcdir = ""
    surl = ""

    alt = pdict.get('alt', False)
    scope = pdict.get('scope', None)

    # Get the proper endpoint; the caller may hand in its own mover
    # (quick workaround HACK: to be properly implemented later)
    sitemover = pdict.get('sitemover', SiteMover.SiteMover())
    se = sitemover.getProperSE(token, alt=alt)

    # Production jobs keep the SE path in seprodpath, analysis jobs in sepath
    destination = sitemover.getPreDestination(analyJob, token, prodSourceLabel, alt=alt)
    if destination == '':
        pilotErrorDiag = "put_data destination path in SE not defined"
        tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        return error.ERR_STAGEOUTFAILED, pilotErrorDiag, 'PUT_DEST_PATH_UNDEF', dst_gpfn, lfcdir, surl

    tolog("Going to store job output at: %s" % (destination))
    # e.g. /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/
    # rucio path: SE + destination + SiteMover.getPathFromScope(scope, lfn)

    # Resolve the LFC path, e.g. /grid/atlas/users/pathena
    lfcpath, pilotErrorDiag = sitemover.getLFCPath(analyJob, alt=alt)
    if lfcpath == "":
        return error.ERR_STAGEOUTFAILED, pilotErrorDiag, 'LFC_PATH_EMPTY', dst_gpfn, lfcdir, surl
    tolog("LFC path = %s" % (lfcpath))

    ec, pilotErrorDiag, dst_gpfn, lfcdir = sitemover.getFinalLCGPaths(analyJob,
                                                                      destination,
                                                                      dsname,
                                                                      filename,
                                                                      lfcpath,
                                                                      token,
                                                                      prodSourceLabel,
                                                                      scope=scope,
                                                                      alt=alt)
    if ec != 0:
        return ec, pilotErrorDiag, 'UNKNOWN_DSN_FORMAT', dst_gpfn, lfcdir, surl

    # Define the SURL; rucio-style destinations get the full rucio path,
    # everything else is SE endpoint + LCG path
    if "/rucio" in destination:
        surl = sitemover.getFullPath(scope, token, filename, analyJob, prodSourceLabel, alt=alt)
    else:
        surl = "%s%s" % (se, dst_gpfn)

    tolog("SURL = %s" % (surl))
    tolog("dst_gpfn = %s" % (dst_gpfn))
    tolog("lfcdir = %s" % (lfcdir))

    return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl
def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname, filename, **pdict):
    """ Get proper paths (SURL and LFC paths) """

    # defaults for the six-element return tuple
    ec = 0
    pilotErrorDiag = ""
    tracer_error = ""
    dst_gpfn = ""
    lfcdir = ""
    surl = ""

    alt = pdict.get('alt', False)
    scope = pdict.get('scope', None)

    # Get the proper endpoint
    sitemover = SiteMover.SiteMover()
    se = sitemover.getProperSE(token, alt=alt)

    # Production jobs keep the SE path in seprodpath, analysis jobs in sepath
    destination = sitemover.getPreDestination(analyJob, token, prodSourceLabel, alt=alt)
    if destination == '':
        pilotErrorDiag = "put_data destination path in SE not defined"
        tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        return error.ERR_STAGEOUTFAILED, pilotErrorDiag, 'PUT_DEST_PATH_UNDEF', dst_gpfn, lfcdir, surl

    tolog("Going to store job output at: %s" % (destination))
    # e.g. /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/
    # rucio path: SE + destination + SiteMover.getPathFromScope(scope, lfn)

    # Resolve the LFC path, e.g. /grid/atlas/users/pathena
    lfcpath, pilotErrorDiag = sitemover.getLFCPath(analyJob, alt=alt)
    if lfcpath == "":
        return error.ERR_STAGEOUTFAILED, pilotErrorDiag, 'LFC_PATH_EMPTY', dst_gpfn, lfcdir, surl
    tolog("LFC path = %s" % (lfcpath))

    ec, pilotErrorDiag, dst_gpfn, lfcdir = sitemover.getFinalLCGPaths(analyJob,
                                                                      destination,
                                                                      dsname,
                                                                      filename,
                                                                      lfcpath,
                                                                      token,
                                                                      prodSourceLabel,
                                                                      scope=scope,
                                                                      alt=alt)
    if ec != 0:
        return ec, pilotErrorDiag, 'UNKNOWN_DSN_FORMAT', dst_gpfn, lfcdir, surl

    # Define the SURL; rucio-style destinations get the full rucio path
    if "/rucio" in destination:
        surl = sitemover.getFullPath(scope, token, filename, analyJob, prodSourceLabel, alt=alt)
    else:
        surl = "%s%s" % (se, dst_gpfn)

    # The SURL might start with a space-token prefix, e.g.
    # 'token:ATLASMCTAPE:srm://srm-atlas.cern.ch:8443/srm/man/..' -- if so,
    # strip the space token before the srm info
    if surl.startswith('token'):
        tolog("Removing space token part from SURL")
        dummy, surl = sitemover.extractSE(surl)

    tolog("SURL = %s" % (surl))
    tolog("dst_gpfn = %s" % (dst_gpfn))
    tolog("lfcdir = %s" % (lfcdir))

    return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl
def getProperPaths(self, error, analyJob, token, prodSourceLabel, dsname, filename, **pdict):
    """ Get proper paths (SURL and LFC paths)

    COMPASS-specific variant: the destination path is derived from the job
    definition (software path, production name, slot and special output
    filenames) rather than from the dataset name alone.

    NOTE(review): this block ends inside the exception handler with no final
    return statement visible here -- presumably truncated; confirm against
    the full file.
    """
    ec = 0
    pilotErrorDiag = ""
    tracer_error = ""
    dst_gpfn = ""
    lfcdir = ""
    surl = ""

    alt = pdict.get('alt', False)
    scope = pdict.get('scope', None)

    # Get the proper endpoint
    sitemover = SiteMover.SiteMover()
    se = sitemover.getProperSE(token, alt=alt)

    # For production jobs, the SE path is stored in seprodpath
    # For analysis jobs, the SE path is stored in sepath
    destination = sitemover.getPreDestination(analyJob, token, prodSourceLabel, alt=alt)
    if destination == '':
        pilotErrorDiag = "put_data destination path in SE not defined"
        tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        tracer_error = 'PUT_DEST_PATH_UNDEF'
        ec = error.ERR_STAGEOUTFAILED
        return ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl
    else:
        tolog("Going to store job output at: %s" % (destination))
        # e.g. /dpm/grid.sinica.edu.tw/home/atlas/atlasscratchdisk/

    # Placeholders filled in from the job definition below (kept empty for
    # log tarballs, which skip the whole lookup)
    se_path = ''
    sw_path = ''
    prod_name = ''
    prodSlt = ''
    TMPMDSTFILE = ''
    TMPHISTFILE = ''
    EVTDUMPFILE = ''
    MERGEDMDSTFILE = ''
    MERGEDHISTFILE = ''
    MERGEDDUMPFILE = ''
    PRODSOFT = ''
    if not ".log.tgz" in filename:
        try:
            # always use this filename as the new jobDef module name
            import newJobDef
            job = Job.Job()
            job.setJobDef(newJobDef.job)
            tolog("job.payload: %s" % (job.payload))
            tolog("job.trf: %s" % (job.trf))
            tolog("filename: %s" % (filename))
            tolog("Trying to get sw path, name and hist filename from job definition.")
            sw_prefix, sw_path, prod_name, prodSlt, TMPMDSTFILE, TMPHISTFILE, EVTDUMPFILE, MERGEDMDSTFILE, MERGEDHISTFILE, MERGEDDUMPFILE, PRODSOFT = self.getSWPathAndNameAndFilename(job.jobPars)
            tolog("sw_prefix: %s" % (sw_prefix))
            tolog("sw_path: %s" % (sw_path))
            tolog("prod_name: %s" % (prod_name))
            tolog("prodSlt: %s" % (prodSlt))
            tolog("TMPMDSTFILE: %s" % (TMPMDSTFILE))
            tolog("TMPHISTFILE: %s" % (TMPHISTFILE))
            tolog("EVTDUMPFILE: %s" % (EVTDUMPFILE))
            tolog("MERGEDMDSTFILE: %s" % (MERGEDMDSTFILE))
            tolog("MERGEDHISTFILE: %s" % (MERGEDHISTFILE))
            tolog("MERGEDDUMPFILE: %s" % (MERGEDDUMPFILE))
            tolog("PRODSOFT: %s" % (PRODSOFT))

            # prod: map each known output filename to its SE subdirectory;
            # some branches also rename the file being stored
            if filename == TMPMDSTFILE:
                se_path = sw_prefix + sw_path + PRODSOFT + '/mDST.chunks'
            if filename == TMPHISTFILE:
                se_path = sw_prefix + sw_path + PRODSOFT + '/TRAFDIC'
            if filename == "testevtdump.raw":
                se_path = sw_prefix + sw_path + PRODSOFT + '/evtdump/slot' + prodSlt
                filename = EVTDUMPFILE
            if filename == "payload_stdout.txt":
                se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stdout')
            if filename == "payload_stderr.txt":
                se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stderr')
            # .txt.gz will replace .txt, for back compatibility both are placed
            if filename == "payload_stdout.out.gz":
                se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stdout.gz')
            if filename == "payload_stderr.out.gz":
                se_path = sw_prefix + sw_path + PRODSOFT + '/logFiles'
                filename = prod_name + '.' + TMPHISTFILE.replace('.root', '.stderr.gz')
            # merge
            if filename == MERGEDMDSTFILE:
                se_path = sw_prefix + sw_path + PRODSOFT + '/mDST'
            if filename == MERGEDHISTFILE:
                se_path = sw_prefix + sw_path + PRODSOFT + '/histos'
            if filename == MERGEDDUMPFILE:
                se_path = sw_prefix + sw_path + PRODSOFT + '/mergedDump/slot' + prodSlt
            destination = se_path
        except Exception, errorMsg:
            # NOTE(review): rebinding 'error' here shadows the PilotErrors-like
            # parameter of the same name -- confirm this is intentional
            error = PilotErrors()
            pilotErrorDiag = "Exception caught in COMPASSSiteInformation: %s" % str(errorMsg)

            if 'format_exc' in traceback.__all__:
                pilotErrorDiag += ", " + traceback.format_exc()

            # logging the diagnostic may itself fail (e.g. on very long
            # messages); truncate and retry in that case
            try:
                tolog("!!FAILED!!3001!! %s" % (pilotErrorDiag))
            except Exception, e:
                if len(pilotErrorDiag) > 10000:
                    pilotErrorDiag = pilotErrorDiag[:10000]
                    tolog("!!FAILED!!3001!! Truncated (%s): %s" % (e, pilotErrorDiag))
                else:
                    pilotErrorDiag = "Exception caught in runJob: %s" % (e)
                    tolog("!!FAILED!!3001!! %s" % (pilotErrorDiag))
tolog( "Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e)) else: report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName) tolog("DQ2 site name: %s" % (_dq2SiteName)) try: self.mkdirWperm(dst_loc_sedir) except IOError, e: pilotErrorDiag = "put_data could not create dir: %s" % str(e) tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag)) self.prepareReport('MKDIR_FAIL', report) return SiteMover.put_data_retfail(error.ERR_MKDIR, pilotErrorDiag, surl=dst_gpfn) cmd = '%srfcp %s %s' % (_setup_str, source, dst_loc_sedir) tolog("Executing command: %s" % cmd) report['transferStart'] = time() s, o = commands.getstatusoutput(cmd) report['validateStart'] = time() if s != 0: o = o.replace('\n', ' ') check_syserr(s, o) pilotErrorDiag = "Error in copying: %s" % (o) tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag)) self.prepareReport('RFCP_FAIL', report) return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag,