def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE to the local dir via an object-store
    presigned URL.

    :param gpfn: global PFN (unused here; stage-in is keyed on jobId/lfn/jobSetID)
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment,
                  pandaProxySecretKey, jobsetID, report)
    :return: (status, output) as returned by stageIn()
    """

    # single PilotErrors instance reused for error-name lookup below
    error = PilotErrors()

    # Get input parameters from pdict
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    pandaProxySecretKey = pdict.get('pandaProxySecretKey')
    jobSetID = pdict.get('jobsetID')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstorepresignedurl', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    # stageIn signature: pandaID, filename, jobSetID, pandaProxySecretKey=None, stageIn=True
    status, output = self.stageIn(jobId, lfn, jobSetID, pandaProxySecretKey, fullname, fsize, fchecksum, experiment)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
        state = "DONE"
    else:
        # map the numeric error code to a state name for the tracing report
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE (object store) to the local dir.

    :param gpfn: global PFN of the source file, passed straight to stageIn()
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, report)
    :return: (status, output) as returned by stageIn()
    """

    # single PilotErrors instance reused for error-name lookup below
    error = PilotErrors()

    # Get input parameters from pdict
    # NOTE(review): 'proxycheck', 'usect' and 'access' were read from pdict
    # but never used in this mover; the dead reads have been dropped.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
        state = "DONE"
    else:
        # map the numeric error code to a state name for the tracing report
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE to the local dir, first deciding the
    transfer mode (copy vs direct access) via getStageInMode().

    :param gpfn: global PFN of the source file, passed straight to stageIn()
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, transferType,
                  access, report)
    :return: (status, errorLog string from the stage-in output dict)
    """

    # Get input parameters from pdict
    # NOTE(review): 'proxycheck' and 'usect' were read but never used; the
    # unused PilotErrors()/pilotErrorDiag locals have also been dropped.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    transferType = pdict.get('transferType', '')
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'local', lfn, guid)

    tolog("transferType=%s" % (transferType))

    # decide how the file should be made available (copy vs direct access)
    status, output = self.getStageInMode(lfn, prodDBlockToken, transferType)
    tolog("output=%s" % str(output))
    if output["transfer_mode"]:
        updateFileState(lfn, workDir, jobId, mode="transfer_mode", state=output["transfer_mode"], ftype="input")
        tolog("updated file state for lfn=%s, workDir=%s, jobId=%s, state=%s" % (lfn, workDir, jobId, output["transfer_mode"]))
    if status != 0:
        # mode determination failed: report and bail out
        self.prepareReport(output["report"], report)
        return status, output["errorLog"]

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")

    self.prepareReport(output["report"], report)
    return status, output["errorLog"]
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE to the local dir with gfal-copy, after
    converting the source path to an S3 path via the site copyprefix.

    :param gpfn: global PFN; rewritten via getCopyPrefixPathNew() before use
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, report)
    :return: (status, output) — output is the stageIn() output or an error string
    """

    # single PilotErrors instance used both for codes and name lookup
    error = PilotErrors()

    # Get input parameters from pdict
    # NOTE(review): 'proxycheck', 'usect' and 'access' were read but unused.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    # get the site information object
    si = getSiteInformation(experiment)

    # the copyprefix must translate the PFN into an s3:// path
    ret_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = error.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
    else:
        gpfn = ret_path
        status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment)

        if status == 0:
            updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
            state = "DONE"
        else:
            # map the numeric error code to a state name for the tracing report
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE (object store) to the local dir, for a
    specific object-store bucket.

    :param gpfn: global PFN of the source file, passed straight to stageIn()
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, os_bucket_id, report)
    :return: (status, output) as returned by stageIn()
    """

    # single PilotErrors instance reused for error-name lookup below
    error = PilotErrors()

    # Get input parameters from pdict
    # NOTE(review): 'usect' and 'access' were read but never used; dropped.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    os_bucket_id = pdict.get('os_bucket_id', -1)

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment, os_bucket_id=os_bucket_id)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
        state = "DONE"
    else:
        # map the numeric error code to a state name for the tracing report
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE to the local dir with gfal-copy, for a
    specific object-store bucket, after converting the source path to an
    S3 path via the site copyprefix.

    :param gpfn: global PFN; rewritten via getCopyPrefixPathNew() before use
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, os_bucket_id, report)
    :return: (status, output) — output is the stageIn() output or an error string
    """

    # single PilotErrors instance used both for codes and name lookup
    error = PilotErrors()

    # Get input parameters from pdict
    # NOTE(review): 'proxycheck', 'usect' and 'access' were read but unused.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    os_bucket_id = pdict.get('os_bucket_id', -1)

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    # get the site information object
    si = getSiteInformation(experiment)

    # the copyprefix must translate the PFN into an s3:// path
    ret_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = error.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
    else:
        gpfn = ret_path
        status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment, os_bucket_id=os_bucket_id)

        if status == 0:
            updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
            state = "DONE"
        else:
            # map the numeric error code to a state name for the tracing report
            state = error.getErrorName(status)
            if state is None:
                state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE (object store) to the local dir for the
    event service; tags the tracing report with the remote endpoint.

    :param gpfn: global PFN (a URL; its scheme/host/port go into the trace)
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, os_bucket_id, report)
    :return: (status, output) as returned by stageIn()
    """

    # single PilotErrors instance reused for error-name lookup below
    error = PilotErrors()

    # Get input parameters from pdict
    # NOTE(review): 'usect' and 'access' were read but never used; dropped.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    os_bucket_id = pdict.get('os_bucket_id', -1)

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment, os_bucket_id=os_bucket_id, report=report)

    # tag the trace as an event-service get and record the remote endpoint
    report['eventType'] = 'get_es'
    parsed = urlparse.urlparse(gpfn)
    scheme = parsed.scheme
    hostname = parsed.netloc.partition(':')[0]
    port = parsed.netloc.partition(':')[2]
    if port:
        report['remoteSite'] = '%s://%s:%s' % (scheme, hostname, int(port))
    else:
        # BUGFIX: int('') raised ValueError when the URL had no explicit port;
        # in that case record the endpoint without a port
        report['remoteSite'] = '%s://%s' % (scheme, hostname)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
        state = "DONE"
    else:
        # map the numeric error code to a state name for the tracing report
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Copy an input file from the SE to the local dir, first deciding the
    transfer mode (copy vs direct access) via getStageInMode().

    :param gpfn: global PFN of the source file, passed straight to stageIn()
    :param lfn: local file name
    :param path: destination directory ('' means current working directory)
    :param fsize: expected file size (0 = unknown)
    :param fchecksum: expected checksum (0 = unknown)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (jobId, workDir, experiment, transferType,
                  access, report)
    :return: (status, errorLog string from the stage-in output dict)
    """

    # Get input parameters from pdict
    # NOTE(review): 'proxycheck' and 'usect' were read but never used; the
    # unused PilotErrors()/pilotErrorDiag locals have also been dropped.
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    transferType = pdict.get('transferType', '')
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'local', lfn, guid)

    tolog("transferType=%s" % (transferType))

    # decide how the file should be made available (copy vs direct access)
    status, output = self.getStageInMode(lfn, prodDBlockToken, transferType)
    tolog("output=%s" % str(output))
    if output["transfer_mode"]:
        updateFileState(lfn, workDir, jobId, mode="transfer_mode", state=output["transfer_mode"], ftype="input")
        tolog("updated file state for lfn=%s, workDir=%s, jobId=%s, state=%s" % (lfn, workDir, jobId, output["transfer_mode"]))
    if status != 0:
        # mode determination failed: report and bail out
        self.prepareReport(output["report"], report)
        return status, output["errorLog"]

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")

    self.prepareReport(output["report"], report)
    return status, output["errorLog"]
self.log( 'WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageinretry, result)) if not isinstance(result, Exception): # transferred successfully # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="input") dumpFileStates(self.workDir, self.job.jobId, ftype="input") ## self.updateSURLDictionary(guid, surl, self.workDir, self.job.jobId) # FIX ME LATER fdat = result.copy() #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) else: failed_transfers.append(result) dumpFileStates(self.workDir, self.job.jobId, ftype="input") #self.log('transferred_files= %s' % transferred_files)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Stage-in function: copy a file from CASTOR to the local directory
    with rfcp, optionally short-circuiting to direct-access mode, then
    verify size and checksum of the local copy.

    Returns (0, pilotErrorDiag) on success, or (error code, diagnostic)
    on any failure; direct-access files return ERR_DIRECTIOFILE without
    being copied.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get("usect", True)
    jobId = pdict.get("jobId", "")
    workDir = pdict.get("workDir", "")
    experiment = pdict.get("experiment", "")
    prodDBlockToken = pdict.get("access", "")

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict["report"], "castor", lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.prepareReport("RFCP_FAIL", report)
        return ec, pilotErrorDiag

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    # do we have a valid proxy?
    s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup)
    if s != 0:
        self.prepareReport("PROXYFAIL", report)
        return s, pilotErrorDiag

    # Strip off prefix in order to use rfcp directly
    tolog("gpfn: %s" % (gpfn))
    pat = re.compile("^.*(/castor/.*)$")
    mat = pat.match(gpfn)
    if mat:
        # keep only the /castor/... part of the PFN for rfcp
        getfile = mat.group(1)
    else:
        pilotErrorDiag = "Get file not in castor: %s" % (gpfn)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        self.prepareReport("NO_FILE", report)
        return error.ERR_STAGEINFAILED, pilotErrorDiag

    # when the file has been copied we will rename it to the lfn (to remove the legacy __DQ2-string on some files)
    dest_path = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: overrides direct access
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == "local" or not rootFile:
                # non-root files (and 'local' token files) must be copied
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report["relativeStart"] = None
                report["transferStart"] = None
                self.prepareReport("FOUND_ROOT", report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                # ERR_DIRECTIOFILE tells the caller the file will be read
                # directly by the payload, i.e. this is not a real failure
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
    else:
        tolog("Normal file transfer")

    # transfer the input file with rfcp
    _cmd_str = "%srfcp %s %s" % (envsetup, getfile, dest_path)
    tolog("Executing command: %s" % (_cmd_str))

    report["transferStart"] = time()
    s, o = commands.getstatusoutput(_cmd_str)
    report["validateStart"] = time()

    if s != 0:
        o = o.replace("\n", " ")
        check_syserr(s, o)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_path)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        # classify the rfcp failure for a more precise error code
        if o.find("No such file or directory") >= 0:
            if getfile.find("DBRelease") >= 0:
                pilotErrorDiag = "Missing DBRelease file: %s" % (getfile)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                ec = error.ERR_MISSDBREL
            else:
                pilotErrorDiag = "No such file or directory: %s" % (getfile)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                ec = error.ERR_NOSUCHFILE
        else:
            pilotErrorDiag = "rfcp failed: %d, %s" % (s, o)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            ec = error.ERR_STAGEINFAILED

        self.prepareReport("RFCP_FAIL", report)
        return ec, pilotErrorDiag

    # check file size and checksum (only if the caller supplied a reference value)
    if fsize != 0 or fchecksum != 0:
        # which checksum type are we using?
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get remote file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_path, csumtype=csumtype)
        tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
        if ec != 0:
            self.prepareReport("LOCAL_FILE_INFO_FAIL", report)

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

            return ec, pilotErrorDiag

        # compare remote and local file size
        if fsize != 0 and dstfsize != fsize:
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" % (os.path.basename(gpfn), str(dstfsize), str(fsize),)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            self.prepareReport("FS_MISMATCH", report)

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % (csumtype, os.path.basename(gpfn), dstfchecksum, fchecksum,)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

            # report the mismatch with a checksum-type specific state/code
            if csumtype == "adler32":
                self.prepareReport("AD_MISMATCH", report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.prepareReport("MD5_MISMATCH", report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
    self.prepareReport("DONE", report)
    return 0, pilotErrorDiag
def do_put_files(self, ddmendpoint, protocols, files): # old function : TO BE DEPRECATED ... """ Copy files to dest SE :ddmendpoint: DDMEndpoint name used to store files :return: (list of transferred_files details, list of failed_transfers details) :raise: PilotException in case of error """ self.log('[deprecated do_put_files()]Prepare to copy files=%s to ddmendpoint=%s using protocols data=%s' % (files, ddmendpoint, protocols)) self.log("[deprecated do_put_files()]Number of stage-out tries: %s" % self.stageoutretry) # get SURL for Panda calback registration # resolve from special protocol activity=SE # fix me later to proper name of activitiy=SURL (panda SURL, at the moment only 2-letter name is allowed on AGIS side) surl_prot = [dict(se=e[0], path=e[2]) for e in sorted(self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get('SE', []), key=lambda x: x[1])] if not surl_prot: self.log('FAILED to resolve default SURL path for ddmendpoint=%s' % ddmendpoint) return [], [] surl_prot = surl_prot[0] # take first self.log("[do_put_files] SURL protocol to be used: %s" % surl_prot) self.trace_report.update(localSite=ddmendpoint, remoteSite=ddmendpoint) transferred_files, failed_transfers = [], [] for dat in protocols: copytool, copysetup = dat.get('copytool'), dat.get('copysetup') try: sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir) sitemover.trace_report = self.trace_report sitemover.protocol = dat # ## sitemover.ddmconf = self.ddmconf # quick workaround ### sitemover.setup() except Exception, e: self.log('[do_put_files] WARNING: Failed to get SiteMover: %s .. skipped .. 
try to check next available protocol, current protocol details=%s' % (e, dat)) continue self.log("[do_put_files] Copy command: %s, sitemover=%s" % (copytool, sitemover)) self.log("[do_put_files] Copy setup: %s" % copysetup) self.trace_report.update(protocol=copytool) se, se_path = dat.get('se', ''), dat.get('path', '') self.log("[do_put_files] Found N=%s files to be transferred: %s" % (len(files), [e.get('pfn') for e in files])) for fdata in files: scope, lfn, pfn = fdata.get('scope', ''), fdata.get('lfn'), fdata.get('pfn') guid = fdata.get('guid', '') surl = sitemover.getSURL(surl_prot.get('se'), surl_prot.get('path'), scope, lfn, self.job) # job is passing here for possible JOB specific processing turl = sitemover.getSURL(se, se_path, scope, lfn, self.job) # job is passing here for possible JOB specific processing self.trace_report.update(scope=scope, dataset=fdata.get('dsname_report'), url=surl) self.trace_report.update(catStart=time.time(), filename=lfn, guid=guid.replace('-', '')) self.log("[do_put_files] Preparing copy for pfn=%s to ddmendpoint=%s using copytool=%s: mover=%s" % (pfn, ddmendpoint, copytool, sitemover)) self.log("[do_put_files] lfn=%s: SURL=%s" % (lfn, surl)) self.log("[do_put_files] TURL=%s" % turl) if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK): error = "Erron: input pfn file is not exist: %s" % pfn self.log(error) raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL") filename = os.path.basename(pfn) # update the current file state updateFileState(filename, self.workDir, self.job.jobId, mode="file_state", state="not_transferred") dumpFileStates(self.workDir, self.job.jobId) # loop over multple stage-out attempts for _attempt in xrange(1, self.stageoutretry + 1): if _attempt > 1: # if not first stage-out attempt, take a nap before next attempt self.log(" -- Waiting %d seconds before next stage-out attempt for file=%s --" % (self.stageout_sleeptime, filename)) time.sleep(self.stageout_sleeptime) 
self.log("[do_put_files] Put attempt %d/%d for filename=%s" % (_attempt, self.stageoutretry, filename)) try: # quick work around from Job import FileSpec stub_fspec = FileSpec(ddmendpoint=ddmendpoint) result = sitemover.stageOut(pfn, turl, stub_fspec) break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: self.log(traceback.format_exc()) result = PilotException("stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED) self.log('WARNING [do_put_files]: Error in copying file (attempt %s): %s' % (_attempt, result))
def stagein(self): """ :return: (transferred_files, failed_transfers) """ activity = 'pr' pandaqueue = self.si.getQueueName() # FIX ME LATER protocols = self.protocols.setdefault(activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue]) self.log("stage-in: protocols=%s" % protocols) if not protocols: raise PilotException("Failed to get files: no protocols defined for input. check aprotocols schedconfig settings for activity=%s, " % activity, code=PilotErrors.ERR_NOSTORAGE) files = self.job.inData self.resolve_replicas(files, protocols) # populates also self.ddmconf=self.si.resolveDDMConf([]) maxinputsize = self.getMaxInputSize() totalsize = reduce(lambda x, y: x + y.filesize, files, 0) transferred_files, failed_transfers = [], [] self.log("Found N=%s files to be transferred, total_size=%.3f MB: %s" % (len(files), totalsize/1024./1024., [e.lfn for e in files])) for dat in protocols: copytool, copysetup = dat.get('copytool'), dat.get('copysetup') try: sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir) sitemover.trace_report = self.trace_report sitemover.protocol = dat # ## sitemover.ddmconf = self.ddmconf # self.si.resolveDDMConf([]) # quick workaround ### sitemover.setup() except Exception, e: self.log('WARNING: Failed to get SiteMover: %s .. skipped .. 
try to check next available protocol, current protocol details=%s' % (e, dat)) continue remain_files = [e for e in files if e.status not in ['direct_access', 'transferred']] if not remain_files: self.log('INFO: all input files have been successfully processed') break self.log("Copy command [stage-in]: %s, sitemover=%s" % (copytool, sitemover)) self.log("Copy setup [stage-in]: %s" % copysetup) ddmendpoint = dat.get('ddm') self.trace_report.update(protocol=copytool, localSite=ddmendpoint, remoteSite=ddmendpoint) # verify file sizes and available space for stagein sitemover.check_availablespace(maxinputsize, remain_files) for fdata in remain_files: #if fdata.status == 'transferred': # already transferred, skip # continue updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="not_transferred", ftype="input") self.log("[stage-in] Prepare to get_data: protocol=%s, fspec=%s" % (dat, fdata)) r = sitemover.resolve_replica(fdata) # quick stub: propagate changes to FileSpec if r.get('surl'): fdata.surl = r['surl'] # TO BE CLARIFIED if it's still used and need if r.get('pfn'): fdata.turl = r['pfn'] if r.get('ddmendpoint'): fdata.ddmendpoint = r['ddmendpoint'] self.log("[stage-in] found replica to be used: ddmendpoint=%s, pfn=%s" % (fdata.ddmendpoint, fdata.turl)) # check if protocol and found replica belong to same site # protocol_site = self.ddmconf.get(dat.get('ddm'), {}).get('site') replica_site = self.ddmconf.get(fdata.ddmendpoint, {}).get('site') if protocol_site != replica_site: self.log('INFO: cross-sites checks: protocol_site=%s and replica_site=%s mismatched .. 
skip file processing for copytool=%s' % (protocol_site, replica_site, copytool)) continue # check direct access self.log("fdata.is_directaccess()=%s, job.accessmode=%s, mover.is_directaccess()=%s" % (fdata.is_directaccess(), self.job.accessmode, self.is_directaccess())) is_directaccess = self.is_directaccess() if self.job.accessmode == 'copy': is_directaccess = False elif self.job.accessmode == 'direct': is_directaccess = True if fdata.is_directaccess() and is_directaccess: # direct access mode, no transfer required fdata.status = 'direct_access' updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="transfer_mode", state="direct_access", ftype="input") self.log("Direct access mode will be used for lfn=%s .. skip transfer the file" % fdata.lfn) continue # apply site-mover custom job-specific checks for stage-in try: is_stagein_allowed = sitemover.is_stagein_allowed(fdata, self.job) if not is_stagein_allowed: reason = 'SiteMover does not allowed stage-in operation for the job' except PilotException, e: is_stagein_allowed = False reason = e except Exception: raise
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ The local file (local access to the dCache file) is assumed to have a relative path that is the same of the relative path in the 'gpfn' loc_... are the variables used to access the file in the locally exported file system """ error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') analJob = pdict.get('analJob', False) timeout = pdict.get('timeout', 5*3600) # try to get the direct reading control variable (False for direct reading mode; file should not be copied) useCT = pdict.get('usect', True) prodDBlockToken = pdict.get('access', '') # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'dCache', lfn, guid) # get a proper envsetup envsetup = self.getEnvsetup(get=True) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = envsetup ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.prepareReport('RFCP_FAIL', report) return ec, pilotErrorDiag # remove any host and SFN info from PFN path loc_pfn = self.extractPathFromPFN(gpfn) copyprefixin = readpar('copyprefixin') if copyprefixin != '': # Extract the copy prefix pfrom, pto = copyprefixin.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefixin to file: %s" % (loc_pfn)) else: copyprefix = readpar('copyprefix') if copyprefix != '': # Extract the copy prefix pfrom, pto = copyprefix.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefix to file: %s" % (loc_pfn)) report['relativeStart'] = time() # for analysis jobs, skip input file if on tape or if lib file if analJob: if not self.isLibFile(loc_pfn): if not self.isFileStaged(_setup_str, loc_pfn): pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % (loc_pfn) self.prepareReport('FILE_ON_TAPE', report) return error.ERR_FILEONTAPE, pilotErrorDiag else: tolog("Skipping file stage check for lib 
file") # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") elif rootFile: tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.prepareReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") dest_path = os.path.join(path, lfn) _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path) tolog("Executing command: %s" % (_cmd_str)) report['transferStart'] = time() telapsed = 0 try: # t0 = time() # s, cout = commands.getstatusoutput(_cmd_str) # cerr = "" # telapsed = time() - t0 s, telapsed, cout, cerr = timed_command(_cmd_str, timeout) print "DEBUG: s = ", s, type(s) print "DEBUG: telapsed = ", telapsed, type(telapsed) print "DEBUG: cout = ", cout, type(cout) print "DEBUG: cerr = ", cerr, type(cerr) cout = self.filter_text(cout) cerr = self.filter_text(cerr) if not self.is_number(s): s = 1 if not self.is_number(telapsed): telapsed = 0 except Exception, e: tolog("!!WARNING!!2999!! 
timed_command() threw an exception: %s" % (e)) s = 1 o = self.filter_text(e) telapsed = timeout # write traceback info to stderr import traceback exc, msg, tb = sys.exc_info() traceback.print_tb(tb) _pilotErrorDiag = "Unexpected exception: %s" % (get_exc_plus()) tolog("!!WARNING!!2999!! get_exc_plus: %s" % (_pilotErrorDiag)) print "!!WARNING!!2999!! get_exc_plus: %s" % (_pilotErrorDiag)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ The local file (local access to the dCache file) is assumed to have a relative path that is the same of the relative path in the 'gpfn' loc_... are the variables used to access the file in the locally exported file system """ error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') analJob = pdict.get('analJob', False) timeout = pdict.get('timeout', 5 * 3600) prodDBlockToken = pdict.get('access', '') # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'BNLdCache', lfn, guid) # get a proper envsetup envsetup = self.getEnvsetup(get=True) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = envsetup ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.prepareReport('RFCP_FAIL', report) return ec, pilotErrorDiag # remove any host and SFN info from PFN path loc_pfn = self.extractPathFromPFN(gpfn) copyprefixin = readpar('copyprefixin') if copyprefixin != '': # Extract the copy prefix pfrom, pto = copyprefixin.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefixin to file: %s" % (loc_pfn)) else: copyprefix = readpar('copyprefix') if copyprefix != '': # Extract the copy prefix pfrom, pto = copyprefix.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefix to file: %s" % (loc_pfn)) report['relativeStart'] = time.time() pnfsid = self.getPnfsid(loc_pfn, guid) # for analysis jobs, skip input file if on tape or if lib file if analJob: if not self.isLibFile(loc_pfn): if pnfsid == None: isStaged = self.isFileStaged(_setup_str, loc_pfn) else: _com = "/cacheinfos/isFileInPool?pnfsid=%s" % (pnfsid) isStaged = self.isFileStaged( _setup_str, loc_pfn, url="ddmv02.usatlas.bnl.gov:8000", com=_com) if not isStaged: pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % ( loc_pfn) 
self.prepareReport('FILE_ON_TAPE', report) return error.ERR_FILEONTAPE, pilotErrorDiag else: tolog("Skipping file stage check for lib file") # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog( "Direct access mode is switched off (file will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog( "Direct access mode has been switched off for this file (will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") elif rootFile: tolog( "Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.prepareReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") dest_path = os.path.join(path, lfn) if pnfsid == None: _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path) else: _cmd_str = '%sdccp pnfs://dcdcap.usatlas.bnl.gov:22125/%s %s' % ( _setup_str, pnfsid, dest_path) tolog("Executing command: %s" % (_cmd_str)) report['transferStart'] = time.time() try: s, telapsed, cout, cerr = timed_command(_cmd_str, timeout) except Exception, e: tolog("!!WARNING!!2999!! timed_command() threw an exception: %s" % str(e)) s = 1 o = str(e) telapsed = timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ The local file (local access to the dCache file) is assumed to have a relative path that is the same of the relative path in the 'gpfn' loc_... are the variables used to access the file in the locally exported file system """ error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') analJob = pdict.get('analJob', False) timeout = pdict.get('timeout', 5*3600) prodDBlockToken = pdict.get('access', '') # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'BNLdCache', lfn, guid) # get a proper envsetup envsetup = self.getEnvsetup(get=True) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = envsetup ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.__sendReport('RFCP_FAIL', report) return ec, pilotErrorDiag # remove any host and SFN info from PFN path loc_pfn = self.extractPathFromPFN(gpfn) copyprefixin = readpar('copyprefixin') if copyprefixin != '': # Extract the copy prefix pfrom, pto = copyprefixin.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefixin to file: %s" % (loc_pfn)) else: copyprefix = readpar('copyprefix') if copyprefix != '': # Extract the copy prefix pfrom, pto = copyprefix.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefix to file: %s" % (loc_pfn)) report['relativeStart'] = time.time() pnfsid = self.getPnfsid(loc_pfn, guid) # for analysis jobs, skip input file if on tape or if lib file if analJob: if not self.isLibFile(loc_pfn): if pnfsid == None: isStaged = self.isFileStaged(_setup_str, loc_pfn) else: _com = "/cacheinfos/isFileInPool?pnfsid=%s" % (pnfsid) isStaged = self.isFileStaged(_setup_str, loc_pfn, url="ddmv02.usatlas.bnl.gov:8000", com=_com) if not isStaged: pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % (loc_pfn) 
self.__sendReport('FILE_ON_TAPE', report) return error.ERR_FILEONTAPE, pilotErrorDiag else: tolog("Skipping file stage check for lib file") # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") elif rootFile: tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.__sendReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") dest_path = os.path.join(path, lfn) if pnfsid == None: _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path) else: _cmd_str = '%sdccp pnfs://dcdcap.usatlas.bnl.gov:22125/%s %s' % (_setup_str, pnfsid, dest_path) tolog("Executing command: %s" % (_cmd_str)) report['transferStart'] = time.time() try: s, telapsed, cout, cerr = timed_command(_cmd_str, timeout) except Exception, e: tolog("!!WARNING!!2999!! timed_command() threw an exception: %s" % str(e)) s = 1 o = str(e) telapsed = timeout
result = sitemover.stageOut(pfn, turl) break # transferred successfully except PilotException, e: result = e except Exception, e: result = PilotException("stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED) self.log('WARNING: Error in copying file (attempt %s): %s' % (_attempt, result)) if not isinstance(result, Exception): # transferred successfully # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(filename, self.workDir, self.job.jobId, mode="file_state", state="transferred") dumpFileStates(self.workDir, self.job.jobId) self.updateSURLDictionary(guid, surl, self.workDir, self.job.jobId) # FIX ME LATER fdat = result.copy() fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) else: failed_transfers.append(result) dumpFileStates(self.workDir, self.job.jobId) self.log('transferred_files= %s' % transferred_files)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Move a DS file from the local SE (where it was put by DDM) to the
    working directory using xrdcp (or a configured alternative copy tool).

    Performs the copy and, for systems supporting it, checks size and
    checksum correctness.

    :param gpfn: full source URL (e.g. method://[host[:port]]/full-dir-path/filename; an SRM URL is OK)
    :param lfn: logical file name
    :param path: destination absolute path (in a local file system)
    :param fsize: remote file size if known (0 = unknown)
    :param fchecksum: remote checksum if known (0 = unknown)
    :param guid: file GUID
    :param pdict: options (usect, jobId, workDir, sitename, cmtconfig, access, report)
    :return: (exit code, pilot error diagnostics); on failure the partially
             copied destination file is removed
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    sitename = pdict.get('sitename', '')
    cmtconfig = pdict.get('cmtconfig', '')
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid)

    # get a proper setup
    _setup_str = self.getSetup()

    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.__sendReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # remove any host and SFN info from PFN path
    src_loc_pfn = self.extractPathFromPFN(gpfn)

    # determine which copy command to use
    cpt = "xrdcp"

    # Do pre-staging if copytoolin is of the form 'copytool^prestagetool'
    copytoolin = readpar('copytoolin')
    tolog("xrdcpSiteMover ELN copytoolin : %s" % (copytoolin))
    if copytoolin != '':
        if copytoolin.find('^') > -1:
            cpt, pstage = copytoolin.split('^')
            if pstage != "" and pstage != 'dummy':
                # xrdcp is anyway hardcoded below...
                cmd = "%s %s " % (pstage, src_loc_pfn)
                rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
                if rc != 0:
                    self.__sendReport('PSTAGE_FAIL', report)
                    return rc, pilotErrorDiag
                else:
                    tolog("Successfully pre-staged file")
        else:
            cpt = readpar('copytoolin')

    tolog("Site mover will use get command: %s" % (cpt))

    # e.g. copyprefixin = 'dcap://ccdcapatlas.in2p3.fr:22125^root://ccsrb15:1094'
    #      gpfn         = 'srm://ccsrm.in2p3.fr/pnfs/in2p3.fr/data/...'
    #      src_loc_pfn  = '/pnfs/in2p3.fr/data/atlas/...'
    # -> prepend 'root://ccsrb15:1094' to src_loc_pfn
    copyprefix = readpar('copyprefixin')
    if copyprefix == "":
        copyprefix = readpar('copyprefix')
        tolog("Using copyprefix = %s" % (copyprefix))
    else:
        tolog("Using copyprefixin = %s" % (copyprefix))
    if copyprefix == "":
        pilotErrorDiag = "Empty copyprefix, cannot continue"
        tolog("!!WARNING!!1777!! %s" % (pilotErrorDiag))
        self.__sendReport('PSTAGE_FAIL', report)
        return error.ERR_STAGEINFAILED, pilotErrorDiag

    # handle copyprefix lists
    pfroms, ptos = getCopyprefixLists(copyprefix)
    if len(pfroms) != len(ptos):
        pilotErrorDiag = "Copyprefix lists not of equal length: %s, %s" % (str(pfroms), str(ptos))
        tolog("!!WARNING!!1777!! %s" % (pilotErrorDiag))
        self.__sendReport('PSTAGE_FAIL', report)
        return error.ERR_STAGEINFAILED, pilotErrorDiag
    # Python 2 idiom: map(None, a, b) zips with None-padding
    for (pfrom, pto) in map(None, pfroms, ptos):
        if (pfrom != "" and pfrom != None and pfrom != "dummy") and (pto != "" and pto != None and pto != "dummy"):
            if gpfn[:len(pfrom)] == pfrom or gpfn[:len(pto)] == pto:
                src_loc_pfn = pto + src_loc_pfn
                src_loc_pfn = src_loc_pfn.replace('///', '//')
                break

    tolog("PFN=%s" % (gpfn))
    tolog("TURL=%s" % (src_loc_pfn))

    src_loc_filename = lfn

    # source vars: gpfn, loc_pfn, loc_host, loc_dirname, loc_filename
    # dest vars: path
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # protect against bad pfn's
    src_loc_pfn = src_loc_pfn.replace('//xrootd/', '/xrootd/')
    if src_loc_pfn.find('//pnfs') == -1:
        src_loc_pfn = src_loc_pfn.replace('/pnfs', '//pnfs')

    # should the root file be copied or read directly by athena?
    directIn = self.checkForDirectAccess(lfn, useCT, workDir, jobId, prodDBlockToken)
    if directIn:
        report['relativeStart'] = None
        report['transferStart'] = None
        self.__sendReport('FOUND_ROOT', report)
        return error.ERR_DIRECTIOFILE, pilotErrorDiag

    # in case fchecksum is not given to this function, attempt to use the md5 option to get it
    dest_file = os.path.join(path, src_loc_filename)
    if fchecksum == 0 or fchecksum == "" or fchecksum == None or fchecksum == "None":
        useMd5Option = True
        cmd = "%s %s -md5 %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file)
    else:
        useMd5Option = False
        cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file)

    # is the md5 option available?
    if useMd5Option:
        cmd_test = "%s %s" % (_setup_str, cpt)
        tolog("Executing test command: %s" % (cmd_test))
        rc, rs = commands.getstatusoutput(cmd_test)
        # BUG FIX: was 'rs.find("-md5") > 0' which misses a match at index 0
        if rs.find("-md5") >= 0:
            tolog("This xrdcp version supports the md5 option")
        else:
            tolog("This xrdcp version does not support the md5 option (checksum test will be skipped)")
            useMd5Option = False
            cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file)

    # add the full stage-out command to the job setup script
    to_script = cmd.replace(dest_file, "`pwd`/%s" % os.path.basename(dest_file))
    to_script = to_script.lstrip(' ')  # remove any initial spaces
    if to_script.startswith('/'):
        to_script = 'source ' + to_script
    addToJobSetupScript(to_script, path)

    # transfer the file
    report['transferStart'] = time()
    rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
    report['validateStart'] = time()
    if rc != 0:
        self.__sendReport('COPY_FAIL', report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return rc, pilotErrorDiag
    else:
        tolog("Successfully transferred file")

    # get file size from the command output if not known already
    if fsize == 0:
        fsize = self.getFileSize(rs)

    # get checksum from the command output if not known already
    if useMd5Option and fchecksum == 0:
        fchecksum = self.getChecksum(rs)
    else:
        if fchecksum == 0 or fchecksum == None:
            fchecksum = ""
        else:
            tolog("fchecksum = %s" % (fchecksum))

    # get destination (local) file size and checksum
    ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
    tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
    if ec != 0:
        self.__sendReport('LOCAL_FILE_INFO_FAIL', report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return ec, pilotErrorDiag

    # compare remote and local file checksum
    if fchecksum != "" and fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
        pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                         (csumtype, os.path.basename(gpfn), fchecksum, dstfchecksum)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        if csumtype == "adler32":
            self.__sendReport('AD_MISMATCH', report)
            return error.ERR_GETADMISMATCH, pilotErrorDiag
        else:
            self.__sendReport('MD5_MISMATCH', report)
            return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # compare remote and local file size (skip test if remote/source file size is not known)
    # BUG FIX: fsize may arrive as a string while dstfsize is an int, making the
    # old 'dstfsize != fsize' report a spurious mismatch; compare as strings
    if fsize != 0 and fsize != "" and str(dstfsize) != str(fsize):
        pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" %\
                         (os.path.basename(gpfn), str(dstfsize), str(fsize))
        tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
        self.__sendReport('FS_MISMATCH', report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return error.ERR_GETWRONGSIZE, pilotErrorDiag

    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.__sendReport('DONE', report)
    return 0, pilotErrorDiag
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ copy input file from SE to local dir """ # Get input parameters from pdict useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') prodDBlockToken = pdict.get('access', '') # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'local', lfn, guid) if not path: tolog('path is empty, using current directory') path = os.getcwd() # build setup string envsetup = self.getEnvsetup(get=True) ec, pilotErrorDiag = verifySetupCommand(self.__error, envsetup) if ec != 0: self.__sendReport('RFCP_FAIL', report) return ec, pilotErrorDiag # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") elif rootFile: tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.__sendReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input") return self.__error.ERR_DIRECTIOFILE, self.__pilotErrorDiag else: tolog("Normal file transfer") else: tolog("not directIn") # build the get command _params = "" if fsize != 0 
and fsize != "0": _params += self.__par_filesize % (fsize,) if fchecksum and fchecksum != 'None' and fchecksum != 0 and fchecksum != "0" and not self.isDummyChecksum(fchecksum): csumtype = self.getChecksumType(fchecksum) # special case for md5sum (command only understands 'md5' and 'adler32', and not 'ad' and 'md5sum') if csumtype == 'md5sum': csumtype = 'md5' _params += self.__par_checksum % ("%s:%s" % (csumtype, fchecksum),) # add the guid option _params += " --guid %s" % (guid) dest_path = os.path.join(path, lfn) #PN # if ".lib." in gpfn: # localGet = self.__localget # else: # localGet = self.__localgetBAD # execStr = localGet % (envsetup, _params, gpfn, dest_path) execStr = self.__localget % (envsetup, _params, gpfn, dest_path) tolog("Executing command: %s" % (execStr)) report['transferStart'] = time() try: status, telapsed, cout, cerr = timed_command(execStr, self.__timeout) except Exception, e: self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str(e) tolog(self.__warningStr % self.__pilotErrorDiag) status = 1 output = str(e) telapsed = self.__timeout
class lcgcp2SiteMover(SiteMover.SiteMover):
    """ SiteMover that uses lcg-cp for both get and put """
    # no registration is done
    copyCommand = "lcg-cp2"
    checksum_command = "adler32"
    has_mkdir = False
    has_df = False
    has_getsize = False
    has_md5sum = True
    has_chmod = False
    timeout = 3600

    def __init__(self, setup_path, *args, **kwrds):
        """ Store the setup script path; extra args are accepted and ignored """
        self._setup = setup_path

    def get_timeout(self):
        """ Return the transfer timeout in seconds """
        return self.timeout

    def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
        """ copy input file from SE to local dir

        NOTE(review): this chunk ends right after the lcg-cp command is
        launched; the tail of the method (status/output handling) is not
        visible here.
        """

        error = PilotErrors()
        pilotErrorDiag = ""

        # Get input parameters from pdict
        jobId = pdict.get('jobId', '')
        workDir = pdict.get('workDir', '')
        experiment = pdict.get('experiment', '')
        proxycheck = pdict.get('proxycheck', False)

        # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
        useCT = pdict.get('usect', True)
        prodDBlockToken = pdict.get('access', '')

        # get the Rucio tracing report
        report = self.getStubTracingReport(pdict['report'], 'lcg2', lfn, guid)

        # get a proper envsetup
        envsetup = self.getEnvsetup(get=True)

        ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
        if ec != 0:
            self.prepareReport('RFCP_FAIL', report)
            return ec, pilotErrorDiag

        # get the experiment object
        thisExperiment = getExperiment(experiment)

        if proxycheck:
            # do we have a valid proxy?
            s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup)
            if s != 0:
                self.prepareReport('PROXYFAIL', report)
                return s, pilotErrorDiag
        else:
            tolog("Proxy verification turned off")

        getfile = gpfn

        if path == '':
            path = './'
        fullname = os.path.join(path, lfn)

        # can not test filesize and checksum if remote values are not known
        if fsize == 0 or fchecksum == 0:
            tolog("!!WARNING!!2999!! Remote file size/checksum not known: %s/%s" % (fsize, fchecksum))

        # Maybe be a comma list but take first always
        # (Remember that se can be a list where the first is used for output but any can be used for input)
        se = readpar('se').split(",")[0]
        _dummytoken, se = self.extractSE(se)
        tolog("Using SE: %s" % (se))

        # se = srm://head01.aglt2.org:8443/srm/managerv2?SFN=
        # for srm protocol, use the full info from 'se'
        if getfile[:3] == "srm":
            try:
                # e.g. tmp = 'head01.aglt2.org' (host part of the srm URL)
                tmp = getfile.split('/', 3)[2]
            except Exception, e:
                tolog('!!WARNING!!2999!! Could not extract srm protocol for replacement, keeping getfile variable as it is: %s (%s)' %\
                      (getfile, str(e)))
            else:
                # replace srm with 'srm://head01.aglt2.org:8443/srm/managerv2?SFN=' if not there already
                if not '?SFN=' in getfile:
                    # srm = 'srm://head01.aglt2.org'
                    srm = 'srm://' + tmp

                    # does seopt contain any matching srm's?
                    sematch = self.getSEMatchFromSEOpt(srm)
                    if sematch != "":
                        getfile = getfile.replace(srm, sematch)
                        tolog("Replaced %s with %s (from seopt) in getfile: %s" % (srm, sematch, getfile))
                    else:
                        getfile = getfile.replace(srm, se)
                        tolog("Replaced %s with %s (from se) in getfile: %s" % (srm, se, getfile))
                else:
                    tolog("Found SFN part in getfile: %s" % (getfile))

        # add port number from se to getfile if necessary
        getfile = self.addPortToPath(se, getfile)

        # should the root file be copied or read directly by athena?
        directIn, useFileStager = self.getTransferModes()
        if directIn:
            if useCT:
                directIn = False
                tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            else:
                # determine if the file is a root file according to its name
                rootFile = self.isRootFileName(lfn)
                if prodDBlockToken == 'local' or not rootFile:
                    directIn = False
                    tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
                elif rootFile:
                    tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                    report['relativeStart'] = None
                    report['transferStart'] = None
                    self.prepareReport('FOUND_ROOT', report)
                    if useFileStager:
                        updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                    else:
                        updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                    return error.ERR_DIRECTIOFILE, pilotErrorDiag
                else:
                    tolog("Normal file transfer")

        # determine which timeout option to use
        if self.isNewLCGVersion("%s lcg-cp" % (envsetup)):
            timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
        else:
            timeout_option = "-t %d" % (self.timeout)

        # used lcg-cp options:
        # --vo: specifies the Virtual Organization the user belongs to
        # -b: no bdii
        _cmd_str = '%s lcg-cp --verbose --vo atlas -b %s -T srmv2 %s file://%s' % (envsetup, timeout_option, getfile, fullname)

        tolog("Executing command: %s" % (_cmd_str))
        # getfile = gsiftp://umfs02.grid.umich.edu/atlas/data08/dq2/other/D/DBRelease-3.1.1.tar.gz
        # getfile = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/dq2/panda/dis/08/06/04/panda.64d403f5-adae-42f8-8614-1fc044eb85ea_dis12076725/misal1_mc12.005802.JF17_pythia_jet_filter.digit.RDO.v12000601_tid008610._11639.pool.root.1
        s = -1
        o = '(not defined)'
        t0 = os.times()
        report['relativeStart'] = time()
        report['transferStart'] = time()
        try:
            s, o = commands.getstatusoutput(_cmd_str)
        except Exception, e:
            tolog("!!WARNING!!2990!! Exception caught by get_data(): %s" % (str(e)))
            o = str(e)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ copy input file from SE to local dir """ # Get input parameters from pdict useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') prodDBlockToken = pdict.get('access', '') # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'local', lfn, guid) if not path: tolog('path is empty, using current directory') path = os.getcwd() # build setup string envsetup = self.getEnvsetup(get=True) # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") elif rootFile: tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.__sendReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input") return 0, self.__pilotErrorDiag else: tolog("Normal file transfer") else: tolog("not directIn") # THE FOLLOWING CODE UNTIL THE END OF THIS FUNCTION SHOULD BE REPLACED WITH NEW CODE FOR GLOBUS-ONLINE def downloadGC(): """Download Globus Connect file from Globus Server """ arg = 
['curl','--connect-timeout','20','--max-time','120','-s','-S','http://confluence.globus.org/download/attachments/14516429/globusconnect','-o','globusconnect'] tolog("Download Arguments: %s" % (arg)) subprocess.call(arg) #Function to retrieve users proxy from Cern myproxy server. #The credential should be deposite to the server by the user under the Pilots DN username #and authoried to retrieve it by the Pilot without passphrase, #to run Globus Online. #Code partially extracted from MyproxyUtils.py #Uses as parameter userID, which is obtained from job info from Panda. def getproxy(userID): """Gets users myproxy from cern server """ dn = userID if dn.count('/CN=') > 1: first_index = dn.find('/CN=') second_index = dn.find('/CN=', first_index+1) dn = dn[0:second_index] arg = ['myproxy-logon','-s','myproxy.cern.ch','--no_passphrase','-l',dn] subprocess.call(arg) #Fuction uses endpoint as parameter, that the user specifies at job submission. #Corresponds to the name of the endpoint for the working node using Globus- #Connect. Returns the code for the setup def createEndpoint(endpoint): """Create the endpoint the user specifies and returns the code for setup """ arg = ['gsissh', 'cli.globusonline.org', 'endpoint-add','--gc', endpoint] proc = subprocess.Popen(arg, stderr=subprocess.PIPE, stdout=subprocess.PIPE) return_code = proc.wait() i = 0 for line in proc.stdout: tolog(" %s " % (line.rstrip())) i += 1 if i == 3: code = line return code #Function uses endpoint as parameter to get the code and run the #setup in Globus Connect. This connects the endpoint on the working node with #Globus Online def setupGC(endpoint): """Installing Globus Connect on working node. 
Creates endpoint, get the setup code and uses it on setup mode """ code = createEndpoint(endpoint) code = code.strip() arg = ['sh', 'globusconnect', '-setup', code] tolog("Arguments: %s:" % (arg)) subprocess.call(arg) #Function that runns Globus Connect on the background of the working node #to make it a Globus Online Endpoint def startGC(): """Start Globus Connect process on working node, on the background """ tolog("-----Running Globus Connect------") arg = ['sh', 'globusconnect', '-start'] subprocess.Popen(arg) #Function uses source and destinations as parameters.Source is a #Globus Online Endpoint and path to file, like this: #SITE1:/home/osg_data/panda/0064.file #Destination is the Endpoint on the working node ,using Globus Connect, #like this: #OSGDEV:/direct/usatlas+u/ccontrer/pilot/autopilot/trunk/ #User defines source and destination at job submission. #This function activates the source endpoint, under the consideration #it is a gridFTP server. Destination is already activated since it is a #Globus Connect endpoint and need no manual activation #This function creates one transfer task, so it should be executed #for as many source files as the user specifies #For a better dinamic, destination path can/should be obtained from job information (?) #and the endpoint is specified by user, as above. 
dest = endpoint + ':' + jobdir def stageincmdGO(src, dest): """create command for stage-in using Globus Online """ #Activate the source endpoint source = src.split(':')[0] arg = ['gsissh', 'cli.globusonline.org', 'endpoint-activate','-g', source] subprocess.call(arg) #Create transfer task cmd = 'gsissh cli.globusonline.org scp -v %s %s'%(src, dest) tolog("Transfer task created: %s" % (cmd)) return cmd #END OF GLOBUS ONLINE FUNCTIONS # build the get command _params = "" if fsize != 0 and fsize != "0": _params += self.__par_filesize % (fsize,) if fchecksum and fchecksum != 'None' and fchecksum != 0 and fchecksum != "0" and not self.isDummyChecksum(fchecksum): csumtype = self.getChecksumType(fchecksum) # special case for md5sum (command only understands 'md5' and 'adler32', and not 'ad' and 'md5sum') if csumtype == 'md5sum': csumtype = 'md5' _params += self.__par_checksum % ("%s:%s" % (csumtype, fchecksum),) # add the guid option _params += " --guid %s" % (guid) execStr = self.__localget % (envsetup, _params, gpfn, os.path.join(path, lfn)) tolog("Executing command: %s" % (execStr)) report['transferStart'] = time() try: status, telapsed, cout, cerr = timed_command(execStr, self.__timeout) except Exception, e: self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str(e) tolog(self.__warningStr % self.__pilotErrorDiag) status = 1 output = str(e) telapsed = self.__timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ copy input file from SE to local dir

    NOTE(review): this chunk ends right after the LFC replica lookup;
    the tail of the method (replica processing and the actual copy) is
    not visible here.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    token = pdict.get('token', None)
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    proxycheck = pdict.get('proxycheck', False)

    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.__sendReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    if proxycheck:
        # do we have a valid proxy?
        s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup)
        if s != 0:
            self.__sendReport('PROXYFAIL', report)
            return s, pilotErrorDiag
    else:
        tolog("Proxy verification turned off")

    getfile = gpfn

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.__sendReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # get remote filesize and checksum
    if fsize == 0 or fchecksum == 0:
        try:
            import lfc
        except Exception, e:
            pilotErrorDiag = "get_data() could not import lfc module: %s" % str(e)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            self.__sendReport('LFC_IMPORT', report)
            return error.ERR_GETLFCIMPORT, pilotErrorDiag

        os.environ['LFC_HOST'] = readpar('lfchost')
        try:
            ret, res = lfc.lfc_getreplicas([str(guid)], "")
        except Exception, e:
            pilotErrorDiag = "Failed to get LFC replicas: %s" % str(e)
            tolog("!!WARNING!!2990!! Exception caught: %s" % (pilotErrorDiag))
            tolog("Mover get_data finished (failed)")
            self.__sendReport('NO_LFC_REPS', report)
            return error.ERR_FAILEDLFCGETREPS, pilotErrorDiag
_status = self.removeLocal(dest_path) if not _status: tolog( "!!WARNING!!1112!! Failed to remove local file, get retry will fail" ) if csumtype == "adler32": self.prepareReport('AD_MISMATCH', report) return error.ERR_GETADMISMATCH, pilotErrorDiag else: self.prepareReport('MD5_MISMATCH', report) return error.ERR_GETMD5MISMATCH, pilotErrorDiag updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input") self.prepareReport('DONE', report) return 0, pilotErrorDiag def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """Data transfer: includes BNL dir creation""" error = PilotErrors() pilotErrorDiag = "" tolog("BNLdCacheSiteMover::put_data() called") # Get input parameters from pdict lfn = pdict.get('lfn', '')
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ Moves a DS file the local SE (where was put from DDM) to the working directory. Performs the copy and, for systems supporting it, checks size and md5sum correctness gpfn: full source URL (e.g. method://[host[:port]/full-dir-path/filename - a SRM URL is OK) path: destination absolute path (in a local file system) returns the status of the transfer. In case of failure it should remove the partially copied destination """ # The local file is assumed to have a relative path that is the same of the relative path in the 'gpfn' # loc_... are the variables used to access the file in the locally exported file system error = PilotErrors() pilotErrorDiag = "" # try to get the direct reading control variable (False for direct reading mode; file should not be copied) useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') dsname = pdict.get('dsname', '') workDir = pdict.get('workDir', '') prodDBlockToken = pdict.get('access', '') # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn, guid) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = '' ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.prepareReport('RFCP_FAIL', report) return ec, pilotErrorDiag tolog("xrootdSiteMover get_data using setup: %s" % (_setup_str)) # remove any host and SFN info from PFN path src_loc_pfn = self.extractPathFromPFN(gpfn) src_loc_filename = lfn # source vars: gpfn, loc_pfn, loc_host, loc_dirname, loc_filename # dest vars: path if fchecksum != 0 and fchecksum != "": csumtype = self.getChecksumType(fchecksum) else: csumtype = "default" # protect against bad pfn's src_loc_pfn = src_loc_pfn.replace('///', '/') src_loc_pfn = src_loc_pfn.replace('//xrootd/', '/xrootd/') # should the root file be copied or read directly by athena? 
directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog( "Direct access mode is switched off (file will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") else: rootFile = self.isRootFile(src_loc_pfn, setup=_setup_str) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog( "Direct access mode has been switched off for this file (will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") elif rootFile: tolog( "Found root file: %s (will not be transferred in direct reading mode)" % (src_loc_pfn)) report['relativeStart'] = None report['transferStart'] = None self.prepareReport('IS_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") else: tolog("No direct access mode") ec = 0 if fsize == 0 or fchecksum == 0: ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo( src_loc_pfn, csumtype=csumtype) if ec != 0: self.prepareReport('GET_LOCAL_FILE_INFO_FAIL', report) return ec, pilotErrorDiag dest_file = os.path.join(path, src_loc_filename) report['relativeStart'] = time() # determine which copy command to use cpt = self.getCopytool(_setup_str) report['transferStart'] = time() cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) #PN # if ".lib." 
in src_loc_pfn: # cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) # else: # cmd = "%s %sXXX %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) tolog("Executing command: %s" % (cmd)) # execute timeout = 3600 try: rc, telapsed, cout, cerr = timed_command(cmd, timeout) except Exception, e: self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str( e) tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag)) rc = 1 rs = str(e) telapsed = timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ Moves a DS file the local SE (where was put from DDM) to the working directory. Performs the copy and, for systems supporting it, checks size and md5sum correctness gpfn: full source URL (e.g. method://[host[:port]/full-dir-path/filename - a SRM URL is OK) path: destination absolute path (in a local file system) returns the status of the transfer. In case of failure it should remove the partially copied destination """ # The local file is assumed to have a relative path that is the same of the relative path in the 'gpfn' # loc_... are the variables used to access the file in the locally exported file system error = PilotErrors() pilotErrorDiag = "" # try to get the direct reading control variable (False for direct reading mode; file should not be copied) useCT = pdict.get('usect', True) jobId = pdict.get('jobId', '') dsname = pdict.get('dsname', '') workDir = pdict.get('workDir', '') prodDBlockToken = pdict.get('access', '') # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn, guid) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = '' ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.__sendReport('RFCP_FAIL', report) return ec, pilotErrorDiag tolog("xrootdSiteMover get_data using setup: %s" % (_setup_str)) # remove any host and SFN info from PFN path src_loc_pfn = self.extractPathFromPFN(gpfn) src_loc_filename = lfn # source vars: gpfn, loc_pfn, loc_host, loc_dirname, loc_filename # dest vars: path if fchecksum != 0 and fchecksum != "": csumtype = self.getChecksumType(fchecksum) else: csumtype = "default" # protect against bad pfn's src_loc_pfn = src_loc_pfn.replace('///','/') src_loc_pfn = src_loc_pfn.replace('//xrootd/','/xrootd/') # should the root file be copied or read directly by athena? 
directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") else: rootFile = self.isRootFile(src_loc_pfn, setup=_setup_str) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") elif rootFile: tolog("Found root file: %s (will not be transferred in direct reading mode)" % (src_loc_pfn)) report['relativeStart'] = None report['transferStart'] = None self.__sendReport('IS_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") else: tolog("No direct access mode") ec = 0 if fsize == 0 or fchecksum == 0: ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(src_loc_pfn, csumtype=csumtype) if ec != 0: self.__sendReport('GET_LOCAL_FILE_INFO_FAIL', report) return ec, pilotErrorDiag dest_file = os.path.join(path, src_loc_filename) report['relativeStart'] = time() # determine which copy command to use cpt = self.getCopytool(_setup_str) report['transferStart'] = time() cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) #PN # if ".lib." 
in src_loc_pfn: # cmd = "%s %s %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) # else: # cmd = "%s %sXXX %s %s" % (_setup_str, cpt, src_loc_pfn, dest_file) tolog("Executing command: %s" % (cmd)) # execute timeout = 3600 try: rc, telapsed, cout, cerr = timed_command(cmd, timeout) except Exception, e: self.__pilotErrorDiag = 'timed_command() threw an exception: %s' % str(e) tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag)) rc = 1 rs = str(e) telapsed = timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ The local file (local access to the dCache file) is assumed to have a relative path that is the same of the relative path in the 'gpfn' loc_... are the variables used to access the file in the locally exported file system """ error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') analJob = pdict.get('analJob', False) timeout = pdict.get('timeout', 5 * 3600) # try to get the direct reading control variable (False for direct reading mode; file should not be copied) useCT = pdict.get('usect', True) prodDBlockToken = pdict.get('access', '') # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'dCache', lfn, guid) # get a proper envsetup envsetup = self.getEnvsetup(get=True) if self._setup: _setup_str = "source %s; " % self._setup else: _setup_str = envsetup ec, pilotErrorDiag = verifySetupCommand(error, _setup_str) if ec != 0: self.prepareReport('RFCP_FAIL', report) return ec, pilotErrorDiag # remove any host and SFN info from PFN path loc_pfn = self.extractPathFromPFN(gpfn) copyprefixin = readpar('copyprefixin') if copyprefixin != '': # Extract the copy prefix pfrom, pto = copyprefixin.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefixin to file: %s" % (loc_pfn)) else: copyprefix = readpar('copyprefix') if copyprefix != '': # Extract the copy prefix pfrom, pto = copyprefix.split('^') loc_pfn = pfrom + loc_pfn tolog("Added copyprefix to file: %s" % (loc_pfn)) report['relativeStart'] = time() # for analysis jobs, skip input file if on tape or if lib file if analJob: if not self.isLibFile(loc_pfn): if not self.isFileStaged(_setup_str, loc_pfn): pilotErrorDiag = "File %s is not staged and will be skipped for analysis job" % ( loc_pfn) self.prepareReport('FILE_ON_TAPE', report) return error.ERR_FILEONTAPE, pilotErrorDiag else: tolog("Skipping file stage check for lib 
file") # should the root file be copied or read directly by athena? directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog( "Direct access mode is switched off (file will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog( "Direct access mode has been switched off for this file (will be transferred with the copy tool)" ) updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input") elif rootFile: tolog( "Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.prepareReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") dest_path = os.path.join(path, lfn) _cmd_str = '%sdccp %s %s' % (_setup_str, loc_pfn, dest_path) tolog("Executing command: %s" % (_cmd_str)) report['transferStart'] = time() telapsed = 0 try: # t0 = time() # s, cout = commands.getstatusoutput(_cmd_str) # cerr = "" # telapsed = time() - t0 s, telapsed, cout, cerr = timed_command(_cmd_str, timeout) print "DEBUG: s = ", s, type(s) print "DEBUG: telapsed = ", telapsed, type(telapsed) print "DEBUG: cout = ", cout, type(cout) print "DEBUG: cerr = ", cerr, type(cerr) cout = self.filter_text(cout) cerr = self.filter_text(cerr) if not self.is_number(s): s = 1 if not self.is_number(telapsed): telapsed = 0 except Exception, e: tolog("!!WARNING!!2999!! 
timed_command() threw an exception: %s" % (e)) s = 1 o = self.filter_text(e) telapsed = timeout # write traceback info to stderr import traceback exc, msg, tb = sys.exc_info() traceback.print_tb(tb) _pilotErrorDiag = "Unexpected exception: %s" % (get_exc_plus()) tolog("!!WARNING!!2999!! get_exc_plus: %s" % (_pilotErrorDiag)) print "!!WARNING!!2999!! get_exc_plus: %s" % (_pilotErrorDiag)
except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException("stageIn failed with error=%s" % e, code=PilotErrors.ERR_STAGEINFAILED) self.log(traceback.format_exc()) self.log('WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageinretry, result)) if not isinstance(result, Exception): # transferred successfully # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="input") dumpFileStates(self.workDir, self.job.jobId, ftype="input") ## self.updateSURLDictionary(guid, surl, self.workDir, self.job.jobId) # FIX ME LATER fdat = result.copy() #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) else: failed_transfers.append(result) dumpFileStates(self.workDir, self.job.jobId, ftype="input") #self.log('transferred_files= %s' % transferred_files) self.log('Summary of transferred files:') for e in transferred_files:
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict): """ copy input file from SE to local dir """ error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict token = pdict.get('token', None) jobId = pdict.get('jobId', '') workDir = pdict.get('workDir', '') experiment = pdict.get('experiment', '') proxycheck = pdict.get('proxycheck', False) # try to get the direct reading control variable (False for direct reading mode; file should not be copied) useCT = pdict.get('usect', True) prodDBlockToken = pdict.get('access', '') # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid) # get a proper envsetup envsetup = self.getEnvsetup(get=True) ec, pilotErrorDiag = verifySetupCommand(error, envsetup) if ec != 0: self.prepareReport('RFCP_FAIL', report) return ec, pilotErrorDiag # get the experiment object thisExperiment = getExperiment(experiment) if proxycheck: # do we have a valid proxy? s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup) if s != 0: self.prepareReport('PROXYFAIL', report) return s, pilotErrorDiag else: tolog("Proxy verification turned off") getfile = gpfn if path == '': path = './' fullname = os.path.join(path, lfn) # should the root file be copied or read directly by athena? 
directIn, useFileStager = self.getTransferModes() if directIn: if useCT: directIn = False tolog("Direct access mode is switched off (file will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") else: # determine if the file is a root file according to its name rootFile = self.isRootFileName(lfn) if prodDBlockToken == 'local' or not rootFile: directIn = False tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)") updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input") elif rootFile: tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn)) report['relativeStart'] = None report['transferStart'] = None self.prepareReport('FOUND_ROOT', report) if useFileStager: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input") else: updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input") return error.ERR_DIRECTIOFILE, pilotErrorDiag else: tolog("Normal file transfer") # get remote filesize and checksum if fsize == 0 or fchecksum == 0: try: import lfc except Exception, e: pilotErrorDiag = "get_data() could not import lfc module: %s" % str(e) tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag)) self.prepareReport('LFC_IMPORT', report) return error.ERR_GETLFCIMPORT, pilotErrorDiag os.environ['LFC_HOST'] = readpar('lfchost') try: ret, res = lfc.lfc_getreplicas([str(guid)],"") except Exception, e: pilotErrorDiag = "Failed to get LFC replicas: %s" % str(e) tolog("!!WARNING!!2990!! Exception caught: %s" % (pilotErrorDiag)) tolog("Mover get_data finished (failed)") self.prepareReport('NO_LFC_REPS', report) return error.ERR_FAILEDLFCGETREPS, pilotErrorDiag
def stageout(self, activity, files): """ Copy files to dest SE: main control function, it should care about alternative stageout and retry-policy for diffrent ddmenndpoints :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each ddmendpoint :return: (transferred_files, failed_transfers) :raise: PilotException in case of error """ if not files: raise PilotException("Failed to put files: empty file list to be transferred") pandaqueue = self.si.getQueueName() # FIX ME LATER protocols = self.protocols.setdefault(activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue]) self.log("Mover.stageout() [new implementation] started for activity=%s, files=%s, protocols=%s" % (activity, files, protocols)) # check if file exists before actual processing # populate filesize if need for fspec in files: pfn = os.path.join(self.job.workdir, fspec.lfn) if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK): error = "Erron: input pfn file is not exist: %s" % pfn self.log(error) raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL") fspec.filesize = os.path.getsize(pfn) totalsize = reduce(lambda x, y: x + y.filesize, files, 0) transferred_files, failed_transfers = [],[] self.log("Found N=%s files to be transferred, total_size=%.3f MB: %s" % (len(files), totalsize/1024./1024., [e.lfn for e in files])) # group protocols, files by ddm ddmprotocols, ddmfiles = {}, {} for e in files: ddmfiles.setdefault(e.ddmendpoint, []).append(e) for e in protocols: if e['ddm'] not in ddmfiles: continue ddmprotocols.setdefault(e['ddm'], []).append(e) unknown_ddms = set(ddmfiles) - set(ddmprotocols) if unknown_ddms: raise PilotException("Failed to put files: no protocols defined for output ddmendpoints=%s .. 
check aprotocols schedconfig settings for activity=%s, " % (unknown_ddms, activity), code=PilotErrors.ERR_NOSTORAGE) self.log("[stage-out] [%s] filtered protocols to be used to transfer files: protocols=%s" % (activity, ddmprotocols)) # get SURL endpoint for Panda callback registration # resolve from special protocol activity=SE # fix me later to proper name of activitiy=SURL (panda SURL, at the moment only 2-letter name is allowed on AGIS side) self.ddmconf.update(self.si.resolveDDMConf(set(ddmfiles))) surl_protocols, no_surl_ddms = {}, set() for fspec in files: if not fspec.surl: # initilize only if not already set surl_prot = [dict(se=e[0], path=e[2]) for e in sorted(self.ddmconf.get(fspec.ddmendpoint, {}).get('aprotocols', {}).get('SE', []), key=lambda x: x[1])] if surl_prot: surl_protocols.setdefault(fspec.ddmendpoint, surl_prot[0]) else: no_surl_ddms.add(fspec.ddmendpoint) if no_surl_ddms: # failed to resolve SURLs self.log('FAILED to resolve default SURL path for ddmendpoints=%s' % list(no_surl_ddms)) raise PilotException("Failed to put files: no SE/SURL protocols defined for output ddmendpoints=%s .. check ddmendpoints aprotocols settings for activity=SE, " % list(no_surl_ddms), code=PilotErrors.ERR_NOSTORAGE) # try to use each protocol of same ddmendpoint until successfull transfer for ddmendpoint, iprotocols in ddmprotocols.iteritems(): for dat in iprotocols: copytool, copysetup = dat.get('copytool'), dat.get('copysetup') try: sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir) sitemover.trace_report = self.trace_report sitemover.protocol = dat # ## sitemover.ddmconf = self.ddmconf # quick workaround ### sitemover.setup() except Exception, e: self.log('WARNING: Failed to get SiteMover: %s .. skipped .. 
try to check next available protocol, current protocol details=%s' % (e, dat)) continue remain_files = [e for e in ddmfiles.get(ddmendpoint) if e.status not in ['transferred']] if not remain_files: self.log('INFO: all files to be transfered to ddm=%s have been successfully processed for activity=%s ..' % (ddmendpoint, activity)) # stop checking other protocols of ddmendpoint break self.log("Copy command [stage-out]: %s, sitemover=%s" % (copytool, sitemover)) self.log("Copy setup [stage-out]: %s" % copysetup) self.trace_report.update(protocol=copytool, localSite=ddmendpoint, remoteSite=ddmendpoint) # validate se value? se, se_path = dat.get('se', ''), dat.get('path', '') for fdata in remain_files: if not fdata.surl: fdata.surl = sitemover.getSURL(surl_protocols[fdata.ddmendpoint].get('se'), surl_protocols[fdata.ddmendpoint].get('path'), fdata.scope, fdata.lfn, self.job) # job is passing here for possible JOB specific processing updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="not_transferred", ftype="output") fdata.turl = sitemover.getSURL(se, se_path, fdata.scope, fdata.lfn, self.job) # job is passing here for possible JOB specific processing self.log("[stage-out] resolved SURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.surl, fdata.lfn, fdata.ddmendpoint)) self.log("[stage-out] resolved TURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.turl, fdata.lfn, fdata.ddmendpoint)) self.log("[stage-out] Prepare to put_data: ddmendpoint=%s, protocol=%s, fspec=%s" % (ddmendpoint, dat, fdata)) self.trace_report.update(catStart=time.time(), filename=fdata.lfn, guid=fdata.guid.replace('-', '')) self.trace_report.update(scope=fdata.scope, dataset=fdata.destinationDblock, url=fdata.turl) self.log("[stage-out] Preparing copy for lfn=%s using copytool=%s: mover=%s" % (fdata.lfn, copytool, sitemover)) #dumpFileStates(self.workDir, self.job.jobId, ftype="output") # loop over multple stage-out attempts for _attempt in xrange(1, 
self.stageoutretry + 1): if _attempt > 1: # if not first stage-out attempt, take a nap before next attempt self.log(" -- Waiting %s seconds before next stage-out attempt for file=%s --" % (self.stageout_sleeptime, fdata.lfn)) time.sleep(self.stageout_sleeptime) self.log("Put attempt %s/%s for filename=%s" % (_attempt, self.stageoutretry, fdata.lfn)) try: result = sitemover.put_data(fdata) fdata.status = 'transferred' # mark as successful if result.get('surl'): fdata.surl = result.get('surl') #if result.get('pfn'): # fdata.turl = result.get('pfn') #self.trace_report.update(url=fdata.surl) ### self.trace_report.update(url=fdata.turl) ### # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="output") dumpFileStates(self.workDir, self.job.jobId, ftype="output") self.updateSURLDictionary(fdata.guid, fdata.surl, self.workDir, self.job.jobId) # FIXME LATER: isolate later fdat = result.copy() #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException("stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED) self.log(traceback.format_exc()) self.log('WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageoutretry, result)) if isinstance(result, Exception): # failure transfer failed_transfers.append(result)
def stageout(self, activity, files): """ Copy files to dest SE: main control function, it should care about alternative stageout and retry-policy for diffrent ddmendpoints :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each ddmendpoint :return: (transferred_files, failed_transfers) :raise: PilotException in case of error """ if not files: raise PilotException( "Failed to put files: empty file list to be transferred") pandaqueue = self.si.getQueueName() # FIX ME LATER protocols = self.protocols.setdefault( activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue]) copytools = self.si.resolvePandaCopytools(pandaqueue, activity)[pandaqueue] self.log( "Mover.stageout() [new implementation] started for activity=%s, files=%s, protocols=%s, copytools=%s" % (activity, files, protocols, copytools)) # check if file exists before actual processing # populate filesize if need for fspec in files: pfn = os.path.join(self.job.workdir, fspec.lfn) if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK): error = "Erron: input pfn file is not exist: %s" % pfn self.log(error) raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL") fspec.filesize = os.path.getsize(pfn) totalsize = reduce(lambda x, y: x + y.filesize, files, 0) transferred_files, failed_transfers = [], [] self.log( "Found N=%s files to be transferred, total_size=%.3f MB: %s" % (len(files), totalsize / 1024. 
/ 1024., [e.lfn for e in files])) # first resolve protocol settings from PQ specific aprotocols settings # then resolve settings from default ddm.protocols supported by copytools # group protocols, files by ddmendpoint ddmprotocols, ddmfiles = {}, {} for e in files: ddmfiles.setdefault(e.ddmendpoint, []).append(e) # load DDM conf/protocols self.ddmconf.update(self.si.resolveDDMConf(ddmfiles.keys())) for e in protocols: if e['ddm'] not in ddmfiles: # skip not affected protocols settings continue e['copytools'] = [{ 'copytool': e['copytool'], 'copysetup': e['copysetup'] }] ddmprotocols.setdefault(e['ddm'], []).append(e) # generate default protocols from copytools/schemes and ddmconf unknown_ddms = set(ddmfiles) - set(ddmprotocols) for ddmendpoint in unknown_ddms: dd = self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}) dat = dd.get(activity, []) or dd.get('w', []) dprotocols = [ dict(se=e[0], path=e[2], resolve_scheme=True) for e in sorted(dat, key=lambda x: x[1]) ] ddmprotocols.setdefault(ddmendpoint, dprotocols) unknown_ddms = set(ddmfiles) - set(ddmprotocols) if unknown_ddms: raise PilotException( "Failed to put files: no protocols defined for output ddmendpoints=%s .. 
check aprotocols schedconfig settings for activity=%s or default ddm.aprotocols entries" % (unknown_ddms, activity), code=PilotErrors.ERR_NOSTORAGE) self.log( "[stage-out] [%s] filtered protocols to be used to transfer files: protocols=%s" % (activity, ddmprotocols)) # get SURL endpoint for Panda callback registration # resolve from special protocol activity='SE' or fallback to activity='a', then to 'r' surl_protocols, no_surl_ddms = {}, set() for fspec in files: if not fspec.surl: # initialize only if not already set d = self.ddmconf.get(fspec.ddmendpoint, {}).get('aprotocols', {}) xprot = d.get('SE', []) if not xprot: xprot = [ e for e in d.get('a', d.get('r', [])) if e[0] and e[0].startswith('srm') ] surl_prot = [ dict(se=e[0], path=e[2]) for e in sorted(xprot, key=lambda x: x[1]) ] if surl_prot: surl_protocols.setdefault(fspec.ddmendpoint, surl_prot[0]) else: no_surl_ddms.add(fspec.ddmendpoint) if no_surl_ddms: # failed to resolve SURLs self.log( 'FAILED to resolve default SURL path for ddmendpoints=%s' % list(no_surl_ddms)) raise PilotException( "Failed to put files: no SE/SURL protocols defined for output ddmendpoints=%s .. check ddmendpoints aprotocols settings for activity=SE/a/r" % list(no_surl_ddms), code=PilotErrors.ERR_NOSTORAGE) sitemover_objects = {} # try to iterate over protocol of given ddmendpoint until successfull transfer for ddmendpoint, iprotocols in ddmprotocols.iteritems(): for dat in iprotocols: remain_files = [ e for e in ddmfiles.get(ddmendpoint) if e.status not in ['transferred'] ] if not remain_files: self.log( 'INFO: all files to be transfered to ddm=%s have been successfully processed for activity=%s ..' 
% (ddmendpoint, activity)) # stop checking other protocols of ddmendpoint break if not 'copytools' in dat: # use allowed copytools cdat = [] for cp, settings in copytools: cdat.append({ 'copytool': cp, 'copysetup': settings.get('setup') }) dat['copytools'] = cdat if not dat['copytools']: msg = 'FAILED to resolve final copytools settings for ddmendpoint=%s, please check schedconf.copytools settings: copytools=%s, iprotocols=' % list( ddmendpoint, copytools, iprotocols) self.log(msg) raise PilotException(msg, code=PilotErrors.ERR_NOSTORAGE) for cpsettings in dat.get('copytools', []): copytool, copysetup = cpsettings.get( 'copytool'), cpsettings.get('copysetup') try: sitemover = sitemover_objects.get(copytool) if not sitemover: sitemover = getSiteMover(copytool)( copysetup, workDir=self.job.workdir) sitemover_objects.setdefault(copytool, sitemover) sitemover.trace_report = self.trace_report sitemover.protocol = dat # ## sitemover.ddmconf = self.ddmconf # quick workaround ### sitemover.setup() if dat.get('resolve_scheme'): dat['scheme'] = sitemover.schemes except Exception, e: self.log( 'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' % (e, dat)) continue if dat.get( 'scheme' ): # filter protocols by accepted scheme from copytool should_skip = True for scheme in dat.get('scheme'): if dat['se'].startswith(scheme): should_skip = False break if should_skip: self.log( "[stage-out] protocol=%s of ddmendpoint=%s is skipped since copytool=%s does not support it, accepted schemes=%s" % (dat['se'], ddmendpoint, copytool, dat['scheme'])) continue self.log("Copy command [stage-out]: %s, sitemover=%s" % (copytool, sitemover)) self.log("Copy setup [stage-out]: %s" % copysetup) self.trace_report.update(protocol=copytool, localSite=ddmendpoint, remoteSite=ddmendpoint) # validate se value? 
se, se_path = dat.get('se', ''), dat.get('path', '') for fdata in remain_files: if not fdata.surl: fdata.surl = sitemover.getSURL( surl_protocols[fdata.ddmendpoint].get('se'), surl_protocols[fdata.ddmendpoint].get('path'), fdata.scope, fdata.lfn, self.job ) # job is passing here for possible JOB specific processing updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="not_transferred", ftype="output") fdata.turl = sitemover.getSURL( se, se_path, fdata.scope, fdata.lfn, self.job ) # job is passing here for possible JOB specific processing self.log( "[stage-out] resolved SURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.surl, fdata.lfn, fdata.ddmendpoint)) self.log( "[stage-out] resolved TURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.turl, fdata.lfn, fdata.ddmendpoint)) self.log( "[stage-out] Prepare to put_data: ddmendpoint=%s, protocol=%s, fspec=%s" % (ddmendpoint, dat, fdata)) self.trace_report.update(catStart=time.time(), filename=fdata.lfn, guid=fdata.guid.replace( '-', '')) self.trace_report.update( scope=fdata.scope, dataset=fdata.destinationDblock, url=fdata.turl) self.log( "[stage-out] Preparing copy for lfn=%s using copytool=%s: mover=%s" % (fdata.lfn, copytool, sitemover)) #dumpFileStates(self.workDir, self.job.jobId, ftype="output") # loop over multple stage-out attempts for _attempt in xrange(1, self.stageoutretry + 1): if _attempt > 1: # if not first stage-out attempt, take a nap before next attempt self.log( " -- Waiting %s seconds before next stage-out attempt for file=%s --" % (self.stageout_sleeptime, fdata.lfn)) time.sleep(self.stageout_sleeptime) self.log("Put attempt %s/%s for filename=%s" % (_attempt, self.stageoutretry, fdata.lfn)) try: result = sitemover.put_data(fdata) fdata.status = 'transferred' # mark as successful if result.get('surl'): fdata.surl = result.get('surl') #if result.get('pfn'): # fdata.turl = result.get('pfn') #self.trace_report.update(url=fdata.surl) ### 
self.trace_report.update(url=fdata.turl) ### # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="output") dumpFileStates(self.workDir, self.job.jobId, ftype="output") self.updateSURLDictionary( fdata.guid, fdata.surl, self.workDir, self. job.jobId) # FIXME LATER: isolate later fdat = result.copy() #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException( "stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED) self.log(traceback.format_exc()) self.log( 'WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageoutretry, result)) if isinstance(result, Exception): # failure transfer failed_transfers.append(result)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ Moves a DS file from a remote SE to the working directory.

    Performs the copy and, for systems supporting it, checks size and md5sum correctness
    gpfn: full source URL (e.g. method://[host[:port]/full-dir-path/filename) IGNORED HERE, will use dq-list-files to get it
    path: destination absolute path (in a local file system)
    returns the status of the transfer. In case of failure it should remove the partially copied destination

    Returns a 2-tuple (exit_code, pilotErrorDiag); exit code 0 means success.
    The replica is resolved through the FAX global redirector, copied with
    xrdcp, and then verified against the catalog size/checksum.  Every failure
    path removes the partial local copy so a later get-retry can succeed.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    guid = pdict.get("guid", "")
    useCT = pdict.get("usect", True)
    jobId = pdict.get("jobId", "")
    dsname = pdict.get("dsname", "")
    workDir = pdict.get("workDir", "")
    experiment = pdict.get("experiment", "")
    prodDBlockToken = pdict.get("access", "")

    # get the site information object
    tolog("get_data: experiment=%s" % (experiment))
    si = getSiteInformation(experiment)

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict["report"], "fax", lfn, guid)

    src_loc_filename = lfn  # os.path.basename(src_loc_pfn)
    # source vars: gpfn, loc_pfn, loc_host, loc_dirname, loc_filename
    # dest vars: path

    # determine the checksum type (adler32/md5) from the catalog value, if known
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # should the root file be copied or read directly by athena? (note: this section is necessary in case FAX is used as primary site mover)
    directIn = self.checkForDirectAccess(lfn, useCT, workDir, jobId, prodDBlockToken)
    if directIn:
        # direct access: no local copy is made; report and return immediately
        report["relativeStart"] = None
        report["transferStart"] = None
        self.__sendReport("FOUND_ROOT", report)
        return error.ERR_DIRECTIOFILE, pilotErrorDiag

    # local destination path
    dest_file = os.path.join(path, src_loc_filename)

    # the initial gpfn is ignored since the pilot will get it from the global redirector
    # however, the lfn can differ e.g. for files the has the __DQ2-* bit in it. In that case
    # the global redirector will not give the correct name, and the pilot need to correct for it
    # so better to use the lfn taken from the initial gpfn right away
    # warning: tests at CERN has shown that this is not true. the global redirector will not find a file with __DQ2- in it
    initial_lfn = os.path.basename(gpfn)
    tolog("Initial LFN=%s" % (initial_lfn))

    # get the global path
    # if gpfn != "":
    #     tolog("Ignoring initial GPFN since pilot will get it using the global redirector (%s)" % (gpfn))
    gpfn = self.findGlobalFilePath(src_loc_filename, dsname)
    if gpfn == "":
        ec = error.ERR_STAGEINFAILED
        pilotErrorDiag = "Failed to get global paths for FAX transfer"
        tolog("!!WARNING!!3330!! %s" % (pilotErrorDiag))
        self.__sendReport("RFCP_FAIL", report)
        return ec, pilotErrorDiag

    tolog("GPFN=%s" % (gpfn))
    global_lfn = os.path.basename(gpfn)
    if global_lfn != initial_lfn:
        # tolog("WARNING: Global LFN not the same as the initial LFN. Will try to use the initial LFN")
        tolog("WARNING: Global LFN not the same as the initial LFN. Will use the global LFN")
        # gpfn = gpfn.replace(global_lfn, initial_lfn)
        # tolog("Updated GPFN=%s" % (gpfn))

    # setup ROOT locally
    _setup_str = self.getLocalROOTSetup()

    # define the copy command (-f forces overwrite of an existing destination)
    cmd = "%s xrdcp -d 1 -f %s %s" % (_setup_str, gpfn, dest_file)

    # transfer the file
    report["transferStart"] = time()
    rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
    report["validateStart"] = time()
    if rc != 0:
        self.__sendReport("COPY_FAIL", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return rc, pilotErrorDiag
    else:
        tolog("Successfully transferred file")

        # get file size from the command output if not known already
        if fsize == 0:
            fsize = self.getFileSize(rs)

        # get checksum from the command output if not known already
        if fchecksum == 0:
            fchecksum = self.getChecksum(rs)
        else:
            # normalize a missing checksum to "" so the comparison below is skipped
            if fchecksum == 0 or fchecksum == None:
                fchecksum = ""
            else:
                tolog("fchecksum = %s" % (fchecksum))

    # get destination (local) file size and checksum
    ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
    tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
    if ec != 0:
        self.__sendReport("LOCAL_FILE_INFO_FAIL", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return ec, pilotErrorDiag

    # compare remote and local file checksum
    if fchecksum != "" and fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
        pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % (csumtype, os.path.basename(gpfn), fchecksum, dstfchecksum)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        if csumtype == "adler32":
            self.__sendReport("AD_MISMATCH", report)
            return error.ERR_GETADMISMATCH, pilotErrorDiag
        else:
            self.__sendReport("MD5_MISMATCH", report)
            return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # compare remote and local file size (skip test if remote/source file size is not known)
    if dstfsize != fsize and fsize != 0 and fsize != "":
        pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" % (os.path.basename(gpfn), str(dstfsize), str(fsize))
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        self.__sendReport("FS_MISMATCH", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return error.ERR_GETWRONGSIZE, pilotErrorDiag

    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.__sendReport("DONE", report)
    return 0, pilotErrorDiag
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, fscope=None, **pdict):
    """ copy input file from SE to local dir

    Stage-in via aria2c driven by a metalink file: either the job-wide
    'AllInput.xml.meta4' (if already present) or a single-file metalink built
    from Rucio replica info.  Returns (exit_code, pilotErrorDiag); 0 on success.
    """

    # determine which timeout option to use
    # NOTE(review): timeout_option is built but not used in the aria2c command
    # below -- presumably a leftover; confirm before removing
    timeout_option = "--connect-timeout 300 --timeout %d" % (self.timeout)

    sslCert = self.sslCert
    sslKey = self.sslKey
    sslCertDir = self.sslCertDir

    # used aria2c options:
    # --certificate Client certificate file and password (SSL)(proxy)
    # --private-key user proxy again
    # --ca-certificate: concatenate *.0 in cert dir to make bundle
    # --out: <file> Write output to <file> instead of stdout
    # --dir: output directory, needed when multiple files(metalink)
    # --continue: if file is already there (from previous) then success
    # --auto-file-renaming=false : don't rename existing file

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    token = pdict.get('token', None)
    jobId = pdict.get('jobId', '')
    scope = pdict.get('scope', '')
    workDir = pdict.get('workDir', '')
    proxycheck = pdict.get('proxycheck', False)

    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'aria2c'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    #if proxycheck:
    #    # do we have a valid proxy?
    #    s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup)
    #    if s != 0:
    #        self.prepareReport('PROXYFAIL', report)
    #        return s, pilotErrorDiag
    #else:
    #    tolog("Proxy verification turned off")
    tolog("Proxy verification turned off")

    getfile = gpfn

    if path == '':
        path = './'
    fullname = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return 0, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # Build ca bundle if not already there
    cabundleFile = 'cabundle.pem'
    if not os.path.exists(cabundleFile):
        _cmd_str = 'cat %s/*.0 > %s' % (sslCertDir, cabundleFile)
        tolog("Executing command: %s" % (_cmd_str))
        s, o = commands.getstatusoutput(_cmd_str)

    # If metalink file not created(including all inputs)
    # then make one just for this input
    if os.path.exists('AllInput.xml.meta4'):
        metalink = 'AllInput.xml.meta4'
    else:
        tolog("Getting metalink from Rucio")
        rep = replica()
        rep.sfn = gpfn
        rep.filesize = fsize
        rep.filename = lfn
        rep.csumvalue = fchecksum
        if fscope:
            rep.scope = fscope
        else:
            #scope = extractPattern(gpfn,r'\/rucio\/(.+)\/[a-zA-Z0-9]{2}\/[a-zA-Z0-9]{2}\/')
            rep.scope = scope.replace("/", ".")
        replicas = {guid: [rep]}
        self.surls2metalink(replicas, 'oneInput.xml.meta4')
        metalink = 'oneInput.xml.meta4'

    # build the copy command
    # search how many links are available for download
    word_occour = 0
    metaL_file = open(metalink)
    for line in metaL_file:
        for word in line.strip().split():
            # NOTE(review): '("<url")' is just a parenthesized string, so this is
            # a substring test (word contained in "<url"), not tuple membership --
            # possibly intended as word.startswith("<url"); confirm before changing
            if word in ("<url"):
                word_occour += 1
    tolog("number of links: %s, using only the first" % (str(word_occour)))

    #--check-certificate=false makes it easier(sles11)
    _cmd_str = '%s -j 1 --ca-certificate=%s --certificate=%s --private-key=%s --auto-file-renaming=false --continue --server-stat-of=aria2cperf.txt %s' % (self.copyCommand, cabundleFile, sslCert, sslCert, metalink)

    # invoke the transfer commands
    report['relativeStart'] = time()
    report['transferStart'] = time()
    tolog("Executing command: %s" % (_cmd_str))
    s, o = commands.getstatusoutput(_cmd_str)
    tolog(o)
    if s != 0:
        tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
        check_syserr(s, o)
        pilotErrorDiag = "aria2c failed: %s" % (o)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        ec = error.ERR_STAGEINFAILED
        return ec, pilotErrorDiag

    report['validateStart'] = time()

    # get the checksum type (md5sum or adler32)
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # verify size/checksum of the downloaded file when catalog values are known
    if (fsize != 0 or fchecksum != 0) and self.doFileVerifications():
        loc_filename = lfn
        dest_file = os.path.join(path, loc_filename)

        # get the checksum type (md5sum or adler32)
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get remote file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return ec, pilotErrorDiag

        # compare remote and local file size
        if long(fsize) != 0 and long(dstfsize) != long(fsize):
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" %\
                             (os.path.basename(gpfn), str(dstfsize), str(fsize))
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.prepareReport('FS_MISMATCH', report)
            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(gpfn), dstfchecksum, fchecksum)
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))

            # report corrupted file to consistency server
            self.reportFileCorruption(gpfn)

            if csumtype == "adler32":
                self.prepareReport('AD_MISMATCH', report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.prepareReport('MD5_MISMATCH', report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.prepareReport('DONE', report)
    return 0, pilotErrorDiag
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ The local file is assubed to have a relative path that is the same of the relative path in the 'gpfn'
    loc_... are the variables used to access the file in the locally exported file system

    Stage-in via rfcp from a DPM/Castor locally-mounted path.
    Returns (exit_code, pilotErrorDiag); 0 on success.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'rfcpLFC'
        # mark the relative start
        report['relativeStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-','')

    tolog("gpfn is %s" % gpfn)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = envsetup

    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # remove any host and SFN info from PFN path
    loc_pfn = self.extractPathFromPFN(gpfn)

    try:
        if not loc_pfn.startswith(('/dpm', '/castor')):
            tolog("Potential problem with local filename. Does not start with '/dpm' or '/castor/'.")
    except TypeError:
        # Older version of python
        pass

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    dest_path = os.path.join(path, lfn)

    #PN
    _cmd_str = '%srfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    # if ".lib." in loc_pfn:
    #     _cmd_str = '%srfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    # else:
    #     _cmd_str = '%srfcpXXX %s %s' % (_setup_str, loc_pfn, dest_path)

    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time()

    # execute
    timeout = 3600
    try:
        s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
    except Exception, e:
        pilotErrorDiag = 'timed_command() threw an exception: %s' % (e)
        tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag))
        s = 1
        o = str(e)
        telapsed = timeout
    # NOTE(review): the code that should follow here (command status check,
    # local file info lookup and the pilotErrorDiag assignment that the tuple
    # below belongs to) is not visible in this chunk -- it appears elided.
    # Confirm against the full file before editing this region.
    (csumtype, os.path.basename(gpfn), dstfchecksum, fchecksum)
    tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))

    # remove the local file before any get retry is attempted
    _status = self.removeLocal(dest_path)
    if not _status:
        tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

    if csumtype == "adler32":
        self.prepareReport('AD_MISMATCH', report)
        return error.ERR_GETADMISMATCH, pilotErrorDiag
    else:
        self.prepareReport('MD5_MISMATCH', report)
        return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
    self.prepareReport('DONE', report)
    return 0, pilotErrorDiag

def put_data(self, source, ddm_storage, fsize=0, fchecksum=0, dsname='', **pdict):
    """ Data transfer using rfcp - generic version
    It's not advisable to use this right now because there's no easy way to
    register the srm space token if the file is copied with rfcp """

    error = PilotErrors()

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    # NOTE(review): the remainder of this method is not visible in this chunk
def stagein(self):
    """
    Resolve protocols/copytools for the 'pr' (stage-in) activity, iterate over
    the job input files and transfer (or mark for direct access) each one.

    :return: (transferred_files, failed_transfers)
    """

    activity = 'pr'

    pandaqueue = self.si.getQueueName()  # FIX ME LATER
    protocols = self.protocols.setdefault(activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
    copytools = self.si.resolvePandaCopytools(pandaqueue, activity)[pandaqueue]

    self.log("stage-in: pq.aprotocols=%s, pq.copytools=%s" % (protocols, copytools))

    files = self.job.inData
    self.resolve_replicas(files)  # populates also self.ddmconf = self.si.resolveDDMConf([])

    maxinputsize = self.getMaxInputSize()
    totalsize = reduce(lambda x, y: x + y.filesize, files, 0)

    transferred_files, failed_transfers = [], []

    self.log("Found N=%s files to be transferred, total_size=%.3f MB: %s" % (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

    # process first PQ specific protocols settings
    # then protocols supported by copytools

    # protocol generated from aprotocols is {'copytool':'', 'copysetup':'', 'se':'', 'ddm':''}
    # protocol generated from copytools is {'copytool':'', 'copysetup', 'scheme':''}

    # build accepted schemes from allowed copytools
    cprotocols = []
    for cp, settings in copytools:
        cprotocols.append({'resolve_scheme': True, 'copytool': cp, 'copysetup': settings.get('setup')})

    protocols = protocols + cprotocols
    if not protocols:
        raise PilotException("Failed to get files: neither aprotocols nor allowed copytools defined for input. check copytools/acopytools/aprotocols schedconfig settings for activity=%s, pandaqueue=%s" % (activity, pandaqueue), code=PilotErrors.ERR_NOSTORAGE)

    # cache SiteMover instances per copytool so setup() runs only once each
    sitemover_objects = {}

    for dat in protocols:

        remain_files = [e for e in files if e.status not in ['direct_access', 'transferred']]
        if not remain_files:
            self.log('INFO: all input files have been successfully processed')
            break

        copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

        try:
            sitemover = sitemover_objects.get(copytool)
            if not sitemover:
                sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir)
                sitemover_objects.setdefault(copytool, sitemover)

                sitemover.trace_report = self.trace_report
                sitemover.ddmconf = self.ddmconf  # self.si.resolveDDMConf([]) # quick workaround ###
                sitemover.setup()
            if dat.get('resolve_scheme'):
                dat['scheme'] = sitemover.schemes
        except Exception, e:
            self.log('WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' % (e, dat))
            continue

        self.log("Copy command [stage-in]: %s, sitemover=%s" % (copytool, sitemover))
        self.log("Copy setup [stage-in]: %s" % copysetup)

        self.trace_report.update(protocol=copytool)

        # verify file sizes and available space for stagein
        sitemover.check_availablespace(maxinputsize, remain_files)

        for fdata in remain_files:

            updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="not_transferred", ftype="input")

            self.log("[stage-in] Prepare to get_data: protocol=%s, fspec=%s" % (dat, fdata))

            # check if protocol and fdata.ddmendpoint belong to same site
            #
            # if dat.get('ddm'):
            protocol_site = self.ddmconf.get(dat.get('ddm'), {}).get('site')
            replica_site = self.ddmconf.get(fdata.ddmendpoint, {}).get('site')

            if protocol_site != replica_site:
                self.log('INFO: cross-sites checks: protocol_site=%s and (fdata.ddmenpoint) replica_site=%s mismatched .. skip file processing for copytool=%s (protocol=%s)' % (protocol_site, replica_site, copytool, dat))
                continue

            r = sitemover.resolve_replica(fdata, dat)

            # quick stub: propagate changes to FileSpec
            if r.get('surl'):
                fdata.surl = r['surl']  # TO BE CLARIFIED if it's still used and need
            if r.get('pfn'):
                fdata.turl = r['pfn']
            if r.get('ddmendpoint'):
                fdata.ddmendpoint = r['ddmendpoint']

            self.log("[stage-in] found replica to be used: ddmendpoint=%s, pfn=%s" % (fdata.ddmendpoint, fdata.turl))

            # check if protocol and found replica belong to same site
            if dat.get('ddm'):
                protocol_site = self.ddmconf.get(dat.get('ddm'), {}).get('site')
                replica_site = self.ddmconf.get(fdata.ddmendpoint, {}).get('site')

                if protocol_site != replica_site:
                    self.log('INFO: cross-sites checks: protocol_site=%s and replica_site=%s mismatched .. skip file processing for copytool=%s' % (protocol_site, replica_site, copytool))
                    continue

            # check direct access
            self.log("fdata.is_directaccess()=%s, job.accessmode=%s, mover.is_directaccess()=%s" % (fdata.is_directaccess(), self.job.accessmode, self.is_directaccess()))

            # job-level accessmode overrides the mover-level direct-access setting
            is_directaccess = self.is_directaccess()
            if self.job.accessmode == 'copy':
                is_directaccess = False
            elif self.job.accessmode == 'direct':
                is_directaccess = True
            if fdata.is_directaccess() and is_directaccess:  # direct access mode, no transfer required
                fdata.status = 'direct_access'
                updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="transfer_mode", state="direct_access", ftype="input")

                self.log("Direct access mode will be used for lfn=%s .. skip transfer the file" % fdata.lfn)
                continue

            # apply site-mover custom job-specific checks for stage-in
            try:
                is_stagein_allowed = sitemover.is_stagein_allowed(fdata, self.job)
                if not is_stagein_allowed:
                    reason = 'SiteMover does not allowed stage-in operation for the job'
            except PilotException, e:
                is_stagein_allowed = False
                reason = e
            except Exception:
                raise
            # NOTE(review): the rest of this per-file loop (the actual get_data
            # call and bookkeeping) is not visible in this chunk
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ The local file is assubed to have a relative path that is the same of the relative path in the 'gpfn'
    loc_... are the variables used to access the file in the locally exported file system

    Stage-in via rfcp from a DPM/Castor locally-mounted path.
    Returns (exit_code, pilotErrorDiag); 0 on success.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'rfcpLFC'
        # mark the relative start
        report['relativeStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    tolog("gpfn is %s" % gpfn)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = envsetup

    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # remove any host and SFN info from PFN path
    loc_pfn = self.extractPathFromPFN(gpfn)

    try:
        if not loc_pfn.startswith(('/dpm', '/castor')):
            tolog("Potential problem with local filename. Does not start with '/dpm' or '/castor/'.")
    except TypeError:
        # Older version of python
        pass

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)

            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    dest_path = os.path.join(path, lfn)

    #PN
    _cmd_str = '%srfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    # if ".lib." in loc_pfn:
    #     _cmd_str = '%srfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    # else:
    #     _cmd_str = '%srfcpXXX %s %s' % (_setup_str, loc_pfn, dest_path)

    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time()

    # execute
    timeout = 3600
    try:
        s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
    except Exception, e:
        pilotErrorDiag = 'timed_command() threw an exception: %s' % (e)
        tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag))
        s = 1
        o = str(e)
        telapsed = timeout
    # NOTE(review): the remainder of this method (command status validation,
    # checksum/size verification and the final return) is not visible in this
    # chunk -- confirm against the full file before editing
def do_put_files(self, ddmendpoint, protocols, files):  # old function : TO BE DEPRECATED ...
    """
    Copy files to dest SE
    :ddmendpoint: DDMEndpoint name used to store files
    :return: (list of transferred_files details, list of failed_transfers details)
    :raise: PilotException in case of error
    """

    self.log('[deprecated do_put_files()]Prepare to copy files=%s to ddmendpoint=%s using protocols data=%s' % (files, ddmendpoint, protocols))
    self.log("[deprecated do_put_files()]Number of stage-out tries: %s" % self.stageoutretry)

    # get SURL for Panda calback registration
    # resolve from special protocol activity=SE # fix me later to proper name of activitiy=SURL (panda SURL, at the moment only 2-letter name is allowed on AGIS side)
    # if SE is not found, try to fallback to a
    surl_prot = [dict(se=e[0], path=e[2]) for e in sorted(self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get('SE', self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get('a', [])), key=lambda x: x[1])]

    if not surl_prot:
        self.log('FAILED to resolve default SURL path for ddmendpoint=%s' % ddmendpoint)
        return [], []

    surl_prot = surl_prot[0]  # take first

    self.log("[do_put_files] SURL protocol to be used: %s" % surl_prot)

    self.trace_report.update(localSite=ddmendpoint, remoteSite=ddmendpoint)

    transferred_files, failed_transfers = [], []

    for dat in protocols:

        copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

        try:
            sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir)
            sitemover.trace_report = self.trace_report
            sitemover.protocol = dat  # ##
            sitemover.ddmconf = self.ddmconf  # quick workaround ###
            sitemover.setup()
        except Exception, e:
            self.log('[do_put_files] WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' % (e, dat))
            continue

        self.log("[do_put_files] Copy command: %s, sitemover=%s" % (copytool, sitemover))
        self.log("[do_put_files] Copy setup: %s" % copysetup)

        self.trace_report.update(protocol=copytool)

        se, se_path = dat.get('se', ''), dat.get('path', '')

        self.log("[do_put_files] Found N=%s files to be transferred: %s" % (len(files), [e.get('pfn') for e in files]))

        for fdata in files:
            scope, lfn, pfn = fdata.get('scope', ''), fdata.get('lfn'), fdata.get('pfn')
            guid = fdata.get('guid', '')

            surl = sitemover.getSURL(surl_prot.get('se'), surl_prot.get('path'), scope, lfn, self.job)  # job is passing here for possible JOB specific processing
            turl = sitemover.getSURL(se, se_path, scope, lfn, self.job)  # job is passing here for possible JOB specific processing

            self.trace_report.update(scope=scope, dataset=fdata.get('dsname_report'), url=surl)
            self.trace_report.update(catStart=time.time(), filename=lfn, guid=guid.replace('-', ''))

            self.log("[do_put_files] Preparing copy for pfn=%s to ddmendpoint=%s using copytool=%s: mover=%s" % (pfn, ddmendpoint, copytool, sitemover))
            self.log("[do_put_files] lfn=%s: SURL=%s" % (lfn, surl))
            self.log("[do_put_files] TURL=%s" % turl)

            if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                error = "Erron: input pfn file is not exist: %s" % pfn
                self.log(error)
                raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL")

            filename = os.path.basename(pfn)

            # update the current file state
            updateFileState(filename, self.workDir, self.job.jobId, mode="file_state", state="not_transferred")
            dumpFileStates(self.workDir, self.job.jobId)

            # loop over multple stage-out attempts
            for _attempt in xrange(1, self.stageoutretry + 1):

                if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                    self.log(" -- Waiting %d seconds before next stage-out attempt for file=%s --" % (self.stageout_sleeptime, filename))
                    time.sleep(self.stageout_sleeptime)

                self.log("[do_put_files] Put attempt %d/%d for filename=%s" % (_attempt, self.stageoutretry, filename))

                try:
                    # quick work around
                    from Job import FileSpec
                    stub_fspec = FileSpec(ddmendpoint=ddmendpoint, guid=guid, scope=scope, lfn=lfn)
                    result = sitemover.stageOut(pfn, turl, stub_fspec)
                    break  # transferred successfully
                except PilotException, e:
                    result = e
                    self.log(traceback.format_exc())
                except Exception, e:
                    self.log(traceback.format_exc())
                    result = PilotException("stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED)

                self.log('WARNING [do_put_files]: Error in copying file (attempt %s): %s' % (_attempt, result))
            # NOTE(review): the post-attempt bookkeeping (success/failure lists,
            # return statement) is not visible in this chunk
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ stage-in function

    Copy the CASTOR input file given by SURL 'gpfn' into directory 'path'
    using rfcp, then verify size/checksum of the local copy.

    :param gpfn: SURL of the source replica; must contain '/castor/'
    :param lfn: logical file name; used as the local file name
    :param path: local destination directory
    :param fsize: expected file size (0 or "" disables the size check)
    :param fchecksum: expected checksum (0 or "" disables the checksum check)
    :param guid: file GUID, used for the tracing report
    :param pdict: extra parameters (usect, jobId, workDir, experiment, access, report)
    :return: (exit code, pilot error diagnostics); (0, "") on success
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)  # True forces copy-to-scratch; False allows direct access
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'castor', lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    # abort early if the setup command itself is broken
    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    # do we have a valid proxy?
    s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup)
    if s != 0:
        self.prepareReport('PROXYFAIL', report)
        return s, pilotErrorDiag

    # Strip off prefix in order to use rfcp directly
    tolog("gpfn: %s" % (gpfn))
    pat = re.compile('^.*(/castor/.*)$')
    mat = pat.match(gpfn)
    if mat:
        getfile = mat.group(1)
    else:
        pilotErrorDiag = "Get file not in castor: %s" % (gpfn)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        self.prepareReport('NO_FILE', report)
        return error.ERR_STAGEINFAILED, pilotErrorDiag

    # when the file has been copied we will rename it to the lfn (to remove the legacy __DQ2-string on some files)
    dest_path = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: disable direct access
            directIn = False
            tolog(
                "Direct access mode is switched off (file will be transferred with the copy tool)"
            )
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog(
                    "Direct access mode has been switched off for this file (will be transferred with the copy tool)"
                )
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            elif rootFile:
                # root file: leave it on the SE and let athena read it remotely
                tolog(
                    "Found root file according to file name: %s (will not be transferred in direct reading mode)"
                    % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # transfer the input file with rfcp
    _cmd_str = '%srfcp %s %s' % (envsetup, getfile, dest_path)
    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time()
    s, o = commands.getstatusoutput(_cmd_str)
    report['validateStart'] = time()
    if s != 0:
        o = o.replace('\n', ' ')
        check_syserr(s, o)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_path)
        if not _status:
            tolog(
                "!!WARNING!!1112!! Failed to remove local file, get retry will fail"
            )

        # map the rfcp failure onto a specific pilot error code
        if o.find("No such file or directory") >= 0:
            if getfile.find("DBRelease") >= 0:
                pilotErrorDiag = "Missing DBRelease file: %s" % (getfile)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                ec = error.ERR_MISSDBREL
            else:
                pilotErrorDiag = "No such file or directory: %s" % (
                    getfile)
                tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
                ec = error.ERR_NOSUCHFILE
        else:
            pilotErrorDiag = "rfcp failed: %d, %s" % (s, o)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            ec = error.ERR_STAGEINFAILED

        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    # check file size and checksum
    if fsize != 0 or fchecksum != 0:
        # which checksum type are we using?
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get remote file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(
            dest_path, csumtype=csumtype)
        tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog(
                    "!!WARNING!!1112!! Failed to remove local file, get retry will fail"
                )

            return ec, pilotErrorDiag

        # compare remote and local file size
        if fsize != 0 and dstfsize != fsize:
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" %\
                             (os.path.basename(gpfn), str(dstfsize), str(fsize))
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
            self.prepareReport('FS_MISMATCH', report)

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog(
                    "!!WARNING!!1112!! Failed to remove local file, get retry will fail"
                )

            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(
                fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(gpfn), dstfchecksum, fchecksum)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))

            # remove the local file before any get retry is attempted
            _status = self.removeLocal(dest_path)
            if not _status:
                tolog(
                    "!!WARNING!!1112!! Failed to remove local file, get retry will fail"
                )

            if csumtype == "adler32":
                self.prepareReport('AD_MISMATCH', report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.prepareReport('MD5_MISMATCH', report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # everything checked out: record the file as transferred
    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
    self.prepareReport('DONE', report)
    return 0, pilotErrorDiag
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ copy input file from SE to local dir

    aria2c-based stage-in: builds (or reuses) a metalink file describing the
    replica, assembles a CA bundle from the certificate directory, runs
    aria2c, then optionally verifies size/checksum of the local copy.

    :param gpfn: SURL of the source replica
    :param lfn: logical file name; used as the local file name
    :param path: local destination directory ('' means CWD)
    :param fsize: expected file size (0 or "" disables the size check)
    :param fchecksum: expected checksum (0 or "" disables the checksum check)
    :param guid: file GUID, used for the tracing report and metalink
    :param pdict: extra parameters (token, jobId, workDir, proxycheck, usect, access, report)
    :return: (exit code, pilot error diagnostics); (0, "") on success
    """

    # determine which timeout option to use
    # NOTE(review): timeout_option is computed but not applied to the aria2c
    # command below - confirm whether this is intentional
    timeout_option = "--connect-timeout 300 --timeout %d" % (self.timeout)

    sslCert = self.sslCert
    sslKey = self.sslKey  # unused in this method body
    sslCertDir = self.sslCertDir

    # used aria2c options:
    # --certificate Client certificate file and password (SSL)(proxy)
    # --private-key user proxy again
    # --ca-certificate: concatenate *.0 in cert dir to make bundle
    # --out: <file> Write output to <file> instead of stdout
    # --dir: output directory, needed when multiple files(metalink)
    # --continue: if file is already there (from previous) then success
    # --auto-file-renaming=false : don't rename existing file

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    token = pdict.get('token', None)  # unused in this method body
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    proxycheck = pdict.get('proxycheck', False)

    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report; fall back to an empty dict when missing
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'aria2c'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    if proxycheck:
        # do we have a valid proxy?
        s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup)
        if s != 0:
            self.__sendReport('PROXYFAIL', report)
            return s, pilotErrorDiag
    else:
        tolog("Proxy verification turned off")

    getfile = gpfn  # unused below; the transfer uses the metalink instead

    if path == '': path = './'
    fullname = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: disable direct access
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                # root file: leave it on the SE and let athena read it remotely
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.__sendReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return 0, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # If metalink file not created(including all inputs)
    # then make one just for this input
    if os.path.exists('AllInput.xml.meta4'):
        metalink = 'AllInput.xml.meta4'
    else:
        rep = replica()
        rep.sfn = gpfn
        rep.filesize = fsize
        rep.csumvalue = fchecksum
        replicas = {guid: [rep]}
        self.surls2metalink(replicas, 'oneInput.xml.meta4')
        metalink = 'oneInput.xml.meta4'

    # Build ca bundle if not already there
    cabundleFile = 'cabundle.pem'
    if not os.path.exists(cabundleFile):
        _cmd_str = 'cat %s/*.0 > %s' % (sslCertDir, cabundleFile)
        tolog("Executing command: %s" % (_cmd_str))
        s, o = commands.getstatusoutput(_cmd_str)

    # build the copy command
    #--check-certificate=false makes it easier(sles11)
    _cmd_str = '%s --check-certificate=false --ca-certificate=%s --certificate=%s --private-key=%s --auto-file-renaming=false --continue --server-stat-of=aria2cperf.txt %s' % (self.copyCommand, cabundleFile, sslCert, sslCert, metalink)

    # invoke the transfer commands
    report['relativeStart'] = time()
    report['transferStart'] = time()
    tolog("Executing command: %s" % (_cmd_str))
    s, o = commands.getstatusoutput(_cmd_str)
    tolog(o)

    if s != 0:
        tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
        check_syserr(s, o)
        pilotErrorDiag = "aria2c failed: %s" % (o)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        ec = error.ERR_STAGEINFAILED
        return ec, pilotErrorDiag

    report['validateStart'] = time()

    # get the checksum type (md5sum or adler32)
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # verify the downloaded file unless verifications are disabled
    if (fsize != 0 or fchecksum != 0) and self.doFileVerifications():
        loc_filename = lfn
        dest_file = os.path.join(path, loc_filename)

        # get the checksum type (md5sum or adler32)
        if fchecksum != 0 and fchecksum != "":
            csumtype = self.getChecksumType(fchecksum)
        else:
            csumtype = "default"

        # get remote file size and checksum
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return ec, pilotErrorDiag

        # compare remote and local file size
        if long(fsize) != 0 and long(dstfsize) != long(fsize):
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" %\
                             (os.path.basename(gpfn), str(dstfsize), str(fsize))
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.__sendReport('FS_MISMATCH', report)
            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(gpfn), dstfchecksum, fchecksum)
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))

            # report corrupted file to consistency server
            self.reportFileCorruption(gpfn)

            if csumtype == "adler32":
                self.__sendReport('AD_MISMATCH', report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.__sendReport('MD5_MISMATCH', report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # everything checked out: record the file as transferred
    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.__sendReport('DONE', report)
    return 0, pilotErrorDiag
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ CASTOR service-class aware stage-in.

    The local file is assumed to have a relative path that is the same as the
    relative path in the 'gpfn'.
    loc_... are the variables used to access the file in the locally exported
    file system.
    TODO: document GPFN format (SURL from catalog srm://host/path)
    TODO: document better constraint

    Derives the CASTOR service class from the SURL (or by probing with
    stager_qry), exports the stager environment, then copies the file with
    rfcp under a timeout.

    NOTE(review): this method is a near-duplicate of another get_data
    variant in this file; keep the two in sync when changing either.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)  # True forces copy-to-scratch; False allows direct access
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'castorSVC', lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    # Hard code the configuration dictionary for now, but eventually this should be
    # set dynamically.
    #
    # There are the following configuration sections:
    #  setup - base environment veriables to be set
    #  svcClassMap - dictionary of string matches vs. service class names
    #  svcClassList - list of all service classes in case the svcClassMap matching fails
    #  svcClassDefault - the service class to set if the file appears to be staged no where
    #
    # Information from RAL:
    # [root@srm0661 ~]# listStorageArea -v atlas
    # <Space Token>                            <Description>   <service class>    <type>          <status>
    # 4948ef55-0000-1000-b7dd-9b38bdd87201    "ATLASGROUP"    "atlasStripDeg"    "DURABLE"       "ALLOCATED"
    # 4948ef38-0000-1000-8606-973e4e998e02    "ATLASMCDISK"   "atlasSimStrip"    "DURABLE"       "ALLOCATED"
    # 4948eec6-0000-1000-8ca2-aba0529b4806    "ATLASDATADISK" "atlasStripInput"  "DURABLE"       "ALLOCATED"
    # 4948ee8e-0000-1000-9ac5-81bb9b34ba7b    "ATLASMCTAPE"   "atlasSimRaw"      "PERMANENT"     "ALLOCATED"
    # 4948ee71-0000-1000-b611-a0afad31f6c8    "ATLASDATATAPE" "atlasT0Raw"       "PERMANENT"     "ALLOCATED"
    #                                         "ATLASHOTDISK"  "atlasHotDisk"
    # In addition there is the "atlasFarm" class, which is used when data is staged back from tape
    castorConfig = {
        'setup': {
            'STAGE_HOST': 'catlasstager.ads.rl.ac.uk',
            'STAGER_HOST': 'catlasstager.ads.rl.ac.uk',
            'RFIO_USE_CASTOR_V2': 'YES',
        },
        'svcClassList': (
            'atlasHotDisk',
            'atlasSimStrip',
            'atlasStripInput',
            'atlasFarm',
            'atlasStripDeg',
            'atlasT0Raw',
            'atlasSimRaw',
            'atlasScratchDisk',
        ),
        'svcClassMap': {
            '/atlashotdisk/': 'atlasHotDisk',
            '/atlasmcdisk/': 'atlasStripInput',
            '/atlasdatadisk/': 'atlasStripInput',
            '/atlasgroupdisk/': 'atlasStripDeg',
            '/atlasdatatape/': 'atlasFarm',
            '/atlasmctape/': 'atlasFarm',
            '/atlasscratchdisk/': 'atlasScratchDisk',
            '/atlasProdDisk/': 'atlasScratchDisk',
        },
        'svcClassDefault': 'atlasFarm',
    }

    # Set all environment variables for castor setup
    for envVar, value in castorConfig['setup'].iteritems():
        os.environ[envVar] = value

    # Strip the gpfn (SURL) back to its bare castor component
    tolog("gpfn is %s" % gpfn)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = envsetup

    # abort early if the setup command itself is broken
    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    loc_pfn = ''
    if (gpfn.find('SFN') != -1):
        s = gpfn.split('SFN=')
        loc_pfn = s[1]
        tolog("Found SFN string. Local file name %s" % loc_pfn)
    else:
        _tmp = gpfn.split('/', 3)
        loc_pfn = '/' + _tmp[3]
        tolog("Splitting SURL on slashes. Got local file name %s" % loc_pfn)

    if not loc_pfn.startswith('/castor/'):
        tolog(
            "WARNING: Problem with local filename: Does not start with '/castor/'."
        )

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: disable direct access
            directIn = False
            tolog(
                "Direct access mode is switched off (file will be transferred with the copy tool)"
            )
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog(
                    "Direct access mode has been switched off for this file (will be transferred with the copy tool)"
                )
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            elif rootFile:
                # root file: leave it on the SE and let athena read it remotely
                tolog(
                    "Found root file according to file name: %s (will not be transferred in direct reading mode)"
                    % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # Now need to find the service class associated with the file.
    # If we find a clear indication of a space token in the file path
    # then this is easy. However, if we don't, then use stager_qry to
    # interrogate each possible service class. If this fails then use
    # atlasFarm in desperation.
    serviceClass = None
    for pathMatch, svcClass in castorConfig['svcClassMap'].iteritems():
        if loc_pfn.find(pathMatch) >= 0:
            tolog('Matched path element %s - service class is %s' %
                  (pathMatch, svcClass))
            serviceClass = svcClass
            break
        else:
            tolog('Path element %s for service class %s - no match' %
                  (pathMatch, svcClass))

    # For testing the fallback, then we need to hobble ourselves by unsetting serviceClass:
    #tolog('Automatic service class was: %s' % serviceClass)
    #tolog('Unsetting service class for fallback testing')
    #serviceClass = None
    if serviceClass == None:
        # no hint in the SURL: probe each known service class with stager_qry
        tolog("Warning: Failed to find service class hint in SURL.")

        for tryMe in castorConfig['svcClassList']:
            os.environ['STAGE_SVCCLASS'] = tryMe
            tolog('Trying service class %s for file' % tryMe)
            err, output = commands.getstatusoutput('stager_qry -M %s' %
                                                   loc_pfn)
            if err != 0:
                tolog(
                    'WARNING: Unexpected status from stager_qry: %d\n%s' %
                    (err, output))
            else:
                if output.find('STAGED') >= 0:
                    tolog('Found file in service class %s' % tryMe)
                    serviceClass = tryMe
                    break
                else:
                    tolog('File not found in service class %s' % tryMe)

    if serviceClass == None:
        # last resort: fall back to the default service class
        tolog(
            'WARNING: Failed to find file in any expected service class - will set STAGE_SVCCLASS to %s'
            % castorConfig['svcClassDefault'])
        serviceClass = castorConfig['svcClassDefault']

    tolog('Setting STAGE_SVCCLASS to %s' % serviceClass)
    os.environ['STAGE_SVCCLASS'] = serviceClass

    dest_path = os.path.join(path, lfn)
    _cmd_str = '%s/usr/bin/rfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time()

    # execute the copy under a hard timeout
    timeout = 3600
    try:
        s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
    except Exception, e:
        pilotErrorDiag = 'timed_command() threw an exception: %s' % (e)
        tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag))
        s = 1
        # NOTE(review): the success path binds cout/cerr but this failure
        # path binds 'o'; whatever follows must handle both names - confirm
        o = str(e)
        telapsed = timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ CASTOR service-class aware stage-in (duplicate variant).

    The local file is assumed to have a relative path that is the same as the
    relative path in the 'gpfn'.
    loc_... are the variables used to access the file in the locally exported
    file system.
    TODO: document GPFN format (SURL from catalog srm://host/path)
    TODO: document better constraint

    Derives the CASTOR service class from the SURL (or by probing with
    stager_qry), exports the stager environment, then copies the file with
    rfcp under a timeout.

    NOTE(review): this method is a near-duplicate of another get_data
    variant in this file; keep the two in sync when changing either.
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    useCT = pdict.get('usect', True)  # True forces copy-to-scratch; False allows direct access
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'castorSVC', lfn, guid)

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    # Hard code the configuration dictionary for now, but eventually this should be
    # set dynamically.
    #
    # There are the following configuration sections:
    #  setup - base environment veriables to be set
    #  svcClassMap - dictionary of string matches vs. service class names
    #  svcClassList - list of all service classes in case the svcClassMap matching fails
    #  svcClassDefault - the service class to set if the file appears to be staged no where
    #
    # Information from RAL:
    # [root@srm0661 ~]# listStorageArea -v atlas
    # <Space Token>                            <Description>   <service class>    <type>          <status>
    # 4948ef55-0000-1000-b7dd-9b38bdd87201    "ATLASGROUP"    "atlasStripDeg"    "DURABLE"       "ALLOCATED"
    # 4948ef38-0000-1000-8606-973e4e998e02    "ATLASMCDISK"   "atlasSimStrip"    "DURABLE"       "ALLOCATED"
    # 4948eec6-0000-1000-8ca2-aba0529b4806    "ATLASDATADISK" "atlasStripInput"  "DURABLE"       "ALLOCATED"
    # 4948ee8e-0000-1000-9ac5-81bb9b34ba7b    "ATLASMCTAPE"   "atlasSimRaw"      "PERMANENT"     "ALLOCATED"
    # 4948ee71-0000-1000-b611-a0afad31f6c8    "ATLASDATATAPE" "atlasT0Raw"       "PERMANENT"     "ALLOCATED"
    #                                         "ATLASHOTDISK"  "atlasHotDisk"
    # In addition there is the "atlasFarm" class, which is used when data is staged back from tape
    castorConfig = {
        'setup' : {
            'STAGE_HOST' : 'catlasstager.ads.rl.ac.uk',
            'STAGER_HOST' : 'catlasstager.ads.rl.ac.uk',
            'RFIO_USE_CASTOR_V2' : 'YES',
            },
        'svcClassList' :  ('atlasHotDisk', 'atlasSimStrip', 'atlasStripInput', 'atlasFarm', 'atlasStripDeg', 'atlasT0Raw', 'atlasSimRaw', 'atlasScratchDisk', ),
        'svcClassMap' : {
            '/atlashotdisk/' : 'atlasHotDisk',
            '/atlasmcdisk/' : 'atlasStripInput',
            '/atlasdatadisk/' : 'atlasStripInput',
            '/atlasgroupdisk/' : 'atlasStripDeg',
            '/atlasdatatape/' : 'atlasFarm',
            '/atlasmctape/' : 'atlasFarm',
            '/atlasscratchdisk/' : 'atlasScratchDisk',
            '/atlasProdDisk/' : 'atlasScratchDisk',
            },
        'svcClassDefault' : 'atlasFarm',
        }

    # Set all environment variables for castor setup
    for envVar, value in castorConfig['setup'].iteritems():
        os.environ[envVar] = value

    # Strip the gpfn (SURL) back to its bare castor component
    tolog("gpfn is %s" % gpfn)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = envsetup

    # abort early if the setup command itself is broken
    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return ec, pilotErrorDiag

    loc_pfn = ''
    if( gpfn.find('SFN') != -1 ):
        s = gpfn.split('SFN=')
        loc_pfn = s[1]
        tolog("Found SFN string. Local file name %s" % loc_pfn)
    else:
        _tmp = gpfn.split('/', 3)
        loc_pfn = '/'+_tmp[3]
        tolog("Splitting SURL on slashes. Got local file name %s" % loc_pfn)

    if not loc_pfn.startswith('/castor/'):
        tolog("WARNING: Problem with local filename: Does not start with '/castor/'.")

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: disable direct access
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == 'local' or not rootFile:
                directIn = False
                tolog("Direct access mode has been switched off for this file (will be transferred with the copy tool)")
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", ftype="input")
            elif rootFile:
                # root file: leave it on the SE and let athena read it remotely
                tolog("Found root file according to file name: %s (will not be transferred in direct reading mode)" % (lfn))
                report['relativeStart'] = None
                report['transferStart'] = None
                self.prepareReport('FOUND_ROOT', report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", ftype="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", ftype="input")
                return error.ERR_DIRECTIOFILE, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # Now need to find the service class associated with the file.
    # If we find a clear indication of a space token in the file path
    # then this is easy. However, if we don't, then use stager_qry to
    # interrogate each possible service class. If this fails then use
    # atlasFarm in desperation.
    serviceClass = None
    for pathMatch, svcClass in castorConfig['svcClassMap'].iteritems():
        if loc_pfn.find(pathMatch) >= 0:
            tolog('Matched path element %s - service class is %s' % (pathMatch, svcClass))
            serviceClass = svcClass
            break
        else:
            tolog('Path element %s for service class %s - no match' % (pathMatch, svcClass))

    # For testing the fallback, then we need to hobble ourselves by unsetting serviceClass:
    #tolog('Automatic service class was: %s' % serviceClass)
    #tolog('Unsetting service class for fallback testing')
    #serviceClass = None
    if serviceClass == None:
        # no hint in the SURL: probe each known service class with stager_qry
        tolog("Warning: Failed to find service class hint in SURL.")

        for tryMe in castorConfig['svcClassList']:
            os.environ['STAGE_SVCCLASS'] = tryMe
            tolog('Trying service class %s for file' % tryMe)
            err, output = commands.getstatusoutput('stager_qry -M %s' % loc_pfn)
            if err != 0:
                tolog('WARNING: Unexpected status from stager_qry: %d\n%s' % (err, output))
            else:
                if output.find('STAGED') >= 0:
                    tolog('Found file in service class %s' % tryMe)
                    serviceClass = tryMe
                    break
                else:
                    tolog('File not found in service class %s' % tryMe)

    if serviceClass == None:
        # last resort: fall back to the default service class
        tolog('WARNING: Failed to find file in any expected service class - will set STAGE_SVCCLASS to %s' % castorConfig['svcClassDefault'])
        serviceClass = castorConfig['svcClassDefault']

    tolog('Setting STAGE_SVCCLASS to %s' % serviceClass)
    os.environ['STAGE_SVCCLASS'] = serviceClass

    dest_path = os.path.join(path, lfn)
    _cmd_str = '%s/usr/bin/rfcp %s %s' % (_setup_str, loc_pfn, dest_path)
    tolog("Executing command: %s" % (_cmd_str))
    report['transferStart'] = time()

    # execute the copy under a hard timeout
    timeout = 3600
    try:
        s, telapsed, cout, cerr = timed_command(_cmd_str, timeout)
    except Exception, e:
        pilotErrorDiag = 'timed_command() threw an exception: %s' % (e)
        tolog("!!WARNING!!1111!! %s" % (pilotErrorDiag))
        s = 1
        # NOTE(review): the success path binds cout/cerr but this failure
        # path binds 'o'; whatever follows must handle both names - confirm
        o = str(e)
        telapsed = timeout
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, fscope=None, **pdict):
    """ copy input file from SE to local dir

    aria2c/Rucio-based stage-in: builds (or reuses) a metalink file for the
    replica, assembles a CA bundle, downloads with aria2c (first link only),
    then optionally verifies size/checksum of the local copy.

    :param gpfn: SURL of the source replica
    :param lfn: logical file name; used as the local file name
    :param path: local destination directory ('' means CWD)
    :param fsize: expected file size (0 or "" disables the size check)
    :param fchecksum: expected checksum (0 or "" disables the checksum check)
    :param guid: file GUID, used for the tracing report and metalink
    :param fscope: Rucio scope of the file; falls back to pdict['scope']
    :param pdict: extra parameters (token, jobId, scope, workDir, proxycheck, usect, access, report)
    :return: (exit code, pilot error diagnostics); (0, "") on success
    """

    # determine which timeout option to use
    # NOTE(review): computed but not applied to the aria2c command - kept for
    # parity with the other aria2c mover; confirm whether it should be used
    timeout_option = "--connect-timeout 300 --timeout %d" % (self.timeout)

    sslCert = self.sslCert
    sslKey = self.sslKey  # unused in this method body
    sslCertDir = self.sslCertDir

    # used aria2c options:
    # --certificate Client certificate file and password (SSL)(proxy)
    # --private-key user proxy again
    # --ca-certificate: concatenate *.0 in cert dir to make bundle
    # --out: <file> Write output to <file> instead of stdout
    # --dir: output directory, needed when multiple files(metalink)
    # --continue: if file is already there (from previous) then success
    # --auto-file-renaming=false : don't rename existing file

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    token = pdict.get("token", None)  # unused in this method body
    jobId = pdict.get("jobId", "")
    scope = pdict.get("scope", "")
    workDir = pdict.get("workDir", "")
    proxycheck = pdict.get("proxycheck", False)

    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get("usect", True)
    prodDBlockToken = pdict.get("access", "")

    # get the DQ2 tracing report; fall back to an empty dict when missing
    try:
        report = pdict["report"]
    except:
        report = {}
    else:
        # set the proper protocol
        report["protocol"] = "aria2c"
        # mark the relative start
        report["catStart"] = time()
        # the current file
        report["filename"] = lfn
        # guid
        report["guid"] = guid.replace("-", "")

    # get a proper envsetup
    envsetup = self.getEnvsetup(get=True)

    # proxy verification is deliberately disabled for this mover
    tolog("Proxy verification turned off")

    getfile = gpfn  # unused below; the transfer uses the metalink instead

    if path == "":
        path = "./"
    fullname = os.path.join(path, lfn)

    # should the root file be copied or read directly by athena?
    directIn, useFileStager = self.getTransferModes()
    if directIn:
        if useCT:
            # copy tool explicitly requested: disable direct access
            directIn = False
            tolog("Direct access mode is switched off (file will be transferred with the copy tool)")
            updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
        else:
            # determine if the file is a root file according to its name
            rootFile = self.isRootFileName(lfn)
            if prodDBlockToken == "local" or not rootFile:
                directIn = False
                tolog(
                    "Direct access mode has been switched off for this file (will be transferred with the copy tool)"
                )
                updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="copy_to_scratch", type="input")
            elif rootFile:
                # root file: leave it on the SE and let athena read it remotely
                tolog(
                    "Found root file according to file name: %s (will not be transferred in direct reading mode)"
                    % (lfn)
                )
                report["relativeStart"] = None
                report["transferStart"] = None
                self.prepareReport("FOUND_ROOT", report)
                if useFileStager:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="file_stager", type="input")
                else:
                    updateFileState(lfn, workDir, jobId, mode="transfer_mode", state="remote_io", type="input")
                return 0, pilotErrorDiag
            else:
                tolog("Normal file transfer")

    # Build ca bundle if not already there
    cabundleFile = "cabundle.pem"
    if not os.path.exists(cabundleFile):
        _cmd_str = "cat %s/*.0 > %s" % (sslCertDir, cabundleFile)
        tolog("Executing command: %s" % (_cmd_str))
        s, o = commands.getstatusoutput(_cmd_str)

    # If metalink file not created(including all inputs)
    # then make one just for this input
    if os.path.exists("AllInput.xml.meta4"):
        metalink = "AllInput.xml.meta4"
    else:
        tolog("Getting metalink from Rucio")
        rep = replica()
        rep.sfn = gpfn
        rep.filesize = fsize
        rep.filename = lfn
        rep.csumvalue = fchecksum
        if fscope:
            rep.scope = fscope
        else:
            # derive the file scope from the dataset scope string
            # scope = extractPattern(gpfn,r'\/rucio\/(.+)\/[a-zA-Z0-9]{2}\/[a-zA-Z0-9]{2}\/')
            rep.scope = scope.replace("/", ".")
        replicas = {guid: [rep]}
        self.surls2metalink(replicas, "oneInput.xml.meta4")
        metalink = "oneInput.xml.meta4"

    # count how many download links the metalink offers (informational only)
    # BUGFIX: the original test 'word in ("<url")' was a substring test
    # against the string "<url" - parentheses do not make a tuple - so
    # fragments such as "url" were counted as links; compare for equality.
    # The file handle is now also closed deterministically.
    word_occour = 0
    with open(metalink) as metaL_file:
        for line in metaL_file:
            word_occour += line.strip().split().count("<url")
    tolog("number of links: %s, using only the first" % (str(word_occour)))

    # build the copy command (-j 1: download a single file/link at a time)
    _cmd_str = (
        "%s -j 1 --ca-certificate=%s --certificate=%s --private-key=%s --auto-file-renaming=false --continue --server-stat-of=aria2cperf.txt %s"
        % (self.copyCommand, cabundleFile, sslCert, sslCert, metalink)
    )

    # invoke the transfer commands
    report["relativeStart"] = time()
    report["transferStart"] = time()
    tolog("Executing command: %s" % (_cmd_str))
    s, o = commands.getstatusoutput(_cmd_str)
    tolog(o)

    if s != 0:
        tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
        check_syserr(s, o)
        pilotErrorDiag = "aria2c failed: %s" % (o)
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        ec = error.ERR_STAGEINFAILED
        return ec, pilotErrorDiag

    report["validateStart"] = time()

    # get the checksum type (md5sum or adler32)
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # verify the downloaded file unless verifications are disabled
    if (fsize != 0 or fchecksum != 0) and self.doFileVerifications():
        loc_filename = lfn
        dest_file = os.path.join(path, loc_filename)

        # get local file size and checksum (csumtype already determined above)
        ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
        if ec != 0:
            self.prepareReport("LOCAL_FILE_INFO_FAIL", report)
            return ec, pilotErrorDiag

        # compare remote and local file size
        if long(fsize) != 0 and long(dstfsize) != long(fsize):
            pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" % (
                os.path.basename(gpfn),
                str(dstfsize),
                str(fsize),
            )
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.prepareReport("FS_MISMATCH", report)
            return error.ERR_GETWRONGSIZE, pilotErrorDiag

        # compare remote and local file checksum
        if fchecksum and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % (
                csumtype,
                os.path.basename(gpfn),
                dstfchecksum,
                fchecksum,
            )
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))

            # report corrupted file to consistency server
            self.reportFileCorruption(gpfn)

            if csumtype == "adler32":
                self.prepareReport("AD_MISMATCH", report)
                return error.ERR_GETADMISMATCH, pilotErrorDiag
            else:
                self.prepareReport("MD5_MISMATCH", report)
                return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # everything checked out: record the file as transferred
    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.prepareReport("DONE", report)
    return 0, pilotErrorDiag