def setupStageOutHPCEvent(self):
    """Prepare objectstore stage-out for HPC event-service output.

    Resolves the event-service objectstore path, builds the copy-tool
    setup from the site information, creates the initial tracing report
    and instantiates the objectstore site mover used for stage-out.
    """
    # NOTE(review): both branches assign the same
    # getFilePathForObjectStore(filetype="eventservice") value; the
    # token-specific queuedata replacement is commented out, so the
    # condition currently only triggers the unused objectstore_orig read.
    if self.__job.prodDBlockTokenForOutput is not None and len(self.__job.prodDBlockTokenForOutput) > 0 and self.__job.prodDBlockTokenForOutput[0] != 'NULL':
        siteInfo = getSiteInformation(self.getExperiment())
        objectstore_orig = siteInfo.readpar("objectstore")
        #siteInfo.replaceQueuedataField("objectstore", self.__job.prodDBlockTokenForOutput[0])
        espath = getFilePathForObjectStore(filetype="eventservice")
    else:
        #siteInfo = getSiteInformation(self.getExperiment())
        #objectstore = siteInfo.readpar("objectstore")
        espath = getFilePathForObjectStore(filetype="eventservice")
    self.__espath = getFilePathForObjectStore(filetype="eventservice")
    tolog("EventServer objectstore path: " + espath)

    siteInfo = getSiteInformation(self.getExperiment())
    # get the copy tool
    setup = siteInfo.getCopySetup(stageIn=False)
    tolog("Copy Setup: %s" % (setup))

    dsname, datasetDict = self.getDatasets()
    # initial tracing report describing this objectstore transfer
    self.__report = getInitialTracingReport(userid=self.__job.prodUserID, sitename=self.__jobSite.sitename, dsname=dsname, eventType="objectstore", analysisJob=self.__analysisJob, jobId=self.__job.jobId, jobDefId=self.__job.jobDefinitionID, dn=self.__job.prodUserID)
    self.__siteMover = objectstoreSiteMover(setup)
def setup(self, experiment=None, surl=None):
    """Read event-service objectstore fields from site information.

    Unsets http(s) proxies on BNL hosts (direct objectstore access only),
    then stores os_name/os_endpoint/os_bucket_endpoint and the access and
    secret keys on self. Returns (0, "") on success or
    (PilotErrors.ERR_GETKEYPAIR, message) on a failed lookup.
    """
    if os.environ.get("http_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['http_proxy']
    if os.environ.get("https_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['https_proxy']

    si = getSiteInformation(experiment)
    self.os_name = si.getObjectstoresField("os_name", os_bucket_name="eventservice")
    self.os_endpoint = si.getObjectstoresField("os_endpoint", os_bucket_name="eventservice")
    self.os_bucket_endpoint = si.getObjectstoresField("os_bucket_endpoint", os_bucket_name="eventservice")
    self.public_key = si.getObjectstoresField("os_access_key", os_bucket_name="eventservice")
    self.private_key = si.getObjectstoresField("os_secret_key", os_bucket_name="eventservice")

    # NOTE(review): this validates self.os_ddmendpoint, which is never
    # assigned in this method (os_name/os_endpoint are) — presumably set
    # elsewhere on the instance; confirm, otherwise this may raise
    # AttributeError or validate the wrong field.
    if not (self.os_ddmendpoint and self.os_ddmendpoint != "" and self.os_bucket_endpoint and self.os_bucket_endpoint != ""):
        tolog("Failed to get S3 objectstore name")
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get S3 objectstore name"
    return 0, ""
def getDefaultResources(self):
    """Return the dict of HPC resource settings for this queue.

    Values come from the queuedata 'catchall' field (comma-separated
    key=value tokens); any key not present there falls back to the
    built-in default. Note that overridden values stay strings while the
    fallbacks keep their literal types (int/str/None).
    """
    siteInfo = getSiteInformation(self.getExperiment())
    catchalls = siteInfo.readpar("catchall")

    # parse "k1=v1,k2=v2,..." into a lookup of overrides
    overrides = {}
    for token in catchalls.split(","):
        if '=' in token:
            parts = token.split('=')
            overrides[parts[0]] = parts[1]

    # (key, fallback) table of plain settings
    defaults = [
        ('queue', 'regular'),
        ('mppwidth', 48),
        ('mppnppn', 1),
        ('walltime_m', 30),
        ('ATHENA_PROC_NUMBER', 23),
        ('max_nodes', 3),
        ('min_walltime_m', 20),
        ('max_walltime_m', 30),
        ('nodes', 2),
        ('min_nodes', 2),
        ('cpu_per_node', 24),
        ('partition', None),
        ('repo', None),
        ('max_events', 10000),
        ('initialtime_m', 15),
        ('time_per_event_m', 10),
        ('mode', 'normal'),
        ('backfill_queue', 'regular'),
    ]
    res = {}
    for key, fallback in defaults:
        res[key] = overrides.get(key, fallback)

    # these two get explicit normalisation
    res['stageout_threads'] = int(overrides.get('stageout_threads', 4))
    res['copy_input_files'] = overrides.get('copy_input_files', 'false').lower()
    return res
def setup(self, experiment=None, surl=None):
    """Set up the environment for S3 objectstore access.

    Imports boto (adding the cvmfs copy to sys.path as a fallback),
    removes any http(s) proxy environment settings, selects the security
    key pair matching the destination surl (BNL or Amazon) and creates
    the S3 objectstore helper on self.s3Objectstore.

    Returns (0, "") on success, or (PilotErrors code, message) on failure.
    """
    try:
        import boto
        import boto.s3.connection
        from boto.s3.key import Key
    except ImportError:
        # boto is not on the default path on many worker nodes; fall back to the cvmfs copy
        tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
        sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
        except ImportError:
            tolog("Failed to import boto again. exit")
            return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

    # proxies interfere with direct S3 access
    if os.environ.get("http_proxy"):
        del os.environ['http_proxy']
    if os.environ.get("https_proxy"):
        del os.environ['https_proxy']

    si = getSiteInformation(experiment)

    # choose the key pair matching the objectstore endpoint in the surl
    # (idiom fix: use 'is'/'is not' for None comparisons, raw string for the regex)
    keyPair = None
    if re.search(r"^s3://.*\.usatlas\.bnl\.gov:8443", surl) is not None:
        keyPair = si.getSecurityKey('BNL_ObjectStoreKey', 'BNL_ObjectStoreKey.pub')
    if surl.startswith("s3://s3.amazonaws.com:80"):
        keyPair = si.getSecurityKey('Amazon_ObjectStoreKey', 'Amazon_ObjectStoreKey.pub')
    if keyPair is None or keyPair["publicKey"] is None or keyPair["privateKey"] is None:
        tolog("Failed to get the keyPair for S3 objectstore %s " % (surl))
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

    self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"])
    return 0, ""
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """ copy output file from disk to local SE """ # function is based on dCacheSiteMover put function error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict alt = pdict.get('alt', False) lfn = pdict.get('lfn', '') guid = pdict.get('guid', '') token = pdict.get('token', '') scope = pdict.get('scope', '') dsname = pdict.get('dsname', '') analysisJob = pdict.get('analJob', False) testLevel = pdict.get('testLevel', '0') extradirs = pdict.get('extradirs', '') experiment = pdict.get('experiment', '') proxycheck = pdict.get('proxycheck', False) prodSourceLabel = pdict.get('prodSourceLabel', '') # get the site information object si = getSiteInformation(experiment) tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel)) if prodSourceLabel == 'ddm' and analysisJob: tolog( "Treating PanDA Mover job as a production job during stage-out" ) analysisJob = False # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid) filename = os.path.basename(source) # get all the proper paths ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths( error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround if ec != 0: reportState = {} reportState["clientState"] = tracer_error self.prepareReport(reportState, report) return self.put_data_retfail(ec, pilotErrorDiag) # get the RSE from ToA try: _RSE = self.getRSE(surl=surl) except Exception, e: tolog( "Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """Copy an output file from local disk to the S3 objectstore.

    The destination URL is used as the surl; for log tarballs the lfn is
    rewritten as "scope:lfn", otherwise the tracing report's event type
    is set to 'put_es'. Returns the standard 6-tuple
    (0, diag, surl, size, checksum, arch_type) on success, or
    put_data_retfail(...) on error.
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    experiment = pdict.get('experiment', '')
    outputDir = pdict.get('outputDir', '')
    os_bucket_id = pdict.get('os_bucket_id', -1)
    timeout = pdict.get('timeout', None)
    if not timeout:
        timeout = self.timeout

    # get the site information object
    si = getSiteInformation(experiment)

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    # record the remote endpoint in the tracing report
    parsed = urlparse.urlparse(destination)
    scheme = parsed.scheme
    hostname = parsed.netloc.partition(':')[0]
    port = int(parsed.netloc.partition(':')[2])
    report['remoteSite'] = '%s://%s:%s' % (scheme, hostname, port)

    filename = os.path.basename(source)
    surl = destination
    self.log("surl=%s, timeout=%s" % (surl, timeout))
    if "log.tgz" in surl:
        # log files are stored under "scope:lfn"
        surl = surl.replace(lfn, "%s:%s"%(scope,lfn))
    else:
        report['eventType'] = 'put_es'

    status, output, size, checksum = self.stageOut(source, surl, token, experiment, outputDir=outputDir, timeout=timeout, os_bucket_id=os_bucket_id, report=report)
    if status != 0:
        # reuse the PilotErrors instance created above (the original built a redundant second one)
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)

    state = "DONE"
    self.prepareReport(state, report)
    return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def setup(self, experiment=None, surl=None):
    """Set up the environment for S3 objectstore access (security-key flavour).

    Imports boto (with a cvmfs fallback path), removes proxy settings,
    resolves the event-service access/secret key names from site
    information and builds self.s3Objectstore. Returns (0, "") on success
    or (PilotErrors code, message) on failure.
    """
    try:
        import boto
        import boto.s3.connection
        from boto.s3.key import Key
    except ImportError:
        tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
        sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
        except ImportError:
            tolog("Failed to import boto again. exit")
            return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

    # proxies interfere with direct S3 access
    if os.environ.get("http_proxy"):
        del os.environ['http_proxy']
    if os.environ.get("https_proxy"):
        del os.environ['https_proxy']

    si = getSiteInformation(experiment)
    os_access_key = si.getObjectstoresField("os_access_key", "eventservice")
    os_secret_key = si.getObjectstoresField("os_secret_key", "eventservice")
    if os_access_key and os_access_key != "" and os_secret_key and os_secret_key != "":
        keyPair = si.getSecurityKey(os_secret_key, os_access_key)
    else:
        tolog("Failed to get the keyPair for S3 objectstore")
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"

    os_is_secure = si.getObjectstoresField("os_is_secure", "eventservice")
    self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"], os_is_secure)
    # keyPair = None
    # if re.search("^s3://.*\.usatlas\.bnl\.gov:8443", surl) != None:
    #     keyPair = si.getSecurityKey('BNL_ObjectStoreKey', 'BNL_ObjectStoreKey.pub')
    # if re.search("^s3://.*\.cern\.ch:443", surl) != None:
    #     keyPair = si.getSecurityKey('CERN_ObjectStoreKey', 'CERN_ObjectStoreKey.pub')
    # if surl.startswith("s3://s3.amazonaws.com:80"):
    #     keyPair = si.getSecurityKey('Amazon_ObjectStoreKey', 'Amazon_ObjectStoreKey.pub')
    # if keyPair == None or keyPair["publicKey"] == None or keyPair["privateKey"] == None:
    #     tolog("Failed to get the keyPair for S3 objectstore %s " % (surl))
    #     return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"
    #
    # NOTE(review): the next assignment overwrites the S3ObjctStore built
    # above and silently drops the os_is_secure argument — it looks like a
    # leftover that was meant to be commented out with the block above;
    # confirm before removing.
    self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"])
    return 0, ""
def setupStageOutHPCEvent(self):
    """Prepare objectstore stage-out for HPC event-service output.

    Resolves the event-service objectstore path, builds the copy-tool
    setup, creates the initial tracing report and instantiates the
    objectstore site mover. NOTE(review): a near-identical definition of
    this method appears elsewhere in this file.
    """
    # NOTE(review): both branches assign the same
    # getFilePathForObjectStore(filetype="eventservice") value; the
    # token-specific queuedata replacement is commented out, so the
    # condition currently only triggers the unused objectstore_orig read.
    if self.__job.prodDBlockTokenForOutput is not None and len(self.__job.prodDBlockTokenForOutput) > 0 and self.__job.prodDBlockTokenForOutput[0] != 'NULL':
        siteInfo = getSiteInformation(self.getExperiment())
        objectstore_orig = siteInfo.readpar("objectstore")
        #siteInfo.replaceQueuedataField("objectstore", self.__job.prodDBlockTokenForOutput[0])
        espath = getFilePathForObjectStore(filetype="eventservice")
    else:
        #siteInfo = getSiteInformation(self.getExperiment())
        #objectstore = siteInfo.readpar("objectstore")
        espath = getFilePathForObjectStore(filetype="eventservice")
    self.__espath = getFilePathForObjectStore(filetype="eventservice")
    tolog("EventServer objectstore path: " + espath)

    siteInfo = getSiteInformation(self.getExperiment())
    # get the copy tool
    setup = siteInfo.getCopySetup(stageIn=False)
    tolog("Copy Setup: %s" % (setup))

    dsname, datasetDict = self.getDatasets()
    # initial tracing report describing this objectstore transfer
    self.__report = getInitialTracingReport(userid=self.__job.prodUserID, sitename=self.__jobSite.sitename, dsname=dsname, eventType="objectstore", analysisJob=self.__analysisJob, jobId=self.__job.jobId, jobDefId=self.__job.jobDefinitionID, dn=self.__job.prodUserID)
    self.__siteMover = objectstoreSiteMover(setup)
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """ copy output file from disk to local SE """ # function is based on dCacheSiteMover put function error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict alt = pdict.get('alt', False) jobId = pdict.get('jobId', '') jobSetID = pdict.get('jobsetID', '') lfn = pdict.get('lfn', '') guid = pdict.get('guid', '') #token = pdict.get('token', '') scope = pdict.get('scope', '') dsname = pdict.get('dsname', '') analysisJob = pdict.get('analJob', False) testLevel = pdict.get('testLevel', '0') extradirs = pdict.get('extradirs', '') experiment = pdict.get('experiment', '') proxycheck = pdict.get('proxycheck', False) prodSourceLabel = pdict.get('prodSourceLabel', '') outputDir = pdict.get('outputDir', '') timeout = pdict.get('timeout', None) pandaProxySecretKey = pdict.get('pandaProxySecretKey') if not timeout: timeout = self.timeout # get the site information object si = getSiteInformation(experiment) tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel)) # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 's3objectstorepresignedurl', lfn, guid) filename = os.path.basename(source) surl = destination status, output, size, checksum = self.stageOut(source, jobId, lfn, jobSetID, pandaProxySecretKey, experiment, outputDir=outputDir, timeout=timeout) if status !=0: errors = PilotErrors() state = errors.getErrorName(status) if state == None: state = "PSTAGE_FAIL" # self.__sendReport(state, report) self.prepareReport(state, report) return self.put_data_retfail(status, output, surl) state = "DONE" # self.__sendReport(state, report) # self.prepareReport(state, report) return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ copy input file from SE to local dir """
    error = PilotErrors()

    # Get input parameters from pdict
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    if path == '': path = './'
    fullname = os.path.join(path, lfn)

    # get the site information object
    si = getSiteInformation(experiment)
    # map the SE path to an s3:// URL via the queue's copyprefix
    ret_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
    else:
        gpfn = ret_path
        status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
        state = "DONE"
    else:
        errors = PilotErrors()
        state = errors.getErrorName(status)
        if state == None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """ copy output file from disk to local SE """ # function is based on dCacheSiteMover put function error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict alt = pdict.get('alt', False) lfn = pdict.get('lfn', '') guid = pdict.get('guid', '') token = pdict.get('token', '') scope = pdict.get('scope', '') dsname = pdict.get('dsname', '') analysisJob = pdict.get('analJob', False) testLevel = pdict.get('testLevel', '0') extradirs = pdict.get('extradirs', '') experiment = pdict.get('experiment', '') logPath = pdict.get('logPath', '') proxycheck = pdict.get('proxycheck', False) prodSourceLabel = pdict.get('prodSourceLabel', '') # get the site information object si = getSiteInformation(experiment) tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel)) if prodSourceLabel == 'ddm' and analysisJob: tolog( "Treating PanDA Mover job as a production job during stage-out" ) analysisJob = False # get the DQ2 tracing report report = self.getStubTracingReport(pdict['report'], 'xrootdObjectstore', lfn, guid) filename = os.path.basename(source) if logPath != "": surl = logPath else: surl = os.path.join(destination, lfn) # get the DQ2 site name from ToA try: _dq2SiteName = self.getDQ2SiteName(surl=surl) except Exception, e: tolog( "Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """Copy an output file from local disk to the S3 objectstore.

    The destination URL is used directly as the surl. Returns the
    standard 6-tuple (0, diag, surl, size, checksum, arch_type) on
    success, or put_data_retfail(...) on stage-out error.
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)
    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    # PanDA Mover jobs are staged out with production conventions
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    filename = os.path.basename(source)
    surl = destination
    status, output, size, checksum = self.stageOut(source, surl, token, experiment)
    if status != 0:
        # reuse the PilotErrors instance created above (the original built a redundant second one)
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)

    state = "DONE"
    self.prepareReport(state, report)
    return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """ copy output file from disk to local SE """ # function is based on dCacheSiteMover put function error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict alt = pdict.get('alt', False) lfn = pdict.get('lfn', '') guid = pdict.get('guid', '') token = pdict.get('token', '') scope = pdict.get('scope', '') dsname = pdict.get('dsname', '') analysisJob = pdict.get('analJob', False) testLevel = pdict.get('testLevel', '0') extradirs = pdict.get('extradirs', '') experiment = pdict.get('experiment', '') proxycheck = pdict.get('proxycheck', False) prodSourceLabel = pdict.get('prodSourceLabel', '') outputDir = pdict.get('outputDir', '') # get the site information object si = getSiteInformation(experiment) tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel)) if prodSourceLabel == 'ddm' and analysisJob: tolog("Treating PanDA Mover job as a production job during stage-out") analysisJob = False # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid) filename = os.path.basename(source) # get all the proper paths ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround if ec != 0: reportState = {} reportState["clientState"] = tracer_error self.prepareReport(reportState, report) return self.put_data_retfail(ec, pilotErrorDiag) # get the RSE from ToA try: _RSE = self.getRSE(surl=surl) except Exception, e: tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def setup(self, experiment):
    """Verify the local ROOT setup string and cache it on success.

    Idempotent: returns (0, None) immediately if already set up.
    On success stores the verified setup string in self._setup and marks
    the instance as set up; otherwise retries with the default setup and
    returns the last (status, output) from verifySetup.
    """
    if self.__isSetuped:
        return 0, None
    self.__experiment = experiment

    thisExperiment = getExperiment(experiment)
    self.useTracingService = thisExperiment.useTracingService()

    si = getSiteInformation(experiment)
    self._defaultSetup = self.getLocalROOTSetup(si)
    _setupStr = self._defaultSetup #self.getSetup()

    # get the user proxy if available
    envsetupTest = _setupStr.strip()
    if envsetupTest != "" and not envsetupTest.endswith(';'):
        envsetupTest += ";"
    # has_key is Python 2 only
    if os.environ.has_key('X509_USER_PROXY'):
        envsetupTest += " export X509_USER_PROXY=%s;" % (os.environ['X509_USER_PROXY'])

    self.log("to verify site setup: %s " % envsetupTest)
    status, output = self.verifySetup(envsetupTest, experiment)
    self.log("site setup verifying: status: %s, output: %s" % (status, output["errorLog"]))
    if status == 0:
        self._setup = envsetupTest
        self.__isSetuped = True
        return status, output
    else:
        # NOTE(review): _setupStr was taken from self._defaultSetup above,
        # so this fallback re-verifies the identical setup string — confirm
        # whether the primary path was meant to use self.getSetup().
        if self._defaultSetup:
            #try to use default setup
            self.log("Try to use default envsetup")
            envsetupTest = self._defaultSetup.strip()
            if envsetupTest != "" and not envsetupTest.endswith(';'):
                envsetupTest += ";"
            if os.environ.has_key('X509_USER_PROXY'):
                envsetupTest += " export X509_USER_PROXY=%s;" % (os.environ['X509_USER_PROXY'])
            self.log("verify default setup: %s " % envsetupTest)
            status, output = self.verifySetup(envsetupTest, experiment)
            self.log("default setup verifying: status: %s, output: %s" % (status, output["errorLog"]))
            if status == 0:
                self._setup = envsetupTest
                self.__isSetuped = True
                return status, output
        return status, output
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """Copy an output file from local disk to the S3 objectstore.

    The destination URL is used directly as the surl. Returns the
    standard 6-tuple (0, diag, surl, size, checksum, arch_type) on
    success, or put_data_retfail(...) on stage-out error.
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)
    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    # PanDA Mover jobs are staged out with production conventions
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 's3objectstore', lfn, guid)

    filename = os.path.basename(source)
    surl = destination
    status, output, size, checksum = self.stageOut(source, surl, token, experiment)
    if status != 0:
        # reuse the PilotErrors instance created above (the original built a redundant second one)
        state = error.getErrorName(status)
        if state is None:
            state = "PSTAGE_FAIL"
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)

    state = "DONE"
    self.prepareReport(state, report)
    return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict): """ copy output file from disk to local SE """ # function is based on dCacheSiteMover put function error = PilotErrors() pilotErrorDiag = "" # Get input parameters from pdict alt = pdict.get('alt', False) lfn = pdict.get('lfn', '') guid = pdict.get('guid', '') token = pdict.get('token', '') scope = pdict.get('scope', '') dsname = pdict.get('dsname', '') analysisJob = pdict.get('analJob', False) testLevel = pdict.get('testLevel', '0') extradirs = pdict.get('extradirs', '') experiment = pdict.get('experiment', '') logPath = pdict.get('logPath', '') proxycheck = pdict.get('proxycheck', False) prodSourceLabel = pdict.get('prodSourceLabel', '') # get the site information object si = getSiteInformation(experiment) tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel)) if prodSourceLabel == 'ddm' and analysisJob: tolog("Treating PanDA Mover job as a production job during stage-out") analysisJob = False # get the Rucio tracing report report = self.getStubTracingReport(pdict['report'], 'xrootdObjectstore', lfn, guid) filename = os.path.basename(source) if logPath != "": surl = logPath else: surl = os.path.join(destination, lfn) # get the RSE from ToA try: _RSE = self.getRSE(surl=surl) except Exception, e: tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ copy input file from SE to local dir (os_bucket_id-aware variant) """
    error = PilotErrors()

    # Get input parameters from pdict
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    # objectstore bucket id; -1 means "use the default bucket"
    os_bucket_id = pdict.get('os_bucket_id', -1)
    # try to get the direct reading control variable (False for direct reading mode; file should not be copied)
    useCT = pdict.get('usect', True)
    prodDBlockToken = pdict.get('access', '')

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    if path == '': path = './'
    fullname = os.path.join(path, lfn)

    # get the site information object
    si = getSiteInformation(experiment)
    # map the SE path to an s3:// URL via the queue's copyprefix
    ret_path = si.getCopyPrefixPathNew(gpfn, stageIn=True)
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
    else:
        gpfn = ret_path
        status, output = self.stageIn(gpfn, fullname, fsize, fchecksum, experiment, os_bucket_id=os_bucket_id)

    if status == 0:
        updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", ftype="input")
        state = "DONE"
    else:
        errors = PilotErrors()
        state = errors.getErrorName(status)
        if state == None:
            state = "PSTAGE_FAIL"

    self.prepareReport(state, report)
    return status, output
def setup(self, experiment):
    """Verify the local ROOT setup string and cache it on success.

    Idempotent: returns (0, None) immediately if already set up.
    On success stores the verified setup string in self._setup and marks
    the instance as set up; otherwise retries with the default setup and
    returns the last (status, output) from verifySetup. NOTE(review): a
    near-identical definition of this method appears elsewhere in this file.
    """
    if self.__isSetuped:
        return 0, None
    self.__experiment = experiment

    thisExperiment = getExperiment(experiment)
    self.useTracingService = thisExperiment.useTracingService()

    si = getSiteInformation(experiment)
    self._defaultSetup = self.getLocalROOTSetup(si)
    _setupStr = self._defaultSetup #self.getSetup()

    # get the user proxy if available
    envsetupTest = _setupStr.strip()
    if envsetupTest != "" and not envsetupTest.endswith(';'):
        envsetupTest += ";"
    # has_key is Python 2 only
    if os.environ.has_key('X509_USER_PROXY'):
        envsetupTest += " export X509_USER_PROXY=%s;" % (os.environ['X509_USER_PROXY'])

    self.log("to verify site setup: %s " % envsetupTest)
    status, output = self.verifySetup(envsetupTest, experiment)
    self.log("site setup verifying: status: %s, output: %s" % (status, output["errorLog"]))
    if status == 0:
        self._setup = envsetupTest
        self.__isSetuped = True
        return status, output
    else:
        # NOTE(review): _setupStr was taken from self._defaultSetup above,
        # so this fallback re-verifies the identical setup string — confirm
        # whether the primary path was meant to use self.getSetup().
        if self._defaultSetup:
            #try to use default setup
            self.log("Try to use default envsetup")
            envsetupTest = self._defaultSetup.strip()
            if envsetupTest != "" and not envsetupTest.endswith(';'):
                envsetupTest += ";"
            if os.environ.has_key('X509_USER_PROXY'):
                envsetupTest += " export X509_USER_PROXY=%s;" % (os.environ['X509_USER_PROXY'])
            self.log("verify default setup: %s " % envsetupTest)
            status, output = self.verifySetup(envsetupTest, experiment)
            self.log("default setup verifying: status: %s, output: %s" % (status, output["errorLog"]))
            if status == 0:
                self._setup = envsetupTest
                self.__isSetuped = True
                return status, output
        return status, output
def setup(self, experiment=None, surl=None):
    """Read event-service objectstore fields from site information.

    Unsets http(s) proxies on BNL hosts (direct objectstore access only),
    then stores os_name/os_endpoint/os_bucket_endpoint and the access and
    secret keys on self. Returns (0, "") on success or
    (PilotErrors.ERR_GETKEYPAIR, message) on a failed lookup.
    NOTE(review): a near-identical definition appears elsewhere in this file.
    """
    if os.environ.get("http_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['http_proxy']
    if os.environ.get("https_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['https_proxy']

    si = getSiteInformation(experiment)
    self.os_name = si.getObjectstoresField("os_name", os_bucket_name="eventservice")
    self.os_endpoint = si.getObjectstoresField("os_endpoint", os_bucket_name="eventservice")
    self.os_bucket_endpoint = si.getObjectstoresField("os_bucket_endpoint", os_bucket_name="eventservice")
    self.public_key = si.getObjectstoresField("os_access_key", os_bucket_name="eventservice")
    self.private_key = si.getObjectstoresField("os_secret_key", os_bucket_name="eventservice")

    # NOTE(review): this validates self.os_ddmendpoint, which is never
    # assigned in this method (os_name/os_endpoint are) — presumably set
    # elsewhere on the instance; confirm, otherwise this may raise
    # AttributeError or validate the wrong field.
    if not (self.os_ddmendpoint and self.os_ddmendpoint != "" and self.os_bucket_endpoint and self.os_bucket_endpoint != ""):
        tolog("Failed to get S3 objectstore name")
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get S3 objectstore name"
    return 0, ""
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """
    Move the file from the current local directory to the local pilot init dir

    Parameters are:
    source -- full path of the file in local directory
    destination -- destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name) NOT USED (pinitdir is used instead)
    fsize -- file size of the source file (evaluated if 0)
    fchecksum -- MD5 checksum of the source file (evaluated if 0)
    pdict -- to allow additional parameters that may make sense with specific movers

    Assume that the pilot init dir is locally mounted and its local path is the same as the remote path
    if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
    returns: exitcode, pilotErrorDiag, gpfn, fsize, fchecksum
    """
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    DN = pdict.get('DN', '')
    dsname = pdict.get('dsname', '')
    analJob = pdict.get('analJob', False)
    sitename = pdict.get('sitename', '')
    testLevel = pdict.get('testLevel', '0')
    pilot_initdir = pdict.get('pinitdir', '')
    experiment = pdict.get('experiment', "ATLAS")

    # get the site information object
    si = getSiteInformation(experiment)

    # are we on a tier 3?
    if si.isTier3():
        outputDir = self.getTier3Path(dsname, DN)
        tolog("Writing output on a Tier 3 site to: %s" % (outputDir))
        # create the dirs if they don't exist
        try:
            self.mkdirWperm(outputDir)
        except Exception, e:
            # best-effort: failure is logged but not returned here
            tolog("!!WARNING!!2999!! Could not create dir: %s, %s" % (outputDir, str(e)))
    # NOTE(review): this chunk ends here — the remainder of the function
    # appears to lie outside this excerpt.
def setup(self, experiment=None, surl=None):
    """Fetch and validate the event-service objectstore configuration.

    Reads os_endpoint, os_bucket_endpoint and the access/secret keys from
    the site information, normalises leading/trailing slashes, and stores
    them on self. Returns (0, "") on success or
    (PilotErrors.ERR_GETKEYPAIR, message) when a required field is missing.
    """
    # unset proxy for BNL object store (direct access only). Todo: set/unset proxy before/after access to bnl.org
    for proxy_var in ('http_proxy', 'https_proxy'):
        if os.environ.get(proxy_var) and hostname and hostname.endswith("bnl.gov"):
            del os.environ[proxy_var]

    si = getSiteInformation(experiment)
    # not used here: self.os_name = si.getObjectstoresField("os_name", os_bucket_name="eventservice")

    self.os_endpoint = si.getObjectstoresField("os_endpoint", os_bucket_name="eventservice")
    if not self.os_endpoint or self.os_endpoint == "":
        msg = "Failed to get os_endpoint value"
        tolog(msg)
        return PilotErrors.ERR_GETKEYPAIR, msg
    # drop a single trailing slash so later joins do not produce '//'
    if self.os_endpoint.endswith("/"):
        self.os_endpoint = self.os_endpoint[:-1]

    self.os_bucket_endpoint = si.getObjectstoresField("os_bucket_endpoint", os_bucket_name="eventservice")
    if not (self.os_bucket_endpoint and self.os_bucket_endpoint != ""):
        msg = "Failed to get os_bucket_endpoint value"
        tolog(msg)
        return PilotErrors.ERR_GETKEYPAIR, msg
    # strip at most one slash from either end of the bucket endpoint
    if self.os_bucket_endpoint.endswith("/"):
        self.os_bucket_endpoint = self.os_bucket_endpoint[:-1]
    if self.os_bucket_endpoint.startswith("/"):
        self.os_bucket_endpoint = self.os_bucket_endpoint[1:]

    self.public_key = si.getObjectstoresField("os_access_key", os_bucket_name="eventservice")
    if not (self.public_key and self.public_key != ""):
        msg = "Failed to get os_access_key (os public key) value"
        tolog(msg)
        return PilotErrors.ERR_GETKEYPAIR, msg

    self.private_key = si.getObjectstoresField("os_secret_key", os_bucket_name="eventservice")
    if not (self.private_key and self.private_key != ""):
        msg = "Failed to get os_secret_key (os private key) value"
        tolog(msg)
        return PilotErrors.ERR_GETKEYPAIR, msg

    return 0, ""
def setup(self, experiment=None, surl=None, os_bucket_id=-1, label='r'):
    """Set up S3 objectstore access for a given bucket and access label.

    Lazily imports boto (with a cvmfs fallback path), resolves the DDM
    endpoint for the requested bucket (or the default eventservice
    bucket when os_bucket_id is -1), fetches the key pair and builds
    self.s3Objectstore. Returns (0, "") on success or
    (PilotErrors code, message) on failure.
    """
    if not self.__isBotoLoaded:
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
            self.__isBotoLoaded = True
        except ImportError:
            tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
            sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
                self.__isBotoLoaded = True
            except ImportError:
                tolog("Failed to import boto again. exit")
                return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

    si = getSiteInformation(experiment)

    # os_bucket_id will only be set if the setup function is called, if setup via the init function - get the default bucket id
    if os_bucket_id == -1:
        ddmendpoint = si.getObjectstoreDDMEndpoint(os_bucket_name='eventservice') # assume eventservice
    else:
        ddmendpoint = si.getObjectstoreDDMEndpointFromBucketID(os_bucket_id)

    endpoint_id = si.getObjectstoreEndpointID(ddmendpoint=ddmendpoint, label=label, protocol='s3')
    os_access_key, os_secret_key, os_is_secure = si.getObjectstoreKeyInfo(endpoint_id, ddmendpoint=ddmendpoint)

    if os_access_key and os_access_key != "" and os_secret_key and os_secret_key != "":
        keyPair = si.getSecurityKey(os_secret_key, os_access_key)
        # NOTE(review): only privateKey is validated here; publicKey is
        # used below without a check — presumably getSecurityKey always
        # provides both when privateKey is set; confirm.
        if "privateKey" not in keyPair or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"
    else:
        tolog("Failed to get the keyPair name for S3 objectstore")
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair name for S3 objectstore"

    self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"], os_is_secure, self._useTimerCommand)
    return 0, ""
def getSpecialSetupCommand(self):
    """
    Return any site-specific setup string to prepend to the trf command.

    Note: this special setup command is hardly used and could probably be removed.
    If any special setup should be added before the trf is executed, the command
    built here can be attached to the run command via addSPSetupToCmd(); it is
    also forwarded to the get/put functions (currently not used there).
    """
    setup_cmd = ""

    # on tier-3 sites the 'envsetup' schedconfig value is folded into the
    # special command (unknown if this is still needed)
    site_info = getSiteInformation(self.__experiment)
    if site_info.isTier3():
        env_part = readpar('envsetup')
        if env_part != "":
            setup_cmd += env_part
            if not setup_cmd.endswith(';'):
                setup_cmd += ";"

    return setup_cmd
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """
    Copy the output file from local disk to the SE via an HTTPS curl upload,
    then verify the transfer by querying Rucio for the remote checksum.

    :param source: local path of the file to upload
    :param destination: destination SE (the actual target URL is rebuilt from
        the Rucio protocol information for the local space token)
    :param fsize: local file size; recalculated when 0
    :param fchecksum: local checksum; recalculated when 0
    :param pdict: keyword bag (lfn, guid, token, scope, dsname, testLevel, ...)
    :return: put_data_retfail() tuple on failure
    NOTE(review): no success return statement is visible at the end of this
    function in this chunk - confirm against the full file.
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report; when present, seed it with this transfer's metadata
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-','')

    # preparing variables: recompute size/checksum locally if not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    #if proxycheck:
    #    s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
    #    if s != 0:
    #        self.__sendReport('NO_PROXY', report)
    #        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    #else:
    #    tolog("Proxy verification turned off")
    tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope)
    if ec != 0:
        self.__sendReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # here begins the new magic... from Vincenzo Lavorini
    # build the HTTPS upload URL from the Rucio protocol info of the local space token
    sitemover = SiteMover.SiteMover()
    v_path = sitemover.getPathFromScope(scope, filename)
    rucio_c = Client()
    # strip the leading "ATLAS" from the space token to get the RSE suffix
    if "ATLAS" in token:
        token_ok=token[+5:]
    else:
        token_ok=token
    local_se_token=self.site_name+"_"+token_ok
    v_hostname= [j['hostname'] for j in rucio_c.get_protocols(local_se_token)]
    v_port= [j['port'] for j in rucio_c.get_protocols(local_se_token)]
    v_prefix= [j['prefix'] for j in rucio_c.get_protocols(local_se_token)]
    v_address= "https://%s:%s%s"%(v_hostname[0],v_port[0],v_prefix[0])
    tolog("prova1 address is %s" % (v_address))
    # avoid a doubled "rucio" path segment between the protocol prefix and the scope path
    if "rucio/" in v_address and "/rucio" in v_path:
        v_address=v_address[:-7]
        tolog("prova2 address is %s" % (v_address))
    elif "rucio" in v_address and "rucio" in v_path :
        v_address=v_address[:-6]
        tolog("prova3 address is %s" % (v_address))
    full_http_surl=v_address+v_path
    tolog("prova3 full_http__surl is %s" % (full_http_surl))

    full_surl =surl
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    #putfile=surl
    #tolog("putfile: %s" % (putfile))
    #tolog("full_surl: %s" % (full_surl))

    # get https surl
    #full_http_surl = full_surl.replace("srm://", "https://")

    # get the DQ2 site name from ToA ---why? Is it needed?
    #try:
    #    _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    #except Exception, e:
    #    tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
    #else:
    #    report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
    #    tolog("DQ2 site name: %s" % (_dq2SiteName))

    if testLevel == "1":
        source = "thisisjustatest"

    # determine which timeout option to use
    #commented by Lavorini timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)
    timeout_option = "--connect-timeout 300"

    sslCert = self.sslCert
    sslKey = self.sslKey
    sslCertDir = self.sslCertDir

    # check htcopy if it is existed or env is set properly
    #_cmd_str = 'which htcopy'
    #try:
    #    s, o = commands.getstatusoutput(_cmd_str)
    #except Exception, e:
    #    tolog("!!WARNING!!2990!! Exception caught: %s (%d, %s)" % (str(e), s, o))
    #    o = str(e)
    #if s != 0:
    #    tolog("!!WARNING!!2990!! Command failed: %s" % (_cmd_str))
    #    o = o.replace('\n', ' ')
    #    tolog("!!WARNING!!2990!! check PUT command failed. Status=%s Output=%s" % (str(s), str(o)))
    #    #return 999999

    # cleanup the SURL if necessary (remove port and srm substring)
    #if token:
    # used lcg-cp options:
    # --srcsetype: specify SRM version
    # --verbose: verbosity on
    # --vo: specifies the Virtual Organization the user belongs to
    # -s: space token description
    # -b: BDII disabling
    # -t: time-out
    # (lcg-cr) -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
    # -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
    # -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
    #     the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
    #     generated in the same format as with the Replica Manager
    #    _cmd_str = '%s lcg-cr --verbose --vo atlas -T srmv2 -s %s -b -t %d -l %s -g %s -d %s file:%s' %\
    #               (envsetup, token, self.timeout, lfclfn, guid, surl, fppfn)
    # usage: lcg-cp [-h,--help] [-i,--insecure] [-c,--config config_file]
    #               [-n nbstreams] [-s,--sst src_spacetokendesc] [-S,--dst dest_spacetokendesc]
    #               [-D,--defaultsetype se|srmv1|srmv2] [-T,--srcsetype se|srmv1|srmv2] [-U,--dstsetype se|srmv1|srmv2]
    #               [-b,--nobdii] [-t timeout] [-v,--verbose] [-V,--vo vo] [--version] src_file dest_file
    #    surl = putfile[putfile.index('srm://'):]
    #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s?spacetoken=%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl, token)
    #_cmd_str = '%s lcg-cp --verbose --vo atlas -b %s -U srmv2 -S %s file://%s %s' % (envsetup, timeout_option, token, source, full_surl)
    #else:
    # surl is the same as putfile
    #_cmd_str = '%s htcopy --ca-path %s --user-cert %s --user-key %s "%s"' % (envsetup, sslCertDir, sslCert, sslKey, full_http_surl)
    #_cmd_str = '%s lcg-cp --vo atlas --verbose -b %s -U srmv2 file://%s %s' % (envsetup, timeout_option, source, full_surl)

    # NOTE(review): --cert, --key and --cacert are all filled with self.sslKey;
    # --cert presumably should use self.sslCert - confirm before changing.
    _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (self.sslKey,self.sslKey,self.sslKey,self.sslCertDir,full_http_surl, source)
    tolog("Executing command: %s" % (_cmd_str))
    t0 = os.times()
    _cmd=Popen(_cmd_str,stdout=PIPE,stderr=PIPE, shell=True )
    _cmd_out, _cmd_stderr= _cmd.communicate()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    report['validateStart'] = time()
    t1 = os.times()
    # os.times()[4] is elapsed wall-clock time
    t = t1[4] - t0[4]
    tolog("Curl command output = %s" % (_cmd_out))
    tolog("Command finished after %f s" % (t))
    # curl reports "... bytes uploaded" on a successful PUT
    if "bytes uploaded" not in _cmd_out:
        tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))
    # dead code kept for reference: original error classification for the copy command
    '''
    # check if file was partially transferred, if so, remove it
    _ec = self.removeFile(envsetup, self.timeout, dst_gpfn)
    if _ec == -2:
        pilotErrorDiag += "(failed to remove file) " # i.e. do not retry stage-out

    if "Could not establish context" in o:
        pilotErrorDiag += "Could not establish context: Proxy / VO extension of proxy has probably expired"
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.__sendReport('CONTEXT_FAIL', report)
        return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    elif "No such file or directory" in o:
        pilotErrorDiag += "No such file or directory: %s" % (o)
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.__sendReport('NO_FILE_DIR', report)
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
    elif "globus_xio: System error" in o:
        pilotErrorDiag += "Globus system error: %s" % (o)
        tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
        self.__sendReport('GLOBUS_FAIL', report)
        return self.put_data_retfail(error.ERR_PUTGLOBUSSYSERR, pilotErrorDiag)
    else:
        if len(o) == 0 and t >= self.timeout:
            pilotErrorDiag += "Copy command self timed out after %d s" % (t)
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.__sendReport('CP_TIMEOUT', report)
            return self.put_data_retfail(error.ERR_PUTTIMEOUT, pilotErrorDiag)
        else:
            if len(o) == 0:
                pilotErrorDiag += "Copy command returned error code %d but no output" % (ec)
            else:
                pilotErrorDiag += o
            self.__sendReport('CP_ERROR', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
    '''
    verified = False

    # getting the remote checksum from Rucio:
    token_file=open('token_fle', 'r')
    token_rucio=token_file.readline()
    # hide everything from "CN" onwards when logging the token
    pos2print=token_rucio.find("CN")
    token_rucio2print=token_rucio[:pos2print]+'(Hidden token)'
    tolog("Token I am using: %s" %(token_rucio2print))
    httpredirector = readpar('httpredirector')

    # poll Rucio for the replica checksum, up to 7 attempts
    trial_n=1
    remote_checksum="none"
    while (remote_checksum == "none" and trial_n<8):
        trial_n+=1
        if not httpredirector:
            #cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip |awk \'{FS=\"hash type=\"}; {print $2}\' |awk \'{FS=\">\"}; {print $2}\' |awk \'{FS=\"<\"} {print $1}\'| grep -v \'^$\'"%(token_rucio,scope,filename)
            cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip "%(token_rucio,scope,filename)
            cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip "%(token_rucio2print,scope,filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" %(httpredirector))
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip "%(token_rucio,httpredirector,scope,filename)
                # NOTE(review): token_rucioi2print is a typo (NameError at runtime);
                # presumably token_rucio2print was intended - confirm and fix.
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip "%(token_rucioi2print,httpredirector,scope,filename)
            else:
                tolog("HTTP redirector I am using: %s" %(httpredirector))
                # NOTE(review): 'reps' is not defined anywhere in this function
                # (NameError at runtime); presumably scope/filename were intended.
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip "%(token_rucio,httpredirector,reps[0].scope,reps[0].filename)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip "%(token_rucio2print,httpredirector,reps[0].scope,reps[0].filename)

        tolog("Getting remote checksum: command to be executed: %s" %(cmd2print))
        checksum_cmd=Popen(cmd, stdout=PIPE,stderr=PIPE, shell=True)
        remote_checksum, stderr=checksum_cmd.communicate()
        tolog("Remote checksum as given by rucio %s" %(remote_checksum))
        if (remote_checksum == "none"):
            tolog("In checking checksum: command std error: %s" %(stderr))
            pilotErrorDiag = "Cannot get the checksum of file on SE"
            tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
            tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
            # NOTE(review): time() is called as a function above, so 'time' is
            # presumably the function from 'from time import time' - in that case
            # time.sleep(3) raises AttributeError; confirm the module imports.
            time.sleep(3)

    # try to get the remote checksum with lcg-get-checksum
    #remote_checksum = self.lcgGetChecksum(envsetup, self.timeout, full_surl)
    #if not remote_checksum:
    #    # try to grab the remote file info using lcg-ls command
    #    remote_checksum, remote_fsize = self.getRemoteFileInfo(envsetup, self.timeout, full_surl)
    #else:
    #    tolog("Setting remote file size to None (not needed)")
    #    remote_fsize = None

    # compare the checksums if the remote checksum was extracted
    tolog("Remote checksum: %s" % str(remote_checksum))
    tolog("Local checksum: %s" % (fchecksum))
    if remote_checksum:
        if remote_checksum != fchecksum:
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
            tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
            if csumtype == "adler32":
                self.__sendReport('AD_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                self.__sendReport('MD5_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
        else:
            tolog("Remote and local checksums verified")
            verified = True
    else:
        tolog("Skipped primary checksum verification (remote checksum not known)")

    # if lcg-ls could not be used
    if "/pnfs/" in surl and not remote_checksum:
        # for dCache systems we can test the checksum with the use method
        tolog("Detected dCache system: will verify local checksum with the local SE checksum")
        # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
        path = surl[surl.find('/pnfs/'):]
        # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
        tolog("File path: %s" % (path))

        _filename = os.path.basename(path)
        _dir = os.path.dirname(path)

        # get the remote checksum
        tolog("Local checksum: %s" % (fchecksum))
        try:
            remote_checksum = self.getdCacheChecksum(_dir, _filename)
        except Exception, e:
            pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
        else:
            if remote_checksum == "NOSUCHFILE":
                pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                self.__sendReport('NOSUCHFILE', report)
                return self.put_data_retfail(error.ERR_NOSUCHFILE, pilotErrorDiag)
            elif remote_checksum:
                tolog("Remote checksum: %s" % (remote_checksum))
            else:
                tolog("Could not get remote checksum")

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, _filename, remote_checksum, fchecksum)
                if csumtype == "adler32":
                    self.__sendReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    self.__sendReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """
    Copy the output file from local disk to the SE using lcg tools.

    :param source: local path of the file to upload
    :param destination: destination SE
    :param fsize: local file size; recalculated when 0
    :param fchecksum: local checksum; recalculated when 0
    :param pdict: keyword bag (lfn, guid, token, scope, dsname, alt, ...)
    :return: put_data_retfail() tuple on failure
    NOTE(review): the function continues beyond this chunk (the visible code
    ends inside the DQ2-site-name try/except).
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'lcg2', lfn, guid)

    # preparing variables: recompute size/checksum locally if not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    if alt:
        # use a cvmfs setup for stage-out to alternative SE
        envsetup = si.getLocalEMISetup()
        if envsetup[-1] != ";":
            envsetup += "; "
    else:
        envsetup = self.getEnvsetup(alt=alt)

    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.__sendReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    if proxycheck:
        s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.__sendReport('NO_PROXY', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt)
    if ec != 0:
        self.__sendReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile = %s" % (putfile))
    tolog("full_surl = %s" % (full_surl))

    # get the DQ2 site name from ToA
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    except Exception, e:
        tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """
    Copy the output file from the local dir to the SE and register it
    into dataset and catalogues.

    :param source: local path of the file to upload
    :param destination: destination SE
    :param fsize: local file size; recalculated when 0
    :param fchecksum: local checksum; recalculated when 0
    :param pdict: keyword bag (sitename, report, lfn, guid, token, dsname, ...)
    :return: put_data_retfail() tuple on failure
    NOTE(review): the function continues beyond this chunk (the visible code
    ends inside the DQ2-site-name try/except).
    """
    # Get input parameters from pdict
    # Mancinelli: added sitename and appid variable
    sitename = pdict.get('sitename', '')
    appid = pdict.get('report').get('appid','')
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    dsname = pdict.get('dsname', '')
    workDir = pdict.get('workDir', '')
    analyJob = pdict.get('analJob', False)
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    if prodSourceLabel == 'ddm' and analyJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analyJob = False

    # get the DQ2 tracing report; when present, seed it with this transfer's metadata
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'local'
        # mark the relative start
        report['relativeStart'] = time()
        # the current file
        report['filename'] = lfn
        report['guid'] = guid.replace('-','')
        # report['dataset'] = dsname

    filename = os.path.basename(source)

    # get the local file size and checksum
    csumtype = self.checksum_command
    if fsize == 0 or fchecksum == 0:
        ec, self.__pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype=csumtype)
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, self.__pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    ec, pilotErrorDiag = verifySetupCommand(self.__error, envsetup)
    if ec != 0:
        self.__sendReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # Mancinelli: TODO change. This is a Hack.. need to understand how to get Job data in a proper manner
    #JobData= '%s/Job_%s.py' % (os.path.dirname(source), appid)
    JobData= '%s/jobState-%s-test.pickle' % (os.path.dirname(source), appid)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(self.__error, analyJob, token, prodSourceLabel, dsname, filename, sitename, JobData)
    if ec != 0:
        self.__sendReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)
    dst_gpfn = surl
    tolog("dst_gpfn: %s" % (dst_gpfn))

    # get the DQ2 site name from ToA
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn)
    except Exception, e:
        tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
def put_data(self, pfn, destination, fsize=0, fchecksum=0, dsname='', extradirs='', **pdict):
    """
    Copy the output file from disk to the local SE with lcg-cr and register
    it in the LFC.

    :param pfn: local path of the file to upload
    :param destination: destination SE
    :param fsize: local file size; recalculated when 0
    :param fchecksum: local checksum; recalculated when 0
    :param dsname: dataset name; required (error return when empty)
    :param extradirs: extra directory components (unused in the visible code)
    :param pdict: keyword bag (lfn, guid, token, logFile, sitename, ...)
    :return: put_data_retfail() tuple on failure
    NOTE(review): the function continues beyond this chunk (the visible code
    ends inside the lcg-cr command execution try/except).
    """
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    logFile = pdict.get('logFile', '')
    sitename = pdict.get('sitename', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    filename = pfn.split('/')[-1]

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid)

    # is the dataset defined?
    if dsname == '':
        pilotErrorDiag = "Dataset name not specified to put_data"
        tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        self.__sendReport('DSN_UNDEF', report)
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

    # preparing variables: recompute size/checksum locally if not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(pfn, csumtype="adler32")
        if ec != 0:
            self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.__sendReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # do we need to check the user proxy?
    if proxycheck:
        s, pilotErrorDiag = self.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.__sendReport('PROXY_FAIL', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename)
    if ec != 0:
        self.__sendReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    lfclfn = os.path.join(lfcdir, lfn)
    # LFC LFN = /grid/atlas/dq2/testpanda/testpanda.destDB.dfb45803-1251-43bb-8e7a-6ad2b6f205be_sub01000492/
    #364aeb74-8b62-4c8f-af43-47b447192ced_0.job.log.tgz

    # putfile is the SURL
    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile = %s" % (putfile))
    tolog("full_surl = %s" % (full_surl))

    # get the DQ2 site name from ToA
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    except:
        # WARNING: do not print the exception here since it can sometimes not be converted to a string! (problem seen at Taiwan)
        tolog("Warning: Failed to get the DQ2 site name (can not add this info to tracing report)")
    else:
        report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
        tolog("DQ2 site name: %s" % (_dq2SiteName))

    # get the absolute (full) path to the file
    fppfn = os.path.abspath(pfn)
    tolog("pfn=%s" % (pfn))

    # create the LFC directory before registering the file
    cmd = '%s echo "LFC_HOST=$LFC_HOST"; lfc-mkdir -p %s' % (envsetup, lfcdir)
    # export LFC_HOST=lfc0448.gridpp.rl.ac.uk ; echo "LFC_HOST=$LFC_HOST";
    #lfc-mkdir -p /grid/atlas/dq2/testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647
    tolog("Executing command: %s" % (cmd))
    s, o = commands.getstatusoutput(cmd)

    if s == 0:
        tolog("LFC setup and mkdir succeeded")
        tolog("Command output: %s" % (o))
    else:
        tolog("!!WARNING!!2990!! LFC setup and mkdir failed. Status=%s Output=%s" % (s, o))
        if o == "Could not establish context":
            pilotErrorDiag = "Could not establish context: Proxy / VO extension of proxy has probably expired"
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.dumpExtendedProxy(envsetup)
            self.__sendReport('CONTEXT_FAIL', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
        else:
            pilotErrorDiag = "LFC setup and mkdir failed: %s" % (o)
            self.__sendReport('LFC_SETUP_FAIL', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

    # determine which timeout option to use (newer lcg-cr versions use long options)
    if self.isNewLCGVersion("%s lcg-cr" % (envsetup)):
        timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
    else:
        timeout_option = "-t %d" % (self.timeout)

    # used lcg-cr options:
    # --verbose: verbosity on
    # --vo: specifies the Virtual Organization the user belongs to
    # -T: specify SRM version
    # -s: space token description
    # -b: BDII disabling
    # -t: time-out
    # -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
    # -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
    # -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
    #     the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
    #     generated in the same format as with the Replica Manager
    if token:
        surl = putfile[putfile.index('srm://'):]
        _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas -T srmv2 -s %s -b %s -l %s -g %s -d %s file:%s' % (envsetup, token, timeout_option, lfclfn, guid, surl, fppfn)
    else:
        surl = putfile
        _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)

    tolog("Executing command: %s" % (_cmd_str))
    s = -1
    t0 = os.times()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    try:
        s, o = commands.getstatusoutput(_cmd_str)
    except Exception, e:
        tolog("!!WARNING!!2990!! Exception caught: %s" % (str(e)))
        o = str(e)
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """
    Stage in the input file by symlinking it from a locally visible directory.

    The local file is assumed to have a relative path equal to the relative
    path in 'gpfn'; no actual copy is performed and no size/checksum test is
    needed since the link points at the original file.

    :param gpfn: full source URL (method://[host[:port]]/dir/filename); only
        used on Tier-3 sites, where its dirname gives the input directory
    :param lfn: logical file name (also the local file name)
    :param path: destination absolute path in the local file system; must exist
    :param fsize: unused
    :param fchecksum: unused
    :param guid: unused
    :param pdict: keyword bag (pinitdir, inputDir, experiment)
    :return: (0, diagnostics) on success, (error code, diagnostics) on failure;
        a partially created destination is never left behind (only a symlink
        is made)
    """
    err = PilotErrors()
    pilotErrorDiag = ""

    # input parameters
    pilot_initdir = pdict.get('pinitdir', '')
    experiment = pdict.get('experiment', "ATLAS")

    # resolve the directory that holds the input files
    si = getSiteInformation(experiment)
    if si.isTier3():
        # on Tier-3 the source URL itself carries the directory
        inputDir = os.path.dirname(gpfn)
    else:
        inputDir = pdict.get('inputDir', '')
        if inputDir == "":
            tolog("Get function will use pilot launch dir as input file dir: %s" % (pilot_initdir))
            inputDir = pilot_initdir
        else:
            tolog("Get function will use requested input file dir: %s" % (inputDir))

    if inputDir == "":
        pilotErrorDiag = "Input dir not set (can not figure out where the input files are)"
        tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
        return err.ERR_STAGEINFAILED, pilotErrorDiag

    src_loc_pfn = os.path.join(inputDir, lfn)
    dest_file = os.path.join(path, lfn)

    # the source must exist before we can link to it; a missing DBRelease
    # file gets its own error code
    if not os.path.exists(src_loc_pfn):
        pilotErrorDiag = "No such file or directory: %s" % (src_loc_pfn)
        tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
        if src_loc_pfn.find("DBRelease") >= 0:
            failure_code = err.ERR_MISSDBREL
        else:
            failure_code = err.ERR_NOSUCHFILE
        return failure_code, pilotErrorDiag

    # make a symbolic link to the input file in the job work dir
    cmd = "ln -s %s %s" % (src_loc_pfn, dest_file)
    tolog("Executing command: %s" % (cmd))
    exit_code, cmd_output = commands.getstatusoutput(cmd)
    if exit_code != 0:
        pilotErrorDiag = "Error linking the file: %d, %s" % (exit_code, cmd_output)
        tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
        return err.ERR_STAGEINFAILED, pilotErrorDiag

    return 0, pilotErrorDiag
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """
    Copy the output file from disk to the S3 objectstore.

    Converts the SURL to an s3: path via the copyprefix, stages the file out,
    then re-reads the local adler32 to make sure the file did not change
    during the transfer (and prefers adler32 over the returned md5).

    :param source: local path of the file to upload
    :param destination: destination SE
    :param fsize: unused in the visible code
    :param fchecksum: unused in the visible code
    :param pdict: keyword bag (lfn, guid, token, scope, dsname, alt, ...)
    :return: (0, diag, surl, size, checksum, self.arch_type) on success,
             put_data_retfail() tuple on failure
    """
    # function is based on dCacheSiteMover put function
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # get local adler32 checksum
    status, output, adler_size, adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
    if status != 0:
        errorLog = 'Failed to get local file %s adler32 checksum: %s' % (source, output)
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        # NOTE(review): ERR_STAGEINFAILED is used on this stage-OUT path
        # (here and below) - confirm whether ERR_STAGEOUTFAILED was intended.
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)

    # map the SURL onto the S3 path via the copyprefix
    ret_path = si.getCopyPrefixPathNew(surl, stageIn=False)
    tolog("Convert destination: %s to new path: %s" % (surl, ret_path))
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
        size = None
        checksum = None
    else:
        # perform the actual transfer to the objectstore
        status, output, size, checksum = self.stageOut(source, ret_path, token, experiment)

    if status !=0:
        errors = PilotErrors()
        state = errors.getErrorName(status)
        if state == None:
            state = "PSTAGE_FAIL"
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)
    else:
        if size == adler_size:
            tolog("The file size is not changed. Will check whether adler32 changed.")
            # recompute the local adler32 to detect modification during transfer
            status, output, new_adler_size, new_adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
            if status != 0:
                errorLog = 'Failed to get local file %s adler32 checksum: %s' % (source, output)
                tolog("!!WARNING!!1777!! %s" % (errorLog))
                status = PilotErrors.ERR_STAGEINFAILED
                state = "PSTAGE_FAIL"
                output = errorLog
                self.prepareReport(state, report)
                return self.put_data_retfail(status, output, surl)
            else:
                if adler_checksum == new_adler_checksum:
                    tolog("The file checksum is not changed. Will use adler32 %s to replace the md5 checksum %s" % (adler_checksum, checksum))
                    checksum = adler_checksum
                else:
                    errorLog = "The file checksum changed from %s(before transfer) to %s(after transfer)" % (adler_checksum, new_adler_checksum)
                    tolog("!!WARNING!!1777!! %s" % (errorLog))
                    status = PilotErrors.ERR_STAGEINFAILED
                    state = "PSTAGE_FAIL"
                    output = errorLog
                    self.prepareReport(state, report)
                    return self.put_data_retfail(status, output, surl)

    state = "DONE"
    self.prepareReport(state, report)
    return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Moves the file from the current local directory to a storage element.

    source: full path of the file in local directory
    destination: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
    Assumes that the SE is locally mounted and its local path is the same as the remote path
    if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
    returns: exitcode, gpfn, fsize, fchecksum
    (NOTE(review): this excerpt ends after the RSE lookup -- the actual transfer
    follows in the original file; confirm against the full source.)
    """

    error = PilotErrors()

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    dsname = pdict.get('dsname', '')
    analyJob = pdict.get('analyJob', False)
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    # PanDA Mover (ddm) transfers are treated as production stage-outs
    if prodSourceLabel == 'ddm' and analyJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analyJob = False

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn, guid)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = ''

    # verify that the setup command is sane before using it
    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    report['relativeStart'] = time()

    # compute local size/checksum only when the caller did not supply them
    ec = 0
    if fsize == 0 or fchecksum == 0:
        if not self.useExternalAdler32():
            # Can not use external adler32 command for remote file since the command is
            # not available (defaulting to md5sum for put operation)
            tolog("Command not found: adler32.sh (will switch to md5sum for local file checksum)")
            csumtype = "default"
        else:
            csumtype = "adler32"
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype=csumtype)
    if ec != 0:
        self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    tolog("File destination: %s" % (destination))
    dst_se = destination
    # split SURLs of the form srm://host:port/srm/managerv1?SFN=/pnfs/... into prefix + local path
    # srm://dcsrm.usatlas.bnl.gov:8443/srm/managerv1?SFN=/pnfs/usatlas.bnl.gov/
    if (dst_se.find('SFN') != -1):
        s = dst_se.split('SFN=')
        dst_loc_se = s[1]
        dst_prefix = s[0] + 'SFN='
    else:
        _sentries = dst_se.split('/', 3)
        # 'method://host:port' is it always a ftp server? can it be srm? something else?
        dst_serv = _sentries[0] + '//' + _sentries[2]
        # dst_host = _sentries[2] # host and port
        dst_loc_se = '/' + _sentries[3]
        dst_prefix = dst_serv

    # use bare destination when it starts with root://
    if destination.startswith('root://'):
        dst_loc_se = destination
        dst_prefix = ''

    # report['dataset'] = dsname

    # May be be a comma list but take first always
    # (Remember that se can be a list where the first is used for output but any can be used for input)
    se = readpar('se').split(",")[0]
    _dummytoken, se = self.extractSE(se)
    tolog("Using SE: %s" % (se))

    filename = os.path.basename(source)

    # resolve the full destination path / SURL for this file
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analyJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # are we transfering to a space token?
    if token != None and token != "":
        # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
        if "dst:" in token:
            token = token[len('dst:'):]
            tolog("Dropped dst: part of space token descriptor; token=%s" % (token))
            token = "ATLASGROUPDISK"
            tolog("Space token descriptor reset to: %s" % (token))

        # get the proper destination
        #destination = self.getDestination(analyJob, token)
        #if destination == '':
        #    pilotErrorDiag = "put_data destination path in SE not defined"
        #    tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        #    self.prepareReport('SE_DEST_PATH_UNDEF', report)
        #    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        #tolog("Going to store job output at destination: %s" % (destination))
        # add the space token to the destination string
        #dst_loc_sedir = os.path.join(destination, os.path.join(extradirs, dsname))
        #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
        #dst_loc_pfn += "?oss.cgroup=%s" % (token)
        dst_loc_pfn = dst_gpfn + "?oss.cgroup=%s" % (token)
    #else:
    #dst_loc_sedir = os.path.join(dst_loc_se, os.path.join(extradirs, dsname))
    #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
    # NOTE(review): this unconditional assignment overwrites the "?oss.cgroup=..." form
    # set in the token branch above -- confirm this clobber is intended
    dst_loc_pfn = dst_gpfn

    dst_gpfn = dst_prefix + dst_loc_pfn
    tolog("Final destination path: %s" % (dst_loc_pfn))
    tolog("dst_gpfn: %s" % (dst_gpfn))

    # get the Rucio site name from ToA (best-effort; only used for the tracing report)
    try:
        _RSE = self.getRSE(surl=dst_gpfn)
    except Exception, e:
        tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ copy output file from disk to local SE (curl/https protocol variant).

    (NOTE(review): this excerpt ends after the RSE lookup -- the actual curl
    transfer follows in the original file; confirm against the full source.)
    """
    # function is based on dCacheSiteMover put function

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    # PanDA Mover (ddm) transfers are treated as production stage-outs
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the Rucio tracing report; fill in the basics when the caller provided one
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # preparing variables: (re)compute local size/checksum when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    # optionally verify the grid proxy before attempting the transfer
    if proxycheck:
        s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.prepareReport('NO_PROXY', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile: %s" % (putfile))
    tolog("full_surl: %s" % (full_surl))

    # get https surl
    full_http_surl = full_surl.replace("srm://", "https://")

    # get the RSE from ToA (best-effort; only used for the tracing report)
    try:
        _RSE = self.getRSE(surl=putfile)
    except Exception, e:
        tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Moves the file from the current local directory to a storage element.

    source: full path of the file in local directory
    destination: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
    Assumes that the SE is locally mounted and its local path is the same as the remote path
    if both fsize and fchecksum (for the source) are given and !=0 these are assumed without reevaluating them
    returns: exitcode, gpfn, fsize, fchecksum
    (NOTE(review): near-duplicate of the earlier xrootd put_data in this file;
    this excerpt also ends after the RSE lookup -- confirm against the full source.)
    """

    error = PilotErrors()

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    jobId = pdict.get('jobId', '')
    workDir = pdict.get('workDir', '')
    dsname = pdict.get('dsname', '')
    analyJob = pdict.get('analyJob', False)
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    # PanDA Mover (ddm) transfers are treated as production stage-outs
    if prodSourceLabel == 'ddm' and analyJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analyJob = False

    # get the Rucio tracing report
    report = self.getStubTracingReport(pdict['report'], 'xrootd', lfn, guid)

    if self._setup:
        _setup_str = "source %s; " % self._setup
    else:
        _setup_str = ''

    # verify that the setup command is sane before using it
    ec, pilotErrorDiag = verifySetupCommand(error, _setup_str)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    report['relativeStart'] = time()

    # compute local size/checksum only when the caller did not supply them
    ec = 0
    if fsize == 0 or fchecksum == 0:
        if not self.useExternalAdler32():
            # Can not use external adler32 command for remote file since the command is
            # not available (defaulting to md5sum for put operation)
            tolog("Command not found: adler32.sh (will switch to md5sum for local file checksum)")
            csumtype = "default"
        else:
            csumtype = "adler32"
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype=csumtype)
    if ec != 0:
        self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    tolog("File destination: %s" % (destination))
    dst_se = destination
    # split SURLs of the form srm://host:port/srm/managerv1?SFN=/pnfs/... into prefix + local path
    # srm://dcsrm.usatlas.bnl.gov:8443/srm/managerv1?SFN=/pnfs/usatlas.bnl.gov/
    if( dst_se.find('SFN') != -1 ):
        s = dst_se.split('SFN=')
        dst_loc_se = s[1]
        dst_prefix = s[0] + 'SFN='
    else:
        _sentries = dst_se.split('/', 3)
        # 'method://host:port' is it always a ftp server? can it be srm? something else?
        dst_serv = _sentries[0] + '//' + _sentries[2]
        # dst_host = _sentries[2] # host and port
        dst_loc_se = '/'+ _sentries[3]
        dst_prefix = dst_serv

    # use bare destination when it starts with root://
    if destination.startswith('root://'):
        dst_loc_se = destination
        dst_prefix = ''

    # report['dataset'] = dsname

    # May be be a comma list but take first always
    # (Remember that se can be a list where the first is used for output but any can be used for input)
    se = readpar('se').split(",")[0]
    _dummytoken, se = self.extractSE(se)
    tolog("Using SE: %s" % (se))

    filename = os.path.basename(source)

    # resolve the full destination path / SURL for this file
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analyJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # are we transfering to a space token?
    if token != None and token != "":
        # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
        if "dst:" in token:
            token = token[len('dst:'):]
            tolog("Dropped dst: part of space token descriptor; token=%s" % (token))
            token = "ATLASGROUPDISK"
            tolog("Space token descriptor reset to: %s" % (token))

        # get the proper destination
        #destination = self.getDestination(analyJob, token)
        #if destination == '':
        #    pilotErrorDiag = "put_data destination path in SE not defined"
        #    tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        #    self.prepareReport('SE_DEST_PATH_UNDEF', report)
        #    return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

        #tolog("Going to store job output at destination: %s" % (destination))
        # add the space token to the destination string
        #dst_loc_sedir = os.path.join(destination, os.path.join(extradirs, dsname))
        #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
        #dst_loc_pfn += "?oss.cgroup=%s" % (token)
        dst_loc_pfn = dst_gpfn + "?oss.cgroup=%s" % (token)
    #else:
    #dst_loc_sedir = os.path.join(dst_loc_se, os.path.join(extradirs, dsname))
    #dst_loc_pfn = os.path.join(dst_loc_sedir, filename)
    # NOTE(review): this unconditional assignment overwrites the "?oss.cgroup=..." form
    # set in the token branch above -- confirm this clobber is intended
    dst_loc_pfn = dst_gpfn

    dst_gpfn = dst_prefix + dst_loc_pfn
    tolog("Final destination path: %s" % (dst_loc_pfn))
    tolog("dst_gpfn: %s" % (dst_gpfn))

    # get the Rucio site name from ToA (best-effort; only used for the tracing report)
    try:
        _RSE = self.getRSE(surl=dst_gpfn)
    except Exception, e:
        tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def put_data(self, pfn, destination, fsize=0, fchecksum=0, dsname='', extradirs='', **pdict):
    """ copy output file from disk to local SE (lcg-cr variant).

    Registers the file in the LFC (lfc-mkdir + lcg-cr) and copies it to the
    resolved SURL.
    (NOTE(review): this excerpt ends right after the lcg-cr invocation -- the
    result handling follows in the original file; confirm against the full source.)
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    logFile = pdict.get('logFile', '')
    sitename = pdict.get('sitename', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    # PanDA Mover (ddm) transfers are treated as production stage-outs
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    filename = pfn.split('/')[-1]

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'lcg', lfn, guid)

    # is the dataset defined?
    if dsname == '':
        pilotErrorDiag = "Dataset name not specified to put_data"
        tolog('!!WARNING!!2990!! %s' % (pilotErrorDiag))
        self.prepareReport('DSN_UNDEF', report)
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

    # preparing variables: (re)compute local size/checksum when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(pfn, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    # verify that the setup command is sane before using it
    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    # do we need to check the user proxy?
    if proxycheck:
        s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.prepareReport('PROXY_FAIL', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag, surl=dst_gpfn)

    lfclfn = os.path.join(lfcdir, lfn)
    # LFC LFN = /grid/atlas/dq2/testpanda/testpanda.destDB.dfb45803-1251-43bb-8e7a-6ad2b6f205be_sub01000492/
    #364aeb74-8b62-4c8f-af43-47b447192ced_0.job.log.tgz

    # putfile is the SURL
    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile = %s" % (putfile))
    tolog("full_surl = %s" % (full_surl))

    # get the DQ2 site name from ToA (best-effort; only used for the tracing report)
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    except:
        # WARNING: do not print the exception here since it can sometimes not be converted to a string! (problem seen at Taiwan)
        tolog("Warning: Failed to get the DQ2 site name (can not add this info to tracing report)")
    else:
        report['localSite'], report['remoteSite'] = (_dq2SiteName, _dq2SiteName)
        tolog("DQ2 site name: %s" % (_dq2SiteName))

    # get the absolute (full) path to the file
    fppfn = os.path.abspath(pfn)
    tolog("pfn=%s" % (pfn))

    # create the LFC directory before registering the replica
    cmd = '%s echo "LFC_HOST=$LFC_HOST"; lfc-mkdir -p %s' % (envsetup, lfcdir)
    # export LFC_HOST=lfc0448.gridpp.rl.ac.uk ; echo "LFC_HOST=$LFC_HOST";
    #lfc-mkdir -p /grid/atlas/dq2/testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647
    tolog("Executing command: %s" % (cmd))
    s, o = commands.getstatusoutput(cmd)
    if s == 0:
        tolog("LFC setup and mkdir succeeded")
        tolog("Command output: %s" % (o))
    else:
        tolog("!!WARNING!!2990!! LFC setup and mkdir failed. Status=%s Output=%s" % (s, o))
        if o == "Could not establish context":
            pilotErrorDiag = "Could not establish context: Proxy / VO extension of proxy has probably expired"
            tolog("!!WARNING!!2990!! %s" % (pilotErrorDiag))
            self.dumpExtendedProxy(envsetup)
            self.prepareReport('CONTEXT_FAIL', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag, surl=full_surl)
        else:
            pilotErrorDiag = "LFC setup and mkdir failed: %s" % (o)
            self.prepareReport('LFC_SETUP_FAIL', report)
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag, surl=full_surl)

    # determine which timeout option to use
    if self.isNewLCGVersion("%s lcg-cr" % (envsetup)):
        timeout_option = "--srm-timeout=%d --connect-timeout=300 --sendreceive-timeout=%d" % (self.timeout, self.timeout)
    else:
        timeout_option = "-t %d" % (self.timeout)

    # used lcg-cr options:
    # --verbose: verbosity on
    # --vo: specifies the Virtual Organization the user belongs to
    # -T: specify SRM version
    # -s: space token description
    # -b: BDII disabling
    # -t: time-out
    # -l: specifies the Logical File Name associated with the file. If this option is present, an entry is added to the LFC
    # -g: specifies the Grid Unique IDentifier. If this option is not present, a GUID is generated internally
    # -d: specifies the destination. It can be the Storage Element fully qualified hostname or an SURL. In the latter case,
    #     the scheme can be sfn: for a classical SE or srm:. If only the fully qualified hostname is given, a filename is
    #     generated in the same format as with the Replica Manager
    if token:
        # Special case for GROUPDISK (do not remove dst: bit before this stage, needed in several places)
        if "dst:" in token:
            token = token[len('dst:'):]
            tolog("Dropped dst: part of space token descriptor; token=%s" % (token))
            token = "ATLASGROUPDISK"
            tolog("Space token descriptor reset to: %s" % (token))

        surl = putfile[putfile.index('srm://'):]
        _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas -T srmv2 -s %s -b %s -l %s -g %s -d %s file:%s' % (envsetup, token, timeout_option, lfclfn, guid, surl, fppfn)
    else:
        surl = putfile
        _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-cr --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)
    # GoeGrid testing: _cmd_str = '%s which lcg-cr; lcg-cr --version; lcg-crXXX --verbose --vo atlas %s -l %s -g %s -d %s file:%s' % (envsetup, timeout_option, lfclfn, guid, surl, fppfn)

    tolog("Executing command: %s" % (_cmd_str))
    s = -1
    t0 = os.times()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    try:
        s, o = commands.getstatusoutput(_cmd_str)
    except Exception, e:
        tolog("!!WARNING!!2990!! Exception caught: %s" % (str(e)))
        o = str(e)
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Copy an output file from disk to the local SE over HTTPS (curl).

    Based on the dCacheSiteMover put function. Builds the destination HTTPS
    URL from the Rucio protocol catalogue, uploads the file with curl, then
    polls Rucio for the remote replica metadata to verify the transfer.

    source: full path of the local file
    destination: destination SE (paths are actually resolved via si.getProperPaths)
    fsize/fchecksum: local size/checksum; recomputed here when given as 0
    Returns via self.put_data_retfail(...) on error; the success return lies
    beyond this excerpt (the function is truncated in this chunk).
    """

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    proxycheck = pdict.get('proxycheck', False)
    experiment = pdict.get('experiment', '')
    analysisJob = pdict.get('analJob', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    # PanDA Mover (ddm) transfers are treated as production stage-outs
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report; fill in the basics when the caller provided one
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid
        report['guid'] = guid.replace('-', '')

    # preparing variables: (re)compute local size/checksum when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    # proxy verification is deliberately disabled for this mover
    tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # here begins the new magic... from Vincenzo Lavorini:
    # build the HTTPS upload URL from the Rucio protocol catalogue
    sitemover = SiteMover.SiteMover()
    v_path = sitemover.getPathFromScope(scope, filename)
    rucio_c = Client()
    if "ATLAS" in token:
        token_ok = token[+5:]  # drop the leading "ATLAS" prefix from the space token
    else:
        token_ok = token
    local_se_token = self.site_name + "_" + token_ok
    v_hostname = [j['hostname'] for j in rucio_c.get_protocols(local_se_token)]
    v_port = [j['port'] for j in rucio_c.get_protocols(local_se_token)]
    v_prefix = [j['prefix'] for j in rucio_c.get_protocols(local_se_token)]
    v_address = "https://%s:%s%s" % (v_hostname[0], v_port[0], v_prefix[0])
    tolog("prova1 address is %s" % (v_address))
    # avoid a doubled "rucio" path segment when both the prefix and the scope path carry it
    if "rucio/" in v_address and "/rucio" in v_path:
        v_address = v_address[:-7]
        tolog("prova2 address is %s" % (v_address))
    elif "rucio" in v_address and "rucio" in v_path:
        v_address = v_address[:-6]
        tolog("prova3 address is %s" % (v_address))
    full_http_surl = v_address + v_path
    tolog("prova3 full_http__surl is %s" % (full_http_surl))

    full_surl = surl
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    if testLevel == "1":
        source = "thisisjustatest"

    # determine which timeout option to use
    #commented by Lavorini timeout_option = "--connect-timeout 300 --max-time %d" % (self.timeout)
    timeout_option = "--connect-timeout 300"

    sslCert = self.sslCert
    sslKey = self.sslKey
    sslCertDir = self.sslCertDir

    # upload with curl (the legacy htcopy/lcg-cp variants were removed as dead code)
    # NOTE(review): --cert and --cacert are given self.sslKey, not self.sslCert -- confirm intended
    _cmd_str = 'curl -1 --verbose --cert %s --key %s --cacert %s --capath %s -L %s -T %s' % (self.sslKey, self.sslKey, self.sslKey, self.sslCertDir, full_http_surl, source)
    tolog("Executing command: %s" % (_cmd_str))

    t0 = os.times()
    _cmd = Popen(_cmd_str, stdout=PIPE, stderr=PIPE, shell=True)
    _cmd_out, _cmd_stderr = _cmd.communicate()
    report['relativeStart'] = time()
    report['transferStart'] = time()
    report['validateStart'] = time()
    t1 = os.times()
    t = t1[4] - t0[4]
    tolog("Curl command output = %s" % (_cmd_out))
    tolog("Command finished after %f s" % (t))

    # curl reports "... bytes uploaded" on success; only a warning is issued here,
    # the checksum verification below is the real success test
    if "bytes uploaded" not in _cmd_out:
        tolog("!!WARNING!!1137!! Command failed: %s" % (_cmd_str))

    verified = False

    # getting the remote checksum from Rucio:
    # NOTE(review): filename 'token_fle' looks like a typo for 'token_file' -- confirm
    # against whatever writes this token file before changing it
    token_file = open('token_fle', 'r')
    token_rucio = token_file.readline()
    pos2print = token_rucio.find("CN")
    token_rucio2print = token_rucio[:pos2print] + '(Hidden token)'
    tolog("Token I am using: %s" % (token_rucio2print))

    httpredirector = readpar('httpredirector')

    # poll Rucio (up to 7 retries) for the replica metalink of the uploaded file
    trial_n = 1
    remote_checksum = "none"
    while (remote_checksum == "none" and trial_n < 8):
        trial_n += 1
        if not httpredirector:
            cmd = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio, scope, filename)
            cmd2print = "curl -v -1 -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://rucio-lb-prod.cern.ch/replicas/%s/%s?select=geoip " % (token_rucio2print, scope, filename)
        else:
            if "http" in httpredirector:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                # BUG FIX: original referenced the undefined name 'token_rucioi2print' (NameError)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem %s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)
            else:
                tolog("HTTP redirector I am using: %s" % (httpredirector))
                # BUG FIX: original used 'reps[0].scope'/'reps[0].filename' but 'reps' is never
                # defined in this function (NameError) -- use scope/filename as the sibling branches do
                cmd = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio, httpredirector, scope, filename)
                cmd2print = "curl -v -1 -v -H \"%s\" -H 'Accept: application/metalink4+xml' --cacert cabundle.pem https://%s/replicas/%s/%s?select=geoip " % (token_rucio2print, httpredirector, scope, filename)
        tolog("Getting remote checksum: command to be executed: %s" % (cmd2print))
        checksum_cmd = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        remote_checksum, stderr = checksum_cmd.communicate()
        tolog("Remote checksum as given by rucio %s" % (remote_checksum))
        if (remote_checksum == "none"):
            tolog("In checking checksum: command std error: %s" % (stderr))
            pilotErrorDiag = "Cannot get the checksum of file on SE"
            tolog("!!WARNING!!1137!! %s" % (pilotErrorDiag))
            tolog("!!WARNING!!1137!! trial numebr %s" % (trial_n))
            # NOTE(review): if this module does 'from time import time', time.sleep would
            # raise AttributeError -- confirm 'import time' is in scope
            time.sleep(3)

    # compare the checksums if the remote checksum was extracted
    tolog("Remote checksum: %s" % str(remote_checksum))
    tolog("Local checksum: %s" % (fchecksum))
    if remote_checksum:
        if remote_checksum != fchecksum:
            pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                             (csumtype, os.path.basename(dst_gpfn), remote_checksum, fchecksum)
            tolog("!!WARNING!!1800!! %s" % (pilotErrorDiag))
            if csumtype == "adler32":
                self.prepareReport('AD_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                self.prepareReport('MD5_MISMATCH', report)
                return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
        else:
            tolog("Remote and local checksums verified")
            verified = True
    else:
        tolog("Skipped primary checksum verification (remote checksum not known)")

    # if lcg-ls could not be used
    if "/pnfs/" in surl and not remote_checksum:
        # for dCache systems we can test the checksum with the use method
        tolog("Detected dCache system: will verify local checksum with the local SE checksum")
        # gpfn = srm://head01.aglt2.org:8443/srm/managerv2?SFN=/pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....
        path = surl[surl.find('/pnfs/'):]
        # path = /pnfs/aglt2.org/atlasproddisk/mc08/EVNT/mc08.109270.J0....#
        tolog("File path: %s" % (path))

        _filename = os.path.basename(path)
        _dir = os.path.dirname(path)

        # get the remote checksum
        tolog("Local checksum: %s" % (fchecksum))
        try:
            remote_checksum = self.getdCacheChecksum(_dir, _filename)
        except Exception as e:
            pilotErrorDiag = "Could not get checksum from dCache: %s (test will be skipped)" % str(e)
            tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
        else:
            if remote_checksum == "NOSUCHFILE":
                pilotErrorDiag = "The pilot will fail the job since the remote file does not exist"
                tolog('!!WARNING!!2999!! %s' % (pilotErrorDiag))
                self.prepareReport('NOSUCHFILE', report)
                return self.put_data_retfail(error.ERR_NOSUCHFILE, pilotErrorDiag, surl=full_surl)
            elif remote_checksum:
                tolog("Remote checksum: %s" % (remote_checksum))
            else:
                tolog("Could not get remote checksum")

        if remote_checksum:
            if remote_checksum != fchecksum:
                pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" %\
                                 (csumtype, _filename, remote_checksum, fchecksum)
                if csumtype == "adler32":
                    self.prepareReport('AD_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTADMISMATCH, pilotErrorDiag, surl=full_surl)
                else:
                    self.prepareReport('MD5_MISMATCH', report)
                    return self.put_data_retfail(error.ERR_PUTMD5MISMATCH, pilotErrorDiag, surl=full_surl)
            else:
                tolog("Remote and local checksums verified")
                verified = True
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Copy an output file from local disk to the local SE (lcg2-style mover).

    Interface follows the common site-mover put_data contract:
    source     -- full local path of the file to stage out
    destination -- destination SE (unused directly here; SURL comes from getProperPaths)
    fsize/fchecksum -- local size/checksum; recomputed if 0
    pdict      -- mover-specific keyword parameters (lfn, guid, token, scope, ...)

    NOTE(review): this excerpt ends right after the DQ2 site-name lookup; the
    actual transfer logic follows in the full source — confirm there.
    """
    # function is based on dCacheSiteMover put function

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)            # stage-out to alternative SE?
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')           # space token
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    # PanDA Mover ('ddm') jobs are staged out with production conventions
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'lcg2', lfn, guid)

    # preparing variables: recompute local file info only when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type (derived from the checksum string itself)
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    if alt:
        # use a cvmfs setup for stage-out to alternative SE
        envsetup = si.getLocalEMISetup()
        if envsetup[-1] != ";":
            envsetup += "; "
    else:
        envsetup = self.getEnvsetup(alt=alt)

    # make sure the setup command is sane before using it
    ec, pilotErrorDiag = verifySetupCommand(error, envsetup)
    if ec != 0:
        self.prepareReport('RFCP_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    if proxycheck:
        s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.prepareReport('NO_PROXY', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths (destination gpfn, LFC dir and SURL)
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt)
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag, surl=dst_gpfn)

    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile = %s" % (putfile))
    tolog("full_surl = %s" % (full_surl))

    # get the DQ2 site name from ToA (best-effort; only used to enrich the tracing report)
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=putfile)
    except Exception, e:
        tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Copy an output file from local disk to the local SE (curl/https mover).

    Same put_data contract as the other movers; builds a Rucio tracing report
    inline (protocol 'curl') instead of using getStubTracingReport.

    NOTE(review): this excerpt ends right after the RSE lookup; the actual
    transfer logic follows in the full source — confirm there.
    """
    # function is based on dCacheSiteMover put function

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the Rucio tracing report; fall back to an empty dict when absent
    try:
        report = pdict['report']
    except:
        report = {}
    else:
        # set the proper protocol
        report['protocol'] = 'curl'
        # mark the relative start
        report['catStart'] = time()
        # the current file
        report['filename'] = lfn
        # guid (tracing wants it without dashes)
        report['guid'] = guid.replace('-','')

    # preparing variables: recompute local file info only when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            self.prepareReport('LOCAL_FILE_INFO_FAIL', report)
            return self.put_data_retfail(ec, pilotErrorDiag)

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get the checksum type
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # get a proper envsetup
    envsetup = self.getEnvsetup()

    # get the experiment object
    thisExperiment = getExperiment(experiment)

    if proxycheck:
        s, pilotErrorDiag = thisExperiment.verifyProxy(envsetup=envsetup, limit=2)
        if s != 0:
            self.prepareReport('NO_PROXY', report)
            return self.put_data_retfail(error.ERR_NOPROXY, pilotErrorDiag)
    else:
        tolog("Proxy verification turned off")

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    putfile = surl
    full_surl = putfile
    if full_surl[:len('token:')] == 'token:':
        # remove the space token (e.g. at Taiwan-LCG2) from the SURL info
        full_surl = full_surl[full_surl.index('srm://'):]

    # srm://dcache01.tier2.hep.manchester.ac.uk/pnfs/tier2.hep.manchester.ac.uk/data/atlas/dq2/
    #testpanda.destDB/testpanda.destDB.604b4fbc-dbe9-4b05-96bb-6beee0b99dee_sub0974647/
    #86ecb30d-7baa-49a8-9128-107cbfe4dd90_0.job.log.tgz
    tolog("putfile: %s" % (putfile))
    tolog("full_surl: %s" % (full_surl))

    # get https surl by swapping the protocol prefix
    full_http_surl = full_surl.replace("srm://", "https://")

    # get the RSE from ToA (best-effort; only used to enrich the tracing report)
    try:
        _RSE = self.getRSE(surl=putfile)
    except Exception, e:
        tolog("Warning: Failed to get RSE: %s (can not add this info to tracing report)" % str(e))
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Moves the file from the current local directory to a storage element (xrdcp mover).

    source: full path of the file in local directory
    destinaion: destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
    Assumes that the SE is locally mounted and its local path is the same as the remote path.
    If both fsize and fchecksum (for the source) are given and != 0 these are
    assumed without reevaluating them.
    returns: exitcode, gpfn, fsize, fchecksum

    NOTE(review): this excerpt ends right after the DQ2 site-name lookup; the
    actual transfer logic follows in the full source — confirm there.
    """
    error = PilotErrors()

    # Get input parameters from pdict
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    analyJob = pdict.get('analJob', False)
    dsname = pdict.get('dsname', '')
    sitename = pdict.get('sitename', '')
    cmtconfig = pdict.get('cmtconfig', '')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'xrdcp', lfn, guid)

    # get a proper setup
    _setup_str = self.getSetup()

    # if "CERN" in sitename:
    #     _setup_str = "source /afs/cern.ch/project/xrootd/software/setup_stable_for_atlas.sh;"
    # PN, for now
    #_setup_str = ""

    tolog("xrdcpSiteMover put_data using setup: %s" % (_setup_str))

    # the LFC path is required to build the destination paths
    lfcpath, pilotErrorDiag = self.getLFCPath(analyJob)
    if lfcpath == "":
        self.__sendReport('STAGEOUT_FAIL', report)
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

    ec = 0
    # get the file size and checksum of the local file (only when not supplied)
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
    if ec != 0:
        self.__sendReport('LOCAL_FILE_INFO_FAIL', report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    tolog("Local checksum: %s, local file size: %s" % (fchecksum, str(fsize)))

    # now that the file size is known, add it to the tracing report
    report['filesize'] = fsize

    # get all the proper paths
    filename = os.path.basename(source)
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analyJob, token, prodSourceLabel, dsname, filename)
    if ec != 0:
        self.__sendReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # correct the surl since it might contain the space token and the port info at the beginning
    surl = self.stripListSEs([surl])[0]
    tolog("dst_gpfn: %s" % (dst_gpfn))
    tolog("surl : %s" % (surl))

    bare_dst_gpfn = dst_gpfn # ie starts with /.. (url and port will be added later, not good for rfmkdir eg)
    dst_loc_pfn = dst_gpfn
    dst_gpfn = surl

    # get the DQ2 site name from ToA (best-effort; only used to enrich the tracing report)
    try:
        _dq2SiteName = self.getDQ2SiteName(surl=dst_gpfn)
    except Exception, e:
        tolog("Warning: Failed to get the DQ2 site name: %s (can not add this info to tracing report)" % str(e))
def setup(self, experiment=None, surl=None, os_bucket_id=-1, label='r'):
    """ Set up the S3 objectstore environment.

    experiment   -- experiment name used to look up site information
    surl         -- storage URL (unused in the visible code path; kept for interface compatibility)
    os_bucket_id -- objectstore bucket id; -1 means "use the default eventservice endpoint"
    label        -- endpoint label ('r' by default; passed through to getObjectstoreEndpointID)

    Returns (0, "") on success, or (PilotErrors code, diagnostic string) on failure.

    Side effects: may append the cvmfs boto path to sys.path, may delete the
    http(s)_proxy environment variables on BNL hosts, and stores the connected
    store in self.s3Objectstore.
    """
    # lazily import boto, falling back to the cvmfs-provided copy
    if not self.__isBotoLoaded:
        try:
            import boto
            import boto.s3.connection
            from boto.s3.key import Key
            self.__isBotoLoaded = True
        except ImportError:
            tolog("Failed to import boto, add /cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/ to sys.path")
            sys.path.append('/cvmfs/atlas.cern.ch/repo/sw/external/boto/lib/python2.6/site-packages/')
            try:
                import boto
                import boto.s3.connection
                from boto.s3.key import Key
                self.__isBotoLoaded = True
            except ImportError:
                tolog("Failed to import boto again. exit")
                return PilotErrors.ERR_UNKNOWN, "Failed to import boto"

    hostname = None
    try:
        hostname = socket.getfqdn()
    except:
        tolog(traceback.format_exc())
    # on BNL hosts the proxy env vars interfere with S3 access — drop them
    if os.environ.get("http_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['http_proxy']
    if os.environ.get("https_proxy") and hostname and hostname.endswith("bnl.gov"):
        del os.environ['https_proxy']

    si = getSiteInformation(experiment)

    # os_bucket_id will only be set if the setup function is called, if setup via the init function - get the default bucket id
    if os_bucket_id == -1:
        ddmendpoint = si.getObjectstoreDDMEndpoint(os_bucket_name='eventservice') # assume eventservice
    else:
        ddmendpoint = si.getObjectstoreDDMEndpointFromBucketID(os_bucket_id)
    endpoint_id = si.getObjectstoreEndpointID(ddmendpoint=ddmendpoint, label=label, protocol='s3')
    os_access_key, os_secret_key, os_is_secure = si.getObjectstoreKeyInfo(endpoint_id, ddmendpoint=ddmendpoint)

    # resolve the named keys into the actual key pair
    if os_access_key and os_access_key != "" and os_secret_key and os_secret_key != "":
        keyPair = si.getSecurityKey(os_secret_key, os_access_key)
        if "privateKey" not in keyPair or keyPair["privateKey"] is None:
            tolog("Failed to get the keyPair for S3 objectstore")
            return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"
    else:
        tolog("Failed to get the keyPair name for S3 objectstore")
        return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair name for S3 objectstore"

    self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"], os_is_secure, self._useTimerCommand)

    # legacy per-site key selection, kept for reference:
    # keyPair = None
    # if re.search("^s3://.*\.usatlas\.bnl\.gov:8443", surl) != None:
    #     keyPair = si.getSecurityKey('BNL_ObjectStoreKey', 'BNL_ObjectStoreKey.pub')
    # if re.search("^s3://.*\.cern\.ch:443", surl) != None:
    #     keyPair = si.getSecurityKey('CERN_ObjectStoreKey', 'CERN_ObjectStoreKey.pub')
    # if surl.startswith("s3://s3.amazonaws.com:80"):
    #     keyPair = si.getSecurityKey('Amazon_ObjectStoreKey', 'Amazon_ObjectStoreKey.pub')
    # if keyPair == None or keyPair["publicKey"] == None or keyPair["privateKey"] == None:
    #     tolog("Failed to get the keyPair for S3 objectstore %s " % (surl))
    #     return PilotErrors.ERR_GETKEYPAIR, "Failed to get the keyPair for S3 objectstore"
    #
    # self.s3Objectstore = S3ObjctStore(keyPair["privateKey"], keyPair["publicKey"])

    return 0, ""
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Move the file from the current local directory to the local pilot init dir

    Parameters are:
    source -- full path of the file in local directory
    destinaion -- destination SE, method://[hostname[:port]]/full-dir-path/ (NB: no file name)
                  NOT USED (pinitdir is used instead)
    fsize -- file size of the source file (evaluated if 0)
    fchecksum -- MD5 checksum of the source file (evaluated if 0)
    pdict -- to allow additional parameters that may make sense with specific movers

    Assume that the pilot init dir is locally mounted and its local path is the
    same as the remote path. If both fsize and fchecksum (for the source) are
    given and != 0 these are assumed without reevaluating them.
    returns: exitcode, pilotErrorDiag, gpfn, fsize, fchecksum

    NOTE(review): this excerpt ends inside the CERNVM branch; the actual file
    move follows in the full source — confirm there.
    """
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    DN = pdict.get('DN', '')
    dsname = pdict.get('dsname', '')
    analJob = pdict.get('analJob', False)
    sitename = pdict.get('sitename', '')
    testLevel = pdict.get('testLevel', '0')
    pilot_initdir = pdict.get('pinitdir', '')
    experiment = pdict.get('experiment', '')
    token = pdict.get('token', '')
    prodSourceLabel = pdict.get('prodSourceLabel', '')
    dsname = pdict.get('dsname', '')  # NOTE(review): duplicate of the lookup above
    scope = pdict.get('scope', '')
    alt = pdict.get('alt', False)
    jobId = pdict.get('jobId', '')
    tolog("jobId: %s" % jobId)

    # get the site information object
    si = getSiteInformation(experiment)

    outputDir = pdict.get('outputDir', '')

    # # are we on a tier 3?
    # if si.isTier3():
    #     outputDir = self.getTier3Path(dsname, DN)
    #     tolog("Writing output on a Tier 3 site to: %s" % (outputDir))
    #
    #     # create the dirs if they don't exist
    #     try:
    #         self.mkdirWperm(outputDir)
    #     except Exception, e:
    #         tolog("!!WARNING!!2999!! Could not create dir: %s, %s" % (outputDir, str(e)))
    # else:
    #     outputDir = pdict.get('outputDir', '')

    # default to the pilot launch dir when no explicit output dir was requested
    if outputDir == "":
        tolog("Put function will use pilot launch dir as output file dir: %s" % (pilot_initdir))
        outputDir = pilot_initdir
    else:
        if not os.path.isdir(outputDir):
            pilotErrorDiag = "Output directory does not exist: %s" % (outputDir)
            tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
        else:
            tolog("Put function will use requested output file dir: %s" % (outputDir))

    # both outputDir and pinitdir were empty — nowhere to put the file
    if outputDir == "":
        pilotErrorDiag = "Pilot init dir not set (can not figure out where the output files should be moved to)"
        tolog('!!WARNING!!2100!! %s' % (pilotErrorDiag))
        return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)

    # recompute local file info only when not supplied
    if fsize == 0 or fchecksum == 0:
        ec, pilotErrorDiag, fsize, fchecksum = self.getLocalFileInfo(source, csumtype="adler32")
        if ec != 0:
            return self.put_data_retfail(ec, pilotErrorDiag)

    dst_loc_sedir = outputDir
    filename = os.path.basename(source)
    dst_loc_pfn = os.path.join(dst_loc_sedir, filename)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, jobId=jobId, jobPars=self.jobPars, sitemover=self) # quick workaround
    if ec != 0:
        reportState = {}
        reportState["clientState"] = tracer_error
        # NOTE(review): 'report' is never defined anywhere in this function, so
        # this error path would raise NameError — confirm against full source
        # and caller expectations before relying on this branch.
        self.prepareReport(reportState, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    tolog('dst_gpfn: %s' % dst_gpfn)
    tolog('lfcdir: %s' % lfcdir)
    dst_loc_sedir = lfcdir
    dst_loc_pfn = dst_gpfn

    # for CERNVM, use dst_loc_sedir as a starting point for creating a directory structure
    if sitename == "CERNVM":
        # NOTE: LFC registration is not done here but some of the LFC variables are used to find out
        # the disk path so the code has to be partially repeated here
        lfcpath, pilotErrorDiag = self.getLFCPath(analJob)
        if lfcpath == "":
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
        else:
            tolog("Got LFC path: %s" % (lfcpath))

        dst_loc_sedir, _dummy = self.getLCGPaths(outputDir, dsname, filename, lfcpath)
        tolog("Got LCG paths: %s" % (dst_loc_sedir))

        # create the sub directories
        try:
            self.mkdirWperm(dst_loc_sedir)
        except Exception, e:
            pilotErrorDiag = "Could not create dir: %s, %s" % (dst_loc_sedir, e)
            tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
            return self.put_data_retfail(error.ERR_STAGEOUTFAILED, pilotErrorDiag)
        else:
            tolog("Successfully created sub-directories: %s" % (dst_loc_sedir))
def updatePandaServer(self, job, site, workerNode, port, xmlstr=None, spaceReport=False, log=None, ra=0, jr=False, useCoPilot=False, stdout_tail="", stdout_path="", additionalMetadata=None):
    """ Update the job status with the jobdispatcher web server.

    State is a tuple of (jobId, ["jobstatus", transExitCode, pilotErrorCode], timestamp)
    log = log extracts
    xmlstr is set in postJobTask for finished jobs (all files). Failed jobs will
    only send xml for log (created in this function)
    jr = job recovery mode

    Returns (0, node) immediately when server updates are disabled (fake update).
    NOTE(review): this excerpt ends after the stdout/text-indexer handling; the
    actual server POST follows in the full source — confirm there.
    """
    # NOTE(review): the format string expects job.getState() to return a
    # 3-tuple (PandaId, result, time) — confirm against the Job class
    tolog("Updating job status in updatePandaServer(): PandaId=%s, result=%s, time=%s" % (job.getState()))

    # set any holding job to failed for sites that do not use job recovery (e.g. sites with LSF, that immediately
    # removes any work directory after the LSF job finishes which of course makes job recovery impossible)
    if not self.__jobrec:
        if job.result[0] == 'holding' and site.sitename != "CERNVM":
            job.result[0] = 'failed'
            tolog("This site does not support job recovery: HOLDING state reset to FAILED")

    # note: any changed job state above will be lost for fake server updates, does it matter?

    # get the node structure expected by the server
    node = self.getNodeStructure(job, site, workerNode, spaceReport=spaceReport, log=log)

    # skip the server update (e.g. on NG)
    if not self.__updateServer:
        tolog("(fake server update)")
        return 0, node

    tolog("xmlstr = %s" % (xmlstr))

    # get the xml
    node['xml'] = self.getXML(job, site.sitename, site.workdir, xmlstr=xmlstr, jr=jr)

    # stdout tail in case job.debug == 'true'
    if job.debug.lower() == "true" and stdout_tail != "":
        # protection for potentially large tails
        stdout_tail = stdout_tail[-2048:]
        node['stdout'] = stdout_tail
        tolog("Will send stdout tail:\n%s (length = %d)" % (stdout_tail, len(stdout_tail)))

        # also send the full stdout to a text indexer if required
        if stdout_path != "":
            if "stdout_to_text_indexer" in readpar('catchall') and os.path.exists(stdout_path):
                tolog("Will send payload stdout to text indexer")

                # get the user name, which we will use to create a proper filename
                from SiteMover import SiteMover
                s = SiteMover()
                username = s.extractUsername(job.prodUserID)

                # get setup path for xrdcp; the whole upload is best-effort
                try:
                    si = getSiteInformation(job.experiment)
                    setup_path = si.getLocalROOTSetup()

                    filename = "PanDA_payload_stdout-%s.txt" % (job.jobId)
                    dateDirs = self.getDateDirs()
                    remotePath = os.path.join(os.path.join(username, dateDirs), filename)
                    url = "root://faxbox.mwt2.org//group/logs/pilot/%s" % (remotePath)
                    cmd = "%sxrdcp -f %s %s" % (setup_path, stdout_path, url)
                    tolog("Executing command: %s" % (cmd))
                    rc, rs = getstatusoutput(cmd)
                    tolog("rc=%d, rs=%s" % (rc, rs))
                except Exception, e:
                    tolog("!!WARNING!!3322!! Failed with text indexer: %s" % (e))
        else:
            tolog("stdout_path not set")
def put_data(self, source, destination, fsize=0, fchecksum=0, **pdict):
    """ Copy an output file from disk to the local SE via gfal-copy/S3 stage-out.

    Same put_data contract as the other movers. On success returns
    (0, pilotErrorDiag, surl, size, checksum, self.arch_type); on failure
    returns via put_data_retfail with a PilotErrors code.

    The size/adler32 of the local file is sampled before and after the
    transfer; an unchanged size+checksum lets the adler32 replace the md5
    returned by stageOut, while any change fails the transfer.
    """
    # function is based on dCacheSiteMover put function

    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    alt = pdict.get('alt', False)
    lfn = pdict.get('lfn', '')
    guid = pdict.get('guid', '')
    token = pdict.get('token', '')
    scope = pdict.get('scope', '')
    dsname = pdict.get('dsname', '')
    analysisJob = pdict.get('analJob', False)
    testLevel = pdict.get('testLevel', '0')
    extradirs = pdict.get('extradirs', '')
    experiment = pdict.get('experiment', '')
    proxycheck = pdict.get('proxycheck', False)
    prodSourceLabel = pdict.get('prodSourceLabel', '')

    # get the site information object
    si = getSiteInformation(experiment)

    tolog("put_data received prodSourceLabel=%s" % (prodSourceLabel))
    if prodSourceLabel == 'ddm' and analysisJob:
        tolog("Treating PanDA Mover job as a production job during stage-out")
        analysisJob = False

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict['report'], 'gfal-copy', lfn, guid)

    filename = os.path.basename(source)

    # get all the proper paths
    ec, pilotErrorDiag, tracer_error, dst_gpfn, lfcdir, surl = si.getProperPaths(error, analysisJob, token, prodSourceLabel, dsname, filename, scope=scope, alt=alt, sitemover=self) # quick workaround
    if ec != 0:
        self.prepareReport(tracer_error, report)
        return self.put_data_retfail(ec, pilotErrorDiag)

    # get local adler32 checksum (pre-transfer snapshot)
    status, output, adler_size, adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
    if status != 0:
        errorLog = 'Failed to get local file %s adler32 checksum: %s' % (source, output)
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        # NOTE(review): stage-IN error code/state used on this stage-OUT path
        # (ERR_STAGEINFAILED / "PSTAGE_FAIL") — confirm this is intentional
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)

    # translate the SURL into the S3 destination path via copyprefix
    ret_path = si.getCopyPrefixPathNew(surl, stageIn=False)
    tolog("Convert destination: %s to new path: %s" % (surl, ret_path))
    if not ret_path.startswith("s3:"):
        errorLog = "Failed to use copyprefix to convert the current path to S3 path."
        tolog("!!WARNING!!1777!! %s" % (errorLog))
        status = PilotErrors.ERR_STAGEINFAILED
        state = "PSTAGE_FAIL"
        output = errorLog
        size = None
        checksum = None
    else:
        # perform the actual transfer
        status, output, size, checksum = self.stageOut(source, ret_path, token, experiment)

    if status != 0:
        errors = PilotErrors()
        state = errors.getErrorName(status)
        if state == None:
            state = "PSTAGE_FAIL"
        self.prepareReport(state, report)
        return self.put_data_retfail(status, output, surl)
    else:
        if size == adler_size:
            tolog("The file size is not changed. Will check whether adler32 changed.")
            # re-sample the local checksum to verify the file was not modified during transfer
            status, output, new_adler_size, new_adler_checksum = self.getLocalFileInfo(source, checksumType="adler32")
            if status != 0:
                errorLog = 'Failed to get local file %s adler32 checksum: %s' % (source, output)
                tolog("!!WARNING!!1777!! %s" % (errorLog))
                status = PilotErrors.ERR_STAGEINFAILED
                state = "PSTAGE_FAIL"
                output = errorLog
                self.prepareReport(state, report)
                return self.put_data_retfail(status, output, surl)
            else:
                if adler_checksum == new_adler_checksum:
                    tolog("The file checksum is not changed. Will use adler32 %s to replace the md5 checksum %s" % (adler_checksum, checksum))
                    checksum = adler_checksum
                else:
                    errorLog = "The file checksum changed from %s(before transfer) to %s(after transfer)" % (adler_checksum, new_adler_checksum)
                    tolog("!!WARNING!!1777!! %s" % (errorLog))
                    status = PilotErrors.ERR_STAGEINFAILED
                    state = "PSTAGE_FAIL"
                    output = errorLog
                    self.prepareReport(state, report)
                    return self.put_data_retfail(status, output, surl)

    state = "DONE"
    self.prepareReport(state, report)
    return 0, pilotErrorDiag, surl, size, checksum, self.arch_type
def get_data(self, gpfn, lfn, path, fsize=0, fchecksum=0, guid=0, **pdict):
    """ Moves a DS file from a remote SE to the working directory (FAX mover).

    Performs the copy and, for systems supporting it, checks size and md5sum correctness.
    gpfn: full source URL (e.g. method://[host[:port]/full-dir-path/filename)
          IGNORED HERE, will use dq-list-files to get it
    path: destination absolute path (in a local file system)
    returns the status of the transfer. In case of failure it should remove
    the partially copied destination.
    """
    error = PilotErrors()
    pilotErrorDiag = ""

    # Get input parameters from pdict
    guid = pdict.get("guid", "")
    useCT = pdict.get("usect", True)
    jobId = pdict.get("jobId", "")
    dsname = pdict.get("dsname", "")
    workDir = pdict.get("workDir", "")
    experiment = pdict.get("experiment", "")
    prodDBlockToken = pdict.get("access", "")

    # get the site information object
    tolog("get_data: experiment=%s" % (experiment))
    si = getSiteInformation(experiment)

    # get the DQ2 tracing report
    report = self.getStubTracingReport(pdict["report"], "fax", lfn, guid)

    src_loc_filename = lfn # os.path.basename(src_loc_pfn)
    # source vars: gpfn, loc_pfn, loc_host, loc_dirname, loc_filename
    # dest vars: path

    # derive the checksum type from the supplied checksum string
    if fchecksum != 0 and fchecksum != "":
        csumtype = self.getChecksumType(fchecksum)
    else:
        csumtype = "default"

    # should the root file be copied or read directly by athena? (note: this section is necessary in case FAX is used as primary site mover)
    directIn = self.checkForDirectAccess(lfn, useCT, workDir, jobId, prodDBlockToken)
    if directIn:
        report["relativeStart"] = None
        report["transferStart"] = None
        self.__sendReport("FOUND_ROOT", report)
        # ERR_DIRECTIOFILE signals the caller that no copy is needed
        return error.ERR_DIRECTIOFILE, pilotErrorDiag

    # local destination path
    dest_file = os.path.join(path, src_loc_filename)

    # the initial gpfn is ignored since the pilot will get it from the global redirector
    # however, the lfn can differ e.g. for files the has the __DQ2-* bit in it. In that case
    # the global redirector will not give the correct name, and the pilot need to correct for it
    # so better to use the lfn taken from the initial gpfn right away
    # warning: tests at CERN has shown that this is not true. the global redirector will not find a file with __DQ2- in it
    initial_lfn = os.path.basename(gpfn)
    tolog("Initial LFN=%s" % (initial_lfn))

    # get the global path
    # if gpfn != "":
    #     tolog("Ignoring initial GPFN since pilot will get it using the global redirector (%s)" % (gpfn))
    gpfn = self.findGlobalFilePath(src_loc_filename, dsname)
    if gpfn == "":
        ec = error.ERR_STAGEINFAILED
        pilotErrorDiag = "Failed to get global paths for FAX transfer"
        tolog("!!WARNING!!3330!! %s" % (pilotErrorDiag))
        self.__sendReport("RFCP_FAIL", report)
        return ec, pilotErrorDiag

    tolog("GPFN=%s" % (gpfn))
    global_lfn = os.path.basename(gpfn)
    if global_lfn != initial_lfn:
        # tolog("WARNING: Global LFN not the same as the initial LFN. Will try to use the initial LFN")
        tolog("WARNING: Global LFN not the same as the initial LFN. Will use the global LFN")
        # gpfn = gpfn.replace(global_lfn, initial_lfn)
        # tolog("Updated GPFN=%s" % (gpfn))

    # setup ROOT locally
    _setup_str = self.getLocalROOTSetup()

    # define the copy command
    cmd = "%s xrdcp -d 1 -f %s %s" % (_setup_str, gpfn, dest_file)

    # transfer the file
    report["transferStart"] = time()
    rc, rs, pilotErrorDiag = self.copy(cmd, stagein=True)
    report["validateStart"] = time()
    if rc != 0:
        self.__sendReport("COPY_FAIL", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return rc, pilotErrorDiag
    else:
        tolog("Successfully transferred file")

        # get file size from the command output if not known already
        if fsize == 0:
            fsize = self.getFileSize(rs)

        # get checksum from the command output if not known already
        if fchecksum == 0:
            fchecksum = self.getChecksum(rs)
        else:
            if fchecksum == 0 or fchecksum == None:
                fchecksum = ""
            else:
                tolog("fchecksum = %s" % (fchecksum))

    # get destination (local) file size and checksum
    ec, pilotErrorDiag, dstfsize, dstfchecksum = self.getLocalFileInfo(dest_file, csumtype=csumtype)
    tolog("File info: %d, %s, %s" % (ec, dstfsize, dstfchecksum))
    if ec != 0:
        self.__sendReport("LOCAL_FILE_INFO_FAIL", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return ec, pilotErrorDiag

    # compare remote and local file checksum
    if fchecksum != "" and fchecksum != 0 and dstfchecksum != fchecksum and not self.isDummyChecksum(fchecksum):
        pilotErrorDiag = "Remote and local checksums (of type %s) do not match for %s (%s != %s)" % (
            csumtype,
            os.path.basename(gpfn),
            fchecksum,
            dstfchecksum,
        )
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        if csumtype == "adler32":
            self.__sendReport("AD_MISMATCH", report)
            return error.ERR_GETADMISMATCH, pilotErrorDiag
        else:
            self.__sendReport("MD5_MISMATCH", report)
            return error.ERR_GETMD5MISMATCH, pilotErrorDiag

    # compare remote and local file size (skip test if remote/source file size is not known)
    if dstfsize != fsize and fsize != 0 and fsize != "":
        pilotErrorDiag = "Remote and local file sizes do not match for %s (%s != %s)" % (
            os.path.basename(gpfn),
            str(dstfsize),
            str(fsize),
        )
        tolog("!!WARNING!!2999!! %s" % (pilotErrorDiag))
        self.__sendReport("FS_MISMATCH", report)

        # remove the local file before any get retry is attempted
        _status = self.removeLocal(dest_file)
        if not _status:
            tolog("!!WARNING!!1112!! Failed to remove local file, get retry will fail")

        return error.ERR_GETWRONGSIZE, pilotErrorDiag

    # mark the input file as successfully staged in
    updateFileState(lfn, workDir, jobId, mode="file_state", state="transferred", type="input")
    self.__sendReport("DONE", report)
    return 0, pilotErrorDiag