def stageIn(self, source, destination, fspec):
    """
    Override stageIn rather than stageInFile since most of stageIn is unnecessary.
    Make a link from the downloaded file to the pilot working directory.

    :param source: original (remote) file location - not used
    :param destination: where to create the link
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (checksumtype, checksum, size)
    """
    # the real file lives in the pilot init dir; link to it from the cwd
    link_target = os.path.join(self.init_dir, fspec.lfn)
    self.log('Creating link from %s to %s' % (fspec.lfn, link_target))
    try:
        os.symlink(link_target, fspec.lfn)
    except OSError as exc:
        raise PilotException('stageIn failed: %s' % str(exc))

    # a dangling symlink means the source file was never downloaded
    if not os.path.exists(fspec.lfn):
        raise PilotException('stageIn failed: symlink points to non-existent file')

    self.log('Symlink successful')
    checksum, checksum_type = fspec.get_checksum()
    return {'checksum_type': checksum_type,
            'checksum': checksum,
            'filesize': fspec.filesize}
def check_availablespace(self, maxinputsize, files):
    """
    Verify that enough local space is available to stage in and run the job.

    :param maxinputsize: maximum allowed total input size in bytes (0/None means unlimited)
    :param files: list of file objects carrying a .filesize attribute
    :raise: PilotException (ERR_SIZETOOLARGE or ERR_NOLOCALSPACE) when limits are exceeded
    """
    if not self.shouldVerifyStageIn():
        return

    # total size of all input files in bytes
    totalsize = sum(f.filesize for f in files)

    # verify total filesize
    if maxinputsize and totalsize > maxinputsize:
        error = "Too many/too large input files (%s). Total file size=%s B > maxinputsize=%s B" % (len(files), totalsize, maxinputsize)
        raise PilotException(error, code=PilotErrors.ERR_SIZETOOLARGE)

    self.log("Total input file size=%s B within allowed limit=%s B (zero value means unlimited)" % (totalsize, maxinputsize))

    # get available space on the worker node
    wn = Node()
    wn.collectWNInfo(self.workDir)
    available_space = int(wn.disk) * 1024 ** 2  # convert from MB to B
    self.log("Locally available space: %d B" % available_space)

    # are we within the limit?
    if totalsize > available_space:
        error = "Not enough local space for staging input files and run the job (need %d B, but only have %d B)" % (totalsize, available_space)
        raise PilotException(error, code=PilotErrors.ERR_NOLOCALSPACE)
def _getPresignedUrl(self, pandaProxyURL, jobId, osPrivateKey, osPublicKey, pandaProxySecretKey, s3URL, stageIn=False):
    """
    Ask the Panda proxy service for a presigned URL for the given S3 URL.

    :param pandaProxyURL: base URL of the panda proxy service
    :param jobId: panda job id
    :param osPrivateKey: object store private key
    :param osPublicKey: object store public key
    :param pandaProxySecretKey: secret key required by the proxy; must be non-empty
    :param s3URL: the S3 URL to presign
    :param stageIn: when True request a GET (download) URL
    :return: the presigned URL string
    :raise: PilotException on any failure (all errors are re-wrapped)
    """
    try:
        if not pandaProxySecretKey or pandaProxySecretKey == "":
            raise PilotException("Panda proxy secret key is not set for panda proxy operations")
        payload = {'pandaID': jobId,
                   'secretKey': '%s' % pandaProxySecretKey,
                   'publicKey': 'publicKey:%s' % osPublicKey,
                   'privateKey': 'privateKey:%s' % osPrivateKey,
                   'url': '%s' % s3URL}
        if stageIn:
            payload['method'] = 'GET'
        requestedURL = pandaProxyURL + '/getPresignedURL'
        self.log("agb: get presinged url: requested url='%s', data=%s" % (requestedURL, payload))
        res = requests.post(requestedURL, data=payload)
        self.log("result=%s" % res)
        self.log("res.text.encode('ascii') = %s" % res.text.encode('ascii'))
        if res.status_code == 200:
            # proxy answers with a urlencoded query string
            parsed = cgi.parse_qs(res.text.encode('ascii'))
            if int(parsed['StatusCode'][0]) == 0:
                return parsed['presignedURL'][0]
            raise PilotException("get remote path presigned url from panda proxy error %s: %s" % (parsed['StatusCode'][0], parsed['ErrorMsg'][0]))
        raise PilotException("failed to get remote path presigned url from panda proxy, status code: %s" % res.status_code)
    except Exception as e:
        raise PilotException("failure when get presigned url from panda proxy: %s" % str(e))
def stageIn(self, turl, dst, fspec):
    """
    Use the rucio download command to stage in the file.

    :param turl: overrides parent signature -- unused
    :param dst: overrides parent signature -- unused
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (ddmendpoint, surl, pfn)
    """
    dst_dir = dirname(dst)
    if fspec.replicas:
        if not fspec.allowAllInputRSEs:
            # pin the download to the first replica's RSE
            cmd = 'rucio download --dir %s --rse %s %s:%s' % (dst_dir, fspec.replicas[0][0], fspec.scope, fspec.lfn)
        else:
            cmd = 'rucio download --dir %s %s:%s' % (dst_dir, fspec.scope, fspec.lfn)
    else:
        # no replica catalog info: download directly by pfn from the ddmendpoint
        cmd = 'rucio download --dir %s --rse %s --pfn %s %s:%s' % (dst_dir, fspec.ddmendpoint, fspec.turl, fspec.scope, fspec.lfn)

    # Prepend the command with singularity if necessary
    from Singularity import singularityWrapper
    cmd = singularityWrapper(cmd, fspec.cmtconfig, dst_dir)

    tolog('stageIn: %s' % cmd)
    rc, out = getstatusoutput(cmd)
    if rc:
        raise PilotException('stageIn failed -- rucio download did not succeed: %s' % out.replace('\n', ''))

    # TODO: fix in rucio download to set specific outputfile
    # https://its.cern.ch/jira/browse/RUCIO-2063
    cmd = 'mv %s %s' % (dst_dir + '/%s/%s' % (fspec.scope, fspec.lfn), dst)
    tolog('stageInCmd: %s' % cmd)
    rc, out = getstatusoutput(cmd)
    tolog('stageInOutput: %s' % out)
    if rc:
        raise PilotException('stageIn failed -- could not move downloaded file to destination: %s' % out.replace('\n', ''))

    if not fspec.replicas:
        fspec.filesize = os.path.getsize(dst)

    return {'ddmendpoint': fspec.replicas[0][0] if fspec.replicas else fspec.ddmendpoint,
            'surl': None,
            'pfn': fspec.lfn}
def put_files(self, ddmendpoints, activity, files):
    """
    Copy files to dest SE: main control function; handles alternative stage-out
    and retry policy for different ddmendpoints.

    :param ddmendpoints: list of DDMEndpoints where the files will be sent
                         (base DDMEndpoint SE + alternative SEs??)
    :param activity: protocol activity name used to resolve transfer protocols
    :param files: list of files to be transferred
    :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each ddmendpoint
    :raise: PilotException in case of error
    """
    if not ddmendpoints:
        raise PilotException("Failed to put files: Output ddmendpoint list is not set", code=PilotErrors.ERR_NOSTORAGE)
    if not files:
        raise PilotException("Failed to put files: empty file list to be transferred")

    # fetch DDM configuration for any endpoints not yet known locally
    missing_ddms = set(ddmendpoints) - set(self.ddmconf)
    if missing_ddms:
        self.ddmconf.update(self.si.resolveDDMConf(missing_ddms))

    # resolve (and cache per activity) the protocols to be used
    ddmprot = self.protocols.setdefault(activity, self.si.resolvePandaProtocols(ddmendpoints, activity))

    output = []

    for ddm in ddmendpoints:
        protocols = ddmprot.get(ddm)
        if not protocols:
            self.log('Failed to resolve protocols data for ddmendpoint=%s .. skipped processing..' % ddm)
            continue

        success_transfers, failed_transfers = [], []
        try:
            success_transfers, failed_transfers = self.do_put_files(ddm, protocols, files)
            is_success = len(success_transfers) == len(files)
            output.append((is_success, success_transfers, failed_transfers, None))
            if is_success:
                # all files made it: NO additional transfers to another next DDMEndpoint/SE ?? .. fix me later if need
                break
        #except PilotException, e:
        #    self.log('put_files: caught exception: %s' % e)
        except Exception, e:  # py2-only except syntax, kept as-is
            self.log('put_files: caught exception: %s' % e)
            # entry layout: is_success, success_transfers, failed_transfers, exception
            import traceback
            self.log(traceback.format_exc())
            output.append((False, [], [], e))

        ### TODO: implement proper logic of put-policy: how to handle alternative stage out (processing of next DDMEndpoint)..

        self.log('put_files(): Failed to put files to ddmendpoint=%s .. successfully transferred files=%s/%s, failures=%s: will try next ddmendpoint from the list ..' % (ddm, len(success_transfers), len(files), len(failed_transfers)))
    # NOTE(review): this chunk ends here; the final `return output` is presumably
    # outside the visible view -- TODO confirm against the full file
def stageOut(self, src, dst, fspec):
    """
    Use the rucio upload command to stage out the file.

    :param src: overrides parent signature -- unused
    :param dst: overrides parent signature -- unused
    :param fspec: dictionary containing destination ddmendpoint, scope, lfn
    :return: destination file details (ddmendpoint, surl, pfn)
    """
    # prefer the explicit pfn when set, otherwise fall back to the lfn
    upload_target = fspec.pfn if fspec.pfn else fspec.lfn

    if fspec.objectstoreId and int(fspec.objectstoreId) > 0:
        # objectstore upload: push to the exact turl
        cmd = 'rucio upload --no-register --rse %s --scope %s --pfn %s %s' % (fspec.ddmendpoint, fspec.scope, fspec.turl, upload_target)
    else:
        # only .root files get an explicit guid on the command line
        guid = ' --guid %s' % fspec.guid if fspec.lfn and '.root' in fspec.lfn else ''
        cmd = 'rucio upload%s --no-register --rse %s --scope %s %s' % (guid, fspec.ddmendpoint, fspec.scope, upload_target)

    tolog('stageOutCmd: %s' % cmd)
    rc, out = getstatusoutput(cmd)
    tolog('stageOutOutput: %s' % out)

    if rc:
        raise PilotException('stageOut failed -- rucio upload did not succeed: %s' % out.replace('\n', ''))

    return {'ddmendpoint': fspec.ddmendpoint,
            'surl': fspec.surl,
            'pfn': fspec.lfn}
def _stagefile(self, source, destination, filesize, is_stagein):
    """
    Stage the file (shared helper for both transfer directions).

    mode is stagein or stageout
    :param source: source path/TURL of the transfer
    :param destination: destination path/TURL of the transfer
    :param filesize: size in bytes, used to scale the transfer timeout
    :param is_stagein: True for stage-in, False for stage-out (selects error code)
    :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    # only adler32 is supported by this mover; md5 is deliberately excluded
    if self.checksum_type not in ['adler32']:  # exclude md5
        raise PilotException("Failed to stage file: internal error: unsupported checksum_type=%s .. " % self.checksum_type,
                             code=PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED,
                             state='BAD_CSUMTYPE')

    # xrdcp flags: -np no progress bar, -f force; self.coption carries the checksum option
    cmd = '%s -np -f %s %s %s' % (self.copy_command, self.coption, source, destination)
    setup = self.getSetup()
    if setup:
        cmd = "%s; %s" % (setup, cmd)

    timeout = self.getTimeOut(filesize)
    self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

    t0 = datetime.now()
    is_timeout = False
    try:
        timer = TimerCommand(cmd)
        rcode, output = timer.run(timeout=timeout)
        is_timeout = timer.is_timeout
    except Exception, e:  # py2-only except syntax, kept as-is
        self.log("WARNING: xrdcp threw an exception: %s" % e)
        rcode, output = -1, str(e)
    # NOTE(review): the remainder of this function (result/error handling after the
    # copy attempt) is not visible in this chunk -- TODO confirm against full file
def stageOut(self, source, destination, fspec):
    """
    Override stageOut rather than stageOutFile since most of stageOut is unnecessary.
    Move the output file from the pilot working directory to the top level directory.
    Create the output file list for ARC CE.

    :param source: local file location
    :param destination: remote location to copy file
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (checksumtype, checksum, size)
    :raise: PilotException if the file cannot be moved
    """
    src = os.path.realpath(fspec.lfn)
    dest = os.path.join(self.init_dir, fspec.lfn)
    self.log('Moving %s to %s' % (src, dest))
    try:
        shutil.move(src, dest)
    # FIX: shutil.move raises shutil.Error (and OSError) as well as IOError;
    # catching only IOError let cross-device/partial-copy failures escape unwrapped
    except (IOError, OSError, shutil.Error) as e:
        raise PilotException('stageOut failed: %s' % str(e))

    self.log('Copy successful')

    # Create output list for ARC CE
    self.createOutputList(fspec, dest)

    checksum, checksum_type = fspec.get_checksum()
    return {'checksum_type': checksum_type,
            'checksum': checksum,
            'filesize': fspec.filesize}
def stageOutFile(self, source, destination, fspec):
    """
    Stage out the file via lcg-cp with an SRM space token.

    :param source: local file path
    :param destination: destination SURL
    :param fspec: file data object carrying the ddmendpoint name
    :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    # resolve token value from fspec.ddmendpoint
    space_token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
    if not space_token:
        raise PilotException("stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (fspec.ddmendpoint, source, destination, fspec),
                             code=PilotErrors.ERR_STAGEOUTFAILED,
                             state='UNKNOWN_DDMENDPOINT')

    nbytes = os.path.getsize(source)
    # the same size-scaled timeout is applied to both the SRM and send/receive phases
    limit = self.getTimeOut(nbytes)
    copy_cmd = '%s --verbose --vo atlas -b -U srmv2 --connect-timeout=300 --srm-timeout=%s --sendreceive-timeout=%s -S %s %s %s' % (self.copy_command, limit, limit, space_token, source, destination)

    return self._stagefile(copy_cmd, source, destination, nbytes, is_stagein=False)
def stageOutFile(self, source, destination, fspec):
    """
    Stage out the file via gfal-copy over SRM/gsiftp.

    :param source: local file path
    :param destination: destination SURL
    :param fspec: file data object carrying the ddmendpoint and known checksum
    :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    # resolve token value from fspec.ddmendpoint
    token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
    if not token:
        raise PilotException("stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (fspec.ddmendpoint, source, destination, fspec))

    nbytes = os.path.getsize(source)
    limit = self.getTimeOut(nbytes)

    # pass the known source checksum to gfal-copy so it validates the transfer
    known_checksum, known_checksum_type = fspec.get_checksum()
    if known_checksum:
        checksum_opt = '-K %s:%s' % (known_checksum_type, known_checksum)
    else:
        checksum_opt = ''

    local_url = "file://%s" % os.path.abspath(source)
    copy_cmd = '%s --verbose %s -p -f -t %s -D "SRM PLUGIN:TURL_PROTOCOLS=gsiftp" -S %s %s %s' % (self.copy_command, checksum_opt, limit, token, local_url, destination)

    return self._stagefile(copy_cmd, source, destination, nbytes, is_stagein=False)
def stageInFile(self, turl, dst, fspec):
    """
    Use the rucio download command (API) to stage in the file, with retries.

    :param turl: overrides parent signature -- unused
    :param dst: destination path for the downloaded file
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: (None, None) -- checksums are resolved elsewhere
    :raise: PilotException on download or move failure
    """
    num_retries = 2
    success = False
    try_counter = 0
    error_msg = None
    while not success and try_counter != num_retries:
        try_counter += 1
        tolog('StageIn, attempt %s/%s' % (str(try_counter), str(num_retries)))
        try:
            self._stageInApi(dst, fspec)
            success = True
        except Exception as error:
            error_msg = error

    if error_msg and not success:
        # FIX: use error_msg -- `error` is scoped to the except clause and is
        # unbound here under Python 3
        raise PilotException('stageIn with API failed: %s' % error_msg,
                             code=PilotErrors.ERR_STAGEINFAILED)

    # TODO: fix in rucio download to set specific outputfile
    cmd = 'mv %s %s' % (dirname(dst) + '/%s/%s' % (fspec.scope, fspec.lfn), dst)
    tolog('stageInCmd: %s' % cmd)
    s, o = getstatusoutput(cmd)
    tolog('stageInOutput: s=%s o=%s' % (s, o))

    if s:
        # FIX: this is a stage-IN failure; was wrongly ERR_STAGEOUTFAILED
        raise PilotException('stageIn failed -- could not move downloaded file to destination: %s' % o.replace('\n', ''),
                             code=PilotErrors.ERR_STAGEINFAILED)

    if not fspec.replicas and not fspec.filesize:
        fspec.filesize = os.path.getsize(dst)

    return None, None
def stageOut(self, src, dst, fspec):
    """
    Use the rucio upload command (API) to stage out the file, with retries and
    an optional physical existence check after the upload.

    :param src: overrides parent signature -- unused
    :param dst: overrides parent signature -- unused
    :param fspec: dictionary containing destination ddmendpoint, scope, lfn
    :return: destination file details (ddmendpoint, surl, pfn)
    """
    max_attempts = 2
    uploaded = False
    attempt = 0
    last_error = None
    while not uploaded and attempt != max_attempts:
        attempt += 1
        tolog('StageOut, attempt %s/%s' % (str(attempt), str(max_attempts)))
        try:
            self._stageOutApi(src, fspec)
            uploaded = True
        except Exception as error:
            last_error = error

    # physical check after upload
    if uploaded and self.shouldVerifyStageOut():
        try:
            file_exists = self.VerifyStageOut(fspec.ddmendpoint, fspec)
            tolog('File exists at the storage: %s' % str(file_exists))
            if not file_exists:
                raise PilotException('stageOut: Physical check after upload failed.')
        except Exception as e:
            msg = 'stageOut: File existence verification failed with: %s' % e
            tolog(msg)
            raise PilotException(msg)

    if last_error and not uploaded:
        raise PilotException('stageOut with API failed: %s' % last_error)

    return {'ddmendpoint': fspec.ddmendpoint,
            'surl': fspec.surl,
            'pfn': fspec.lfn}
def stageOutFile(self, source, destination, fspec):
    """
    Stage out the file via xrdcp, probing the tool first for a usable checksum option.

    Should be implementated by different site mover
    :param source: local file path
    :param destination: destination TURL
    :param fspec: file data object (unused in the visible portion)
    :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    # only adler32 is supported by this mover; md5 is deliberately excluded
    if self.checksum_type not in ['adler32']:  # exclude md5
        raise PilotException("Failed to stageOutFile(): internal error: unsupported checksum_type=%s .. " % self.checksum_type,
                             code=PilotErrors.ERR_STAGEOUTFAILED,
                             state='BAD_CSUMTYPE')

    # probe the copy tool's help text to discover which checksum flag it supports
    cmd = "%s -h" % self.copy_command
    setup = self.getSetup()
    if setup:
        cmd = "%s; %s" % (setup, cmd)
    self.log("Execute command (%s) to decide which option should be used to calc file checksum.." % cmd)
    c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True)
    output = c.communicate()[0]
    self.log("status: %s, output: %s" % (c.returncode, output))
    coption = ""
    if c.returncode:
        self.log('FAILED to execute command=%s: %s' % (cmd, output))
    else:
        # preference order: --cksum, then the legacy -adler / -md5 flags
        if "--cksum" in output:
            coption = "--cksum %s:print" % self.checksum_type
        elif "-adler" in output and self.checksum_type == 'adler32':
            coption = "-adler"
        elif "-md5" in output and self.checksum_type == 'md5':
            coption = "-md5"
    if coption:
        self.log("Use %s option to get the checksum" % coption)
    else:
        self.log("Cannot find neither -adler nor --cksum. will not use checksum")

    # build the actual transfer command: -np no progress bar, -f force overwrite
    cmd = '%s -np -f %s %s %s' % (self.copy_command, coption, source, destination)
    setup = self.getSetup()
    if setup:
        cmd = "%s; %s" % (setup, cmd)

    timeout = self.getTimeOut(os.path.getsize(source))
    self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

    t0 = datetime.now()
    is_timeout = False
    try:
        timer = TimerCommand(cmd)
        rcode, output = timer.run(timeout=timeout)
        is_timeout = timer.is_timeout
    except Exception, e:  # py2-only except syntax, kept as-is
        self.log("WARNING: xrdcp threw an exception: %s" % e)
        rcode, output = -1, str(e)
    # NOTE(review): the remainder of this function (result/error handling after the
    # copy attempt) is not visible in this chunk -- TODO confirm against full file
def copysetup(self, value):
    """
    Set the copysetup script path after expanding environment variables.

    An empty value is accepted (no setup script); a non-empty value must point
    to a readable file.

    :param value: path to the setup script (may contain env vars / whitespace)
    :raise: PilotException (ERR_NOSUCHFILE) when the file is not readable
    """
    value = os.path.expandvars(value.strip())
    if value and not os.access(value, os.R_OK):
        # FIX: corrected typo in log message ("readdable" -> "readable")
        self.log("WARNING: copysetup=%s is invalid: file is not readable" % value)
        raise PilotException("Failed to set copysetup: passed invalid file name=%s" % value,
                             code=PilotErrors.ERR_NOSUCHFILE,
                             state="RFCP_FAIL")
    self._setup = value
def stageIn(self, source, destination, fspec): """ Override stageIn rather than stageInFile since most of stageIn is unnecessary. Make a link from the downloaded file to the pilot working directory. :param source: original (remote) file location - not used :param destination: where to create the link :param fspec: dictionary containing destination replicas, scope, lfn :return: destination file details (checksumtype, checksum, size) """ # block pre-load input file BEGIN # Alexander B.: the next block is necessary for testing of BOINC pilot on GRID resources. # it works only if the special variable "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER" is set in external environment fileExpectedLocation = '%s/%s' % ( self.init_dir, fspec.lfn ) # the place where original mv_sitemover expect to find the file if not os.path.exists(fileExpectedLocation): preloadFilesFlag = os.environ.get( "PRELOAD_STAGIN_FILES_FOR_MV_SITEMOVER") if preloadFilesFlag and (preloadFilesFlag == '1' or preloadFilesFlag == "yes" or preloadFilesFlag == "on"): # the expected behavior actions: # rucio download valid1:EVNT.01416937._000001.pool.root.1 # mv valid1/EVNT.01416937._000001.pool.root.1 ./EVNT.09355665._094116.pool.root.1 self.log( 'pp: pre-load files for mv_sitemover: download locally stageIn the file: scope=%s file=%s' % (fspec.scope, fspec.lfn)) cmd = 'rucio download %s:%s' % (fspec.scope, fspec.lfn) self.log("Executing command: %s" % cmd) from subprocess import Popen, PIPE, STDOUT c = Popen(cmd, stdout=PIPE, stderr=STDOUT, shell=True) output = c.communicate()[0] if c.returncode: raise Exception(output) fileRucioLocation = '%s/%s' % ( fspec.scope, fspec.lfn ) # the place where Rucio downloads file self.log('pp: move from %s to %s' % (fileRucioLocation, fileExpectedLocation)) try: os.rename(fileRucioLocation, fileExpectedLocation) except OSError, e: raise PilotException( 'stageIn failed when rename the file from rucio location: %s' % str(e), code=PilotErrors.ERR_STAGEINFAILED)
def put_logfiles(self, files):
    """
    Copy log files to dest SE.

    :param files: list of files to be moved
    :raise: PilotException (ERR_NOSTORAGE) when no log ddmendpoint is configured
    """
    destinations = self.job.ddmEndPointLog
    if not destinations:
        raise PilotException("Output ddmendpoint list (job.ddmEndPointLog) is not set", code=PilotErrors.ERR_NOSTORAGE)
    # 'pl' is the put-log protocol activity
    return self.put_files(destinations, 'pl', files)
def put_outfiles(self, files):
    """
    Copy output files to dest SE.

    :param files: list of files to be moved
    :raise: PilotException (ERR_NOSTORAGE) when no output ddmendpoint is configured
    """
    destinations = self.job.ddmEndPointOut
    if not destinations:
        raise PilotException("Output ddmendpoint list (job.ddmEndPointOut) is not set", code=PilotErrors.ERR_NOSTORAGE)
    # 'pw' is the put-output (write) protocol activity
    return self.put_files(destinations, 'pw', files)
def stageIn(self, turl, dst, fspec):
    """
    Use the rucio download command to stage in the file.

    :param turl: overrides parent signature -- unused
    :param dst: overrides parent signature -- unused
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (ddmendpoint, surl, pfn)
    """
    # download from the first replica's RSE into the destination directory
    download_cmd = 'rucio download --dir %s --rse %s %s:%s' % (dirname(dst), fspec.replicas[0][0], fspec.scope, fspec.lfn)
    tolog('stageIn: %s' % download_cmd)
    rc, out = getstatusoutput(download_cmd)
    if rc:
        raise PilotException('stageIn failed -- rucio download did not succeed: %s' % out.replace('\n', ''))

    # TODO: fix in rucio download to set specific outputfile
    # https://its.cern.ch/jira/browse/RUCIO-2063
    move_cmd = 'mv %s %s' % (dirname(dst) + '/%s/%s' % (fspec.scope, fspec.lfn), dst)
    tolog('stageInCmd: %s' % move_cmd)
    rc, out = getstatusoutput(move_cmd)
    tolog('stageInOutput: %s' % out)
    if rc:
        raise PilotException('stageIn failed -- could not move downloaded file to destination: %s' % out.replace('\n', ''))

    return {'ddmendpoint': fspec.replicas[0][0],
            'surl': None,
            'pfn': fspec.lfn}
def is_stagein_allowed(self, fspec, job):
    """
    Check if stage-in operation is allowed for the mover.
    Apply additional job specific checks here if need.
    Should be overwritten by custom sitemover.

    :return: True in case stage-in transfer is allowed
    :raise: PilotException in case of controlled error
    """
    # only analysis jobs with non-lib input files need the tape check
    if not job.isAnalysisJob() or '.lib.tgz' in fspec.lfn:
        return True
    # for analysis jobs, fail the transfer of a (non lib) input file if the
    # file is on tape (not pre-staged)
    if not self.isFileStaged(fspec):
        raise PilotException("File %s is not staged and will be skipped for analysis job: stage-in is not allowed" % fspec.lfn,
                             code=PilotErrors.ERR_FILEONTAPE,
                             state='FILE_ON_TAPE')
    return True
def stageOutFile(self, source, destination, fspec):
    """
    Stage out the file via gfal-copy, with Dynafed-cloud specific handling.

    :param source: local file path (overridden by fspec.pfn in the ES workflow)
    :param destination: destination URL
    :param fspec: file data object carrying ddmendpoint, pfn, checksum, cmtconfig
    :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    self.log("gfalcopy_sitemover: stageOutFile() arguments: src=%s, dst=%s fspec=%s" % (source, destination, fspec))

    # keep the dynafed flag on the instance: base.stageOut() later calls
    # getRemoteFileChecksum() without fspec, so it must be able to read it back
    self._isDynafedCloud = self.detectDynafedCloud(fspec.ddmendpoint)
    if self._isDynafedCloud:
        gfal_prop = self.gfal_prop_dynacloud
    else:
        gfal_prop = self.gfal_prop_grid

    # in ES workflow only fspec.pfn is correct, but it may be not set for normal workflow
    src = fspec.pfn if fspec.pfn else source

    # resolve token value from fspec.ddmendpoint
    token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
    if not token:
        raise PilotException("stageOutFile: Failed to resolve token value for ddmendpoint=%s: src=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (fspec.ddmendpoint, src, destination, fspec))

    nbytes = os.path.getsize(src)
    limit = self.getTimeOut(nbytes)

    # dynafed endpoints do not get the checksum validation option
    src_checksum, src_checksum_type = fspec.get_checksum()
    checksum_opt = ''
    if src_checksum and not self._isDynafedCloud:
        checksum_opt = '-K %s:%s' % (src_checksum_type, src_checksum)

    src_url = "file://%s" % os.path.abspath(src)  # may be omitted, gfal-utils understand local file paths
    cmd = '%s --verbose %s -p -f -t %s %s -S %s %s %s' % (self.copy_command, checksum_opt, limit, gfal_prop, token, src_url, destination)

    # Prepend the command with singularity if necessary
    from Singularity import singularityWrapper
    cmd = singularityWrapper(cmd, fspec.cmtconfig, dirname(src))

    return self._stagefile(cmd, src, destination, nbytes, is_stagein=False)
def stageIn(self, source, destination, fspec):
    """
    Query HTTP for etag, then symlink to the pilot working directory.

    :param source: original file location
    :param destination: where to create the link
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (checksumtype, checksum, size)
    """
    self.log('source: %s' % str(source))
    self.log('destination: %s' % str(destination))
    self.log('fspec: %s' % str(fspec))
    self.log('fspec.scope: %s' % str(fspec.scope))
    self.log('fspec.lfn: %s' % str(fspec.lfn))
    self.log('fspec.ddmendpoint: %s' % str(fspec.ddmendpoint))

    # figure out the HTTP SURL from Rucio
    from rucio.client import ReplicaClient
    rc = ReplicaClient()
    http_surl_reps = [
        r for r in rc.list_replicas(dids=[{
            'scope': fspec.scope,
            'name': fspec.lfn
        }],
                                    schemes=['https'],
                                    rse_expression=fspec.ddmendpoint)
    ]
    self.log('http_surl_reps: %s' % http_surl_reps)

    # take the first https replica on the requested RSE; strip anything after '_-'
    http_surl = http_surl_reps[0]['rses'][fspec.ddmendpoint][0].rsplit('_-')[0]
    self.log('http_surl: %s' % http_surl)

    # retrieve the TURL from the webdav etag
    cmd = 'davix-http --capath /cvmfs/atlas.cern.ch/repo/ATLASLocalRootBase/etc/grid-security-emi/certificates --cert $X509_USER_PROXY -X PROPFIND %s' % http_surl
    self.log('ETAG retrieval: %s' % cmd)
    try:
        timer = TimerCommand(cmd)
        rcode, output = timer.run(timeout=10)
    except Exception, e:  # py2-only except syntax, kept as-is
        self.log('FATAL: could not retrieve STORM WebDAV ETag: %s' % e)
        raise PilotException('Could not retrieve STORM WebDAV ETag: %s' % e)
    # NOTE(review): chunk ends here; parsing of the ETag output and creation of the
    # symlink are not visible in this view -- TODO confirm against full file
def resolve_replicas(self, files):
    """
    Populate fdat.inputddms and fdat.replicas of each entry from `files` list.

    fdat.replicas = [(ddmendpoint, replica, ddm_se)]
    ddm_se -- integration logic -- is used to manualy form TURL when
    ignore_rucio_replicas=True (quick stab until all protocols are properly
    populated in Rucio from AGIS)

    :param files: list of file data objects, updated in place
    :raise: PilotException (ERR_FAILEDLFCGETREPS) if replicas cannot be fetched
    """
    # build list of local ddmendpoints grouped by site
    # load ALL ddmconf
    self.ddmconf.update(self.si.resolveDDMConf([]))
    ddms = {}
    for ddm, dat in self.ddmconf.iteritems():
        ddms.setdefault(dat['site'], []).append(dat)

    for fdat in files:
        # build and order list of local ddms
        ddmdat = self.ddmconf.get(fdat.ddmendpoint)
        if not ddmdat:
            raise Exception("Failed to resolve ddmendpoint by name=%s send by Panda job, please check configuration. fdat=%s" % (fdat.ddmendpoint, fdat))
        if not ddmdat['site']:
            # FIX: was a bare `ddmconf` (undefined name -> NameError when this
            # error path fired); report the instance configuration instead
            raise Exception("Failed to resolve site name of ddmendpoint=%s. please check ddm declaration: ddmconf=%s ... fdat=%s" % (fdat.ddmendpoint, self.ddmconf, fdat))
        localddms = ddms.get(ddmdat['site'])
        # sort/filter ddms (as possible input source)
        fdat.inputddms = self._prepare_input_ddm(ddmdat, localddms)

    # load replicas from Rucio
    from rucio.client import Client
    c = Client()
    dids = [dict(scope=e.scope, name=e.lfn) for e in files]
    schemes = ['srm', 'root', 'https', 'gsiftp']

    # Get the replica list
    try:
        replicas = c.list_replicas(dids, schemes=schemes)
    except Exception as e:
        raise PilotException("Failed to get replicas from Rucio: %s" % e, code=PilotErrors.ERR_FAILEDLFCGETREPS)
    # NOTE(review): chunk ends here; processing of `replicas` into fdat.replicas is
    # not visible in this view -- TODO confirm against full file
def stageOutFile(self, source, destination, fspec):
    """
    Stage out the file via the lsm-put wrapper.

    :param source: local file path
    :param destination: destination path/TURL
    :param fspec: file data object carrying ddmendpoint, guid and checksum
    :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure
    :raise: PilotException in case of controlled error
    """
    # resolve token value from fspec.ddmendpoint
    token = self.ddmconf.get(fspec.ddmendpoint, {}).get('token')
    if not token:
        raise PilotException("stageOutFile: Failed to resolve token value for ddmendpoint=%s: source=%s, destination=%s, fspec=%s .. unknown ddmendpoint" % (fspec.ddmendpoint, source, destination, fspec))

    filesize = os.path.getsize(source)

    cksum, cksum_type = fspec.get_checksum()
    if not cksum:
        # checksum is not available => do calculate it now and cache it on fspec
        cksum, cksum_type = self.calc_file_checksum(source)
        fspec.set_checksum(cksum, cksum_type)

    # lsm-put expects "type:value" when the type is known, bare value otherwise
    checksum_arg = cksum if not cksum_type else "%s:%s" % (cksum_type, cksum)

    opts = {
        '--size': filesize,
        '-t': token,
        '--checksum': checksum_arg,
        '--guid': fspec.guid
    }
    opts = " ".join(["%s %s" % (k, v) for (k, v) in opts.iteritems()])

    cmd = 'lsm-put %s %s %s' % (opts, source, destination)

    return self._stagefile(cmd, source, destination, filesize, is_stagein=False)
def stageOut(self, source, destination, fspec):
    """
    Copy the output file from the pilot working directory to the destination directory.

    :param source: local file location
    :param destination: remote location to copy file
    :param fspec: dictionary containing destination replicas, scope, lfn
    :return: destination file details (checksumtype, checksum, size)
    :raise: PilotException if the output file cannot be moved
    """
    src = os.path.realpath(fspec.lfn)
    dest = os.path.join(self.init_dir, fspec.lfn)
    self.log('Moving %s to %s' % (src, dest))

    # copy the output
    try:
        # FIX: was shutil.move(src, dst) -- `dst` is undefined here (the local
        # variable is `dest`), so every call raised NameError
        shutil.move(src, dest)
    except Exception as e:
        self.log('FATAL: could not move outputfile: %s' % e)
        raise PilotException('Could not move outputfile: %s' % e)
    # NOTE(review): chunk ends here; any post-move steps (output list, checksum
    # return) are not visible in this view -- TODO confirm against full file
class lcgcpSiteMover(BaseSiteMover):
    """ SiteMover that uses lcg-cp for both get and put """

    # mover identification and external command configuration
    name = "lcgcp"
    copy_command = "lcg-cp"
    checksum_type = "adler32"
    checksum_command = "lcg-get-checksum"
    schemes = ['srm', 'gsiftp']  # list of supported schemes for transfers

    def _stagefile(self, cmd, source, destination, filesize, is_stagein):
        """
        Stage the file by running the prepared copy command under a timeout.

        mode is stagein or stageout
        :param cmd: fully built lcg-cp command line
        :param source: source path/TURL (used for error resolution)
        :param destination: destination path/TURL (removed on failed stage-in)
        :param filesize: size in bytes, used to scale the timeout
        :param is_stagein: True for stage-in, False for stage-out
        :return: destination file details (checksum, checksum_type) in case of success, throw exception in case of failure
        :raise: PilotException in case of controlled error
        """
        timeout = self.getTimeOut(filesize)

        setup = self.getSetup()
        if setup:
            cmd = "%s; %s" % (setup, cmd)

        self.log("Executing command: %s, timeout=%s" % (cmd, timeout))

        t0 = datetime.now()
        is_timeout = False
        try:
            timer = TimerCommand(cmd)
            rcode, output = timer.run(timeout=timeout)
            is_timeout = timer.is_timeout
        except Exception, e:  # py2-only except syntax, kept as-is
            self.log("WARNING: %s threw an exception: %s" % (self.copy_command, e))
            rcode, output = -1, str(e)

        dt = datetime.now() - t0
        self.log("Command execution time: %s" % dt)
        self.log("is_timeout=%s, rcode=%s, output=%s" % (is_timeout, rcode, output))

        if is_timeout or rcode:
            ## do clean up
            if is_stagein:
                # stage-in clean up: check if file was partially transferred
                self.removeLocal(destination)

            if is_timeout:
                raise PilotException("Copy command self timed out after %s, timeout=%s, output=%s" % (dt, timeout, output),
                                     code=PilotErrors.ERR_GETTIMEOUT if is_stagein else PilotErrors.ERR_PUTTIMEOUT,
                                     state='CP_TIMEOUT')

            if rcode:
                self.log('WARNING: [is_stagein=%s] Stage file command (%s) failed: Status=%s Output=%s' % (is_stagein, cmd, rcode, output.replace("\n", " ")))
                # map the tool output onto a pilot error code/state
                error = self.resolveStageErrorFromOutput(output, source, is_stagein=is_stagein)
                rcode = error.get('rcode')
                if not rcode:
                    rcode = PilotErrors.ERR_STAGEINFAILED if is_stagein else PilotErrors.ERR_STAGEOUTFAILED
                state = error.get('state')
                if not state:
                    state = 'COPY_FAIL'  #'STAGEIN_FAILED' if is_stagein else 'STAGEOUT_FAILED'
                raise PilotException(error.get('error'), code=rcode, state=state)

        # extract filesize and checksum values from output
        # check stage-out: not used at the moment
        return None, None
timer = TimerCommand(cmd) rcode, output = timer.run(timeout=timeout) is_timeout = timer.is_timeout except Exception, e: self.log("WARNING: %s threw an exception: %s" % ('gfal-rm', e)) rcode, output = -1, str(e) dt = datetime.now() - t0 self.log("Command execution time: %s" % dt) self.log("is_timeout=%s, rcode=%s, output=%s" % (is_timeout, rcode, output)) if is_timeout: raise PilotException( "removeRemoteFile self timed out after %s, timeout=%s, output=%s" % (dt, timeout, output), code=PilotErrors.ERR_GENERALERROR, state='RM_TIMEOUT') if rcode: raise PilotException("Failed to remove remote file", code=PilotErrors.ERR_GENERALERROR, state='RM_FAILED') def stageOutFile(self, source, destination, fspec): """ Stage out the file Should be implementated by different site mover :return: remote file (checksum, checksum_type) in case of success, throw exception in case of failure :raise: PilotException in case of controlled error """
def do_put_files(self, ddmendpoint, protocols, files):  # old function : TO BE DEPRECATED ...
    """
    Copy files to dest SE

    Resolves the SURL protocol for the endpoint, then iterates over the given
    protocols and, per protocol, over the files, retrying each stage-out up to
    self.stageoutretry times with a sleep between attempts.

    :ddmendpoint: DDMEndpoint name used to store files
    :return: (list of transferred_files details, list of failed_transfers details)
    :raise: PilotException in case of error

    NOTE(review): within this chunk the body ends right after the retry loop,
    without appending to transferred_files/failed_transfers or returning them —
    the tail looks truncated; confirm against the upstream mover implementation.
    """

    self.log(
        '[deprecated do_put_files()]Prepare to copy files=%s to ddmendpoint=%s using protocols data=%s' %
        (files, ddmendpoint, protocols))
    self.log("[deprecated do_put_files()]Number of stage-out tries: %s" % self.stageoutretry)

    # get SURL for Panda calback registration
    # resolve from special protocol activity=SE
    # fix me later to proper name of activitiy=SURL (panda SURL, at the moment only 2-letter name is allowed on AGIS side)
    # if SE is not found, try to fallback to a
    surl_prot = [
        dict(se=e[0], path=e[2]) for e in sorted(
            self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get(
                'SE',
                self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}).get('a', [])),
            key=lambda x: x[1])  # sort by protocol priority (2nd tuple element)
    ]

    if not surl_prot:
        self.log('FAILED to resolve default SURL path for ddmendpoint=%s' % ddmendpoint)
        return [], []

    surl_prot = surl_prot[0]  # take first

    self.log("[do_put_files] SURL protocol to be used: %s" % surl_prot)

    self.trace_report.update(localSite=ddmendpoint, remoteSite=ddmendpoint)

    transferred_files, failed_transfers = [], []

    for dat in protocols:

        copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

        try:
            sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir)
            sitemover.trace_report = self.trace_report
            sitemover.protocol = dat # ##
            sitemover.ddmconf = self.ddmconf # quick workaround ###
            sitemover.setup()
        except Exception, e:
            # a broken mover for one protocol must not abort the whole put
            self.log(
                '[do_put_files] WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' %
                (e, dat))
            continue

        self.log("[do_put_files] Copy command: %s, sitemover=%s" % (copytool, sitemover))
        self.log("[do_put_files] Copy setup: %s" % copysetup)

        self.trace_report.update(protocol=copytool)

        se, se_path = dat.get('se', ''), dat.get('path', '')

        self.log("[do_put_files] Found N=%s files to be transferred: %s" %
                 (len(files), [e.get('pfn') for e in files]))

        for fdata in files:
            scope, lfn, pfn = fdata.get('scope', ''), fdata.get('lfn'), fdata.get('pfn')
            guid = fdata.get('guid', '')

            # SURL is built from the endpoint's SE protocol, TURL from the
            # currently tried transfer protocol
            surl = sitemover.getSURL(
                surl_prot.get('se'), surl_prot.get('path'), scope, lfn,
                self.job)  # job is passing here for possible JOB specific processing
            turl = sitemover.getSURL(
                se, se_path, scope, lfn,
                self.job)  # job is passing here for possible JOB specific processing

            self.trace_report.update(scope=scope, dataset=fdata.get('dsname_report'), url=surl)
            self.trace_report.update(catStart=time.time(), filename=lfn, guid=guid.replace('-', ''))

            self.log(
                "[do_put_files] Preparing copy for pfn=%s to ddmendpoint=%s using copytool=%s: mover=%s" %
                (pfn, ddmendpoint, copytool, sitemover))
            self.log("[do_put_files] lfn=%s: SURL=%s" % (lfn, surl))
            self.log("[do_put_files] TURL=%s" % turl)

            if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK):
                # NOTE(review): misspelled message kept as-is — it is a runtime string
                error = "Erron: input pfn file is not exist: %s" % pfn
                self.log(error)
                raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL")

            filename = os.path.basename(pfn)

            # update the current file state
            updateFileState(filename, self.workDir, self.job.jobId, mode="file_state", state="not_transferred")
            dumpFileStates(self.workDir, self.job.jobId)

            # loop over multple stage-out attempts
            for _attempt in xrange(1, self.stageoutretry + 1):

                if _attempt > 1:  # if not first stage-out attempt, take a nap before next attempt
                    self.log(" -- Waiting %d seconds before next stage-out attempt for file=%s --" %
                             (self.stageout_sleeptime, filename))
                    time.sleep(self.stageout_sleeptime)

                self.log("[do_put_files] Put attempt %d/%d for filename=%s" %
                         (_attempt, self.stageoutretry, filename))

                try:
                    # quick work around
                    from Job import FileSpec
                    stub_fspec = FileSpec(ddmendpoint=ddmendpoint, guid=guid, scope=scope, lfn=lfn)
                    result = sitemover.stageOut(pfn, turl, stub_fspec)
                    break  # transferred successfully
                except PilotException, e:
                    result = e
                    self.log(traceback.format_exc())
                except Exception, e:
                    self.log(traceback.format_exc())
                    result = PilotException("stageOut failed with error=%s" % e,
                                            code=PilotErrors.ERR_STAGEOUTFAILED)

                self.log('WARNING [do_put_files]: Error in copying file (attempt %s): %s' %
                         (_attempt, result))
def stageout(self, activity, files): """ Copy files to dest SE: main control function, it should care about alternative stageout and retry-policy for diffrent ddmendpoints :return: list of entries (is_success, success_transfers, failed_transfers, exception) for each ddmendpoint :return: (transferred_files, failed_transfers) :raise: PilotException in case of error """ if not files: raise PilotException( "Failed to put files: empty file list to be transferred") pandaqueue = self.si.getQueueName() # FIX ME LATER protocols = self.protocols.setdefault( activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue]) copytools = self.si.resolvePandaCopytools(pandaqueue, activity)[pandaqueue] self.log( "Mover.stageout() [new implementation] started for activity=%s, files=%s, protocols=%s, copytools=%s" % (activity, files, protocols, copytools)) # check if file exists before actual processing # populate filesize if need for fspec in files: pfn = os.path.join(self.job.workdir, fspec.lfn) if not os.path.isfile(pfn) or not os.access(pfn, os.R_OK): error = "Erron: input pfn file is not exist: %s" % pfn self.log(error) raise PilotException(error, code=PilotErrors.ERR_MISSINGOUTPUTFILE, state="FILE_INFO_FAIL") fspec.filesize = os.path.getsize(pfn) totalsize = reduce(lambda x, y: x + y.filesize, files, 0) transferred_files, failed_transfers = [], [] self.log( "Found N=%s files to be transferred, total_size=%.3f MB: %s" % (len(files), totalsize / 1024. 
/ 1024., [e.lfn for e in files])) # first resolve protocol settings from PQ specific aprotocols settings # then resolve settings from default ddm.protocols supported by copytools # group protocols, files by ddmendpoint ddmprotocols, ddmfiles = {}, {} for e in files: ddmfiles.setdefault(e.ddmendpoint, []).append(e) # load DDM conf/protocols self.ddmconf.update(self.si.resolveDDMConf(ddmfiles.keys())) for e in protocols: if e['ddm'] not in ddmfiles: # skip not affected protocols settings continue e['copytools'] = [{ 'copytool': e['copytool'], 'copysetup': e['copysetup'] }] ddmprotocols.setdefault(e['ddm'], []).append(e) # generate default protocols from copytools/schemes and ddmconf unknown_ddms = set(ddmfiles) - set(ddmprotocols) for ddmendpoint in unknown_ddms: dd = self.ddmconf.get(ddmendpoint, {}).get('aprotocols', {}) dat = dd.get(activity, []) or dd.get('w', []) dprotocols = [ dict(se=e[0], path=e[2], resolve_scheme=True) for e in sorted(dat, key=lambda x: x[1]) ] ddmprotocols.setdefault(ddmendpoint, dprotocols) unknown_ddms = set(ddmfiles) - set(ddmprotocols) if unknown_ddms: raise PilotException( "Failed to put files: no protocols defined for output ddmendpoints=%s .. 
check aprotocols schedconfig settings for activity=%s or default ddm.aprotocols entries" % (unknown_ddms, activity), code=PilotErrors.ERR_NOSTORAGE) self.log( "[stage-out] [%s] filtered protocols to be used to transfer files: protocols=%s" % (activity, ddmprotocols)) # get SURL endpoint for Panda callback registration # resolve from special protocol activity='SE' or fallback to activity='a', then to 'r' surl_protocols, no_surl_ddms = {}, set() for fspec in files: if not fspec.surl: # initialize only if not already set d = self.ddmconf.get(fspec.ddmendpoint, {}).get('aprotocols', {}) xprot = d.get('SE', []) if not xprot: xprot = [ e for e in d.get('a', d.get('r', [])) if e[0] and e[0].startswith('srm') ] surl_prot = [ dict(se=e[0], path=e[2]) for e in sorted(xprot, key=lambda x: x[1]) ] if surl_prot: surl_protocols.setdefault(fspec.ddmendpoint, surl_prot[0]) else: no_surl_ddms.add(fspec.ddmendpoint) if no_surl_ddms: # failed to resolve SURLs self.log( 'FAILED to resolve default SURL path for ddmendpoints=%s' % list(no_surl_ddms)) raise PilotException( "Failed to put files: no SE/SURL protocols defined for output ddmendpoints=%s .. check ddmendpoints aprotocols settings for activity=SE/a/r" % list(no_surl_ddms), code=PilotErrors.ERR_NOSTORAGE) sitemover_objects = {} # try to iterate over protocol of given ddmendpoint until successfull transfer for ddmendpoint, iprotocols in ddmprotocols.iteritems(): for dat in iprotocols: remain_files = [ e for e in ddmfiles.get(ddmendpoint) if e.status not in ['transferred'] ] if not remain_files: self.log( 'INFO: all files to be transfered to ddm=%s have been successfully processed for activity=%s ..' 
% (ddmendpoint, activity)) # stop checking other protocols of ddmendpoint break if not 'copytools' in dat: # use allowed copytools cdat = [] for cp, settings in copytools: cdat.append({ 'copytool': cp, 'copysetup': settings.get('setup') }) dat['copytools'] = cdat if not dat['copytools']: msg = 'FAILED to resolve final copytools settings for ddmendpoint=%s, please check schedconf.copytools settings: copytools=%s, iprotocols=' % list( ddmendpoint, copytools, iprotocols) self.log(msg) raise PilotException(msg, code=PilotErrors.ERR_NOSTORAGE) for cpsettings in dat.get('copytools', []): copytool, copysetup = cpsettings.get( 'copytool'), cpsettings.get('copysetup') try: sitemover = sitemover_objects.get(copytool) if not sitemover: sitemover = getSiteMover(copytool)( copysetup, workDir=self.job.workdir) sitemover_objects.setdefault(copytool, sitemover) sitemover.trace_report = self.trace_report sitemover.protocol = dat # ## sitemover.ddmconf = self.ddmconf # quick workaround ### sitemover.setup() if dat.get('resolve_scheme'): dat['scheme'] = sitemover.schemes except Exception, e: self.log( 'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' % (e, dat)) continue if dat.get( 'scheme' ): # filter protocols by accepted scheme from copytool should_skip = True for scheme in dat.get('scheme'): if dat['se'].startswith(scheme): should_skip = False break if should_skip: self.log( "[stage-out] protocol=%s of ddmendpoint=%s is skipped since copytool=%s does not support it, accepted schemes=%s" % (dat['se'], ddmendpoint, copytool, dat['scheme'])) continue self.log("Copy command [stage-out]: %s, sitemover=%s" % (copytool, sitemover)) self.log("Copy setup [stage-out]: %s" % copysetup) self.trace_report.update(protocol=copytool, localSite=ddmendpoint, remoteSite=ddmendpoint) # validate se value? 
se, se_path = dat.get('se', ''), dat.get('path', '') for fdata in remain_files: if not fdata.surl: fdata.surl = sitemover.getSURL( surl_protocols[fdata.ddmendpoint].get('se'), surl_protocols[fdata.ddmendpoint].get('path'), fdata.scope, fdata.lfn, self.job ) # job is passing here for possible JOB specific processing updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="not_transferred", ftype="output") fdata.turl = sitemover.getSURL( se, se_path, fdata.scope, fdata.lfn, self.job ) # job is passing here for possible JOB specific processing self.log( "[stage-out] resolved SURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.surl, fdata.lfn, fdata.ddmendpoint)) self.log( "[stage-out] resolved TURL=%s to be used for lfn=%s, ddmendpoint=%s" % (fdata.turl, fdata.lfn, fdata.ddmendpoint)) self.log( "[stage-out] Prepare to put_data: ddmendpoint=%s, protocol=%s, fspec=%s" % (ddmendpoint, dat, fdata)) self.trace_report.update(catStart=time.time(), filename=fdata.lfn, guid=fdata.guid.replace( '-', '')) self.trace_report.update( scope=fdata.scope, dataset=fdata.destinationDblock, url=fdata.turl) self.log( "[stage-out] Preparing copy for lfn=%s using copytool=%s: mover=%s" % (fdata.lfn, copytool, sitemover)) #dumpFileStates(self.workDir, self.job.jobId, ftype="output") # loop over multple stage-out attempts for _attempt in xrange(1, self.stageoutretry + 1): if _attempt > 1: # if not first stage-out attempt, take a nap before next attempt self.log( " -- Waiting %s seconds before next stage-out attempt for file=%s --" % (self.stageout_sleeptime, fdata.lfn)) time.sleep(self.stageout_sleeptime) self.log("Put attempt %s/%s for filename=%s" % (_attempt, self.stageoutretry, fdata.lfn)) try: result = sitemover.put_data(fdata) fdata.status = 'transferred' # mark as successful if result.get('surl'): fdata.surl = result.get('surl') #if result.get('pfn'): # fdata.turl = result.get('pfn') #self.trace_report.update(url=fdata.surl) ### 
self.trace_report.update(url=fdata.turl) ### # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="output") dumpFileStates(self.workDir, self.job.jobId, ftype="output") self.updateSURLDictionary( fdata.guid, fdata.surl, self.workDir, self. job.jobId) # FIXME LATER: isolate later fdat = result.copy() #fdat.update(lfn=lfn, pfn=pfn, guid=guid, surl=surl) transferred_files.append(fdat) break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException( "stageOut failed with error=%s" % e, code=PilotErrors.ERR_STAGEOUTFAILED) self.log(traceback.format_exc()) self.log( 'WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageoutretry, result)) if isinstance(result, Exception): # failure transfer failed_transfers.append(result)
def stagein(self):
    """
        Stage in the job's input files (self.job.inData).

        Resolves replicas, verifies available disk space, builds the ordered
        protocol list (PQ-specific aprotocols first, then protocols derived
        from allowed copytools), and tries each protocol against the files
        still needing transfer; files eligible for direct access are marked
        'direct_access' and skipped.

        :return: (transferred_files, failed_transfers)
    """
    activity = 'pr'  # stage-in ("production read") activity

    pandaqueue = self.si.getQueueName()  # FIX ME LATER
    protocols = self.protocols.setdefault(
        activity, self.si.resolvePandaProtocols(pandaqueue, activity)[pandaqueue])
    copytools = self.si.resolvePandaCopytools(pandaqueue, activity)[pandaqueue]

    self.log("stage-in: pq.aprotocols=%s, pq.copytools=%s" % (protocols, copytools))

    files = self.job.inData
    self.resolve_replicas(files)  # populates also self.ddmconf = self.si.resolveDDMConf([])

    maxinputsize = self.getMaxInputSize()
    totalsize = reduce(lambda x, y: x + y.filesize, files, 0)

    transferred_files, failed_transfers = [], []

    self.log("Found N=%s files to be transferred, total_size=%.3f MB: %s" %
             (len(files), totalsize / 1024. / 1024., [e.lfn for e in files]))

    # process first PQ specific protocols settings
    # then protocols supported by copytools

    # protocol generated from aprotocols is {'copytool':'', 'copysetup':'', 'se':'', 'ddm':''}
    # protocol generated from copytools is {'copytool':'', 'copysetup', 'scheme':''}

    # build accepted schemes from allowed copytools
    cprotocols = []
    for cp, settings in copytools:
        cprotocols.append({'resolve_scheme': True, 'copytool': cp, 'copysetup': settings.get('setup')})

    # PQ-specific protocols are tried before copytool-derived ones
    protocols = protocols + cprotocols
    if not protocols:
        raise PilotException(
            "Failed to get files: neither aprotocols nor allowed copytools defined for input. check copytools/acopytools/aprotocols schedconfig settings for activity=%s, pandaqueue=%s" %
            (activity, pandaqueue),
            code=PilotErrors.ERR_NOSTORAGE)

    sitemover_objects = {}  # cache of movers, one per copytool

    for dat in protocols:

        remain_files = [e for e in files if e.status not in ['direct_access', 'transferred']]
        if not remain_files:
            self.log('INFO: all input files have been successfully processed')
            break

        copytool, copysetup = dat.get('copytool'), dat.get('copysetup')

        try:
            sitemover = sitemover_objects.get(copytool)
            if not sitemover:
                sitemover = getSiteMover(copytool)(copysetup, workDir=self.job.workdir)
                sitemover_objects.setdefault(copytool, sitemover)

                sitemover.trace_report = self.trace_report
                sitemover.ddmconf = self.ddmconf  # self.si.resolveDDMConf([]) # quick workaround ###
                sitemover.setup()
            if dat.get('resolve_scheme'):
                dat['scheme'] = sitemover.schemes
        except Exception, e:
            # one broken mover must not abort the whole stage-in
            self.log(
                'WARNING: Failed to get SiteMover: %s .. skipped .. try to check next available protocol, current protocol details=%s' %
                (e, dat))
            continue

        self.log("Copy command [stage-in]: %s, sitemover=%s" % (copytool, sitemover))
        self.log("Copy setup [stage-in]: %s" % copysetup)

        self.trace_report.update(protocol=copytool)

        # verify file sizes and available space for stagein
        sitemover.check_availablespace(maxinputsize, remain_files)

        for fdata in remain_files:

            updateFileState(fdata.lfn, self.workDir, self.job.jobId,
                            mode="file_state", state="not_transferred", ftype="input")

            self.log("[stage-in] Prepare to get_data: protocol=%s, fspec=%s" % (dat, fdata))

            # check if protocol and fdata.ddmendpoint belong to same site
            #
            if dat.get('ddm'):
                protocol_site = self.ddmconf.get(dat.get('ddm'), {}).get('site')
                replica_site = self.ddmconf.get(fdata.ddmendpoint, {}).get('site')

                if protocol_site != replica_site:
                    self.log(
                        'INFO: cross-sites checks: protocol_site=%s and (fdata.ddmenpoint) replica_site=%s mismatched .. skip file processing for copytool=%s (protocol=%s)' %
                        (protocol_site, replica_site, copytool, dat))
                    continue

            r = sitemover.resolve_replica(fdata, dat)

            # quick stub: propagate changes to FileSpec
            if r.get('surl'):
                fdata.surl = r['surl']  # TO BE CLARIFIED if it's still used and need
            if r.get('pfn'):
                fdata.turl = r['pfn']
            if r.get('ddmendpoint'):
                fdata.ddmendpoint = r['ddmendpoint']

            self.log("[stage-in] found replica to be used: ddmendpoint=%s, pfn=%s" %
                     (fdata.ddmendpoint, fdata.turl))

            # check if protocol and found replica belong to same site
            # (re-check: resolve_replica may have changed fdata.ddmendpoint)
            if dat.get('ddm'):
                protocol_site = self.ddmconf.get(dat.get('ddm'), {}).get('site')
                replica_site = self.ddmconf.get(fdata.ddmendpoint, {}).get('site')

                if protocol_site != replica_site:
                    self.log(
                        'INFO: cross-sites checks: protocol_site=%s and replica_site=%s mismatched .. skip file processing for copytool=%s' %
                        (protocol_site, replica_site, copytool))
                    continue

            # check direct access
            self.log("fdata.is_directaccess()=%s, job.accessmode=%s, mover.is_directaccess()=%s" %
                     (fdata.is_directaccess(), self.job.accessmode, self.is_directaccess()))

            # job accessmode overrides the mover-level direct-access setting
            is_directaccess = self.is_directaccess()
            if self.job.accessmode == 'copy':
                is_directaccess = False
            elif self.job.accessmode == 'direct':
                is_directaccess = True
            if fdata.is_directaccess() and is_directaccess:  # direct access mode, no transfer required
                fdata.status = 'direct_access'
                updateFileState(fdata.lfn, self.workDir, self.job.jobId,
                                mode="transfer_mode", state="direct_access", ftype="input")

                self.log("Direct access mode will be used for lfn=%s .. skip transfer the file" % fdata.lfn)
                continue

            # apply site-mover custom job-specific checks for stage-in
            try:
                is_stagein_allowed = sitemover.is_stagein_allowed(fdata, self.job)
                if not is_stagein_allowed:
                    reason = 'SiteMover does not allowed stage-in operation for the job'
            except PilotException, e:
                is_stagein_allowed = False
                reason = e
            except Exception:
                raise
fdata.ddmendpoint = result.get('ddmendpoint') if result.get('surl'): fdata.surl = result.get('surl') if result.get('pfn'): fdata.turl = result.get('pfn') #self.trace_report.update(url=fdata.surl) ### self.trace_report.update(url=fdata.turl) ### break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException( "stageIn failed with error=%s" % e, code=PilotErrors.ERR_STAGEINFAILED) self.log(traceback.format_exc()) self.log( 'WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageinretry, result)) if not isinstance(result, Exception): # transferred successfully # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report)
if result.get('ddmendpoint'): fdata.ddmendpoint = result.get('ddmendpoint') if result.get('surl'): fdata.surl = result.get('surl') if result.get('pfn'): fdata.turl = result.get('pfn') #self.trace_report.update(url=fdata.surl) ### self.trace_report.update(url=fdata.turl) ### break # transferred successfully except PilotException, e: result = e self.log(traceback.format_exc()) except Exception, e: result = PilotException("stageIn failed with error=%s" % e, code=PilotErrors.ERR_STAGEINFAILED) self.log(traceback.format_exc()) self.log('WARNING: Error in copying file (attempt %s/%s): %s' % (_attempt, self.stageinretry, result)) if not isinstance(result, Exception): # transferred successfully # finalize and send trace report self.trace_report.update(clientState='DONE', stateReason='OK', timeEnd=time.time()) self.sendTrace(self.trace_report) updateFileState(fdata.lfn, self.workDir, self.job.jobId, mode="file_state", state="transferred", ftype="input") dumpFileStates(self.workDir, self.job.jobId, ftype="input") ## self.updateSURLDictionary(guid, surl, self.workDir, self.job.jobId) # FIX ME LATER