def trigger_preparation(self, jobspec):
    """Trigger a Globus transfer staging the job's input files in.

    Builds a Globus TransferData task from the job's not-yet-ready input
    files and submits it, labelled so a later status poll can find it.

    :param jobspec: job specification holding PandaID, computingSite and
                    the input file attributes
    :return: (True, '') on success or when nothing needs to be done,
             (False, error message) on a definite failure, or the
             (status, message) pair produced by the Globus exception handler
    """
    # get logger
    tmpLog = core_utils.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                                    method_name='trigger_preparation')
    tmpLog.debug('start')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get the label identifying this job's transfer task
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path - will get them from AGIS eventually
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
    self.srcEndpoint = queueConfig.preparator['srcEndpoint']
    self.Globus_dstPath = self.basePath
    #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
    self.dstEndpoint = queueConfig.preparator['dstEndpoint']
    # make sure the local base path exists (hoisted out of the per-file
    # loop - the check is loop invariant)
    if not os.access(self.basePath, os.F_OK):
        os.makedirs(self.basePath)
    # get input files
    files = []
    lfns = []
    inFiles = jobspec.get_input_file_attributes(skip_ready=True)
    for inLFN, inFile in iteritems(inFiles):
        # set the local path for each file
        inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
        # create the file paths for the Globus source and destination endpoints
        Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
        Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
        files.append({'scope': inFile['scope'],
                      'name': inLFN,
                      'Globus_dstPath': Globus_dstpath,
                      'Globus_srcPath': Globus_srcpath})
        lfns.append(inLFN)
    tmpLog.debug('files[] {0}'.format(files))
    try:
        # test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errStr = ''
            if not tmpStatsrc:
                errStr += ' source Endpoint not activated '
            if not tmpStatdst:
                errStr += ' destination Endpoint not activated '
            tmpLog.error(errStr)
            return False, errStr
        # both endpoints activated - now prepare to transfer data
        if len(files) > 0:
            tdata = TransferData(self.tc,
                                 self.srcEndpoint,
                                 self.dstEndpoint,
                                 label=label,
                                 sync_level="checksum")
            # loop over all input files and add them to the transfer
            for myfile in files:
                tdata.add_item(myfile['Globus_srcPath'], myfile['Globus_dstPath'])
            # submit
            transfer_result = self.tc.submit_transfer(tdata)
            # check status code and message
            tmpLog.debug(str(transfer_result))
            if transfer_result['code'] == "Accepted":
                # succeeded - record the transfer ID used for later lookup
                transferID = transfer_result['task_id']
                jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                tmpLog.debug('done')
                return True, ''
            return False, transfer_result['message']
        # if no files to transfer return True
        return True, 'No files to transfer'
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        # FIX: return the handler's error message instead of a meaningless
        # empty dict so callers can see why the trigger failed
        return errStat, errMsg
def check_stage_out_status(self, jobspec):
    """Check the status of Globus stage-out transfers for a job's output files.

    Output files still carrying the dummy transfer ID are pooled until at
    least 10 are available or the group is older than 10 minutes, then
    submitted as a single Globus transfer (guarded by a DB object lock so
    concurrent threads do not submit duplicates).  Groups with a real
    transfer ID are polled against the Globus task status.

    :param jobspec: job specification with output file groups
    :return: (True, '') when the transfer succeeded or nothing is pending,
             (False, msg) on permanent failure, (None, msg) to retry later
    """
    # make logger
    tmpLog = self.make_logger(_logger,
                              'PandaID={0} ThreadID={1}'.format(jobspec.PandaID,
                                                                threading.current_thread().ident),
                              method_name='check_stage_out_status')
    tmpLog.debug('start')
    # show the dummy transfer id and set to a value with the PandaID if needed
    tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
    if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base, 'XXXX'):
        old_dummy_transfer_id = self.dummy_transfer_id
        self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base, jobspec.PandaID)
        tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(old_dummy_transfer_id,
                                                                            self.dummy_transfer_id))
    # default return
    tmpRetVal = (True, '')
    # flag set while this thread holds the DB lock
    have_db_lock = False

    def _release_lock(transfer_id):
        # release the DB object lock; True when the release succeeded
        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                     .format(self.id, transfer_id))
        return self.dbInterface.release_object_lock(transfer_id)

    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # get the queueConfig and corresponding objStoreID_ES
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    # check queueConfig stager section to see if jobtype is set
    if 'jobtype' in queueConfig.stager:
        if queueConfig.stager['jobtype'] == "Yoda":
            self.Yodajob = True
    # set the location of the files in fileSpec.objstoreID
    # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
    self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
    if self.Yodajob:
        self.pathConvention = int(queueConfig.stager['pathConvention'])
        tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'
                     .format(jobspec.PandaID, self.objstoreID, self.pathConvention))
    else:
        self.pathConvention = None
        tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID, self.objstoreID))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # set transferID to None
    transferID = None
    # get the scope of the log files
    outfileattrib = jobspec.get_output_file_attributes()
    scopeLog = 'xxxx'
    for key in outfileattrib.keys():
        if "log.tgz" in key:
            scopeLog = outfileattrib[key]['scope']
    # get transfer groups
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
    # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
    for dummy_transferID in groups:
        # skip if valid transfer ID, not the dummy one
        if validate_transferid(dummy_transferID):
            continue
        # lock for 120 sec
        tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'
                     .format(self.id, dummy_transferID))
        have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
        if not have_db_lock:
            # escape since locked by another thread
            msgStr = 'escape since locked by another thread'
            tmpLog.debug(msgStr)
            return None, msgStr
        # refresh group information since that could have been updated by
        # another thread before getting the lock
        tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
        self.dbInterface.refresh_file_group_info(jobspec)
        # get transfer groups again with refreshed info
        tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # the dummy transfer ID is still there
        if dummy_transferID in groups:
            groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
            # get files with the dummy transfer ID across jobs
            fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
            msgStr = 'dummy_transferID = {0} number of files = {1}'.format(dummy_transferID,
                                                                           len(fileSpecs))
            tmpLog.debug(msgStr)
            # submit transfer if there are at least 10 files or the group
            # was made more than 10 minutes ago
            if len(fileSpecs) >= 10 or \
                    groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                tmpLog.debug('prepare to transfer files')
                # submit transfer and get a real transfer ID
                # endpoints/paths from queue config - will come from AGIS eventually
                #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                self.srcEndpoint = queueConfig.stager['srcEndpoint']
                self.Globus_srcPath = self.basePath
                self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                self.dstEndpoint = queueConfig.stager['dstEndpoint']
                # test the endpoints and create the transfer data class
                errMsg = None
                try:
                    # test endpoints for activation
                    tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
                    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
                    if tmpStatsrc and tmpStatdst:
                        errStr = 'source Endpoint and destination Endpoint activated'
                        tmpLog.debug(errStr)
                    else:
                        errMsg = ''
                        if not tmpStatsrc:
                            errMsg += ' source Endpoint not activated '
                        if not tmpStatdst:
                            errMsg += ' destination Endpoint not activated '
                        # FIX: use the local lock bookkeeping - the original
                        # wrote to self.have_db_lock here, unlike every other
                        # branch, leaving the local flag stale
                        if not _release_lock(dummy_transferID):
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        else:
                            have_db_lock = False
                        tmpLog.error(errMsg)
                        tmpRetVal = (None, errMsg)
                        return tmpRetVal
                    # both endpoints activated - now prepare to transfer data
                    tdata = TransferData(self.tc,
                                         self.srcEndpoint,
                                         self.dstEndpoint,
                                         sync_level="checksum")
                except Exception:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    if not _release_lock(dummy_transferID):
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    tmpRetVal = (errStat, errMsg)
                    return tmpRetVal
                # loop over all files
                ifile = 0
                for fileSpec in fileSpecs:
                    logfile = False
                    scope = 'panda'
                    if fileSpec.scope is not None:
                        scope = fileSpec.scope
                    # for Yoda job set the scope to transient for non log files
                    if self.Yodajob:
                        scope = 'transient'
                    if fileSpec.fileType == "log":
                        logfile = True
                        scope = scopeLog
                    # only print the first 25 files to the log
                    if ifile < 25:
                        msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    if ifile == 25:
                        # FIX: dropped a no-op .format() call with unused arguments
                        tmpLog.debug("printed first 25 files skipping the rest")
                    # destination path uses the first bytes of md5(scope:lfn),
                    # Rucio-style; renamed from 'hash' to avoid shadowing the builtin
                    md5 = hashlib.md5()
                    # FIX: md5.update() requires bytes on python3
                    md5.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
                    hash_hex = md5.hexdigest()
                    correctedscope = "/".join(scope.split('.'))
                    srcURL = fileSpec.path
                    dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                               scope=correctedscope,
                                                                               hash1=hash_hex[0:2],
                                                                               hash2=hash_hex[2:4],
                                                                               lfn=fileSpec.lfn)
                    if logfile:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    if ifile < 25:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    # add files to transfer object - tdata
                    if os.access(srcURL, os.R_OK):
                        if ifile < 25:
                            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
                        tdata.add_item(srcURL, dstURL)
                    else:
                        errMsg = "source file {} does not exist".format(srcURL)
                        if not _release_lock(dummy_transferID):
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (False, errMsg)
                        return tmpRetVal
                    ifile += 1
                # submit transfer
                tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                try:
                    transfer_result = self.tc.submit_transfer(tdata)
                    # check status code and message
                    tmpLog.debug(str(transfer_result))
                    if transfer_result['code'] == "Accepted":
                        # succeeded - set transfer ID which is used for later lookup
                        transferID = transfer_result['task_id']
                        tmpLog.debug('successfully submitted id={0}'.format(transferID))
                        # set status for files
                        self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                        msgStr = 'submitted transfer with ID={0}'.format(transferID)
                        tmpLog.debug(msgStr)
                    else:
                        if not _release_lock(dummy_transferID):
                            errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                        tmpRetVal = (None, transfer_result['message'])
                        return tmpRetVal
                except Exception:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    if not _release_lock(dummy_transferID):
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    return errStat, errMsg
            else:
                msgStr = 'wait until enough files are pooled'
                tmpLog.debug(msgStr)
            # release the lock
            if _release_lock(dummy_transferID):
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'
                             .format(self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            # return None to retry later
            return None, msgStr
        # release the db lock if needed
        if have_db_lock:
            if _release_lock(dummy_transferID):
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'
                             .format(self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
                return None, msgStr
    # check transfer with real transfer IDs
    # get transfer groups
    tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
    tmpLog.debug('transfer groups any state - {0}'.format(groups))
    if len(groups) == 0:
        tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
        tmpLog.debug("check_stage_out_status return status - True ")
        return True, ''
    for transferID in groups:
        # allow only valid UUID
        if validate_transferid(transferID):
            # get transfer task
            tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog, self.tc, transferID)
            # return a temporary error when failed to get task
            if not tmpStat:
                errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (str(self.tc), str(transferID))
                tmpLog.error(errStr)
                return None, errStr
            # return a temporary error when task is missing
            if transferID not in transferTasks:
                errStr = 'transfer task ID - {} is missing'.format(transferID)
                tmpLog.error(errStr)
                return None, errStr
            # succeeded in finding a transfer task by transferID
            if transferTasks[transferID]['status'] == 'SUCCEEDED':
                tmpLog.debug('transfer task {} succeeded'.format(transferID))
                self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                if self.changeFileStatusOnSuccess:
                    self.set_FileSpec_status(jobspec, 'finished')
                return True, ''
            # failed
            if transferTasks[transferID]['status'] == 'FAILED':
                errStr = 'transfer task {} failed'.format(transferID)
                tmpLog.error(errStr)
                self.set_FileSpec_status(jobspec, 'failed')
                return False, errStr
            # another status - retry later
            tmpStr = 'transfer task {0} status: {1}'.format(transferID, transferTasks[transferID]['status'])
            tmpLog.debug(tmpStr)
            return None, ''
    # end of loop over transfer groups
    tmpLog.debug('End of loop over transfers groups - ending check_stage_out_status function')
    return None, 'no valid transfer id found'
errStr = 'failed to get Globus Client ID and Refresh Token' tmpLog.error(errStr) sys.exit(1) # create Globus transfer client to send initial files to remote Globus source tmpStat, tc = globus_utils.create_globus_transfer_client( tmpLog, client_id, refresh_token) if not tmpStat: tc = None errStr = 'failed to create Globus Transfer Client' tmpLog.error(errStr) sys.exit(1) try: # We are sending test files from our destination machine to the source machine # Test endpoints for activation - tmpStatsrc, srcStr = globus_utils.check_endpoint_activation( tmpLog, tc, dstEndpoint) tmpStatdst, dstStr = globus_utils.check_endpoint_activation( tmpLog, tc, srcEndpoint) if tmpStatsrc and tmpStatdst: errStr = 'source Endpoint and destination Endpoint activated' tmpLog.debug(errStr) else: errStr = '' if not tmpStatsrc: errStr += ' source Endpoint not activated ' if not tmpStatdst: errStr += ' destination Endpoint not activated ' tmpLog.error(errStr) sys.exit(2) # We are sending test files from our destination machine to the source machine # both endpoints activated now prepare to transfer data
tc = None errStr = 'failed to get Globus Client ID and Refresh Token' tmpLog.error(errStr) sys.exit(1) # create Globus transfer client to send initial files to remote Globus source tmpStat, tc = globus_utils.create_globus_transfer_client(tmpLog,client_id,refresh_token) if not tmpStat: tc = None errStr = 'failed to create Globus Transfer Client' tmpLog.error(errStr) sys.exit(1) try: # We are sending test files from our destination machine to the source machine # Test endpoints for activation tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,tc,dstEndpoint) tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,tc,srcEndpoint) if tmpStatsrc and tmpStatdst: errStr = 'source Endpoint and destination Endpoint activated' tmpLog.debug(errStr) else: errStr = '' if not tmpStatsrc : errStr += ' source Endpoint not activated ' if not tmpStatdst : errStr += ' destination Endpoint not activated ' tmpLog.error(errStr) sys.exit(2) # both endpoints activated now prepare to transfer data # We are sending test files from our destination machine to the source machine tdata = TransferData(tc,dstEndpoint,srcEndpoint,sync_level="checksum")
def check_status(self, jobspec):
    """Check the status of Globus stage-in transfers for a job's input files.

    Input files grouped under the dummy transfer ID are pooled until at
    least 10 are available or the group is older than 10 minutes, then
    submitted as a single Globus transfer (guarded by a DB object lock).
    Groups with a real transfer ID are polled against the Globus task status.

    :param jobspec: job specification with input file groups
    :return: (True, '') when the transfer succeeded or nothing is pending,
             (False, msg) on permanent failure, (None, msg) to retry later
    """
    # make logger
    tmpLog = core_utils.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                                    method_name='check_status')
    tmpLog.debug('start')
    # default return
    tmpRetVal = (True, '')

    def _release_lock():
        # release the DB object lock; True when the release succeeded
        tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                     .format(self.id, self.dummy_transfer_id))
        released = self.dbInterface.release_object_lock(self.dummy_transfer_id)
        # FIX: the original stored the *release* result in self.have_db_lock,
        # which left the flag True after a successful release so later calls
        # skipped re-acquiring the lock; keep the flag meaning "lock is held"
        self.have_db_lock = not released
        return released

    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # set transferID to None
    transferID = None
    # get transfer groups
    groups = jobspec.get_groups_of_input_files(skip_ready=True)
    tmpLog.debug('jobspec.get_groups_of_input_files() = : {0}'.format(groups))
    # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
    if self.dummy_transfer_id in groups:
        # lock for 120 sec
        if not self.have_db_lock:
            tmpLog.debug('attempt to set DB lock for self.id - {0} self.dummy_transfer_id - {1}'
                         .format(self.id, self.dummy_transfer_id))
            self.have_db_lock = self.dbInterface.get_object_lock(self.dummy_transfer_id,
                                                                 lock_interval=120)
        if not self.have_db_lock:
            # escape since locked by another thread
            msgStr = 'escape since locked by another thread'
            tmpLog.debug(msgStr)
            return None, msgStr
        # refresh group information since that could have been updated by
        # another thread before getting the lock
        self.dbInterface.refresh_file_group_info(jobspec)
        # get transfer groups again with refreshed info
        groups = jobspec.get_groups_of_input_files(skip_ready=True)
        # the dummy transfer ID is still there
        if self.dummy_transfer_id in groups:
            groupUpdateTime = groups[self.dummy_transfer_id]['groupUpdateTime']
            # get files with the dummy transfer ID across jobs
            fileSpecs = self.dbInterface.get_files_with_group_id(self.dummy_transfer_id)
            # submit transfer if there are at least 10 files or the group
            # was made more than 10 minutes ago
            msgStr = 'self.dummy_transfer_id = {0} number of files = {1}'.format(self.dummy_transfer_id,
                                                                                 len(fileSpecs))
            tmpLog.debug(msgStr)
            if len(fileSpecs) >= 10 or \
                    groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                tmpLog.debug('prepare to transfer files')
                # submit transfer and get a real transfer ID
                # endpoints/paths from queue config - will come from AGIS eventually
                from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
                queueConfigMapper = QueueConfigMapper()
                queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
                self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
                self.srcEndpoint = queueConfig.preparator['srcEndpoint']
                self.Globus_dstPath = self.basePath
                #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
                self.dstEndpoint = queueConfig.preparator['dstEndpoint']
                # test the endpoints and create the transfer data class
                errMsg = None
                try:
                    # test endpoints for activation
                    tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
                    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
                    if tmpStatsrc and tmpStatdst:
                        errStr = 'source Endpoint and destination Endpoint activated'
                        tmpLog.debug(errStr)
                    else:
                        errMsg = ''
                        if not tmpStatsrc:
                            errMsg += ' source Endpoint not activated '
                        if not tmpStatdst:
                            errMsg += ' destination Endpoint not activated '
                        if not _release_lock():
                            errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                        tmpLog.error(errMsg)
                        tmpRetVal = (None, errMsg)
                        return tmpRetVal
                    # both endpoints activated - now prepare to transfer data
                    tdata = TransferData(self.tc,
                                         self.srcEndpoint,
                                         self.dstEndpoint,
                                         sync_level="checksum")
                except Exception:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    if not _release_lock():
                        errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                    tmpLog.error(errMsg)
                    tmpRetVal = (errStat, errMsg)
                    return tmpRetVal
                # dump the shared input file attributes once - the original
                # refetched and logged the whole dict for every file (O(n^2))
                attrs = jobspec.get_input_file_attributes()
                msgStr = "len(jobSpec.get_input_file_attributes()) = {0} type - {1}".format(len(attrs), type(attrs))
                tmpLog.debug(msgStr)
                # FIX: attrs.iteritems() is python2-only; use the compat helper
                for key, value in iteritems(attrs):
                    msgStr = "input file attributes - {0} {1}".format(key, value)
                    tmpLog.debug(msgStr)
                # loop over all files
                for fileSpec in fileSpecs:
                    msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                    tmpLog.debug(msgStr)
                    scope = fileSpec.scope
                    # source/destination paths use the first bytes of
                    # md5(scope:lfn), Rucio-style; renamed from 'hash' to
                    # avoid shadowing the builtin
                    md5 = hashlib.md5()
                    # FIX: md5.update() requires bytes on python3
                    md5.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
                    hash_hex = md5.hexdigest()
                    correctedscope = "/".join(scope.split('.'))
                    #srcURL = fileSpec.path
                    srcURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_srcPath,
                                                                               scope=correctedscope,
                                                                               hash1=hash_hex[0:2],
                                                                               hash2=hash_hex[2:4],
                                                                               lfn=fileSpec.lfn)
                    dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                               scope=correctedscope,
                                                                               hash1=hash_hex[0:2],
                                                                               hash2=hash_hex[2:4],
                                                                               lfn=fileSpec.lfn)
                    tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    # add files to transfer object - tdata
                    tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
                    tdata.add_item(srcURL, dstURL)
                # submit transfer
                try:
                    transfer_result = self.tc.submit_transfer(tdata)
                    # check status code and message
                    tmpLog.debug(str(transfer_result))
                    if transfer_result['code'] == "Accepted":
                        # succeeded - set transfer ID which is used for later lookup
                        transferID = transfer_result['task_id']
                        tmpLog.debug('successfully submitted id={0}'.format(transferID))
                        # set status for files
                        self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                        msgStr = 'submitted transfer with ID={0}'.format(transferID)
                        tmpLog.debug(msgStr)
                    else:
                        if not _release_lock():
                            errMsg = 'Could not release DB lock for {}'.format(self.dummy_transfer_id)
                            tmpLog.error(errMsg)
                        tmpRetVal = (None, transfer_result['message'])
                        return tmpRetVal
                except Exception:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    if not _release_lock():
                        errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                    tmpLog.error(errMsg)
                    return errStat, errMsg
            else:
                msgStr = 'wait until enough files are pooled'
                tmpLog.debug(msgStr)
            # release the lock
            if not _release_lock():
                msgStr += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                tmpLog.error(msgStr)
            # return None to retry later
            return None, msgStr
        # NOTE(review): when the dummy group vanished after the refresh the
        # original fell through here without releasing the DB lock - it is
        # kept that way (the lock expires on its own); confirm intended
    # check transfer with real transfer IDs
    # get transfer groups
    groups = jobspec.get_groups_of_input_files(skip_ready=True)
    for transferID in groups:
        if transferID != self.dummy_transfer_id:
            # get transfer task
            tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog, self.tc, transferID)
            # return a temporary error when failed to get task
            if not tmpStat:
                errStr = 'failed to get transfer task'
                tmpLog.error(errStr)
                return None, errStr
            # return a temporary error when task is missing
            if transferID not in transferTasks:
                errStr = 'transfer task ID - {} is missing'.format(transferID)
                tmpLog.error(errStr)
                return None, errStr
            # succeeded in finding a transfer task by transferID
            if transferTasks[transferID]['status'] == 'SUCCEEDED':
                tmpLog.debug('transfer task {} succeeded'.format(transferID))
                self.set_FileSpec_status(jobspec, 'finished')
                return True, ''
            # failed
            if transferTasks[transferID]['status'] == 'FAILED':
                errStr = 'transfer task {} failed'.format(transferID)
                tmpLog.error(errStr)
                self.set_FileSpec_status(jobspec, 'failed')
                return False, errStr
            # another status - retry later
            tmpStr = 'transfer task {0} status: {1}'.format(transferID, transferTasks[transferID]['status'])
            tmpLog.debug(tmpStr)
            return None, ''
    # FIX: the original fell off the end returning bare None instead of a
    # (status, message) tuple; return the default success pair instead
    return tmpRetVal
def trigger_preparation(self, jobspec):
    """Trigger a Globus transfer staging the job's input files in.

    Builds a Globus TransferData task from the job's not-yet-ready input
    files and submits it, labelled so a later status poll can find it.

    :param jobspec: job specification holding PandaID, computingSite and
                    the input file attributes
    :return: (True, '') on success or when nothing needs to be done,
             (False, error message) on a definite failure, or the
             (status, message) pair produced by the Globus exception handler
    """
    # get logger
    tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='trigger_preparation')
    tmpLog.debug('start')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get the label identifying this job's transfer task
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path - will get them from AGIS eventually
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
    self.srcEndpoint = queueConfig.preparator['srcEndpoint']
    self.Globus_dstPath = self.basePath
    #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
    self.dstEndpoint = queueConfig.preparator['dstEndpoint']
    # make sure the local base path exists (hoisted out of the per-file
    # loop - the check is loop invariant)
    if not os.access(self.basePath, os.F_OK):
        os.makedirs(self.basePath)
    # get input files
    files = []
    lfns = []
    inFiles = jobspec.get_input_file_attributes(skip_ready=True)
    for inLFN, inFile in iteritems(inFiles):
        # set the local path for each file
        inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
        # create the file paths for the Globus source and destination endpoints
        Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
        Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
        files.append({'scope': inFile['scope'],
                      'name': inLFN,
                      'Globus_dstPath': Globus_dstpath,
                      'Globus_srcPath': Globus_srcpath})
        lfns.append(inLFN)
    tmpLog.debug('files[] {0}'.format(files))
    try:
        # test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errStr = ''
            if not tmpStatsrc:
                errStr += ' source Endpoint not activated '
            if not tmpStatdst:
                errStr += ' destination Endpoint not activated '
            tmpLog.error(errStr)
            return False, errStr
        # both endpoints activated - now prepare to transfer data
        if len(files) > 0:
            tdata = TransferData(self.tc,
                                 self.srcEndpoint,
                                 self.dstEndpoint,
                                 label=label,
                                 sync_level="checksum")
            # loop over all input files and add them to the transfer
            for myfile in files:
                tdata.add_item(myfile['Globus_srcPath'], myfile['Globus_dstPath'])
            # submit
            transfer_result = self.tc.submit_transfer(tdata)
            # check status code and message
            tmpLog.debug(str(transfer_result))
            if transfer_result['code'] == "Accepted":
                # succeeded - record the transfer ID used for later lookup
                transferID = transfer_result['task_id']
                jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                tmpLog.debug('done')
                return True, ''
            return False, transfer_result['message']
        # if no files to transfer return True
        return True, 'No files to transfer'
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        # FIX: return the handler's error message instead of a meaningless
        # empty dict so callers can see why the trigger failed
        return errStat, errMsg
def trigger_stage_out(self, jobspec):
    """Trigger a Globus transfer that stages the job's output files out.

    Verifies both Globus endpoints are activated, builds a TransferData
    object with one item per output file (destination path derived from an
    md5 of "scope:lfn"), submits the transfer and records the resulting
    task_id on the jobspec file group and on each output FileSpec.

    :param jobspec: job specification with computingSite and output files set
    :return: (True, '') on success,
             (False, error_message) on a hard error,
             (None/False, errMsg) from the Globus exception handler on failure
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='trigger_stage_out')
    tmpLog.debug('start')
    # default return
    tmpRetVal = (True, '')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get label used to identify this job's transfer task
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued - avoid submitting a duplicate transfer
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path will get them from Agis eventually
    # local import to avoid a circular dependency at module load time
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
    self.srcEndpoint = queueConfig.stager['srcEndpoint']
    self.Globus_srcPath = self.basePath
    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
    self.dstEndpoint = queueConfig.stager['dstEndpoint']
    # Test the endpoints and create the transfer data class
    errMsg = None
    try:
        # Test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errMsg = ''
            if not tmpStatsrc:
                errMsg += ' source Endpoint not activated '
            if not tmpStatdst:
                errMsg += ' destination Endpoint not activated '
            tmpLog.error(errMsg)
            tmpRetVal = (False, errMsg)
            return tmpRetVal
        # both endpoints activated now prepare to transfer data
        tdata = TransferData(self.tc,
                             self.srcEndpoint,
                             self.dstEndpoint,
                             label=label,
                             sync_level="checksum")
    except Exception:
        # bug fix: was a bare "except:" which also swallowed SystemExit/KeyboardInterrupt
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        tmpRetVal = (errStat, errMsg)
        return tmpRetVal
    # loop over all output files
    fileAttrs = jobspec.get_output_file_attributes()
    lfns = []
    for fileSpec in jobspec.outFiles:
        scope = fileAttrs[fileSpec.lfn]['scope']
        # destination layout is <dstPath>/<scope>/<hh>/<hh>/<lfn> where the
        # two hh levels come from the md5 of "scope:lfn" (Rucio-style hashing)
        checksum = hashlib.md5()
        checksum.update('%s:%s' % (scope, fileSpec.lfn))
        hash_hex = checksum.hexdigest()
        correctedscope = "/".join(scope.split('.'))
        srcURL = fileSpec.path
        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                   scope=correctedscope,
                                                                   hash1=hash_hex[0:2],
                                                                   hash2=hash_hex[2:4],
                                                                   lfn=fileSpec.lfn)
        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
        # add files to transfer object - tdata
        if os.access(srcURL, os.R_OK):
            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
            tdata.add_item(srcURL, dstURL)
            lfns.append(fileSpec.lfn)
        else:
            errMsg = "source file {} does not exist".format(srcURL)
            tmpLog.error(errMsg)
            tmpRetVal = (False, errMsg)
            return tmpRetVal
    # submit transfer
    try:
        transfer_result = self.tc.submit_transfer(tdata)
        # check status code and message
        tmpLog.debug(str(transfer_result))
        if transfer_result['code'] == "Accepted":
            # succeeded - set transfer ID which is used for later lookup
            transferID = transfer_result['task_id']
            tmpLog.debug('successfully submitted id={0}'.format(transferID))
            jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
            # record the transfer ID on every output FileSpec
            for fileSpec in jobspec.outFiles:
                if fileSpec.fileAttributes is None:  # bug fix: was "== None"
                    fileSpec.fileAttributes = {}
                fileSpec.fileAttributes['transferID'] = transferID
        else:
            tmpRetVal = (False, transfer_result['message'])
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        if errMsg is None:
            errtype, errvalue = sys.exc_info()[:2]
            errMsg = "{0} {1}".format(errtype.__name__, errvalue)
        tmpRetVal = (errStat, errMsg)
    # return
    tmpLog.debug('done')
    return tmpRetVal
def check_status(self, jobspec):
    """Check (and, for pooled dummy groups, submit) Globus transfers for a job.

    Two phases, both driven by jobspec.get_groups_of_output_files():
    1. Groups keyed by a *dummy* transfer ID (not a valid Globus task UUID)
       are pooled across jobs; under a DB object lock, a real transfer is
       submitted once >= 10 files are pooled or the group is older than
       10 minutes.  Returns None (retry later) after this phase.
    2. Groups keyed by a real transfer ID are looked up in Globus and mapped
       to (True, '') on SUCCEEDED, (False, err) on FAILED, (None, '') for any
       other state.

    Return convention: True = done, False = fatal error, None = check again later.

    NOTE(review) suspected issues kept as-is to preserve behavior:
    - one release path assigns ``self.have_db_lock`` while the lock flag is
      otherwise the *local* ``have_db_lock`` - confirm which is intended;
    - a bare ``except:`` around the endpoint-activation block;
    - ``hash.update(str)`` is Python-2 only (needs bytes on Python 3);
    - ``msgStr`` in the "release the db lock if needed" branch relies on a
      value set earlier in the loop.
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(jobspec.PandaID, threading.current_thread().ident), method_name='check_status')
    tmpLog.debug('start')
    # show the dummy transfer id and set to a value with the PandaID if needed.
    tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
    if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base, 'XXXX'):
        # replace the placeholder suffix with this job's PandaID
        old_dummy_transfer_id = self.dummy_transfer_id
        self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base, jobspec.PandaID)
        tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(old_dummy_transfer_id, self.dummy_transfer_id))
    # default return
    tmpRetVal = (True, '')
    # set flag if have db lock
    have_db_lock = False
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        # not found
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    else:
        tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # get the queueConfig and corresponding objStoreID_ES
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    # check queueConfig stager section to see if jobtype is set
    if 'jobtype' in queueConfig.stager:
        if queueConfig.stager['jobtype'] == "Yoda":
            self.Yodajob = True
    # set the location of the files in fileSpec.objstoreID
    # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
    self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
    if self.Yodajob:
        self.pathConvention = int(queueConfig.stager['pathConvention'])
        tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention ={2}'.format(jobspec.PandaID, self.objstoreID, self.pathConvention))
    else:
        self.pathConvention = None
        tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID, self.objstoreID))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # set transferID to None
    transferID = None
    # get the scope of the log files
    outfileattrib = jobspec.get_output_file_attributes()
    scopeLog = 'xxxx'
    for key in outfileattrib.keys():
        if "log.tgz" in key:
            scopeLog = outfileattrib[key]['scope']
    # get transfer groups
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
    # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
    for dummy_transferID in groups:
        # skip if valid transfer ID not dummy one
        if validate_transferid(dummy_transferID):
            continue
        # lock for 120 sec
        tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
        have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
        if not have_db_lock:
            # escape since locked by another thread
            msgStr = 'escape since locked by another thread'
            tmpLog.debug(msgStr)
            return None, msgStr
        # refresh group information since that could have been updated by another thread before getting the lock
        tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
        self.dbInterface.refresh_file_group_info(jobspec)
        # get transfer groups again with refreshed info
        tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # the dummy transfer ID is still there
        if dummy_transferID in groups:
            groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
            # get files with the dummy transfer ID across jobs
            fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
            # submit transfer if there are more than 10 files or the group was made before more than 10 min
            msgStr = 'dummy_transferID = {0} number of files = {1}'.format(dummy_transferID, len(fileSpecs))
            tmpLog.debug(msgStr)
            if len(fileSpecs) >= 10 or \
               groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                tmpLog.debug('prepare to transfer files')
                # submit transfer and get a real transfer ID
                # set the Globus destination Endpoint id and path will get them from Agis eventually
                #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                self.srcEndpoint = queueConfig.stager['srcEndpoint']
                self.Globus_srcPath = self.basePath
                self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                self.dstEndpoint = queueConfig.stager['dstEndpoint']
                # Test the endpoints and create the transfer data class
                errMsg = None
                try:
                    # Test endpoints for activation
                    tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
                    tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
                    if tmpStatsrc and tmpStatdst:
                        errStr = 'source Endpoint and destination Endpoint activated'
                        tmpLog.debug(errStr)
                    else:
                        errMsg = ''
                        if not tmpStatsrc:
                            errMsg += ' source Endpoint not activated '
                        if not tmpStatdst:
                            errMsg += ' destination Endpoint not activated '
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                        # NOTE(review): assigns self.have_db_lock, unlike the local
                        # have_db_lock used everywhere else - confirm intended
                        self.have_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not self.have_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (None, errMsg)
                        return tmpRetVal
                    # both endpoints activated now prepare to transfer data
                    tdata = None
                    tdata = TransferData(self.tc, self.srcEndpoint, self.dstEndpoint, sync_level="checksum")
                except:
                    # NOTE(review): bare except kept as-is
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(errMsg)
                    tmpRetVal = (errStat, errMsg)
                    return tmpRetVal
                # loop over all files
                ifile = 0
                for fileSpec in fileSpecs:
                    logfile = False
                    scope = 'panda'
                    if fileSpec.scope is not None:
                        scope = fileSpec.scope
                    # for Yoda job set the scope to transient for non log files
                    if self.Yodajob:
                        scope = 'transient'
                    if fileSpec.fileType == "log":
                        logfile = True
                        scope = scopeLog
                    # only print to log file first 25 files
                    if ifile < 25:
                        msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    if ifile == 25:
                        msgStr = "printed first 25 files skipping the rest".format(fileSpec.lfn, fileSpec.scope)
                        tmpLog.debug(msgStr)
                    # destination path level hashing from md5 of "scope:lfn"
                    # NOTE(review): hash.update(str) is Python-2 only; needs .encode() on Python 3
                    hash = hashlib.md5()
                    hash.update('%s:%s' % (scope, fileSpec.lfn))
                    hash_hex = hash.hexdigest()
                    correctedscope = "/".join(scope.split('.'))
                    srcURL = fileSpec.path
                    dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                               scope=correctedscope,
                                                                               hash1=hash_hex[0:2],
                                                                               hash2=hash_hex[2:4],
                                                                               lfn=fileSpec.lfn)
                    if logfile:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    if ifile < 25:
                        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                    # add files to transfer object - tdata
                    if os.access(srcURL, os.R_OK):
                        if ifile < 25:
                            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
                        tdata.add_item(srcURL, dstURL)
                    else:
                        errMsg = "source file {} does not exist".format(srcURL)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (False, errMsg)
                        return tmpRetVal
                    ifile += 1
                # submit transfer
                tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                try:
                    transfer_result = self.tc.submit_transfer(tdata)
                    # check status code and message
                    tmpLog.debug(str(transfer_result))
                    if transfer_result['code'] == "Accepted":
                        # succeeded
                        # set transfer ID which are used for later lookup
                        transferID = transfer_result['task_id']
                        tmpLog.debug('successfully submitted id={0}'.format(transferID))
                        # set status for files
                        self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                        msgStr = 'submitted transfer with ID={0}'.format(transferID)
                        tmpLog.debug(msgStr)
                    else:
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                        tmpRetVal = (None, transfer_result['message'])
                        return tmpRetVal
                except Exception as e:
                    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                    # release process lock
                    tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                    release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                    if not release_db_lock:
                        errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                    return errStat, errMsg
            else:
                # not enough files pooled and group is still fresh
                msgStr = 'wait until enough files are pooled'
                tmpLog.debug(msgStr)
            # release the lock
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
            # return None to retry later
            return None, msgStr
        # release the db lock if needed
        if have_db_lock:
            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
            if release_db_lock:
                tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id, dummy_transferID))
                have_db_lock = False
            else:
                msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                tmpLog.error(msgStr)
                return None, msgStr
    # check transfer with real transfer IDs
    # get transfer groups
    tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
    groups = jobspec.get_groups_of_output_files()
    tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
    tmpLog.debug('transfer groups any state - {0}'.format(groups))
    if len(groups) == 0:
        tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
        tmpLog.debug("check_status return status - True ")
        return True, ''
    for transferID in groups:
        # allow only valid UUID
        if validate_transferid(transferID):
            # get transfer task
            tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog, self.tc, transferID)
            # return a temporary error when failed to get task
            if not tmpStat:
                errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (str(self.tc), str(transferID))
                tmpLog.error(errStr)
                return None, errStr
            # return a temporary error when task is missing
            if transferID not in transferTasks:
                errStr = 'transfer task ID - {} is missing'.format(transferID)
                tmpLog.error(errStr)
                return None, errStr
            # succeeded in finding a transfer task by tranferID
            if transferTasks[transferID]['status'] == 'SUCCEEDED':
                tmpLog.debug('transfer task {} succeeded'.format(transferID))
                self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                if self.changeFileStatusOnSuccess:
                    self.set_FileSpec_status(jobspec, 'finished')
                return True, ''
            # failed
            if transferTasks[transferID]['status'] == 'FAILED':
                errStr = 'transfer task {} failed'.format(transferID)
                tmpLog.error(errStr)
                self.set_FileSpec_status(jobspec, 'failed')
                return False, errStr
            # another status
            tmpStr = 'transfer task {0} status: {1}'.format(transferID, transferTasks[transferID]['status'])
            tmpLog.debug(tmpStr)
            return None, ''
    # end of loop over transfer groups
    tmpLog.debug('End of loop over transfers groups - ending check_status function')
    return None, 'no valid transfer id found'
def trigger_stage_out(self, jobspec):
    """Trigger a Globus transfer that stages the job's output files out.

    NOTE(review): this appears to be a second definition of trigger_stage_out
    in the same scope (it shadows an earlier, essentially identical one) -
    confirm whether the earlier copy should be removed.

    Verifies both Globus endpoints are activated, builds a TransferData
    object with one item per output file (destination path derived from an
    md5 of "scope:lfn"), submits the transfer and records the resulting
    task_id on the jobspec file group and on each output FileSpec.

    :param jobspec: job specification with computingSite and output files set
    :return: (True, '') on success,
             (False, error_message) on a hard error,
             (None/False, errMsg) from the Globus exception handler on failure
    """
    # make logger
    tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='trigger_stage_out')
    tmpLog.debug('start')
    # default return
    tmpRetVal = (True, '')
    # check that jobspec.computingSite is defined
    if jobspec.computingSite is None:
        tmpLog.error('jobspec.computingSite is not defined')
        return False, 'jobspec.computingSite is not defined'
    tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
    # test we have a Globus Transfer Client
    if not self.tc:
        errStr = 'failed to get Globus Transfer Client'
        tmpLog.error(errStr)
        return False, errStr
    # get label used to identify this job's transfer task
    label = self.make_label(jobspec)
    tmpLog.debug('label={0}'.format(label))
    # get transfer tasks
    tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog, self.tc, label)
    if not tmpStat:
        errStr = 'failed to get transfer tasks'
        tmpLog.error(errStr)
        return False, errStr
    # check if already queued - avoid submitting a duplicate transfer
    if label in transferTasks:
        tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
        return True, ''
    # set the Globus destination Endpoint id and path will get them from Agis eventually
    # local import to avoid a circular dependency at module load time
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
    queueConfigMapper = QueueConfigMapper()
    queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
    self.srcEndpoint = queueConfig.stager['srcEndpoint']
    self.Globus_srcPath = self.basePath
    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
    self.dstEndpoint = queueConfig.stager['dstEndpoint']
    # Test the endpoints and create the transfer data class
    errMsg = None
    try:
        # Test endpoints for activation
        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.srcEndpoint)
        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog, self.tc, self.dstEndpoint)
        if tmpStatsrc and tmpStatdst:
            errStr = 'source Endpoint and destination Endpoint activated'
            tmpLog.debug(errStr)
        else:
            errMsg = ''
            if not tmpStatsrc:
                errMsg += ' source Endpoint not activated '
            if not tmpStatdst:
                errMsg += ' destination Endpoint not activated '
            tmpLog.error(errMsg)
            tmpRetVal = (False, errMsg)
            return tmpRetVal
        # both endpoints activated now prepare to transfer data
        tdata = TransferData(self.tc,
                             self.srcEndpoint,
                             self.dstEndpoint,
                             label=label,
                             sync_level="checksum")
    except Exception:
        # bug fix: was a bare "except:" which also swallowed SystemExit/KeyboardInterrupt
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        tmpRetVal = (errStat, errMsg)
        return tmpRetVal
    # loop over all output files
    fileAttrs = jobspec.get_output_file_attributes()
    lfns = []
    for fileSpec in jobspec.outFiles:
        scope = fileAttrs[fileSpec.lfn]['scope']
        # destination layout is <dstPath>/<scope>/<hh>/<hh>/<lfn> where the
        # two hh levels come from the md5 of "scope:lfn" (Rucio-style hashing)
        checksum = hashlib.md5()
        checksum.update('%s:%s' % (scope, fileSpec.lfn))
        hash_hex = checksum.hexdigest()
        correctedscope = "/".join(scope.split('.'))
        srcURL = fileSpec.path
        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                   scope=correctedscope,
                                                                   hash1=hash_hex[0:2],
                                                                   hash2=hash_hex[2:4],
                                                                   lfn=fileSpec.lfn)
        tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
        # add files to transfer object - tdata
        if os.access(srcURL, os.R_OK):
            tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
            tdata.add_item(srcURL, dstURL)
            lfns.append(fileSpec.lfn)
        else:
            errMsg = "source file {} does not exist".format(srcURL)
            tmpLog.error(errMsg)
            tmpRetVal = (False, errMsg)
            return tmpRetVal
    # submit transfer
    try:
        transfer_result = self.tc.submit_transfer(tdata)
        # check status code and message
        tmpLog.debug(str(transfer_result))
        if transfer_result['code'] == "Accepted":
            # succeeded - set transfer ID which is used for later lookup
            transferID = transfer_result['task_id']
            tmpLog.debug('successfully submitted id={0}'.format(transferID))
            jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
            # record the transfer ID on every output FileSpec
            for fileSpec in jobspec.outFiles:
                if fileSpec.fileAttributes is None:  # bug fix: was "== None"
                    fileSpec.fileAttributes = {}
                fileSpec.fileAttributes['transferID'] = transferID
        else:
            tmpRetVal = (False, transfer_result['message'])
    except Exception:
        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
        if errMsg is None:
            errtype, errvalue = sys.exc_info()[:2]
            errMsg = "{0} {1}".format(errtype.__name__, errvalue)
        tmpRetVal = (errStat, errMsg)
    # return
    tmpLog.debug('done')
    return tmpRetVal