Example #1
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = core_utils.make_logger(_logger,
                                     'PandaID={0}'.format(jobspec.PandaID),
                                     method_name='trigger_preparation')
     tmpLog.debug('start')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
      # set the Globus destination Endpoint id and path; will get them from AGIS eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(
             self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
          # check if the path exists; if not, create it
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints
         Globus_srcpath = mover_utils.construct_file_path(
             self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(
             self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({
             'scope': inFile['scope'],
             'name': inLFN,
             'Globus_dstPath': Globus_dstpath,
             'Globus_srcPath': Globus_srcpath
         })
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc:
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst:
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False, errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
              # loop over all input files and add them
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],
                                myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                  # set the transfer ID, which is used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files(
                     {transferID: {
                         'lfns': lfns,
                         'groupStatus': 'active'
                     }})
                 tmpLog.debug('done')
                 return True, ''
             else:
                 return False, transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
          return errStat, errMsg
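These plugin methods share PanDA Harvester's (status, message) return convention: True for success, False for a fatal error, and None for a temporary error the caller should retry later. A minimal sketch of how a caller might dispatch on that contract (the preparator and jobspec objects here are hypothetical stand-ins):

 # hypothetical driver illustrating the (status, message) contract
 status, msg = preparator.trigger_preparation(jobspec)
 if status is True:
     print('success: {0}'.format(msg))
 elif status is False:
     print('fatal error, giving up: {0}'.format(msg))
 else:
     print('temporary error, retry later: {0}'.format(msg))  # status is None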
Example #2
    def check_stage_out_status(self, jobspec):
        # make logger
        tmpLog = self.make_logger(_logger,
                                  'PandaID={0} ThreadID={1}'.format(
                                      jobspec.PandaID,
                                      threading.current_thread().ident),
                                  method_name='check_stage_out_status')
        tmpLog.debug('start')
        # show the dummy transfer ID and reset it to a value containing the PandaID if needed
        tmpLog.debug('self.dummy_transfer_id = {}'.format(
            self.dummy_transfer_id))
        if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base,
                                                      'XXXX'):
            old_dummy_transfer_id = self.dummy_transfer_id
            self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base,
                                                      jobspec.PandaID)
            tmpLog.debug(
                'Change self.dummy_transfer_id from {0} to {1}'.format(
                    old_dummy_transfer_id, self.dummy_transfer_id))

        # default return
        tmpRetVal = (True, '')
        # set flag if have db lock
        have_db_lock = False
        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(
                jobspec.computingSite))
        # get the queueConfig and corresponding objStoreID_ES
        queueConfigMapper = QueueConfigMapper()
        queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
        # check queueConfig stager section to see if jobtype is set
        if 'jobtype' in queueConfig.stager:
            if queueConfig.stager['jobtype'] == "Yoda":
                self.Yodajob = True
        # set the location of the files in fileSpec.objstoreID
        # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json
        self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
        if self.Yodajob:
            self.pathConvention = int(queueConfig.stager['pathConvention'])
            tmpLog.debug(
                'Yoda Job - PandaID = {0} objstoreID = {1} pathConvention = {2}'
                .format(jobspec.PandaID, self.objstoreID, self.pathConvention))
        else:
            self.pathConvention = None
            tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(
                jobspec.PandaID, self.objstoreID))
        # test we have a Globus Transfer Client
        if not self.tc:
            errStr = 'failed to get Globus Transfer Client'
            tmpLog.error(errStr)
            return False, errStr
        # set transferID to None
        transferID = None
        # get the scope of the log files
        outfileattrib = jobspec.get_output_file_attributes()
        scopeLog = 'xxxx'
        for key in outfileattrib.keys():
            if "log.tgz" in key:
                scopeLog = outfileattrib[key]['scope']
        # get transfer groups
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug(
            'jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
        for dummy_transferID in groups:
            # skip if it is a valid transfer ID, not a dummy one
            if validate_transferid(dummy_transferID):
                continue
            # lock for 120 sec
            tmpLog.debug(
                'attempt to set DB lock for self.id - {0} dummy_transferID - {1}'
                .format(self.id, dummy_transferID))
            have_db_lock = self.dbInterface.get_object_lock(dummy_transferID,
                                                            lock_interval=120)
            if not have_db_lock:
                # escape since locked by another thread
                msgStr = 'escape since locked by another thread'
                tmpLog.debug(msgStr)
                return None, msgStr
            # refresh group information since that could have been updated by another thread before getting the lock
            tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
            self.dbInterface.refresh_file_group_info(jobspec)
            # get transfer groups again with refreshed info
            tmpLog.debug(
                'After db refresh call groups=jobspec.get_groups_of_output_files()'
            )
            groups = jobspec.get_groups_of_output_files()
            tmpLog.debug(
                'jobspec.get_groups_of_output_files() = : {0}'.format(groups))
            # the dummy transfer ID is still there
            if dummy_transferID in groups:
                groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
                # get files with the dummy transfer ID across jobs
                fileSpecs = self.dbInterface.get_files_with_group_id(
                    dummy_transferID)
                # submit transfer if there are at least 10 files or the group was created more than 10 min ago
                msgStr = 'dummy_transferID = {0}  number of files = {1}'.format(
                    dummy_transferID, len(fileSpecs))
                tmpLog.debug(msgStr)
                if len(fileSpecs) >= 10 or \
                        groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                    tmpLog.debug('prepare to transfer files')
                    # submit transfer and get a real transfer ID
                    # set the Globus destination Endpoint id and path; will get them from AGIS eventually
                    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                    self.srcEndpoint = queueConfig.stager['srcEndpoint']
                    self.Globus_srcPath = self.basePath
                    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                    self.dstEndpoint = queueConfig.stager['dstEndpoint']
                    # Test the endpoints and create the transfer data class
                    errMsg = None
                    try:
                        # Test endpoints for activation
                        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
                            tmpLog, self.tc, self.srcEndpoint)
                        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
                            tmpLog, self.tc, self.dstEndpoint)
                        if tmpStatsrc and tmpStatdst:
                            errStr = 'source Endpoint and destination Endpoint activated'
                            tmpLog.debug(errStr)
                        else:
                            errMsg = ''
                            if not tmpStatsrc:
                                errMsg += ' source Endpoint not activated '
                            if not tmpStatdst:
                                errMsg += ' destination Endpoint not activated '
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(
                                    dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (None, errMsg)
                            return tmpRetVal
                        # both endpoints activated now prepare to transfer data
                        tdata = None
                        tdata = TransferData(self.tc,
                                             self.srcEndpoint,
                                             self.dstEndpoint,
                                             sync_level="checksum")
                    except:
                        errStat, errMsg = globus_utils.handle_globus_exception(
                            tmpLog)
                        # release process lock
                        tmpLog.debug(
                            'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                            .format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(
                            dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(
                                dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (errStat, errMsg)
                        return tmpRetVal
                    # loop over all files
                    ifile = 0
                    for fileSpec in fileSpecs:
                        logfile = False
                        scope = 'panda'
                        if fileSpec.scope is not None:
                            scope = fileSpec.scope
                        # for Yoda jobs set the scope to transient for non-log files
                        if self.Yodajob:
                            scope = 'transient'
                        if fileSpec.fileType == "log":
                            logfile = True
                            scope = scopeLog
                        # only print the first 25 files to the log
                        if ifile < 25:
                            msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(
                                fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        if ifile == 25:
                            msgStr = "printed first 25 files skipping the rest".format(
                                fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
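                        # build the Rucio-convention deterministic path: <scope>/<md5(scope:lfn)[0:2]>/<md5[2:4]>/<lfn>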
                        hash = hashlib.md5()
                        hash.update('%s:%s' % (scope, fileSpec.lfn))
                        hash_hex = hash.hexdigest()
                        correctedscope = "/".join(scope.split('.'))
                        srcURL = fileSpec.path
                        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
                            endPoint=self.Globus_dstPath,
                            scope=correctedscope,
                            hash1=hash_hex[0:2],
                            hash2=hash_hex[2:4],
                            lfn=fileSpec.lfn)
                        if logfile:
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(
                                srcURL=srcURL, dstURL=dstURL))
                        if ifile < 25:
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(
                                srcURL=srcURL, dstURL=dstURL))
                        # add files to transfer object - tdata
                        if os.access(srcURL, os.R_OK):
                            if ifile < 25:
                                tmpLog.debug("tdata.add_item({},{})".format(
                                    srcURL, dstURL))
                            tdata.add_item(srcURL, dstURL)
                        else:
                            errMsg = "source file {} does not exist".format(
                                srcURL)
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(
                                    dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (False, errMsg)
                            return tmpRetVal
                        ifile += 1
                    # submit transfer
                    tmpLog.debug('Number of files to transfer - {}'.format(
                        len(tdata['DATA'])))
                    try:
                        transfer_result = self.tc.submit_transfer(tdata)
                        # check status code and message
                        tmpLog.debug(str(transfer_result))
                        if transfer_result['code'] == "Accepted":
                            # succeeded
                            # set the transfer ID, which is used for later lookup
                            transferID = transfer_result['task_id']
                            tmpLog.debug(
                                'successfully submitted id={0}'.format(
                                    transferID))
                            # set status for files
                            self.dbInterface.set_file_group(
                                fileSpecs, transferID, 'running')
                            msgStr = 'submitted transfer with ID={0}'.format(
                                transferID)
                            tmpLog.debug(msgStr)
                        else:
                            # release process lock
                            tmpLog.debug(
                                'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                                .format(self.id, dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(
                                dummy_transferID)
                            if not release_db_lock:
                                errMsg = 'Could not release DB lock for {}'.format(
                                    dummy_transferID)
                                tmpLog.error(errMsg)
                            tmpRetVal = (None, transfer_result['message'])
                            return tmpRetVal
                    except Exception as e:
                        errStat, errMsg = globus_utils.handle_globus_exception(
                            tmpLog)
                        # release process lock
                        tmpLog.debug(
                            'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                            .format(self.id, dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(
                            dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(
                                dummy_transferID)
                        tmpLog.error(errMsg)
                        return errStat, errMsg
                else:
                    msgStr = 'wait until enough files are pooled'
                    tmpLog.debug(msgStr)
                # release the lock
                tmpLog.debug(
                    'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                    .format(self.id, dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(
                    dummy_transferID)
                if release_db_lock:
                    tmpLog.debug(
                        'released DB lock for self.id - {0} dummy_transferID - {1}'
                        .format(self.id, dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(
                        dummy_transferID)
                    tmpLog.error(msgStr)
                # return None to retry later
                return None, msgStr
            # release the db lock if needed
            if have_db_lock:
                tmpLog.debug(
                    'attempt to release DB lock for self.id - {0} dummy_transferID - {1}'
                    .format(self.id, dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(
                    dummy_transferID)
                if release_db_lock:
                    tmpLog.debug(
                        'released DB lock for self.id - {0} dummy_transferID - {1}'
                        .format(self.id, dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr = 'Could not release DB lock for {}'.format(
                        dummy_transferID)
                    tmpLog.error(msgStr)
                    return None, msgStr
        # check transfer with real transfer IDs
        # get transfer groups
        tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state - {0}'.format(groups))
        if len(groups) == 0:
            tmpLog.debug(
                "jobspec.get_groups_of_output_files(skip_done=True) returned no files "
            )
            tmpLog.debug("check_stage_out_status return status - True ")
            return True, ''

        for transferID in groups:
            # allow only valid UUID
            if validate_transferid(transferID):
                # get transfer task
                tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(
                    tmpLog, self.tc, transferID)
                # return a temporary error when failed to get task
                if not tmpStat:
                    errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (
                        str(self.tc), str(transferID))
                    tmpLog.error(errStr)
                    return None, errStr
                # return a temporary error when task is missing
                if transferID not in transferTasks:
                    errStr = 'transfer task ID - {} is missing'.format(
                        transferID)
                    tmpLog.error(errStr)
                    return None, errStr
                # succeeded in finding a transfer task by transferID
                if transferTasks[transferID]['status'] == 'SUCCEEDED':
                    tmpLog.debug(
                        'transfer task {} succeeded'.format(transferID))
                    self.set_FileSpec_objstoreID(jobspec, self.objstoreID,
                                                 self.pathConvention)
                    if self.changeFileStatusOnSuccess:
                        self.set_FileSpec_status(jobspec, 'finished')
                    return True, ''
                # failed
                if transferTasks[transferID]['status'] == 'FAILED':
                    errStr = 'transfer task {} failed'.format(transferID)
                    tmpLog.error(errStr)
                    self.set_FileSpec_status(jobspec, 'failed')
                    return False, errStr
                # another status
                tmpStr = 'transfer task {0} status: {1}'.format(
                    transferID, transferTasks[transferID]['status'])
                tmpLog.debug(tmpStr)
                return None, ''
        # end of loop over transfer groups
        tmpLog.debug(
            'End of loop over transfers groups - ending check_stage_out_status function'
        )
        return None, 'no valid transfer id found'
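check_stage_out_status relies on a validate_transferid helper to tell real Globus task IDs apart from dummy placeholders. Globus task IDs are UUIDs, so a minimal sketch of such a helper (an illustrative assumption, not the actual Harvester implementation) could be:

import uuid

def validate_transferid(transfer_id):
    # hypothetical check: accept only strings that parse as a canonical UUID;
    # dummy placeholder IDs such as 'dummy_id_XXXX' fail this test
    try:
        return str(uuid.UUID(transfer_id)) == str(transfer_id).lower()
    except (AttributeError, TypeError, ValueError):
        return False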
Example #3
   tc = None
   errStr = 'failed to get Globus Client ID and Refresh Token'
   tmpLog.error(errStr)
   sys.exit(1)

# create Globus transfer client to send initial files to remote Globus source
tmpStat, tc = globus_utils.create_globus_transfer_client(tmpLog,client_id,refresh_token)
if not tmpStat:
   tc = None
   errStr = 'failed to create Globus Transfer Client'
   tmpLog.error(errStr)
   sys.exit(1)
try:
   # We are sending test files from our destination machine to the source machine
   # Test endpoints for activation
   tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,tc,dstEndpoint)
   tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,tc,srcEndpoint)
   if tmpStatsrc and tmpStatdst:
      errStr = 'source Endpoint and destination Endpoint activated'
      tmpLog.debug(errStr)
   else:
      errStr = ''
      if not tmpStatsrc :
         errStr += ' source Endpoint not activated '
      if not tmpStatdst :
         errStr += ' destination Endpoint not activated '
      tmpLog.error(errStr)
      sys.exit(2)
   # both endpoints activated now prepare to transfer data
   # We are sending test files from our destination machine to the source machine
   tdata = TransferData(tc,dstEndpoint,srcEndpoint,sync_level="checksum")
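The script stops after creating the TransferData object; sync_level="checksum" tells Globus to re-copy a file only when the source and destination checksums differ. A hedged continuation (the file paths are hypothetical) would add items and submit:

   # add a test file (hypothetical paths) and submit; the response carries the task ID
   tdata.add_item('/tmp/test1.txt', '/tmp/test1.txt')
   transfer_result = tc.submit_transfer(tdata)
   tmpLog.debug('submitted Globus task_id = {0}'.format(transfer_result['task_id']))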
Example #5
 def check_status(self, jobspec):
     # make logger
     tmpLog = core_utils.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                                     method_name='check_status')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc :
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # set transferID to None
     transferID = None
     # get transfer groups
     groups = jobspec.get_groups_of_input_files(skip_ready=True)
     tmpLog.debug('jobspec.get_groups_of_input_files() = : {0}'.format(groups))
     # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
     if self.dummy_transfer_id in groups:
         # lock for 120 sec
         if not self.have_db_lock :
             tmpLog.debug('attempt to set DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
             self.have_db_lock = self.dbInterface.get_object_lock(self.dummy_transfer_id, lock_interval=120)
         if not self.have_db_lock:
             # escape since locked by another thread
             msgStr = 'escape since locked by another thread'
             tmpLog.debug(msgStr)
             return None, msgStr
         # refresh group information since that could have been updated by another thread before getting the lock
         self.dbInterface.refresh_file_group_info(jobspec)
         # get transfer groups again with refreshed info
         groups = jobspec.get_groups_of_input_files(skip_ready=True)
         # the dummy transfer ID is still there
         if self.dummy_transfer_id in groups:
             groupUpdateTime = groups[self.dummy_transfer_id]['groupUpdateTime']
             # get files with the dummy transfer ID across jobs
             fileSpecs = self.dbInterface.get_files_with_group_id(self.dummy_transfer_id)
              # submit transfer if there are at least 10 files or the group was created more than 10 min ago
             msgStr = 'self.dummy_transfer_id = {0}  number of files = {1}'.format(self.dummy_transfer_id,len(fileSpecs))
             tmpLog.debug(msgStr)
             if len(fileSpecs) >= 10 or \
                     groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                 tmpLog.debug('prepare to transfer files')
                 # submit transfer and get a real transfer ID
                  # set the Globus destination Endpoint id and path; will get them from AGIS eventually
                 from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
                 queueConfigMapper = QueueConfigMapper()
                 queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
                 self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
                 self.srcEndpoint = queueConfig.preparator['srcEndpoint']
                 self.Globus_dstPath = self.basePath
                 #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
                 self.dstEndpoint = queueConfig.preparator['dstEndpoint']
                 # Test the endpoints and create the transfer data class 
                 errMsg = None
                 try:
                     # Test endpoints for activation
                     tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
                     tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
                     if tmpStatsrc and tmpStatdst:
                         errStr = 'source Endpoint and destination Endpoint activated'
                         tmpLog.debug(errStr)
                     else:
                         errMsg = ''
                         if not tmpStatsrc :
                             errMsg += ' source Endpoint not activated '
                         if not tmpStatdst :
                             errMsg += ' destination Endpoint not activated '
                         # release process lock
                         tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
                         self.have_db_lock = self.dbInterface.release_object_lock(self.dummy_transfer_id)
                         if not self.have_db_lock:
                             errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                         tmpLog.error(errMsg)
                         tmpRetVal = (None,errMsg)
                         return tmpRetVal
                     # both endpoints activated now prepare to transfer data
                     tdata = TransferData(self.tc,
                                          self.srcEndpoint,
                                          self.dstEndpoint,
                                          sync_level="checksum")
                 except:
                     errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                     # release process lock
                     tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
                     self.have_db_lock = self.dbInterface.release_object_lock(self.dummy_transfer_id)
                     if not self.have_db_lock:
                         errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                     tmpLog.error(errMsg)
                     tmpRetVal = (errStat, errMsg)
                     return tmpRetVal
                 # loop over all files
                 for fileSpec in fileSpecs:
                     attrs = jobspec.get_input_file_attributes()
                     msgStr = "len(jobSpec.get_input_file_attributes()) = {0} type - {1}".format(len(attrs),type(attrs))
                     tmpLog.debug(msgStr)
                      for key, value in iteritems(attrs):
                         msgStr = "input file attributes - {0} {1}".format(key,value)
                         tmpLog.debug(msgStr)
                     msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                     tmpLog.debug(msgStr)
                     scope = fileSpec.scope
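                      # build the Rucio-convention deterministic path components from md5('scope:lfn')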
                     hash = hashlib.md5()
                     hash.update('%s:%s' % (scope, fileSpec.lfn))
                     hash_hex = hash.hexdigest()
                     correctedscope = "/".join(scope.split('.'))
                     #srcURL = fileSpec.path
                     srcURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_srcPath,
                                                                                scope=correctedscope,
                                                                                hash1=hash_hex[0:2],
                                                                                hash2=hash_hex[2:4],
                                                                                lfn=fileSpec.lfn)
                     dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                                scope=correctedscope,
                                                                                hash1=hash_hex[0:2],
                                                                                hash2=hash_hex[2:4],
                                                                                lfn=fileSpec.lfn)
                     tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                     # add files to transfer object - tdata
                     tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
                     tdata.add_item(srcURL,dstURL)
                 # submit transfer 
                 try:
                     transfer_result = self.tc.submit_transfer(tdata)
                     # check status code and message
                     tmpLog.debug(str(transfer_result))
                     if transfer_result['code'] == "Accepted":
                         # succeeded
                          # set the transfer ID, which is used for later lookup
                         transferID = transfer_result['task_id']
                         tmpLog.debug('successfully submitted id={0}'.format(transferID))
                         # set status for files
                         self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                         msgStr = 'submitted transfer with ID={0}'.format(transferID)
                         tmpLog.debug(msgStr)
                     else:
                         # release process lock
                         tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
                         self.have_db_lock = self.dbInterface.release_object_lock(self.dummy_transfer_id)
                         if not self.have_db_lock:
                             errMsg = 'Could not release DB lock for {}'.format(self.dummy_transfer_id)
                             tmpLog.error(errMsg)
                         tmpRetVal = (None, transfer_result['message'])
                         return tmpRetVal
                 except Exception as e:
                     errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
                     # release process lock
                     tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
                     self.have_db_lock = self.dbInterface.release_object_lock(self.dummy_transfer_id)
                     if not self.have_db_lock:
                         errMsg += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                     tmpLog.error(errMsg)
                     return errStat, errMsg
             else:
                 msgStr = 'wait until enough files are pooled'
                 tmpLog.debug(msgStr)
             # release the lock
             tmpLog.debug('attempt to release DB lock for self.id - {0} self.dummy_transfer_id - {1}'.format(self.id,self.dummy_transfer_id))
             self.have_db_lock = self.dbInterface.release_object_lock(self.dummy_transfer_id) 
             if not self.have_db_lock:
                 msgStr += ' - Could not release DB lock for {}'.format(self.dummy_transfer_id)
                 tmpLog.error(msgStr)
             # return None to retry later
             return None, msgStr
     # check transfer with real transfer IDs
     # get transfer groups 
     groups = jobspec.get_groups_of_input_files(skip_ready=True)
     for transferID in groups:
         if transferID != self.dummy_transfer_id :
             # get transfer task
             tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog,self.tc,transferID)
             # return a temporary error when failed to get task
             if not tmpStat:
                 errStr = 'failed to get transfer task'
                 tmpLog.error(errStr)
                 return None, errStr
             # return a temporary error when task is missing 
             if transferID not in transferTasks:
                 errStr = 'transfer task ID - {} is missing'.format(transferID)
                 tmpLog.error(errStr)
                 return None, errStr
              # succeeded in finding a transfer task by transferID
             if transferTasks[transferID]['status'] == 'SUCCEEDED':
                 tmpLog.debug('transfer task {} succeeded'.format(transferID))
                 self.set_FileSpec_status(jobspec,'finished')
                 return True, ''
             # failed
             if transferTasks[transferID]['status'] == 'FAILED':
                 errStr = 'transfer task {} failed'.format(transferID)
                 tmpLog.error(errStr)
                 self.set_FileSpec_status(jobspec,'failed')
                 return False, errStr
             # another status
             tmpStr = 'transfer task {0} status: {1}'.format(transferID,transferTasks[transferID]['status'])
             tmpLog.debug(tmpStr)
             return None, ''
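Like the stage-out example above, this preparator pools files under a dummy transfer ID and only submits once at least 10 files are queued or the group is older than 10 minutes. The batching predicate reduces to a single comparison; a standalone sketch with illustrative names:

import datetime

def ready_to_submit(file_specs, group_update_time, min_files=10, max_wait_minutes=10):
    # submit when enough files are pooled or the group has waited long enough
    deadline = datetime.datetime.utcnow() - datetime.timedelta(minutes=max_wait_minutes)
    return len(file_specs) >= min_files or group_update_time < deadline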
Example #6
 def trigger_preparation(self, jobspec):
     # get logger
     tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_preparation')
     tmpLog.debug('start')               
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc :
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog,self.tc,label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
         return True, ''
      # set the Globus destination Endpoint id and path; will get them from AGIS eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     self.Globus_srcPath = queueConfig.preparator['Globus_srcPath']
     self.srcEndpoint = queueConfig.preparator['srcEndpoint']
     self.Globus_dstPath = self.basePath
     #self.Globus_dstPath = queueConfig.preparator['Globus_dstPath']
     self.dstEndpoint = queueConfig.preparator['dstEndpoint']
     # get input files
     files = []
     lfns = []
     inFiles = jobspec.get_input_file_attributes(skip_ready=True)
     for inLFN, inFile in iteritems(inFiles):
         # set path to each file
         inFile['path'] = mover_utils.construct_file_path(self.basePath, inFile['scope'], inLFN)
         dstpath = inFile['path']
          # check if the path exists; if not, create it
         if not os.access(self.basePath, os.F_OK):
             os.makedirs(self.basePath)
         # create the file paths for the Globus source and destination endpoints 
         Globus_srcpath = mover_utils.construct_file_path(self.Globus_srcPath, inFile['scope'], inLFN)
         Globus_dstpath = mover_utils.construct_file_path(self.Globus_dstPath, inFile['scope'], inLFN)
         files.append({'scope': inFile['scope'],
                       'name': inLFN,
                       'Globus_dstPath': Globus_dstpath,
                       'Globus_srcPath': Globus_srcpath})
         lfns.append(inLFN)
     tmpLog.debug('files[] {0}'.format(files))
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errStr = ''
             if not tmpStatsrc :
                 errStr += ' source Endpoint not activated '
             if not tmpStatdst :
                 errStr += ' destination Endpoint not activated '
             tmpLog.error(errStr)
             return False,errStr
         # both endpoints activated now prepare to transfer data
         if len(files) > 0:
             tdata = TransferData(self.tc,
                                  self.srcEndpoint,
                                  self.dstEndpoint,
                                  label=label,
                                  sync_level="checksum")
          # loop over all input files and add them
             for myfile in files:
                 tdata.add_item(myfile['Globus_srcPath'],myfile['Globus_dstPath'])
             # submit
             transfer_result = self.tc.submit_transfer(tdata)
             # check status code and message
             tmpLog.debug(str(transfer_result))
             if transfer_result['code'] == "Accepted":
                 # succeeded
                  # set the transfer ID, which is used for later lookup
                 transferID = transfer_result['task_id']
                 jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
                 tmpLog.debug('done')
                 return True,''
             else:
                 return False,transfer_result['message']
         # if no files to transfer return True
         return True, 'No files to transfer'
     except:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
          return errStat, errMsg
Example #7
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(_logger,
                               'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(
             jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc:
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(
         tmpLog, self.tc, label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(
             str(transferTasks[label])))
         return True, ''
      # set the Globus destination Endpoint id and path; will get them from AGIS eventually
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
     self.srcEndpoint = queueConfig.stager['srcEndpoint']
     self.Globus_srcPath = self.basePath
     self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
     self.dstEndpoint = queueConfig.stager['dstEndpoint']
     # Test the endpoints and create the transfer data class
     errMsg = None
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(
             tmpLog, self.tc, self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errMsg = ''
             if not tmpStatsrc:
                 errMsg += ' source Endpoint not activated '
             if not tmpStatdst:
                 errMsg += ' destination Endpoint not activated '
             tmpLog.error(errMsg)
             tmpRetVal = (False, errMsg)
             return tmpRetVal
         # both endpoints activated now prepare to transfer data
         tdata = TransferData(self.tc,
                              self.srcEndpoint,
                              self.dstEndpoint,
                              label=label,
                              sync_level="checksum")
     except:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         tmpRetVal = (errStat, errMsg)
         return tmpRetVal
     # loop over all files
     fileAttrs = jobspec.get_output_file_attributes()
     lfns = []
     for fileSpec in jobspec.outFiles:
         scope = fileAttrs[fileSpec.lfn]['scope']
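          # build the Rucio-convention deterministic destination path from md5('scope:lfn')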
         hash = hashlib.md5()
         hash.update('%s:%s' % (scope, fileSpec.lfn))
         hash_hex = hash.hexdigest()
         correctedscope = "/".join(scope.split('.'))
         srcURL = fileSpec.path
         dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(
             endPoint=self.Globus_dstPath,
             scope=correctedscope,
             hash1=hash_hex[0:2],
             hash2=hash_hex[2:4],
             lfn=fileSpec.lfn)
         tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL,
                                                         dstURL=dstURL))
         # add files to transfer object - tdata
         if os.access(srcURL, os.R_OK):
             tmpLog.debug("tdata.add_item({},{})".format(srcURL, dstURL))
             tdata.add_item(srcURL, dstURL)
             lfns.append(fileSpec.lfn)
         else:
             errMsg = "source file {} does not exist".format(srcURL)
             tmpLog.error(errMsg)
             tmpRetVal = (False, errMsg)
             return tmpRetVal
     # submit transfer
     try:
         transfer_result = self.tc.submit_transfer(tdata)
         # check status code and message
         tmpLog.debug(str(transfer_result))
         if transfer_result['code'] == "Accepted":
             # succeeded
              # set the transfer ID, which is used for later lookup
             transferID = transfer_result['task_id']
             tmpLog.debug(
                 'successfully submitted id={0}'.format(transferID))
             jobspec.set_groups_to_files(
                 {transferID: {
                     'lfns': lfns,
                     'groupStatus': 'active'
                 }})
              # record the transfer ID in each output file's attributes
              for fileSpec in jobspec.outFiles:
                  if fileSpec.fileAttributes is None:
                      fileSpec.fileAttributes = {}
                  fileSpec.fileAttributes['transferID'] = transferID
         else:
             tmpRetVal = (False, transfer_result['message'])
     except Exception as e:
         errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
         if errMsg is None:
             errtype, errvalue = sys.exc_info()[:2]
             errMsg = "{0} {1}".format(errtype.__name__, errvalue)
         tmpRetVal = (errStat, errMsg)
     # return
     tmpLog.debug('done')
     return tmpRetVal
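trigger_stage_out records the Globus task ID both in the file group and in each output file's attributes, so a later status check can recover it from either place. A minimal sketch of reading it back (assuming a jobspec shaped like the one above):

 # recover the task ID from the output files' attributes
 for fileSpec in jobspec.outFiles:
     transferID = (fileSpec.fileAttributes or {}).get('transferID')
     if transferID is not None:
         break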
Example #8
    def check_status(self, jobspec):
        # make logger
        tmpLog = self.make_logger(_logger, 'PandaID={0} ThreadID={1}'.format(jobspec.PandaID,threading.current_thread().ident),
                                  method_name='check_status')
        tmpLog.debug('start')
        # show the dummy transfer ID and reset it to a value containing the PandaID if needed
        tmpLog.debug('self.dummy_transfer_id = {}'.format(self.dummy_transfer_id))
        if self.dummy_transfer_id == '{0}_{1}'.format(dummy_transfer_id_base,'XXXX') :
            old_dummy_transfer_id = self.dummy_transfer_id
            self.dummy_transfer_id = '{0}_{1}'.format(dummy_transfer_id_base,jobspec.PandaID)
            tmpLog.debug('Change self.dummy_transfer_id from {0} to {1}'.format(old_dummy_transfer_id,self.dummy_transfer_id))
 
        # default return
        tmpRetVal = (True, '')
        # set flag if have db lock
        have_db_lock = False 
        # check that jobspec.computingSite is defined
        if jobspec.computingSite is None:
            # not found
            tmpLog.error('jobspec.computingSite is not defined')
            return False, 'jobspec.computingSite is not defined'
        else:
            tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
        # get the queueConfig and corresponding objStoreID_ES
        queueConfigMapper = QueueConfigMapper()
        queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
        # check queueConfig stager section to see if jobtype is set
        if 'jobtype' in queueConfig.stager:
            if queueConfig.stager['jobtype'] == "Yoda" :
                self.Yodajob = True
        # set the location of the files in fileSpec.objstoreID
        # see file /cvmfs/atlas.cern.ch/repo/sw/local/etc/agis_ddmendpoints.json 
        self.objstoreID = int(queueConfig.stager['objStoreID_ES'])
        if self.Yodajob :
            self.pathConvention = int(queueConfig.stager['pathConvention'])
            tmpLog.debug('Yoda Job - PandaID = {0} objstoreID = {1} pathConvention = {2}'.format(jobspec.PandaID,self.objstoreID,self.pathConvention))
        else:
            self.pathConvention = None
            tmpLog.debug('PandaID = {0} objstoreID = {1}'.format(jobspec.PandaID,self.objstoreID))
        # test we have a Globus Transfer Client
        if not self.tc :
            errStr = 'failed to get Globus Transfer Client'
            tmpLog.error(errStr)
            return False, errStr
        # set transferID to None
        transferID = None
        # get the scope of the log files
        outfileattrib = jobspec.get_output_file_attributes()
        scopeLog = 'xxxx'
        for key in outfileattrib.keys():
            if "log.tgz" in key :
                scopeLog = outfileattrib[key]['scope']
        # get transfer groups
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
        # lock if the dummy transfer ID is used to avoid submitting duplicated transfer requests
        for dummy_transferID in groups:
            # skip if it is a valid transfer ID, not a dummy one
            if validate_transferid(dummy_transferID) :
                continue
            # lock for 120 sec
            tmpLog.debug('attempt to set DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
            have_db_lock = self.dbInterface.get_object_lock(dummy_transferID, lock_interval=120)
            if not have_db_lock:
                # escape since locked by another thread
                msgStr = 'escape since locked by another thread'
                tmpLog.debug(msgStr)
                return None, msgStr
            # refresh group information since that could have been updated by another thread before getting the lock
            tmpLog.debug('self.dbInterface.refresh_file_group_info(jobspec)')
            self.dbInterface.refresh_file_group_info(jobspec)
            # get transfer groups again with refreshed info
            tmpLog.debug('After db refresh call groups=jobspec.get_groups_of_output_files()')
            groups = jobspec.get_groups_of_output_files()
            tmpLog.debug('jobspec.get_groups_of_output_files() = : {0}'.format(groups))
            # the dummy transfer ID is still there
            if dummy_transferID in groups:
                groupUpdateTime = groups[dummy_transferID]['groupUpdateTime']
                # get files with the dummy transfer ID across jobs
                fileSpecs = self.dbInterface.get_files_with_group_id(dummy_transferID)
                # submit transfer if there are at least 10 files or the group was created more than 10 min ago
                msgStr = 'dummy_transferID = {0}  number of files = {1}'.format(dummy_transferID,len(fileSpecs))
                tmpLog.debug(msgStr)
                if len(fileSpecs) >= 10 or \
                        groupUpdateTime < datetime.datetime.utcnow() - datetime.timedelta(minutes=10):
                    tmpLog.debug('prepare to transfer files')
                    # submit transfer and get a real transfer ID
                    # set the Globus destination Endpoint id and path; will get them from AGIS eventually
                    #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
                    self.srcEndpoint = queueConfig.stager['srcEndpoint']
                    self.Globus_srcPath = self.basePath
                    self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
                    self.dstEndpoint = queueConfig.stager['dstEndpoint']
                    # Test the endpoints and create the transfer data class 
                    errMsg = None
                    try:
                        # Test endpoints for activation
                        tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
                        tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
                        if tmpStatsrc and tmpStatdst:
                            errStr = 'source Endpoint and destination Endpoint activated'
                            tmpLog.debug(errStr)
                        else:
                            errMsg = ''
                            if not tmpStatsrc :
                                errMsg += ' source Endpoint not activated '
                            if not tmpStatdst :
                                errMsg += ' destination Endpoint not activated '
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (None,errMsg)
                            return tmpRetVal
                        # both endpoints activated now prepare to transfer data
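                        # sync_level="checksum": Globus re-transfers a file only when
                        # the source and destination checksums differ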
                        tdata = TransferData(self.tc,
                                             self.srcEndpoint,
                                             self.dstEndpoint,
                                             sync_level="checksum")
                    except Exception:
                        errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        tmpRetVal = (errStat, errMsg)
                        return tmpRetVal
                    # loop over all files
                    ifile = 0
                    for fileSpec in fileSpecs:
                        logfile = False
                        scope ='panda'
                        if fileSpec.scope is not None :
                            scope = fileSpec.scope
                        # for Yoda job set the scope to transient for non log files
                        if self.Yodajob :
                            scope = 'transient'
                        if fileSpec.fileType == "log" :
                            logfile = True
                            scope = scopeLog
                        # only log the first 25 files
                        if ifile < 25:
                            msgStr = "fileSpec.lfn - {0} fileSpec.scope - {1}".format(fileSpec.lfn, fileSpec.scope)
                            tmpLog.debug(msgStr)
                        if ifile == 25:
                            msgStr = "printed first 25 files, skipping the rest"
                            tmpLog.debug(msgStr)
                        # md5 over "scope:lfn"; avoid shadowing the built-in hash()
                        hash_obj = hashlib.md5()
                        hash_obj.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
                        hash_hex = hash_obj.hexdigest()
                        correctedscope = "/".join(scope.split('.'))
                        srcURL = fileSpec.path
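                        # rucio-style deterministic destination path:
                        # <Globus_dstPath>/<scope as directories>/<md5[0:2]>/<md5[2:4]>/<lfn>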
                        dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                                   scope=correctedscope,
                                                                                   hash1=hash_hex[0:2],
                                                                                   hash2=hash_hex[2:4],
                                                                                   lfn=fileSpec.lfn)
                        if logfile or ifile < 25:
                            tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
                        # add files to transfer object - tdata
                        if os.access(srcURL, os.R_OK):
                            if ifile < 25 :
                                tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
                            tdata.add_item(srcURL,dstURL)
                        else:
                            errMsg = "source file {} does not exist".format(srcURL)
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not release_db_lock:
                                errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                            tmpLog.error(errMsg)
                            tmpRetVal = (False,errMsg)
                            return tmpRetVal
                        ifile += 1
                    # submit transfer 
                    tmpLog.debug('Number of files to transfer - {}'.format(len(tdata['DATA'])))
                    try:
                        transfer_result = self.tc.submit_transfer(tdata)
                        # check status code and message
                        tmpLog.debug(str(transfer_result))
                        if transfer_result['code'] == "Accepted":
                            # succeeded
                            # set transfer ID, which is used for later lookup
                            transferID = transfer_result['task_id']
                            tmpLog.debug('successfully submitted id={0}'.format(transferID))
                            # set status for files
                            self.dbInterface.set_file_group(fileSpecs, transferID, 'running')
                            msgStr = 'submitted transfer with ID={0}'.format(transferID)
                            tmpLog.debug(msgStr)
                        else:
                            # release process lock
                            tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                            release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                            if not release_db_lock:
                                errMsg = 'Could not release DB lock for {}'.format(dummy_transferID)
                                tmpLog.error(errMsg)
                            tmpRetVal = (None, transfer_result['message'])
                            return tmpRetVal
                    except Exception as e:
                        errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
                        # release process lock
                        tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                        release_db_lock = self.dbInterface.release_object_lock(dummy_transferID)
                        if not release_db_lock:
                            errMsg += ' - Could not release DB lock for {}'.format(dummy_transferID)
                        tmpLog.error(errMsg)
                        return errStat, errMsg
                else:
                    msgStr = 'wait until enough files are pooled'
                    tmpLog.debug(msgStr)
                # release the lock
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False
                else:
                    msgStr += ' - Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                # return None to retry later
                return None, msgStr
            # release the db lock if needed
            if have_db_lock:
                tmpLog.debug('attempt to release DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                release_db_lock = self.dbInterface.release_object_lock(dummy_transferID) 
                if release_db_lock:
                    tmpLog.debug('released DB lock for self.id - {0} dummy_transferID - {1}'.format(self.id,dummy_transferID))
                    have_db_lock = False 
                else:
                    msgStr = 'Could not release DB lock for {}'.format(dummy_transferID)
                    tmpLog.error(msgStr)
                    return None, msgStr
        # check transfer with real transfer IDs
        # get transfer groups 
        tmpLog.debug("groups = jobspec.get_groups_of_output_files()")
        groups = jobspec.get_groups_of_output_files()
        tmpLog.debug('Number of transfer groups - {0}'.format(len(groups)))
        tmpLog.debug('transfer groups any state - {0}'.format(groups))
        if len(groups) == 0:
            tmpLog.debug("jobspec.get_groups_of_output_files(skip_done=True) returned no files ")
            tmpLog.debug("check_status return status - True ")
            return True,''

        for transferID in groups:
            # allow only valid UUID
            if validate_transferid(transferID) :
                # get transfer task
                tmpStat, transferTasks = globus_utils.get_transfer_task_by_id(tmpLog,self.tc,transferID)
                # return a temporary error when failed to get task
                if not tmpStat:
                    errStr = 'failed to get transfer task; tc = %s; transferID = %s' % (str(self.tc),str(transferID))
                    tmpLog.error(errStr)
                    return None, errStr
                # return a temporary error when task is missing 
                if transferID not in transferTasks:
                    errStr = 'transfer task ID - {} is missing'.format(transferID)
                    tmpLog.error(errStr)
                    return None, errStr
                # succeeded in finding a transfer task by transferID
                if transferTasks[transferID]['status'] == 'SUCCEEDED':
                    tmpLog.debug('transfer task {} succeeded'.format(transferID))
                    self.set_FileSpec_objstoreID(jobspec, self.objstoreID, self.pathConvention)
                    if self.changeFileStatusOnSuccess:
                        self.set_FileSpec_status(jobspec, 'finished')
                    return True, ''
                # failed
                if transferTasks[transferID]['status'] == 'FAILED':
                    errStr = 'transfer task {} failed'.format(transferID)
                    tmpLog.error(errStr)
                    self.set_FileSpec_status(jobspec,'failed')
                    return False, errStr
                # another status
                tmpStr = 'transfer task {0} status: {1}'.format(transferID,transferTasks[transferID]['status'])
                tmpLog.debug(tmpStr)
                return None, ''
        # end of loop over transfer groups
        tmpLog.debug('End of loop over transfer groups - ending check_status function')
        return None,'no valid transfer id found'
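
The check_status logic above relies on validate_transferid() to tell real Globus task IDs apart from the dummy placeholder IDs attached to groups that are still being pooled. That helper is not shown in this excerpt; a minimal sketch of one plausible implementation, assuming Globus task IDs are plain UUID strings, could look like this:

import uuid

def validate_transferid(transfer_id):
    # hypothetical helper: accept anything that parses as a UUID as a real
    # Globus task ID; dummy placeholder IDs will fail to parse
    try:
        uuid.UUID(str(transfer_id))
    except ValueError:
        return False
    return True
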
Example #9
 def trigger_stage_out(self, jobspec):
     # make logger
     tmpLog = self.make_logger(_logger, 'PandaID={0}'.format(jobspec.PandaID),
                               method_name='trigger_stage_out')
     tmpLog.debug('start')
     # default return
     tmpRetVal = (True, '')
     # check that jobspec.computingSite is defined
     if jobspec.computingSite is None:
         # not found
         tmpLog.error('jobspec.computingSite is not defined')
         return False, 'jobspec.computingSite is not defined'
     else:
         tmpLog.debug('jobspec.computingSite : {0}'.format(jobspec.computingSite))
     # test we have a Globus Transfer Client
     if not self.tc :
         errStr = 'failed to get Globus Transfer Client'
         tmpLog.error(errStr)
         return False, errStr
     # get label
     label = self.make_label(jobspec)
     tmpLog.debug('label={0}'.format(label))
     # get transfer tasks
     tmpStat, transferTasks = globus_utils.get_transfer_tasks(tmpLog,self.tc,label)
     if not tmpStat:
         errStr = 'failed to get transfer tasks'
         tmpLog.error(errStr)
         return False, errStr
     # check if already queued
     if label in transferTasks:
         tmpLog.debug('skip since already queued with {0}'.format(str(transferTasks[label])))
         return True, ''
     # set the Globus destination endpoint ID and path; eventually these will come from AGIS
     from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
     queueConfigMapper = QueueConfigMapper()
     queueConfig = queueConfigMapper.get_queue(jobspec.computingSite)
     #self.Globus_srcPath = queueConfig.stager['Globus_srcPath']
     self.srcEndpoint = queueConfig.stager['srcEndpoint']
     self.Globus_srcPath = self.basePath
     self.Globus_dstPath = queueConfig.stager['Globus_dstPath']
     self.dstEndpoint = queueConfig.stager['dstEndpoint']
     # Test the endpoints and create the transfer data class 
     errMsg = None
     try:
         # Test endpoints for activation
         tmpStatsrc, srcStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.srcEndpoint)
         tmpStatdst, dstStr = globus_utils.check_endpoint_activation(tmpLog,self.tc,self.dstEndpoint)
         if tmpStatsrc and tmpStatdst:
             errStr = 'source Endpoint and destination Endpoint activated'
             tmpLog.debug(errStr)
         else:
             errMsg = ''
             if not tmpStatsrc :
                 errMsg += ' source Endpoint not activated '
             if not tmpStatdst :
                 errMsg += ' destination Endpoint not activated '
             tmpLog.error(errMsg)
             tmpRetVal = (False,errMsg)
             return tmpRetVal
         # both endpoints activated now prepare to transfer data
         tdata = TransferData(self.tc,
                              self.srcEndpoint,
                              self.dstEndpoint,
                              label=label,
                              sync_level="checksum")
     except Exception:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
         tmpRetVal = (errStat, errMsg)
         return tmpRetVal
     # loop over all files
     fileAttrs = jobspec.get_output_file_attributes()
     lfns = []
     for fileSpec in jobspec.outFiles:
         scope = fileAttrs[fileSpec.lfn]['scope']
         # md5 over "scope:lfn"; avoid shadowing the built-in hash()
         hash_obj = hashlib.md5()
         hash_obj.update(('%s:%s' % (scope, fileSpec.lfn)).encode('utf-8'))
         hash_hex = hash_obj.hexdigest()
         correctedscope = "/".join(scope.split('.'))
         srcURL = fileSpec.path
         dstURL = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=self.Globus_dstPath,
                                                                    scope=correctedscope,
                                                                    hash1=hash_hex[0:2],
                                                                    hash2=hash_hex[2:4],
                                                                    lfn=fileSpec.lfn)
         tmpLog.debug('src={srcURL} dst={dstURL}'.format(srcURL=srcURL, dstURL=dstURL))
         # add files to transfer object - tdata
         if os.access(srcURL, os.R_OK):
             tmpLog.debug("tdata.add_item({},{})".format(srcURL,dstURL))
             tdata.add_item(srcURL,dstURL)
             lfns.append(fileSpec.lfn)
         else:
             errMsg = "source file {} does not exist".format(srcURL)
             tmpLog.error(errMsg)
             tmpRetVal = (False,errMsg)
             return tmpRetVal
     # submit transfer 
     try:
         transfer_result = self.tc.submit_transfer(tdata)
         # check status code and message
         tmpLog.debug(str(transfer_result))
         if transfer_result['code'] == "Accepted":
             # succeeded
             # set transfer ID, which is used for later lookup
             transferID = transfer_result['task_id']
             tmpLog.debug('successfully submitted id={0}'.format(transferID))
             jobspec.set_groups_to_files({transferID: {'lfns': lfns, 'groupStatus': 'active'}})
             # record the transfer ID on each output file
             for fileSpec in jobspec.outFiles:
                 if fileSpec.fileAttributes is None:
                     fileSpec.fileAttributes = {}
                 # set it for every file, not only those whose attributes were just initialized
                 fileSpec.fileAttributes['transferID'] = transferID
         else:
             tmpRetVal = (False, transfer_result['message'])
     except Exception as e:
         errStat,errMsg = globus_utils.handle_globus_exception(tmpLog)
         if errMsg is None:
             errtype, errvalue = sys.exc_info()[:2]
             errMsg = "{0} {1}".format(errtype.__name__, errvalue)
         tmpRetVal = (errStat,errMsg)
     # return
     tmpLog.debug('done')
     return tmpRetVal
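
Both examples share the same deterministic destination-path convention and the same submission flow. The sketch below mirrors that flow outside of Harvester, assuming a globus_sdk release with the older TransferData(client, source, destination, ...) signature used in these examples; the transfer client setup is omitted and all endpoint IDs, paths, scopes, and LFNs are placeholders:

import hashlib

from globus_sdk import TransferData  # client authentication/setup omitted


def hashed_dst_path(base_path, scope, lfn):
    # rucio-style deterministic path:
    # <base>/<scope as directories>/<md5[0:2]>/<md5[2:4]>/<lfn>
    hash_hex = hashlib.md5(('%s:%s' % (scope, lfn)).encode('utf-8')).hexdigest()
    scope_dirs = '/'.join(scope.split('.'))
    return '{0}/{1}/{2}/{3}/{4}'.format(base_path, scope_dirs,
                                        hash_hex[0:2], hash_hex[2:4], lfn)


def submit_one_file(tc, src_endpoint, dst_endpoint,
                    src_path, dst_base, scope, lfn, label=None):
    # build a transfer document with a single item and submit it;
    # returns the Globus task ID on success
    tdata = TransferData(tc, src_endpoint, dst_endpoint,
                         label=label, sync_level="checksum")
    tdata.add_item(src_path, hashed_dst_path(dst_base, scope, lfn))
    transfer_result = tc.submit_transfer(tdata)
    if transfer_result['code'] != "Accepted":
        raise RuntimeError(transfer_result['message'])
    return transfer_result['task_id']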