Python FileSpec.scope Examples, pandaharvester.harvestercore.file_spec.FileSpec.scope Python Examples

Example #1

0

Show file

File: act_stager.py Project: HSF/harvester

    def trigger_stage_out(self, jobspec):
        """Attach a placeholder file to the job and report success.

        A FileSpec named ``dummy.<PandaID>`` (scope ``dummy``, file type
        ``output``) is filled from the job's PandaID and taskID and handed to
        ``jobspec.add_in_file``. The placeholder is there to force harvester
        to wait until the aCT job is done.

        :param jobspec: job specifications
        :type jobspec: JobSpec
        :return: A tuple of return code (True: success, False: fatal failure, None: temporary failure)
                 and error dialog; this implementation always returns ``(True, '')``
        :rtype: (bool, string)
        """
        placeholder = FileSpec()
        panda_id = jobspec.PandaID
        placeholder.PandaID = panda_id
        placeholder.taskID = jobspec.taskID
        placeholder.lfn = 'dummy.{0}'.format(panda_id)
        placeholder.scope = 'dummy'
        placeholder.fileType = 'output'
        # NOTE(review): the spec is typed 'output' yet attached through
        # add_in_file -- confirm this is intended
        jobspec.add_in_file(placeholder)
        return True, ''

Example #2

0

Show file

File: job_fetcher.py Project: PanDAWMS/panda-harvester

 def run(self):
     """Fetch jobs queue by queue and store them in the DB until terminated.

     Each cycle asks the DB proxy how many jobs every queue should fetch,
     requests at most ``harvester_config.jobfetcher.maxJobs`` jobs per queue
     from the communicator, turns every returned job into a JobSpec carrying
     one FileSpec per input file, and inserts the resulting specs through the
     DB proxy. The method returns once ``self.terminated`` gives a true value.
     """
     # statuses showing that the file is (being) taken care of by another job
     handled_statuses = ('ready', 'preparing', 'to_prepare')
     while True:
         main_logger = self.make_logger(_logger, 'id={0}'.format(self.get_pid()), method_name='run')
         main_logger.debug('getting number of jobs to be fetched')
         # how many jobs each queue should fetch
         jobs_to_fetch = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                            harvester_config.jobfetcher.lookupTime)
         main_logger.debug('got {0} queues'.format(len(jobs_to_fetch)))
         for queue_name, n_wanted in iteritems(jobs_to_fetch):
             # queues unknown to the mapper are ignored
             if not self.queueConfigMapper.has_queue(queue_name):
                 continue
             queue_logger = self.make_logger(_logger, 'queueName={0}'.format(queue_name),
                                             method_name='run')
             queue_config = self.queueConfigMapper.get_queue(queue_name)
             # never request more than the configured maximum
             max_jobs = harvester_config.jobfetcher.maxJobs
             if n_wanted > max_jobs:
                 n_wanted = max_jobs
             queue_logger.debug('getting {0} jobs'.format(n_wanted))
             fetch_timer = core_utils.get_stopwatch()
             fetched, err_msg = self.communicator.get_jobs(queue_config.siteName, self.nodeName,
                                                           queue_config.get_source_label(),
                                                           self.nodeName, n_wanted,
                                                           queue_config.getJobCriteria)
             queue_logger.info('got {0} jobs with {1} {2}'.format(len(fetched), err_msg,
                                                                  fetch_timer.get_elapsed_time()))
             # nothing to convert or insert for this queue
             if len(fetched) == 0:
                 continue
             job_specs = []
             # per-LFN status counters, looked up in the DB once per LFN
             status_cache = dict()
             convert_timer = core_utils.get_stopwatch()
             for raw_job in fetched:
                 now = datetime.datetime.utcnow()
                 spec = JobSpec()
                 spec.convert_job_json(raw_job)
                 spec.computingSite = queue_name
                 spec.status = 'starting'
                 spec.subStatus = 'fetched'
                 spec.creationTime = now
                 spec.stateChangeTime = now
                 spec.configID = queue_config.configID
                 spec.set_one_attribute('schedulerID',
                                        'harvester-{0}'.format(harvester_config.master.harvester_id))
                 # the queue-level zipPerMB is only a fallback
                 if queue_config.zipPerMB is not None and spec.zipPerMB is None:
                     spec.zipPerMB = queue_config.zipPerMB
                 for lfn, attrs in iteritems(spec.get_input_file_attributes()):
                     if lfn not in status_cache:
                         status_cache[lfn] = self.dbProxy.get_file_status(lfn, 'input',
                                                                          queue_config.ddmEndpointIn,
                                                                          'starting')
                     lfn_counts = status_cache[lfn]
                     # build the file spec for this input
                     in_file = FileSpec()
                     in_file.PandaID = spec.PandaID
                     in_file.taskID = spec.taskID
                     in_file.lfn = lfn
                     in_file.endpoint = queue_config.ddmEndpointIn
                     in_file.scope = attrs['scope']
                     # 'preparing' skips stage-in when another job already handles the file
                     if any(status in lfn_counts for status in handled_statuses):
                         in_file.status = 'preparing'
                     else:
                         in_file.status = 'to_prepare'
                     # count the status just assigned so later files of this batch see it
                     if in_file.status not in lfn_counts:
                         lfn_counts[in_file.status] = 0
                     lfn_counts[in_file.status] += 1
                     in_file.fileType = 'input'
                     spec.add_in_file(in_file)
                 spec.trigger_propagation()
                 job_specs.append(spec)
             queue_logger.debug("Converting of {0} jobs {1}".format(len(fetched),
                                                                    convert_timer.get_elapsed_time()))
             # store the converted jobs
             insert_timer = core_utils.get_stopwatch()
             self.dbProxy.insert_jobs(job_specs)
             queue_logger.debug('Insert of {0} jobs {1}'.format(len(job_specs),
                                                                insert_timer.get_elapsed_time()))
         main_logger.debug('done')
         # leave the loop when termination is signalled
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             main_logger.debug('terminated')
             return

Example #3

0

Show file

File: stageOutTest_go_bulk_stager.py Project: PanDAWMS/panda-harvester

 jobSpec.PandaID = job_id
 jobSpec.modificationTime = datetime.datetime.now()
 realDataset = 'panda.sgotest.' + uuid.uuid4().hex
 ddmEndPointOut = 'BNL-OSG2_DATADISK'
 outFiles_scope_str = ''
 outFiles_str = ''
 realDatasets_str = ''
 ddmEndPointOut_str = ''
 # create up 5 files for output
 for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'es_output'
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn + '.gz'
    fileSpec.scope = 'panda'
    outFiles_scope_str += 'panda,' 
    outFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    ddmEndPointOut_str += ddmEndPointOut + ","
    assFileSpec.fileType = 'es_output'
    assFileSpec.fsize = random.randint(10, 100)
    # create source file
    hash = hashlib.md5()
    hash.update('%s:%s' % (scope, fileSpec.lfn))
    hash_hex = hash.hexdigest()
    correctedscope = "/".join(scope.split('.'))
    assFileSpec.path = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=queueConfig.stager['Globus_srcPath'],
                                                                         scope=correctedscope,
                                                                         hash1=hash_hex[0:2],
                                                                         hash2=hash_hex[2:4],

Example #4

0

Show file

 def run(self):
     """Fetch jobs queue by queue and store them in the DB until terminated.

     Each cycle asks the DB proxy how many jobs every queue should fetch and
     requests at most ``harvester_config.jobfetcher.maxJobs`` jobs per queue
     from the communicator. The prodSourceLabel for the request is drawn with
     ``random.choice`` from the list produced by
     ``core_utils.make_choice_list``. Every returned job becomes a JobSpec
     with one FileSpec per input file, plus the files reported by the queue's
     extractor plugin when the queue config has an ``extractor`` attribute.
     The specs are inserted through the DB proxy. The method returns once
     ``self.terminated`` gives a true value.
     """
     # statuses showing that the file is (being) taken care of by another job
     handled_statuses = ('ready', 'preparing', 'to_prepare')
     while True:
         main_logger = self.make_logger(
             _logger, 'id={0}'.format(self.get_pid()), method_name='run')
         main_logger.debug('getting number of jobs to be fetched')
         # how many jobs each queue should fetch
         jobs_to_fetch = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         main_logger.debug('got {0} queues'.format(len(jobs_to_fetch)))
         for queue_name, n_wanted in iteritems(jobs_to_fetch):
             # queues unknown to the mapper are ignored
             if not self.queueConfigMapper.has_queue(queue_name):
                 continue
             queue_logger = self.make_logger(
                 _logger, 'queueName={0}'.format(queue_name), method_name='run')
             queue_config = self.queueConfigMapper.get_queue(queue_name)
             # never request more than the configured maximum
             max_jobs = harvester_config.jobfetcher.maxJobs
             if n_wanted > max_jobs:
                 n_wanted = max_jobs
             # draw the prodSourceLabel to ask for; the queue's own label is the default
             fallback_label = queue_config.get_source_label()
             label_weights = getattr(
                 queue_config, 'prodSourceLabelRandomWeightsPermille', {})
             label_choices = core_utils.make_choice_list(
                 pdpm=label_weights, default=fallback_label)
             chosen_label = random.choice(label_choices)
             queue_logger.debug(
                 'getting {0} jobs for prodSourceLabel {1}'.format(n_wanted, chosen_label))
             fetch_timer = core_utils.get_stopwatch()
             fetched, err_msg = self.communicator.get_jobs(
                 queue_config.siteName, self.nodeName, chosen_label,
                 self.nodeName, n_wanted, queue_config.getJobCriteria)
             queue_logger.info(
                 'got {0} jobs with {1} {2}'.format(
                     len(fetched), err_msg, fetch_timer.get_elapsed_time()))
             # nothing to convert or insert for this queue
             if len(fetched) == 0:
                 continue
             # optional extractor plugin that supplies auxiliary inputs
             extractor = None
             if hasattr(queue_config, 'extractor'):
                 extractor = self.pluginFactory.get_plugin(queue_config.extractor)
             job_specs = []
             # per-LFN status counters, looked up in the DB once per LFN
             status_cache = dict()
             convert_timer = core_utils.get_stopwatch()
             for raw_job in fetched:
                 now = datetime.datetime.utcnow()
                 spec = JobSpec()
                 spec.convert_job_json(raw_job)
                 spec.computingSite = queue_name
                 spec.status = 'starting'
                 spec.subStatus = 'fetched'
                 spec.creationTime = now
                 spec.stateChangeTime = now
                 spec.configID = queue_config.configID
                 spec.set_one_attribute(
                     'schedulerID',
                     'harvester-{0}'.format(harvester_config.master.harvester_id))
                 # the queue-level zipPerMB is only a fallback
                 if queue_config.zipPerMB is not None and spec.zipPerMB is None:
                     spec.zipPerMB = queue_config.zipPerMB
                 # regular inputs first, then auxiliary inputs from the extractor
                 file_groups = [spec.get_input_file_attributes()]
                 if extractor is not None:
                     file_groups.append(extractor.get_aux_inputs(spec))
                 for file_group in file_groups:
                     for lfn, attrs in iteritems(file_group):
                         if lfn not in status_cache:
                             status_cache[lfn] = self.dbProxy.get_file_status(
                                 lfn, 'input', queue_config.ddmEndpointIn, 'starting')
                         lfn_counts = status_cache[lfn]
                         # build the file spec for this input
                         in_file = FileSpec()
                         in_file.PandaID = spec.PandaID
                         in_file.taskID = spec.taskID
                         in_file.lfn = lfn
                         in_file.endpoint = queue_config.ddmEndpointIn
                         in_file.scope = attrs['scope']
                         # 'preparing' skips stage-in when another job already handles the file
                         if any(status in lfn_counts for status in handled_statuses):
                             in_file.status = 'preparing'
                         else:
                             in_file.status = 'to_prepare'
                         # count the status just assigned so later files of this batch see it
                         if in_file.status not in lfn_counts:
                             lfn_counts[in_file.status] = 0
                         lfn_counts[in_file.status] += 1
                         # an explicit internal file type also marks the job as having aux input
                         if 'INTERNAL_FileType' not in attrs:
                             in_file.fileType = 'input'
                         else:
                             in_file.fileType = attrs['INTERNAL_FileType']
                             spec.auxInput = JobSpec.AUX_hasAuxInput
                         if 'INTERNAL_URL' in attrs:
                             in_file.url = attrs['INTERNAL_URL']
                         spec.add_in_file(in_file)
                 spec.trigger_propagation()
                 job_specs.append(spec)
             queue_logger.debug(
                 "Converting of {0} jobs {1}".format(
                     len(fetched), convert_timer.get_elapsed_time()))
             # store the converted jobs
             insert_timer = core_utils.get_stopwatch()
             self.dbProxy.insert_jobs(job_specs)
             queue_logger.debug(
                 'Insert of {0} jobs {1}'.format(
                     len(job_specs), insert_timer.get_elapsed_time()))
         main_logger.debug('done')
         # leave the loop when termination is signalled
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             main_logger.debug('terminated')
             return

Example #5

0

Show file

File: stageInTest_globus.py Project: pavlo-svirin/harvester

# Comma-terminated string accumulators; every iteration of the loop below
# appends one entry per generated file to each of them.
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
fsize_str = ''
checksum_str = ''
scope_in_str = ''

# create between 1 and 5 input files (the count is picked at random)
for index in range(random.randint(1, 5)):
    fileSpec = FileSpec()
    assFileSpec = FileSpec()
    fileSpec.fileType = 'input'
    # both specs share the same randomly generated LFN
    assFileSpec.lfn = 'panda.sgotest.' + uuid.uuid4().hex
    fileSpec.lfn = assFileSpec.lfn
    fileSpec.scope = 'panda'
    # NOTE(review): inFiles_scope_str, inFiles_str, realDataset and
    # ddmEndPointIn are not initialised in this excerpt -- presumably they are
    # defined earlier in the script; confirm
    inFiles_scope_str += 'panda,'
    inFiles_str += fileSpec.lfn + ','
    realDatasets_str += realDataset + ","
    realDatasetsIn_str += realDataset + ","
    ddmEndPointIn_str += ddmEndPointIn + ","
    # some dummy inputs: the same fixed GUID, size and checksum for every file
    GUID_str += 'd82e8e5e301b77489fd4da04bcdd6565,'
    fsize_str += '3084569129,'
    checksum_str += 'ad:9f60d29f,'
    scope_in_str += 'panda,'
    # the associated spec gets a random size between 10 and 100
    assFileSpec.fileType = 'input'
    assFileSpec.fsize = random.randint(10, 100)
    # create source file
    # NOTE(review): this name shadows the built-in hash() for the rest of the
    # script
    hash = hashlib.md5()

Example #6

0

Show file

File: job_fetcher.py Project: jtchilders/panda-harvester

 def run(self):
     """Fetch jobs queue by queue and store them in the DB until terminated.

     Each cycle asks the DB proxy how many jobs every queue should fetch,
     requests at most ``harvester_config.jobfetcher.maxJobs`` jobs per queue
     from the communicator, turns every returned job into a JobSpec carrying
     one FileSpec per input file, and inserts the resulting specs through the
     DB proxy. The method returns once ``self.terminated`` gives a true value.
     """
     # statuses showing that the file is (being) taken care of by another job
     handled_statuses = ('ready', 'preparing', 'to_prepare')
     while True:
         main_logger = self.make_logger(
             _logger, 'id={0}'.format(self.ident), method_name='run')
         main_logger.debug('getting number of jobs to be fetched')
         # how many jobs each queue should fetch
         jobs_to_fetch = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         main_logger.debug('got {0} queues'.format(len(jobs_to_fetch)))
         for queue_name, n_wanted in iteritems(jobs_to_fetch):
             # queues unknown to the mapper are ignored
             if not self.queueConfigMapper.has_queue(queue_name):
                 continue
             queue_logger = self.make_logger(
                 _logger, 'queueName={0}'.format(queue_name), method_name='run')
             queue_config = self.queueConfigMapper.get_queue(queue_name)
             # never request more than the configured maximum
             max_jobs = harvester_config.jobfetcher.maxJobs
             if n_wanted > max_jobs:
                 n_wanted = max_jobs
             queue_logger.debug('getting {0} jobs'.format(n_wanted))
             fetched, err_msg = self.communicator.get_jobs(
                 queue_config.siteName, self.nodeName,
                 queue_config.get_source_label(), self.nodeName,
                 n_wanted, queue_config.getJobCriteria)
             queue_logger.info('got {0} jobs with {1}'.format(len(fetched), err_msg))
             # nothing to convert or insert for this queue
             if len(fetched) == 0:
                 continue
             job_specs = []
             # per-LFN status counters, looked up in the DB once per LFN
             status_cache = dict()
             for raw_job in fetched:
                 now = datetime.datetime.utcnow()
                 spec = JobSpec()
                 spec.convert_job_json(raw_job)
                 spec.computingSite = queue_name
                 spec.status = 'starting'
                 spec.subStatus = 'fetched'
                 spec.creationTime = now
                 spec.stateChangeTime = now
                 spec.configID = queue_config.configID
                 spec.set_one_attribute(
                     'schedulerID',
                     'harvester-{0}'.format(harvester_config.master.harvester_id))
                 # the queue-level zipPerMB is only a fallback
                 if queue_config.zipPerMB is not None and spec.zipPerMB is None:
                     spec.zipPerMB = queue_config.zipPerMB
                 for lfn, attrs in iteritems(spec.get_input_file_attributes()):
                     if lfn not in status_cache:
                         status_cache[lfn] = self.dbProxy.get_file_status(
                             lfn, 'input', queue_config.ddmEndpointIn, 'starting')
                     lfn_counts = status_cache[lfn]
                     # build the file spec for this input
                     in_file = FileSpec()
                     in_file.PandaID = spec.PandaID
                     in_file.taskID = spec.taskID
                     in_file.lfn = lfn
                     in_file.endpoint = queue_config.ddmEndpointIn
                     in_file.scope = attrs['scope']
                     # 'preparing' skips stage-in when another job already handles the file
                     if any(status in lfn_counts for status in handled_statuses):
                         in_file.status = 'preparing'
                     else:
                         in_file.status = 'to_prepare'
                     # count the status just assigned so later files of this batch see it
                     if in_file.status not in lfn_counts:
                         lfn_counts[in_file.status] = 0
                     lfn_counts[in_file.status] += 1
                     in_file.fileType = 'input'
                     spec.add_in_file(in_file)
                 spec.trigger_propagation()
                 job_specs.append(spec)
             # store the converted jobs
             self.dbProxy.insert_jobs(job_specs)
         main_logger.debug('done')
         # leave the loop when termination is signalled
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             main_logger.debug('terminated')
             return