Example #1
 def run(self):
     while True:
         mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()), method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                            harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                       queueConfig.get_source_label(),
                                                       self.nodeName, nJobs,
                                                       queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute('schedulerID',
                                               'harvester-{0}'.format(harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                                queueConfig.ddmEndpointIn,
                                                                                'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(len(jobs),sw_startconvert.get_elapsed_time()))
                 sw_insertdb =core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
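
The least obvious part of run() above (and of the variants that follow) is the fileStatMap bookkeeping: the file status is looked up in the DB only once per LFN, and the cached per-status counts decide whether a new FileSpec can skip stage-in because another job already takes care of the file. Below is a minimal stand-alone sketch of just that decision, with dbProxy.get_file_status replaced by a stubbed lookup and all file names and statuses made up for illustration.

def pick_file_status(lfn, file_stat_map, db_lookup):
    # query the DB only once per LFN and cache the per-status counts
    if lfn not in file_stat_map:
        file_stat_map[lfn] = db_lookup(lfn)
    counts = file_stat_map[lfn]
    # skip stage-in if the file is (being) taken care of by another job
    if 'ready' in counts or 'preparing' in counts or 'to_prepare' in counts:
        status = 'preparing'
    else:
        status = 'to_prepare'
    # record the decision so later files in the same cycle see it too
    counts.setdefault(status, 0)
    counts[status] += 1
    return status

known_files = {'lfn.already.staged': {'ready': 1}}  # made-up DB content

def fake_get_file_status(lfn):
    # stand-in for dbProxy.get_file_status(lfn, 'input', endpoint, 'starting')
    return dict(known_files.get(lfn, {}))

stat_map = {}
print(pick_file_status('lfn.already.staged', stat_map, fake_get_file_status))  # preparing
print(pick_file_status('lfn.brand.new', stat_map, fake_get_file_status))       # to_prepare
print(pick_file_status('lfn.brand.new', stat_map, fake_get_file_status))       # preparing (cached)
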
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.ident),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, queueConfig.get_source_label(),
                 self.nodeName, nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1}'.format(len(jobs), errStr))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(
                             jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[
                                 tmpLFN] = self.dbProxy.get_file_status(
                                     tmpLFN, 'input',
                                     queueConfig.ddmEndpointIn, 'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 self.dbProxy.insert_jobs(jobSpecs)
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
Example #3
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.get_pid()),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
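             # choose the prodSourceLabel for this cycle: permille weights from
             # the queue config are expanded into a choice list and one is drawn at random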
             default_prodSourceLabel = queueConfig.get_source_label()
             pdpm = getattr(queueConfig,
                            'prodSourceLabelRandomWeightsPermille', {})
             choice_list = core_utils.make_choice_list(
                 pdpm=pdpm, default=default_prodSourceLabel)
             prodSourceLabel = random.choice(choice_list)
             tmpLog.debug('getting {0} jobs for prodSourceLabel {1}'.format(
                 nJobs, prodSourceLabel))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, prodSourceLabel, self.nodeName,
                 nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(
                 len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 # get extractor plugin
                 if hasattr(queueConfig, 'extractor'):
                     extractorCore = self.pluginFactory.get_plugin(
                         queueConfig.extractor)
                 else:
                     extractorCore = None
                 jobSpecs = []
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     fileGroupDictList = [
                         jobSpec.get_input_file_attributes()
                     ]
                     if extractorCore is not None:
                         fileGroupDictList.append(
                             extractorCore.get_aux_inputs(jobSpec))
                     for fileGroupDict in fileGroupDictList:
                         for tmpLFN, fileAttrs in iteritems(fileGroupDict):
                             # check file status
                             if tmpLFN not in fileStatMap:
                                 fileStatMap[
                                     tmpLFN] = self.dbProxy.get_file_status(
                                         tmpLFN, 'input',
                                         queueConfig.ddmEndpointIn,
                                         'starting')
                             # make file spec
                             fileSpec = FileSpec()
                             fileSpec.PandaID = jobSpec.PandaID
                             fileSpec.taskID = jobSpec.taskID
                             fileSpec.lfn = tmpLFN
                             fileSpec.endpoint = queueConfig.ddmEndpointIn
                             fileSpec.scope = fileAttrs['scope']
                             # set preparing to skip stage-in if the file is (being) taken care of by another job
                             if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                     or 'to_prepare' in fileStatMap[tmpLFN]:
                                 fileSpec.status = 'preparing'
                             else:
                                 fileSpec.status = 'to_prepare'
                             if fileSpec.status not in fileStatMap[tmpLFN]:
                                 fileStatMap[tmpLFN][fileSpec.status] = 0
                             fileStatMap[tmpLFN][fileSpec.status] += 1
                             if 'INTERNAL_FileType' in fileAttrs:
                                 fileSpec.fileType = fileAttrs[
                                     'INTERNAL_FileType']
                                 jobSpec.auxInput = JobSpec.AUX_hasAuxInput
                             else:
                                 fileSpec.fileType = 'input'
                             if 'INTERNAL_URL' in fileAttrs:
                                 fileSpec.url = fileAttrs['INTERNAL_URL']
                             jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(
                     len(jobs), sw_startconvert.get_elapsed_time()))
                 sw_insertdb = core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(
                     len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
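
Example #3 differs from the earlier variants mainly in how the prodSourceLabel is picked: per-label permille weights from the queue configuration are expanded into a choice list and one label is drawn at random each cycle. The implementation of core_utils.make_choice_list is not shown in these excerpts, so the sketch below is only an assumed reading of that behaviour, with a made-up weight map; the real helper may differ.

import random

def make_choice_list(pdpm, default):
    # pdpm maps prodSourceLabels to permille weights, e.g. {'rc_test': 100};
    # whatever is left out of 1000 permille falls back to the default label
    weights = dict(pdpm or {})
    remainder = 1000 - sum(weights.values())
    if remainder > 0:
        weights[default] = weights.get(default, 0) + remainder
    choice_list = []
    for label, permille in weights.items():
        choice_list += [label] * permille
    return choice_list

# hypothetical queue configuration: roughly 10% of fetch cycles use 'rc_test'
choices = make_choice_list({'rc_test': 100}, 'managed')
prodSourceLabel = random.choice(choices)
print(prodSourceLabel)
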
import sys
import uuid
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.event_spec import EventSpec

from pandaharvester.harvestercore.communicator_pool import CommunicatorPool

rID = sys.argv[1]
taskid = rID.split('-')[0]
pandaid = int(rID.split('-')[1])

job = JobSpec()
job.PandaID = pandaid
event = EventSpec()
file = FileSpec()
file.status = 'finished'
file.objstoreID = 9575
file.pathConvention = 1000
file.lfn = str(uuid.uuid4().hex) + '.zip'
file.fsize = 555
file.chksum = '0d2a9dc9'
event.eventRangeID = rID
event.eventStatus = 'finished'
job.zipEventMap = {1: {'events': [event],
                       'zip': file}}


a = CommunicatorPool()
a.update_jobs([job])
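
For reference, the script above takes a single command-line argument of the form taskID-PandaID, builds one finished event range wrapped in a zip FileSpec with a randomly generated LFN, and reports it through CommunicatorPool.update_jobs. The objstoreID, pathConvention, fsize and chksum values are fixed test values, so it looks like a manual test helper for the zipped-event update path rather than production code.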