def test():
    '''test submission'''
    from pandaharvester.harvestercore.job_spec import JobSpec
    from pandaharvester.harvestercore.plugin_factory import PluginFactory
    from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper

    import json

    queuename = 'ARC-TEST'
    queueconfmapper = QueueConfigMapper()
    queueconf = queueconfmapper.get_queue(queuename)
    pluginfactory = PluginFactory()

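    # a full PanDA job description in JSON, as returned by the PanDA server; parsed below into a JobSpec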
    pandajob = '{"jobsetID": 11881, "logGUID": "88ee8a52-5c70-490c-a585-5eb6f48e4152", "cmtConfig": "x86_64-slc6-gcc49-opt", "prodDBlocks": "mc16_13TeV:mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.merge.EVNT.e5340_e5984_tid11329621_00", "dispatchDBlockTokenForOut": "NULL,NULL", "destinationDBlockToken": "dst:CERN-PROD_DATADISK,dst:NDGF-T1_DATADISK", "destinationSE": "CERN-PROD_PRESERVATION", "realDatasets": "mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.simul.HITS.e5340_e5984_s3126_tid11364822_00,mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.simul.log.e5340_e5984_s3126_tid11364822_00", "prodUserID": "gingrich", "GUID": "A407D965-B139-A543-8851-A8E134A678D7", "realDatasetsIn": "mc16_13TeV:mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.merge.EVNT.e5340_e5984_tid11329621_00", "nSent": 2, "cloud": "WORLD", "StatusCode": 0, "homepackage": "AtlasOffline/21.0.15", "inFiles": "EVNT.11329621._001079.pool.root.1", "processingType": "simul", "currentPriority": 900, "fsize": "129263662", "fileDestinationSE": "CERN-PROD_PRESERVATION,BOINC_MCORE", "scopeOut": "mc16_13TeV", "minRamCount": 1573, "jobDefinitionID": 0, "maxWalltime": 40638, "scopeLog": "mc16_13TeV", "transformation": "Sim_tf.py", "maxDiskCount": 485, "coreCount": 1, "prodDBlockToken": "NULL", "transferType": "NULL", "destinationDblock": "mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.simul.HITS.e5340_e5984_s3126_tid11364822_00_sub0418634273,mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.simul.log.e5340_e5984_s3126_tid11364822_00_sub0418634276", "dispatchDBlockToken": "NULL", "jobPars": "--inputEVNTFile=EVNT.11329621._001079.pool.root.1 --maxEvents=50 --postInclude \\"default:RecJobTransforms/UseFrontier.py\\" --preExec \\"EVNTtoHITS:simFlags.SimBarcodeOffset.set_Value_and_Lock(200000)\\" \\"EVNTtoHITS:simFlags.TRTRangeCut=30.0;simFlags.TightMuonStepping=True\\" --preInclude \\"EVNTtoHITS:SimulationJobOptions/preInclude.BeamPipeKill.py,SimulationJobOptions/preInclude.FrozenShowersFCalOnly.py\\" --skipEvents=4550 --firstEvent=5334551 --outputHITSFile=HITS.11364822._128373.pool.root.1 --physicsList=FTFP_BERT_ATL_VALIDATION --randomSeed=106692 --DBRelease=\\"all:current\\" --conditionsTag \\"default:OFLCOND-MC16-SDR-14\\" --geometryVersion=\\"default:ATLAS-R2-2016-01-00-01_VALIDATION\\" --runNumber=364168 --AMITag=s3126 --DataRunNumber=284500 --simulator=FullG4 --truthStrategy=MC15aPlus", "attemptNr": 2, "swRelease": "Atlas-21.0.15", "nucleus": "CERN-PROD", "maxCpuCount": 40638, "outFiles": "HITS.11364822._128373.pool.root.11,log.11364822._128373.job.log.tgz.11", "ddmEndPointOut": "CERN-PROD_DATADISK,NDGF-T1_DATADISK", "scopeIn": "mc16_13TeV", "PandaID": 3487584273, "sourceSite": "NULL", "dispatchDblock": "panda.11364822.07.05.GEN.0c9b1d3b-feec-411a-89e4-1cbf7347d70c_dis003487584270", "prodSourceLabel": "managed", "checksum": "ad:cd0bf10b", "jobName": "mc16_13TeV.364168.Sherpa_221_NNPDF30NNLO_Wmunu_MAXHTPTV500_1000.simul.e5340_e5984_s3126.3433643361", "ddmEndPointIn": "NDGF-T1_DATADISK", "taskID": 11364822, "logFile": "log.11364822._128373.job.log.tgz.1"}'
    pandajob = json.loads(pandajob)
    jspec = JobSpec()
    jspec.convert_job_json(pandajob)
    jspec.computingSite = queuename
    jspeclist = [jspec]

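    # build a worker for the job list via the queue's workerMaker plugin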
    maker = pluginfactory.get_plugin(queueconf.workerMaker)
    wspec = maker.make_worker(jspeclist, queueconf)

    wspec.hasJob = 1
    wspec.set_jobspec_list(jspeclist)

    sub = ARCSubmitter()
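    # ARCSubmitter is assumed to be defined in the enclosing ARC submitter plugin module;
    # submit_workers is expected to return a list of (status, message) tuples, one per worker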
    print(sub.submit_workers([wspec]))
    print(wspec.batchID)
# check if db lock exists
locked = stagerCore.dbInterface.get_object_lock('dummy_id_for_out_0', lock_interval=120)
if not locked:
    tmpLog.debug('DB already locked by another thread')
# now unlock db
unlocked = stagerCore.dbInterface.release_object_lock('dummy_id_for_out_0')
if unlocked:
    tmpLog.debug('unlocked db')
else:
    tmpLog.debug('could not unlock db')

# loop over the job ids creating various JobSpecs
jobSpec_list = []
for job_id in range(begin_job_id, end_job_id + 1):
    jobSpec = JobSpec()
    jobSpec.jobParams = {
        'scopeLog': 'panda',
        'logFile': 'log',
    }
    jobSpec.computingSite = queueName
    jobSpec.PandaID = job_id
    jobSpec.modificationTime = datetime.datetime.now()
    realDataset = 'panda.sgotest.' + uuid.uuid4().hex
    ddmEndPointOut = 'BNL-OSG2_DATADISK'
    outFiles_scope_str = ''
    outFiles_str = ''
    realDatasets_str = ''
    ddmEndPointOut_str = ''
    # create up to 5 output files
    for index in range(random.randint(1, 5)):
try:
    os.remove(harvester_config.db.database_filename)
except Exception:
    pass

for loggerName, loggerObj in iteritems(logging.Logger.manager.loggerDict):
    if loggerName.startswith('panda.log'):
        if len(loggerObj.handlers) == 0:
            continue
        if loggerName.split('.')[-1] in ['db_proxy']:
            continue
        stdoutHandler = logging.StreamHandler(sys.stdout)
        stdoutHandler.setFormatter(loggerObj.handlers[0].formatter)
        loggerObj.addHandler(stdoutHandler)

queueConfigMapper = QueueConfigMapper()

proxy = DBProxy()
proxy.make_tables(queueConfigMapper)

job = JobSpec()
job.PandaID = 1

job.modificationTime = datetime.datetime.now()
proxy.insert_jobs([job])

newJob = proxy.get_job(1)

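# the string arguments below are placeholders; get_jobs contacts the PanDA server for real jobs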
a = CommunicatorPool()
a.get_jobs('siteName', 'nodeName', 'prodSourceLabel', 'computingElement', 1,
           {})
Example #5
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.get_pid()),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             default_prodSourceLabel = queueConfig.get_source_label()
             pdpm = getattr(queueConfig,
                            'prodSourceLabelRandomWeightsPermille', {})
             choice_list = core_utils.make_choice_list(
                 pdpm=pdpm, default=default_prodSourceLabel)
             prodSourceLabel = random.choice(choice_list)
             tmpLog.debug('getting {0} jobs for prodSourceLabel {1}'.format(
                 nJobs, prodSourceLabel))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, prodSourceLabel, self.nodeName,
                 nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(
                 len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 # get extractor plugin
                 if hasattr(queueConfig, 'extractor'):
                     extractorCore = self.pluginFactory.get_plugin(
                         queueConfig.extractor)
                 else:
                     extractorCore = None
                 jobSpecs = []
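                  # cache per-LFN file status to avoid repeated DB lookups across jobs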
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     fileGroupDictList = [
                         jobSpec.get_input_file_attributes()
                     ]
                     if extractorCore is not None:
                         fileGroupDictList.append(
                             extractorCore.get_aux_inputs(jobSpec))
                     for fileGroupDict in fileGroupDictList:
                         for tmpLFN, fileAttrs in iteritems(fileGroupDict):
                             # check file status
                             if tmpLFN not in fileStatMap:
                                 fileStatMap[
                                     tmpLFN] = self.dbProxy.get_file_status(
                                         tmpLFN, 'input',
                                         queueConfig.ddmEndpointIn,
                                         'starting')
                             # make file spec
                             fileSpec = FileSpec()
                             fileSpec.PandaID = jobSpec.PandaID
                             fileSpec.taskID = jobSpec.taskID
                             fileSpec.lfn = tmpLFN
                             fileSpec.endpoint = queueConfig.ddmEndpointIn
                             fileSpec.scope = fileAttrs['scope']
                             # set preparing to skip stage-in if the file is (being) taken care of by another job
                             if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                     or 'to_prepare' in fileStatMap[tmpLFN]:
                                 fileSpec.status = 'preparing'
                             else:
                                 fileSpec.status = 'to_prepare'
                             if fileSpec.status not in fileStatMap[tmpLFN]:
                                 fileStatMap[tmpLFN][fileSpec.status] = 0
                             fileStatMap[tmpLFN][fileSpec.status] += 1
                             if 'INTERNAL_FileType' in fileAttrs:
                                 fileSpec.fileType = fileAttrs[
                                     'INTERNAL_FileType']
                                 jobSpec.auxInput = JobSpec.AUX_hasAuxInput
                             else:
                                 fileSpec.fileType = 'input'
                             if 'INTERNAL_URL' in fileAttrs:
                                 fileSpec.url = fileAttrs['INTERNAL_URL']
                             jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(
                     len(jobs), sw_startconvert.get_elapsed_time()))
                 sw_insertdb = core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(
                     len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
 def run(self):
     while True:
         mainLog = self.make_logger(_logger,
                                    'id={0}'.format(self.ident),
                                    method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(
             harvester_config.jobfetcher.nQueues,
             harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger,
                                       'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(
                 siteName, self.nodeName, queueConfig.get_source_label(),
                 self.nodeName, nJobs, queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1}'.format(len(jobs), errStr))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute(
                         'schedulerID', 'harvester-{0}'.format(
                             harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(
                             jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[
                                 tmpLFN] = self.dbProxy.get_file_status(
                                     tmpLFN, 'input',
                                     queueConfig.ddmEndpointIn, 'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 self.dbProxy.insert_jobs(jobSpecs)
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
 def run(self):
     while True:
         mainLog = self.make_logger(_logger, 'id={0}'.format(self.get_pid()), method_name='run')
         mainLog.debug('getting number of jobs to be fetched')
         # get number of jobs to be fetched
         nJobsPerQueue = self.dbProxy.get_num_jobs_to_fetch(harvester_config.jobfetcher.nQueues,
                                                            harvester_config.jobfetcher.lookupTime)
         mainLog.debug('got {0} queues'.format(len(nJobsPerQueue)))
         # loop over all queues
         for queueName, nJobs in iteritems(nJobsPerQueue):
             # check queue
             if not self.queueConfigMapper.has_queue(queueName):
                 continue
             tmpLog = self.make_logger(_logger, 'queueName={0}'.format(queueName),
                                       method_name='run')
             # get queue
             queueConfig = self.queueConfigMapper.get_queue(queueName)
             # upper limit
             if nJobs > harvester_config.jobfetcher.maxJobs:
                 nJobs = harvester_config.jobfetcher.maxJobs
             # get jobs
             tmpLog.debug('getting {0} jobs'.format(nJobs))
             sw = core_utils.get_stopwatch()
             siteName = queueConfig.siteName
             jobs, errStr = self.communicator.get_jobs(siteName, self.nodeName,
                                                       queueConfig.get_source_label(),
                                                       self.nodeName, nJobs,
                                                       queueConfig.getJobCriteria)
             tmpLog.info('got {0} jobs with {1} {2}'.format(len(jobs), errStr, sw.get_elapsed_time()))
             # convert to JobSpec
             if len(jobs) > 0:
                 jobSpecs = []
                 fileStatMap = dict()
                 sw_startconvert = core_utils.get_stopwatch()
                 for job in jobs:
                     timeNow = datetime.datetime.utcnow()
                     jobSpec = JobSpec()
                     jobSpec.convert_job_json(job)
                     jobSpec.computingSite = queueName
                     jobSpec.status = 'starting'
                     jobSpec.subStatus = 'fetched'
                     jobSpec.creationTime = timeNow
                     jobSpec.stateChangeTime = timeNow
                     jobSpec.configID = queueConfig.configID
                     jobSpec.set_one_attribute('schedulerID',
                                               'harvester-{0}'.format(harvester_config.master.harvester_id))
                     if queueConfig.zipPerMB is not None and jobSpec.zipPerMB is None:
                         jobSpec.zipPerMB = queueConfig.zipPerMB
                     for tmpLFN, fileAttrs in iteritems(jobSpec.get_input_file_attributes()):
                         # check file status
                         if tmpLFN not in fileStatMap:
                             fileStatMap[tmpLFN] = self.dbProxy.get_file_status(tmpLFN, 'input',
                                                                                queueConfig.ddmEndpointIn,
                                                                                'starting')
                         # make file spec
                         fileSpec = FileSpec()
                         fileSpec.PandaID = jobSpec.PandaID
                         fileSpec.taskID = jobSpec.taskID
                         fileSpec.lfn = tmpLFN
                         fileSpec.endpoint = queueConfig.ddmEndpointIn
                         fileSpec.scope = fileAttrs['scope']
                         # set preparing to skip stage-in if the file is (being) taken care of by another job
                         if 'ready' in fileStatMap[tmpLFN] or 'preparing' in fileStatMap[tmpLFN] \
                                 or 'to_prepare' in fileStatMap[tmpLFN]:
                             fileSpec.status = 'preparing'
                         else:
                             fileSpec.status = 'to_prepare'
                         if fileSpec.status not in fileStatMap[tmpLFN]:
                             fileStatMap[tmpLFN][fileSpec.status] = 0
                         fileStatMap[tmpLFN][fileSpec.status] += 1
                         fileSpec.fileType = 'input'
                         jobSpec.add_in_file(fileSpec)
                     jobSpec.trigger_propagation()
                     jobSpecs.append(jobSpec)
                 # insert to DB
                 tmpLog.debug("Converting of {0} jobs {1}".format(len(jobs),sw_startconvert.get_elapsed_time()))
                 sw_insertdb =core_utils.get_stopwatch()
                 self.dbProxy.insert_jobs(jobSpecs)
                 tmpLog.debug('Insert of {0} jobs {1}'.format(len(jobSpecs), sw_insertdb.get_elapsed_time()))
         mainLog.debug('done')
         # check if being terminated
         if self.terminated(harvester_config.jobfetcher.sleepTime):
             mainLog.debug('terminated')
             return
Example #8
import sys
import time
import os
from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec

queueName = sys.argv[1]

queueConfigMapper = QueueConfigMapper()

queueConfig = queueConfigMapper.get_queue(queueName)

jobSpec = JobSpec()
jobSpec.jobParams = {'inFiles': 'DAOD_STDM4.09596175._000008.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '658906675',
                     'GUID': '7e3776f9bb0af341b03e59d3de895a13',
                     'checksum': 'ad:3734bdd9',
                     'ddmEndPointIn': 'BNL-OSG2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV.363638.MGPy8EG_N30NLO_Wmunu_Ht500_700_BFilter.merge.DAOD_STDM4.e4944_s2726_r7772_r7676_p2842_tid09596175_00',
                     }
jobSpec.computingSite = queueName

from pandaharvester.harvestercore.plugin_factory import PluginFactory

pluginFactory = PluginFactory()

# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print ("plugin={0}".format(preparatorCore.__class__.__name__))
import sys
import time
from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec
from pilot.info.filespec import FileSpec

queueName = sys.argv[1]

queueConfigMapper = QueueConfigMapper()

queueConfig = queueConfigMapper.get_queue(queueName)

jobSpec = JobSpec()
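# describe the single input file with a pilot FileSpec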
new_file_data = {
    'scope': 'test',
    'lfn': 'TXT.19772875._044894.tar.gz.1',
    'attemptNr': 0
}
new_file_spec = FileSpec(filetype='input', **new_file_data)
new_file_spec.attemptNr = 0
new_file_spec.path = '/home/psvirin/harvester3'

jobSpec.inFiles = {new_file_spec}
jobSpec.outFiles = {}
jobSpec.jobParams = {
    'inFiles': 'TXT.19772875._044894.tar.gz.1',
    'scopeIn': 'mc15_13TeV',
    'fsize': '658906675',
Example #12
hash_hex = hash.hexdigest()
correctedscope = "/".join(scope.split('.'))
assFileSpec.path = "{endPoint}/{scope}/{hash1}/{hash2}/{lfn}".format(endPoint=queueConfig.stager['Globus_srcPath'],
                                                                     scope=correctedscope,
                                                                     hash1=hash_hex[0:2],
                                                                     hash2=hash_hex[2:4],
                                                                     lfn=assFileSpec.lfn)
if not os.path.exists(os.path.dirname(assFileSpec.path)):
    print("os.makedirs({})".format(os.path.dirname(assFileSpec.path)))
    os.makedirs(os.path.dirname(assFileSpec.path))
oFile = open(assFileSpec.path, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)

jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeOut': 'panda',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': 'panda.' + fileSpec.lfn,
                     'ddmEndPointOut': 'BNL-OSG2_DATADISK',
                     }
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.add_out_file(fileSpec)

print "file to transfer - {}".format(assFileSpec.path) 
print "dump(jobSpec)"
#dump(jobSpec)
    queueConfig = queueConfigMapper.get_queue(queueName)

    pluginFactory = PluginFactory()

    com = CommunicatorPool()

    # get job
    jobSpecList = []
    if queueConfig.mapType != WorkSpec.MT_NoJob:
        jobs, errStr = com.get_jobs(queueConfig.queueName, 'nodeName', queueConfig.prodSourceLabel,
                                    'computingElement', 1, None)
        if len(jobs) == 0:
            print ("Failed to get jobs at {0} due to {1}".format(queueConfig.queueName, errStr))
            sys.exit(0)

        jobSpec = JobSpec()
        jobSpec.convert_job_json(jobs[0])

        # set input file paths
        inFiles = jobSpec.get_input_file_attributes()
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = '{0}/{1}'.format(os.getcwd(), inLFN)
        jobSpec.set_input_file_paths(inFiles)
        jobSpecList.append(jobSpec)

    maker = pluginFactory.get_plugin(queueConfig.workerMaker)
    workSpec = maker.make_worker(jobSpecList, queueConfig, 'SCORE')  # TODO: the resource type needs to be thought through

    workSpec.accessPoint = queueConfig.messenger['accessPoint']
    workSpec.mapType = queueConfig.mapType
    workSpec.computingSite = queueConfig.queueName
import json
import sys
import time
from pprint import pprint

#from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvesterextractor.aux_extractor import AuxExtractor

job_data_json = """{"container_name": "atlas.athena:21.0.15_DBRelease-100.0.2_Patched", "PandaID": 4731765799, "jobsetID": 1,"taskID": 1, "transformation": "Sim_tf.py", "attemptNr": 1,"currentPriority": 1, "outFiles": "", "realDatasets": "", "ddmEndPointOut": "", "scopeOut": "", "scopeLog": "", "logFile": "", "logGUID": "", "files": [{"row_id": 30133581516, "pandaid": 4731765799, "modificationtime": "2020-05-18T10:29:21", "guid": "ADCF2DEC-3412-C64B-B8FB-E8629680AA4D", "lfn": "EVNT.21265061._000036.pool.root.1", "type": "input", "dataset": "mc16_13TeV.830011.H7EG_jetjet_JZ1.merge.EVNT.e7954_e7400_tid21265061_00", "status": "ready", "proddblock": "mc16_13TeV:mc16_13TeV.830011.H7EG_jetjet_JZ1.merge.EVNT.e7954_e7400_tid21265061_00", "proddblocktoken": "", "dispatchdblock": "", "dispatchdblocktoken": "", "destinationdblock": "", "destinationdblocktoken": "", "destinationse": "", "fsize": 307204932, "md5sum": "", "checksum": "ad:698afd11", "scope": "mc16_13TeV", "jeditaskid": 21265064, "datasetid": 311267780, "fileid": 21006488431, "attemptnr": 22, "destination": " ", "fsizemb": "292.97", "ruciodatasetname": "mc16_13TeV:mc16_13TeV.830011.H7EG_jetjet_JZ1.merge.EVNT.e7954_e7400_tid21265061_00", "datasetname": "mc16_13TeV:mc16_13TeV.830011.H7EG_jetjet_JZ1.merge.EVNT.e7954_e7400_tid21265061_00", "ddmsite": "LRZ-LMU", "creationdate": "2020-05-18T10:29:21", "oldfiletable": 1, "destinationdblock_vis": "", "maxattempt": 30}, {"row_id": 30133581517, "pandaid": 4731765799, "modificationtime": "2020-05-18T10:29:21", "guid": "819a609a-1650-455b-8980-9c190cb77064", "lfn": "352", "type": "pseudo_input", "dataset": "seq_number", "status": "unknown", "proddblock": "seq_number", "proddblocktoken": "", "dispatchdblock": "", "dispatchdblocktoken": "", "destinationdblock": "", "destinationdblocktoken": "", "destinationse": "", "fsize": 0, "md5sum": "", "checksum": "", "scope": "", "jeditaskid": 21265064, "datasetid": 311267781, "fileid": 21006489271, "attemptnr": 21, "destination": " ", "fsizemb": "0.00", "ruciodatasetname": "seq_number", "datasetname": "seq_number", "ddmsite": "LRZ-LMU", "creationdate": "2020-05-18T10:29:21", "oldfiletable": 1, "destinationdblock_vis": ""}, {"row_id": 30133581519, "pandaid": 4731765799, "modificationtime": "2020-05-18T10:29:21", "guid": "", "lfn": "HITS.21265064._002580.pool.root.1", "type": "output", "dataset": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.HITS.e7954_e7400_s3126_tid21265064_00", "status": "failed", "proddblock": "", "proddblocktoken": "", "dispatchdblock": "", "dispatchdblocktoken": "", "destinationdblock": "mc16_13TeV.830011.NA.simul.HITS.21265064_sub2290908", "destinationdblocktoken": "dst:TOKYO-LCG2_DATADISK", "destinationse": "nucleus:TOKYO-LCG2", "fsize": 0, "md5sum": "", "checksum": "", "scope": "mc16_13TeV", "jeditaskid": 21265064, "datasetid": 311267782, "fileid": 21092403937, "attemptnr": 0, "destination": " ", "fsizemb": "0.00", "ruciodatasetname": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.HITS.e7954_e7400_s3126_tid21265064_00", "datasetname": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.HITS.e7954_e7400_s3126_tid21265064_00", "ddmsite": "TOKYO-LCG2", "dsttoken": "DATADISK", "creationdate": "2020-05-18T10:29:21", "oldfiletable": 1, "destinationdblock_vis": "sub2290908"}, {"row_id": 30133581518, "pandaid": 4731765799, "modificationtime": "2020-05-18T10:29:21", "guid": "73a868bd-acd5-4fb4-ade9-a66badd0e5a9", "lfn": "log.21265064._002580.job.log.tgz.1", "type": "log", "dataset": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.log.e7954_e7400_s3126_tid21265064_00", 
"status": "failed", "proddblock": "", "proddblocktoken": "", "dispatchdblock": "", "dispatchdblocktoken": "", "destinationdblock": "mc16_13TeV.830011.NA.simul.log.21265064_sub2290905", "destinationdblocktoken": "ddd:LRZ-LMU_DATADISK", "destinationse": "LRZ-LMU_MUC", "fsize": 0, "md5sum": "", "checksum": "", "scope": "mc16_13TeV", "jeditaskid": 21265064, "datasetid": 311267783, "fileid": 21092403936, "attemptnr": 0, "destination": " ", "fsizemb": "0.00", "ruciodatasetname": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.log.e7954_e7400_s3126_tid21265064_00", "datasetname": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.log.e7954_e7400_s3126_tid21265064_00", "ddmsite": "LRZ-LMU", "creationdate": "2020-05-18T10:29:21", "oldfiletable": 1, "destinationdblock_vis": "sub2290905"}], "job": {"pandaid": 4731765799, "jobdefinitionid": 0, "schedulerid": "", "pilotid": "", "creationtime": "2020-05-18 03:42:54", "creationhost": "", "modificationtime": "2020-05-18 10:29:21", "modificationhost": "aipanda058.cern.ch", "atlasrelease": "Atlas-21.0.15", "transformation": "Sim_tf.py", "homepackage": "AtlasOffline/21.0.15", "prodserieslabel": "pandatest", "prodsourcelabel": "managed", "produserid": "dhirsch", "gshare": "Validation", "assignedpriority": 888, "currentpriority": 888, "attemptnr": 22, "maxattempt": 22, "jobname": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.e7954_e7400_s3126.4727254713", "maxcpucount": 4611, "maxcpuunit": "kSI2kseconds", "maxdiskcount": 5255, "maxdiskunit": "MB  ", "ipconnectivity": "yes  ", "minramcount": 44100, "minramunit": "MB", "starttime": null, "endtime": "2020-05-18T10:29:21", "cpuconsumptiontime": 0, "cpuconsumptionunit": "", "commandtopilot": "tobekilled", "transexitcode": "", "piloterrorcode": 0, "piloterrordiag": "", "exeerrorcode": 0, "exeerrordiag": "", "superrorcode": 0, "superrordiag": "", "ddmerrorcode": 0, "ddmerrordiag": "", "brokerageerrorcode": 0, "brokerageerrordiag": "", "jobdispatchererrorcode": 0, "jobdispatchererrordiag": "", "taskbuffererrorcode": 100, "taskbuffererrordiag": "reassigned by JEDI", "computingsite": "LRZ-LMU_MUC", "computingelement": "", "jobparameters": "", "metadata": "", "proddblock": "mc16_13TeV:mc16_13TeV.830011.H7EG_jetjet_JZ1.merge.EVNT.e7954_e7400_tid21265061_00", "dispatchdblock": "", "destinationdblock": "mc16_13TeV.830011.H7EG_jetjet_JZ1.simul.log.e7954_e7400_s3126_tid21265064_00", "destinationse": "dst:TOKYO-LCG2_DATADISK/ATLASDATADISK", "nevents": 0, "grid": "", "cloud": "WORLD", "cpuconversion": null, "sourcesite": "", "destinationsite": "", "transfertype": "", "taskid": 21265064, "cmtconfig": "x86_64-slc6-gcc49-opt", "statechangetime": "2020-05-18 10:29:21", "proddbupdatetime": "2020-05-18T03:42:54", "lockedby": "jedi", "relocationflag": 1, "jobexecutionid": 0, "vo": "atlas", "pilottiming": "", "workinggroup": "AP_MCGN", "processingtype": "simul", "produsername": "******", "ninputfiles": null, "countrygroup": "", "batchid": "", "parentid": null, "specialhandling": "ddm:rucio,hc:DE,de", "jobsetid": 30864, "corecount": 96, "ninputdatafiles": 1, "inputfiletype": "EVNT", "inputfileproject": "mc16_13TeV", "inputfilebytes": 307204932, "noutputdatafiles": 0, "outputfilebytes": 0, "jobmetrics": "", "workqueue_id": 16, "jeditaskid": 21265064, "jobstatus": "closed", "actualcorecount": null, "reqid": 30864, "nucleus": "TOKYO-LCG2", "jobsubstatus": "toreassign", "eventservice": "ordinary", "hs06": 1920, "hs06sec": null, "maxrss": null, "maxvmem": null, "maxswap": null, "maxpss": null, "avgrss": null, "avgvmem": null, "avgswap": null, "avgpss": null, 
"maxwalltime": 4611, "resourcetype": "MCORE", "failedattempt": 4, "totrchar": null, "totwchar": null, "totrbytes": null, "totwbytes": null, "raterchar": null, "ratewchar": null, "raterbytes": null, "ratewbytes": null, "diskio": null, "memoryleak": null, "memoryleakx2": null, "container_name": "atlas.athena:21.0.15_DBRelease-100.0.2_Patched", "outputfiletype": "log", "homecloud": "DE", "errorinfo": "", "jobinfo": "", "duration": "", "durationsec": 0, "durationmin": 0, "waittime": "0:6:46:27", "priorityrange": "800:899", "jobsetrange": "30800:30899"}, "dsfiles": []}"""

job_data = json.loads(job_data_json)

job_data["jobPars"] = '--inputEVNTFile=EVNT.21265061._000036.pool.root.1 --maxEvents=1000 --postInclude "default:RecJobTransforms/UseFrontier.py" --preExec "EVNTtoHITS:simFlags.SimBarcodeOffset.set_Value_and_Lock(200000)" "EVNTtoHITS:simFlags.TRTRangeCut=30.0;simFlags.TightMuonStepping=True" --preInclude "EVNTtoHITS:SimulationJobOptions/preInclude.BeamPipeKill.py,SimulationJobOptions/preInclude.FrozenShowersFCalOnly.py" --skipEvents=1000 --firstEvent=331001 --outputHITSFile=HITS.21265064._002580.pool.root.1 --physicsList=FTFP_BERT_ATL_VALIDATION --randomSeed=352 --DBRelease="all:current" --conditionsTag "default:OFLCOND-MC16-SDR-14" --geometryVersion="default:ATLAS-R2-2016-01-00-01_VALIDATION" --runNumber=830011 --AMITag=s3126 --DataRunNumber=284500 --simulator=FullG4 --truthStrategy=MC15aPlus'

jobSpec = JobSpec()
jobSpec.convert_job_json(job_data)

#pprint(jobSpec.jobParams)

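# AuxExtractor derives auxiliary inputs (e.g. the container image) from the job parameters,
# returning a {lfn: attributes} dict with INTERNAL_FileType and INTERNAL_URL entries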
ae = AuxExtractor()
print(ae.get_aux_inputs(jobSpec))
fileSpec = FileSpec()
fileSpec.fileType = 'output'
fileSpec.lfn = file_prefix + uuid.uuid4().hex + '.gz'
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)
jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeOut': 'panda',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': 'panda.' + fileSpec.lfn,
                     'ddmEndPointOut': 'BNL-OSG2_DATADISK',
                     }
jobSpec.add_out_file(fileSpec)

pluginFactory = PluginFactory()

# get stage-out plugin
stagerCore = pluginFactory.get_plugin(queueConfig.stager)
print ("plugin={0}".format(stagerCore.__class__.__name__))
Example #18
    def make_worker(self, jobspec_list, queue_config, resource_type):
        tmpLog = self.make_logger(_logger,
                                  'queue={0}'.format(queue_config.queueName),
                                  method_name='make_worker')

        tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

        workSpec = WorkSpec()
        workSpec.creationTime = datetime.datetime.utcnow()

        # get the queue configuration from the DB
        panda_queues_dict = PandaQueuesDict()
        queue_dict = panda_queues_dict.get(queue_config.queueName, {})

        unified_queue = queue_dict.get('capability', '') == 'ucore'
        # case of traditional (non-unified) queue: look at the queue configuration
        if not unified_queue:
            workSpec.nCore = queue_dict.get('corecount', 1) or 1
            workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

        # case of unified queue: look at the resource type and queue configuration
        else:
            catchall = queue_dict.get('catchall', '')
            if 'useMaxRam' in catchall or queue_config.queueName in (
                    'Taiwan-LCG2-HPC2_Unified', 'Taiwan-LCG2-HPC_Unified',
                    'DESY-ZN_UCORE'):
                # temporary hack to debug killed workers in Taiwan queues
                site_corecount = queue_dict.get('corecount', 1) or 1
                site_maxrss = queue_dict.get('maxrss', 1) or 1

                # some cases need to overwrite those values
                if 'SCORE' in resource_type:
                    # the usual pilot streaming use case
                    workSpec.nCore = 1
                    workSpec.minRamCount = int(
                        math.ceil(site_maxrss / site_corecount))
                else:
                    # default values
                    workSpec.nCore = site_corecount
                    workSpec.minRamCount = site_maxrss
            else:
                workSpec.nCore, workSpec.minRamCount = self.rt_mapper.calculate_worker_requirements(
                    resource_type, queue_dict)

        # parameters that are independent of traditional vs unified
        workSpec.maxWalltime = queue_dict.get('maxtime', 1)
        workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)
        walltimeLimit_default = getattr(queue_config, 'walltimeLimit', 0)

        if len(jobspec_list) > 0:
            # get info from jobs
            nCore = 0
            minRamCount = 0
            maxDiskCount = 0
            maxWalltime = 0
            ioIntensity = 0
            for jobSpec in jobspec_list:
                job_corecount, job_memory = self.get_job_core_and_memory(
                    queue_dict, jobSpec)
                nCore += job_corecount
                minRamCount += job_memory
                try:
                    maxDiskCount += jobSpec.jobParams['maxDiskCount']
                except Exception:
                    pass
                try:
                    ioIntensity += jobSpec.jobParams['ioIntensity']
                except Exception:
                    pass
            try:
                # maxWallTime from AGIS or qconf, not trusting job currently
                maxWalltime = queue_dict.get('maxtime', walltimeLimit_default)
            except Exception:
                pass

            if (nCore > 0 and 'nCore' in self.jobAttributesToUse) \
               or unified_queue:
                workSpec.nCore = nCore
            if (minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse) \
               or unified_queue:
                workSpec.minRamCount = minRamCount
            if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
                workSpec.maxDiskCount = maxDiskCount
            if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
                workSpec.maxWalltime = maxWalltime
            if ioIntensity > 0 and 'ioIntensity' in self.jobAttributesToUse:
                workSpec.ioIntensity = ioIntensity
            workSpec.pilotType = jobspec_list[0].get_pilot_type()
        else:
            # when no job
            # randomize pilot type with weighting
            pdpm = getattr(queue_config,
                           'prodSourceLabelRandomWeightsPermille', {})
            choice_list = core_utils.make_choice_list(pdpm=pdpm,
                                                      default='managed')
            tmp_prodsourcelabel = random.choice(choice_list)
            fake_job = JobSpec()
            fake_job.jobParams = {}
            fake_job.jobParams['prodSourceLabel'] = tmp_prodsourcelabel
            workSpec.pilotType = fake_job.get_pilot_type()
            del fake_job
            if workSpec.pilotType in ['RC', 'ALRB', 'PT']:
                tmpLog.info('a worker has pilotType={0}'.format(
                    workSpec.pilotType))
        # TODO: this needs to be improved with real resource types
        if resource_type and resource_type != 'ANY':
            workSpec.resourceType = resource_type
        elif workSpec.nCore == 1:
            workSpec.resourceType = 'SCORE'
        else:
            workSpec.resourceType = 'MCORE'

        return workSpec
        errStr = ''
        if not tmpStatsrc:
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # We are sending test files from our destination machine to the source machine
    # both endpoints activated now prepare to transfer data
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except Exception:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)

# create JobSpec
jobSpec = JobSpec()
jobSpec.jobParams = {
    'scopeLog': 'panda',
    'logFile': 'log',
}
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
inFiles_scope_str = ''
inFiles_str = ''
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
import sys

queueName = sys.argv[1]

from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper

queueConfigMapper = QueueConfigMapper()

queueConfig = queueConfigMapper.get_queue(queueName)

from pandaharvester.harvestercore.job_spec import JobSpec

jobSpec = JobSpec()
jobSpec.computingSite = sys.argv[1]
jobSpec.jobParams = {'inFiles': 'EVNT.06820166._000001.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '196196765',
                     'GUID': 'B7F387CD-1F97-1C47-88BD-D8785442C49D',
                     'checksum': 'ad:326e445d',
                     'ddmEndPointIn': 'MWT2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV:mc15_13TeV.301042.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_250M400.evgen.EVNT.e3649_tid06820166_00',
                     }

from pandaharvester.harvestercore.plugin_factory import PluginFactory

pluginFactory = PluginFactory()

# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print ("plugin={0}".format(preparatorCore.__class__.__name__))
        if not tmpStatsrc:
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # both endpoints activated now prepare to transfer data
    # We are sending test files from our destination machine to the source machine
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except Exception:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)

# loop over the job ids creating various JobSpecs
jobSpec_list = []
for job_id in range(begin_job_id, end_job_id + 1):
    jobSpec = JobSpec()
    jobSpec.jobParams = {
        'scopeLog': 'panda',
        'logFile': 'log',
    }
    jobSpec.computingSite = queueName
    jobSpec.PandaID = job_id
    jobSpec.modificationTime = datetime.datetime.now()
    realDataset = 'panda.sgotest.' + uuid.uuid4().hex
    ddmEndPointIn = 'BNL-OSG2_DATADISK'
    inFiles_scope_str = ''
    inFiles_str = ''
    realDatasets_str = ''
    realDatasetsIn_str = ''
    ddmEndPointIn_str = ''
    GUID_str = ''
Example #22
    pluginFactory = PluginFactory()

    com = CommunicatorPool()

    # get job
    jobSpecList = []
    if queueConfig.mapType != WorkSpec.MT_NoJob:
        jobs, errStr = com.get_jobs(queueConfig.queueName, 'nodeName',
                                    queueConfig.prodSourceLabel,
                                    'computingElement', 1, None)
        if len(jobs) == 0:
            print("Failed to get jobs at {0} due to {1}".format(
                queueConfig.queueName, errStr))
            sys.exit(0)

        jobSpec = JobSpec()
        jobSpec.convert_job_json(jobs[0])

        # set input file paths
        inFiles = jobSpec.get_input_file_attributes()
        for inLFN, inFile in iteritems(inFiles):
            inFile['path'] = '{0}/{1}'.format(os.getcwd(), inLFN)
        jobSpec.set_input_file_paths(inFiles)
        jobSpecList.append(jobSpec)

    maker = pluginFactory.get_plugin(queueConfig.workerMaker)
    workSpec = maker.make_worker(jobSpecList, queueConfig, jobType,
                                 resourceType)

    workSpec.accessPoint = queueConfig.messenger['accessPoint']
    workSpec.mapType = queueConfig.mapType
import sys
import uuid
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.file_spec import FileSpec
from pandaharvester.harvestercore.event_spec import EventSpec

from pandaharvester.harvestercore.communicator_pool import CommunicatorPool

rID = sys.argv[1]
taskid = rID.split('-')[0]
pandaid = int(rID.split('-')[1])

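# build a job carrying one finished event range whose output was zipped into a single file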
job = JobSpec()
job.PandaID = pandaid
event = EventSpec()
file = FileSpec()
file.status = 'finished'
file.objstoreID = 9575
file.pathConvention = 1000
file.lfn = str(uuid.uuid4().hex) + '.zip'
file.fsize = 555
file.chksum = '0d2a9dc9'
event.eventRangeID = rID
event.eventStatus = 'finished'
job.zipEventMap = {1: {'events': [event],
                       'zip': file}}


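# report the zipped event range back to PanDA through the communicator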
a = CommunicatorPool()
a.update_jobs([job])