import os
import sys
import time

from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.plugin_factory import PluginFactory

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

jobSpec = JobSpec()
jobSpec.jobParams = {'inFiles': 'DAOD_STDM4.09596175._000008.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '658906675',
                     'GUID': '7e3776f9bb0af341b03e59d3de895a13',
                     'checksum': 'ad:3734bdd9',
                     'ddmEndPointIn': 'BNL-OSG2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV.363638.MGPy8EG_N30NLO_Wmunu_Ht500_700_BFilter.merge.DAOD_STDM4.e4944_s2726_r7772_r7676_p2842_tid09596175_00',
                     }
jobSpec.computingSite = queueName

pluginFactory = PluginFactory()
# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print("plugin={0}".format(preparatorCore.__class__.__name__))
print("testing stagein:")
print("BasePath from preparator configuration: {0}".format(preparatorCore.basePath))
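# Hedged sketch: print where the staged-in file is expected to land, assuming
# the plugin lays files out with harvester's usual convention via
# mover_utils.construct_file_path(). Not every preparator uses this layout,
# so treat the path below as illustrative.
from pandaharvester.harvestermover import mover_utils
expectedPath = mover_utils.construct_file_path(preparatorCore.basePath,
                                               jobSpec.jobParams['scopeIn'],
                                               jobSpec.jobParams['inFiles'])
print("expected local path: {0}".format(expectedPath))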
import sys

from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper
from pandaharvester.harvestercore.job_spec import JobSpec
from pandaharvester.harvestercore.plugin_factory import PluginFactory

queueName = sys.argv[1]
queueConfigMapper = QueueConfigMapper()
queueConfig = queueConfigMapper.get_queue(queueName)

jobSpec = JobSpec()
jobSpec.computingSite = queueName
jobSpec.jobParams = {'inFiles': 'EVNT.06820166._000001.pool.root.1',
                     'scopeIn': 'mc15_13TeV',
                     'fsize': '196196765',
                     'GUID': 'B7F387CD-1F97-1C47-88BD-D8785442C49D',
                     'checksum': 'ad:326e445d',
                     'ddmEndPointIn': 'MWT2_DATADISK',
                     'realDatasetsIn': 'mc15_13TeV:mc15_13TeV.301042.PowhegPythia8EvtGen_AZNLOCTEQ6L1_DYtautau_250M400.evgen.EVNT.e3649_tid06820166_00',
                     }

pluginFactory = PluginFactory()
# get plugin
preparatorCore = pluginFactory.get_plugin(queueConfig.preparator)
print("plugin={0}".format(preparatorCore.__class__.__name__))

print("testing preparation")
tmpStat, tmpOut = preparatorCore.trigger_preparation(jobSpec)
if tmpStat:
    print(' OK')
else:
    print(' NG {0}'.format(tmpOut))
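# Hedged continuation: poll the plugin until the transfer settles, then resolve
# the local input paths. Method names follow the current harvester preparator
# interface (check_stage_in_status / resolve_input_paths); older plugins expose
# check_status() instead.
import time

print("testing status check")
while True:
    tmpStat, tmpOut = preparatorCore.check_stage_in_status(jobSpec)
    if tmpStat is not None:  # None means the transfer is still in flight
        break
    time.sleep(10)
print("status={0} out={1}".format(tmpStat, tmpOut))
preparatorCore.resolve_input_paths(jobSpec)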
def make_worker(self, jobspec_list, queue_config, resource_type):
    tmpLog = self.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                              method_name='make_worker')

    tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

    workSpec = WorkSpec()
    workSpec.creationTime = datetime.datetime.utcnow()

    # get the queue configuration from the DB
    panda_queues_dict = PandaQueuesDict()
    queue_dict = panda_queues_dict.get(queue_config.queueName, {})

    unified_queue = queue_dict.get('capability', '') == 'ucore'
    # case of traditional (non-unified) queue: look at the queue configuration
    if not unified_queue:
        workSpec.nCore = queue_dict.get('corecount', 1) or 1
        workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1
    # case of unified queue: look at the resource type and queue configuration
    else:
        catchall = queue_dict.get('catchall', '')
        if 'useMaxRam' in catchall or queue_config.queueName in ('Taiwan-LCG2-HPC2_Unified',
                                                                 'Taiwan-LCG2-HPC_Unified',
                                                                 'DESY-ZN_UCORE'):
            # temporary hack to debug killed workers in Taiwan queues
            site_corecount = queue_dict.get('corecount', 1) or 1
            site_maxrss = queue_dict.get('maxrss', 1) or 1

            # some cases need to overwrite those values
            if 'SCORE' in resource_type:
                # the usual pilot streaming use case
                workSpec.nCore = 1
                workSpec.minRamCount = int(math.ceil(site_maxrss / site_corecount))
            else:
                # default values
                workSpec.nCore = site_corecount
                workSpec.minRamCount = site_maxrss
        else:
            workSpec.nCore, workSpec.minRamCount = \
                self.rt_mapper.calculate_worker_requirements(resource_type, queue_dict)

    # parameters that are independent of traditional vs unified
    workSpec.maxWalltime = queue_dict.get('maxtime', 1)
    workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)
    walltimeLimit_default = getattr(queue_config, 'walltimeLimit', 0)

    if len(jobspec_list) > 0:
        # get info from jobs
        nCore = 0
        minRamCount = 0
        maxDiskCount = 0
        maxWalltime = 0
        ioIntensity = 0
        for jobSpec in jobspec_list:
            job_corecount, job_memory = self.get_job_core_and_memory(queue_dict, jobSpec)
            nCore += job_corecount
            minRamCount += job_memory
            try:
                maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                ioIntensity += jobSpec.jobParams['ioIntensity']
            except Exception:
                pass
            try:
                # maxWallTime from AGIS or qconf, not trusting job currently
                maxWalltime = queue_dict.get('maxtime', walltimeLimit_default)
            except Exception:
                pass

        if (nCore > 0 and 'nCore' in self.jobAttributesToUse) or unified_queue:
            workSpec.nCore = nCore
        if (minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse) or unified_queue:
            workSpec.minRamCount = minRamCount
        if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
            workSpec.maxDiskCount = maxDiskCount
        if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
            workSpec.maxWalltime = maxWalltime
        if ioIntensity > 0 and 'ioIntensity' in self.jobAttributesToUse:
            workSpec.ioIntensity = ioIntensity
        workSpec.pilotType = jobspec_list[0].get_pilot_type()
    else:
        # when no job
        # randomize pilot type with weighting
        pdpm = getattr(queue_config, 'prodSourceLabelRandomWeightsPermille', {})
        choice_list = core_utils.make_choice_list(pdpm=pdpm, default='managed')
        tmp_prodsourcelabel = random.choice(choice_list)
        fake_job = JobSpec()
        fake_job.jobParams = {}
        fake_job.jobParams['prodSourceLabel'] = tmp_prodsourcelabel
        workSpec.pilotType = fake_job.get_pilot_type()
        del fake_job

    if workSpec.pilotType in ['RC', 'ALRB', 'PT']:
        tmpLog.info('a worker has pilotType={0}'.format(workSpec.pilotType))

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        workSpec.resourceType = resource_type
    elif workSpec.nCore == 1:
        workSpec.resourceType = 'SCORE'
    else:
        workSpec.resourceType = 'MCORE'

    return workSpec
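# Illustrative usage sketch only: SimpleWorkerMaker is assumed as the concrete
# worker-maker class and the resource type here is made up; in production
# harvester's worker-making agent drives make_worker(), it is not called by hand.
maker = SimpleWorkerMaker()
workSpec = maker.make_worker(jobspec_list=[],      # no jobs: pilot type is randomized
                             queue_config=queueConfig,
                             resource_type='SCORE')
print('nCore={0} minRamCount={1} resourceType={2}'.format(
    workSpec.nCore, workSpec.minRamCount, workSpec.resourceType))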
lfn=assFileSpec.lfn)
if not os.path.exists(os.path.dirname(assFileSpec.path)):
    print("os.makedirs({})".format(os.path.dirname(assFileSpec.path)))
    os.makedirs(os.path.dirname(assFileSpec.path))
oFile = open(assFileSpec.path, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)

jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeOut': 'panda',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': 'panda.' + fileSpec.lfn,
                     'ddmEndPointOut': 'BNL-OSG2_DATADISK',
                     }
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.add_out_file(fileSpec)

print("file to transfer - {}".format(assFileSpec.path))
print("dump(jobSpec)")
# dump(jobSpec)

pluginFactory = PluginFactory()
# get stage-out plugin
stagerCore = pluginFactory.get_plugin(queueConfig.stager)
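# Hedged sketch of the usual next step: trigger the transfer and report the
# result. trigger_stage_out() is the current harvester stager interface; the
# follow-up poll would use check_stage_out_status() (older plugins expose
# check_status() instead).
print("plugin={0}".format(stagerCore.__class__.__name__))
print("testing stage-out")
tmpStat, tmpOut = stagerCore.trigger_stage_out(jobSpec)
if tmpStat:
    print(' OK')
else:
    print(' NG {0}'.format(tmpOut))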
fileSpec.fileAttributes = {'guid': str(uuid.uuid4())}
fileSpec.checksum = '0d439274'
assFileSpec = FileSpec()
assFileSpec.lfn = file_prefix + uuid.uuid4().hex
assFileSpec.fileType = 'es_output'
assFileSpec.fsize = random.randint(10, 100)
assFileSpec.path = os.getcwd() + '/' + assFileSpec.lfn
oFile = open(assFileSpec.lfn, 'w')
oFile.write(''.join(random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(assFileSpec.fsize)))
oFile.close()
fileSpec.add_associated_file(assFileSpec)

jobSpec = JobSpec()
jobSpec.jobParams = {'outFiles': fileSpec.lfn + ',log',
                     'scopeOut': 'panda',
                     'scopeLog': 'panda',
                     'logFile': 'log',
                     'realDatasets': 'panda.' + fileSpec.lfn,
                     'ddmEndPointOut': 'BNL-OSG2_DATADISK',
                     }
jobSpec.add_out_file(fileSpec)

pluginFactory = PluginFactory()
# get stage-out plugin
stagerCore = pluginFactory.get_plugin(queueConfig.stager)
print("plugin={0}".format(stagerCore.__class__.__name__))

print("testing zip")
tmpStat, tmpOut = stagerCore.zip_output(jobSpec)
if tmpStat:
    print(' OK')
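# Hedged continuation mirroring the OK/NG pattern of these tests, plus a check
# that the zip actually landed on disk (FileSpec attribute names as used in
# harvester's test scripts; treat this verification step as illustrative).
else:
    print(' NG {0}'.format(tmpOut))
for outFile in jobSpec.outFiles:
    if outFile.path:
        print('zipped file {0} exists: {1}'.format(outFile.path,
                                                   os.path.exists(outFile.path)))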
locked = stagerCore.dbInterface.get_object_lock('dummy_id_for_out_0', lock_interval=120)
if not locked:
    tmpLog.debug('DB already locked by another thread')
# now unlock db
unlocked = stagerCore.dbInterface.release_object_lock('dummy_id_for_out_0')
if unlocked:
    tmpLog.debug('unlocked db')
else:
    tmpLog.debug('could not unlock db')

# loop over the job ids creating various JobSpecs
jobSpec_list = []
for job_id in range(begin_job_id, end_job_id + 1):
    jobSpec = JobSpec()
    jobSpec.jobParams = {'scopeLog': 'panda',
                         'logFile': 'log',
                         }
    jobSpec.computingSite = queueName
    jobSpec.PandaID = job_id
    jobSpec.modificationTime = datetime.datetime.now()
    realDataset = 'panda.sgotest.' + uuid.uuid4().hex
    ddmEndPointOut = 'BNL-OSG2_DATADISK'
    outFiles_scope_str = ''
    outFiles_str = ''
    realDatasets_str = ''
    ddmEndPointOut_str = ''
    # create up to 5 output files
    for index in range(random.randint(1, 5)):
        fileSpec = FileSpec()
        assFileSpec = FileSpec()
        fileSpec.fileType = 'es_output'
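        # Hedged sketch of what the truncated loop body goes on to do in the
        # full test script: give each file an LFN under the fake dataset and
        # accumulate the comma-separated jobParams strings initialized above.
        # The exact field values are assumptions, not taken from the source.
        fileSpec.lfn = realDataset + '.' + uuid.uuid4().hex
        fileSpec.scope = 'panda'
        outFiles_str += fileSpec.lfn + ','
        outFiles_scope_str += fileSpec.scope + ','
        realDatasets_str += realDataset + ','
        ddmEndPointOut_str += ddmEndPointOut + ','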
            errStr += ' source Endpoint not activated '
        if not tmpStatdst:
            errStr += ' destination Endpoint not activated '
        tmpLog.error(errStr)
        sys.exit(2)
    # We are sending test files from our destination machine to the source machine.
    # both endpoints activated, now prepare to transfer data
    tdata = TransferData(tc, dstEndpoint, srcEndpoint, sync_level="checksum")
except:
    errStat, errMsg = globus_utils.handle_globus_exception(tmpLog)
    sys.exit(1)

# create JobSpec
jobSpec = JobSpec()
jobSpec.jobParams = {'scopeLog': 'panda',
                     'logFile': 'log',
                     }
jobSpec.computingSite = queueName
jobSpec.PandaID = job_id
jobSpec.modificationTime = datetime.datetime.now()
realDataset = 'panda.sgotest.' + uuid.uuid4().hex
ddmEndPointIn = 'BNL-OSG2_DATADISK'
inFiles_scope_str = ''
inFiles_str = ''
realDatasets_str = ''
realDatasetsIn_str = ''
ddmEndPointIn_str = ''
GUID_str = ''
fsize_str = ''
checksum_str = ''
scope_in_str = ''
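# Hedged sketch of the step this snippet is building toward: add per-file items
# to the prepared TransferData and submit it. add_item() and submit_transfer()
# are standard globus_sdk calls; the paths here are illustrative placeholders.
tdata.add_item('/source/path/of/testfile', '/destination/path/of/testfile')
transfer_result = tc.submit_transfer(tdata)
print('transfer task_id: {0}'.format(transfer_result['task_id']))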