def single_thread_test(nObjects=3):
    """Time a clear / put / peek / get cycle on the module-level ``mq`` queue.

    The queue is cleared, ``nObjects`` WorkSpec objects carrying random
    attributes are put, the head is peeked once, and ``nObjects`` objects are
    fetched again.  Elapsed time and average throughput of each phase are
    printed.

    :param nObjects: number of objects to put and then get.
    """
    # Phase 1: clear the queue.
    time_point = time.time()
    print('clear')
    mq.fifo.clear()
    print('size', mq.size())
    time_consumed = time.time() - time_point
    print('Time consumed: ', time_consumed)

    # Phase 2: put.
    time_point = time.time()
    for i in range(nObjects):
        workspec = WorkSpec()
        workspec.workerID = i
        workspec.workAttributes = {
            'random': [random.random(), random.random()],
        }
        mq.put(workspec)
    time_consumed = time.time() - time_point
    # A coarse clock can report zero elapsed time; print an infinite rate
    # instead of raising ZeroDivisionError.
    rate = nObjects / time_consumed if time_consumed else float('inf')
    print('Time consumed: {0} sec ; Avg: {1} obj/sec '.format(
        time_consumed, rate))

    print('size', mq.size())
    print('peek')
    print(mq.peek())

    # Phase 3: get.  The fetched objects are only needed for the timing.
    time_point = time.time()
    for i in range(nObjects):
        mq.get(timeout=3)
    time_consumed = time.time() - time_point
    rate = nObjects / time_consumed if time_consumed else float('inf')
    print('Time consumed: {0} sec ; Avg: {1} obj/sec '.format(
        time_consumed, rate))
def single_thread_test(nObjects=3, protective=False):
    """Time a clear / put / peek / get cycle on the module-level ``mq`` queue.

    The queue is cleared, ``nObjects`` WorkSpec objects carrying random
    attributes are put, the head is peeked once, and ``nObjects`` objects are
    fetched again.  Elapsed time and average throughput of each phase are
    printed.

    :param nObjects: number of objects to put and then get.
    :param protective: forwarded unchanged to ``mq.get(protective=...)``.
    """
    # Phase 1: clear the queue.
    time_point = time.time()
    print('clear')
    mq.fifo.clear()
    print('size', mq.size())
    time_consumed = time.time() - time_point
    print('Time consumed: ', time_consumed)

    # Phase 2: put.
    time_point = time.time()
    for i in range(nObjects):
        workspec = WorkSpec()
        workspec.workerID = i
        workspec.workAttributes = {
            'random': [random.random(), random.random()],
        }
        mq.put(workspec)
    time_consumed = time.time() - time_point
    # A coarse clock can report zero elapsed time; print an infinite rate
    # instead of raising ZeroDivisionError.
    rate = nObjects / time_consumed if time_consumed else float('inf')
    print('Time consumed: {0} sec ; Avg: {1} obj/sec '.format(
        time_consumed, rate))

    print('size', mq.size())
    print('peek')
    print(mq.peek())

    # Phase 3: get.  The fetched objects are only needed for the timing.
    time_point = time.time()
    for i in range(nObjects):
        mq.get(timeout=3, protective=protective)
    time_consumed = time.time() - time_point
    rate = nObjects / time_consumed if time_consumed else float('inf')
    print('Time consumed: {0} sec ; Avg: {1} obj/sec '.format(
        time_consumed, rate))
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec sized from the jobs it is meant to run.

    With no jobs only ``resourceType`` is set.  Otherwise core, RAM and disk
    requests are summed over the jobs; a job whose ``coreCount`` cannot be
    added counts as one core, and unreadable RAM/disk values are skipped.
    Every job overwrites the worker walltime, so the last job whose
    ``maxWalltime`` can be processed decides the final value.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration providing ``walltimeLimit``.
    :param resource_type: value stored as the worker's ``resourceType``.
    :return: the populated WorkSpec.
    """
    worker = WorkSpec()
    worker.resourceType = resource_type
    if not len(jobspec_list) > 0:
        return worker

    worker.nCore = 0
    worker.minRamCount = 0
    worker.maxDiskCount = 0
    worker.maxWalltime = 0
    for job in jobspec_list:
        # Cores: fall back to a single core when the job gives no usable count.
        try:
            worker.nCore += job.jobParams['coreCount']
        except Exception:
            worker.nCore += 1
        # RAM and disk: best effort, silently skipped when unavailable.
        try:
            worker.minRamCount += job.jobParams['minRamCount']
        except Exception:
            pass
        try:
            worker.maxDiskCount += job.jobParams['maxDiskCount']
        except Exception:
            pass
        # Walltime: the queue limit, raised to the job's request if larger.
        try:
            if job.jobParams['maxWalltime'] in (None, "NULL"):
                worker.maxWalltime = queue_config.walltimeLimit
            else:
                worker.maxWalltime = max(
                    int(queue_config.walltimeLimit),
                    job.jobParams['maxWalltime'])
        except Exception:
            pass
    return worker
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Build a WorkSpec whose resource requests are aggregated over the jobs.

    An empty ``jobspec_list`` yields a WorkSpec with only ``resourceType``
    set.  For a non-empty list, cores, RAM and disk are accumulated per job
    (one core is assumed when ``coreCount`` cannot be added; RAM and disk
    errors are ignored) and the walltime is rewritten for each job from the
    queue limit and the job's ``maxWalltime``.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration providing ``walltimeLimit``.
    :param resource_type: value stored as the worker's ``resourceType``.
    :return: the populated WorkSpec.
    """
    spec = WorkSpec()
    spec.resourceType = resource_type
    has_jobs = len(jobspec_list) > 0
    if has_jobs:
        spec.nCore = 0
        spec.minRamCount = 0
        spec.maxDiskCount = 0
        spec.maxWalltime = 0
        for one_job in jobspec_list:
            try:
                spec.nCore += one_job.jobParams['coreCount']
            except Exception:
                # no usable core count: count the job as single-core
                spec.nCore += 1
            try:
                spec.minRamCount += one_job.jobParams['minRamCount']
            except Exception:
                pass
            try:
                spec.maxDiskCount += one_job.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                walltime_is_set = one_job.jobParams['maxWalltime'] not in (None, "NULL")
                if walltime_is_set:
                    queue_limit = int(queue_config.walltimeLimit)
                    spec.maxWalltime = max(queue_limit, one_job.jobParams['maxWalltime'])
                else:
                    spec.maxWalltime = queue_config.walltimeLimit
            except Exception:
                pass
    return spec
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Prepare one WorkSpec that runs all jobs in ``jobspec_list`` together.

    Stores the number of jobs in ``self.nJobsPerWorker``.  When there is at
    least one job, the core count is ``nCorePerNode`` (from the submitter
    section of the queue config) times the number of jobs, the RAM request
    is the largest per-job ``minRamCount``, the disk request is the sum of
    the per-job ``maxDiskCount`` values, the walltime is the queue's
    ``walltimeLimit`` when that is set, and ``workParams`` comes from
    ``self._get_executable``.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``submitter``, ``walltimeLimit``).
    :param resource_type: not used by this implementation.
    :return: the WorkSpec (left at its defaults when there are no jobs).
    """
    logger = self.make_logger(baseLogger, method_name='make_worker')
    worker = WorkSpec()
    self.nJobsPerWorker = len(jobspec_list)
    logger.info("Worker for {0} jobs will be prepared".format(self.nJobsPerWorker))
    if not self.nJobsPerWorker > 0:
        return worker

    cores_per_node = int(queue_config.submitter['nCorePerNode'])
    worker.nCore = cores_per_node * self.nJobsPerWorker
    worker.minRamCount = 0
    worker.maxDiskCount = 0
    worker.maxWalltime = 0
    if queue_config.walltimeLimit:
        worker.maxWalltime = queue_config.walltimeLimit
        logger.debug("Wall time limit for worker: {0}".format(worker.maxWalltime))
    for job in jobspec_list:
        try:
            worker.minRamCount = max(worker.minRamCount, job.jobParams['minRamCount'])
        except Exception:
            pass
        try:
            worker.maxDiskCount += job.jobParams['maxDiskCount']
        except Exception:
            pass
        # The job's own maxWalltime is deliberately not consulted: job
        # parameters are not considered reliable for this yet.
    worker.workParams = self._get_executable(queue_config)
    return worker
def test():
    """Exercise the ARCMessenger event calls against a hard-coded ARC job.

    Builds a WorkSpec describing the job, prints its attributes, then prints
    the results of ``events_requested``, ``feed_events`` and
    ``events_to_update`` and finally calls ``acknowledge_events_files``.
    """
    from pandaharvester.harvestercore.work_spec import WorkSpec
    wspec = WorkSpec()
    jobid = "gsiftp://pcoslo5.cern.ch:2811/jobs/XkNNDmultdtn1ZPzno6AuCjpABFKDmABFKDmwqyLDmABFKDm8dOcOn"
    wspec.batchID = jobid

    # Minimal ARC job description stored in the worker attributes.
    workAttributes = {"arcjob": {}}
    workAttributes["arcjob"]["JobID"] = wspec.batchID
    workAttributes["arcjob"]["JobStatusURL"] = "ldap://{0}:2135/mds-vo-name=local,o=grid??sub?(nordugrid-job-globalid={1})".format(
        urlparse(jobid).netloc, jobid)
    workAttributes["arcjob"]["JobStatusInterfaceName"] = "org.nordugrid.ldapng"
    jobmanagementurl = arc.URL(wspec.batchID)
    jobmanagementurl.ChangePath("/jobs")
    workAttributes["arcjob"]["JobManagementURL"] = jobmanagementurl.str()
    workAttributes["arcjob"]["JobManagementInterfaceName"] = "org.nordugrid.gridftpjob"
    workAttributes["proxyrole"] = 'production'

    wspec.workAttributes = workAttributes
    wspec.accessPoint = '/tmp'
    wspec.mapType = WorkSpec.MT_OneToOne
    wspec.pandaid_list = [1234]

    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(wspec.workAttributes)
    messenger = ARCMessenger()
    print(messenger.events_requested(wspec))
    print(messenger.feed_events(wspec, {'event': 1234}))
    print(messenger.events_to_update(wspec))
    messenger.acknowledge_events_files(wspec)
def test(jobid):
    """Kill a job.

    Builds a WorkSpec describing the ARC job ``jobid``, prints its
    attributes and prints the result of ``ARCSweeper.kill_worker``.

    :param jobid: ARC job identifier, used as batch ID and parsed as a URL.
    """
    from pandaharvester.harvestercore.work_spec import WorkSpec
    wspec = WorkSpec()
    wspec.batchID = jobid

    # Minimal ARC job description stored in the worker attributes.
    workAttributes = {"arcjob": {}}
    workAttributes["arcjob"]["JobID"] = wspec.batchID
    workAttributes["arcjob"]["JobStatusURL"] = "ldap://{0}:2135/mds-vo-name=local,o=grid??sub?(nordugrid-job-globalid={1})".format(
        urlparse.urlparse(jobid).netloc, wspec.batchID)
    workAttributes["arcjob"]["JobStatusInterfaceName"] = "org.nordugrid.ldapng"
    jobmanagementurl = arc.URL(wspec.batchID)
    jobmanagementurl.ChangePath("/jobs")
    workAttributes["arcjob"]["JobManagementURL"] = jobmanagementurl.str()
    workAttributes["arcjob"]["JobManagementInterfaceName"] = "org.nordugrid.gridftpjob"

    wspec.workAttributes = workAttributes
    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(wspec.workAttributes)

    sweeper = ARCSweeper()
    print(sweeper.kill_worker(wspec))
def test():
    """Run the ARCMessenger event methods on a WorkSpec for a fixed ARC job.

    The WorkSpec attributes are printed first, followed by the return values
    of ``events_requested``, ``feed_events`` and ``events_to_update``;
    ``acknowledge_events_files`` is called last.
    """
    from pandaharvester.harvestercore.work_spec import WorkSpec
    wspec = WorkSpec()
    jobid = "gsiftp://pcoslo5.cern.ch:2811/jobs/XkNNDmultdtn1ZPzno6AuCjpABFKDmABFKDmwqyLDmABFKDm8dOcOn"
    wspec.batchID = jobid

    # Management URL is the job URL with its path replaced by "/jobs".
    jobmanagementurl = arc.URL(wspec.batchID)
    jobmanagementurl.ChangePath("/jobs")

    workAttributes = {"arcjob": {}}
    arcjob = workAttributes["arcjob"]
    arcjob["JobID"] = wspec.batchID
    arcjob["JobStatusURL"] = "ldap://{0}:2135/mds-vo-name=local,o=grid??sub?(nordugrid-job-globalid={1})".format(
        urlparse(jobid).netloc, jobid)
    arcjob["JobStatusInterfaceName"] = "org.nordugrid.ldapng"
    arcjob["JobManagementURL"] = jobmanagementurl.str()
    arcjob["JobManagementInterfaceName"] = "org.nordugrid.gridftpjob"
    workAttributes["proxyrole"] = 'production'

    wspec.workAttributes = workAttributes
    wspec.accessPoint = '/tmp'
    wspec.mapType = WorkSpec.MT_OneToOne
    wspec.pandaid_list = [1234]

    # print(...) with one argument behaves identically on Python 2 and 3.
    print(wspec.workAttributes)
    messenger = ARCMessenger()
    print(messenger.events_requested(wspec))
    print(messenger.feed_events(wspec, {'event': 1234}))
    print(messenger.events_to_update(wspec))
    messenger.acknowledge_events_files(wspec)
def test(jobid):
    """Test checking status.

    Builds a WorkSpec describing the ARC job ``jobid``, prints its
    attributes and prints the result of ``ARCMonitor.check_workers`` for it.

    :param jobid: ARC job identifier, used as batch ID and parsed as a URL,
        e.g. "gsiftp://pikolit.ijs.si:2811/jobs/HtgKDmtCe7qn4J8tmqCBXHLnABFKDmABFKDmBcGKDmABFKDm4NCTCn".
    """
    from pandaharvester.harvestercore.work_spec import WorkSpec
    wspec = WorkSpec()
    wspec.batchID = jobid

    # Minimal ARC job description stored in the worker attributes.
    workAttributes = {"arcjob": {}}
    workAttributes["arcjob"]["JobID"] = wspec.batchID
    workAttributes["arcjob"]["JobStatusURL"] = "ldap://{0}:2135/mds-vo-name=local,o=grid??sub?(nordugrid-job-globalid={1})".format(
        urlparse.urlparse(jobid).netloc, jobid)
    workAttributes["arcjob"]["JobStatusInterfaceName"] = "org.nordugrid.ldapng"
    jobmanagementurl = arc.URL(wspec.batchID)
    jobmanagementurl.ChangePath("/jobs")
    workAttributes["arcjob"]["JobManagementURL"] = jobmanagementurl.str()
    workAttributes["arcjob"]["JobManagementInterfaceName"] = "org.nordugrid.gridftpjob"

    wspec.workAttributes = workAttributes
    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(wspec.workAttributes)

    monitor = ARCMonitor()
    print(monitor.check_workers([wspec]))
def test(jobid):
    """Kill a job.

    A WorkSpec carrying the ARC description of ``jobid`` is assembled and
    printed, then handed to ``ARCSweeper.kill_worker`` whose result is
    printed as well.

    :param jobid: ARC job identifier, used as batch ID and parsed as a URL.
    """
    from pandaharvester.harvestercore.work_spec import WorkSpec
    wspec = WorkSpec()
    wspec.batchID = jobid

    # Management URL is the job URL with its path replaced by "/jobs".
    jobmanagementurl = arc.URL(wspec.batchID)
    jobmanagementurl.ChangePath("/jobs")

    workAttributes = {"arcjob": {}}
    arcjob = workAttributes["arcjob"]
    arcjob["JobID"] = wspec.batchID
    arcjob["JobStatusURL"] = "ldap://{0}:2135/mds-vo-name=local,o=grid??sub?(nordugrid-job-globalid={1})".format(
        urlparse.urlparse(jobid).netloc, wspec.batchID)
    arcjob["JobStatusInterfaceName"] = "org.nordugrid.ldapng"
    arcjob["JobManagementURL"] = jobmanagementurl.str()
    arcjob["JobManagementInterfaceName"] = "org.nordugrid.gridftpjob"

    wspec.workAttributes = workAttributes
    # print(...) with one argument behaves identically on Python 2 and 3.
    print(wspec.workAttributes)

    sweeper = ARCSweeper()
    print(sweeper.kill_worker(wspec))
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec sized by the number of events left in the jobs.

    RAM, walltime and disk defaults are read from the queue's entry in
    ``PandaQueuesDict``.  When jobs are given, their ``nRemainingEvents``
    are summed and passed to ``self.calculate_worker_requirements``, whose
    result replaces the core count and walltime.  The resource type is the
    given one unless it is empty or 'ANY', in which case it is 'SCORE' for a
    single core and 'MCORE' otherwise.

    :param jobspec_list: job specs exposing ``nRemainingEvents``.
    :param queue_config: queue configuration providing ``queueName``.
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    log = self.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                           method_name='make_worker')
    log.debug('jobspec_list: {0}'.format(jobspec_list))

    worker = WorkSpec()
    worker.creationTime = datetime.datetime.utcnow()

    # queue-level defaults
    queue_dict = PandaQueuesDict().get(queue_config.queueName, {})
    worker.minRamCount = queue_dict.get('maxrss', 1) or 1
    worker.maxWalltime = queue_dict.get('maxtime', 1)
    worker.maxDiskCount = queue_dict.get('maxwdir', 1)

    # job-level overrides
    if len(jobspec_list) > 0:
        events_left = sum(job.nRemainingEvents for job in jobspec_list
                          if job.nRemainingEvents)
        worker.nCore, worker.maxWalltime = \
            self.calculate_worker_requirements(events_left)

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        chosen_type = resource_type
    elif worker.nCore == 1:
        chosen_type = 'SCORE'
    else:
        chosen_type = 'MCORE'
    worker.resourceType = chosen_type
    return worker
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Prepare one WorkSpec that runs all jobs in ``jobspec_list`` together.

    Stores the number of jobs in ``self.nJobsPerWorker``.  When there is at
    least one job, the core count is ``nCorePerNode`` (from the submitter
    section of the queue config) times the number of jobs, the RAM request
    is the largest per-job ``minRamCount``, the disk request is the sum of
    the per-job ``maxDiskCount`` values, the walltime is the queue's
    ``walltimeLimit`` when that is set, and ``workParams`` comes from
    ``self._get_executable``.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``submitter``, ``walltimeLimit``).
    :param resource_type: not used by this implementation.
    :return: the WorkSpec (left at its defaults when there are no jobs).
    """
    tmpLog = self.make_logger(baseLogger, method_name='make_worker')
    workSpec = WorkSpec()
    self.nJobsPerWorker = len(jobspec_list)
    tmpLog.info("Worker for {0} jobs will be prepared".format(
        self.nJobsPerWorker))
    if self.nJobsPerWorker > 0:
        workSpec.nCore = int(
            queue_config.submitter['nCorePerNode']) * self.nJobsPerWorker
        workSpec.minRamCount = 0
        workSpec.maxDiskCount = 0
        workSpec.maxWalltime = 0
        if queue_config.walltimeLimit:
            workSpec.maxWalltime = queue_config.walltimeLimit
            tmpLog.debug("Wall time limit for worker: {0}".format(
                workSpec.maxWalltime))
        for jobSpec in jobspec_list:
            # Best effort: a job without usable values is skipped.  Catch
            # Exception rather than everything so that KeyboardInterrupt
            # and SystemExit are not swallowed.
            try:
                workSpec.minRamCount = max(
                    workSpec.minRamCount, jobSpec.jobParams['minRamCount'])
            except Exception:
                pass
            try:
                workSpec.maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            # The job's own maxWalltime is deliberately not consulted: job
            # parameters are not considered reliable for this yet.
        workSpec.workParams = self._get_executable(queue_config)
    return workSpec
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a multi-node WorkSpec from the maker's own settings.

    Cores are ``self.nNodes`` times the submitter's ``nCorePerNode``, the
    walltime is ``self.walltimelimit`` and ``workParams`` comes from
    ``self._get_executable()``.  When jobs are given, their ``minRamCount``
    and ``maxDiskCount`` values are added up on the worker; values that
    cannot be added are skipped.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``queueName``, ``submitter``).
    :param resource_type: not used by this implementation.
    :return: the populated WorkSpec.
    """
    log = core_utils.make_logger(
        baseLogger, 'queue={0}'.format(queue_config.queueName),
        method_name='make_worker')
    log.info("Multi node worker preparation started.")
    log.info("Worker size: {0} jobs on {2} nodes for {1} sec.".format(
        self.nJobsPerWorker, self.walltimelimit, self.nNodes))

    worker = WorkSpec()
    worker.nCore = self.nNodes * queue_config.submitter['nCorePerNode']
    worker.minRamCount = 0
    worker.maxDiskCount = 0
    worker.maxWalltime = self.walltimelimit
    worker.workParams = self._get_executable()

    has_jobs = len(jobspec_list) > 0
    if has_jobs:
        # push case: the jobs are known, so add up their requirements
        for job in jobspec_list:
            try:
                worker.minRamCount += job.jobParams['minRamCount']
            except Exception:
                pass
            try:
                worker.maxDiskCount += job.jobParams['maxDiskCount']
            except Exception:
                pass
            # the job's maxWalltime is not used; walltime stays as set above

    log.info(
        "Worker for {0} nodes with {2} jobs with walltime {1} sec. defined"
        .format(self.nNodes, worker.maxWalltime, self.nJobsPerWorker))
    return worker
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Build the WorkSpec for a worker spanning ``self.nNodes`` nodes.

    The core count (nodes times ``nCorePerNode``), the walltime
    (``self.walltimelimit``) and the executable parameters
    (``self._get_executable()``) do not depend on the jobs.  RAM and disk
    requests start at zero and grow by each job's ``minRamCount`` and
    ``maxDiskCount``; jobs lacking usable values are ignored.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``queueName``, ``submitter``).
    :param resource_type: not used by this implementation.
    :return: the populated WorkSpec.
    """
    queue_tag = 'queue={0}'.format(queue_config.queueName)
    tmp_logger = core_utils.make_logger(baseLogger, queue_tag, method_name='make_worker')
    tmp_logger.info("Multi node worker preparation started.")
    size_message = "Worker size: {0} jobs on {2} nodes for {1} sec.".format(
        self.nJobsPerWorker, self.walltimelimit, self.nNodes)
    tmp_logger.info(size_message)

    spec = WorkSpec()
    spec.nCore = self.nNodes * queue_config.submitter['nCorePerNode']
    spec.minRamCount = 0
    spec.maxDiskCount = 0
    spec.maxWalltime = self.walltimelimit
    spec.workParams = self._get_executable()

    if len(jobspec_list) > 0:
        # push case: we know the jobs and take their parameters into account
        for one_job in jobspec_list:
            for target, key in (('minRamCount', 'minRamCount'),
                                ('maxDiskCount', 'maxDiskCount')):
                try:
                    setattr(spec, target, getattr(spec, target) + one_job.jobParams[key])
                except Exception:
                    pass
            # job-level maxWalltime handling is disabled for this maker

    done_message = "Worker for {0} nodes with {2} jobs with walltime {1} sec. defined".format(
        self.nNodes, spec.maxWalltime, self.nJobsPerWorker)
    tmp_logger.info(done_message)
    return spec
def make_worker(self, jobspec_list, queue_config, job_type, resource_type):
    """Build a WorkSpec whose size follows the jobs' remaining events.

    Queue-level RAM, walltime and disk values from ``PandaQueuesDict`` are
    applied first.  With at least one job, the total of the jobs'
    ``nRemainingEvents`` is given to ``self.calculate_worker_requirements``
    and the returned core count and walltime are stored on the worker.
    ``resourceType`` is the requested type, or 'SCORE'/'MCORE' derived from
    the core count when the request is empty or 'ANY'.

    :param jobspec_list: job specs exposing ``nRemainingEvents``.
    :param queue_config: queue configuration providing ``queueName``.
    :param job_type: not used by this implementation.
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    queue_name = queue_config.queueName
    tmp_logger = self.make_logger(_logger, 'queue={0}'.format(queue_name),
                                  method_name='make_worker')
    tmp_logger.debug('jobspec_list: {0}'.format(jobspec_list))

    spec = WorkSpec()
    spec.creationTime = datetime.datetime.utcnow()

    # defaults from the queue configuration
    all_queues = PandaQueuesDict()
    queue_dict = all_queues.get(queue_config.queueName, {})
    spec.minRamCount = queue_dict.get('maxrss', 1) or 1
    spec.maxWalltime = queue_dict.get('maxtime', 1)
    spec.maxDiskCount = queue_dict.get('maxwdir', 1)

    # requirements derived from the jobs
    if len(jobspec_list) > 0:
        remaining_total = 0
        for one_job in jobspec_list:
            if not one_job.nRemainingEvents:
                continue
            remaining_total += one_job.nRemainingEvents
        cores, walltime = self.calculate_worker_requirements(remaining_total)
        spec.nCore = cores
        spec.maxWalltime = walltime

    # TODO: this needs to be improved with real resource types
    explicit_type = resource_type and resource_type != 'ANY'
    if explicit_type:
        spec.resourceType = resource_type
    elif spec.nCore == 1:
        spec.resourceType = 'SCORE'
    else:
        spec.resourceType = 'MCORE'
    return spec
except: tmp_log.error('Excepted with: {0}'.format(traceback.format_exc())) if __name__ == "__main__": """ Quick tests """ from pandaharvester.harvestercore.queue_config_mapper import QueueConfigMapper queue_config_mapper = QueueConfigMapper() apfmon = Apfmon(queue_config_mapper) apfmon.create_factory() apfmon.create_labels() worker_a = WorkSpec() worker_a.batchID = 1 worker_a.computingSite = 'CERN-PROD-DEV_UCORE' worker_a.computingElement = 'bla1' worker_a.workAttributes = { "batchLog": "https://aipanda024.cern.ch/condor_logs/18-07-19_09/grid.9659.0.log", "stdErr": "https://aipanda024.cern.ch/condor_logs/18-07-19_09/grid.9659.0.err", "stdOut": "https://aipanda024.cern.ch/condor_logs/18-07-19_09/grid.9659.0.out" } worker_a.pandaid_list = [1234, 5678] worker_b = WorkSpec() worker_b.batchID = 2
def _put_object(i_index):
    """Put one WorkSpec identified by ``i_index`` into the module-level ``mq``.

    The work attributes hold a two-element 'random' list: the square of the
    index modulo 65536 and a fresh random number.

    :param i_index: value stored as the worker ID and used to derive the
        first attribute value.
    """
    item = WorkSpec()
    item.workerID = i_index
    squared_mod = (i_index ** 2) % 65536
    item.workAttributes = {'random': [squared_mod, random.random()]}
    mq.put(item)
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec from the queue configuration and, if given, the jobs.

    Core count and RAM come from the queue's ``PandaQueuesDict`` entry for
    traditional queues, and from the resource type for unified queues
    (``self.rt_mapper`` in general, a hard-coded rule for two Taiwan queues).
    Walltime and disk always start from the queue entry.  When jobs are
    given, their summed requirements override those values for every
    attribute listed in ``self.jobAttributesToUse``; on unified queues cores
    and RAM are always overridden.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``queueName``, ``walltimeLimit``).
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    import math

    tmpLog = self.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                              method_name='make_worker')
    tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

    workSpec = WorkSpec()
    workSpec.creationTime = datetime.datetime.utcnow()

    # get the queue configuration from the DB
    panda_queues_dict = PandaQueuesDict()
    queue_dict = panda_queues_dict.get(queue_config.queueName, {})

    unified_queue = 'unifiedPandaQueue' in queue_dict.get('catchall', '') \
        or queue_dict.get('capability', '') == 'ucore'

    # case of traditional (non-unified) queue: look at the queue configuration
    if not unified_queue:
        workSpec.nCore = queue_dict.get('corecount', 1) or 1
        workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

    # case of unified queue: look at the resource type and queue configuration
    else:
        if queue_config.queueName in ('Taiwan-LCG2-HPC2_Unified', 'Taiwan-LCG2-HPC_Unified'):
            # temporary hack to debug killed workers in Taiwan queues
            site_corecount = queue_dict.get('corecount', 1) or 1
            site_maxrss = queue_dict.get('maxrss', 1) or 1

            # some cases need to overwrite those values
            if 'SCORE' in resource_type:
                # the usual pilot streaming use case
                workSpec.nCore = 1
                # "/" is true division on Python 3 and would store a float;
                # round up so the RAM request stays a whole number.
                workSpec.minRamCount = int(math.ceil(site_maxrss / site_corecount))
            else:
                # default values
                workSpec.nCore = site_corecount
                workSpec.minRamCount = site_maxrss
        else:
            workSpec.nCore, workSpec.minRamCount = \
                self.rt_mapper.calculate_worker_requirements(resource_type, queue_dict)

    # parameters that are independent on traditional vs unified
    workSpec.maxWalltime = queue_dict.get('maxtime', 1)
    workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)

    # get info from jobs
    if len(jobspec_list) > 0:
        nCore = 0
        minRamCount = 0
        maxDiskCount = 0
        maxWalltime = 0
        for jobSpec in jobspec_list:
            job_corecount, job_memory = self.get_job_core_and_memory(queue_dict, jobSpec)
            nCore += job_corecount
            minRamCount += job_memory
            try:
                maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                if jobSpec.jobParams['maxWalltime'] not in (None, "NULL"):
                    # NOTE(review): this checks for 'maxWalltime' but reads
                    # 'walltimeLimit' -- confirm which attribute was intended.
                    if hasattr(queue_config, 'maxWalltime'):
                        maxWalltime = max(int(queue_config.walltimeLimit),
                                          jobSpec.jobParams['maxWalltime'])
                    else:
                        maxWalltime = jobSpec.jobParams['maxWalltime']
                else:
                    maxWalltime = queue_config.walltimeLimit
            except Exception:
                pass

        # Job totals replace queue values only for the configured attributes;
        # unified queues always take cores and RAM from the jobs.
        if (nCore > 0 and 'nCore' in self.jobAttributesToUse) \
                or unified_queue:
            workSpec.nCore = nCore
        if (minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse) \
                or unified_queue:
            workSpec.minRamCount = minRamCount
        if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
            workSpec.maxDiskCount = maxDiskCount
        if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
            workSpec.maxWalltime = maxWalltime

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        workSpec.resourceType = resource_type
    elif workSpec.nCore == 1:
        workSpec.resourceType = 'SCORE'
    else:
        workSpec.resourceType = 'MCORE'

    return workSpec
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec for the push case (jobs known) or the pull case.

    Push case: cores, RAM and disk are summed over the jobs (one core is
    assumed when ``coreCount`` cannot be added) and the walltime is rewritten
    for each job from the queue limit and the job's ``maxWalltime``.

    Pull case: all values come from the queue's entry in the cached
    'panda_queues.json'; on unified queues a resource type containing
    'SCORE' requests one core and a per-core share of the site RAM.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``queueName``, ``walltimeLimit``).
    :param resource_type: resource type, consulted in the pull case only.
    :return: the populated WorkSpec.
    """
    import math

    tmpLog = core_utils.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                                    method_name='make_worker')
    tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

    workSpec = WorkSpec()

    if len(jobspec_list) > 0:
        # push case: we know the job and set the parameters of the job
        workSpec.nCore = 0
        workSpec.minRamCount = 0
        workSpec.maxDiskCount = 0
        workSpec.maxWalltime = 0
        for jobSpec in jobspec_list:
            # Best-effort reads: catch Exception rather than everything so
            # that KeyboardInterrupt and SystemExit are not swallowed.
            try:
                workSpec.nCore += jobSpec.jobParams['coreCount']
            except Exception:
                workSpec.nCore += 1
            try:
                workSpec.minRamCount += jobSpec.jobParams['minRamCount']
            except Exception:
                pass
            try:
                workSpec.maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                if jobSpec.jobParams['maxWalltime'] not in (None, "NULL"):
                    workSpec.maxWalltime = max(int(queue_config.walltimeLimit),
                                               jobSpec.jobParams['maxWalltime'])
                else:
                    workSpec.maxWalltime = queue_config.walltimeLimit
            except Exception:
                pass
    else:
        # pull case: there is no predefined job, so we need to set the
        # parameters based on the queue definition and the resource type

        # get the queue configuration from the DB
        panda_queues_cache = self.dbInterface.get_cache('panda_queues.json')
        panda_queues_dict = dict() if not panda_queues_cache else panda_queues_cache.data
        queue_dict = panda_queues_dict.get(queue_config.queueName, {})

        unified_queue = 'unifiedPandaQueue' in queue_dict.get('catchall', '')
        # case of traditional (non-unified) queue: look at the queue configuration
        if not unified_queue:
            workSpec.nCore = queue_dict.get('corecount', 1) or 1
            workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

        # case of unified queue: look at the resource type and queue configuration
        else:
            site_corecount = queue_dict.get('corecount', 1) or 1
            site_maxrss = queue_dict.get('maxrss', 1) or 1

            if 'SCORE' in resource_type:
                workSpec.nCore = 1
                # "/" is true division on Python 3 and would store a float;
                # round up so the RAM request stays a whole number.
                workSpec.minRamCount = int(math.ceil(site_maxrss / site_corecount))
            else:
                workSpec.nCore = site_corecount
                workSpec.minRamCount = site_maxrss

        # parameters that are independent on traditional vs unified
        workSpec.maxWalltime = queue_dict.get('maxtime', 1)
        workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)

    return workSpec
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec from the queue configuration and, if given, the jobs.

    Core count and RAM come from the queue's ``PandaQueuesDict`` entry for
    traditional queues, and from the resource type for unified ('ucore')
    queues: ``self.rt_mapper`` in general, or the site maxima when the
    queue's catchall contains 'useMaxRam' or the queue is one of a few
    hard-coded names.  Walltime and disk start from the queue entry.

    With jobs, summed job requirements override the attributes listed in
    ``self.jobAttributesToUse`` (cores and RAM always on unified queues) and
    the pilot type is taken from the first job.  Without jobs the pilot type
    is drawn at random from ``self.pilotTypeRandomList``.

    :param jobspec_list: job specs exposing ``jobParams`` and ``get_pilot_type``.
    :param queue_config: queue configuration (``queueName``, optional
        ``walltimeLimit``).
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    import math

    tmpLog = self.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                              method_name='make_worker')
    tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

    workSpec = WorkSpec()
    workSpec.creationTime = datetime.datetime.utcnow()

    # get the queue configuration from the DB
    panda_queues_dict = PandaQueuesDict()
    queue_dict = panda_queues_dict.get(queue_config.queueName, {})

    unified_queue = queue_dict.get('capability', '') == 'ucore'
    # case of traditional (non-unified) queue: look at the queue configuration
    if not unified_queue:
        workSpec.nCore = queue_dict.get('corecount', 1) or 1
        workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

    # case of unified queue: look at the resource type and queue configuration
    else:
        catchall = queue_dict.get('catchall', '')
        if 'useMaxRam' in catchall or queue_config.queueName in (
                'Taiwan-LCG2-HPC2_Unified', 'Taiwan-LCG2-HPC_Unified', 'DESY-ZN_UCORE'):
            # temporary hack to debug killed workers in Taiwan queues
            site_corecount = queue_dict.get('corecount', 1) or 1
            site_maxrss = queue_dict.get('maxrss', 1) or 1

            # some cases need to overwrite those values
            if 'SCORE' in resource_type:
                # the usual pilot streaming use case
                workSpec.nCore = 1
                # "/" is true division on Python 3 and would store a float;
                # round up so the RAM request stays a whole number.
                workSpec.minRamCount = int(math.ceil(site_maxrss / site_corecount))
            else:
                # default values
                workSpec.nCore = site_corecount
                workSpec.minRamCount = site_maxrss
        else:
            workSpec.nCore, workSpec.minRamCount = \
                self.rt_mapper.calculate_worker_requirements(resource_type, queue_dict)

    # parameters that are independent on traditional vs unified
    workSpec.maxWalltime = queue_dict.get('maxtime', 1)
    workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)
    walltimeLimit_default = getattr(queue_config, 'walltimeLimit', 0)

    if len(jobspec_list) > 0:
        # get info from jobs
        nCore = 0
        minRamCount = 0
        maxDiskCount = 0
        maxWalltime = 0
        ioIntensity = 0
        for jobSpec in jobspec_list:
            job_corecount, job_memory = self.get_job_core_and_memory(queue_dict, jobSpec)
            nCore += job_corecount
            minRamCount += job_memory
            try:
                maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                ioIntensity += jobSpec.jobParams['ioIntensity']
            except Exception:
                pass
        try:
            # maxWallTime from AGIS or qconf, not trusting job currently
            maxWalltime = queue_dict.get('maxtime', walltimeLimit_default)
        except Exception:
            pass

        # Job totals replace queue values only for the configured attributes;
        # unified queues always take cores and RAM from the jobs.
        if (nCore > 0 and 'nCore' in self.jobAttributesToUse) \
                or unified_queue:
            workSpec.nCore = nCore
        if (minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse) \
                or unified_queue:
            workSpec.minRamCount = minRamCount
        if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
            workSpec.maxDiskCount = maxDiskCount
        if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
            workSpec.maxWalltime = maxWalltime
        if ioIntensity > 0 and 'ioIntensity' in self.jobAttributesToUse:
            workSpec.ioIntensity = ioIntensity
        workSpec.pilotType = jobspec_list[0].get_pilot_type()
    else:
        # when no job
        # randomize pilot type with weighting
        workSpec.pilotType = random.choice(self.pilotTypeRandomList)
        if workSpec.pilotType in ['RC', 'ALRB', 'PT']:
            tmpLog.info('a worker has pilotType={0}'.format(workSpec.pilotType))

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        workSpec.resourceType = resource_type
    elif workSpec.nCore == 1:
        workSpec.resourceType = 'SCORE'
    else:
        workSpec.resourceType = 'MCORE'

    return workSpec
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Build a WorkSpec from queue settings, job requirements and pilot type.

    Cores and RAM are taken from the queue's ``PandaQueuesDict`` entry on
    traditional queues.  On unified ('ucore') queues they come from
    ``self.rt_mapper``, unless the catchall contains 'useMaxRam' or the
    queue is one of a few hard-coded names, in which case the site maxima
    are used (one core and a rounded-up per-core RAM share for 'SCORE').

    With jobs, summed job requirements override the attributes listed in
    ``self.jobAttributesToUse`` (cores and RAM always on unified queues) and
    the pilot type is that of the first job.  Without jobs, a
    prodSourceLabel is drawn from the queue's weighted choice list and
    turned into a pilot type through a temporary JobSpec.

    :param jobspec_list: job specs exposing ``jobParams`` and ``get_pilot_type``.
    :param queue_config: queue configuration (``queueName``, optional
        ``walltimeLimit`` and ``prodSourceLabelRandomWeightsPermille``).
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    log = self.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                           method_name='make_worker')
    log.debug('jobspec_list: {0}'.format(jobspec_list))

    worker = WorkSpec()
    worker.creationTime = datetime.datetime.utcnow()

    # queue settings as published in the panda queues dictionary
    queue_dict = PandaQueuesDict().get(queue_config.queueName, {})
    is_unified = queue_dict.get('capability', '') == 'ucore'

    if is_unified:
        # unified queue: resource type and queue configuration decide
        catchall = queue_dict.get('catchall', '')
        use_site_maxima = 'useMaxRam' in catchall or queue_config.queueName in (
            'Taiwan-LCG2-HPC2_Unified', 'Taiwan-LCG2-HPC_Unified', 'DESY-ZN_UCORE')
        if use_site_maxima:
            # temporary hack to debug killed workers in Taiwan queues
            site_corecount = queue_dict.get('corecount', 1) or 1
            site_maxrss = queue_dict.get('maxrss', 1) or 1
            if 'SCORE' in resource_type:
                # the usual pilot streaming use case
                worker.nCore = 1
                per_core_ram = site_maxrss / site_corecount
                worker.minRamCount = int(math.ceil(per_core_ram))
            else:
                # default values
                worker.nCore = site_corecount
                worker.minRamCount = site_maxrss
        else:
            worker.nCore, worker.minRamCount = \
                self.rt_mapper.calculate_worker_requirements(resource_type, queue_dict)
    else:
        # traditional queue: the queue configuration decides
        worker.nCore = queue_dict.get('corecount', 1) or 1
        worker.minRamCount = queue_dict.get('maxrss', 1) or 1

    # parameters that do not depend on traditional vs unified
    worker.maxWalltime = queue_dict.get('maxtime', 1)
    worker.maxDiskCount = queue_dict.get('maxwdir', 1)
    walltime_fallback = getattr(queue_config, 'walltimeLimit', 0)

    if len(jobspec_list) > 0:
        # accumulate the requirements of all jobs
        total_cores = 0
        total_ram = 0
        total_disk = 0
        total_io = 0
        walltime = 0
        for job in jobspec_list:
            job_cores, job_ram = self.get_job_core_and_memory(queue_dict, job)
            total_cores += job_cores
            total_ram += job_ram
            try:
                total_disk += job.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                total_io += job.jobParams['ioIntensity']
            except Exception:
                pass
        try:
            # walltime from AGIS or qconf, not trusting job currently
            walltime = queue_dict.get('maxtime', walltime_fallback)
        except Exception:
            pass

        if (total_cores > 0 and 'nCore' in self.jobAttributesToUse) \
                or is_unified:
            worker.nCore = total_cores
        if (total_ram > 0 and 'minRamCount' in self.jobAttributesToUse) \
                or is_unified:
            worker.minRamCount = total_ram
        if total_disk > 0 and 'maxDiskCount' in self.jobAttributesToUse:
            worker.maxDiskCount = total_disk
        if walltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
            worker.maxWalltime = walltime
        if total_io > 0 and 'ioIntensity' in self.jobAttributesToUse:
            worker.ioIntensity = total_io

        first_job = jobspec_list[0]
        worker.pilotType = first_job.get_pilot_type()
    else:
        # no job: randomize the pilot type with weighting
        weights = getattr(queue_config, 'prodSourceLabelRandomWeightsPermille', {})
        labels = core_utils.make_choice_list(pdpm=weights, default='managed')
        chosen_label = random.choice(labels)
        # a throw-away JobSpec maps the label to a pilot type
        placeholder_job = JobSpec()
        placeholder_job.jobParams = {}
        placeholder_job.jobParams['prodSourceLabel'] = chosen_label
        worker.pilotType = placeholder_job.get_pilot_type()
        del placeholder_job
        if worker.pilotType in ['RC', 'ALRB', 'PT']:
            log.info('a worker has pilotType={0}'.format(worker.pilotType))

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        worker.resourceType = resource_type
    elif worker.nCore == 1:
        worker.resourceType = 'SCORE'
    else:
        worker.resourceType = 'MCORE'
    return worker
def make_worker(self, jobspec_list, queue_config, resource_type):
    """Create a WorkSpec from the queue configuration and, if given, the jobs.

    Cores and RAM come from the queue's ``PandaQueuesDict`` entry; on
    unified queues a resource type containing 'SCORE' requests one core and
    a per-core share of the site RAM.  Walltime and disk start from the
    queue entry.  When jobs are given, their summed requirements override
    the attributes listed in ``self.jobAttributesToUse``.

    :param jobspec_list: job specs exposing a ``jobParams`` mapping.
    :param queue_config: queue configuration (``queueName``, ``walltimeLimit``).
    :param resource_type: requested resource type, may be empty or 'ANY'.
    :return: the populated WorkSpec.
    """
    import math

    tmpLog = core_utils.make_logger(_logger, 'queue={0}'.format(queue_config.queueName),
                                    method_name='make_worker')
    tmpLog.debug('jobspec_list: {0}'.format(jobspec_list))

    workSpec = WorkSpec()

    # get the queue configuration from the DB
    panda_queues_dict = PandaQueuesDict()
    queue_dict = panda_queues_dict.get(queue_config.queueName, {})

    unified_queue = 'unifiedPandaQueue' in queue_dict.get('catchall', '')
    # case of traditional (non-unified) queue: look at the queue configuration
    if not unified_queue:
        workSpec.nCore = queue_dict.get('corecount', 1) or 1
        workSpec.minRamCount = queue_dict.get('maxrss', 1) or 1

    # case of unified queue: look at the resource type and queue configuration
    else:
        site_corecount = queue_dict.get('corecount', 1) or 1
        site_maxrss = queue_dict.get('maxrss', 1) or 1

        if 'SCORE' in resource_type:
            workSpec.nCore = 1
            # "/" is true division on Python 3 and would store a float;
            # round up so the RAM request stays a whole number.
            workSpec.minRamCount = int(math.ceil(site_maxrss / site_corecount))
        else:
            workSpec.nCore = site_corecount
            workSpec.minRamCount = site_maxrss

    # parameters that are independent on traditional vs unified
    workSpec.maxWalltime = queue_dict.get('maxtime', 1)
    workSpec.maxDiskCount = queue_dict.get('maxwdir', 1)

    # get info from jobs
    if len(jobspec_list) > 0:
        nCore = 0
        minRamCount = 0
        maxDiskCount = 0
        maxWalltime = 0
        for jobSpec in jobspec_list:
            # Best-effort reads: catch Exception rather than everything so
            # that KeyboardInterrupt and SystemExit are not swallowed.
            try:
                nCore += jobSpec.jobParams['coreCount']
            except Exception:
                nCore += 1
            try:
                minRamCount += jobSpec.jobParams['minRamCount']
            except Exception:
                pass
            try:
                maxDiskCount += jobSpec.jobParams['maxDiskCount']
            except Exception:
                pass
            try:
                if jobSpec.jobParams['maxWalltime'] not in (None, "NULL"):
                    # NOTE(review): this checks for 'maxWalltime' but reads
                    # 'walltimeLimit' -- confirm which attribute was intended.
                    if hasattr(queue_config, 'maxWalltime'):
                        maxWalltime = max(int(queue_config.walltimeLimit),
                                          jobSpec.jobParams['maxWalltime'])
                    else:
                        maxWalltime = jobSpec.jobParams['maxWalltime']
                else:
                    maxWalltime = queue_config.walltimeLimit
            except Exception:
                pass

        # Job totals replace queue values only for the configured attributes.
        if nCore > 0 and 'nCore' in self.jobAttributesToUse:
            workSpec.nCore = nCore
        if minRamCount > 0 and 'minRamCount' in self.jobAttributesToUse:
            workSpec.minRamCount = minRamCount
        if maxDiskCount > 0 and 'maxDiskCount' in self.jobAttributesToUse:
            workSpec.maxDiskCount = maxDiskCount
        if maxWalltime > 0 and 'maxWalltime' in self.jobAttributesToUse:
            workSpec.maxWalltime = maxWalltime

    # TODO: this needs to be improved with real resource types
    if resource_type and resource_type != 'ANY':
        workSpec.resourceType = resource_type
    elif workSpec.nCore == 1:
        workSpec.resourceType = 'SCORE'
    else:
        workSpec.resourceType = 'MCORE'

    return workSpec