def distribute(self):
    """
    Submit the step to the scheduler, parallelizing the iterable inputs.

    Returns:
        int: the number of jobs registered for this step.
    """
    self.status = JOB_STATUS.QUEUED

    # Local steps always run on the local scheduler; everything else uses
    # the module-level scheduler selected via set_scheduler().
    self.scheduler = get_scheduler("SCHED_LOCAL") if self.local_step else scheduler

    if Step.__cfg_is_changed(self.cfg):
        # Configuration changed: persist it and drop any stale pickled state.
        Step.__write_cfg_file(self.cfg)
        Step.__remove_pickle(self.cfg)
    elif self.is_pickled():
        # Same configuration as last run and saved state exists: restore it,
        # and skip resubmission entirely if the step already succeeded.
        self.load_pickle()
        if self.status == JOB_STATUS.SUCCEEDED:
            self.log.info('Skipping step %s: configuration has not been changed' % self.name)
            return len(self.jobs)

    iterables = self.get_iterables()
    if not iterables:
        # Nothing to fan out: submit one job with the full configuration.
        self.submit_job(copy.deepcopy(self.cfg))
        return len(self.jobs)

    # Any iterable supplied as an existing file path is expanded to the
    # file's lines. (Python 2: str has no __iter__, so plain strings fall
    # through to the os.path.exists check.)
    for key in iterables:
        value = self.cfg.get(key, [])
        if not hasattr(value, '__iter__') and os.path.exists(value):
            with open(value) as handle:
                self.cfg[key] = handle.read().splitlines()

    # One job per element of the first iterable; each job gets the
    # index-matched element of every iterable plus per-job metadata.
    for idx in range(len(self.cfg[iterables[0]])):
        job_cfg = copy.deepcopy(self.cfg)
        for key in iterables:
            if self.cfg.get(key):  # permit a null file
                job_cfg[key] = self.cfg[key][idx]
        job_cfg['meta']['pipeline'] = self.cfg['meta']['pipeline']
        job_cfg['meta']['step'] = self.cfg['meta']['step']
        # Per-job metadata values are index-aligned with the iterables.
        for meta_key, values in self.cfg['meta']['job'].iteritems():
            job_cfg['meta']['job'][meta_key] = values[idx]
        self.submit_job(job_cfg)
    return len(self.jobs)
def distribute(self):
    """
    Submit the step to the scheduler, parallelizing the iterable inputs.

    Returns:
        int: the number of jobs registered for this step.
    """
    self.status = JOB_STATUS.QUEUED
    # initialize the scheduler: local steps always use the local scheduler,
    # all others use the module-level one chosen via set_scheduler()
    if self.local_step:
        self.scheduler = get_scheduler("SCHED_LOCAL")
    else:
        self.scheduler = scheduler
    if Step.__cfg_is_changed(self.cfg):
        # configuration changed: persist it and drop any stale pickled state
        Step.__write_cfg_file(self.cfg)
        Step.__remove_pickle(self.cfg)
    elif self.is_pickled():
        # unchanged configuration with saved state: restore it, and skip
        # resubmission if the step already completed successfully
        self.load_pickle()
        if self.status == JOB_STATUS.SUCCEEDED:
            self.log.info('Skipping step %s: configuration has not been changed' % self.name)
            return len(self.jobs)
    iterables = self.get_iterables()
    if iterables:
        # Step needs to be distributed
        for iterable in iterables:
            # If this is a file, convert it to list from file contents
            # (Python 2: str has no __iter__, so a path string falls
            # through to the os.path.exists check)
            iterable_input = self.cfg.get(iterable, [])
            if not hasattr(iterable_input, '__iter__') \
                    and os.path.exists(iterable_input):
                with open(iterable_input) as f:
                    self.cfg[iterable] = f.read().splitlines()
        # one job per element of the first iterable input
        for index in range(0, len(self.cfg[iterables[0]])):
            # copy the config file
            job_cfg = copy.deepcopy(self.cfg)
            # copy the iterable specific to the job
            for iterable in iterables:
                if iterable in self.cfg and self.cfg[iterable]:  # permit a null file
                    job_cfg[iterable] = self.cfg[iterable][index]
            job_cfg['meta']['pipeline'] = self.cfg['meta']['pipeline']
            job_cfg['meta']['step'] = self.cfg['meta']['step']
            # per-job metadata values are index-aligned with the iterables
            for key, value in self.cfg['meta']['job'].iteritems():
                job_cfg['meta']['job'][key] = value[index]
            self.submit_job(job_cfg)
    else:
        job_cfg = copy.deepcopy(self.cfg)
        self.submit_job(job_cfg)
    return len(self.jobs)
def __init__(self):
    """
    Initialize common step state and apply the subclass-provided spec.

    Reads self.spec (declared by the concrete Step subclass) to derive the
    step name, version, locality, declared arguments and requirements.
    """
    self.bootstrap = STARTUP_CYCLE
    self.status = JOB_STATUS.QUEUED
    self.meta = {'pipeline': {}, 'step': {}, 'job': {}}
    self.requirements = {'memory': '1', 'cpus': '1'}
    self.output_dir = '.'
    self.jobs = OrderedDict()
    self.cmd_count = 0

    logger.set_stdout_level(logger.DEBUG)
    self.log = logger.get_log()

    # Derive the step name from the module path: nespipe.steps.<...>.<name>
    step_name = self.__module__.replace('nespipe.steps.', '').split('.')[-1]
    self.spec["name"] = step_name
    self.name = step_name
    self.__version__ = self.spec['version']
    self.local_step = self.spec.get('local', False)

    global scheduler
    self.scheduler = get_scheduler("SCHED_LOCAL") if self.local_step else scheduler

    # Expose every declared argument as an attribute, preloaded with its
    # default value (empty list when none is given).
    for params in self.spec["args"].itervalues():
        for param in params:
            if param.get('name', None):
                setattr(self, param['name'], param.get('value', []))

    ut.dict_update(
        self.requirements,
        self.spec.get('requirements', {'memory': '1', 'cpus': '1'}))
    # Requirements also become integer attributes (e.g. self.memory, self.cpus).
    for req_name, req_value in self.requirements.iteritems():
        setattr(self, req_name, int(req_value))

    # Reserve ~90% of the requested memory for the JVM, with a floor of 1.
    if 'memory' in self.requirements:
        self.jvm_memory = int(int(self.requirements['memory']) * 0.9)
        if not self.jvm_memory:
            self.jvm_memory = 1
def __init__(self):
    """
    Initialize common step state from the subclass-provided spec.

    Reads self.spec (declared by the concrete Step subclass) to derive the
    step name, version, locality, declared arguments and requirements.
    """
    self.bootstrap = STARTUP_CYCLE
    self.status = JOB_STATUS.QUEUED
    self.meta = {'pipeline': {}, 'step': {}, 'job': {}}
    self.requirements = {'memory': '1', 'cpus': '1'}
    self.output_dir = '.'
    self.jobs = OrderedDict()
    self.cmd_count = 0
    logger.set_stdout_level(logger.DEBUG)
    self.log = logger.get_log()
    # parse specs and create keys
    # step name is derived from the module path: nespipe.steps.<...>.<name>
    self.spec["name"] = self.__module__.replace('nespipe.steps.', '').split('.')[-1]
    self.name = self.spec["name"]
    self.__version__ = self.spec['version']
    self.local_step = self.spec.get('local', False)
    global scheduler
    # local steps always use the local scheduler
    if self.local_step:
        self.scheduler = get_scheduler("SCHED_LOCAL")
    else:
        self.scheduler = scheduler
    # expose every declared argument as an attribute, preloaded with its
    # default value (empty list when none is given)
    for k, v in self.spec["args"].iteritems():
        for param in v:
            if param.get('name', None):
                setattr(self, param['name'], param.get('value', []))
    ut.dict_update(self.requirements,
                   self.spec.get('requirements', {'memory': '1', 'cpus': '1'}))
    # requirements also become integer attributes (e.g. self.memory, self.cpus)
    for k, v in self.requirements.iteritems():
        setattr(self, k, int(v))
    #set the jvm memory: ~90% of the requested memory, with a floor of 1
    if 'memory' in self.requirements:
        self.jvm_memory = int(int(self.requirements['memory']) * 0.9)
        if not self.jvm_memory:
            self.jvm_memory = 1
def set_scheduler(schedname):
    """
    Replace the module-level scheduler used by non-local steps.

    Args:
        schedname: scheduler identifier understood by get_scheduler().
    """
    global scheduler
    scheduler = get_scheduler(schedname)
from nespipe.core.constants import * from nespipe.core.schedulers import get_scheduler from nespipe.core.constants import * NOT_DB_ATTR = [ 'cfg', 'reqs', 'jvm_memory', 'memory', 'cpus', 'jobs', 'sys_path', 'step_class', 'cmd_count' 'name', 'local_step', '__version__', 'scheduler', 'log' ] STEP_PICKLE = '.status.pickle' ITERABLE_TYPE = 'input_key_iterable' STARTUP_CYCLE = 50 scheduler = get_scheduler() def set_scheduler(schedname): global scheduler scheduler = get_scheduler(schedname) class Step(object): """ Base class for any step Members: - status: current status of the step - parameters: dictionary containing the definition of the parameters N.B. The actual values are stored as members
from nespipe.core.constants import * NOT_DB_ATTR = ['cfg', 'reqs', 'jvm_memory', 'memory', 'cpus', 'jobs', 'sys_path', 'step_class', 'cmd_count' 'name', 'local_step', '__version__', 'scheduler', 'log'] STEP_PICKLE = '.status.pickle' ITERABLE_TYPE = 'input_key_iterable' STARTUP_CYCLE = 50 scheduler = get_scheduler() def set_scheduler(schedname): global scheduler scheduler = get_scheduler(schedname) class Step(object): """ Base class for any step Members: - status: current status of the step - parameters: dictionary containing the definition of the parameters N.B. The actual values are stored as members - meta: dictionary containing the metadata information