def update_status(self):
    """ Update list of completed jobs """
    for step_name in copy.copy(self.running):
        self.log.debug('Running jobs: %s' % ','.join(self.running))
        step_status, jobs_status = self.running[step_name].get_status()
        self.db.update_step_status(step_name, step_status, jobs_status)
        if self.status == JOB_STATUS.QUEUED and step_status == JOB_STATUS.RUNNING:
            self.status = JOB_STATUS.RUNNING
        if step_status == JOB_STATUS.SUCCEEDED:
            self.completed.append(step_name)
            self.log.info("Step %s completed" % step_name)
            self.outputs[step_name] = self.running[step_name].keys_values('outputs')
            self.outputs[step_name]['output_dir'] = self.running[step_name].output_dir
            self.update_metadata(step_name, self.running[step_name].meta)
            self.db.set_step_outputs(step_name, self.outputs[step_name])
            self.log.debug('Got outputs:\n%s' %
                           ut.format_dict(self.outputs[step_name], indent=4))
            self.running.pop(step_name)
            self.log.info('Completed jobs: (%s)' % ','.join(self.completed))
        elif step_status == JOB_STATUS.FAILED:
            self.log.error('Step %s failed' % step_name)
            self.log.error('+++ Stopping pipeline %s +++' % self.name)
            self.status = JOB_STATUS.FAILED
            self.all_ok = False
        elif step_status == JOB_STATUS.INTERRUPTED:
            self.log.error('Step %s interrupted' % step_name)
            self.log.error('+++ Stopping pipeline %s +++' % self.name)
            self.status = JOB_STATUS.INTERRUPTED
            self.all_ok = False
    self.db.update_pipeline_status(self.status)
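# Illustrative sketch (an assumption, not part of this class): a minimal driver
# loop showing how update_status() is intended to be polled. A real scheduler
# would also launch newly runnable steps between polls; this only demonstrates
# the status-refresh contract. The helper name and poll interval are made up.
def _poll_status_sketch(pipeline, interval=5):
    import time
    # Keep polling while steps are in flight and nothing has failed
    while pipeline.running and pipeline.status not in (JOB_STATUS.FAILED,
                                                       JOB_STATUS.INTERRUPTED):
        pipeline.update_status()
        time.sleep(interval)
    return pipeline.status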
def run_step(self, step_name):
    """ Configure and run a job for the given step """
    # Skip the input step: its outputs come straight from the pipeline config
    if step_name == 'inputs':
        self.completed.append(step_name)
        self.outputs[step_name] = self.cfg['config']['steps'].get(step_name, {})
        self.outputs[step_name]['output_dir'] = ''
        self.db.update_step_status(step_name, JOB_STATUS.RUNNING)
        self.db.update_step_status(step_name, JOB_STATUS.SUCCEEDED)
        self.db.set_step_outputs(step_name, self.outputs[step_name])
    else:
        if self.one_step:
            # Single-step run: reuse the full pipeline config as the step config
            step_config = self.cfg
            step_config['sys_path'] = self.sys_path
            step_config['output_dir'] = self.output_dir
            step_config['meta'] = {'pipeline': {}, 'step': {}, 'job': {}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
        elif step_name == FINAL_STEP:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_config['name'] = FINAL_STEP
            step_config['step_class'] = self.dag.node[step_name]['class_name']
            step_config['target_dir'] = self.output_dir
            step_config['source_dir'] = self.work_dir
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)
            self.configure_finalstep(step_config)
        else:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_class = self.dag.node[step_name]['class_name']
            step_config['name'] = step_name
            step_config['sys_path'] = self.sys_path
            step_config['step_class'] = step_class
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)

            # 1. Form input keys
            # Remember: edges are labelled by 'from' keys
            for pred in self.dag.predecessors(step_name):
                edge = self.dag[pred][step_name]
                # Usually a single binding: map each 'to' key to its 'from' key(s)
                for bind_to, bind_from in edge.get('bindings', {}).iteritems():
                    to_key = bind_to.split('.')[1]
                    if hasattr(bind_from, '__iter__'):
                        for from_key in bind_from:
                            key = from_key.split('.')[1]
                            out = self.outputs[pred][key]
                            if to_key in step_config:
                                if isinstance(step_config[to_key], basestring):
                                    step_config[to_key] = [step_config[to_key]]
                                step_config[to_key].extend(out)
                            else:
                                step_config[to_key] = out
                    else:
                        from_key = bind_from.split('.')[1]
                        out = self.outputs[pred][from_key]
                        if to_key in step_config:
                            if isinstance(step_config[to_key], basestring):
                                step_config[to_key] = [step_config[to_key]]
                            step_config[to_key].extend(out)
                        else:
                            step_config[to_key] = out
                # Transfer metadata of previous step to next step
                for key in self.meta['steps'].get(pred, {}):
                    step_config['meta'][key] = self.meta['steps'][pred][key]

        # 2. Form step config
        if not self.one_step:
            ut.dict_update(step_config,
                           self.cfg['config']['steps'].get(step_name, {}),
                           replace=False)
        if step_name == FINAL_STEP:
            # Final step: pass full pipeline metadata
            step_config['meta'].update(self.meta)
        else:
            self.update_metadata(step_name, step_config[KEY_META])

        # 3. Submit step
        self.log.info('Executing step %s' % str(step_name))
        self.log.debug(' step configuration:\n %s' %
                       ut.format_dict(step_config, indent=4))
        self.log.info(' step %s queued ' % str(step_name))
        self.running[step_name] = Step.load_step(step_config)
        job_counter = self.running[step_name].distribute()
        self.db.start_step(step_name, step_config, job_counter)
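# Illustrative sketch (an assumption, not part of the pipeline API): how the
# edge 'bindings' convention used in run_step() resolves keys. A binding maps
# '<to_step>.<key>' to one or more '<from_step>.<key>' strings, and only the
# part after the dot is used to look up predecessor outputs. Step and key
# names below are hypothetical; the merge behaviour mirrors run_step().
def _binding_resolution_sketch():
    outputs = {'align': {'bam_files': ['a.bam', 'b.bam'],
                         'log_file': ['align.log']}}
    bindings = {'sort.input_files': 'align.bam_files',     # single source key
                'sort.input_files2': ['align.log_file']}   # list of source keys
    step_config = {}
    for bind_to, bind_from in bindings.iteritems():
        to_key = bind_to.split('.')[1]
        sources = bind_from if isinstance(bind_from, list) else [bind_from]
        for src in sources:
            pred, from_key = src.split('.')
            out = outputs[pred][from_key]
            if to_key in step_config:
                # Promote an existing scalar to a list before extending
                if isinstance(step_config[to_key], basestring):
                    step_config[to_key] = [step_config[to_key]]
                step_config[to_key].extend(out)
            else:
                step_config[to_key] = out
    # Returns {'input_files': ['a.bam', 'b.bam'], 'input_files2': ['align.log']}
    return step_config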