Example #1
0
    def update_status(self):
        """
        Poll the running steps and refresh the pipeline bookkeeping

        For every step found in self.running the step status is fetched and
        written to the database. A succeeded step is appended to
        self.completed, its outputs and metadata are collected and it is
        removed from self.running. A failed or interrupted step sets the
        pipeline status accordingly and clears self.all_ok. The pipeline
        status is written to the database once per inspected step.
        """
        # Iterate over a copy: succeeded steps are popped from self.running
        snapshot = copy.copy(self.running)
        for name in snapshot:
            self.log.debug('Running jobs: %s' % ','.join(self.running))

            step = self.running[name]
            step_status, jobs_status = step.get_status()
            self.db.update_step_status(name, step_status, jobs_status)

            # A queued pipeline becomes running once one of its steps runs
            if self.status == JOB_STATUS.QUEUED and step_status == JOB_STATUS.RUNNING:
                self.status = JOB_STATUS.RUNNING

            if step_status == JOB_STATUS.SUCCEEDED:
                self.completed.append(name)
                self.log.info("Step %s completed" % name)

                # Collect what the step produced, plus where it wrote it
                self.outputs[name] = step.keys_values('outputs')
                self.outputs[name]['output_dir'] = step.output_dir

                self.update_metadata(name, step.meta)
                self.db.set_step_outputs(name, self.outputs[name])
                self.log.debug('Got outputs:\n%s' % ut.format_dict(self.outputs[name], indent=4))

                self.running.pop(name)
                self.log.info('Completed jobs: (%s)' % ','.join(self.completed))
            elif step_status == JOB_STATUS.FAILED:
                self.log.error('Step %s failed' % name)
                self.log.error('+++ Stopping pipeline %s +++' % self.name)
                self.status = JOB_STATUS.FAILED
                self.all_ok = False
            elif step_status == JOB_STATUS.INTERRUPTED:
                self.log.error('Step %s interrupted' % name)
                self.log.error('+++ Stopping pipeline %s +++' % self.name)
                self.status = JOB_STATUS.INTERRUPTED
                self.all_ok = False

            self.db.update_pipeline_status(self.status)
Example #2
0
    def update_status(self):
        """
        Refresh the state of every running step and of the pipeline

        Each step in self.running is asked for its status, which is stored in
        the database. Succeeded steps are recorded in self.completed together
        with their outputs and metadata, then dropped from self.running.
        Failed or interrupted steps mark the pipeline with the same status and
        set self.all_ok to False. The pipeline status is pushed to the
        database after each step has been examined.
        """
        # A copy is iterated because self.running shrinks inside the loop
        for step_id in copy.copy(self.running):
            running_names = ','.join(self.running)
            self.log.debug('Running jobs: %s' % running_names)

            job = self.running[step_id]
            step_status, jobs_status = job.get_status()
            self.db.update_step_status(step_id, step_status, jobs_status)

            if self.status == JOB_STATUS.QUEUED and step_status == JOB_STATUS.RUNNING:
                # First running step: the pipeline is no longer queued
                self.status = JOB_STATUS.RUNNING

            if step_status == JOB_STATUS.SUCCEEDED:
                self.completed.append(step_id)
                self.log.info("Step %s completed" % step_id)

                self.outputs[step_id] = job.keys_values('outputs')
                self.outputs[step_id]['output_dir'] = job.output_dir

                self.update_metadata(step_id, job.meta)
                self.db.set_step_outputs(step_id, self.outputs[step_id])

                formatted = ut.format_dict(self.outputs[step_id], indent=4)
                self.log.debug('Got outputs:\n%s' % formatted)

                # The step is done: stop tracking it as running
                self.running.pop(step_id)
                completed_names = ','.join(self.completed)
                self.log.info('Completed jobs: (%s)' % completed_names)
            elif step_status == JOB_STATUS.FAILED:
                self.log.error('Step %s failed' % step_id)
                self.log.error('+++ Stopping pipeline %s +++' % self.name)
                self.status = JOB_STATUS.FAILED
                self.all_ok = False
            elif step_status == JOB_STATUS.INTERRUPTED:
                self.log.error('Step %s interrupted' % step_id)
                self.log.error('+++ Stopping pipeline %s +++' % self.name)
                self.status = JOB_STATUS.INTERRUPTED
                self.all_ok = False

            self.db.update_pipeline_status(self.status)
Example #3
0
    def run_step(self, step_name):
        """
        Configure and run a job for the given step

        The 'inputs' pseudo-step is never submitted: its configured values are
        recorded as its outputs and it is marked as succeeded immediately.
        For any other step a configuration dictionary is assembled, the step
        is loaded with Step.load_step, distributed, stored in self.running and
        registered in the database.

        :param step_name: name of the step to run ('inputs', FINAL_STEP or a
                          node of self.dag)
        """

        def merge_input(config, key, value):
            """
            Store value under key, concatenating it to any value bound earlier
            """
            if key not in config:
                config[key] = value
                return

            current = config[key]
            if isinstance(current, basestring):
                current = [current]
            elif isinstance(current, list):
                # The stored list may be the very object kept in self.outputs:
                # extend a copy so the predecessor outputs stay untouched
                current = list(current)

            if isinstance(value, basestring):
                # extend() would split the string into single characters
                current.append(value)
            else:
                current.extend(value)
            config[key] = current

        #skip the input step
        if step_name == 'inputs':
            self.completed.append(step_name)
            self.outputs[step_name] = self.cfg['config']['steps'].get(step_name, {})
            self.outputs[step_name]['output_dir'] = ''
            self.db.update_step_status(step_name, JOB_STATUS.RUNNING)
            self.db.update_step_status(step_name, JOB_STATUS.SUCCEEDED)
            self.db.set_step_outputs(step_name, self.outputs[step_name])
            return

        if self.one_step:
            # NOTE: step_config is self.cfg itself, not a copy
            step_config = self.cfg
            step_config['sys_path'] = self.sys_path
            step_config['output_dir'] = self.output_dir
            # Same metadata layout as in the other branches: the sections
            # sit directly under 'meta', otherwise the lookup of
            # step_config['meta']['pipeline'] below raises a KeyError
            step_config['meta'] = { 'pipeline':{}, 'step':{}, 'job':{} }
            ut.dict_update(step_config['meta']['pipeline'], self.meta['pipeline'])
        elif step_name == FINAL_STEP:
            step_config = { 'meta' : { 'pipeline':{}, 'step':{}, 'job':{} } }
            ut.dict_update(step_config['meta']['pipeline'], self.meta['pipeline'])
            step_config['name'] = FINAL_STEP
            step_config['step_class'] = self.dag.node[step_name]['class_name']
            step_config['target_dir'] = self.output_dir
            step_config['source_dir'] = self.work_dir
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)
            self.configure_finalstep(step_config)
        else:
            step_config = { 'meta' : { 'pipeline':{}, 'step':{}, 'job':{} } }
            ut.dict_update(step_config['meta']['pipeline'], self.meta['pipeline'])
            step_class = self.dag.node[step_name]['class_name']
            step_config['name'] = step_name
            step_config['sys_path'] = self.sys_path
            step_config['step_class'] = step_class
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)

            # 1. Form input keys
            # Remember: edges are labelled by 'from' keys
            for pred in self.dag.predecessors(step_name):
                edge = self.dag[pred][step_name]
                for bind_to, bind_from in edge.get('bindings', {}).iteritems():
                    to_key = bind_to.split('.')[1]
                    # A binding source is either one 'step.key' string or a
                    # collection of them: handle both through the same path
                    if hasattr(bind_from, '__iter__'):
                        from_keys = bind_from
                    else:
                        from_keys = [bind_from]
                    for from_key in from_keys:
                        key = from_key.split('.')[1]
                        merge_input(step_config, to_key, self.outputs[pred][key])

                # Transfer metadata of previous step to next step
                for key in self.meta['steps'].get(pred, {}):
                    step_config['meta'][key] = self.meta['steps'][pred][key]

        # 2. Form step config.
        if not self.one_step:
            ut.dict_update(step_config, self.cfg['config']['steps'].get(step_name, {}), replace=False)
            if step_name == FINAL_STEP:
                # final step: pass full pipeline metadata
                step_config['meta'].update(self.meta)
            else:
                self.update_metadata(step_name, step_config[KEY_META])

        # 3. Submit step
        self.log.info('Executing step %s' % str(step_name))
        self.log.debug('  step configuration:\n %s' % ut.format_dict(step_config, indent=4))
        self.log.info('  step %s queued ' % str(step_name))

        self.running[step_name] = Step.load_step(step_config)
        job_counter = self.running[step_name].distribute()
        self.db.start_step(step_name, step_config, job_counter)
Example #4
0
    def run_step(self, step_name):
        """
        Configure and run a job for the given step

        Nothing is submitted for the 'inputs' pseudo-step: the values
        configured for it become its outputs and it is flagged as succeeded
        right away. Every other step gets a configuration dictionary, is
        loaded through Step.load_step, distributed, kept in self.running and
        registered in the database.

        :param step_name: name of the step to run ('inputs', FINAL_STEP or a
                          node of self.dag)
        """

        def bind_value(config, key, value):
            """
            Set config[key] to value, appending to a previously bound value
            """
            if key not in config:
                config[key] = value
                return

            bound = config[key]
            if isinstance(bound, basestring):
                bound = [bound]
            elif isinstance(bound, list):
                # This list can be the object stored in self.outputs for a
                # predecessor: work on a copy instead of growing it in place
                bound = list(bound)

            if isinstance(value, basestring):
                # A string passed to extend() is added character by character
                bound.append(value)
            else:
                bound.extend(value)
            config[key] = bound

        #skip the input step
        if step_name == 'inputs':
            self.completed.append(step_name)
            self.outputs[step_name] = self.cfg['config']['steps'].get(
                step_name, {})
            self.outputs[step_name]['output_dir'] = ''
            self.db.update_step_status(step_name, JOB_STATUS.RUNNING)
            self.db.update_step_status(step_name, JOB_STATUS.SUCCEEDED)
            self.db.set_step_outputs(step_name, self.outputs[step_name])
            return

        if self.one_step:
            # NOTE: the pipeline configuration itself is used (and modified)
            # as the step configuration
            step_config = self.cfg
            step_config['sys_path'] = self.sys_path
            step_config['output_dir'] = self.output_dir
            # The metadata sections live directly under 'meta', as in the
            # other branches; nesting them one level deeper makes the
            # step_config['meta']['pipeline'] lookup below fail
            step_config['meta'] = {'pipeline': {}, 'step': {}, 'job': {}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
        elif step_name == FINAL_STEP:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_config['name'] = FINAL_STEP
            step_config['step_class'] = self.dag.node[step_name][
                'class_name']
            step_config['target_dir'] = self.output_dir
            step_config['source_dir'] = self.work_dir
            step_config['output_dir'] = os.path.join(
                self.work_dir, step_name)
            self.configure_finalstep(step_config)
        else:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_class = self.dag.node[step_name]['class_name']
            step_config['name'] = step_name
            step_config['sys_path'] = self.sys_path
            step_config['step_class'] = step_class
            step_config['output_dir'] = os.path.join(
                self.work_dir, step_name)

            # 1. Form input keys
            # Remember: edges are labelled by 'from' keys
            for pred in self.dag.predecessors(step_name):
                edge = self.dag[pred][step_name]
                bindings = edge.get('bindings', {})
                for bind_to, bind_from in bindings.iteritems():
                    to_key = bind_to.split('.')[1]
                    # The source of a binding is a single 'step.key' string
                    # or a collection of such strings
                    if hasattr(bind_from, '__iter__'):
                        sources = bind_from
                    else:
                        sources = [bind_from]
                    for source in sources:
                        from_key = source.split('.')[1]
                        bind_value(step_config, to_key,
                                   self.outputs[pred][from_key])

                # Transfer metadata of previous step to next step
                for key in self.meta['steps'].get(pred, {}):
                    step_config['meta'][key] = self.meta['steps'][pred][
                        key]

        # 2. Form step config.
        if not self.one_step:
            ut.dict_update(step_config,
                           self.cfg['config']['steps'].get(step_name, {}),
                           replace=False)
            if step_name == FINAL_STEP:
                # final step: pass full pipeline metadata
                step_config['meta'].update(self.meta)
            else:
                self.update_metadata(step_name, step_config[KEY_META])

        # 3. Submit step
        self.log.info('Executing step %s' % str(step_name))
        self.log.debug('  step configuration:\n %s' %
                       ut.format_dict(step_config, indent=4))
        self.log.info('  step %s queued ' % str(step_name))

        self.running[step_name] = Step.load_step(step_config)
        job_counter = self.running[step_name].distribute()
        self.db.start_step(step_name, step_config, job_counter)