def __init__(self, cfg, user='******', db=True, schedname="SCHED_CONDOR"):
    """ Read in the pipeline graph and load the configuration. """
    self.all_ok = True
    self.user = user
    self.status = JOB_STATUS.QUEUED
    self.lock = ''
    self.completed = []
    self.running = {}
    self.outputs = {}
    self.schedname = schedname
    db_model_name = "MONGO_DB" if db else "STUB_DB"

    # Load the configuration: try it as a full pipeline first, then fall
    # back to a single-step config
    self.one_step = False
    try:
        self.cfg = Pipeline.load_cfg(cfg)
    except Exception as e1:
        print('Failed to load config as pipeline (error=%s). Trying as step'
              % e1)
        try:
            self.cfg = Step.load_cfg(cfg)
            self.step = Step.load_step(self.cfg)
            self.one_step = True
        except Exception as e2:
            raise Exception("Unable to load config file %s:\n"
                            "pipeline load: %s\n"
                            "step load: %s" % (cfg, e1, e2))

    # Set all additional information
    self.run_id = self.cfg.get('run_id')
    if self.one_step:
        self.name = self.step.name
        self.label = self.step.name
        self.project_name = self.cfg.get('project_name', '')
        self.description = self.cfg.get('description', '')
        self.output_dir = self.step.output_dir
        self.ordered = [self.step.name]
    else:
        self.name = self.cfg['name']
        self.label = self.cfg['label']
        self.project_name = self.cfg['config']['pipeline'].get(
            'project_name', '')
        self.description = self.cfg['config']['pipeline'].get(
            'description', '')
        self.output_dir = self.cfg['config']['pipeline']['output_dir']
        if not self.output_dir.startswith('/scratch'):
            # TODO: Make it work for one_step as well
            self.cfg['dag']['nodes'][FINAL_STEP] = 'utils.Finalize'
        self.ordered = Pipeline.ordered_steps(self.cfg)

    self.sys_path = self.cfg.get('sys_path')
    if self.sys_path:
        sys.path.insert(0, self.sys_path)
    self.dag = self.create_dag(self.cfg, one_step=self.one_step)
    self.meta = {
        'pipeline': {
            'label': self.label,
            'project_name': self.project_name,
            'descr': self.description,
            'run_id': self.run_id
        },
        'steps': {},
        'job': {}
    }
    self.db = db_models[db_model_name](self.name,
                                       self.cfg,
                                       self.ordered,
                                       self.user,
                                       output_dir=self.output_dir)
    if hasattr(self.db, 'run_id'):
        self.run_id = self.db.run_id
        self.cfg['run_id'] = self.run_id

    # Define the output directories
    if not os.path.exists(self.output_dir):
        os.makedirs(self.output_dir, 0775)
    # Use the default work area under /scratch/cgi/nespipe (WORK_DIR,
    # symlinked into the output directory as 'work_area') if: a) this run
    # is using the db (so we have a run ID); b) it is not a demultiplexing
    # run; and c) the output directory is not already under /scratch, in
    # which case work happens in place.
    if self.run_id and not (self.name == 'demultiplexing'):
        dirname = '%s_%d' % (self.name, self.db.run_id)
        self.output_dir = os.path.join(self.output_dir, dirname)
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir, 0775)
        if self.output_dir.startswith('/scratch'):
            self.work_dir = self.output_dir
        else:
            self.work_dir = os.path.join(WORK_DIR, self.user, dirname)
            if not os.path.exists(self.work_dir):
                os.makedirs(self.work_dir, 0775)
            symlink = os.path.join(self.output_dir, 'work_area')
            if not os.path.exists(symlink):
                os.symlink(self.work_dir, symlink)
    else:
        self.work_dir = self.output_dir
    ut.pretty_print('Output directories: output_dir=%s, work_dir=%s' %
                    (self.output_dir, self.work_dir))
    self.db.update_pipeline(self.run_id, {
        'output_dir': self.output_dir,
        'work_dir': self.work_dir
    })
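
# Usage sketch (illustrative only, not part of the module): how this
# constructor is typically driven. The config path 'pipeline.cfg' and the
# driver loop below are assumptions about the surrounding code, not a
# documented entry point. self.ordered holds the topologically sorted step
# names, so running them in order guarantees every predecessor's outputs
# exist before run_step() binds them:
#
#     pipeline = Pipeline('pipeline.cfg', user='jsmith', db=False)
#     for step_name in pipeline.ordered:
#         pipeline.run_step(step_name)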
def run_step(self, step_name):
    """ Configure and run a job for the given step """
    # The input step has nothing to execute: record its configured outputs
    # and mark it as completed straight away
    if step_name == 'inputs':
        self.completed.append(step_name)
        self.outputs[step_name] = self.cfg['config']['steps'].get(
            step_name, {})
        self.outputs[step_name]['output_dir'] = ''
        self.db.update_step_status(step_name, JOB_STATUS.RUNNING)
        self.db.update_step_status(step_name, JOB_STATUS.SUCCEEDED)
        self.db.set_step_outputs(step_name, self.outputs[step_name])
    else:
        if self.one_step:
            step_config = self.cfg
            step_config['sys_path'] = self.sys_path
            step_config['output_dir'] = self.output_dir
            step_config['meta'] = {'pipeline': {}, 'step': {}, 'job': {}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
        elif step_name == FINAL_STEP:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_config['name'] = FINAL_STEP
            step_config['step_class'] = self.dag.node[step_name]['class_name']
            step_config['target_dir'] = self.output_dir
            step_config['source_dir'] = self.work_dir
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)
            self.configure_finalstep(step_config)
        else:
            step_config = {'meta': {'pipeline': {}, 'step': {}, 'job': {}}}
            ut.dict_update(step_config['meta']['pipeline'],
                           self.meta['pipeline'])
            step_config['name'] = step_name
            step_config['sys_path'] = self.sys_path
            step_config['step_class'] = self.dag.node[step_name]['class_name']
            step_config['output_dir'] = os.path.join(self.work_dir, step_name)
        # 1. Form input keys
        # Remember: edges are labelled by 'from' keys
        for pred in self.dag.predecessors(step_name):
            edge = self.dag[pred][step_name]
            # Each binding maps 'to_step.key' to either a single
            # 'from_step.key' or a list of them
            for bind_to, bind_from in edge.get('bindings', {}).iteritems():
                to_key = bind_to.split('.')[1]
                sources = bind_from if hasattr(bind_from, '__iter__') \
                    else [bind_from]
                for from_key in sources:
                    out = self.outputs[pred][from_key.split('.')[1]]
                    if to_key in step_config:
                        if isinstance(step_config[to_key], basestring):
                            step_config[to_key] = [step_config[to_key]]
                        step_config[to_key].extend(out)
                    else:
                        step_config[to_key] = out
            # Transfer metadata of the previous step to the next step
            for key in self.meta['steps'].get(pred, {}):
                step_config['meta'][key] = self.meta['steps'][pred][key]
        # 2. Form step config.
        if not self.one_step:
            ut.dict_update(step_config,
                           self.cfg['config']['steps'].get(step_name, {}),
                           replace=False)
            if step_name == FINAL_STEP:
                # Final step: pass the full pipeline metadata
                step_config['meta'].update(self.meta)
            else:
                self.update_metadata(step_name, step_config[KEY_META])
        # 3. Submit step
        self.log.info('Executing step %s' % str(step_name))
        self.log.debug('  step configuration:\n%s' %
                       ut.format_dict(step_config, indent=4))
        self.log.info('  step %s queued' % str(step_name))
        self.running[step_name] = Step.load_step(step_config)
        job_counter = self.running[step_name].distribute()
        self.db.start_step(step_name, step_config, job_counter)
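
# A minimal, self-contained sketch of the binding convention assumed above:
# edge bindings map 'to_step.key' -> 'from_step.key' (or a list of source
# keys), and each predecessor output is a list of values. The helper name
# and its flat input/output shapes are hypothetical, for illustration only.
def _resolve_bindings_example(bindings, pred_outputs):
    """Return {to_key: [values]} for one edge's bindings dict."""
    resolved = {}
    for bind_to, bind_from in bindings.items():
        to_key = bind_to.split('.')[1]
        # A single 'step.key' string or a list of them
        sources = bind_from if isinstance(bind_from, list) else [bind_from]
        for src in sources:
            values = pred_outputs[src.split('.')[1]]
            resolved.setdefault(to_key, []).extend(values)
    return resolved

# e.g. _resolve_bindings_example({'sort.input_files': 'align.bam'},
#                                {'bam': ['/tmp/a.bam']})
# -> {'input_files': ['/tmp/a.bam']}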
def post(self, run_id):
    """ Push the selected files into iRODS """
    data = request.get_json(force=True)
    runmeta = data.get('meta')
    selection = data.get('selection')
    user = auth_get_username(request.authorization, data.get('user'))
    npdis = dbmodel.get_npdi_projects()
    npdi = runmeta.get('Project NPDI ID', '')
    study_nickname = runmeta.get('Study nickname', 'Required field missing')
    if (npdi + study_nickname) not in npdis:
        return {
            'pipeline': {
                'Project': '%s (%s)' % (npdi, study_nickname)
            }
        }, 400
    run = db.pipelines.find_one({'run_id': run_id}, {'meta': 1, 'run_id': 1})
    step_names = selection.keys()
    steps = list(
        db.steps.find(
            {
                "run_id": run_id,
                "name": {"$in": step_names},
                "jobs": {"$elemMatch": {"outputs": {"$exists": True}}}
            }, {
                "name": 1,
                "jobs": 1,
                "outputs.output_dir": 1,
                "step_config": 1
            }))
    # Collect per-file metadata for every selected step's output files
    outputs = {}
    for step in steps:
        if step.get('step_config', {}):
            s = Step.load_step(step['step_config'])
            output_files = {}
            for job_id, job in enumerate(step['jobs']):
                for key in job['outputs']:
                    if key not in s.keys(key_groups='outputs',
                                         key_filter={'type': 'file'}):
                        continue
                    for i, filename in enumerate(job['outputs'][key]):
                        filemeta = {'step': step['name'], 'job_id': job_id}
                        ext = os.path.splitext(filename)[1][1:].upper()
                        for meta_key in job.get('meta', {}):
                            meta = job['meta'][meta_key]
                            if meta_key == 'sample_id':
                                okey = 'Operational sample accession'
                            else:
                                okey = meta_key
                            if isinstance(meta, list):
                                filemeta[okey] = meta[i]
                            else:
                                filemeta[okey] = meta
                        filemeta['File type'] = 'Processed data file'
                        filemeta['File format'] = ext
                        output_files[filename] = filemeta
            if output_files:
                outputs[step['name']] = output_files
    input_files = []
    meta_data = []
    for step_name, step_selection in selection.iteritems():
        for filepath in step_selection:
            input_files.append(filepath)
            filemeta = outputs[step_name][filepath]
            filemeta.update(runmeta)
            meta_data.append(filemeta)
    # Build and submit the archiving pipeline
    cfg = Pipeline.load_cfg(pipeline_specs['irods_lz'])
    cfg['config']['steps']['irods_mvtolz'] = {
        'input_files': input_files,
        'meta_data': meta_data
    }
    cfg['config']['steps']['irods_monitorlz'] = {'prun_id': run['run_id']}
    cfg['config']['pipeline']['project_name'] = run['meta']['project_name']
    cfg['config']['pipeline']['description'] = \
        'Archive data for run %s' % run['run_id']
    cfg['config']['pipeline']['output_dir'] = '/scratch/cgi/irods'
    # Get id from DB
    db_info = dbmodel.PipelineDb(cfg['name'], cfg,
                                 Pipeline.ordered_steps(cfg), user)
    cfg['run_id'] = db_info.run_id
    ut.pretty_print("Submitting pipeline %s (ID %d) for user %s" %
                    (cfg['label'], cfg['run_id'], user))
    return pm.add_pipeline(cfg, user)
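
# Expected request payload for the POST above, reconstructed from the reads
# in this handler (the field values are hypothetical; this is not a
# documented schema):
#
#     {
#         "user": "jsmith",
#         "meta": {
#             "Project NPDI ID": "NPDI-0001",
#             "Study nickname": "demo-study"
#         },
#         "selection": {
#             "alignment": ["/work/alignment/sample1.bam"]
#         }
#     }
#
# The (npdi + study_nickname) concatenation must match an entry returned by
# dbmodel.get_npdi_projects(), otherwise the handler responds with HTTP 400.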
def get(self, run_id):
    """ Return the output files of each step of the given run,
        with their archive status """
    pipeline = db.pipelines.find_one({'run_id': run_id}, {
        'config': 1,
        'file_registry': 1
    })
    file_registry = pipeline.get('file_registry', [])
    if file_registry:
        file_registry = json.loads(file_registry)
    config = Pipeline.load_cfg(json.loads(pipeline['config']))
    result_steps = config.get('config', {}).get('pipeline',
                                                {}).get('results', [])
    # Steps whose outputs are deleted after the run are not reported, and
    # neither are the bookkeeping steps 'finalize' and 'inputs'
    delete_steps = config.get('config', {}).get('pipeline',
                                                {}).get('delete', [])
    delete_steps.append('finalize')
    delete_steps.append('inputs')
    steps = list(
        db.steps.find(
            {
                "run_id": run_id,
                "name": {"$nin": delete_steps},
                "jobs": {"$elemMatch": {"outputs": {"$exists": True}}}
            }, {
                "name": 1,
                "jobs": 1,
                "outputs.output_dir": 1,
                "step_config": 1
            }))
    outputs = {}
    for step in steps:
        if step.get('step_config', {}):
            s = Step.load_step(step['step_config'])
            output_files = []
            for job in step['jobs']:
                for key in job['outputs']:
                    if key not in s.keys(key_groups='outputs',
                                         key_filter={'type': 'file'}):
                        continue
                    for filename in job['outputs'][key]:
                        output = {'path': filename}
                        if not isinstance(filename, list):
                            output['archived'] = filename in file_registry
                        else:
                            output['archived'] = False
                        output_files.append(output)
            if output_files:
                outputs[step['name']] = {
                    'archive': step['name'] in result_steps,
                    'dir': step.get('outputs', {}).get('output_dir'),
                    'files': output_files
                }
    return outputs
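
# Shape of the dict returned by get() above, inferred from its assignments
# (an illustration, not a documented contract; the paths are hypothetical):
#
#     {
#         "alignment": {
#             "archive": True,      # step listed under pipeline 'results'
#             "dir": "/work/alignment",
#             "files": [
#                 {"path": "/work/alignment/sample1.bam", "archived": False}
#             ]
#         }
#     }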