Example #1
def tar(self, files, job_id):
    return assembly.tar_list(self.outpath, files,
                             self.name + str(job_id) + '.tar.gz')
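
A minimal sketch of what an assembly.tar_list helper plausibly does, so the
wrapper above reads in context. The body below is a hypothetical stand-in,
not the project's actual implementation:

import os
import tarfile

def tar_list(outpath, files, tar_name):
    # Bundle FILES into a gzipped tar at OUTPATH/TAR_NAME; return archive path
    target = os.path.join(outpath, tar_name)
    with tarfile.open(target, 'w:gz') as tar:
        for f in files:
            tar.add(f, arcname=os.path.basename(f))
    return target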
Example #2
def eval(x, env):
    "Evaluate an expression in an environment."
    if isa(x, Symbol):             # variable reference
        try:
            return env.find(x)[x]
        except Exception:
            raise Exception('Module "{}" not found'.format(x))
    elif not isa(x, list):         # constant literal
        return x
    elif x[0] == 'quote':          # (quote exp)
        (_, exp) = x
        return exp

    ####### Casting to FileSet Types
    elif x[0] in ['contigs', 'paired', 'single', 'reference']:
        wlink = WaspLink()
        eval_files = []
        try:
            for exp in x[1:]:
                eval_files += eval(exp, env).files
            wlink['default_output'] = asmtypes.set_factory(x[0], eval_files,
                                                           name='{}_override'.format(x[0]))
        except Exception:
            # Arguments did not evaluate to linked FileSets; treat as raw paths
            wlink['default_output'] = asmtypes.set_factory(x[0], x[1:])
        return wlink
    ##################################

    elif x[0] == 'if':             # (if test conseq [alt])
        if len(x) == 4:
            (_, test, conseq, alt) = x
        elif len(x) == 3:
            (_, test, conseq) = x
            alt = None
        else:
            raise Exception('if: expected (if test conseq [alt])')
        if eval(test, env):
            return eval(conseq, env)
        elif alt is not None:
            return eval(alt, env)

    elif x[0] == 'set!':           # (set! var exp)
        (_, var, exp) = x
        env.find(var)[var] = eval(exp, env)
    elif x[0] == 'setparam':       # (setparam param value)
        (_, param, value) = x
        try:
            env.parameters[param] = env.find(value)[value]
        except Exception:
            env.parameters[param] = value  # literal value, not a variable
    elif x[0] == 'define':         # (define var exp)
        (_, var, exp) = x
        try:
            env[var] = eval(exp, env)
        except Exception as e:
            logger.warning('Failed to evaluate definition of "{}": {}'.format(var, e))
            logger.debug(traceback.format_exc())
            env[var] = None
    elif x[0] == 'sort':           # (sort exp [pred] [:key (lambda ...)])
        seq = [link for link in eval(x[1], env) if link is not None and link.output]
        logger.debug(seq)
        if len(seq) == 1:
            return seq
        pred = x[2] if len(x) > 2 else '<'
        if len(x) > 4 and x[3] == ':key':
            lam = x[4]
            eval(['define', 'sort_func', lam], env)
        else:
            lam = None
        rev = pred == '>'
        if lam:
            ordered = sorted(seq, key=lambda n: eval(['sort_func', n], env), reverse=rev)
        else:
            ordered = sorted(seq, reverse=rev)
        return ordered
    elif x[0] == 'lambda':         # (lambda (var*) exp)
        (_, vars, exp) = x
        return lambda *args: eval(exp, Env(vars, args, env))
    elif x[0] == 'upload':          # (upload exp) Store each intermediate for return
        (_,  exp) = x
        try:
            results = eval(exp, env)
        except Exception as e:
            logger.warning('Failed to evaluate upload of "{}": {}'.format(to_string(exp), e))
            logger.debug(traceback.format_exc())
            env.errors.append(e)
            env.exceptions.append(traceback.format_exc())
            results = None
        if isinstance(results, list):
            for r in results:
                env.emissions.append(r)
        elif results:
            env.emissions.append(results)
        return results

    elif x[0] == 'get':            # (get key exp) Pull a stored value or FileSet from a WaspLink
        (_, key, exp) = x
        chain = eval(exp, env)
        assert type(chain) is WaspLink
        val = chain.get_value(key)
        if isinstance(val, asmtypes.FileSet):
            chain['default_output'] = val
            return chain
        else: # A value
            return val
    elif x[0] == 'all_files': ## Gets all data from module directory
        (_, exp) = x
        chain = eval(exp, env)
        assert type(chain) is WaspLink
        all_files = utils.ls_recursive(chain['outpath'])
        module = chain['module']
        chain['default_output'] = asmtypes.set_factory('misc', all_files,
                                                       name='{}.all_files'.format(module),
                                                       keep_name=True)
        return chain
    elif x[0] == 'tar': ## Tar outputs from WaspLink(s)
        bare_exp, kwargs = extract_kwargs(x)
        wlinks = [eval(exp, env) for exp in bare_exp[1:]]

        ### Format tarball name
        if 'name' in kwargs:
            tar_name = '{}.tar.gz'.format(kwargs['name'])
        else: # Generate Tar Name
            tar_name = '{}.tar.gz'.format('_'.join([w['module'] for w in wlinks]))

        ### Tag the tarball fileset
        tag = kwargs.get('tag')
        tags = [tag] if tag else []

        ### Create new link
        chain = WaspLink('tar', wlinks)
        filelist = []
        for w in wlinks:
            filelist += w.files
        chain['default_output'] = asmtypes.set_factory(
            'tar', utils.tar_list(env.outpath, filelist, tar_name),
            name=tar_name, keep_name=True, tags=tags)
        return chain

    elif x[0] == 'begin':          # (begin exp*) Return each intermediate
        inner_env = Env(outer=env)
        val = []
        for exp in x[1:]:
            try:
                ret = eval(exp, inner_env)
                if ret:
                    val.append(ret)
            except Exception as e:
                if e.args:  # skip empty exceptions
                    logger.warning('Failed to eval "{}": {}'.format(to_string(exp), e))
                    logger.debug(traceback.format_exc())
                    env.errors.append(e)
                    env.exceptions.append(traceback.format_exc())
        if val:
            return val if len(val) > 1 else val[0]

    elif x[0] == 'print':
        for exp in x[1:]:
            print eval(exp, env)

    elif x[0] == 'prog':          # same as begin, but use same env
        val = []
        for exp in x[1:]:
            try:
                ret = eval(exp, env)
                if ret:
                    val.append(ret)
            except Exception as e:
                if e.args:  # skip empty exceptions
                    logger.warning('Failed to eval "{}": {}'.format(to_string(exp), e))
                    logger.debug(traceback.format_exc())
                    env.errors.append(e)
                    env.exceptions.append(traceback.format_exc())
        if val:
            return val if len(val) > 1 else val[0]


    else:                          # (proc exp*)
        exps = [eval(exp, env) for exp in x]
        proc = exps.pop(0)
        env.next_stage(x[0])
        try: ## Assembly functions accept an env keyword
            return proc(*exps, env=env)
        except TypeError: ## Built-in functions do not
            logger.debug(traceback.format_exc())
            return proc(*exps)
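
This evaluator closely follows the structure of Peter Norvig's lis.py. Below
is a minimal sketch of the scaffolding it assumes (Symbol, isa, Env) plus a
usage example; the WASP-specific pieces (WaspLink, asmtypes, env.parameters,
env.emissions) are omitted, so only the plain Scheme forms work here:

Symbol = str
isa = isinstance

class Env(dict):
    # An environment: {var: val} pairs plus a link to an outer Env
    def __init__(self, parms=(), args=(), outer=None):
        self.update(zip(parms, args))
        self.outer = outer
    def find(self, var):
        # Return the innermost Env in which var is bound
        if var in self:
            return self
        if self.outer is None:
            raise Exception('unbound variable: {}'.format(var))
        return self.outer.find(var)

env = Env()
eval(['define', 'x', 2], env)                     # (define x 2)
assert eval(['if', ['quote', 1], 'x'], env) == 2  # (if (quote 1) x)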
Example #3
    def run_pipeline(self, pipes, job_data, contigs_only=True):
        """
        Runs all pipelines in list PIPES
        """
        all_pipes = []
        for p in pipes:
            all_pipes += self.pmanager.parse_input(p)
        logging.info('{} pipelines:'.format(len(all_pipes)))
        for p in all_pipes:
            print '->'.join(p)
        #include_reads = self.pmanager.output_type(pipeline[-1]) == 'reads'
        include_reads = False
        pipeline_num = 1
        all_files = []
        pipe_outputs = []
        logfiles = []
        ale_reports = {}
        final_contigs = []
        final_scaffolds = []
        output_types = []
        exceptions = []
        num_pipes = len(all_pipes)
        for pipe in all_pipes:
            try:
                #job_data = copy.deepcopy(job_data_global)
                #job_data['out_report'] = job_data_global['out_report'] 
                pipeline, overrides = self.pmanager.parse_pipe(pipe)
                job_data.add_pipeline(pipeline_num, pipeline)
                num_stages = len(pipeline)
                pipeline_stage = 1
                pipeline_results = []
                cur_outputs = []

                # Reset job data 
                job_data['reads'] = copy.deepcopy(job_data['raw_reads'])
                job_data['processed_reads'] = []
                print job_data

                self.out_report.write('\n{0} Pipeline {1}: {2} {0}\n'.format('='*15, pipeline_num, pipe))
                pipe_suffix = '' # filename code for indiv pipes
                pipe_start_time = time.time()
                pipe_alive = True

                # Store data record for pipeline

                for module_name in pipeline:
                    if not pipe_alive:
                        self.out_report.write('\n{0} Module Failure, Killing Pipe {0}'.format(
                                'X'*10))
                        break
                    module_code = '' # unique code for data reuse
                    print '\n\n{0} Running module: {1} {2}'.format(
                        '='*20, module_name, '='*(35-len(module_name)))
                    self.garbage_collect(self.datapath, job_data['user'], 2147483648) # 2GB

                    ## PROGRESS CALCULATION
                    pipes_complete = (pipeline_num - 1) / float(num_pipes)
                    stage_complete = (pipeline_stage - 1) / float(num_stages)
                    pct_segment = 1.0 / num_pipes
                    stage_complete *= pct_segment
                    total_complete = pipes_complete + stage_complete
                    cur_state = 'Running:[{}%|P:{}/{}|S:{}/{}|{}]'.format(
                        int(total_complete * 100), pipeline_num, num_pipes,
                        pipeline_stage, num_stages, module_name)
                    self.metadata.update_job(job_data['uid'], 'status', cur_state)

                    ## LOG REPORT For now, module code is 1st and last letter
                    short_name = self.pmanager.get_short_name(module_name)
                    if short_name:
                        #pipe_suffix += short_name.capitalize()
                        module_code += short_name.capitalize()
                    else:
                        #pipe_suffix += module_name[0].upper() + module_name[-1]
                        module_code += module_name[0].upper() + module_name[-1]
                    mod_overrides = overrides[pipeline_stage - 1]
                    for k in mod_overrides.keys():
                        #pipe_suffix += '_{}{}'.format(k[0], par[k])
                        module_code += '_{}{}'.format(k[0], mod_overrides[k])
                    pipe_suffix += module_code
                    self.out_report.write('PIPELINE {} -- STAGE {}: {}\n'.format(
                            pipeline_num, pipeline_stage, module_name))
                    logging.debug('New job_data for stage {}: {}'.format(
                            pipeline_stage, job_data))
                    job_data['params'] = overrides[pipeline_stage-1].items()
                    module_start_time = time.time()
                    ## RUN MODULE
                    # Check if output data exists
                    reuse_data = False
                    enable_reuse = True # KILL SWITCH
                    if enable_reuse:
                        for k, prev_pipe in enumerate(pipe_outputs):
                            if reuse_data:
                                break
                            if not prev_pipe:
                                continue
                            # Check that all previous stages match
                            for i in range(pipeline_stage):
                                try:
                                    if not prev_pipe[i][0] == cur_outputs[i][0]:
                                        break
                                except IndexError:
                                    pass
                                try:
                                    if (prev_pipe[i][0] == module_code and i == pipeline_stage - 1):
                                        #and overrides[i].items() == job_data['params']): #copy!
                                        print('Found previously computed data, reusing {}.'.format(
                                                module_code))
                                        output = [] + prev_pipe[i][1]
                                        pfix = (k+1, i+1)
                                        alldata = [] + prev_pipe[i][2]
                                        reuse_data = True
                                        # Carry over the elapsed time recorded for this
                                        # stage by the earlier pipeline (k+1)
                                        job_data.get_pipeline(pipeline_num).get_module(
                                            pipeline_stage)['elapsed_time'] = job_data.get_pipeline(
                                                k+1).get_module(pipeline_stage)['elapsed_time']
                                        break
                                except Exception: # Previous pipes may be shorter
                                    pass

                    output_type = self.pmanager.output_type(module_name)

                    if not reuse_data:
                        output, alldata, mod_log = self.pmanager.run_module(
                            module_name, job_data, all_data=True, reads=include_reads)

                        ##### Module produced no output; attach log and kill this pipe #####
                        if not output:
                            pipe_alive = False
                            try:
                                print mod_log
                                logfiles.append(mod_log)
                            except Exception:
                                print 'error attaching ', mod_log
                            break


                        ##### Prefix outfiles with pipe stage (only assembler modules) #####
                        alldata = [asm.prefix_file_move(
                                file, "P{}_S{}_{}".format(pipeline_num, pipeline_stage, module_name)) 
                                    for file in alldata]
                        module_elapsed_time = time.time() - module_start_time
                        job_data.get_pipeline(pipeline_num).get_module(
                            pipeline_stage)['elapsed_time'] = module_elapsed_time


                        if alldata: #If log was renamed
                            mod_log = asm.prefix_file(mod_log, "P{}_S{}_{}".format(
                                    pipeline_num, pipeline_stage, module_name))

                    if output_type == 'contigs' or output_type == 'scaffolds': #Assume assembly contigs
                        if reuse_data:
                            p_num, p_stage = pfix
                        else:
                            p_num, p_stage = pipeline_num, pipeline_stage

                        # If plugin returned scaffolds
                        if isinstance(output, tuple) and len(output) == 2:
                            out_contigs = output[0]
                            out_scaffolds = output[1]
                            cur_scaffolds = [asm.prefix_file(
                                    file, "P{}_S{}_{}".format(p_num, p_stage, module_name)) 
                                        for file in out_scaffolds]
                        else:
                            out_contigs = output
                        cur_contigs = [asm.prefix_file(
                                file, "P{}_S{}_{}".format(p_num, p_stage, module_name)) 
                                    for file in out_contigs]

                        #job_data['reads'] = asm.arast_reads(alldata)
                        job_data['contigs'] = cur_contigs

                    elif output_type == 'reads': #Assume preprocessing
                        if include_reads and reuse_data: # data was prefixed and moved
                            for d in output:
                                files = [asm.prefix_file(f, "P{}_S{}_{}".format(
                                            pipeline_num, pipeline_stage, module_name)) for f in d['files']]
                                d['files'] = files
                                d['short_reads'] = [] + files
                        job_data['reads'] = output
                        job_data['processed_reads'] = list(job_data['reads'])
                        
                    else: # Generic return, don't use in further stages
                        pipeline_results += output
                        logging.info('Generic plugin output: {}'.format(output))
   

                    if pipeline_stage == num_stages: # Last stage, add contig for assessment
                        if output and (output_type == 'contigs' or output_type == 'scaffolds'): #If a contig was produced
                            fcontigs = cur_contigs
                            rcontigs = [asm.rename_file_symlink(f, 'P{}_{}'.format(
                                        pipeline_num, pipe_suffix)) for f in fcontigs]
                            try:
                                rscaffolds = [asm.rename_file_symlink(f, 'P{}_{}_{}'.format(
                                            pipeline_num, pipe_suffix, 'scaff')) for f in cur_scaffolds]
                                if rscaffolds:
                                    scaffold_data = {'files': rscaffolds, 'name': pipe_suffix}
                                    final_scaffolds.append(scaffold_data)
                                    output_types.append(output_type)
                            except Exception: # No scaffolds were produced at this stage
                                pass
                            if rcontigs:
                                contig_data = {'files': rcontigs, 'name': pipe_suffix, 'alignment_bam': []}
                                final_contigs.append(contig_data)
                                output_types.append(output_type)
                    try:
                        logfiles.append(mod_log)
                    except Exception:
                        print 'error attaching ', mod_log
                    pipeline_stage += 1
                    cur_contigs = []
                    cur_scaffolds = []

                    cur_outputs.append([module_code, output, alldata])
                pipe_elapsed_time = time.time() - pipe_start_time
                pipe_ftime = str(datetime.timedelta(seconds=int(pipe_elapsed_time)))
                job_data.get_pipeline(pipeline_num)['elapsed_time'] = pipe_elapsed_time



                if not output:
                    self.out_report.write('ERROR: No contigs produced. See module log\n')
                else:

                    ## Assessment
                    #self.pmanager.run_module('reapr', job_data)
                    #print job_data
                    # TODO reapr break may be diff from final reapr align!
                    # ale_out, _, _ = self.pmanager.run_module('ale', job_data)
                    # if ale_out:
                    #     job_data.get_pipeline(pipeline_num).import_ale(ale_out)
                    #     ale_reports[pipe_suffix] = ale_out
                    pipeline_datapath = '{}/{}/pipeline{}/'.format(job_data['datapath'], 
                                                                   job_data['job_id'],
                                                                   pipeline_num)
                    try:
                        os.makedirs(pipeline_datapath)
                    except OSError:
                        logging.info("{} exists, skipping mkdir".format(pipeline_datapath))

                    # all_files.append(asm.tar_list(pipeline_datapath, pipeline_results, 
                    #                     'pipe{}_{}.tar.gz'.format(pipeline_num, pipe_suffix)))

                    all_files += pipeline_results

                self.out_report.write('Pipeline {} total time: {}\n\n'.format(pipeline_num, pipe_ftime))
                job_data.get_pipeline(pipeline_num)['name'] = pipe_suffix
                pipe_outputs.append(cur_outputs)
                pipeline_num += 1

            except Exception:
                print "ERROR: Pipeline #{} Failed".format(pipeline_num)
                print format_exc()
                e = str(sys.exc_info()[1])
                if 'Terminated' in e:
                    raise Exception(e)
                exceptions.append(module_name + ':\n' + e)
                pipeline_num += 1

        ## ANALYSIS: Quast
        job_data['final_contigs'] = final_contigs
        job_data['final_scaffolds'] = final_scaffolds
        job_data['params'] = [] #clear overrides from last stage

        summary = []  # Quast reports for contigs and scaffolds
        try: # Try to assess; otherwise report pipeline errors
            if job_data['final_contigs']:
                job_data['contig_type'] = 'contigs'
                quast_report, quast_tar, _, q_log = self.pmanager.run_module(
                    'quast', job_data, tar=True, meta=True)
                if quast_report:
                    summary.append(quast_report[0])
                with open(q_log) as infile:
                    self.out_report.write(infile.read())
            else:
                quast_report, quast_tar = '', ''

            if job_data['final_scaffolds']:
                scaff_data = dict(job_data)
                scaff_data['final_contigs'] = job_data['final_scaffolds']
                scaff_data['contig_type'] = 'scaffolds'
                scaff_report, scaff_tar, _, scaff_log = self.pmanager.run_module(
                    'quast', scaff_data, tar=True, meta=True)
                scaffold_quast = True
                if scaff_report:
                    summary.append(scaff_report[0])
                with open(scaff_log) as infile:
                    self.out_report.write('\n Quast Report - Scaffold Mode \n')
                    self.out_report.write(infile.read())
            else:
                scaffold_quast = False
        except Exception: # Assessment failed; surface pipeline errors instead
            if exceptions:
                if len(exceptions) > 1:
                    raise Exception('Multiple Errors')
                else:
                    raise Exception(exceptions[0])
            else:
                raise Exception(str(sys.exc_info()[1]))


        ## CONCAT MODULE LOG FILES
        self.out_report.write("\n\n{0} Begin Module Logs {0}\n".format("="*10))
        for log in logfiles:
            self.out_report.write("\n\n{0} Begin Module {0}\n".format("="*10))
            try:
                with open(log) as infile:
                    self.out_report.write(infile.read())
            except IOError:
                self.out_report.write("Error reading log file: {}\n".format(log))



        ## Format Returns
        ctg_analysis = quast_tar.rsplit('/', 1)[0] + '/{}_ctg_qst.tar.gz'.format(job_data['job_id'])
        try:
            os.rename(quast_tar, ctg_analysis)
            return_files = [ctg_analysis]
        except OSError:
            #summary = ''
            return_files = []

        if scaffold_quast:
            scf_analysis = scaff_tar.rsplit('/', 1)[0] + '/{}_scf_qst.tar.gz'.format(job_data['job_id'])
            #summary = quast_report[0]
            os.rename(scaff_tar, scf_analysis)
            return_files.append(scf_analysis)

        contig_files = []
        for data in final_contigs + final_scaffolds:
            for f in data['files']:
                contig_files.append(os.path.realpath(f))

        return_files += all_files

        ## Deduplicate, preserving order
        seen = set()
        unique = []
        for f in return_files:
            if f not in seen:
                seen.add(f)
                unique.append(f)
        return_files = unique

        #if exceptions:        
            # if len(exceptions) > 1:
            #     raise Exception('Multiple Errors')
            # else:
            #     raise Exception(exceptions[0])

        if contig_files:
            return_files.append(asm.tar_list(
                '{}/{}'.format(job_data['datapath'], job_data['job_id']),
                contig_files, '{}_assemblies.tar.gz'.format(job_data['job_id'])))
        print "return files: {}".format(return_files)

        return return_files, summary, contig_files, exceptions
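
run_pipeline assumes a job_data object that behaves like a dict but also keeps
per-pipeline records. Below is a hypothetical stand-in sketching the contract
the code relies on (add_pipeline, get_pipeline, get_module, plain key access);
the real class lives elsewhere in the service:

class PipelineRecord(dict):
    # One record per pipeline; doubles as a dict for 'name'/'elapsed_time'
    def get_module(self, stage):
        return self['modules'][stage - 1]  # stages are 1-indexed

class JobData(dict):
    # Dict-style job state plus pipeline bookkeeping
    def add_pipeline(self, num, modules):
        record = PipelineRecord(
            modules=[{'name': m, 'elapsed_time': 0} for m in modules])
        self.setdefault('pipelines', {})[num] = record
    def get_pipeline(self, num):
        return self['pipelines'][num]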