Example 1
 def insert_output(self, output, default_type, module_name):
     """ Parses the output dict of a completed module and stores the 
     data and information within the WaspLink object """
     filesets = []
     for outtype, outvalue in output.items():
         name = '{}_{}'.format(module_name, outtype)
         if not type(outvalue) is list: 
             outvalue = [outvalue]
         ## Store default output
         if default_type == outtype:
             if isinstance(outvalue[0], asmtypes.FileSet):
                 self['default_output'] = outvalue
             else: # Files
                 self['default_output'] = asmtypes.set_factory(outtype, [asmtypes.FileInfo(f) for f in outvalue],
                                                               name=name)
         ## Store all outputs and values
         outputs = []
         are_files = False
         for out in outvalue:
             try:
                 if os.path.exists(out): # These are files, convert to FileInfo format
                     outputs.append(asmtypes.FileInfo(out))
                     are_files = True
                 else:
                     raise Exception('Not a file')
             except Exception as e: # Not a file
                 outputs = outvalue
                 break
         if are_files:
             filesets.append(asmtypes.set_factory(outtype, outputs, name=name))
         else:
             self['info'][outtype] = outputs[0] if len(outputs) == 1 else outputs
     self['data'] = asmtypes.FileSetContainer(filesets)
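
A minimal sketch of the kind of output dict a completed module might hand to insert_output, and how each entry is routed; the module name, keys, and paths below are hypothetical, not taken from the source.

    # Hypothetical module output (illustrative only).
    output = {
        'contigs': ['/tmp/run1/contigs.fasta'],  # existing path -> wrapped in asmtypes.FileInfo
                                                 # and grouped into a FileSet under self['data']
        'n50': 41237,                            # not a file -> stored as a value in self['info']['n50']
    }
    # With default_type='contigs' and module_name='asm', the 'contigs' entry is also
    # stored as self['default_output'] under the FileSet name 'asm_contigs'.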
Example 2
 def func_on_contigs(*wasplinks):
     contigs = []
     for w in wasplinks:
         contigs += w['default_output'].files
     newlink = wasp.WaspLink()
     output = func(contigs)
     newlink['default_output'] = asmtypes.set_factory('contigs', [output])
     return newlink
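
The free names func, wasp, and asmtypes indicate this body is a closure, most likely returned by a wrapper factory that adapts a plain function over contig files so it can run on WaspLink objects. A minimal sketch of such a factory follows; the factory name is an assumption, and wasp/asmtypes are the project modules used throughout these examples.

    import asmtypes
    import wasp

    def make_contig_wrapper(func):  # hypothetical name for the enclosing factory
        """Adapt func(list_of_contig_files) -> output_file so that it accepts
        WaspLink arguments and returns a new WaspLink."""
        def func_on_contigs(*wasplinks):
            contigs = []
            for w in wasplinks:
                contigs += w['default_output'].files  # gather contig files from each link
            newlink = wasp.WaspLink()
            output = func(contigs)                    # run the wrapped function
            newlink['default_output'] = asmtypes.set_factory('contigs', [output])
            return newlink
        return func_on_contigs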
Example 3
    def wasp_data(self):
        """
        Compatibility layer for wasp data types.
        Scans self for certain data types and populates a FileSetContainer
        """
        all_sets = []
        #### Convert Old Reads Format to ReadSets
        for set_type in ['reads', 'reference', 'contigs']:
            if set_type in self:
                for fs in self[set_type]:
                    ### Get supported set attributes (ins, std, etc)
                    kwargs = {}
                    for key in ['insert', 'stdev', 'tags']:
                        if key in fs:
                            kwargs[key] = fs[key]
                    all_sets.append(
                        asmtypes.set_factory(
                            fs['type'],
                            [asmtypes.FileInfo(f)
                             for f in fs['files']], **kwargs))

        #### Convert final_contigs from pipeline mode
        if 'final_contigs' in self:
            if self['final_contigs']:  ## Not empty
                ## Remove left over contigs
                del (self['contigs'])
                for contig_data in self['final_contigs']:
                    all_sets.append(
                        asmtypes.set_factory(
                            'contigs',
                            [
                                asmtypes.FileInfo(fs, )
                                for fs in contig_data['files']
                            ],
                            #{'name':contig_data['name']}))
                            name=contig_data['name']))

        #### Convert Contig/Ref format
        # for set_type in ['contigs', 'reference']:
        #     if set_type in self:
        #         all_sets.append(asmtypes.set_factory(set_type, [asmtypes.FileInfo(fs) for fs in self[set_type]]))

        return asmtypes.FileSetContainer(all_sets)
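
For reference, a minimal sketch of the legacy entries this compatibility layer scans for; the file names and attribute values are hypothetical.

    # Hypothetical legacy job data consumed by wasp_data (illustrative only).
    legacy = {
        'reads': [
            {'type': 'paired', 'files': ['r1.fq', 'r2.fq'],
             'insert': 300, 'stdev': 50, 'tags': ['trimmed']},
        ],
        'reference': [{'type': 'reference', 'files': ['ref.fa']}],
        'contigs': [{'type': 'contigs', 'files': ['intermediate.fa']}],
        # When 'final_contigs' is present and non-empty, the leftover 'contigs'
        # entry is dropped and each item becomes a named 'contigs' FileSet.
        'final_contigs': [{'name': 'best_assembly', 'files': ['contigs.fa']}],
    }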
Example 4
    def insert_output(self, output, default_type, module_name):
        """ Parses the output dict of a completed module and stores the
        data and information within the WaspLink object """
        filesets = []
        for outtype, outvalue in output.items():
            name = '{}_{}'.format(module_name, outtype)
            if not type(outvalue) is list:
                outvalue = [outvalue]
            ## Store default output
            if default_type == outtype:
                if isinstance(outvalue[0], asmtypes.FileSet):
                    for out in outvalue:
                        out['tags'].append(module_name)
                    self['default_output'] = outvalue

                else: # Files
                    self['default_output'] = asmtypes.set_factory(outtype, [asmtypes.FileInfo(f) for f in outvalue],
                                                                  name=name)
                    self['default_output']['tags'].append(module_name)
            ## Store all outputs and values
            outputs = []
            are_files = False
            for out in outvalue:
                try:
                    if os.path.exists(out): # These are files, convert to FileInfo format
                        outputs.append(asmtypes.FileInfo(out))
                        are_files = True
                    else:
                        raise Exception('Not a file')
                except Exception as e: # Not a file
                    outputs = outvalue
                    break
            if are_files:
                filesets.append(asmtypes.set_factory(outtype, outputs, name=name))
            else:
                self['info'][outtype] = outputs[0] if len(outputs) == 1 else outputs
        self['data'] = asmtypes.FileSetContainer(filesets)
Example 5
    def wasp_data(self):
        """
        Compatibility layer for wasp data types.
        Scans self for certain data types and populates a FileSetContainer
        """
        all_sets = []
        #### Convert Old Reads Format to ReadSets
        for set_type in ['reads', 'reference', 'contigs']:
            if set_type in self:
                for fs in self[set_type]:
                    ### Get supported set attributes (ins, std, etc)
                    kwargs = {}
                    for key in ['insert', 'stdev', 'platform', 'tags']:
                        if key in fs:
                            kwargs[key] = fs[key]
                    print fs['files'], fs['type']
                    all_sets.append(asmtypes.set_factory(fs['type'],
                                                         [asmtypes.FileInfo(f) for f in fs['files']],
                                                         **kwargs))

        #### Convert final_contigs from pipeline mode
        if 'final_contigs' in self:
            if self['final_contigs']: ## Not empty
                ## Remove left over contigs
                del(self['contigs'])
                for contig_data in self['final_contigs']:
                    all_sets.append(asmtypes.set_factory('contigs',
                                                         [asmtypes.FileInfo(fs,) for fs in contig_data['files']],
                                                         #{'name':contig_data['name']}))
                                                         name=contig_data['name']))

        #### Convert Contig/Ref format
        # for set_type in ['contigs', 'reference']:
        #     if set_type in self:
        #         all_sets.append(asmtypes.set_factory(set_type, [asmtypes.FileInfo(fs) for fs in self[set_type]]))

        return asmtypes.FileSetContainer(all_sets)
Example 6
 def wasp_run(self):
     #### Save and restore insert data, handle extra output
     orig_sets = copy.deepcopy(self.data.readsets)
     output = self.run()
     if output['reads']:
         if type(output['reads'][0]) is list:  ## Multiple Libraries
             for i, readset in enumerate(output['reads']):
                 orig_sets[i].update_files(readset)
                 orig_sets[i]['name'] = '{}_reads'.format(self.name)
         else:
             orig_sets[0].update_files(output['reads'])
             orig_sets[0]['name'] = '{}_reads'.format(self.name)
     readsets = orig_sets
     try:
         readsets.append(asmtypes.set_factory('single', output['extra'],
                                              name='{}_single'.format(self.name)))
     except Exception:
         pass  # no 'extra' output to add
     return {'reads': readsets}
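
A minimal sketch of the run() return value this wrapper expects, in both the single-library and multi-library forms; the file names are hypothetical.

    # Single library: one flat list of read files, applied to the first ReadSet.
    output = {'reads': ['trimmed_1.fq', 'trimmed_2.fq'],
              'extra': ['orphans.fq']}  # optional; wrapped as a 'single' set when present

    # Multiple libraries: a list of lists, matched to the original ReadSets by position.
    output = {'reads': [['lib1_1.fq', 'lib1_2.fq'],
                        ['lib2_1.fq', 'lib2_2.fq']]}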
Example 7
    def run(self):
        contigsets = self.data.contigsets
        if len(contigsets) < 2:
            raise Exception("Fewer than 2 contig sets")
        merged_files = []

        if len(contigsets) == 2:
            mfile = self.merge(contigsets[0], contigsets[1])
            if mfile:
                merged_files.append(mfile)
        elif len(contigsets) > 2:
            mfile = contigsets[0]
            for i, cset in enumerate(contigsets[1:]):
                mfile = asmtypes.set_factory(
                    "contigs", [self.merge(mfile, cset)], name="merged{}_contigs".format(i + 1)
                )
            merged_files = mfile.files

        return {"contigs": merged_files}
Example 8
    def run(self):
        contigsets = self.data.contigsets
        if len(contigsets) < 2:
            raise Exception('Fewer than 2 contig sets')
        merged_files = []

        if len(contigsets) == 2:
            mfile = self.merge(contigsets[0], contigsets[1])
            if mfile:
                merged_files.append(mfile)
        elif len(contigsets) > 2:
            mfile = contigsets[0]
            for i, cset in enumerate(contigsets[1:]):
                mfile = asmtypes.set_factory(
                    'contigs', [self.merge(mfile, cset)],
                    name='merged{}_contigs'.format(i + 1))
            merged_files = mfile.files

        return {'contigs': merged_files}
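
The more-than-two branch is a left fold: each pass merges the accumulated result with the next contig set and re-wraps it as a fresh 'contigs' FileSet. A stand-alone sketch of that reduction, with self.merge replaced by a stand-in, is below.

    from functools import reduce  # available on both Python 2 and 3

    def merge_pair(acc, nxt):
        # Stand-in for self.merge(acc, nxt): pretend it returns a merged file name.
        return 'merged({}+{})'.format(acc, nxt)

    contigsets = ['set1.fa', 'set2.fa', 'set3.fa', 'set4.fa']
    print(reduce(merge_pair, contigsets))
    # merged(merged(merged(set1.fa+set2.fa)+set3.fa)+set4.fa)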
Example 9
 def wasp_run(self):
     #### Save and restore insert data, handle extra output
     orig_sets = copy.deepcopy(self.data.readsets)
     output = self.run()
     if output['reads']:
         if type(output['reads'][0]) is list:  ## Multiple Libraries
             for i, readset in enumerate(output['reads']):
                 orig_sets[i].update_files(readset)
                 orig_sets[i]['name'] = '{}_reads'.format(self.name)
         else:
             orig_sets[0].update_files(output['reads'])
             orig_sets[0]['name'] = '{}_reads'.format(self.name)
     else:
         return
     readsets = orig_sets
     try:
         for extra in output['extra']:
             readsets.append(
                 asmtypes.set_factory('single',
                                      extra,
                                      name='{}_single'.format(self.name)))
     except Exception as e:
         logger.warn('Wasp handles extra output: {}'.format(e))
     return {'reads': readsets}
Example 10
    def compute(self, body):
        self.job_list_lock.acquire()
        try:
            job_data = self.prepare_job_data(body)
            self.job_list.append(job_data)
        except:
            logger.error("Error in adding new job to job_list")
            raise
        finally:
            self.job_list_lock.release()

        status = ''
        logger.debug('job_data = {}'.format(job_data))

        params = json.loads(body)
        job_id = params['job_id']
        data_id = params['data_id']
        uid = params['_id']
        user = params['ARASTUSER']
        token = params['oauth_token']
        pipelines = params.get('pipeline')
        recipe = params.get('recipe')
        wasp_in = params.get('wasp')
        jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))

        url = shock.verify_shock_url(self.shockurl)

        self.start_time = time.time()

        timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid, self.done_flag)
        timer_thread.start()

        #### Parse pipeline to wasp exp
        reload(recipes)
        if recipe:
            try: wasp_exp = recipes.get(recipe[0], job_id)
            except AttributeError: raise Exception('"{}" recipe not found.'.format(recipe[0]))
        elif wasp_in:
            wasp_exp = wasp_in[0]
        elif not pipelines:
            wasp_exp = recipes.get('auto', job_id)
        elif pipelines:
            ## Legacy client
            if pipelines[0] == 'auto':
                wasp_exp = recipes.get('auto', job_id)
            ##########
            else:
                if type(pipelines[0]) is not list: # --assemblers
                    pipelines = [pipelines]
                all_pipes = []
                for p in pipelines:
                    all_pipes += self.pmanager.parse_input(p)
                logger.debug("pipelines = {}".format(all_pipes))
                wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
        else:
            raise asmtypes.ArastClientRequestError('Malformed job request.')
        logger.debug('Wasp Expression: {}'.format(wasp_exp))
        w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

        ###### Run Job
        try:
            w_engine.run_expression(wasp_exp, job_data)
            ###### Upload all result files and place them into appropriate tags
            uploaded_fsets = job_data.upload_results(url, token)

            # Format report
            new_report = open('{}.tmp'.format(self.out_report_name), 'w')

            ### Log errors
            if len(job_data['errors']) > 0:
                new_report.write('PIPELINE ERRORS\n')
                for i,e in enumerate(job_data['errors']):
                    new_report.write('{}: {}\n'.format(i, e))
            try: ## Get Quast output
                quast_report = job_data['wasp_chain'].find_module('quast')['data'].find_type('report')[0].files[0]
                with open(quast_report) as q:
                    new_report.write(q.read())
            except:
                new_report.write('No Summary File Generated!\n\n\n')
            self.out_report.close()
            with open(self.out_report_name) as old:
                new_report.write(old.read())

            for log in job_data['logfiles']:
                new_report.write('\n{1} {0} {1}\n'.format(os.path.basename(log), '='*20))
                with open(log) as l:
                    new_report.write(l.read())

            ### Log tracebacks
            if len(job_data['tracebacks']) > 0:
                new_report.write('EXCEPTION TRACEBACKS\n')
                for i,e in enumerate(job_data['tracebacks']):
                    new_report.write('{}: {}\n'.format(i, e))

            new_report.close()
            os.remove(self.out_report_name)
            shutil.move(new_report.name, self.out_report_name)
            res = self.upload(url, user, token, self.out_report_name)
            report_info = asmtypes.FileInfo(self.out_report_name, shock_url=url, shock_id=res['data']['id'])

            self.metadata.update_job(uid, 'report', [asmtypes.set_factory('report', [report_info])])
            status = 'Complete with errors' if job_data.get('errors') else 'Complete'

            ## Make compatible with JSON dumps()
            del job_data['out_report']
            del job_data['initial_reads']
            del job_data['raw_reads']
            self.metadata.update_job(uid, 'data', job_data)
            self.metadata.update_job(uid, 'result_data', uploaded_fsets)
            ###### Legacy Support #######
            uploaded_fsets.append(asmtypes.set_factory('report', [report_info]))
            contigsets = [fset for fset in uploaded_fsets if fset.type == 'contigs' or fset.type == 'scaffolds']
            download_ids = {fi['filename']: fi['shock_id'] for fset in uploaded_fsets for fi in fset['file_infos']}
            contig_ids = {fi['filename']: fi['shock_id'] for fset in contigsets for fi in fset['file_infos']}
            self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
            self.metadata.update_job(uid, 'contig_ids', [contig_ids])
            ###################

            sys.stdout.flush()
            touch(os.path.join(jobpath, "_DONE_"))
            logger.info('============== JOB COMPLETE ===============')

        except asmtypes.ArastUserInterrupt:
            status = 'Terminated by user'
            sys.stdout.flush()
            touch(os.path.join(jobpath, "_CANCELLED__"))
            logger.info('============== JOB KILLED ===============')

        finally:
            self.remove_job_from_lists(job_data)
            logger.debug('Reinitialize plugin manager...') # Reinitialize to get live changes
            self.pmanager = ModuleManager(self.threads, self.kill_list, self.kill_list_lock, self.job_list, self.binpath, self.modulebin)

        self.metadata.update_job(uid, 'status', status)
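
For reference, a minimal sketch of the JSON body this consumer parses; the keys mirror the params[...] lookups above, and every value (including the module names) is hypothetical. Per the branching above, recipe takes precedence over wasp, which takes precedence over pipeline.

    import json

    body = json.dumps({
        '_id': 'job-uid-001',
        'job_id': 42,
        'data_id': 7,
        'ARASTUSER': 'alice',
        'oauth_token': '<token>',
        'recipe': ['auto'],                    # or 'wasp': ['(begin ...)'],
        'pipeline': [['tagdust', 'velvet']],   # legacy client form: list of module pipelines
    })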
Example 11
def eval(x, env):
    "Evaluate an expression in an environment."
    if isa(x, Symbol):             # variable reference
        try:
            return env.find(x)[x]
        except:
            raise Exception('Module "{}" not found'.format(x))
    elif not isa(x, list):         # constant literal
        return x
    elif x[0] == 'quote':          # (quote exp)
        (_, exp) = x
        return exp

    ####### Casting to FileSet Types
    elif x[0] in ['contigs', 'paired', 'single', 'reference']:
        wlink = WaspLink()
        eval_files = []
        try:
            for exp in x[1:]:
                eval_files += eval(exp, env).files
            wlink['default_output'] = asmtypes.set_factory(x[0], eval_files,
                                                           name='{}_override'.format(x[0]))
        except Exception as e:
            wlink['default_output'] = asmtypes.set_factory(x[0], x[1:])
        return wlink
    ##################################

    elif x[0] == 'if':             # (if test conseq alt)
        if len(x) == 4:
            (_, test, conseq, alt) = x
        elif len(x) == 3:
            (_, test, conseq) = x
            alt = None
        if eval(test, env):
            return eval(conseq, env)
        elif alt:
            return eval(alt, env)

    elif x[0] == 'set!':           # (set! var exp)
        (_, var, exp) = x
        env.find(var)[var] = eval(exp, env)
    elif x[0] == 'setparam':
        (_, param, value) = x
        try:
            env.parameters[param] = env.find(value)[value]
        except:
            env.parameters[param] = value
    elif x[0] == 'define':         # (define var exp)
        (_, var, exp) = x
        try:
            env[var] = eval(exp, env)
        except Exception as e:
            logger.warning('Failed to evaluate definition of "{}": {}'.format(var, e))
            logger.debug(traceback.format_exc())
            env[var] = None
    elif x[0] == 'sort':
        seq = [link for link in eval(x[1], env) if link is not None and link.output]
        logger.debug(seq)
        if len(seq) == 1: return seq
        try: pred = x[2]
        except: pred = '<'
        try:
            k = x[3]
            assert k == ':key'
            lam = x[4]
            eval(['define', 'sort_func', lam], env)
        except: lam = None
        rev = pred == '>'
        if lam:
            l = sorted(seq, key=lambda n: eval(['sort_func', n], env), reverse=rev)
        else:
            l = sorted(seq, reverse=rev)
        return l
    elif x[0] == 'lambda':         # (lambda (var*) exp)
        (_, vars, exp) = x
        return lambda *args: eval(exp, Env(vars, args, env))
    elif x[0] == 'upload':          # (upload exp) Store each intermediate for return
        (_,  exp) = x
        try:
            val = eval(exp, env)
            results = val
        except Exception as e:
            logger.warn('Failed to evaluate upload of "{}": {}'. format(to_string(exp), e))
            logger.debug(traceback.format_exc())
            env.errors.append(e)
            env.exceptions.append(traceback.format_exc())
            results = None
        if type(results) is list:
            for r in results:
                env.emissions.append(r)
        elif results:
            env.emissions.append(results)
        return results

    elif x[0] == 'get':
        (_, key, exp) = x
        chain = eval(exp, env)
        assert type(chain) is WaspLink
        val = chain.get_value(key)
        if isinstance(val, asmtypes.FileSet):
            chain['default_output'] = val
            return chain
        else: # A value
            return val
    elif x[0] == 'all_files': ## Gets all data from module directory
        (_, exp) = x
        chain = eval(exp, env)
        assert type(chain) is WaspLink
        all_files = utils.ls_recursive(chain['outpath'])
        module = chain['module']
        chain['default_output'] = asmtypes.set_factory('misc', all_files,
                                                       name='{}.all_files'.format(module),
                                                       keep_name=True)
        return chain
    elif x[0] == 'tar': ## Tar outputs from WaspLink(s)
        bare_exp, kwargs = extract_kwargs(x)
        wlinks = [eval(exp, env) for exp in bare_exp[1:]]

        ### Format tarball name
        if 'name' in kwargs:
            tar_name = '{}.tar.gz'.format(kwargs['name'])
        else: # Generate Tar Name
            tar_name = '{}.tar.gz'.format('_'.join([w['module'] for w in wlinks]))

        ### Tag the tarball fileset
        tag = kwargs.get('tag')
        tags = [tag] if tag else []

        ### Create new link
        chain = WaspLink('tar', wlinks)
        filelist = []
        for w in wlinks:
            filelist += w.files
        chain['default_output'] = asmtypes.set_factory(
            'tar', utils.tar_list(env.outpath, filelist, tar_name),
            name=tar_name, keep_name=True, tags=tags)
        return chain

    elif x[0] == 'begin':          # (begin exp*) Return each intermediate
        inner_env = Env(outer=env)
        val = []
        for exp in x[1:]:
            try:
                ret = eval(exp, inner_env)
                if ret: val.append(ret)
            except Exception as e:
                if e.args:  # only report exceptions that carry arguments
                    logger.warning('Failed to eval "{}": {}'.format(to_string(exp), e))
                    logger.debug(traceback.format_exc())
                    env.errors.append(e)
                    env.exceptions.append(traceback.format_exc())
        if val:
            return val if len(val) > 1 else val[0]

    elif x[0] == 'print':
        for exp in x[1:]:
            print eval(exp, env)

    elif x[0] == 'prog':          # same as begin, but use same env
        val = []
        for exp in x[1:]:
            try:
                ret = eval(exp, env)
                if ret: val.append(ret)
            except Exception as e:
                if e.args:  # only report exceptions that carry arguments
                    logger.warning('Failed to eval "{}": {}'.format(to_string(exp), e))
                    logger.debug(traceback.format_exc())
                    env.errors.append(e)
                    env.exceptions.append(traceback.format_exc())
        if val:
            return val if len(val) > 1 else val[0]


    else:                          # (proc exp*)
        exps = [eval(exp, env) for exp in x]
        proc = exps.pop(0)
        env.next_stage(x[0])
        try: ## Assembly functions
            return proc(*exps, env=env)
        except TypeError as e: ## Built-in functions
            logger.debug(traceback.format_exc())
            return proc(*exps)
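
To make the dispatch concrete, here is a minimal sketch of a Wasp expression in its usual s-expression form and the nested-list form this eval receives; the module names and bindings are illustrative only.

    #   (begin (define asm (spades READS)) (upload (quast asm)))
    x = ['begin',
         ['define', 'asm', ['spades', 'READS']],
         ['upload', ['quast', 'asm']]]
    # 'begin' evaluates each sub-expression in a child Env, 'define' binds the result
    # of the spades stage to 'asm', and 'upload' appends the quast result to
    # env.emissions so it is returned with the job results.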
Example 12
    def compute(self, body):
        self.job_list_lock.acquire()
        try:
            job_data = self.prepare_job_data(body)
            self.job_list.append(job_data)
        except:
            logger.error("Error in adding new job to job_list")
            raise
        finally:
            self.job_list_lock.release()

        status = ''
        logger.debug('job_data = {}'.format(job_data))

        params = json.loads(body)
        job_id = params['job_id']
        data_id = params['data_id']
        uid = params['_id']
        user = params['ARASTUSER']
        token = params['oauth_token']
        pipelines = params.get('pipeline')
        recipe = params.get('recipe')
        wasp_in = params.get('wasp')
        jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))

        url = shock.verify_shock_url(self.shockurl)

        self.start_time = time.time()

        timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid,
                                   self.done_flag)
        timer_thread.start()

        #### Parse pipeline to wasp exp
        reload(recipes)
        if recipe:
            try:
                wasp_exp = recipes.get(recipe[0], job_id)
            except AttributeError:
                raise Exception('"{}" recipe not found.'.format(recipe[0]))
        elif wasp_in:
            wasp_exp = wasp_in[0]
        elif not pipelines:
            wasp_exp = recipes.get('auto', job_id)
        elif pipelines:
            ## Legacy client
            if pipelines[0] == 'auto':
                wasp_exp = recipes.get('auto', job_id)
            ##########
            else:
                if type(pipelines[0]) is not list:  # --assemblers
                    pipelines = [pipelines]
                all_pipes = []
                for p in pipelines:
                    all_pipes += self.pmanager.parse_input(p)
                logger.debug("pipelines = {}".format(all_pipes))
                wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
        else:
            raise asmtypes.ArastClientRequestError('Malformed job request.')
        logger.debug('Wasp Expression: {}'.format(wasp_exp))
        w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

        ###### Run Job
        try:
            w_engine.run_expression(wasp_exp, job_data)
            ###### Upload all result files and place them into appropriate tags
            uploaded_fsets = job_data.upload_results(url, token)

            # Format report
            new_report = open('{}.tmp'.format(self.out_report_name), 'w')

            ### Log errors
            if len(job_data['errors']) > 0:
                new_report.write('PIPELINE ERRORS\n')
                for i, e in enumerate(job_data['errors']):
                    new_report.write('{}: {}\n'.format(i, e))
            try:  ## Get Quast output
                quast_report = job_data['wasp_chain'].find_module(
                    'quast')['data'].find_type('report')[0].files[0]
                with open(quast_report) as q:
                    new_report.write(q.read())
            except:
                new_report.write('No Summary File Generated!\n\n\n')
            self.out_report.close()
            with open(self.out_report_name) as old:
                new_report.write(old.read())

            for log in job_data['logfiles']:
                new_report.write('\n{1} {0} {1}\n'.format(
                    os.path.basename(log), '=' * 20))
                with open(log) as l:
                    new_report.write(l.read())

            ### Log tracebacks
            if len(job_data['tracebacks']) > 0:
                new_report.write('EXCEPTION TRACEBACKS\n')
                for i, e in enumerate(job_data['tracebacks']):
                    new_report.write('{}: {}\n'.format(i, e))

            new_report.close()
            os.remove(self.out_report_name)
            shutil.move(new_report.name, self.out_report_name)
            res = self.upload(url, user, token, self.out_report_name)
            report_info = asmtypes.FileInfo(self.out_report_name,
                                            shock_url=url,
                                            shock_id=res['data']['id'])

            self.metadata.update_job(
                uid, 'report', [asmtypes.set_factory('report', [report_info])])
            status = 'Complete with errors' if job_data.get(
                'errors') else 'Complete'

            ## Make compatible with JSON dumps()
            del job_data['out_report']
            del job_data['initial_reads']
            del job_data['raw_reads']
            self.metadata.update_job(uid, 'data', job_data)
            self.metadata.update_job(uid, 'result_data', uploaded_fsets)
            ###### Legacy Support #######
            uploaded_fsets.append(
                asmtypes.set_factory('report', [report_info]))
            contigsets = [
                fset for fset in uploaded_fsets
                if fset.type == 'contigs' or fset.type == 'scaffolds'
            ]
            download_ids = {
                fi['filename']: fi['shock_id']
                for fset in uploaded_fsets for fi in fset['file_infos']
            }
            contig_ids = {
                fi['filename']: fi['shock_id']
                for fset in contigsets for fi in fset['file_infos']
            }
            self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
            self.metadata.update_job(uid, 'contig_ids', [contig_ids])
            ###################

            sys.stdout.flush()
            touch(os.path.join(jobpath, "_DONE_"))
            logger.info('============== JOB COMPLETE ===============')

        except asmtypes.ArastUserInterrupt:
            status = 'Terminated by user'
            sys.stdout.flush()
            touch(os.path.join(jobpath, "_CANCELLED__"))
            logger.info('============== JOB KILLED ===============')

        finally:
            self.remove_job_from_lists(job_data)
            logger.debug('Reinitialize plugin manager...'
                         )  # Reinitialize to get live changes
            self.pmanager = ModuleManager(self.threads, self.kill_list,
                                          self.kill_list_lock, self.job_list,
                                          self.binpath, self.modulebin)

        self.metadata.update_job(uid, 'status', status)
Example 13
    def compute(self, body):
        error = False
        params = json.loads(body)
        job_id = params['job_id']
        uid = params['_id']
        user = params['ARASTUSER']
        token = params['oauth_token']
        pipelines = params['pipeline']
        recipe = None
        wasp_in = None
        try: ## In case legacy
            recipe = params['recipe']
            wasp_in = params['wasp']
        except:pass

        #support legacy arast client
        if len(pipelines) > 0:
            if type(pipelines[0]) is not list:
                pipelines = [pipelines]
                
        ### Download files (if necessary)
        datapath, all_files = self.get_data(body)
        rawpath = datapath + '/raw/'
        jobpath = os.path.join(datapath, str(job_id))
        try:
            os.makedirs(jobpath)
        except Exception as e:
            print e
            raise Exception('Data Error')

        ### Create job log
        self.out_report_name = '{}/{}_report.txt'.format(jobpath, str(job_id))
        self.out_report = open(self.out_report_name, 'w')

        ### Create data to pass to pipeline
        reads = []
        reference = []
        for fileset in all_files:
            if len(fileset['files']) != 0:
                if (fileset['type'] == 'single' or 
                    fileset['type'] == 'paired'):
                    reads.append(fileset)
                elif fileset['type'] == 'reference':
                    reference.append(fileset)
                else:
                    raise Exception('fileset error')

        job_data = ArastJob({'job_id' : params['job_id'], 
                    'uid' : params['_id'],
                    'user' : params['ARASTUSER'],
                    'reads': reads,
                    'logfiles': [],
                    'reference': reference,
                    'initial_reads': list(reads),
                    'raw_reads': copy.deepcopy(reads),
                    'params': [],
                    'exceptions': [],
                    'pipeline_data': {},
                    'datapath': datapath,
                    'out_report' : self.out_report})
                    
        self.out_report.write("Arast Pipeline: Job {}\n".format(job_id))
        self.job_list.append(job_data)
        self.start_time = time.time()

        timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid, self.done_flag)
        timer_thread.start()
        
        url = "http://%s" % (self.shockurl)
        status = ''

        #### Parse pipeline to wasp exp
        wasp_exp = pipelines[0][0]
        reload(recipes)
        if recipe:
            try: wasp_exp = recipes.get(recipe[0])
            except AttributeError: raise Exception('"{}" recipe not found.'.format(recipe[0]))
        elif wasp_in:
            wasp_exp = wasp_in[0]
        elif pipelines[0] == 'auto':
            wasp_exp = recipes.get('auto')
        else:
            all_pipes = []
            for p in pipelines:
                all_pipes += self.pmanager.parse_input(p)
            print all_pipes
            wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
            logging.info('Wasp Expression: {}'.format(wasp_exp))
        print('Wasp Expression: {}'.format(wasp_exp))
        w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)
        w_engine.run_expression(wasp_exp, job_data)

        ###### Upload all result files and place them into appropriate tags
        uploaded_fsets = job_data.upload_results(url, token)
        
        for i, job in enumerate(self.job_list):
            if job['user'] == job_data['user'] and job['job_id'] == job_data['job_id']:
                self.job_list.pop(i)
                break  # stop after removing this job; continuing would skip entries


        # Format report
        new_report = open('{}.tmp'.format(self.out_report_name), 'w')

        ### Log exceptions
        if len(job_data['exceptions']) > 0:
            new_report.write('PIPELINE ERRORS\n')
            for i,e in enumerate(job_data['exceptions']):
                new_report.write('{}: {}\n'.format(i, e))
        try: ## Get Quast output
            quast_report = job_data['wasp_chain'].find_module('quast')['data'].find_type('report')[0].files[0]
            with open(quast_report) as q:
                new_report.write(q.read())
        except:
            new_report.write('No Summary File Generated!\n\n\n')
        self.out_report.close()
        with open(self.out_report_name) as old:
            new_report.write(old.read())

        for log in job_data['logfiles']:
            new_report.write('\n{1} {0} {1}\n'.format(os.path.basename(log), '='*20))
            with open(log) as l:
                new_report.write(l.read())
        new_report.close()
        os.remove(self.out_report_name)
        shutil.move(new_report.name, self.out_report_name)
        res = self.upload(url, user, token, self.out_report_name)
        report_info = asmtypes.FileInfo(self.out_report_name, shock_url=url, shock_id=res['data']['id'])

        self.metadata.update_job(uid, 'report', [asmtypes.set_factory('report', [report_info])])
        status = 'Complete with errors' if job_data['exceptions'] else 'Complete'

        ## Make compatible with JSON dumps()
        del job_data['out_report']
        del job_data['initial_reads']
        del job_data['raw_reads']
        self.metadata.update_job(uid, 'data', job_data)
        self.metadata.update_job(uid, 'result_data', uploaded_fsets)
        self.metadata.update_job(uid, 'status', status)

        ###### Legacy Support #######
        uploaded_fsets.append(asmtypes.set_factory('report', [report_info]))
        contigsets = [fset for fset in uploaded_fsets if fset.type == 'contigs' or fset.type == 'scaffolds']
        download_ids = {fi['filename']: fi['shock_id'] for fset in uploaded_fsets for fi in fset['file_infos']}
        contig_ids = {fi['filename']: fi['shock_id'] for fset in contigsets for fi in fset['file_infos']}
        self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
        self.metadata.update_job(uid, 'contig_ids', [contig_ids])
        ###################

        print '============== JOB COMPLETE ==============='
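
The legacy-support block above flattens the uploaded FileSets into two filename-to-Shock-id maps. A minimal sketch of the resulting shapes, with hypothetical filenames and ids:

    download_ids = {'contigs.fa': 'shock-node-1',   # every uploaded file
                    'report.txt': 'shock-node-2'}
    contig_ids = {'contigs.fa': 'shock-node-1'}     # only 'contigs'/'scaffolds' filesets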