Example 1
    def wasp_data(self):
        """
        Compatibility layer for wasp data types.
        Scans self for certain data types and populates a FileSetContainer
        """
        all_sets = []
        #### Convert Old Reads Format to ReadSets
        for set_type in ['reads', 'reference', 'contigs']:
            if set_type in self:
                for fs in self[set_type]:
                    ### Collect supported set attributes (insert, stdev, tags)
                    kwargs = {}
                    for key in ['insert', 'stdev', 'tags']:
                        if key in fs:
                            kwargs[key] = fs[key]
                    all_sets.append(
                        asmtypes.set_factory(
                            fs['type'],
                            [asmtypes.FileInfo(f)
                             for f in fs['files']], **kwargs))

        #### Convert final_contigs from pipeline mode
        if 'final_contigs' in self:
            if self['final_contigs']:  ## Not empty
                ## Remove leftover contigs
                del self['contigs']
                for contig_data in self['final_contigs']:
                    all_sets.append(
                        asmtypes.set_factory(
                            'contigs',
                            [asmtypes.FileInfo(fs)
                             for fs in contig_data['files']],
                            name=contig_data['name']))

        #### Convert Contig/Ref format
        # for set_type in ['contigs', 'reference']:
        #     if set_type in self:
        #         all_sets.append(asmtypes.set_factory(set_type, [asmtypes.FileInfo(fs) for fs in self[set_type]]))

        return asmtypes.FileSetContainer(all_sets)
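
A minimal sketch of the job dictionary that wasp_data() scans, reconstructed from the keys the method reads above; the paths and values are placeholders, not taken from the project.

# Hypothetical job dictionary consumed by wasp_data(). The 'reads'/'reference'/
# 'contigs' entries become file sets; a non-empty 'final_contigs' (pipeline mode)
# replaces any leftover 'contigs' entry.
job = {
    'reads': [
        {'type': 'paired', 'files': ['/data/r1.fq', '/data/r2.fq'],
         'insert': 300, 'stdev': 50, 'tags': ['trimmed']},
    ],
    'contigs': [
        {'type': 'contigs', 'files': ['/data/old_contigs.fa']},
    ],
    'final_contigs': [
        {'name': 'assembler_contigs', 'files': ['/data/final_contigs.fa']},
    ],
}
# container = job_obj.wasp_data()  # job_obj: the dict-like object defining wasp_data();
#                                  # returns an asmtypes.FileSetContainer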
Example 2
    def insert_output(self, output, default_type, module_name):
        """ Parses the output dict of a completed module and stores the
        data and information within the WaspLink object """
        filesets = []
        for outtype, outvalue in output.items():
            name = '{}_{}'.format(module_name, outtype)
            if not isinstance(outvalue, list):
                outvalue = [outvalue]
            ## Store default output
            if default_type == outtype:
                if isinstance(outvalue[0], asmtypes.FileSet):
                    for out in outvalue:
                        out['tags'].append(module_name)
                    self['default_output'] = outvalue

                else: # Files
                    self['default_output'] = asmtypes.set_factory(outtype, [asmtypes.FileInfo(f) for f in outvalue],
                                                                  name=name)
                    self['default_output']['tags'].append(module_name)
            ## Store all outputs and values
            outputs = []
            are_files = False
            for out in outvalue:
                try:
                    if os.path.exists(out): # These are files, convert to FileInfo format
                        outputs.append(asmtypes.FileInfo(out))
                        are_files = True
                    else:
                        raise Exception('Not a file')
                except Exception: # Not a file path; keep the raw values
                    outputs = outvalue
                    break
            if are_files:
                filesets.append(asmtypes.set_factory(outtype, outputs, name=name))
            else:
                self['info'][outtype] = outputs if len(outputs) != 1 else outputs[0]
        self['data'] = asmtypes.FileSetContainer(filesets)
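
To illustrate the split above, a hedged sketch of a module output dict: values that exist on disk are converted to FileInfo objects and grouped into FileSets under self['data'], while everything else lands in self['info']. The module name, keys, and paths below are invented for the example.

# Hypothetical output dict from a completed module. Existing file paths become
# FileSets in link['data']; plain values are stored under link['info'].
output = {
    'contigs': ['/tmp/run/contigs.fa'],  # file on disk -> FileSet
    'n50': 48213,                        # not a path   -> link['info']['n50']
    'report': '/tmp/run/report.txt',     # single file, wrapped into a list first
}
# link.insert_output(output, default_type='contigs', module_name='spades')
# link['default_output'] then holds the 'contigs' FileSet, tagged with 'spades'.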
Example 3
    def compute(self, body):
        self.job_list_lock.acquire()
        try:
            job_data = self.prepare_job_data(body)
            self.job_list.append(job_data)
        except:
            logger.error("Error in adding new job to job_list")
            raise
        finally:
            self.job_list_lock.release()

        status = ''
        logger.debug('job_data = {}'.format(job_data))

        params = json.loads(body)
        job_id = params['job_id']
        data_id = params['data_id']
        uid = params['_id']
        user = params['ARASTUSER']
        token = params['oauth_token']
        pipelines = params.get('pipeline')
        recipe = params.get('recipe')
        wasp_in = params.get('wasp')
        jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))

        url = shock.verify_shock_url(self.shockurl)

        self.start_time = time.time()

        timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid,
                                   self.done_flag)
        timer_thread.start()

        #### Parse pipeline to wasp exp
        reload(recipes)
        if recipe:
            try:
                wasp_exp = recipes.get(recipe[0], job_id)
            except AttributeError:
                raise Exception('"{}" recipe not found.'.format(recipe[0]))
        elif wasp_in:
            wasp_exp = wasp_in[0]
        elif not pipelines:
            wasp_exp = recipes.get('auto', job_id)
        elif pipelines:
            ## Legacy client
            if pipelines[0] == 'auto':
                wasp_exp = recipes.get('auto', job_id)
            ##########
            else:
                if type(pipelines[0]) is not list:  # --assemblers
                    pipelines = [pipelines]
                all_pipes = []
                for p in pipelines:
                    all_pipes += self.pmanager.parse_input(p)
                logger.debug("pipelines = {}".format(all_pipes))
                wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
        else:
            raise asmtypes.ArastClientRequestError('Malformed job request.')
        logger.debug('Wasp Expression: {}'.format(wasp_exp))
        w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

        ###### Run Job
        try:
            w_engine.run_expression(wasp_exp, job_data)
            ###### Upload all result files and place them into appropriate tags
            uploaded_fsets = job_data.upload_results(url, token)

            # Format report
            new_report = open('{}.tmp'.format(self.out_report_name), 'w')

            ### Log errors
            if len(job_data['errors']) > 0:
                new_report.write('PIPELINE ERRORS\n')
                for i, e in enumerate(job_data['errors']):
                    new_report.write('{}: {}\n'.format(i, e))
            try:  ## Get Quast output
                quast_report = job_data['wasp_chain'].find_module(
                    'quast')['data'].find_type('report')[0].files[0]
                with open(quast_report) as q:
                    new_report.write(q.read())
            except Exception:  ## No Quast report available
                new_report.write('No Summary File Generated!\n\n\n')
            self.out_report.close()
            with open(self.out_report_name) as old:
                new_report.write(old.read())

            for log in job_data['logfiles']:
                new_report.write('\n{1} {0} {1}\n'.format(
                    os.path.basename(log), '=' * 20))
                with open(log) as l:
                    new_report.write(l.read())

            ### Log tracebacks
            if len(job_data['tracebacks']) > 0:
                new_report.write('EXCEPTION TRACEBACKS\n')
                for i, e in enumerate(job_data['tracebacks']):
                    new_report.write('{}: {}\n'.format(i, e))

            new_report.close()
            os.remove(self.out_report_name)
            shutil.move(new_report.name, self.out_report_name)
            res = self.upload(url, user, token, self.out_report_name)
            report_info = asmtypes.FileInfo(self.out_report_name,
                                            shock_url=url,
                                            shock_id=res['data']['id'])

            self.metadata.update_job(
                uid, 'report', [asmtypes.set_factory('report', [report_info])])
            status = 'Complete with errors' if job_data.get(
                'errors') else 'Complete'

            ## Make compatible with JSON dumps()
            del job_data['out_report']
            del job_data['initial_reads']
            del job_data['raw_reads']
            self.metadata.update_job(uid, 'data', job_data)
            self.metadata.update_job(uid, 'result_data', uploaded_fsets)
            ###### Legacy Support #######
            uploaded_fsets.append(
                asmtypes.set_factory('report', [report_info]))
            contigsets = [
                fset for fset in uploaded_fsets
                if fset.type == 'contigs' or fset.type == 'scaffolds'
            ]
            download_ids = {
                fi['filename']: fi['shock_id']
                for fset in uploaded_fsets for fi in fset['file_infos']
            }
            contig_ids = {
                fi['filename']: fi['shock_id']
                for fset in contigsets for fi in fset['file_infos']
            }
            self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
            self.metadata.update_job(uid, 'contig_ids', [contig_ids])
            ###################

            sys.stdout.flush()
            touch(os.path.join(jobpath, "_DONE_"))
            logger.info('============== JOB COMPLETE ===============')

        except asmtypes.ArastUserInterrupt:
            status = 'Terminated by user'
            sys.stdout.flush()
            touch(os.path.join(jobpath, "_CANCELLED__"))
            logger.info('============== JOB KILLED ===============')

        finally:
            self.remove_job_from_lists(job_data)
            logger.debug('Reinitialize plugin manager...')  # Reinitialize to get live changes
            self.pmanager = ModuleManager(self.threads, self.kill_list,
                                          self.kill_list_lock, self.job_list,
                                          self.binpath, self.modulebin)

        self.metadata.update_job(uid, 'status', status)
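
For context, a sketch of the JSON message body that compute() parses; the field names mirror the params[...] lookups above, the values are placeholders, and normally only one of 'recipe', 'wasp', or 'pipeline' is supplied.

import json

# Hypothetical queue message body for compute(); keys follow the params accesses above.
body = json.dumps({
    'job_id': 42,
    'data_id': 7,
    '_id': 'job-uid-placeholder',
    'ARASTUSER': 'alice',
    'oauth_token': '<token>',
    'recipe': ['auto'],           # or 'wasp': ['<wasp expression string>']
    # 'pipeline': [['spades', 'velvet']],  # legacy / --assemblers style input
})
# consumer.compute(body)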
Example 4
    def upload_data_file_info(self, filename, curl=False):
        """ Returns FileInfo Object """
        self.init_shock()
        res = self.shock.upload_reads(filename, curl=curl)
        return asmtypes.FileInfo(filename, shock_url=self.shockurl, shock_id=res['data']['id'],
                                 create_time=str(datetime.datetime.utcnow()))
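
A brief usage sketch, assuming client is an object exposing this method with an initialized Shock connection; the path is a placeholder.

# info = client.upload_data_file_info('/data/reads_1.fastq')
# info['filename'], info['shock_id']  # FileInfo supports dict-style access (see Example 3)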