def wasp_data(self):
    """
    Compatibility layer for wasp data types.
    Scans self for certain data types and populates a FileSetContainer.
    """
    all_sets = []

    #### Convert old reads/reference/contigs format to FileSets
    for set_type in ['reads', 'reference', 'contigs']:
        if set_type in self:
            for fs in self[set_type]:
                ### Get supported set attributes (insert size, stdev, etc.)
                kwargs = {}
                for key in ['insert', 'stdev', 'tags']:
                    if key in fs:
                        kwargs[key] = fs[key]
                all_sets.append(asmtypes.set_factory(
                    fs['type'],
                    [asmtypes.FileInfo(f) for f in fs['files']],
                    **kwargs))

    #### Convert final_contigs from pipeline mode
    if self.get('final_contigs'):  ## Not empty
        ## Remove leftover contigs; guard against a missing key
        self.pop('contigs', None)
        for contig_data in self['final_contigs']:
            all_sets.append(asmtypes.set_factory(
                'contigs',
                [asmtypes.FileInfo(fs) for fs in contig_data['files']],
                name=contig_data['name']))

    return asmtypes.FileSetContainer(all_sets)
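# Illustrative sketch only (not part of the original class): the legacy layout
# this method consumes looks roughly like the dict below, assuming the
# surrounding object is the dict-like job description. File names and
# attribute values here are hypothetical.
#
#   job['reads'] = [{'type': 'paired', 'files': ['r1.fq', 'r2.fq'],
#                    'insert': 300, 'stdev': 50}]
#   job['reference'] = [{'type': 'reference', 'files': ['ref.fa']}]
#   container = job.wasp_data()  # -> asmtypes.FileSetContainer with two FileSets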
def insert_output(self, output, default_type, module_name):
    """
    Parses the output dict of a completed module and stores the data
    and information within the WaspLink object.
    """
    filesets = []
    for outtype, outvalue in output.items():
        name = '{}_{}'.format(module_name, outtype)
        if not isinstance(outvalue, list):
            outvalue = [outvalue]

        ## Store default output
        if default_type == outtype:
            if isinstance(outvalue[0], asmtypes.FileSet):
                for out in outvalue:
                    out['tags'].append(module_name)
                self['default_output'] = outvalue
            else:  # Plain file paths
                self['default_output'] = asmtypes.set_factory(
                    outtype,
                    [asmtypes.FileInfo(f) for f in outvalue],
                    name=name)
                self['default_output']['tags'].append(module_name)

        ## Store all outputs and values
        outputs = []
        are_files = False
        for out in outvalue:
            try:
                if os.path.exists(out):  # A file: convert to FileInfo format
                    outputs.append(asmtypes.FileInfo(out))
                    are_files = True
                else:
                    raise Exception('Not a file')
            except Exception:  # Not a file (or not a path-like value)
                outputs = outvalue
                break
        if are_files:
            filesets.append(asmtypes.set_factory(outtype, outputs, name=name))
        else:
            self['info'][outtype] = outputs[0] if len(outputs) == 1 else outputs
    self['data'] = asmtypes.FileSetContainer(filesets)
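# Illustrative sketch only: a completed module might hand back an output dict
# shaped like the one below (module name and values are hypothetical). With
# default_type='contigs', the existing 'contigs.fa' path becomes the link's
# default_output FileSet, 'report.txt' is wrapped into a 'report' FileSet in
# self['data'], and the non-file value lands in self['info']['n50'].
#
#   output = {'contigs': 'contigs.fa',
#             'report': ['report.txt'],
#             'n50': 41235}
#   wasp_link.insert_output(output, default_type='contigs', module_name='spades')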
def compute(self, body):
    self.job_list_lock.acquire()
    try:
        job_data = self.prepare_job_data(body)
        self.job_list.append(job_data)
    except Exception:
        logger.error('Error adding new job to job_list')
        raise
    finally:
        self.job_list_lock.release()

    status = ''
    logger.debug('job_data = {}'.format(job_data))

    params = json.loads(body)
    job_id = params['job_id']
    data_id = params['data_id']
    uid = params['_id']
    user = params['ARASTUSER']
    token = params['oauth_token']
    pipelines = params.get('pipeline')
    recipe = params.get('recipe')
    wasp_in = params.get('wasp')
    jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))
    url = shock.verify_shock_url(self.shockurl)

    self.start_time = time.time()
    timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid, self.done_flag)
    timer_thread.start()

    #### Parse pipeline to wasp expression
    reload(recipes)
    if recipe:
        try:
            wasp_exp = recipes.get(recipe[0], job_id)
        except AttributeError:
            raise Exception('"{}" recipe not found.'.format(recipe[0]))
    elif wasp_in:
        wasp_exp = wasp_in[0]
    elif not pipelines:
        wasp_exp = recipes.get('auto', job_id)
    elif pipelines:  ## Legacy client
        if pipelines[0] == 'auto':
            wasp_exp = recipes.get('auto', job_id)
        else:
            if not isinstance(pipelines[0], list):  # --assemblers
                pipelines = [pipelines]
            all_pipes = []
            for p in pipelines:
                all_pipes += self.pmanager.parse_input(p)
            logger.debug('pipelines = {}'.format(all_pipes))
            wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
    else:
        raise asmtypes.ArastClientRequestError('Malformed job request.')
    logger.debug('Wasp Expression: {}'.format(wasp_exp))
    w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

    ###### Run Job
    try:
        w_engine.run_expression(wasp_exp, job_data)

        ###### Upload all result files and place them into appropriate tags
        uploaded_fsets = job_data.upload_results(url, token)

        #### Format report
        new_report = open('{}.tmp'.format(self.out_report_name), 'w')

        ### Log errors
        if len(job_data['errors']) > 0:
            new_report.write('PIPELINE ERRORS\n')
            for i, e in enumerate(job_data['errors']):
                new_report.write('{}: {}\n'.format(i, e))

        try:  ## Get Quast output
            quast_report = job_data['wasp_chain'].find_module(
                'quast')['data'].find_type('report')[0].files[0]
            with open(quast_report) as q:
                new_report.write(q.read())
        except Exception:
            new_report.write('No Summary File Generated!\n\n\n')

        self.out_report.close()
        with open(self.out_report_name) as old:
            new_report.write(old.read())
        for log in job_data['logfiles']:
            new_report.write('\n{1} {0} {1}\n'.format(
                os.path.basename(log), '=' * 20))
            with open(log) as l:
                new_report.write(l.read())

        ### Log tracebacks
        if len(job_data['tracebacks']) > 0:
            new_report.write('EXCEPTION TRACEBACKS\n')
            for i, e in enumerate(job_data['tracebacks']):
                new_report.write('{}: {}\n'.format(i, e))

        new_report.close()
        os.remove(self.out_report_name)
        shutil.move(new_report.name, self.out_report_name)
        res = self.upload(url, user, token, self.out_report_name)
        report_info = asmtypes.FileInfo(self.out_report_name,
                                        shock_url=url,
                                        shock_id=res['data']['id'])
        self.metadata.update_job(
            uid, 'report', [asmtypes.set_factory('report', [report_info])])
        status = 'Complete with errors' if job_data.get('errors') else 'Complete'

        ## Make compatible with JSON dumps()
        del job_data['out_report']
        del job_data['initial_reads']
        del job_data['raw_reads']
        self.metadata.update_job(uid, 'data', job_data)
        self.metadata.update_job(uid, 'result_data', uploaded_fsets)

        ###### Legacy Support #######
        uploaded_fsets.append(asmtypes.set_factory('report', [report_info]))
        contigsets = [fset for fset in uploaded_fsets
                      if fset.type == 'contigs' or fset.type == 'scaffolds']
        download_ids = {fi['filename']: fi['shock_id']
                        for fset in uploaded_fsets
                        for fi in fset['file_infos']}
        contig_ids = {fi['filename']: fi['shock_id']
                      for fset in contigsets
                      for fi in fset['file_infos']}
        self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
        self.metadata.update_job(uid, 'contig_ids', [contig_ids])
        #############################

        sys.stdout.flush()
        touch(os.path.join(jobpath, '_DONE_'))
        logger.info('============== JOB COMPLETE ===============')

    except asmtypes.ArastUserInterrupt:
        status = 'Terminated by user'
        sys.stdout.flush()
        touch(os.path.join(jobpath, '_CANCELLED_'))
        logger.info('============== JOB KILLED ===============')

    finally:
        self.remove_job_from_lists(job_data)
        logger.debug('Reinitializing plugin manager to pick up live changes...')
        self.pmanager = ModuleManager(self.threads, self.kill_list,
                                      self.kill_list_lock, self.job_list,
                                      self.binpath, self.modulebin)
        self.metadata.update_job(uid, 'status', status)
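# Illustrative sketch only: the `body` argument is a JSON-encoded job request.
# The keys below are the ones this method reads; the values, and the name
# `consumer`, are hypothetical.
#
#   body = json.dumps({'job_id': 42, 'data_id': 7, '_id': 'uid123',
#                      'ARASTUSER': 'alice', 'oauth_token': 'token',
#                      'pipeline': [['spades']]})   # or 'recipe': ['auto'],
#                                                   # or 'wasp': ['(...expr...)']
#   consumer.compute(body)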
def upload_data_file_info(self, filename, curl=False):
    """ Returns FileInfo Object """
    self.init_shock()
    res = self.shock.upload_reads(filename, curl=curl)
    return asmtypes.FileInfo(filename,
                             shock_url=self.shockurl,
                             shock_id=res['data']['id'],
                             create_time=str(datetime.datetime.utcnow()))
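# Illustrative sketch only: callers can stash the returned FileInfo in job
# metadata or a FileSet. The client object and filename below are hypothetical.
#
#   info = client.upload_data_file_info('reads_1.fastq', curl=True)
#   info['shock_id'], info['create_time']  # populated from the Shock response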