def loadReadsSet(self):
    """Save (once) and return the reference of a three-item test ReadsSet.

    Every item of the set points at ``self.pe_reads_ref``. The reference
    returned by ``SetAPI.save_reads_set_v1`` is cached on the class as
    ``reads_set_ref`` so that later calls return it without saving again.

    :returns: the ``set_ref`` value of the saved ReadsSet.
    """
    cls = self.__class__
    if hasattr(cls, 'reads_set_ref'):
        return cls.reads_set_ref

    pe_reads_ref = self.pe_reads_ref
    reads_set_name = 'TophatTestReadsSet'

    # create the set object
    reads_set_data = {
        'description': 'Reads Set for testing Bowtie',
        'items': [
            {'ref': pe_reads_ref, 'label': label}
            for label in ('rs1', 'rs2', 'rs3')
        ],
    }

    # test a save
    set_api = SetAPI(self.srv_wiz_url, service_ver='dev')
    res = set_api.save_reads_set_v1({
        'data': reads_set_data,
        'output_object_name': reads_set_name,
        'workspace': self.getWsName(),
    })
    reads_set_ref = res['set_ref']

    # Store the reference on the class; without this the hasattr() check
    # above could never succeed and every call would save a new set.
    cls.reads_set_ref = reads_set_ref

    print('Loaded ReadsSet: ' + reads_set_ref)
    return reads_set_ref
def exec_remove_adapters(self, ctx, params):
    """
    Remove adapters from one reads library or from every member of a set.

    A single library is handed directly to
    exec_remove_adapters_OneLibrary() and saved under
    params['output_object_name']. For a KBaseSets.ReadsSet or a
    KBaseRNASeq.RNASeqSampleSet each member is processed in turn (output
    object named "<member name>_cutadapt") and the outputs are saved as a
    new ReadsSet named params['output_object_name'].

    :param params: instance of type "RemoveAdaptersParams" -> structure:
       parameter "output_workspace" of String, parameter
       "output_object_name" of String, parameter "input_reads" of type
       "ws_ref" (@ref ws), parameter "five_prime" of type
       "FivePrimeOptions" (unfortunately, we have to name the fields
       uniquely between 3' and 5' options due to the current
       implementation of grouped parameters) -> structure: parameter
       "adapter_sequence_5P" of String, parameter "anchored_5P" of type
       "boolean" (@range (0, 1)), parameter "three_prime" of type
       "ThreePrimeOptions" -> structure: parameter "adapter_sequence_3P"
       of String, parameter "anchored_3P" of type "boolean" (@range (0,
       1)), parameter "error_tolerance" of Double, parameter
       "min_overlap_length" of Long, parameter "min_read_length" of Long,
       parameter "discard_untrimmed" of type "boolean" (@range (0, 1))
    :returns: instance of type "exec_RemoveAdaptersResult" -> structure:
       parameter "report" of String, parameter "output_reads_ref" of
       String. The structure is returned wrapped in a one-element list.
    :raises ValueError: when a required parameter is missing or empty,
       the input object cannot be read or has an unsupported type, or no
       output library was produced for a set input.
    """
    # ctx is the context object
    # return variables are: result
    #BEGIN exec_remove_adapters
    console = []
    self.log(console, 'Running exec_remove_adapters() with parameters: ')
    self.log(console, "\n" + pformat(params))
    self.log(console, "-----------------------------------------------\n")

    # 0. param checks (done before any client is built so bad input fails fast)
    required_params = ['output_workspace', 'input_reads', 'output_object_name']
    for arg in required_params:
        if arg not in params or params[arg] is None or params[arg] == '':
            raise ValueError("Must define required param: '" + arg + "'")

    token = ctx['token']
    wsClient = workspaceService(self.config['workspace-url'], token=token)
    ws = Workspace(self.config['workspace-url'], token=token)
    # SetAPI is reached through the service wizard (dynamic service); the
    # SDK callback URL does not work for SetAPI.
    setAPI_Client = SetAPI(url=self.config['service-wizard-url'], token=token)

    # 1. load provenance
    provenance = [{}]
    if 'provenance' in ctx:
        provenance = ctx['provenance']
    # add additional info to provenance here, in this case the input data object reference
    provenance[0]['input_ws_objects'] = [str(params['input_reads'])]

    # Positions of the fields used from a workspace object_info tuple.
    NAME_I = 1
    TYPE_I = 2
    # Matches the trailing "-<major>.<minor>" type version.
    type_version_suffix = re.compile(r'-[0-9]+\.[0-9]+$')

    # 2. Determine whether read library, ReadsSet or RNASeqSampleSet is input object
    try:
        input_reads_obj_info = wsClient.get_object_info_new(
            {'objects': [{'ref': params['input_reads']}]})[0]
        # remove trailing version (this is the type version, not the object version)
        input_reads_obj_type = type_version_suffix.sub(
            "", input_reads_obj_info[TYPE_I])
    except Exception as e:
        raise ValueError(
            'Unable to get read library object from workspace: (' +
            str(params['input_reads']) + ')' + str(e))

    acceptable_types = [
        "KBaseSets.ReadsSet", "KBaseRNASeq.RNASeqSampleSet",
        "KBaseFile.PairedEndLibrary", "KBaseFile.SingleEndLibrary",
        "KBaseAssembly.PairedEndLibrary", "KBaseAssembly.SingleEndLibrary"
    ]
    if input_reads_obj_type not in acceptable_types:
        raise ValueError("Input reads of type: '" + input_reads_obj_type +
                         "'.  Must be one of " + ", ".join(acceptable_types))

    is_reads_set = input_reads_obj_type == "KBaseSets.ReadsSet"
    is_sample_set = input_reads_obj_type == "KBaseRNASeq.RNASeqSampleSet"
    is_set_input = is_reads_set or is_sample_set

    # 3. Retrieve the set details
    # Stays None unless the input is a ReadsSet; checked again in step 5.
    input_readsSet_obj = None
    readsSet_ref_list = []
    readsSet_names_list = []
    readsSet_types_list = []
    if is_reads_set:
        try:
            input_readsSet_obj = setAPI_Client.get_reads_set_v1({
                'ref': params['input_reads'],
                'include_item_info': 1
            })
        except Exception as e:
            raise ValueError(
                'SetAPI FAILURE: Unable to get read library set object from workspace: ('
                + str(params['input_reads']) + ")\n" + str(e))
        for readsLibrary_obj in input_readsSet_obj['data']['items']:
            readsSet_ref_list.append(readsLibrary_obj['ref'])
            readsSet_names_list.append(readsLibrary_obj['info'][NAME_I])
            readsSet_types_list.append(
                type_version_suffix.sub("", readsLibrary_obj['info'][TYPE_I]))
    elif is_sample_set:
        sample_set = ws.get_objects2(
            {"objects": [{"ref": params['input_reads']}]})["data"][0]["data"]
        readsSet_ref_list = list(sample_set["sample_ids"])
        sample_refs = [{"ref": sample_id} for sample_id in readsSet_ref_list]
        info = ws.get_object_info3({"objects": sample_refs})
        for sample_info in info["infos"]:
            readsSet_names_list.append(sample_info[NAME_I])
            readsSet_types_list.append(
                type_version_suffix.sub("", sample_info[TYPE_I]))
    else:
        readsSet_ref_list = [params['input_reads']]
        readsSet_names_list = [params['output_object_name']]
        readsSet_types_list = [input_reads_obj_type]

    # 4. Iterate through readsLibrary members of set
    # 'error_tolerance' was previously listed as 'float error_tolerance',
    # which never matched a key in params, so the value was dropped.
    optional_params = [
        'error_tolerance', 'min_overlap_length', 'min_read_length',
        'discard_untrimmed'
    ]
    optional_g_params = {
        'five_prime': ['adapter_sequence_5P', 'anchored_5P'],
        'three_prime': ['adapter_sequence_3P', 'anchored_3P']
    }
    report = ''
    cutadapt_readsLib_refs = []
    for reads_item_i, input_reads_library_ref in enumerate(readsSet_ref_list):
        one_lib_params = {
            'output_workspace': params['output_workspace'],
            'input_reads': input_reads_library_ref,
            'reads_type': readsSet_types_list[reads_item_i]
        }
        if is_set_input:
            one_lib_params['output_object_name'] = \
                readsSet_names_list[reads_item_i] + "_cutadapt"
        else:
            one_lib_params['output_object_name'] = params['output_object_name']

        # Forward only the optional values that were actually supplied.
        for arg in optional_params:
            if arg in params and params[arg] is not None:
                one_lib_params[arg] = params[arg]
        for group, group_args in optional_g_params.items():
            if group in params and params[group] is not None:
                one_lib_params[group] = dict()
                for arg in group_args:
                    if arg in params[group] and params[group][arg] is not None:
                        one_lib_params[group][arg] = params[group][arg]

        msg = "\n\nRUNNING exec_remove_adapters_OneLibrary() ON LIBRARY: " + str(
            input_reads_library_ref) + " " + str(
                readsSet_names_list[reads_item_i]) + "\n"
        msg += "----------------------------------------------------------------------------\n"
        report += msg
        self.log(console, msg)

        # RUN
        one_lib_retVal = self.exec_remove_adapters_OneLibrary(
            ctx, one_lib_params)[0]
        report += one_lib_retVal['report'] + "\n\n"
        cutadapt_readsLib_refs.append(one_lib_retVal['output_reads_ref'])

    # 5. Conclude
    if not is_set_input:
        # Just one Library
        result = {
            'report': report,
            'output_reads_ref': cutadapt_readsLib_refs[0],
        }
    else:
        # ReadsSet or SampleSet: save cutadapt readsSet
        items = []
        for i, lib_ref in enumerate(cutadapt_readsLib_refs):
            if lib_ref is None:
                continue  # can't have 'None' items in ReadsSet
            try:
                label = input_readsSet_obj['data']['items'][i]['label']
            except (KeyError, IndexError, TypeError):
                # No input ReadsSet (sample set input -> TypeError on None)
                # or the item has no label: use the output object's name.
                label = ws.get_object_info3(
                    {'objects': [{'ref': lib_ref}]})['infos'][0][NAME_I]
            items.append({'ref': lib_ref, 'label': label + "_cutadapt"})

        if not items:
            raise ValueError("No cutadapt output created")

        try:
            descText = input_readsSet_obj['data']['description']
        except (KeyError, TypeError):
            # Same fallback as for labels: use the input object's name.
            descText = ws.get_object_info3(
                {'objects': [{'ref': params['input_reads']}]})['infos'][0][NAME_I]
        descText = descText + " + Cutadapt"

        cutadapt_readsSet_ref = setAPI_Client.save_reads_set_v1({
            'workspace_name': params['output_workspace'],
            'output_object_name': str(params['output_object_name']),
            'data': {'description': descText, 'items': items}
        })['set_ref']

        # create return output object
        result = {
            'report': report,
            'output_reads_ref': cutadapt_readsSet_ref
        }
    #END exec_remove_adapters

    # At some point might do deeper type checking...
    if not isinstance(result, dict):
        raise ValueError('Method exec_remove_adapters return value ' +
                         'result is not type dict as required.')
    # return the results
    return [result]
def KButil_Build_InSilico_Metagenomes_with_Grinder(self, ctx, params):
    """
    Use Grinder to generate in silico shotgun metagenome read libraries.

    Steps: validate params; turn population_percs into a Grinder abundance
    file; fetch each input genome's assembly as FASTA and write one joined
    sequence per genome; run Grinder; upload every FASTQ file it reports as
    a reads library (plus a ReadsSet when more than one library was made);
    create an extended report. Input problems found while parsing are
    collected and returned as the report message instead of raising.

    :param params: instance of type
       "KButil_Build_InSilico_Metagenomes_with_Grinder_Params"
       (KButil_Build_InSilico_Metagenomes_with_Grinder() ** ** Use
       Grinder to generate in silico shotgun metagenomes) -> structure:
       parameter "workspace_name" of type "workspace_name" (** The
       workspace object refs are of form: ** ** objects =
       ws.get_objects([{'ref':
       params['workspace_id']+'/'+params['obj_name']}]) ** ** "ref" means
       the entire name combining the workspace id and the object name **
       "id" is a numerical identifier of the workspace or object, and
       should just be used for workspace ** "name" is a string identifier
       of a workspace or object.  This is received from Narrative.),
       parameter "input_refs" of type "data_obj_ref", parameter
       "output_name" of type "data_obj_name", parameter "desc" of String,
       parameter "num_reads_per_lib" of Long, parameter
       "population_percs" of String, parameter "read_len_mean" of Long,
       parameter "read_len_stddev" of Double, parameter "pairs_flag" of
       Long, parameter "mate_orientation" of String, parameter
       "insert_len_mean" of Long, parameter "insert_len_stddev" of
       Double, parameter "mutation_dist" of String, parameter
       "mutation_ratio" of String, parameter "qual_good" of Long,
       parameter "qual_bad" of Long, parameter "len_bias_flag" of Long,
       parameter "random_seed" of Long
    :returns: instance of type
       "KButil_Build_InSilico_Metagenomes_with_Grinder_Output" ->
       structure: parameter "report_name" of type "data_obj_name",
       parameter "report_ref" of type "data_obj_ref". The structure is
       returned wrapped in a one-element list.
    :raises ValueError: when a required parameter is missing or empty, a
       workspace object cannot be read or has the wrong type, Grinder
       exits with a non-zero code, a generated library is empty, or a
       file cannot be uploaded.
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN KButil_Build_InSilico_Metagenomes_with_Grinder

    #### STEP 0: basic init
    ##
    console = []
    invalid_msgs = []
    report_text = ''
    self.log(console,
             'Running KButil_Build_InSilico_Metagenomes_with_Grinder(): ')
    self.log(console, "\n" + pformat(params))

    # param checks (done before any client is built so bad input fails fast)
    required_params = [
        'workspace_name', 'input_refs', 'output_name', 'num_reads_per_lib',
        'population_percs', 'read_len_mean', 'read_len_stddev',
        'pairs_flag', 'mate_orientation', 'insert_len_mean',
        'insert_len_stddev', 'mutation_dist', 'mutation_ratio',
        'qual_good', 'qual_bad', 'len_bias_flag', 'random_seed'
    ]
    for arg in required_params:
        if arg not in params or params[arg] is None or params[arg] == '':
            raise ValueError("Must define required param: '" + arg + "'")

    # cast to str unpredictable numerical params (mostly used in string context)
    numerical_params = [
        'num_reads_per_lib', 'read_len_mean', 'read_len_stddev',
        'pairs_flag', 'insert_len_mean', 'insert_len_stddev', 'qual_good',
        'qual_bad', 'len_bias_flag', 'random_seed'
    ]
    for arg in numerical_params:
        params[arg] = str(params[arg])
    paired = params['pairs_flag'] == '1'

    # The docstring types input_refs as a single data_obj_ref while the code
    # iterates it; wrap a scalar so it is not iterated character by character.
    genome_refs = params['input_refs']
    if not isinstance(genome_refs, (list, tuple)):
        genome_refs = [genome_refs]

    # Auth
    token = ctx['token']

    # API Clients
    SERVICE_VER = 'release'
    wsClient = workspaceService(self.workspaceURL, token=token)
    readsUtils_Client = ReadsUtils(url=self.callbackURL, token=token)  # SDK local
    # SetAPI is reached through the service wizard (dynamic service); the
    # SDK-local callback URL doesn't work for SetAPI.
    setAPI_Client = SetAPI(url=self.serviceWizardURL, token=token)
    auClient = AssemblyUtil(self.callbackURL, token=token,
                            service_ver=SERVICE_VER)
    dfu = DFUClient(self.callbackURL)

    # load provenance
    provenance = [{}]
    if 'provenance' in ctx:
        provenance = ctx['provenance']
    provenance[0]['input_ws_objects'] = list(genome_refs)

    # set the output paths (directory name carries a millisecond timestamp)
    timestamp = int(
        (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() *
        1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    html_output_dir = os.path.join(output_dir, 'html')
    if not os.path.exists(html_output_dir):
        os.makedirs(html_output_dir)

    #### STEP 1: Parse population_percs and write to file
    ##
    abundance_file_path = os.path.join(output_dir, 'my_abundances.txt')
    abundance_config_num_libs = 0
    abundance_config_num_libs_set = False
    inconsistent_rows = False
    grinder_genome_ids = []
    header = []
    out_buf = []
    for row in params['population_percs'].strip().split("\n"):
        # split() drops leading/trailing whitespace, so an indented row no
        # longer yields an empty genome id.
        cols = row.split()
        if not cols:
            continue  # blank line
        if cols[0].upper() == "GENOME":
            header.extend(cols)
            continue

        grinder_genome_ids.append(cols[0])
        self.log(console, "GRINDER GENOME ID: '" + cols[0] + "'")  # DEBUG
        out_row = []
        for col in cols:
            if col.endswith('%'):
                col = col.rstrip('%')
            if col:  # a bare '%' token carries no value
                out_row.append(col)
        out_buf.append("\t".join(out_row))

        num_samples = len(out_row) - 1  # first col is genome id
        if not abundance_config_num_libs_set:
            abundance_config_num_libs_set = True
            abundance_config_num_libs = num_samples
        elif num_samples != abundance_config_num_libs:
            inconsistent_rows = True

    # data validation
    if inconsistent_rows:
        invalid_msgs.append(
            "inconsistent number of samples in population_percs input field")
    if abundance_config_num_libs <= 0:
        invalid_msgs.append(
            "unable to find sample percentages in population_percs input field"
        )
    if len(grinder_genome_ids) != len(genome_refs):
        # Genome ids are matched to input genomes by position further down.
        invalid_msgs.append(
            "number of genome rows in population_percs input field (" +
            str(len(grinder_genome_ids)) +
            ") does not match number of input genomes (" +
            str(len(genome_refs)) + ")")

    sample_sums = [0.0] * max(abundance_config_num_libs, 0)
    for abund_row_str in out_buf:
        for sample_i, abund in enumerate(abund_row_str.split("\t")[1:]):
            if sample_i >= len(sample_sums):
                break  # ragged row, already reported above
            try:
                sample_sums[sample_i] += float(abund)
            except ValueError:
                invalid_msgs.append(
                    "non-numeric abundance '" + abund +
                    "' in population_percs input field")
    for sample_i, sample_sum in enumerate(sample_sums):
        if sample_sum < 99.5 or sample_sum > 100.5:
            # The header row is optional; fall back to an empty label.
            if sample_i + 1 < len(header):
                sample_label = header[sample_i + 1]
            else:
                sample_label = ''
            self.log(
                invalid_msgs, "Sample: " + str(sample_i + 1) + " " +
                sample_label +
                " proportions is not summing to 100.0. Summing to: " +
                str(sample_sum))

    if len(invalid_msgs) == 0:
        with open(abundance_file_path, 'w') as abundance_fh:
            for out_line in out_buf:
                abundance_fh.write(out_line + "\n")
                self.log(console, "ABUNDANCE_CONFIG: '" + out_line + "'")  # DEBUG

    #### STEP 2: get genome scaffold sequences
    ##
    genome_obj_names = []
    if len(invalid_msgs) == 0:
        genomes_src_db_file_path = os.path.join(output_dir, 'genomes.fna')
        read_buf_size = 65536
        write_buf_size = 65536
        accepted_input_types = ["KBaseGenomes.Genome"]
        # Positions of the fields used from a workspace object_info tuple.
        NAME_I = 1
        TYPE_I = 2
        genome_sci_names = []
        assembly_refs = []

        for i, input_ref in enumerate(genome_refs):
            # genome obj info
            try:
                input_obj_info = wsClient.get_object_info_new(
                    {'objects': [{'ref': input_ref}]})[0]
                input_obj_type = re.sub(
                    r'-[0-9]+\.[0-9]+$', "",
                    input_obj_info[TYPE_I])  # remove trailing version
                genome_obj_names.append(input_obj_info[NAME_I])
            except Exception as e:
                raise ValueError('Unable to get object from workspace: (' +
                                 input_ref + ')' + str(e))
            if input_obj_type not in accepted_input_types:
                raise ValueError("Input object of type '" + input_obj_type +
                                 "' not accepted.  Must be one of " +
                                 ", ".join(accepted_input_types))

            # genome obj data
            try:
                genome_obj = wsClient.get_objects(
                    [{'ref': input_ref}])[0]['data']
                genome_sci_names.append(genome_obj['scientific_name'])
            except Exception as e:
                raise ValueError("unable to fetch genome: " + input_ref +
                                 " " + str(e))

            # Get assembly_refs (assembly_ref is preferred over contigset_ref)
            genome_desc = "Genome " + genome_obj_names[i] + " (ref:" + \
                input_ref + ") " + genome_sci_names[i]
            assembly_ref = genome_obj.get('assembly_ref')
            contigset_ref = genome_obj.get('contigset_ref')
            if assembly_ref is not None:
                self.log(console, genome_desc + " USING assembly_ref: " +
                         str(assembly_ref))
                assembly_refs.append(assembly_ref)
            elif contigset_ref is not None:
                self.log(console, genome_desc + " USING contigset_ref: " +
                         str(contigset_ref))
                assembly_refs.append(contigset_ref)
            else:
                msg = genome_desc + " MISSING BOTH contigset_ref AND assembly_ref. Cannot process. Exiting."
                self.log(console, msg)
                self.log(invalid_msgs, msg)

    # get fastas for scaffolds
    if len(invalid_msgs) == 0:
        contig_file_paths = []
        for assembly_ref in assembly_refs:
            contig_file = auClient.get_assembly_as_fasta(
                {'ref': assembly_ref}).get('path')
            sys.stdout.flush()
            contig_file_paths.append(
                dfu.unpack_file({'file_path': contig_file})['file_path'])

        # reformat FASTA IDs for Grinder: one record per genome, named with
        # the genome id taken from the abundance rows (matched by position)
        with open(genomes_src_db_file_path, 'w',
                  write_buf_size) as genomes_src_db_fh:
            for genome_i, contig_file_path in enumerate(contig_file_paths):
                contig_seqs = []
                seq_chunks = []  # lines of the contig currently being read
                with open(contig_file_path, 'r', read_buf_size) as contig_fh:
                    for contig_line in contig_fh:
                        contig_line = contig_line.rstrip()
                        if contig_line.startswith('>'):
                            if seq_chunks:
                                contig_seqs.append(''.join(seq_chunks))
                                seq_chunks = []
                        elif contig_line:
                            seq_chunks.append(contig_line)
                if seq_chunks:
                    contig_seqs.append(''.join(seq_chunks))

                # write joined contigs to file
                # NOTE: Using "-exclude_chars" grinder opt on N to avoid contig joins
                genome_seq = "NNNNNNNNNN".join(contig_seqs)
                genome_seq = genome_seq.upper()  # grinder might require upper case?
                genome_header = ">" + grinder_genome_ids[genome_i]
                genomes_src_db_fh.write(genome_header + "\n")
                genomes_src_db_fh.write(genome_seq + "\n")

                # DEBUG
                self.log(console, 'GENOMES_SRC_DB: ' + genome_header)
                self.log(console,
                         'GENOMES_SRC_DB: ' + genome_seq[0:50] + '...')

    #### STEP 3: Run Grinder
    ##
    if len(invalid_msgs) == 0:
        # The command is an argv list run without a shell, so values from
        # params cannot inject shell syntax. Values that may hold several
        # words are whitespace-split, as the shell used to do for them.
        cmd = str(self.GRINDER).split()
        # output
        cmd += ['-base_name', params['output_name']]
        cmd += ['-output_dir', output_dir]
        # contigs input
        cmd += ['-reference_file', genomes_src_db_file_path]
        # abundances
        cmd += ['-abundance_file', abundance_file_path]
        # library size
        cmd += ['-total_reads', str(params['num_reads_per_lib'])]
        # num libraries (overridden by abundance file?)
        cmd += ['-num_libraries', str(abundance_config_num_libs)]
        # read and insert lens
        cmd += ['-read_dist', str(params['read_len_mean']), 'normal',
                str(params['read_len_stddev'])]
        if paired:
            cmd += ['-insert_dist', str(params['insert_len_mean']), 'normal',
                    str(params['insert_len_stddev'])]
            # mate orientation
            cmd += ['-mate_orientation', str(params['mate_orientation'])]
        # genome len bias
        cmd += ['-length_bias', str(params['len_bias_flag'])]
        # mutation model
        cmd.append('-mutation_dist')
        cmd.extend(str(params['mutation_dist']).split())
        cmd.append('-mutation_ratio')
        cmd.extend(str(params['mutation_ratio']).split())
        # qual scores
        cmd += ['-fastq_output', '1']
        cmd += ['-qual_levels', str(params['qual_good']),
                str(params['qual_bad'])]
        # skip contig joins
        cmd += ['-exclude_chars', 'NX']
        # explicitly request bidirectional
        cmd += ['-unidirectional', '0']
        # random seed
        cmd += ['-random_seed', str(params['random_seed'])]

        # RUN
        self.log(console, "===========================================")
        self.log(console, "RUNNING: " + " ".join(cmd))
        self.log(console, "===========================================")

        # universal_newlines=True makes stdout yield text on Python 2 and 3.
        cmdProcess = subprocess.Popen(cmd,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT,
                                      shell=False,
                                      universal_newlines=True)
        outputlines = []
        for line in iter(cmdProcess.stdout.readline, ''):
            outputlines.append(line)
            self.log(console, line.replace('\n', ''))

        cmdProcess.stdout.close()
        cmdProcess.wait()
        self.log(console,
                 'return code: ' + str(cmdProcess.returncode) + '\n')
        if cmdProcess.returncode != 0:
            raise ValueError('Error running kb_grinder, return code: ' +
                             str(cmdProcess.returncode) + '\n')

        # capture output for report and paths to out files; the path is read
        # as the 4th whitespace-separated token of the matching lines
        report_text_buf = []
        struct_file_paths = []
        struct_file_names = []
        fastq_file_paths = []
        for out_line in outputlines:
            out_line = out_line.rstrip()
            tokens = out_line.split()
            if 'Community structure' in out_line and len(tokens) > 3:
                struct_file_path = tokens[3]
                struct_file_name = struct_file_path.split('/')[-1]
                struct_file_paths.append(struct_file_path)
                struct_file_names.append(struct_file_name)
                self.log(console,
                         "STRUCT_FILE_NAME: '" + struct_file_name)  # DEBUG
            elif 'FASTQ file' in out_line and len(tokens) > 3:
                fastq_file_paths.append(tokens[3])
            else:
                report_text_buf.append(out_line)
        report_text += "\n".join(report_text_buf)

    #### STEP 4: Upload Read Libs and create reads set
    ##
    if len(invalid_msgs) == 0:
        lib_obj_refs = []
        lib_obj_names = []
        readsSet_items = []
        lib_suffix = ".PairedEndLib" if paired else ".SingleEndLib"
        interleaved = 1 if paired else 0
        sequencing_tech = 'artificial reads'

        for sample_i, fastq_file_path in enumerate(fastq_file_paths):
            if not os.path.isfile(fastq_file_path) \
               or os.path.getsize(fastq_file_path) == 0:
                raise ValueError("empty read lib generated: " +
                                 fastq_file_path)

            # lib obj name: a lone library takes the requested output name
            # (no ReadsSet is created in that case, so the name is free).
            # The original compared the count with 0 inside this loop, which
            # could never be true.
            if len(fastq_file_paths) == 1:
                output_obj_name = params['output_name']
            else:
                output_obj_name = params['output_name'] + '-sample' + str(
                    sample_i + 1) + lib_suffix
            lib_obj_names.append(output_obj_name)

            # upload lib and get obj ref
            self.log(console,
                     'Uploading trimmed paired reads: ' + output_obj_name)
            lib_obj_ref = readsUtils_Client.upload_reads({
                'wsname': str(params['workspace_name']),
                'name': output_obj_name,
                'fwd_file': fastq_file_path,
                'interleaved': interleaved,
                'sequencing_tech': sequencing_tech
            })['obj_ref']
            lib_obj_refs.append(lib_obj_ref)
            os.remove(fastq_file_path)  # free up disk

            # add to readsSet
            readsSet_items.append({
                'ref': lib_obj_ref,
                'label': output_obj_name
            })

        # create readsset
        readsSet_obj_ref = None
        if len(lib_obj_refs) > 1:
            readsSet_obj = {
                'description':
                "Grinder Metagenome from " + " ".join(genome_obj_names),
                'items': readsSet_items
            }
            readsSet_obj_ref = setAPI_Client.save_reads_set_v1({
                'workspace_name': params['workspace_name'],
                'output_object_name': params['output_name'],
                'data': readsSet_obj
            })['set_ref']

    #### STEP 5: Build report
    ##
    reportName = 'kb_grinder_report_' + str(uuid.uuid4())
    reportObj = {
        'objects_created': [],
        'message': '',
        'direct_html': '',
        'file_links': [],
        'html_links': [],
        'workspace_name': params['workspace_name'],
        'report_object_name': reportName
    }

    # message: the validation problems if any, otherwise Grinder's output
    if len(invalid_msgs) > 0:
        report_text = "\n".join(invalid_msgs)
    reportObj['message'] = report_text

    if len(invalid_msgs) == 0:
        # objs
        if readsSet_obj_ref is not None:
            reportObj['objects_created'].append({
                'ref': readsSet_obj_ref,
                'desc': params['output_name'] + " ReadsSet"
            })
        for lib_obj_i, lib_obj_ref in enumerate(lib_obj_refs):
            reportObj['objects_created'].append({
                'ref': lib_obj_ref,
                'desc': lib_obj_names[lib_obj_i]
            })

        # downloadable data
        for data_i, data_path in enumerate(struct_file_paths):
            try:
                upload_ret = dfu.file_to_shock({
                    'file_path': data_path,
                    'make_handle': 0
                })
            except Exception as e:
                raise ValueError('error uploading ' + data_path +
                                 ' file to shock: ' + str(e))
            reportObj['file_links'].append({
                'shock_id': upload_ret['shock_id'],
                'name': struct_file_names[data_i],
                'label': struct_file_names[data_i]
            })

        # html report: nothing is written to html_output_dir, so no html
        # link is attached to the report.

    # save report object
    reportClient = KBaseReport(self.callbackURL,
                               token=token,
                               service_ver=SERVICE_VER)
    report_info = reportClient.create_extended_report(reportObj)

    returnVal = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    #END KButil_Build_InSilico_Metagenomes_with_Grinder

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError(
            'Method KButil_Build_InSilico_Metagenomes_with_Grinder return value '
            + 'returnVal is not type dict as required.')
    # return the results
    return [returnVal]