def import_file(self, ctx, params):
    """Import a FASTA + GFF file pair as a KBase Genome object and report on it.

    Required params (enforced by self.validate_params): 'fasta_file',
    'gff_file', 'workspace_name', 'genome_name'.  Optional params missing
    from the call are filled in with the defaults below.

    Returns a dict with 'genome_ref' (wsid/objid/version string),
    'report_name' and 'report_ref'.
    """
    # 1) validate parameters (raises on missing/invalid required fields)
    self.validate_params(params)

    # 2) fill in defaults for optional parameters without overwriting
    #    anything the caller supplied
    default_params = {
        'taxon_wsname': 'ReferenceTaxons',
        'scientific_name': "unknown_taxon",
        'taxon_reference': None,
        'source': 'User',
        'release': None,
        'type': 'User upload'
    }
    for field, value in default_params.items():
        params.setdefault(field, value)

    # 3) do the upload
    result = upload_genome(
        shock_service_url=self.cfg['shock-url'],
        handle_service_url=self.cfg['handle-service-url'],
        workspace_service_url=self.cfg['workspace-url'],
        callback_url=os.environ['SDK_CALLBACK_URL'],
        input_fasta_file=params["fasta_file"],
        input_gff_file=params["gff_file"],
        workspace_name=params['workspace_name'],
        core_genome_name=params['genome_name'],
        scientific_name=params['scientific_name'],
        taxon_wsname=params['taxon_wsname'],
        taxon_reference=params['taxon_reference'],
        source=params['source'],
        release=params['release'],
        genome_type=params['type'])

    # 4) generate a report pointing at the newly created Genome object
    output_data_ref = params['workspace_name'] + "/" + params['genome_name']
    reportObj = {
        'objects_created': [{
            'ref': output_data_ref,
            'description': 'KBase Genome object'
        }],
        'text_message': result['report_string']
    }
    reportClient = KBaseReport(os.environ['SDK_CALLBACK_URL'])
    report_info = reportClient.create({
        'report': reportObj,
        'workspace_name': params['workspace_name']
    })

    # 5) return the result; genome_info is a workspace object-info tuple
    #    ([0]=objid, [4]=version, [6]=wsid) -> "wsid/objid/version" reference
    info = result['genome_info']
    details = {
        'genome_ref': str(info[6]) + '/' + str(info[0]) + '/' + str(info[4]),
        'report_name': report_info['name'],
        'report_ref': report_info['ref']
    }
    return details
def filter_contigs(self, ctx, params):
    """
    Filter the contigs of an Assembly object by a minimum length.

    :param params: dict with 'assembly_input_ref' (reference to the input
        Assembly), 'workspace_name' (workspace to save output in) and
        'min_length' (contigs shorter than this are dropped)
    :returns: dict with 'report_name', 'report_ref', 'assembly_output'
        (reference to the filtered Assembly), 'n_initial_contigs',
        'n_contigs_removed' and 'n_contigs_remaining'
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN filter_contigs
    # Print statements to stdout/stderr are captured and available as the App log
    print('Starting Filter Contigs function. Params=')
    pprint(params)

    # Step 1 - validate the parameters.  Values get basic validation in the
    # Narrative UI, but other SDK modules can call this directly, so every
    # field is checked explicitly here.
    print('Validating parameters.')
    for required in ('workspace_name', 'assembly_input_ref', 'min_length'):
        if required not in params:
            raise ValueError('Parameter ' + required + ' is not set in input arguments')
    workspace_name = params['workspace_name']
    assembly_input_ref = params['assembly_input_ref']
    min_length_orig = params['min_length']
    try:
        min_length = int(min_length_orig)
    except ValueError:
        raise ValueError('Cannot parse integer from min_length parameter (' +
                         str(min_length_orig) + ')')
    if min_length < 0:
        raise ValueError('min_length parameter cannot be negative (' +
                         str(min_length) + ')')

    # Step 2 - fetch the Assembly as a local FASTA file via AssemblyUtil
    print('Downloading Assembly data as a Fasta file.')
    assembly_util = AssemblyUtil(self.callback_url)
    fasta_file = assembly_util.get_assembly_as_fasta({'ref': assembly_input_ref})

    # Step 3 - keep only the contigs that meet the length threshold
    all_records = list(SeqIO.parse(fasta_file['path'], 'fasta'))
    n_total = len(all_records)
    good_contigs = [rec for rec in all_records if len(rec.seq) >= min_length]
    n_remaining = len(good_contigs)
    print('Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total))
    filtered_fasta_file = os.path.join(self.shared_folder, 'filtered.fasta')
    SeqIO.write(good_contigs, filtered_fasta_file, 'fasta')

    # Step 4 - save the surviving contigs back as a new Assembly object
    print('Uploading filtered Assembly data.')
    new_assembly = assembly_util.save_assembly_from_fasta({
        'file': {'path': filtered_fasta_file},
        'workspace_name': workspace_name,
        'assembly_name': fasta_file['assembly_name']
    })

    # Step 5 - build a report describing what was created
    summary = 'Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total)
    reportObj = {
        'objects_created': [{'ref': new_assembly, 'description': 'Filtered contigs'}],
        'text_message': summary
    }
    report = KBaseReport(self.callback_url)
    report_info = report.create({
        'report': reportObj,
        'workspace_name': params['workspace_name']
    })

    # Step 6 - assemble the return structure
    output = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
        'assembly_output': new_assembly,
        'n_initial_contigs': n_total,
        'n_contigs_removed': n_total - n_remaining,
        'n_contigs_remaining': n_remaining
    }
    print('returning:' + pformat(output))
    #END filter_contigs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method filter_contigs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def filter_contigs(self, ctx, params):
    """Drop contigs shorter than params['min_length'] from the given Assembly
    and save the survivors as a new Assembly object, returning a report.
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN filter_contigs
    # Everything printed here ends up in the App log.
    print('Starting Filter Contigs function. Params=')
    pprint(params)

    # --- parameter validation -------------------------------------------
    # Narrative forms validate inputs, but direct SDK callers may not.
    print('Validating parameters.')
    if 'workspace_name' not in params:
        raise ValueError('Parameter workspace_name is not set in input arguments')
    if 'assembly_input_ref' not in params:
        raise ValueError('Parameter assembly_input_ref is not set in input arguments')
    if 'min_length' not in params:
        raise ValueError('Parameter min_length is not set in input arguments')
    workspace_name = params['workspace_name']
    assembly_input_ref = params['assembly_input_ref']
    raw_min_length = params['min_length']
    try:
        min_length = int(raw_min_length)
    except ValueError:
        raise ValueError('Cannot parse integer from min_length parameter (' +
                         str(raw_min_length) + ')')
    if min_length < 0:
        raise ValueError('min_length parameter cannot be negative (' +
                         str(min_length) + ')')

    # --- download the Assembly as a FASTA file --------------------------
    print('Downloading Assembly data as a Fasta file.')
    assemblyUtil = AssemblyUtil(self.callback_url)
    fasta_file = assemblyUtil.get_assembly_as_fasta({'ref': assembly_input_ref})

    # --- filter: keep contigs at or above the threshold -----------------
    good_contigs = []
    n_total = 0
    for record in SeqIO.parse(fasta_file['path'], 'fasta'):
        n_total += 1
        if len(record.seq) >= min_length:
            good_contigs.append(record)
    n_remaining = len(good_contigs)
    print('Filtered Assembly to ' + str(n_remaining) + ' contigs out of ' + str(n_total))
    filtered_fasta_file = os.path.join(self.shared_folder, 'filtered.fasta')
    SeqIO.write(good_contigs, filtered_fasta_file, 'fasta')

    # --- upload the filtered Assembly -----------------------------------
    print('Uploading filtered Assembly data.')
    new_assembly = assemblyUtil.save_assembly_from_fasta(
        {'file': {'path': filtered_fasta_file},
         'workspace_name': workspace_name,
         'assembly_name': fasta_file['assembly_name']})

    # --- build the report ------------------------------------------------
    reportObj = {
        'objects_created': [{'ref': new_assembly,
                             'description': 'Filtered contigs'}],
        'text_message': 'Filtered Assembly to ' + str(n_remaining) +
                        ' contigs out of ' + str(n_total)}
    report = KBaseReport(self.callback_url)
    report_info = report.create({'report': reportObj,
                                 'workspace_name': params['workspace_name']})

    # --- construct the output to send back ------------------------------
    output = {'report_name': report_info['name'],
              'report_ref': report_info['ref'],
              'assembly_output': new_assembly,
              'n_initial_contigs': n_total,
              'n_contigs_removed': n_total - n_remaining,
              'n_contigs_remaining': n_remaining}
    print('returning:' + pformat(output))
    #END filter_contigs

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method filter_contigs return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def run_megahit(self, ctx, params):
    """
    Assemble reads with MEGAHIT (via exec_megahit) and save a report listing
    the contig sets that were created.

    :param params: dict; required keys are 'workspace_name',
        'input_reads_ref' and 'output_contigset_name'; optional keys include
        'combined_assembly_flag', 'megahit_parameter_preset',
        'min_contig_len' and 'kmer_params' — all passed through unchanged
        to exec_megahit()
    :returns: dict with 'report_name' and 'report_ref'
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN run_megahit
    console = []
    self.log(console, 'Running run_megahit() with params=')
    self.log(console, "\n" + pformat(params))
    SERVICE_VER = 'release'

    ### STEP 1: basic parameter checks + parsing
    required_params = ['workspace_name',
                       'input_reads_ref',
                       'output_contigset_name']
    for required_param in required_params:
        # identity check (`is None`) — the original `== None` invokes
        # __eq__ and is non-idiomatic
        if required_param not in params or params[required_param] is None:
            raise ValueError("Must define required param: '" + required_param + "'")

    ### STEP 2: call exec_megahit() - input params are the same, so just pass through
    exec_megahit_output = self.exec_megahit(ctx, params)[0]

    ### STEP 3: save the report, with one entry per contig set created
    reportObj = {
        'objects_created': [{'ref': obj_ref, 'description': 'Assembled contigs'}
                            for obj_ref in exec_megahit_output['output_contigset_refs']],
        'text_message': exec_megahit_output['report_text']
    }
    reportClient = KBaseReport(self.callbackURL,
                               token=ctx['token'],
                               service_ver=SERVICE_VER)
    report_info = reportClient.create({
        'report': reportObj,
        'workspace_name': params['workspace_name']
    })

    ### STEP 4: construct the output to send back
    output = {'report_name': report_info['name'],
              'report_ref': report_info['ref']}
    #END run_megahit

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method run_megahit return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def remove_adapters(self, ctx, params):
    """
    Run Cutadapt adapter removal (via exec_remove_adapters) on a reads
    object and save a report referencing the trimmed reads.

    :param params: dict; required, non-empty keys are 'output_workspace',
        'input_reads', 'output_object_name' and 'min_read_length'; optional
        keys configure 5'/3' adapters, error tolerance, overlap length and
        whether untrimmed reads are discarded
    :returns: dict with 'report_ref', 'report_name' and 'output_reads_ref'
    :raises ValueError: on missing params or when exec_remove_adapters()
        produces no report / no output object
    """
    # ctx is the context object
    # return variables are: result
    #BEGIN remove_adapters
    console = []
    self.log(console, 'Running remove_adapters() with parameters: ')
    self.log(console, "\n" + pformat(params) + "\n")
    self.log(console, "-------------------------------------------\n")
    SERVICE_VER = 'release'
    # (unused wsClient / headers / env locals removed — nothing in this
    # method referenced them)

    # param checks: each must be present, non-None and non-empty
    required_params = ['output_workspace',
                       'input_reads',
                       'output_object_name',
                       'min_read_length']
    for arg in required_params:
        # `is None` instead of `== None`: identity, not overridable equality
        if arg not in params or params[arg] is None or params[arg] == '':
            raise ValueError("Must define required param: '" + arg + "'")

    # load provenance and record the input object reference on it
    # (mutates ctx['provenance'] in place when present)
    provenance = [{}]
    if 'provenance' in ctx:
        provenance = ctx['provenance']
    provenance[0]['input_ws_objects'] = [str(params['input_reads'])]

    # RUN
    exec_remove_adapters_retVal = self.exec_remove_adapters(ctx, params)[0]

    # build report
    reportObj = {'objects_created': [], 'text_message': ''}

    # text report — exec_remove_adapters() must have produced one;
    # narrowed from a bare `except:` so only the missing-key case is mapped
    # to this error and real failures propagate
    try:
        reportObj['text_message'] = exec_remove_adapters_retVal['report']
    except KeyError:
        raise ValueError("no report generated by exec_remove_adapters()")

    # output object
    if exec_remove_adapters_retVal['output_reads_ref'] is not None:
        reportObj['objects_created'].append({
            'ref': exec_remove_adapters_retVal['output_reads_ref'],
            'description': 'Post Cutadapt Reads'
        })
    else:
        raise ValueError("no output generated by exec_remove_adapters()")

    # save report object
    report = KBaseReport(self.config['SDK_CALLBACK_URL'],
                         token=ctx['token'],
                         service_ver=SERVICE_VER)
    report_info = report.create({
        'report': reportObj,
        'workspace_name': params['output_workspace']
    })

    result = {
        'output_reads_ref': exec_remove_adapters_retVal['output_reads_ref'],
        'report_ref': report_info['ref'],
        'report_name': report_info['name']
    }
    #END remove_adapters

    # At some point might do deeper type checking...
    if not isinstance(result, dict):
        raise ValueError('Method remove_adapters return value ' +
                         'result is not type dict as required.')
    # return the results
    return [result]
def filter_contigs(self, ctx, params):
    """
    Main method: filter contigs of an Assembly by a minimum length.

    :param params: dict with 'workspace_name' (String), 'assembly_ref'
        (String reference to an Assembly object) and 'min_length'
        (non-negative int threshold)
    :returns: dict with 'report_name', 'report_ref',
        'filtered_assembly_ref', 'n_total' and 'n_remaining'
    """
    # ctx is the context object
    # return variables are: returnVal
    # NOTE: this method is Python 2 code (print statements, basestring).
    #BEGIN filter_contigs
    # --- validate inputs -------------------------------------------------
    for name in ['min_length', 'assembly_ref', 'workspace_name']:
        if name not in params:
            raise ValueError('Parameter "' + name + '" is required but missing')
    if not isinstance(params['min_length'], int) or (params['min_length'] < 0):
        raise ValueError('Min length must be a non-negative integer')
    if not isinstance(params['assembly_ref'], basestring) or not len(params['assembly_ref']):
        raise ValueError('Pass in a valid assembly reference string')
    # debug output of the effective parameters and configuration
    print("params['min_length']=%s, params['assembly_ref']=%s" % (params['min_length'], params['assembly_ref']))
    print("params['params['workspace_name']=%s" % (params['workspace_name']))
    print("self.callback_url=%s" % self.callback_url)
    print("self.scratch=%s" % self.scratch)
    print "config = "
    pprint.pprint(self.config)
    ###############
    # Download ref
    ##############
    # fetch the Assembly as a local FASTA file; `file` shadows the builtin
    # but is kept byte-identical here
    assembly_util = AssemblyUtil(self.callback_url)
    file = assembly_util.get_assembly_as_fasta({'ref': params['assembly_ref']})
    print "assembly fasta file = "
    pprint.pprint(file)
    ###################################
    # Real business - filter the contig
    ###################################
    parsed_assembly = SeqIO.parse(file['path'], 'fasta')
    min_length = params['min_length']
    # Keep a list of contigs greater than min_length
    good_contigs = []
    # total contigs regardless of length
    n_total = 0
    # total contigs over the min_length
    n_remaining = 0
    for record in parsed_assembly:
        n_total += 1
        if len(record.seq) >= min_length:
            good_contigs.append(record)
            n_remaining += 1
    # returnVal = {
    #     'n_total': n_total,
    #     'n_remaining': n_remaining
    # }
    # returnVal = {}
    ##################
    # Output
    ##################
    # write the surviving contigs to scratch and re-upload them as a new
    # Assembly object in the caller's workspace
    workspace_name = params['workspace_name']
    filtered_path = os.path.join(self.scratch, 'filtered.fasta')
    SeqIO.write(good_contigs, filtered_path, 'fasta')
    # Upload the filtered data to the workspace
    new_ref = assembly_util.save_assembly_from_fasta({
        'file': {'path': filtered_path},
        'workspace_name': workspace_name,
        'assembly_name': file['assembly_name']
    })
    # returnVal = {
    #     'n_total': n_total,
    #     'n_remaining': n_remaining,
    #     'filtered_assembly_ref': new_ref
    # }
    ################
    # Reporting
    ################
    # NOTE(review): the stray 's' below makes the message read
    # "...to Ns contigs..." — looks like a typo; confirm before changing
    # this user-visible text.
    text_message = "".join([
        'Filtered assembly to ',
        str(n_remaining),
        's contigs out of ',
        str(n_total)
    ])
    # Data for creating the report, referencing the assembly we uploaded
    report_data = {
        'objects_created': [
            {'ref': new_ref, 'description': 'Filtered contigs'}
        ],
        'text_message': text_message
    }
    # Initialize the report
    kbase_report = KBaseReport(self.callback_url)
    report = kbase_report.create({
        'report': report_data,
        'workspace_name': workspace_name
    })
    # Return the report reference and name in our results
    returnVal = {
        'report_ref': report['ref'],
        'report_name': report['name'],
        'n_total': n_total,
        'n_remaining': n_remaining,
        'filtered_assembly_ref': new_ref
    }
    ###############
    # BBtools test
    ###############
    # NOTE(review): everything from here to #END looks like leftover debug /
    # integration-test code (runs RQCFilter on the *input* fasta and only
    # prints the result) — confirm whether it should remain in production.
    # bbtools = BBTools(self.callback_url)
    bbtools = BBTools(self.callback_url, service_ver='beta')
    # set up input files
    print "file['path'] = "
    print file['path']
    # print new_ref['filtered_assembly_ref']
    rqc_filter_input = {
        "reads_file": file['path']
        # /kb/module/work/tmp/Shewanella_oneidensis_MR-1_assembly.fa
    }
    # or, if you want to use a KBase Workspace UPA for your reads object:
    # rqc_filter_input = {
    #     "reads_library_ref": new_ref['filtered_assembly_ref']
    # }
    # set up parameters (example below, there are many more options, see BBTools.spec)
    rqc_filter_params = {
        "qtrim": "rl",
        "maxns": 3,
        "minlength": 40,
        "maxmem": 5
    }
    # "maxmem": 5
    # run the local RQCFilter function
    result = bbtools.run_RQCFilter_local(rqc_filter_input, rqc_filter_params)
    print "result = "
    pprint.pprint(result)
    #END filter_contigs

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method filter_contigs return value returnVal is not type dict as required.')
    # return the results
    return [returnVal]
def execReadLibraryPRINSEQ(self, ctx, input_params): """ :param input_params: instance of type "inputPRINSEQ" (execPRINSEQ and execReadLibraryPRINSEQ input input_reads_ref : may be KBaseFile.PairedEndLibrary or KBaseFile.SingleEndLibrary output_ws : workspace to write to output_reads_name : obj_name to create lc_method : Low complexity method - value must be "dust" or "entropy" lc_entropy_threshold : Low complexity threshold - Value must be an integer between 0 and 100. Note a higher lc_entropy_threshold in entropy is more stringent. lc_dust_threshold : Low complexity threshold - Value must be an integer between 0 and 100. Note a lower lc_entropy_threshold is less stringent with dust) -> structure: parameter "input_reads_ref" of type "data_obj_ref", parameter "output_ws" of type "workspace_name" (Common Types), parameter "output_reads_name" of type "data_obj_name", parameter "lc_method" of String, parameter "lc_entropy_threshold" of Long, parameter "lc_dust_threshold" of Long :returns: instance of type "outputReadLibraryExecPRINSEQ" -> structure: parameter "output_filtered_ref" of type "data_obj_ref", parameter "output_unpaired_fwd_ref" of type "data_obj_ref", parameter "output_unpaired_rev_ref" of type "data_obj_ref", parameter "report" of String """ # ctx is the context object # return variables are: output #BEGIN execReadLibraryPRINSEQ console = [] # self.log(console, 'Running execTrimmomatic with parameters: ') # self.log(console, "\n"+pformat(input_params)) report = '' returnVal = dict() # retVal['output_filtered_ref'] = None # retVal['output_unpaired_fwd_ref'] = None # retVal['output_unpaired_rev_ref'] = None token = ctx['token'] wsClient = workspaceService(self.ws_url, token=token) env = os.environ.copy() env['KB_AUTH_TOKEN'] = token # param checks required_params = ['input_reads_ref', 'output_ws', 'lc_method'] # output reads_name is optional. 
If not set will use old_objects name for required_param in required_params: if required_param not in input_params or input_params[ required_param] is None: raise ValueError("Must define required param: '" + required_param + "'") if (input_params['lc_method'] != 'dust') and (input_params['lc_method'] != 'entropy'): raise ValueError( "lc_method (low complexity method) must be 'dust' or 'entropy', " + "it is currently set to : " + input_params['lc_method']) if not ('lc_entropy_threshold' in input_params or 'lc_dust_threshold' in input_params): raise ValueError( ("A low complexity threshold needs to be " + "entered for {}".format(input_params['lc_method']))) elif input_params['lc_method'] == 'dust': if 'lc_dust_threshold' not in input_params: raise ValueError( ("A low complexity threshold needs to be " + "entered for {}".format(input_params['lc_method']))) else: lc_threshold = input_params['lc_dust_threshold'] else: if 'lc_entropy_threshold' not in input_params: raise ValueError( ("A low complexity threshold needs to be " + "entered for {}".format(input_params['lc_method']))) else: lc_threshold = input_params['lc_entropy_threshold'] if (lc_threshold < 0.0) or (lc_threshold > 100.0): raise ValueError(( "The threshold for {} must be between 0 and 100, it is currently " + "set to : {}").format(input_params['lc_method'], lc_threshold)) reportObj = {'objects_created': [], 'text_message': ''} # load provenance provenance = [{}] if 'provenance' in ctx: provenance = ctx['provenance'] # add additional info to provenance here, in this case the input data object reference provenance[0]['input_ws_objects'] = [ str(input_params['input_reads_ref']) ] # GET THE READS OBJECT # Determine whether read library or read set is input object # try: # object_info tuple [ OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I, SAVED_BY_I, WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I, META_I ] = range(11) input_reads_obj_info = wsClient.get_object_info_new( {'objects': [{ 'ref': 
input_params['input_reads_ref'] }]})[0] input_reads_obj_type = input_reads_obj_info[TYPE_I] # input_reads_obj_version = input_reads_obj_info[VERSION_I] # this is object version, not type version except Exception as e: raise ValueError( 'Unable to get read library object from workspace: (' + str(input_params['input_reads_ref']) + ')' + str(e)) # self.log (console, "B4 TYPE: '" + # str(input_reads_obj_type) + # "' VERSION: '" + str(input_reads_obj_version)+"'") # remove trailing version input_reads_obj_type = re.sub('-[0-9]+\.[0-9]+$', "", input_reads_obj_type) # self.log (console, "AF TYPE: '"+str(input_reads_obj_type)+"' VERSION: '" + # str(input_reads_obj_version)+"'") # maybe add below later "KBaseSets.ReadsSet", acceptable_types = [ "KBaseFile.PairedEndLibrary", "KBaseAssembly.PairedEndLibrary", "KBaseAssembly.SingleEndLibrary", "KBaseFile.SingleEndLibrary" ] if input_reads_obj_type not in acceptable_types: raise ValueError("Input reads of type: '" + input_reads_obj_type + "'. Must be one of " + ", ".join(acceptable_types)) if input_reads_obj_type in [ "KBaseFile.PairedEndLibrary", "KBaseAssembly.PairedEndLibrary" ]: read_type = 'PE' elif input_reads_obj_type in [ "KBaseFile.SingleEndLibrary", "KBaseAssembly.SingleEndLibrary" ]: read_type = 'SE' # Instatiate ReadsUtils try: readsUtils_Client = ReadsUtils(url=self.callback_url, token=ctx['token']) # SDK local self._log(None, 'Starting Read File(s) Download') readsLibrary = readsUtils_Client.download_reads({ 'read_libraries': [input_params['input_reads_ref']], 'interleaved': 'false' }) self._log(None, 'Completed Read File(s) Downloading') except Exception as e: raise ValueError( ('Unable to get read library object from workspace: ({})\n' ).format(str(input_params['input_reads_ref']), str(e))) # get WS metadata to get obj_name ws = workspaceService(self.ws_url) try: info = ws.get_object_info_new( {'objects': [{ 'ref': input_params['input_reads_ref'] }]})[0] except workspaceService as wse: self._log(console, 
'Logging workspace exception') self._log(str(wse)) raise #determine new object base name new_object_name = info[1] if ('output_reads_name' in input_params and input_params['output_reads_name'] != '' and input_params['output_reads_name'] is not None): new_object_name = input_params['output_reads_name'] # MAKE A DIRECTORY TO PUT THE READ FILE(S) # create the output directory and move the file there # PUT FILES INTO THE DIRECTORY # Sanitize the file names tempdir = tempfile.mkdtemp(dir=self.scratch) export_dir = os.path.join(tempdir, info[1]) os.makedirs(export_dir) if read_type == 'PE': # IF PAIRED END, potentially 6 files created # one of each for the two directions(good(paired), good_singletons, bad) # Take the good paired and (re)upload new reads object. # We throwout the bad reads input_files_info = self._setup_pe_files(readsLibrary, export_dir, input_params) # RUN PRINSEQ with user options (lc_method and lc_threshold) cmd = ( "perl /opt/lib/prinseq-lite-0.20.4/prinseq-lite.pl -fastq {} " "-fastq2 {} -out_format 3 -lc_method {} " "-lc_threshold {}").format( input_files_info["fastq_file_path"], input_files_info["fastq2_file_path"], input_params['lc_method'], lc_threshold) print "Command to be run : " + cmd args = shlex.split(cmd) perl_script = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = perl_script.communicate() found_results = False file_names_dict = dict() for element in output: if "Input and filter stats:" in element: found_results = True element_parts = element.split("Input and filter stats:") # PRINSEQ OUTPUT report = "Input and filter stats:{}".format( element_parts[1]) reportObj['text_message'] = report read_files_list = os.listdir(export_dir) # proc = subprocess.Popen(['ls', '-l', export_dir], stdout=subprocess.PIPE) # proc_output = proc.stdout.read() # print "PROC OUTPUT : " + proc_output for read_filename in read_files_list: file_direction = None print "Read File : {}".format(read_filename) # determine if 
forward(fastq) or reverse(fastq2) file if input_files_info["fastq_filename"] in read_filename: file_direction = "fwd" elif input_files_info[ "fastq2_filename"] in read_filename: file_direction = "rev" if file_direction is not None: # determine good singleton or good part of a pair. print "TEST: {}_prinseq_good_".format( input_files_info["fastq_filename"]) if ("{}_prinseq_good_singletons".format( input_files_info["fastq_filename"]) in read_filename or "{}_prinseq_good_singletons".format( input_files_info["fastq2_filename"]) in read_filename): # Unpaired singletons that need to be # saved as a new single end reads object file_names_dict["{}_good_singletons".format(file_direction)] = \ os.path.join(export_dir, read_filename) elif ("{}_prinseq_good_".format( input_files_info["fastq_filename"]) in read_filename or "{}_prinseq_good_".format( input_files_info["fastq2_filename"]) in read_filename): file_names_dict["{}_good_pair".format(file_direction)] = \ os.path.join(export_dir, read_filename) if (('fwd_good_pair' in file_names_dict) and ('rev_good_pair' in file_names_dict)): self._log(None, 'Saving new Paired End Reads') returnVal['filtered_paired_end_ref'] = \ readsUtils_Client.upload_reads({'wsname': str(input_params['output_ws']), 'name': new_object_name, 'source_reads_ref': input_params['input_reads_ref'], 'fwd_file': file_names_dict['fwd_good_pair'], 'rev_file': file_names_dict['rev_good_pair'] } )['obj_ref'] reportObj['objects_created'].append({ 'ref': returnVal['filtered_paired_end_ref'], 'description': 'Filtered Paired End Reads', 'object_name': new_object_name }) print "REFERENCE : " + str( returnVal['filtered_paired_end_ref']) else: reportObj['text_message'] += \ "\n\nNo good matching pairs passed low complexity filtering.\n" + \ "Consider loosening the threshold value.\n" if 'fwd_good_singletons' in file_names_dict: self._log(None, 'Saving new Forward Unpaired Reads') fwd_object_name = "{}_fwd_singletons".format( new_object_name) 
returnVal['output_filtered_fwd_unpaired_end_ref'] = \ readsUtils_Client.upload_reads({'wsname': str(input_params['output_ws']), 'name': fwd_object_name, 'source_reads_ref': input_params['input_reads_ref'], 'fwd_file': file_names_dict['fwd_good_singletons']} )['obj_ref'] reportObj['objects_created'].append({ 'ref': returnVal['output_filtered_fwd_unpaired_end_ref'], 'description': 'Filtered Forward Unpaired End Reads', 'object_name': fwd_object_name }) print "REFERENCE : " + \ str(returnVal['output_filtered_fwd_unpaired_end_ref']) if 'rev_good_singletons' in file_names_dict: self._log(None, 'Saving new Reverse Unpaired Reads') rev_object_name = "{}_rev_singletons".format( new_object_name) returnVal['output_filtered_rev_unpaired_end_ref'] = \ readsUtils_Client.upload_reads({'wsname': str(input_params['output_ws']), 'name': rev_object_name, 'source_reads_ref': input_params['input_reads_ref'], 'fwd_file': file_names_dict['rev_good_singletons']} )['obj_ref'] reportObj['objects_created'].append({ 'ref': returnVal['output_filtered_rev_unpaired_end_ref'], 'description': 'Filtered Reverse Unpaired End Reads', 'object_name': rev_object_name }) print "REFERENCE : " + \ str(returnVal['output_filtered_rev_unpaired_end_ref']) if len(reportObj['objects_created']) > 0: reportObj['text_message'] += "\nOBJECTS CREATED :\n" for obj in reportObj['objects_created']: reportObj['text_message'] += "{} : {}\n".format( obj['object_name'], obj['description']) else: reportObj['text_message'] += \ "\nFiltering filtered out all reads. 
No objects made.\n" if not found_results: raise Exception('Unable to execute PRINSEQ, Error: {}'.format( str(output))) print "FILES DICT : {}".format(str(file_names_dict)) print "REPORT OBJECT :" print str(reportObj) elif read_type == 'SE': # Download reads Libs to FASTQ files # IF SINGLE END INPUT 2 files created (good and bad) # Take good and (re)upload new reads object input_fwd_file_path = \ readsLibrary['files'][input_params['input_reads_ref']]['files']['fwd'] fastq_filename = self._sanitize_file_name( os.path.basename(input_fwd_file_path)) fastq_file_path = os.path.join(export_dir, fastq_filename) shutil.move(input_fwd_file_path, fastq_file_path) # RUN PRINSEQ with user options (lc_method and lc_threshold) cmd = ( "perl /opt/lib/prinseq-lite-0.20.4/prinseq-lite.pl -fastq {} " "-out_format 3 -lc_method {} " "-lc_threshold {}").format(fastq_file_path, input_params['lc_method'], lc_threshold) print "Command to be run : " + cmd args = shlex.split(cmd) print "ARGS: " + str(args) perl_script = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) output = perl_script.communicate() print "OUTPUT: " + str(output) found_results = False found_se_filtered_file = False file_names_dict = dict() for element in output: if "Input and filter stats:" in element: found_results = True element_parts = element.split("Input and filter stats:") # PRINSEQ OUTPUT report = "Input and filter stats:{}".format( element_parts[1]) reportObj['text_message'] = report read_files_list = os.listdir(export_dir) for read_filename in read_files_list: print "Early Read File : {}".format(read_filename) for read_filename in read_files_list: print "Read File : {}".format(read_filename) if ("{}_prinseq_good_".format(fastq_filename) in read_filename): #Found Good file. 
Save the Reads objects self._log(None, 'Saving Filtered Single End Reads') returnVal['output_filtered_single_end_ref'] = \ readsUtils_Client.upload_reads({'wsname': str(input_params['output_ws']), 'name': new_object_name, 'source_reads_ref': input_params['input_reads_ref'], 'fwd_file': os.path.join(export_dir, read_filename)} )['obj_ref'] reportObj['objects_created'].append({ 'ref': returnVal['output_filtered_single_end_ref'], 'description': 'Filtered Single End Reads' }) print "REFERENCE : " + str( returnVal['output_filtered_single_end_ref']) found_se_filtered_file = True break if not found_se_filtered_file: reportObj['text_message'] += \ "\n\nNone of the reads passed low complexity filtering.\n" + \ "Consider loosening the threshold value.\n" if not found_results: raise Exception('Unable to execute PRINSEQ, Error: {}'.format( str(output))) print "FILES DICT : {}".format(str(file_names_dict)) print "REPORT OBJECT :" print str(reportObj) # save report object # report = KBaseReport(self.callback_url, token=ctx['token']) #report = KBaseReport(self.callback_url, token=ctx['token'], service_ver=SERVICE_VER) report_info = report.create({ 'report': reportObj, 'workspace_name': input_params['output_ws'] }) output = { 'report_name': report_info['name'], 'report_ref': report_info['ref'] } #END execReadLibraryPRINSEQ # At some point might do deeper type checking... if not isinstance(output, dict): raise ValueError('Method execReadLibraryPRINSEQ return value ' + 'output is not type dict as required.') # return the results return [output]
def predict_amr_phenotype(self, ctx, params): """ The AMR prediction function specification :param params: instance of type "AMRPredictionParams" (Structure of input data for AMR prediction) -> structure: parameter "assembly_input_ref" of type "assembly_ref", parameter "species" of String, parameter "workspace_name" of String :returns: instance of type "AMRPredictionResults" (Structure of output of AMR prediction) -> structure: parameter "report_name" of String, parameter "report_ref" of String """ # ctx is the context object # return variables are: output #BEGIN predict_amr_phenotype # Input validation for name in ['assembly_input_ref', 'species', 'workspace_name']: if name not in params: raise ValueError('Parameter "' + name + '" is required but missing') if not (isinstance(params['assembly_input_ref'], string_types) or isinstance(params['assembly_input_ref'], list)) or not len(params['assembly_input_ref']): raise ValueError('Pass in a valid assembly reference string(s)') # Extract params if not isinstance(params["assembly_input_ref"], list): assemblies = [params["assembly_input_ref"]] else: assemblies = params["assembly_input_ref"] species = params["species"] # Get models for species scm_models = self.get_models_by_algorithm_and_species("scm", species) cart_models = self.get_models_by_algorithm_and_species("cart", species) # Process assemblies predictions = {} assembly_util = AssemblyUtil(self.callback_url) for assembly_ref in assemblies: assembly_predictions = {} # Get the fasta file path and other info assembly = assembly_util.get_assembly_as_fasta({'ref': assembly_ref}) # Extract the k-mers kmers = self.extract_kmers(assembly["path"], k=31) print "Kmers --", assembly["assembly_name"], ":", len(kmers) # Make predictions (SCM) print "SCM models" assembly_predictions["scm"] = {} for antibiotic, model in scm_models.iteritems(): p = model.predict(kmers) assembly_predictions["scm"][antibiotic] = {} assembly_predictions["scm"][antibiotic]["label"] = p[0] 
assembly_predictions["scm"][antibiotic]["why"] = p[1] # Make predictions (CART) print "CART models" assembly_predictions["cart"] = {} for antibiotic, model in cart_models.iteritems(): p = model.predict(kmers) assembly_predictions["cart"][antibiotic] = {} assembly_predictions["cart"][antibiotic]["label"] = p[0] assembly_predictions["cart"][antibiotic]["why"] = p[1] predictions[assembly["assembly_name"]] = assembly_predictions del assembly_predictions # Generate report text_message = "This is a test report for kover amr (text)" # Data for creating the report, referencing the assembly we uploaded report_data = { 'objects_created': [], 'text_message': text_message, 'direct_html': generate_html_prediction_report(predictions, species) } # Initialize the report kbase_report = KBaseReport(self.callback_url) report = kbase_report.create({ 'report': report_data, 'workspace_name': params['workspace_name'], 'file_links': generate_csv_prediction_report(predictions, species, self.scratch) }) output = { 'report_ref': report['ref'], 'report_name': report['name'] } #END predict_amr_phenotype # At some point might do deeper type checking... if not isinstance(output, dict): raise ValueError('Method predict_amr_phenotype return value ' + 'output is not type dict as required.') # return the results return [output]
def run_megahit(self, ctx, params):
    """
    :param params: instance of type "MegaHitParams" (Run MEGAHIT. Most
       parameters here are just passed forward to MEGAHIT workspace_name -
       the name of the workspace for input/output read_library_ref - the
       name of the PE read library (SE library support in the future)
       output_contig_set_name - the name of the output contigset
       megahit_parameter_preset - override a group of parameters; possible
       values: meta '--min-count 2 --k-list 21,41,61,81,99' (generic
       metagenomes, default) meta-sensitive '--min-count 2 --k-list
       21,31,41,51,61,71,81,91,99' (more sensitive but slower) meta-large
       '--min-count 2 --k-list 27,37,47,57,67,77,87' (large & complex
       metagenomes, like soil) bulk '--min-count 3 --k-list 31,51,71,91,99
       --no-mercy' (experimental, standard bulk sequencing with >= 30x
       depth) single-cell '--min-count 3 --k-list 21,33,55,77,99,121
       --merge_level 20,0.96' (experimental, single cell data) min_count -
       minimum multiplicity for filtering (k_min+1)-mers, default 2 min_k -
       minimum kmer size (<= 127), must be odd number, default 21 max_k -
       maximum kmer size (<= 127), must be odd number, default 99 k_step -
       increment of kmer size of each iteration (<= 28), must be even
       number, default 10 k_list - list of kmer size (all must be odd, in
       the range 15-127, increment <= 28); override `--k-min', `--k-max'
       and `--k-step' min_contig_length - minimum length of contigs to
       output, default 200 @optional megahit_parameter_preset @optional
       min_count @optional k_min @optional k_max @optional k_step
       @optional k_list @optional min_contig_len) -> structure: parameter
       "workspace_name" of String, parameter "read_library_ref" of String,
       parameter "output_contigset_name" of String, parameter
       "megahit_parameter_preset" of String, parameter "min_count" of
       Long, parameter "k_min" of Long, parameter "k_max" of Long,
       parameter "k_step" of Long, parameter "k_list" of list of Long,
       parameter "min_contig_len" of Long
    :returns: instance of type "MegaHitOutput" -> structure: parameter
       "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: output
    #BEGIN run_megahit
    print('Running run_megahit with params=')
    pprint(params)

    # STEP 1: basic parameter checks + parsing
    for required in ('workspace_name', 'read_library_ref',
                     'output_contigset_name'):
        if required not in params:
            raise ValueError(required + ' parameter is required')

    # STEP 2: get the read library as deinterleaved fastq files
    input_ref = params['read_library_ref']
    reads_params = {'read_libraries': [input_ref],
                    'interleaved': 'false',
                    'gzipped': None
                    }
    ru = ReadsUtils(self.callbackURL)
    reads = ru.download_reads(reads_params)['files']

    print('Input reads files:')
    fwd = reads[input_ref]['files']['fwd']
    rev = reads[input_ref]['files']['rev']
    pprint('forward: ' + fwd)
    pprint('reverse: ' + rev)

    # STEP 3: run megahit
    # construct the command; we only support PE reads for now
    megahit_cmd = [self.MEGAHIT, '-1', fwd, '-2', rev]

    # if a preset is defined, use that:
    if params.get('megahit_parameter_preset'):
        megahit_cmd.append('--presets')
        megahit_cmd.append(params['megahit_parameter_preset'])

    # forward the simple scalar overrides (only when set and truthy,
    # matching the original optional-parameter handling)
    for key, flag in (('min_count', '--min-count'),
                      ('k_min', '--k-min'),
                      ('k_max', '--k-max'),
                      ('k_step', '--k-step')):
        if params.get(key):
            megahit_cmd.append(flag)
            megahit_cmd.append(str(params[key]))

    if params.get('k_list'):
        megahit_cmd.append('--k-list')
        megahit_cmd.append(','.join(str(k_val) for k_val in params['k_list']))

    if params.get('min_contig_len'):
        megahit_cmd.append('--min-contig-len')
        megahit_cmd.append(str(params['min_contig_len']))

    # set the output location (unique per run via a ms timestamp)
    timestamp = int((datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds() * 1000)
    output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
    megahit_cmd.append('-o')
    megahit_cmd.append(output_dir)

    # run megahit
    print('running megahit:')
    print('    ' + ' '.join(megahit_cmd))
    p = subprocess.Popen(megahit_cmd, cwd=self.scratch, shell=False)
    retcode = p.wait()

    print('Return code: ' + str(retcode))
    if retcode != 0:
        raise ValueError('Error running MEGAHIT, return code: ' +
                         str(retcode) + '\n')

    output_contigs = os.path.join(output_dir, 'final.contigs.fa')

    # on macs, we cannot run megahit in the shared host scratch space,
    # so we need to move the file there
    if self.mac_mode:
        shutil.move(output_contigs, os.path.join(self.host_scratch, 'final.contigs.fa'))
        output_contigs = os.path.join(self.host_scratch, 'final.contigs.fa')

    # STEP 4: save the resulting assembly
    assemblyUtil = AssemblyUtil(self.callbackURL)
    output_data_ref = assemblyUtil.save_assembly_from_fasta({
        'file': {'path': output_contigs},
        'workspace_name': params['workspace_name'],
        'assembly_name': params['output_contigset_name']
    })

    # STEP 5: generate and save the report
    # compute a simple contig length distribution for the report
    lengths = [len(seq_record.seq)
               for seq_record in SeqIO.parse(output_contigs, 'fasta')]

    report = ''
    report += 'ContigSet saved to: ' + params['workspace_name'] + '/' + \
              params['output_contigset_name'] + '\n'
    report += 'Assembled into ' + str(len(lengths)) + ' contigs.\n'

    # BUGFIX: guard against an empty assembly -- the average computation
    # previously raised ZeroDivisionError when no contigs were produced
    if lengths:
        report += 'Avg Length: ' + str(sum(lengths) / float(len(lengths))) + ' bp.\n'
        bins = 10
        counts, edges = np.histogram(lengths, bins)
        report += 'Contig Length Distribution (# of contigs -- min to max basepairs):\n'
        for c in range(bins):
            report += '    ' + str(counts[c]) + '\t--\t' + \
                      str(edges[c]) + ' to ' + str(edges[c + 1]) + ' bp\n'

    reportObj = {
        'objects_created': [{'ref': output_data_ref, 'description': 'Assembled contigs'}],
        'text_message': report
    }

    # use a distinct name for the client so the report text is not shadowed
    report_client = KBaseReport(self.callbackURL)
    report_info = report_client.create({'report': reportObj,
                                        'workspace_name': params['workspace_name']})

    # STEP 6: contruct the output to send back
    output = {'report_name': report_info['name'],
              'report_ref': report_info['ref']}

    #END run_megahit

    # At some point might do deeper type checking...
    if not isinstance(output, dict):
        raise ValueError('Method run_megahit return value ' +
                         'output is not type dict as required.')
    # return the results
    return [output]
def import_file(self, params): # 1) validate parameters self._validate_import_file_params(params) # 2) construct the input directory staging area input_directory = os.path.join(self.cfg.sharedFolder, 'fast_gff_upload_' + str(uuid.uuid4())) os.makedirs(input_directory) file_paths = self._stage_input(params, input_directory) # 3) extract out the parameters params = self._set_parsed_params(params) # 4) do the upload result = self.upload_genome( shock_service_url=self.cfg.shockURL, handle_service_url=self.cfg.handleURL, workspace_service_url=self.cfg.workspaceURL, callback_url=self.cfg.callbackURL, input_fasta_file=file_paths["fasta_file"], input_gff_file=file_paths["gff_file"], workspace_name=params['workspace_name'], core_genome_name=params['genome_name'], scientific_name=params['scientific_name'], taxon_wsname=params['taxon_wsname'], taxon_reference=params['taxon_reference'], source=params['source'], genome_type=params['type'], release=params['release']) # 5) generate report output_data_ref = params['workspace_name'] + "/" + params['genome_name'] reportObj = { 'objects_created': [{ 'ref': output_data_ref, 'description': 'KBase Genome object' }], 'text_message': result['report_string'] } reportClient = KBaseReport(os.environ['SDK_CALLBACK_URL']) report_info = reportClient.create({ 'report': reportObj, 'workspace_name': params['workspace_name'] }) # 6) clear the temp directory shutil.rmtree(input_directory) # 7) return the result info = result['genome_info'] details = { 'genome_ref': str(info[6]) + '/' + str(info[0]) + '/' + str(info[4]), 'genome_info': info, 'report_name': report_info['name'], 'report_ref': report_info['ref'] } return details
def fastqutils_stats(self, ctx, params):
    """
    :param params: instance of type "FastqUtilsStatsParams" -> structure:
       parameter "workspace_name" of type "workspace_name" (A string
       representing a workspace name.), parameter "read_library_ref" of
       type "read_library_ref" (A string representing a ContigSet id.)
    :returns: instance of type "FastqUtilsStatsResult" -> structure:
       parameter "report_name" of String, parameter "report_ref" of String
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN fastqutils_stats

    print('Running fastqutils_stats with params=')
    print(pformat(params))

    if 'workspace_name' not in params:
        raise ValueError('workspace_name parameter is required')
    if 'read_library_ref' not in params:
        raise ValueError('read_library_ref parameter is required')

    # Get the read library as deinterleaved fastq files
    input_ref = params['read_library_ref']
    reads_params = {'read_libraries': [input_ref],
                    'interleaved': 'false',
                    'gzipped': None
                    }
    ru = ReadsUtils(self.callbackURL, token=ctx['token'])
    reads = ru.download_reads(reads_params)['files']

    # forward file is always present; reverse only for paired-end libs
    files = [reads[input_ref]['files']['fwd']]
    if reads[input_ref]['files']['rev']:
        files.append(reads[input_ref]['files']['rev'])

    print('running on files:')
    for f in files:
        print(f)

    # construct the base command; each file gets its own invocation
    stats_cmd = [self.FASTQUTILS, 'stats']

    report = ''
    for f in files:
        # BUGFIX: copy the base command instead of aliasing it.
        # The original `cmd = stats_cmd` appended each file to the shared
        # list, so the second invocation also carried the first file path.
        cmd = list(stats_cmd)
        cmd.append(f)

        report += '============== ' + f + ' ==============\n'
        print('running: ' + ' '.join(cmd))
        p = subprocess.Popen(cmd,
                             cwd=self.scratch,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT,
                             shell=False)

        # stream tool output into the report line by line
        while True:
            line = p.stdout.readline()
            if not line:
                break
            report += line
            print(line.replace('\n', ''))

        p.stdout.close()
        p.wait()
        report += "\n\n"
        print('return code: ' + str(p.returncode))
        if p.returncode != 0:
            raise ValueError('Error running ' + self.FASTQUTILS +
                             ', return code: ' + str(p.returncode))

    reportObj = {
        'objects_created': [],
        'text_message': report
    }

    # distinct name for the client: do not shadow the report text
    report_client = KBaseReport(self.callbackURL)
    report_info = report_client.create({'report': reportObj,
                                        'workspace_name': params['workspace_name']})

    returnVal = {'report_name': report_info['name'],
                 'report_ref': report_info['ref']}
    #END fastqutils_stats

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method fastqutils_stats return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]
def filter_contigs(self, ctx, workspace_name, contigset, minimum):
    """
    :param workspace_name: instance of String
    :param contigset: instance of String
    :param minimum: instance of Long
    :returns: instance of type "FilterContigResults" -> structure:
       parameter "report_name" of String, parameter "report_ref" of
       String, parameter "assembly_ref" of String, parameter
       "contig_count" of Long, parameter "filtered_contig_count" of Long
    """
    # ctx is the context object
    # return variables are: returnVal
    #BEGIN filter_contigs
    print(workspace_name)
    print(contigset)
    print(minimum)

    def perform_filter(min_length, contigs):
        """Count all contigs and keep those strictly longer than
        min_length; returns (total_count, filtered_count, filtered_set)."""
        result_type = namedtuple(
            'filter_result',
            ['total_count', 'filtered_count', 'filtered_set'])
        total_count = 0
        # BUGFIX: collect into a list, not a set -- set iteration order is
        # arbitrary, which made the written FASTA's record order
        # nondeterministic; a list preserves input order
        kept = []
        for contig in contigs:
            if len(contig) > min_length:
                kept.append(contig)
            total_count += 1
        return result_type(total_count, len(kept), kept)

    print('about to get fasta')
    fasta_file = self.dfu.get_assembly_as_fasta({'ref': contigset})
    print('got fasta')
    contigs = SeqIO.parse(fasta_file['path'], 'fasta')
    filtered_file = os.path.join(self.scratch, 'filtered.fasta')
    filtered = perform_filter(minimum, contigs)
    SeqIO.write(filtered.filtered_set, filtered_file, 'fasta')
    new_assembly = self.dfu.\
        save_assembly_from_fasta({'file': {'path': filtered_file},
                                  'workspace_name': workspace_name,
                                  'assembly_name': fasta_file['assembly_name']
                                  })
    reportObj = {
        'objects_created': [{
            'ref': new_assembly,
            'description': 'Filtered contigs'
        }],
        'text_message': 'Filtered Assembly to ' +
                        str(filtered.filtered_count) +
                        ' contigs out of ' + str(filtered.total_count)
    }
    report = KBaseReport(self.callback_url)
    report_info = report.create({
        'report': reportObj,
        'workspace_name': workspace_name
    })
    returnVal = {
        'report_name': report_info['name'],
        'report_ref': report_info['ref'],
        # FIX: 'assembly_ref' is declared in FilterContigResults but was
        # previously never populated
        'assembly_ref': new_assembly,
        'contig_count': filtered.total_count,
        'filtered_contig_count': filtered.filtered_count
    }
    #END filter_contigs

    # At some point might do deeper type checking...
    if not isinstance(returnVal, dict):
        raise ValueError('Method filter_contigs return value ' +
                         'returnVal is not type dict as required.')
    # return the results
    return [returnVal]