Code Example #1
    def loadReadsSet(self):
        if hasattr(self.__class__, 'reads_set_ref'):
            return self.__class__.reads_set_ref
        pe_reads_ref = self.pe_reads_ref
        reads_set_name = 'TophatTestReadsSet'
        # create the set object

        reads_set_data = {
            'description': 'Reads Set for testing Tophat',
            'items': [{'ref': pe_reads_ref, 'label': 'rs1'},
                      {'ref': pe_reads_ref, 'label': 'rs2'},
                      {'ref': pe_reads_ref, 'label': 'rs3'}]
        }
        # test a save
        set_api = SetAPI(self.srv_wiz_url, service_ver='dev')
        res = set_api.save_reads_set_v1({
            'data': reads_set_data,
            'output_object_name': reads_set_name,
            'workspace': self.getWsName()
        })
        reads_set_ref = res['set_ref']

        # cache on the class so later calls return the same set (this is what
        # the hasattr() check at the top of the method looks for)
        self.__class__.reads_set_ref = reads_set_ref
        print('Loaded ReadsSet: ' + reads_set_ref)
        return reads_set_ref
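A minimal usage sketch for the helper above, assuming the usual KBase SDK test wiring (a setUpClass that populates self.pe_reads_ref and self.srv_wiz_url, plus getWsName()/getImpl()/getContext() helpers); the app method and parameter names here are illustrative, not taken from the example:

    def test_run_with_reads_set(self):
        # the first call saves the set; later calls return the cached ref
        reads_set_ref = self.loadReadsSet()
        params = {
            'input_reads_ref': reads_set_ref,    # hypothetical parameter name
            'workspace_name': self.getWsName(),
            'output_name': 'tophat_test_output'  # hypothetical
        }
        # result = self.getImpl().run_tophat2(self.getContext(), params)[0]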
Code Example #2
    def exec_remove_adapters(self, ctx, params):
        """
        :param params: instance of type "RemoveAdaptersParams" -> structure:
           parameter "output_workspace" of String, parameter
           "output_object_name" of String, parameter "input_reads" of type
           "ws_ref" (@ref ws), parameter "five_prime" of type
           "FivePrimeOptions" (unfortunately, we have to name the fields
           uniquely between 3' and 5' options due to the current
           implementation of grouped parameters) -> structure: parameter
           "adapter_sequence_5P" of String, parameter "anchored_5P" of type
           "boolean" (@range (0, 1)), parameter "three_prime" of type
           "ThreePrimeOptions" -> structure: parameter "adapter_sequence_3P"
           of String, parameter "anchored_3P" of type "boolean" (@range (0,
           1)), parameter "error_tolerance" of Double, parameter
           "min_overlap_length" of Long, parameter "min_read_length" of Long,
           parameter "discard_untrimmed" of type "boolean" (@range (0, 1))
        :returns: instance of type "exec_RemoveAdaptersResult" -> structure:
           parameter "report" of String, parameter "output_reads_ref" of
           String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN exec_remove_adapters
        console = []
        self.log(console, 'Running exec_remove_adapters() with parameters: ')
        self.log(console, "\n" + pformat(params))
        self.log(console, "-----------------------------------------------\n")
        report = ''
        returnVal = dict()
        returnVal['output_reads_ref'] = None

        token = ctx['token']
        wsClient = workspaceService(self.config['workspace-url'], token=token)
        ws = Workspace(self.config['workspace-url'], token=token)
        #setAPI_Client = SetAPI (url=self.config['SDK_CALLBACK_URL'], token=token) # for SDK local, doesn't work for SetAPI
        setAPI_Client = SetAPI(url=self.config['service-wizard-url'],
                               token=token)  # for dynamic service
        headers = {'Authorization': 'OAuth ' + token}
        env = os.environ.copy()
        env['KB_AUTH_TOKEN'] = token

        # 0. param checks
        required_params = [
            'output_workspace', 'input_reads', 'output_object_name'
        ]
        for arg in required_params:
            if arg not in params or params[arg] is None or params[arg] == '':
                raise ValueError("Must define required param: '" + arg + "'")

        # 1. load provenance
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        # add additional info to provenance here, in this case the input data object reference
        provenance[0]['input_ws_objects'] = [str(params['input_reads'])]

        # 2. Determine whether read library, ReadsSet or RNASeqSampleSet is input object
        #
        try:
            # object_info tuple
            [
                OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I, SAVED_BY_I,
                WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I, META_I
            ] = range(11)
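            # (the eleven names above are index constants 0..10 into the
            # object_info tuple returned for each object by the workspace)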

            input_reads_obj_info = wsClient.get_object_info_new(
                {'objects': [{'ref': params['input_reads']}]})[0]
            input_reads_obj_type = input_reads_obj_info[TYPE_I]
            input_reads_obj_type = re.sub(
                r'-[0-9]+\.[0-9]+$', "",
                input_reads_obj_type)  # remove trailing version
            #input_reads_obj_version = input_reads_obj_info[VERSION_I]  # this is object version, not type version
        except Exception as e:
            raise ValueError(
                'Unable to get read library object from workspace: (' +
                str(params['input_reads']) + ')' + str(e))

        acceptable_types = [
            "KBaseSets.ReadsSet", "KBaseRNASeq.RNASeqSampleSet",
            "KBaseFile.PairedEndLibrary", "KBaseFile.SingleEndLibrary",
            "KBaseAssembly.PairedEndLibrary", "KBaseAssembly.SingleEndLibrary"
        ]
        if input_reads_obj_type not in acceptable_types:
            raise ValueError("Input reads of type: '" + input_reads_obj_type +
                             "'.  Must be one of " +
                             ", ".join(acceptable_types))

        # 3. Retrieve the set details
        #
        readsSet_ref_list = []
        readsSet_names_list = []
        readsSet_types_list = []
        if "KBaseSets.ReadsSet" in input_reads_obj_type:
            try:
                input_readsSet_obj = setAPI_Client.get_reads_set_v1({
                    'ref': params['input_reads'],
                    'include_item_info': 1
                })

            except Exception as e:
                raise ValueError(
                    'SetAPI FAILURE: Unable to get read library set object from workspace: ('
                    + str(params['input_reads']) + ")\n" + str(e))
            for readsLibrary_obj in input_readsSet_obj['data']['items']:
                readsSet_ref_list.append(readsLibrary_obj['ref'])
                NAME_I = 1
                TYPE_I = 2
                readsSet_names_list.append(readsLibrary_obj['info'][NAME_I])
                this_type = readsLibrary_obj['info'][TYPE_I]
                this_type = re.sub(r'-[0-9]+\.[0-9]+$', "",
                                   this_type)  # remove trailing version
                readsSet_types_list.append(this_type)

        elif "KBaseRNASeq.RNASeqSampleSet" in input_reads_obj_type:
            sample_set = ws.get_objects2(
                {"objects": [{"ref": params['input_reads']}]})["data"][0]["data"]
            sample_refs = list()
            for i in range(len(sample_set["sample_ids"])):
                readsSet_ref_list.append(sample_set["sample_ids"][i])
                sample_refs.append({"ref": sample_set["sample_ids"][i]})

            info = ws.get_object_info3({"objects": sample_refs})
            for j in range(len(info["infos"])):
                NAME_I = 1
                TYPE_I = 2
                readsSet_names_list.append(info["infos"][j][NAME_I])
                sample_type = info["infos"][j][TYPE_I]
                sample_type = re.sub(r'-[0-9]+\.[0-9]+$', "",
                                     sample_type)  # remove trailing version
                readsSet_types_list.append(sample_type)
        else:
            readsSet_ref_list = [params['input_reads']]
            readsSet_names_list = [params['output_object_name']]
            readsSet_types_list = [input_reads_obj_type]

        # 4. Iterate through readsLibrary members of set
        #
        report = ''
        cutadapt_readsSet_ref = None
        cutadapt_readsLib_refs = []

        for reads_item_i, input_reads_library_ref in enumerate(readsSet_ref_list):
            exec_remove_adapters_OneLibrary_params = {
                'output_workspace': params['output_workspace'],
                'input_reads': input_reads_library_ref,
                'reads_type': readsSet_types_list[reads_item_i]
            }
            if (input_reads_obj_type != "KBaseSets.ReadsSet"
                    and input_reads_obj_type != "KBaseRNASeq.RNASeqSampleSet"):
                exec_remove_adapters_OneLibrary_params[
                    'output_object_name'] = params['output_object_name']
            else:
                exec_remove_adapters_OneLibrary_params[
                    'output_object_name'] = readsSet_names_list[
                        reads_item_i] + "_cutadapt"

            optional_params = [
                'error_tolerance', 'min_overlap_length',
                'min_read_length', 'discard_untrimmed'
            ]
            optional_g_params = {
                'five_prime': ['adapter_sequence_5P', 'anchored_5P'],
                'three_prime': ['adapter_sequence_3P', 'anchored_3P']
            }
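            # pass through only those optional scalar and grouped 5'/3'
            # params that the caller actually supplied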
            for arg in optional_params:
                if arg in params and params[arg] is not None:
                    exec_remove_adapters_OneLibrary_params[arg] = params[arg]

            for group in optional_g_params:
                if group in params and params[group] is not None:
                    exec_remove_adapters_OneLibrary_params[group] = dict()
                    for arg in optional_g_params[group]:
                        if arg in params[group] and params[group][arg] is not None:
                            exec_remove_adapters_OneLibrary_params[group][arg] = \
                                params[group][arg]

            msg = "\n\nRUNNING exec_remove_adapters_OneLibrary() ON LIBRARY: " + str(
                input_reads_library_ref) + " " + str(
                    readsSet_names_list[reads_item_i]) + "\n"
            msg += "----------------------------------------------------------------------------\n"
            report += msg
            self.log(console, msg)

            # RUN
            exec_remove_adapters_OneLibrary_retVal = self.exec_remove_adapters_OneLibrary(
                ctx, exec_remove_adapters_OneLibrary_params)[0]

            report += exec_remove_adapters_OneLibrary_retVal['report'] + "\n\n"
            cutadapt_readsLib_refs.append(
                exec_remove_adapters_OneLibrary_retVal['output_reads_ref'])

        # 5. Conclude
        # Just one Library
        if (input_reads_obj_type != "KBaseSets.ReadsSet"
                and input_reads_obj_type != "KBaseRNASeq.RNASeqSampleSet"):

            # create return output object
            result = {
                'report': report,
                'output_reads_ref': cutadapt_readsLib_refs[0],
            }
        # ReadsSet or SampleSet
        else:
            # save cutadapt readsSet
            some_cutadapt_output_created = False
            items = []
            for i, lib_ref in enumerate(cutadapt_readsLib_refs):

                if lib_ref is None:
                    #items.append(None)  # can't have 'None' items in ReadsSet
                    continue
                else:
                    some_cutadapt_output_created = True
                    try:
                        label = input_readsSet_obj['data']['items'][i]['label']
                    except Exception:
                        NAME_I = 1
                        label = ws.get_object_info3(
                            {'objects': [{'ref': lib_ref}]})['infos'][0][NAME_I]
                    label = label + "_cutadapt"

                    items.append({
                        'ref': lib_ref,
                        'label': label
                        #'data_attachment': ,
                        #'info':
                    })
            if some_cutadapt_output_created:
                reads_desc_ext = " + Cutadapt"
                #reads_name_ext = "_cutadapt"
                descText = ""
                reads_name_ext = ""
                try:
                    descText = input_readsSet_obj['data']['description']
                except Exception:
                    NAME_I = 1
                    descText = ws.get_object_info3(
                        {'objects': [{'ref': params['input_reads']}]})['infos'][0][NAME_I]
                descText = descText + reads_desc_ext

                output_readsSet_obj = {'description': descText, 'items': items}
                output_readsSet_name = str(params['output_object_name']) + reads_name_ext
                cutadapt_readsSet_ref = setAPI_Client.save_reads_set_v1({
                    'workspace_name': params['output_workspace'],
                    'output_object_name': output_readsSet_name,
                    'data': output_readsSet_obj
                })['set_ref']
            else:
                raise ValueError("No cutadapt output created")

            # create return output object
            result = {
                'report': report,
                'output_reads_ref': cutadapt_readsSet_ref
            }
        #END exec_remove_adapters

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method exec_remove_adapters return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
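A minimal invocation sketch for the method above, calling the Impl instance directly (impl, ctx, ws_name, and reads_ref are placeholders; the adapter sequence is made up):

    params = {
        'output_workspace': ws_name,
        'output_object_name': 'reads.cutadapt',
        'input_reads': reads_ref,  # a reads library, ReadsSet, or RNASeqSampleSet ref
        'three_prime': {'adapter_sequence_3P': 'AGATCGGAAGAGC', 'anchored_3P': 0}
    }
    result = impl.exec_remove_adapters(ctx, params)[0]
    print(result['output_reads_ref'])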
Code Example #3
    def KButil_Build_InSilico_Metagenomes_with_Grinder(self, ctx, params):
        """
        :param params: instance of type
           "KButil_Build_InSilico_Metagenomes_with_Grinder_Params"
           (KButil_Build_InSilico_Metagenomes_with_Grinder() ** **  Use
           Grinder to generate in silico shotgun metagenomes) -> structure:
           parameter "workspace_name" of type "workspace_name" (** The
           workspace object refs are of form: ** **    objects =
           ws.get_objects([{'ref':
           params['workspace_id']+'/'+params['obj_name']}]) ** ** "ref" means
           the entire name combining the workspace id and the object name **
           "id" is a numerical identifier of the workspace or object, and
           should just be used for workspace ** "name" is a string identifier
           of a workspace or object.  This is received from Narrative.),
           parameter "input_refs" of type "data_obj_ref", parameter
           "output_name" of type "data_obj_name", parameter "desc" of String,
           parameter "num_reads_per_lib" of Long, parameter
           "population_percs" of String, parameter "read_len_mean" of Long,
           parameter "read_len_stddev" of Double, parameter "pairs_flag" of
           Long, parameter "mate_orientation" of String, parameter
           "insert_len_mean" of Long, parameter "insert_len_stddev" of
           Double, parameter "mutation_dist" of String, parameter
           "mutation_ratio" of String, parameter "qual_good" of Long,
           parameter "qual_bad" of Long, parameter "len_bias_flag" of Long,
           parameter "random_seed" of Long
        :returns: instance of type
           "KButil_Build_InSilico_Metagenomes_with_Grinder_Output" ->
           structure: parameter "report_name" of type "data_obj_name",
           parameter "report_ref" of type "data_obj_ref"
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN KButil_Build_InSilico_Metagenomes_with_Grinder

        #### STEP 0: basic init
        ##
        console = []
        invalid_msgs = []
        report_text = ''
        self.log(console,
                 'Running KButil_Build_InSilico_Metagenomes_with_Grinder(): ')
        self.log(console, "\n" + pformat(params))

        # Auth
        token = ctx['token']
        headers = {'Authorization': 'OAuth ' + token}
        env = os.environ.copy()
        env['KB_AUTH_TOKEN'] = token

        # API Clients
        #SERVICE_VER = 'dev'  # DEBUG
        SERVICE_VER = 'release'
        wsClient = workspaceService(self.workspaceURL, token=token)
        readsUtils_Client = ReadsUtils(url=self.callbackURL,
                                       token=ctx['token'])  # SDK local
        #setAPI_Client = SetAPI (url=self.callbackURL, token=ctx['token'])  # for SDK local.  local doesn't work for SetAPI
        setAPI_Client = SetAPI(url=self.serviceWizardURL,
                               token=ctx['token'])  # for dynamic service
        auClient = AssemblyUtil(self.callbackURL,
                                token=ctx['token'],
                                service_ver=SERVICE_VER)
        dfu = DFUClient(self.callbackURL)

        # param checks
        required_params = [
            'workspace_name', 'input_refs', 'output_name', 'num_reads_per_lib',
            'population_percs', 'read_len_mean', 'read_len_stddev',
            'pairs_flag', 'mate_orientation', 'insert_len_mean',
            'insert_len_stddev', 'mutation_dist', 'mutation_ratio',
            'qual_good', 'qual_bad', 'len_bias_flag', 'random_seed'
        ]
        for arg in required_params:
            if arg not in params or params[arg] is None or params[arg] == '':
                raise ValueError("Must define required param: '" + arg + "'")

        # cast unpredictable numerical params to str (mostly used in string context)
        numerical_params = [
            'num_reads_per_lib', 'read_len_mean', 'read_len_stddev',
            'pairs_flag', 'insert_len_mean', 'insert_len_stddev', 'qual_good',
            'qual_bad', 'len_bias_flag', 'random_seed'
        ]
        for arg in numerical_params:
            if arg not in params or params[arg] is None or params[arg] == '':
                continue
            params[arg] = str(params[arg])

        # load provenance
        provenance = [{}]
        if 'provenance' in ctx:
            provenance = ctx['provenance']
        provenance[0]['input_ws_objects'] = []
        for input_ref in params['input_refs']:
            provenance[0]['input_ws_objects'].append(input_ref)

        # set the output paths
        timestamp = int(
            (datetime.utcnow() - datetime.utcfromtimestamp(0)).total_seconds()
            * 1000)
        output_dir = os.path.join(self.scratch, 'output.' + str(timestamp))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        html_output_dir = os.path.join(output_dir, 'html')
        if not os.path.exists(html_output_dir):
            os.makedirs(html_output_dir)

        #### STEP 1: Parse population_percs and write to file
        ##
        abundance_str = params['population_percs'].strip()
        abundance_file_path = os.path.join(output_dir, 'my_abundances.txt')
        abundance_config_num_libs = 0
        abundance_config_num_libs_set = False
        grinder_genome_ids = []
        header = []
        out_buf = []

        for row in abundance_str.split("\n"):
            cols = re.split(r'\s+', row)
            if cols[0].upper() == "GENOME":
                for col in cols:
                    if col == '':
                        continue
                    header.append(col)
                continue
            grinder_genome_ids.append(cols[0])
            self.log(console, "GRINDER GENOME ID: '" + cols[0] + "'")  # DEBUG
            out_row = []
            for col in cols:
                if col == '':
                    continue
                elif col == '%':
                    continue
                elif col.endswith('%'):
                    col = col.rstrip('%')
                out_row.append(col)
            out_buf.append("\t".join(out_row))
            num_samples = len(out_row) - 1  # first col is genome id
            if not abundance_config_num_libs_set:
                abundance_config_num_libs_set = True
                abundance_config_num_libs = num_samples
            elif num_samples != abundance_config_num_libs:
                invalid_msgs.append(
                    "inconsistent number of samples in population_percs input field"
                )
        # data validation
        if abundance_config_num_libs == 0:
            invalid_msgs.append(
                "unable to find sample percentages in population_percs input field"
            )
        sample_sums = []
        for row_i, abund_row_str in enumerate(out_buf):
            abund_row = abund_row_str.split()
            for sample_i, abund in enumerate(abund_row[1:]):
                if row_i == 0:
                    sample_sums.append(0)
                #self.log (console, "row_i: "+str(row_i)+" sample_i: "+str(sample_i))  # DEBUG
                sample_sums[sample_i] += float(abund)
        for sample_i, sample_sum in enumerate(sample_sums):
            if sample_sum < 99.5 or sample_sum > 100.5:
                self.log(
                    invalid_msgs, "Sample: " + str(sample_i + 1) + " " +
                    header[sample_i + 1] +
                    " proportions do not sum to 100.0. Sum: " +
                    str(sample_sum))

        if len(invalid_msgs) == 0:
            with open(abundance_file_path, 'w') as abundance_fh:
                for out_line in out_buf:
                    abundance_fh.write(out_line + "\n")
            # DEBUG
            with open(abundance_file_path, 'r') as abundance_fh:
                for out_line in abundance_fh.readlines():
                    out_line = out_line.rstrip()
                    self.log(console, "ABUNDANCE_CONFIG: '" + out_line + "'")

        #### STEP 2: get genome scaffold sequences
        ##
        if len(invalid_msgs) == 0:
            genomes_src_db_file_path = os.path.join(output_dir, 'genomes.fna')
            read_buf_size = 65536
            write_buf_size = 65536
            accepted_input_types = ["KBaseGenomes.Genome"]
            genome_refs = params['input_refs']
            genome_obj_names = []
            genome_sci_names = []
            assembly_refs = []

            for i, input_ref in enumerate(genome_refs):
                # genome obj info
                try:
                    [
                        OBJID_I, NAME_I, TYPE_I, SAVE_DATE_I, VERSION_I,
                        SAVED_BY_I, WSID_I, WORKSPACE_I, CHSUM_I, SIZE_I,
                        META_I
                    ] = range(11)  # object_info tuple
                    input_obj_info = wsClient.get_object_info_new(
                        {'objects': [{'ref': input_ref}]})[0]
                    input_obj_type = re.sub(
                        r'-[0-9]+\.[0-9]+$', "",
                        input_obj_info[TYPE_I])  # remove trailing version
                    genome_obj_names.append(input_obj_info[NAME_I])

                except Exception as e:
                    raise ValueError('Unable to get object from workspace: (' +
                                     input_ref + ')' + str(e))
                if input_obj_type not in accepted_input_types:
                    raise ValueError("Input object of type '" +
                                     input_obj_type +
                                     "' not accepted.  Must be one of " +
                                     ", ".join(accepted_input_types))

                # genome obj data
                try:
                    genome_obj = wsClient.get_objects([{'ref': input_ref}])[0]['data']
                    genome_sci_names.append(genome_obj['scientific_name'])
                except Exception:
                    raise ValueError("unable to fetch genome: " + input_ref)

                # Get assembly_refs
                if ('contigset_ref' not in genome_obj or genome_obj['contigset_ref'] is None) \
                   and ('assembly_ref' not in genome_obj or genome_obj['assembly_ref'] is None):
                    msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                          + genome_sci_names[i] \
                          + " MISSING BOTH contigset_ref AND assembly_ref.  Cannot process.  Exiting."
                    self.log(console, msg)
                    self.log(invalid_msgs, msg)
                    continue
                elif 'assembly_ref' in genome_obj and genome_obj['assembly_ref'] is not None:
                    msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                          + genome_sci_names[i] + " USING assembly_ref: " \
                          + str(genome_obj['assembly_ref'])
                    self.log(console, msg)
                    assembly_refs.append(genome_obj['assembly_ref'])
                elif 'contigset_ref' in genome_obj and genome_obj['contigset_ref'] is not None:
                    msg = "Genome " + genome_obj_names[i] + " (ref:" + input_ref + ") " \
                          + genome_sci_names[i] + " USING contigset_ref: " \
                          + str(genome_obj['contigset_ref'])
                    self.log(console, msg)
                    assembly_refs.append(genome_obj['contigset_ref'])

        # get fastas for scaffolds
        if len(invalid_msgs) == 0:
            contig_file_paths = []

            for genome_i, input_ref in enumerate(genome_refs):
                contig_file = auClient.get_assembly_as_fasta({
                    'ref': assembly_refs[genome_i]
                }).get('path')
                sys.stdout.flush()
                contig_file_path = dfu.unpack_file({'file_path':
                                                    contig_file})['file_path']
                contig_file_paths.append(contig_file_path)

            # reformat FASTA IDs for Grinder
            with open(genomes_src_db_file_path, 'w',
                      write_buf_size) as genomes_src_db_fh:
                for genome_i, contig_file_path in enumerate(contig_file_paths):
                    #self.log(console,str(genome_i)+" CONTIG_FILE: "+contig_file_path)  # DEBUG
                    #contig_ids = []
                    with open(contig_file_path, 'r',
                              read_buf_size) as contig_fh:
                        genome_seq = ''
                        contig_seq = ''
                        contig_seqs = []
                        for contig_line in contig_fh.readlines():
                            contig_line = contig_line.rstrip()
                            if contig_line.startswith('>'):
                                #contig_id = contig_line.strip()[1:].split(' ')[0]
                                #contig_ids.append(contig_id)
                                #genomes_src_db_fh.write(">"+grinder_genome_ids[genome_i]+"\n")
                                if contig_seq != '':
                                    contig_seqs.append(contig_seq)
                                    contig_seq = ''
                                    continue
                            else:
                                #genomes_src_db_fh.write(contig_line)
                                contig_seq += contig_line
                        if contig_seq != '':
                            contig_seqs.append(contig_seq)
                            contig_seq = ''

                    # write joined contigs to file
                    genome_seq = "NNNNNNNNNN".join(
                        contig_seqs
                    )  # NOTE: Using "-exclude_chars" grinder opt on N to avoid contig joins
                    genome_seq = genome_seq.upper(
                    )  # grinder might require upper case?
                    genomes_src_db_fh.write(">" +
                                            grinder_genome_ids[genome_i] +
                                            "\n")
                    genomes_src_db_fh.write(genome_seq + "\n")
                    genome_seq = ''
                    contig_seqs = []

                    # DEBUG
                    #for contig_id in contig_ids:
                    #    self.log(console, "\tCONTIG_ID: "+contig_id)  # DEBUG
            # DEBUG
            toggle = 0
            with open(genomes_src_db_file_path, 'r',
                      write_buf_size) as genomes_src_db_fh:
                for contig_line in genomes_src_db_fh.readlines():
                    contig_line = contig_line.rstrip()
                    if contig_line.startswith('>'):
                        self.log(console, 'GENOMES_SRC_DB: ' + contig_line)
                        genome_id = contig_line[1:]
                        toggle = 0
                    elif toggle == 0:
                        #elif genome_id == 'G3':
                        self.log(
                            console,
                            'GENOMES_SRC_DB: ' + contig_line[0:50] + '...')
                        toggle += 1

        #### STEP 3: Run Grinder
        ##
        if len(invalid_msgs) == 0:
            cmd = []
            cmd.append(self.GRINDER)
            # output
            cmd.append('-base_name')
            cmd.append(params['output_name'])
            cmd.append('-output_dir')
            cmd.append(output_dir)
            # contigs input
            cmd.append('-reference_file')
            cmd.append(genomes_src_db_file_path)
            # abundances
            cmd.append('-abundance_file')
            cmd.append(abundance_file_path)
            # library size
            cmd.append('-total_reads')
            cmd.append(str(params['num_reads_per_lib']))
            # num libraries (overridden by abundance file?)
            cmd.append('-num_libraries')
            cmd.append(str(abundance_config_num_libs))
            # read and insert lens
            cmd.append('-read_dist')
            cmd.append(str(params['read_len_mean']))
            cmd.append('normal')
            cmd.append(str(params['read_len_stddev']))
            if str(params['pairs_flag']) == '1':
                cmd.append('-insert_dist')
                cmd.append(str(params['insert_len_mean']))
                cmd.append('normal')
                cmd.append(str(params['insert_len_stddev']))
                # mate orientation
                cmd.append('-mate_orientation')
                cmd.append(params['mate_orientation'])
            # genome len bias
            cmd.append('-length_bias')
            cmd.append(str(params['len_bias_flag']))
            # mutation model
            cmd.append('-mutation_dist')
            cmd.append(str(params['mutation_dist']))
            cmd.append('-mutation_ratio')
            cmd.append(str(params['mutation_ratio']))
            # qual scores
            cmd.append('-fastq_output')
            cmd.append('1')
            cmd.append('-qual_levels')
            cmd.append(str(params['qual_good']))
            cmd.append(str(params['qual_bad']))
            # skip contig joins
            cmd.append('-exclude_chars')
            cmd.append('NX')
            # explicitly request bidirectional
            cmd.append('-unidirectional')
            cmd.append('0')
            # random seed
            if 'random_seed' in params and params['random_seed'] is not None \
                    and params['random_seed'] != '':
                cmd.append('-random_seed')
                cmd.append(str(params['random_seed']))

            # RUN
            cmd_str = " ".join(cmd)
            self.log(console, "===========================================")
            self.log(console, "RUNNING: " + cmd_str)
            self.log(console, "===========================================")

            cmdProcess = subprocess.Popen(cmd_str,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.STDOUT,
                                          shell=True)
            outputlines = []
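            # stream the tool's combined stdout/stderr line by line so long
            # runs show progress; NOTE: under Python 3, readline() returns
            # bytes here and would need .decode() before the str operations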
            while True:
                line = cmdProcess.stdout.readline()
                outputlines.append(line)
                if not line: break
                self.log(console, line.replace('\n', ''))

            cmdProcess.stdout.close()
            cmdProcess.wait()
            self.log(console,
                     'return code: ' + str(cmdProcess.returncode) + '\n')
            if cmdProcess.returncode != 0:
                raise ValueError('Error running kb_grinder, return code: ' +
                                 str(cmdProcess.returncode) + '\n')

            #report_text += "\n".join(outputlines)
            #report_text += "cmdstring: " + cmdstring + " stdout: " + stdout + " stderr " + stderr

            # capture output for report and paths to out files
            report_text_buf = []
            struct_file_paths = []
            struct_file_names = []
            fastq_file_paths = []
            for out_line in outputlines:
                out_line = out_line.rstrip()
                if 'Community structure' in out_line:
                    clean_line = out_line.lstrip()
                    struct_file_path = re.split(r'\s+', clean_line)[3]
                    struct_file_paths.append(struct_file_path)
                    struct_file_names.append(struct_file_path.split('/')[-1])
                    self.log(console, "STRUCT_FILE_NAME: '" +
                             struct_file_path.split('/')[-1])  # DEBUG
                elif 'FASTQ file' in out_line:
                    clean_line = out_line.lstrip()
                    fastq_file_paths.append(re.split(r'\s+', clean_line)[3])
                else:
                    report_text_buf.append(out_line)
            report_text += "\n".join(report_text_buf)

        #### STEP 4: Upload Read Libs and create reads set
        ##
        if len(invalid_msgs) == 0:
            lib_obj_refs = []
            lib_obj_names = []
            readsSet_items = []

            for sample_i, fastq_file_path in enumerate(fastq_file_paths):

                if not os.path.isfile(fastq_file_path) \
                   or os.path.getsize(fastq_file_path) == 0:
                    raise ValueError("empty read lib generated: " +
                                     fastq_file_path)
                else:
                    # lib obj name (a single output lib gets the plain output name)
                    if len(fastq_file_paths) == 1:
                        output_obj_name = params['output_name']
                    else:
                        if str(params['pairs_flag']) == '1':
                            output_obj_name = params['output_name'] + '-sample' \
                                + str(sample_i + 1) + ".PairedEndLib"
                        else:
                            output_obj_name = params['output_name'] + '-sample' \
                                + str(sample_i + 1) + ".SingleEndLib"
                    lib_obj_names.append(output_obj_name)

                    # upload lib and get obj ref
                    self.log(console,
                             'Uploading simulated reads: ' + output_obj_name)
                    sequencing_tech = 'artificial reads'
                    if str(params['pairs_flag']) == '1':
                        interleaved = 1
                    else:
                        interleaved = 0
                    lib_obj_ref = readsUtils_Client.upload_reads({
                        'wsname': str(params['workspace_name']),
                        'name': output_obj_name,
                        'fwd_file': fastq_file_path,
                        'interleaved': interleaved,
                        'sequencing_tech': sequencing_tech
                    })['obj_ref']
                    lib_obj_refs.append(lib_obj_ref)
                    os.remove(fastq_file_path)  # free up disk

                    # add to readsSet
                    readsSet_items.append({
                        'ref': lib_obj_ref,
                        'label': output_obj_name
                    })
            # create readsset
            readsSet_obj_ref = None
            if len(lib_obj_refs) > 1:
                readsSet_obj = {
                    'description': "Grinder Metagenome from " + " ".join(genome_obj_names),
                    'items': readsSet_items
                }
                readsSet_obj_name = params['output_name']
                readsSet_obj_ref = setAPI_Client.save_reads_set_v1({
                    'workspace_name': params['workspace_name'],
                    'output_object_name': readsSet_obj_name,
                    'data': readsSet_obj
                })['set_ref']

        #### STEP 5: Build report
        ##
        reportName = 'kb_grinder_report_' + str(uuid.uuid4())
        reportObj = {
            'objects_created': [],
            #'text_message': '',  # or is it 'message'?
            'message': '',  # or is it 'text_message'?
            'direct_html': '',
            #'direct_html_link_index': 0,
            'file_links': [],
            'html_links': [],
            'workspace_name': params['workspace_name'],
            'report_object_name': reportName
        }

        # message
        if len(invalid_msgs) > 0:
            report_text = "\n".join(invalid_msgs)
        reportObj['message'] = report_text

        if len(invalid_msgs) == 0:
            # objs
            if readsSet_obj_ref is not None:
                reportObj['objects_created'].append({
                    'ref': readsSet_obj_ref,
                    'desc': params['output_name'] + " ReadsSet"
                })
            for lib_obj_i, lib_obj_ref in enumerate(lib_obj_refs):
                reportObj['objects_created'].append({
                    'ref': lib_obj_ref,
                    'desc': lib_obj_names[lib_obj_i]
                })
            # downloadable data
            for data_i, data_path in enumerate(struct_file_paths):
                try:
                    upload_ret = dfu.file_to_shock({
                        'file_path': data_path,
                        #'pack': 'zip',
                        'make_handle': 0
                    })
                except Exception:
                    raise ValueError('error uploading ' + data_path +
                                     ' file to shock')
                reportObj['file_links'].append({
                    'shock_id': upload_ret['shock_id'],
                    'name': struct_file_names[data_i],
                    'label': struct_file_names[data_i]
                })

            # html report
            """
            try:
                html_upload_ret = dfu.file_to_shock({'file_path': html_output_dir,
                                                     'make_handle': 0,
                                                     'pack': 'zip'})
            except:
                raise ValueError ('error uploading html report to shock')
            reportObj['direct_html_link_index'] = 0
            reportObj['html_links'] = [{'shock_id': html_upload_ret['shock_id'],
                                        'name': html_file,
                                        'label': params['output_name']+' HTML'
                                    }
                                   ]
            """

        # save report object
        #
        SERVICE_VER = 'release'
        reportClient = KBaseReport(self.callbackURL,
                                   token=ctx['token'],
                                   service_ver=SERVICE_VER)
        #report_info = report.create({'report':reportObj, 'workspace_name':params['workspace_name']})
        report_info = reportClient.create_extended_report(reportObj)

        returnVal = {
            'report_name': report_info['name'],
            'report_ref': report_info['ref']
        }
        #END KButil_Build_InSilico_Metagenomes_with_Grinder

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method KButil_Build_InSilico_Metagenomes_with_Grinder return value '
                + 'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
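A matching invocation sketch, assuming impl and ctx exist and params is filled in along the lines of the illustrative dict near the top of the method:

    returnVal = impl.KButil_Build_InSilico_Metagenomes_with_Grinder(ctx, params)[0]
    print(returnVal['report_name'], returnVal['report_ref'])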