Example #1
    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.report_util = kb_GenericsReport(self.callback_url)
        self.data_util = DataUtil(config)
        self.sampleservice_util = SampleServiceUtil(config)
        self.attr_util = AttributesUtil(config)
        self.matrix_util = MatrixUtil(config)
        self.taxon_util = TaxonUtil(config)
        self.matrix_types = [
            x.split(".")[1].split('-')[0]
            for x in self.data_util.list_generic_types()
        ]
        self.taxon_wsname = config['taxon-workspace-name']
        self.kbse = KBaseSearchEngine(config['search-url'])
        self.taxon_cache = dict()
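
This is the same BiomUtil constructor that reappears in Example #4. It consumes
a config mapping that mixes deployment settings with values injected by the SDK
at runtime. A minimal sketch of such a mapping (key names taken from the code
above; all values are placeholders, not real endpoints or tokens):

    config = {
        'SDK_CALLBACK_URL': 'http://localhost:5000',     # injected by the SDK runner
        'KB_AUTH_TOKEN': 'xxxx',                         # placeholder token
        'scratch': '/kb/module/work/tmp',
        'taxon-workspace-name': 'ReferenceTaxons',       # example value
        'search-url': 'https://kbase.example/searchapi'  # placeholder endpoint
    }
    util = BiomUtil(config)
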
Example #2
    @classmethod
    def setUpClass(cls):
        cls.token = environ.get('KB_AUTH_TOKEN', None)
        config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
        cls.cfg = {}
        config = ConfigParser()
        config.read(config_file)
        for nameval in config.items('GenericsAPI'):
            cls.cfg[nameval[0]] = nameval[1]
        # Getting username from Auth profile for token
        authServiceUrl = cls.cfg['auth-service-url']
        auth_client = _KBaseAuth(authServiceUrl)
        user_id = auth_client.get_user(cls.token)
        # WARNING: don't call any logging methods on the context object,
        # it'll result in a NoneType error
        cls.ctx = MethodContext(None)
        cls.ctx.update({
            'token':
            cls.token,
            'user_id':
            user_id,
            'provenance': [{
                'service': 'GenericsAPI',
                'method': 'please_never_use_it_in_production',
                'method_params': []
            }],
            'authenticated':
            1
        })
        cls.wsURL = cls.cfg['workspace-url']
        cls.shockURL = cls.cfg['shock-url']
        cls.wsClient = workspaceService(cls.wsURL)
        cls.serviceImpl = GenericsAPI(cls.cfg)
        cls.scratch = cls.cfg['scratch']
        cls.callback_url = os.environ['SDK_CALLBACK_URL']
        cls.matrix_util = MatrixUtil(cls.cfg)

        cls.dfu = DataFileUtil(cls.callback_url)
        cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                               token=cls.token)

        suffix = int(time.time() * 1000)
        cls.wsName = "test_GenericsAPI_" + str(suffix)
        ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
        cls.wsId = ret[0]

        small_file = os.path.join(cls.scratch, 'test.txt')
        with open(small_file, "w") as f:
            f.write("empty content")
        cls.test_shock = cls.dfu.file_to_shock({
            'file_path': small_file,
            'make_handle': True
        })
        cls.handles_to_delete = []
        cls.nodes_to_delete = []
        cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
        cls.nodes_to_delete.append(cls.test_shock['shock_id'])
        cls.prepare_data()
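
A hedged sketch of the companion tearDownClass for this harness, assuming the
standard KBase SDK test pattern of deleting the temporary workspace; cleanup of
the tracked shock nodes and handles is left as a comment because the exact
client calls vary between SDK versions:

    @classmethod
    def tearDownClass(cls):
        if hasattr(cls, 'wsName'):
            cls.wsClient.delete_workspace({'workspace': cls.wsName})
            print('Test workspace was deleted')
        # cls.nodes_to_delete and cls.handles_to_delete would be purged here via
        # the shock and handle service clients (calls omitted; version dependent)
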
Example #3
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.config = config
        self.config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
        self.config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
        self.scratch = config['scratch']
        self.attr_util = AttributesUtil(self.config)
        self.matrix_util = MatrixUtil(self.config)
        self.corr_util = CorrelationUtil(self.config)
        self.data_util = DataUtil(self.config)
        self.network_util = NetworkUtil(self.config)
        self.biom_util = BiomUtil(self.config)
        self.pca_util = PCAUtil(self.config)
        self.data_table_util = DataTableUtil(self.config)

        logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                            level=logging.INFO)
        #END_CONSTRUCTOR
        pass
Example #4
class BiomUtil:
    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise
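
    # Note: on Python 3.2+, os.makedirs(path, exist_ok=True) achieves the same
    # effect as the try/except above in a single call.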

    def _process_params(self, params):
        logging.info('start validating import_matrix_from_biom params')

        # check for required parameters
        for p in [
                'obj_type', 'matrix_name', 'workspace_id', 'scale',
                'amplicon_type', 'sequencing_technology',
                'sequencing_instrument', 'target_gene', 'target_subfragment',
                'taxon_calling'
        ]:
            if p not in params:
                raise ValueError(
                    '"{}" parameter is required, but missing'.format(p))

        # check sequencing_technology and sequencing_instrument matching
        sequencing_technology = params.get('sequencing_technology')
        sequencing_instrument = params.get('sequencing_instrument')
        if sequencing_technology not in SEQ_INSTRUMENTS_MAP:
            raise ValueError('Unexpected sequencing technology: {}'.format(
                sequencing_technology))
        expected_instruments = SEQ_INSTRUMENTS_MAP.get(sequencing_technology)
        if sequencing_instrument not in expected_instruments:
            raise ValueError(
                'Please select sequencing instrument among {} for {}'.format(
                    expected_instruments, sequencing_technology))

        # check target_gene and target_subfragment matching
        target_gene = params.get('target_gene')
        target_subfragment = list(set(params.get('target_subfragment')))
        params['target_subfragment'] = target_subfragment

        if target_gene not in TARGET_GENE_SUBFRAGMENT_MAP:
            raise ValueError('Unexpected target gene: {}'.format(target_gene))
        expected_subfragments = TARGET_GENE_SUBFRAGMENT_MAP.get(target_gene)
        if not set(target_subfragment) <= set(expected_subfragments):
            raise ValueError(
                'Please select target subfragments among {} for {}'.format(
                    expected_subfragments, target_gene))

        # check taxon_calling
        taxon_calling = params.get('taxon_calling')
        taxon_calling_method = list(
            set(taxon_calling.get('taxon_calling_method')))
        params['taxon_calling_method'] = taxon_calling_method

        if 'denoising' in taxon_calling_method:
            denoise_method = taxon_calling.get('denoise_method')
            sequence_error_cutoff = taxon_calling.get('sequence_error_cutoff')

            if not (denoise_method and sequence_error_cutoff):
                raise ValueError(
                    'Please provide denoise_method and sequence_error_cutoff')

            params['denoise_method'] = denoise_method
            params['sequence_error_cutoff'] = sequence_error_cutoff

        if 'clustering' in taxon_calling_method:
            clustering_method = taxon_calling.get('clustering_method')
            clustering_cutoff = taxon_calling.get('clustering_cutoff')

            if not (clustering_method and clustering_cutoff):
                raise ValueError(
                    'Please provide clustering_method and clustering_cutoff')

            params['clustering_method'] = clustering_method
            params['clustering_cutoff'] = clustering_cutoff

        obj_type = params.get('obj_type')
        if obj_type not in self.matrix_types:
            raise ValueError('Unknown matrix object type: {}'.format(obj_type))

        scale = params.get('scale')
        if scale not in SCALE_TYPES:
            raise ValueError('Unknown scale type: {}'.format(scale))

        biom_file = None
        tsv_file = None
        fasta_file = None
        metadata_keys = DEFAULT_META_KEYS

        input_local_file = params.get('input_local_file', False)

        if params.get('taxonomic_abundance_tsv') and params.get(
                'taxonomic_fasta'):
            tsv_file = params.get('taxonomic_abundance_tsv')
            fasta_file = params.get('taxonomic_fasta')

            if not (tsv_file and fasta_file):
                raise ValueError('missing TSV or FASTA file')

            if not input_local_file:
                tsv_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    tsv_file
                }).get('copy_file_path')

                fasta_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    fasta_file
                }).get('copy_file_path')

            metadata_keys_str = params.get('metadata_keys')
            if metadata_keys_str:
                metadata_keys += [
                    x.strip() for x in metadata_keys_str.split(',')
                ]
            mode = 'tsv_fasta'
        elif params.get('biom_fasta'):
            biom_fasta = params.get('biom_fasta')
            biom_file = biom_fasta.get('biom_file_biom_fasta')
            fasta_file = biom_fasta.get('fasta_file_biom_fasta')

            if not (biom_file and fasta_file):
                raise ValueError('missing BIOM or FASTA file')

            if not input_local_file:
                biom_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    biom_file
                }).get('copy_file_path')

                fasta_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    fasta_file
                }).get('copy_file_path')
            mode = 'biom_fasta'
        elif params.get('tsv_fasta'):
            tsv_fasta = params.get('tsv_fasta')
            tsv_file = tsv_fasta.get('tsv_file_tsv_fasta')
            fasta_file = tsv_fasta.get('fasta_file_tsv_fasta')

            if not (tsv_file and fasta_file):
                raise ValueError('missing TSV or FASTA file')

            if not input_local_file:
                tsv_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    tsv_file
                }).get('copy_file_path')

                fasta_file = self.dfu.download_staging_file({
                    'staging_file_subdir_path':
                    fasta_file
                }).get('copy_file_path')

            metadata_keys_str = tsv_fasta.get('metadata_keys_tsv_fasta')
            if metadata_keys_str:
                metadata_keys += [
                    x.strip() for x in metadata_keys_str.split(',')
                ]
            mode = 'tsv_fasta'
        else:
            raise ValueError('missing valid file group type in parameters')

        return (biom_file, tsv_file, fasta_file, mode,
                list(set(metadata_keys)))
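
    # A hedged sketch (not from the source) of a params dict that would pass
    # _process_params, assuming the quoted technology/instrument and
    # gene/subfragment pairs are valid entries in SEQ_INSTRUMENTS_MAP and
    # TARGET_GENE_SUBFRAGMENT_MAP (the real allowed values live in those maps):
    #
    #     params = {
    #         'obj_type': 'AmpliconMatrix',               # must be a known matrix type
    #         'matrix_name': 'my_matrix',
    #         'workspace_id': 12345,
    #         'scale': 'raw',                             # must be in SCALE_TYPES
    #         'amplicon_type': 'ASV',
    #         'sequencing_technology': 'Illumina',        # hypothetical value
    #         'sequencing_instrument': 'Illumina MiSeq',  # hypothetical value
    #         'target_gene': '16S',                       # hypothetical value
    #         'target_subfragment': ['V4'],               # hypothetical value
    #         'taxon_calling': {'taxon_calling_method': ['clustering'],
    #                           'clustering_method': 'UCLUST',
    #                           'clustering_cutoff': 0.97},
    #         'tsv_fasta': {'tsv_file_tsv_fasta': 'amplicons.tsv',
    #                       'fasta_file_tsv_fasta': 'amplicons.fasta'},
    #         'input_local_file': True,
    #     }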

    def _validate_fasta_file(self, df, fasta_file):
        logging.info('start validating FASTA file')
        try:
            fastq_dict = SeqIO.index(fasta_file, "fasta")
        except Exception:
            raise ValueError(
                'Cannot parse file. Please provide a valid FASTA file')

        matrix_ids = df.index
        file_ids = fastq_dict.keys()

        unmatched_ids = set(matrix_ids) - set(file_ids)

        if unmatched_ids:
            raise ValueError(
                'FASTA file does not contain OTU id(s): {}'.format(unmatched_ids))

    def _file_to_amplicon_data(self,
                               biom_file,
                               tsv_file,
                               fasta_file,
                               mode,
                               refs,
                               matrix_name,
                               workspace_id,
                               scale,
                               description,
                               metadata_keys=None):

        amplicon_data = refs

        if mode.startswith('biom'):
            logging.info('start parsing BIOM file for matrix data')
            table = biom.load_table(biom_file)
            observation_metadata = table._observation_metadata
            sample_metadata = table._sample_metadata

            matrix_data = {
                'row_ids': table._observation_ids.tolist(),
                'col_ids': table._sample_ids.tolist(),
                'values': table.matrix_data.toarray().tolist()
            }

            logging.info('start building attribute mapping object')
            amplicon_data.update(
                self.get_attribute_mapping("row", observation_metadata,
                                           matrix_data, matrix_name, refs,
                                           workspace_id))
            amplicon_data.update(
                self.get_attribute_mapping("col", sample_metadata, matrix_data,
                                           matrix_name, refs, workspace_id))

            amplicon_data['attributes'] = {}
            for k in ('create_date', 'generated_by'):
                val = getattr(table, k)
                if not val:
                    continue
                if isinstance(val, bytes):
                    amplicon_data['attributes'][k] = val.decode('utf-8')
                else:
                    amplicon_data['attributes'][k] = str(val)
        elif mode.startswith('tsv'):
            observation_metadata = None
            sample_metadata = None
            try:
                logging.info('start parsing TSV file for matrix data')
                # sniff the delimiter, then re-read the file with it (this
                # reaches into pandas internals; sep=None with engine='python'
                # would let pandas infer the separator directly)
                reader = pd.read_csv(tsv_file, sep=None, iterator=True)
                inferred_sep = reader._engine.data.dialect.delimiter
                df = pd.read_csv(tsv_file, sep=inferred_sep, index_col=0)
            except Exception:
                raise ValueError(
                    'Cannot parse file. Please provide a valid TSV file')
            else:
                self._validate_fasta_file(df, fasta_file)
                metadata_df = None
                if metadata_keys:
                    shared_metadata_keys = list(
                        set(metadata_keys) & set(df.columns))
                    if mode == 'tsv' and 'consensus_sequence' not in shared_metadata_keys:
                        raise ValueError(
                            'TSV file does not include consensus_sequence')
                    if shared_metadata_keys:
                        metadata_df = df[shared_metadata_keys]
                        df.drop(columns=shared_metadata_keys, inplace=True)
                try:
                    df = df.astype(float)
                except ValueError:
                    err_msg = 'Found some non-float values. The matrix may only contain numeric values.\n'
                    err_msg += 'Please list any non-numeric column names in the Metadata Keys field.'
                    raise ValueError(err_msg)
                df.fillna(0, inplace=True)
                df.index = df.index.astype('str')
                df.columns = df.columns.astype('str')
                matrix_data = {
                    'row_ids': df.index.tolist(),
                    'col_ids': df.columns.tolist(),
                    'values': df.values.tolist()
                }

            logging.info('start building attribute mapping object')
            amplicon_data.update(
                self.get_attribute_mapping("row",
                                           observation_metadata,
                                           matrix_data,
                                           matrix_name,
                                           refs,
                                           workspace_id,
                                           metadata_df=metadata_df))
            amplicon_data.update(
                self.get_attribute_mapping("col", sample_metadata, matrix_data,
                                           matrix_name, refs, workspace_id))

            amplicon_data['attributes'] = {}
        else:
            raise ValueError(
                'error parsing _file_to_amplicon_data, mode: {}'.format(mode))

        amplicon_data.update({'data': matrix_data})

        amplicon_data['search_attributes'] = [
            f'{k}|{v}' for k, v in amplicon_data['attributes'].items()
        ]

        amplicon_data['scale'] = scale
        if description:
            amplicon_data['description'] = description

        return amplicon_data
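
    # The dict returned above merges the caller's refs with, at minimum: 'data'
    # (row_ids / col_ids / values), 'attributes', 'search_attributes', 'scale',
    # optionally 'description', and any row/col attribute-mapping keys produced
    # by get_attribute_mapping below.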

    def get_attribute_mapping(self,
                              axis,
                              metadata,
                              matrix_data,
                              matrix_name,
                              refs,
                              workspace_id,
                              metadata_df=None):
        mapping_data = {}
        axis_ids = matrix_data[f'{axis}_ids']
        if refs.get('sample_set_ref') and axis == 'col':
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[
                f'{axis}_attributemapping_ref'] = self._sample_set_to_attribute_mapping(
                    axis_ids, refs.get('sample_set_ref'), name, workspace_id)
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif refs.get(f'{axis}_attributemapping_ref'):
            am_data = self.dfu.get_objects(
                {'object_refs':
                 [refs[f'{axis}_attributemapping_ref']]})['data'][0]['data']
            unmatched_ids = set(axis_ids) - set(am_data['instances'].keys())
            if unmatched_ids:
                name = "Column" if axis == 'col' else "Row"
                raise ValueError(
                    f"The following {name} IDs from the uploaded matrix do not match "
                    f"the supplied {name} attribute mapping: {', '.join(unmatched_ids)}"
                    f"\nPlease verify the input data or upload an excel file with a"
                    f"{name} mapping tab.")
            else:
                mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif metadata:
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[
                f'{axis}_attributemapping_ref'] = self._metadata_to_attribute_mapping(
                    axis_ids, metadata, name, workspace_id)
            # if coming from biom file, metadata and axis IDs are guaranteed to match
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif metadata_df is not None:
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[
                f'{axis}_attributemapping_ref'] = self._meta_df_to_attribute_mapping(
                    axis_ids, metadata_df, name, workspace_id)
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}

        return mapping_data

    def _meta_df_to_attribute_mapping(self, axis_ids, metadata_df, obj_name,
                                      ws_id):
        data = {'ontology_mapping_method': "TSV file", 'instances': {}}
        metadata_df = metadata_df.astype(str)
        attribute_keys = metadata_df.columns.tolist()
        data['attributes'] = [{
            'attribute': key,
            'source': 'upload'
        } for key in attribute_keys]

        if 'taxonomy' in attribute_keys:
            data['attributes'].append({
                'attribute': 'parsed_user_taxonomy',
                'source': 'upload'
            })

        for axis_id in axis_ids:
            data['instances'][axis_id] = metadata_df.loc[axis_id].tolist()
            if 'taxonomy' in attribute_keys:
                parsed_user_taxonomy = None
                taxonomy_index = attribute_keys.index('taxonomy')
                taxonomy_str = metadata_df.loc[axis_id].tolist(
                )[taxonomy_index]
                parsed_user_taxonomy = self.taxon_util.process_taxonomic_str(
                    taxonomy_str)
                data['instances'][axis_id].append(parsed_user_taxonomy)

        logging.info(
            'start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id":
            ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": data,
                "name": obj_name
            }]
        })[0]

        # info is a workspace object_info tuple: [0] object id, [4] version,
        # [6] workspace id, so this builds a 'wsid/objid/ver' reference string
        return f'{info[6]}/{info[0]}/{info[4]}'

    def _sample_set_to_attribute_mapping(self, axis_ids, sample_set_ref,
                                         obj_name, ws_id):

        am_data = self.sampleservice_util.sample_set_to_attribute_mapping(
            sample_set_ref)

        unmatched_ids = set(axis_ids) - set(am_data['instances'].keys())
        if unmatched_ids:
            name = "Column"
            raise ValueError(
                f"The following {name} IDs from the uploaded matrix do not match "
                f"the supplied {name} attribute mapping: {', '.join(unmatched_ids)}"
                f"\nPlease verify the input data or upload an excel file with a"
                f"{name} mapping tab.")

        logging.info(
            'start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id":
            ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": am_data,
                "name": obj_name
            }]
        })[0]

        return f'{info[6]}/{info[0]}/{info[4]}'

    def _metadata_to_attribute_mapping(self, instances, metadata, obj_name,
                                       ws_id):
        data = {'ontology_mapping_method': "BIOM file", 'instances': {}}
        sample_set = metadata[0:min(len(metadata), 25)]
        metadata_keys = sorted(
            set((k for m_dict in sample_set for k in m_dict)))
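        # attribute keys are inferred from (at most) the first 25 metadata
        # dicts; the remaining instances are assumed to carry the same keys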
        data['attributes'] = [{
            'attribute': key,
            'source': 'upload'
        } for key in metadata_keys]
        for inst, meta in zip(instances, metadata):
            data['instances'][inst] = [
                str(meta[attr]) for attr in metadata_keys
            ]

        logging.info(
            'start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id":
            ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": data,
                "name": obj_name
            }]
        })[0]
        return f'{info[6]}/{info[0]}/{info[4]}'

    def _generate_visualization_content(self, output_directory, heatmap_dir,
                                        data_df, top_heatmap_dir, top_percent,
                                        display_count):

        row_data_summary = data_df.T.describe().round(2).to_string()
        col_data_summary = data_df.describe().round(2).to_string()

        tab_def_content = ''
        tab_content = ''

        viewer_name = 'data_summary'
        tab_def_content += '''\n<div class="tab">\n'''
        tab_def_content += '''\n<button class="tablinks" '''
        tab_def_content += '''onclick="openTab(event, '{}')"'''.format(
            viewer_name)
        tab_def_content += ''' id="defaultOpen"'''
        tab_def_content += '''>Matrix Statistics</button>\n'''

        tab_content += '''\n<div id="{}" class="tabcontent" style="overflow:auto">'''.format(
            viewer_name)
        tab_content += '''\n<h5>Amplicon Matrix Size: {} x {}</h5>'''.format(
            len(data_df.index), len(data_df.columns))
        tab_content += '''\n<h5>Row Aggregating Statistics</h5>'''
        html = '''\n<pre class="tab">''' + str(row_data_summary).replace(
            "\n", "<br>") + "</pre>"
        tab_content += html
        tab_content += '''\n<br>'''
        tab_content += '''\n<hr style="height:2px;border-width:0;color:gray;background-color:gray">'''
        tab_content += '''\n<br>'''
        tab_content += '''\n<h5>Column Aggregating Statistics</h5>'''
        html = '''\n<pre class="tab">''' + str(col_data_summary).replace(
            "\n", "<br>") + "</pre>"
        tab_content += html
        tab_content += '\n</div>\n'

        if top_heatmap_dir:
            viewer_name = 'TopHeatmapViewer'
            tab_def_content += '''\n<button class="tablinks" '''
            tab_def_content += '''onclick="openTab(event, '{}')"'''.format(
                viewer_name)
            tab_def_content += '''>Top {}% ({} Rows) Heatmap</button>\n'''.format(
                round(top_percent, 2), display_count)

            heatmap_report_files = os.listdir(top_heatmap_dir)

            heatmap_index_page = None
            for heatmap_report_file in heatmap_report_files:
                if heatmap_report_file.endswith('.html'):
                    heatmap_index_page = heatmap_report_file

                shutil.copy2(
                    os.path.join(top_heatmap_dir, heatmap_report_file),
                    output_directory)

            if heatmap_index_page:
                tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                    viewer_name)
                msg = 'Top {} percent of matrix sorted by sum of abundance values.'.format(
                    round(top_percent, 2))
                tab_content += '''<p style="color:red;" >{}</p>'''.format(msg)

                tab_content += '\n<iframe height="1300px" width="100%" '
                tab_content += 'src="{}" '.format(heatmap_index_page)
                tab_content += 'style="border:none;"></iframe>'
                tab_content += '\n</div>\n'
            else:
                tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                    viewer_name)
                tab_content += '''\n<p style="color:red;" >'''
                tab_content += '''Heatmap is too large to be displayed.</p>\n'''
                tab_content += '\n</div>\n'

        viewer_name = 'MatrixHeatmapViewer'
        tab_def_content += '''\n<button class="tablinks" '''
        tab_def_content += '''onclick="openTab(event, '{}')"'''.format(
            viewer_name)
        tab_def_content += '''>Matrix Heatmap</button>\n'''

        heatmap_report_files = os.listdir(heatmap_dir)

        heatmap_index_page = None
        for heatmap_report_file in heatmap_report_files:
            if heatmap_report_file.endswith('.html'):
                heatmap_index_page = heatmap_report_file

            shutil.copy2(os.path.join(heatmap_dir, heatmap_report_file),
                         output_directory)

        if heatmap_index_page:
            tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                viewer_name)
            tab_content += '\n<iframe height="1300px" width="100%" '
            tab_content += 'src="{}" '.format(heatmap_index_page)
            tab_content += 'style="border:none;"></iframe>'
            tab_content += '\n</div>\n'
        else:
            tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                viewer_name)
            tab_content += '''\n<p style="color:red;" >'''
            tab_content += '''Heatmap is too large to be displayed.</p>\n'''
            tab_content += '\n</div>\n'

        tab_def_content += '\n</div>\n'
        return tab_def_content + tab_content
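
    # The tab markup above relies on an openTab(event, name) JavaScript helper
    # and the .tab/.tablinks/.tabcontent CSS rules supplied by the report
    # template (matrix_template.html, used in _generate_heatmap_html_report).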

    def _generate_heatmap_html_report(self, data):

        logging.info('Start generating heatmap report page')

        data_df = pd.DataFrame(data['values'],
                               index=data['row_ids'],
                               columns=data['col_ids'])
        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)
        tsv_file_path = os.path.join(
            result_directory, 'heatmap_data_{}.tsv'.format(str(uuid.uuid4())))
        data_df.to_csv(tsv_file_path)

        if data_df.index.size < 10000:
            heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path':
                tsv_file_path,
                'cluster_data':
                True
            })['html_dir']
        else:
            logging.info(
                'Original matrix is too large. Skipping clustering in report.'
            )
            heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path':
                tsv_file_path,
                'cluster_data':
                False
            })['html_dir']
        top_heatmap_dir = None
        top_percent = 100
        display_count = 200  # rough count of rows to display
        if len(data_df.index) > 1000:
            top_percent = min(display_count / data_df.index.size * 100, 100)
            top_heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path':
                tsv_file_path,
                'sort_by_sum':
                True,
                'top_percent':
                top_percent
            })['html_dir']

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        logging.info(
            'Start generating html report in {}'.format(output_directory))

        html_report = list()

        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory,
                                        'matrix_viewer_report.html')

        visualization_content = self._generate_visualization_content(
            output_directory, heatmap_dir, data_df, top_heatmap_dir,
            top_percent, display_count)

        with open(result_file_path, 'w') as result_file:
            with open(
                    os.path.join(os.path.dirname(__file__), 'templates',
                                 'matrix_template.html'),
                    'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<p>Visualization_Content</p>', visualization_content)
                result_file.write(report_template)

        report_shock_id = self.dfu.file_to_shock({
            'file_path': output_directory,
            'pack': 'zip'
        })['shock_id']

        html_report.append({
            'shock_id':
            report_shock_id,
            'name':
            os.path.basename(result_file_path),
            'label':
            os.path.basename(result_file_path),
            'description':
            'HTML summary report for Import Amplicon Matrix App'
        })
        return html_report

    def _generate_report(self,
                         matrix_obj_ref,
                         new_row_attr_ref,
                         new_col_attr_ref,
                         workspace_id,
                         data=None):
        """
        _generate_report: generate summary report
        """

        objects_created = [{
            'ref': matrix_obj_ref,
            'description': 'Imported Amplicon Matrix'
        }]

        if new_row_attr_ref:
            objects_created.append({
                'ref':
                new_row_attr_ref,
                'description':
                'Imported Amplicons(Row) Attribute Mapping'
            })

        if new_col_attr_ref:
            objects_created.append({
                'ref':
                new_col_attr_ref,
                'description':
                'Imported Samples(Column) Attribute Mapping'
            })

        if data:
            output_html_files = self._generate_heatmap_html_report(data)

            report_params = {
                'message':
                '',
                'objects_created':
                objects_created,
                'workspace_id':
                workspace_id,
                'html_links':
                output_html_files,
                'direct_html_link_index':
                0,
                'html_window_height':
                1400,
                'report_object_name':
                'import_matrix_from_biom_' + str(uuid.uuid4())
            }

        else:
            report_params = {
                'message':
                '',
                'objects_created':
                objects_created,
                'workspace_id':
                workspace_id,
                'report_object_name':
                'import_matrix_from_biom_' + str(uuid.uuid4())
            }

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {
            'report_name': output['name'],
            'report_ref': output['ref']
        }

        return report_output

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.report_util = kb_GenericsReport(self.callback_url)
        self.data_util = DataUtil(config)
        self.sampleservice_util = SampleServiceUtil(config)
        self.attr_util = AttributesUtil(config)
        self.matrix_util = MatrixUtil(config)
        self.taxon_util = TaxonUtil(config)
        self.matrix_types = [
            x.split(".")[1].split('-')[0]
            for x in self.data_util.list_generic_types()
        ]
        self.taxon_wsname = config['taxon-workspace-name']
        self.kbse = KBaseSearchEngine(config['search-url'])
        self.taxon_cache = dict()

    def fetch_sequence(self, matrix_ref):
        logging.info('start to fetch consensus sequence')

        input_matrix_obj = self.dfu.get_objects({'object_refs':
                                                 [matrix_ref]})['data'][0]
        input_matrix_info = input_matrix_obj['info']
        matrix_name = input_matrix_info[1]
        matrix_type = input_matrix_info[2]
        matrix_data = input_matrix_obj['data']

        if 'KBaseMatrices.AmpliconMatrix' not in matrix_type:
            raise ValueError('Unexpected data type: {}'.format(matrix_type))

        handle = matrix_data.get('sequencing_file_handle')
        if not handle:
            raise ValueError(
                'Missing sequencing_file_handle from the matrix object')

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        logging.info('Start generating consensus sequence file in {}'.format(
            output_directory))
        self._mkdir_p(output_directory)

        matrix_fasta_file = self.dfu.shock_to_file({
            'handle_id': handle,
            'file_path': self.scratch
        }).get('file_path')

        try:
            logging.info('start parsing FASTA file')
            fastq_dict = SeqIO.index(matrix_fasta_file, "fasta")
        except Exception:
            raise ValueError(
                'Cannot parse file. Please provide a valid FASTA file')

        row_ids = matrix_data['data']['row_ids']

        fasta_file_path = os.path.join(
            output_directory, matrix_name + '_consensus_sequence.fasta')

        with open(fasta_file_path, 'w') as f:
            for row_id in row_ids:
                record = fastq_dict.get(row_id)
                if record is None:
                    raise ValueError(
                        'FASTA file does not contain a sequence for {}'.format(row_id))
                f.write('>' + str(row_id) + '\n')
                f.write(str(record.seq) + '\n')

        return fasta_file_path

    def import_matrix_from_biom(self, params):
        """
        arguments:
        obj_type: one of ExpressionMatrix, FitnessMatrix, DifferentialExpressionMatrix
        matrix_name: matrix object name
        workspace_id: workspace id matrix object to be saved to
        input_shock_id: file shock id
        or
        input_file_path: absolute file path
        or
        input_staging_file_path: staging area file path

        optional arguments:
        col_attributemapping_ref: column AttributeMapping reference
        row_attributemapping_ref: row AttributeMapping reference
        genome_ref: genome reference
        matrix_obj_ref: Matrix reference
        """

        (biom_file, tsv_file, fasta_file, mode,
         metadata_keys) = self._process_params(params)

        workspace_id = params.get('workspace_id')
        matrix_name = params.get('matrix_name')
        obj_type = params.get('obj_type')
        scale = params.get('scale')
        description = params.get('description')
        refs = {k: v for k, v in params.items() if "_ref" in k}

        amplicon_data = self._file_to_amplicon_data(biom_file, tsv_file,
                                                    fasta_file, mode, refs,
                                                    matrix_name, workspace_id,
                                                    scale, description,
                                                    metadata_keys)

        for key in [
                'amplicon_type', 'amplification', 'extraction', 'target_gene',
                'target_subfragment', 'pcr_primers', 'library_kit',
                'library_layout', 'library_screening_strategy',
                'sequencing_center', 'sequencing_date',
                'sequencing_technology', 'sequencing_instrument',
                'sequencing_quality_filter_cutoff', 'read_length_cutoff',
                'read_pairing', 'barcode_error_rate',
                'chimera_detection_and_removal', 'taxon_calling_method',
                'denoise_method', 'sequence_error_cutoff', 'clustering_method',
                'clustering_cutoff', 'sample_set_ref', 'reads_set_ref'
        ]:
            if params.get(key):
                amplicon_data[key] = params[key]

        new_row_attr_ref = None
        if not params.get('row_attributemapping_ref'):
            new_row_attr_ref = amplicon_data.get('row_attributemapping_ref')

        new_col_attr_ref = None
        if not params.get('col_attributemapping_ref'):
            new_col_attr_ref = amplicon_data.get('col_attributemapping_ref')

        if fasta_file:
            logging.info(
                'start saving consensus sequence file to shock: {}'.format(
                    fasta_file))
            handle_id = self.dfu.file_to_shock({
                'file_path': fasta_file,
                'make_handle': True
            })['handle']['hid']
            amplicon_data['sequencing_file_handle'] = handle_id

        logging.info('start saving Matrix object: {}'.format(matrix_name))
        matrix_obj_ref = self.data_util.save_object({
            'obj_type':
            'KBaseMatrices.{}'.format(obj_type),
            'obj_name':
            matrix_name,
            'data':
            amplicon_data,
            'workspace_id':
            workspace_id
        })['obj_ref']

        if params.get('sample_set_ref'):
            self.matrix_util._link_matrix_to_samples(matrix_obj_ref,
                                                     amplicon_data,
                                                     params['sample_set_ref'])

        returnVal = {'matrix_obj_ref': matrix_obj_ref}

        report_output = self._generate_report(matrix_obj_ref,
                                              new_row_attr_ref,
                                              new_col_attr_ref,
                                              workspace_id,
                                              data=amplicon_data['data'])

        returnVal.update(report_output)

        return returnVal
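
# A hedged end-to-end sketch (not from the source): driving BiomUtil directly,
# with the same environment-injected values the GenericsAPI constructor below
# reads. 'deploy_config' stands in for the parsed deployment config section.
#
#     import os
#     config = dict(deploy_config)
#     config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
#     config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
#     biom_util = BiomUtil(config)
#     ret = biom_util.import_matrix_from_biom(params)  # params as sketched above
#     print(ret['matrix_obj_ref'], ret['report_name'], ret['report_ref'])
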
class GenericsAPI:
    '''
    Module Name:
    GenericsAPI

    Module Description:
    
    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "1.0.8"
    GIT_URL = "git@github.com:Tianhao-Gu/GenericsAPI.git"
    GIT_COMMIT_HASH = "e5a7c9fc2952bf44ebf8ec76d92322f00b606b3e"

    #BEGIN_CLASS_HEADER
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.config = config
        self.config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
        self.config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
        self.scratch = config['scratch']
        self.attr_util = AttributesUtil(self.config)
        self.matrix_util = MatrixUtil(self.config)
        self.corr_util = CorrelationUtil(self.config)
        self.data_util = DataUtil(self.config)
        self.network_util = NetworkUtil(self.config)
        self.biom_util = BiomUtil(self.config)
        self.pca_util = PCAUtil(self.config)
        self.data_table_util = DataTableUtil(self.config)
        self.template_util = TemplateUtil(self.config)

        logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                            level=logging.INFO)
        #END_CONSTRUCTOR
        pass

    def fetch_data(self, ctx, params):
        """
        fetch_data: fetch generics data as pandas dataframe for a generics data object
        :param params: instance of type "FetchDataParams" (Input of the
           fetch_data function obj_ref: generics object reference Optional
           arguments: generics_module: the generics data module to be
           retrieved from, e.g. for a given data type like below: typedef
           structure { FloatMatrix2D data; condition_set_ref
           condition_set_ref; } SomeGenericsMatrix; generics_module should be
           {'data': 'FloatMatrix2D', 'condition_set_ref':
           'condition_set_ref'}) -> structure: parameter "obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "generics_module"
           of mapping from String to String
        :returns: instance of type "FetchDataReturn" (Output of the fetch_data
           function data_matrix: a pandas dataframe in json format) ->
           structure: parameter "data_matrix" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN fetch_data
        returnVal = self.data_util.fetch_data(params)
        #END fetch_data

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method fetch_data return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
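
    # Hedged usage sketch (the instance name and object reference are placeholders):
    #     ret = impl.fetch_data(ctx, {'obj_ref': '1/2/3'})[0]
    #     df = pd.read_json(ret['data_matrix'])  # data_matrix is a JSON-encoded dataframe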

    def export_matrix(self, ctx, params):
        """
        :param params: instance of type "ExportParams" (Input of the
           export_matrix function obj_ref: generics object reference Optional
           arguments: generics_module: select the generics data to be
           retrieved from, e.g. for a given data type like below: typedef
           structure { FloatMatrix2D data; condition_set_ref
           condition_set_ref; } SomeGenericsMatrix; and only 'FloatMatrix2D'
           is needed, generics_module should be {'data': 'FloatMatrix2D'}) ->
           structure: parameter "obj_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "generics_module" of mapping from String to
           String
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN export_matrix
        returnVal = self.matrix_util.export_matrix(params)
        #END export_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method export_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def validate_data(self, ctx, params):
        """
        validate_data: validate data
        :param params: instance of type "ValidateParams" (Input of the
           validate_data function obj_type: obj type e.g.:
           'KBaseMatrices.ExpressionMatrix-1.1' data: data to be validated)
           -> structure: parameter "obj_type" of String, parameter "data" of
           mapping from String to String
        :returns: instance of type "ValidateOutput" -> structure: parameter
           "validated" of type "boolean" (A boolean - 0 for false, 1 for
           true.), parameter "failed_constraint" of mapping from String to
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN validate_data
        returnVal = self.data_util.validate_data(params)
        #END validate_data

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method validate_data return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def import_matrix_from_excel(self, ctx, params):
        """
        import_matrix_from_excel: import matrix object from excel
        :param params: instance of type "ImportMatrixParams" (Input of the
           import_matrix_from_excel function obj_type: a type in
           KBaseMatrices input_shock_id: file shock id input_file_path:
           absolute file path input_staging_file_path: staging area file path
           matrix_name: matrix object name description: optional, a
           description of the matrix workspace_name: workspace name matrix
           object to be saved to optional: col_attributemapping_ref: column
           AttributeMapping reference row_attributemapping_ref: row
           AttributeMapping reference genome_ref: genome reference
           diff_expr_matrix_ref: DifferentialExpressionMatrix reference
           biochemistry_ref: (for ChemicalAbundanceMatrix) reads_set_ref:
           (raw data for AmpliconMatrix) sample_set_ref: SampleSet object
           reference) -> structure: parameter "obj_type" of String, parameter
           "input_shock_id" of String, parameter "input_file_path" of String,
           parameter "input_staging_file_path" of String, parameter
           "matrix_name" of String, parameter "amplicon_set_name" of String,
           parameter "scale" of String, parameter "description" of String,
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object), parameter "genome_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "col_attributemapping_ref" of
           type "obj_ref" (An X/Y/Z style reference), parameter
           "row_attributemapping_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "diff_expr_matrix_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "biochemistry_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "reads_set_ref" of
           type "obj_ref" (An X/Y/Z style reference), parameter
           "sample_set_ref" of type "obj_ref" (An X/Y/Z style reference),
           parameter "unit" of String, parameter "type" of String
        :returns: instance of type "ImportMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN import_matrix_from_excel
        returnVal = self.matrix_util.import_matrix_from_excel(params)
        #END import_matrix_from_excel

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method import_matrix_from_excel return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def import_matrix_from_biom(self, ctx, params):
        """
        import_matrix_from_biom: import matrix object from BIOM file format
        :param params: instance of type "ImportOTUParams" -> structure:
           parameter "obj_type" of String, parameter
           "taxonomic_abundance_tsv" of String, parameter "taxonomic_fasta"
           of String, parameter "input_local_file" of String, parameter
           "matrix_name" of String, parameter "amplicon_set_name" of String,
           parameter "scale" of String, parameter "description" of String,
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object), parameter "genome_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "col_attributemapping_ref" of
           type "obj_ref" (An X/Y/Z style reference), parameter
           "row_attributemapping_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "diff_expr_matrix_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "biochemistry_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "reads_set_ref" of
           type "obj_ref" (An X/Y/Z style reference), parameter
           "sample_set_ref" of type "obj_ref" (An X/Y/Z style reference),
           parameter "metadata_keys" of list of String, parameter
           "extraction_kit" of String, parameter "amplicon_type" of String,
           parameter "target_gene_region" of String, parameter
           "forward_primer_sequence" of String, parameter
           "reverse_primer_sequence" of String, parameter
           "sequencing_platform" of String, parameter "sequencing_run" of
           String, parameter "sequencing_kit" of String, parameter
           "sequencing_quality_filter_cutoff" of String, parameter
           "clustering_cutoff" of Double, parameter "clustering_method" of
           String
        :returns: instance of type "ImportMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN import_matrix_from_biom
        returnVal = self.biom_util.import_matrix_from_biom(params)
        #END import_matrix_from_biom

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method import_matrix_from_biom return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def save_object(self, ctx, params):
        """
        save_object: validate data constraints and save matrix object
        :param params: instance of type "SaveObjectParams" (Input of the
           save_object function obj_type: saving object data
           type obj_name: saving object name data: data to be saved
           workspace_name: workspace name matrix object to be saved to) ->
           structure: parameter "obj_type" of String, parameter "obj_name" of
           String, parameter "data" of mapping from String to String,
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object)
        :returns: instance of type "SaveObjectOutput" -> structure: parameter
           "obj_ref" of type "obj_ref" (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN save_object
        returnVal = self.data_util.save_object(params)
        #END save_object

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method save_object return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def search_matrix(self, ctx, params):
        """
        search_matrix: generate a HTML report that allows users to select feature ids
        :param params: instance of type "MatrixSelectorParams" (Input of the
           search_matrix function matrix_obj_ref: object reference of a
           matrix workspace_name: workspace name objects to be saved to) ->
           structure: parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "workspace_name" of type
           "workspace_name" (workspace name of the object)
        :returns: instance of type "MatrixSelectorOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN search_matrix
        returnVal = self.matrix_util.search_matrix(params)
        #END search_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method search_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def filter_matrix(self, ctx, params):
        """
        filter_matrix: create sub-matrix based on input filter_ids
        :param params: instance of type "MatrixFilterParams" (Input of the
           filter_matrix function matrix_obj_ref: object reference of a
           matrix workspace_name: workspace name objects to be saved to
           filter_ids: string of column or row ids that result matrix
           contains filtered_matrix_name: name of newly created filtered
           matrix object) -> structure: parameter "matrix_obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name"
           of type "workspace_name" (workspace name of the object), parameter
           "filtered_matrix_name" of String, parameter "remove_ids" of
           String, parameter "dimension" of String
        :returns: instance of type "MatrixFilterOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_refs" of list of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN filter_matrix
        returnVal = self.matrix_util.filter_matrix(params)
        #END filter_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method filter_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def standardize_matrix(self, ctx, params):
        """
        standardize_matrix: standardize a matrix
        :param params: instance of type "StandardizeMatrixParams" (Input of
           the standardize_matrix function input_matrix_ref: object reference
           of a matrix workspace_name: workspace name objects to be saved to
           with_mean: center data before scaling with_std: scale data to unit
           variance new_matrix_name: name of newly created matrix object) ->
           structure: parameter "input_matrix_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "workspace_name" of type
           "workspace_name" (workspace name of the object), parameter
           "with_mean" of type "boolean" (A boolean - 0 for false, 1 for
           true.), parameter "with_std" of type "boolean" (A boolean - 0 for
           false, 1 for true.), parameter "dimension" of String, parameter
           "new_matrix_name" of String
        :returns: instance of type "StandardizeMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN standardize_matrix
        returnVal = self.matrix_util.standardize_matrix(params)
        #END standardize_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method standardize_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def transform_matrix(self, ctx, params):
        """
        :param params: instance of type "TransformMatrixParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of type "workspace_name"
           (workspace name of the object), parameter "workspace_id" of Long,
           parameter "new_matrix_name" of String, parameter
           "abundance_filtering_params" of mapping from String to String,
           parameter "standardization_params" of mapping from String to
           String, parameter "ratio_transformation_params" of mapping from
           String to String, parameter "perform_relative_abundance" of type
           "boolean" (A boolean - 0 for false, 1 for true.)
        :returns: instance of type "TransformMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN transform_matrix
        returnVal = self.matrix_util.transform_matrix(params)
        #END transform_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method transform_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
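
     # Sketch of a transform_matrix call; the nested dicts mirror the
     # "mapping from String to String" params above, and passing them empty
     # (to take defaults) is an assumption, not documented behavior:
     #
     #   params = {
     #       'input_matrix_ref': '1/2/3',          # hypothetical ref
     #       'workspace_id': 12345,                # hypothetical id
     #       'new_matrix_name': 'my_transformed_matrix',
     #       'perform_relative_abundance': 1,
     #       'abundance_filtering_params': {},
     #       'standardization_params': {},
     #       'ratio_transformation_params': {},
     #   }
     #   output = impl.transform_matrix(ctx, params)[0]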

    def perform_rarefy(self, ctx, params):
        """
        :param params: instance of type "RarefyMatrixParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_id" of Long, parameter
           "new_matrix_name" of String, parameter "seed_number" of Long,
           parameter "dimension" of String
        :returns: instance of type "RarefyMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_rarefy
        returnVal = self.matrix_util.perform_rarefy(params)
        #END perform_rarefy

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method perform_rarefy return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
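
     # Rarefaction call sketch; seed_number pins the subsampling RNG so runs
     # are reproducible. Values are placeholders:
     #
     #   params = {
     #       'input_matrix_ref': '1/2/3',      # hypothetical matrix ref
     #       'workspace_id': 12345,
     #       'new_matrix_name': 'my_rarefied_matrix',
     #       'seed_number': 42,
     #       'dimension': 'col',
     #   }
     #   new_ref = impl.perform_rarefy(ctx, params)[0]['new_matrix_obj_ref']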

    def perform_variable_stats_matrix(self, ctx, params):
        """
        :param params: instance of type "VariableStatsParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "attribute_mapping_obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_id" of
           Long, parameter "dist_metric" of String, parameter "dimension" of
           String, parameter "grouping" of String, parameter "permutations"
           of Long, parameter "perform_anosim" of type "boolean" (A boolean -
           0 for false, 1 for true.), parameter "perform_permanova" of type
           "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "perform_permdisp" of type "boolean" (A boolean - 0 for false, 1
           for true.)
        :returns: instance of type "VariableStatsOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_variable_stats_matrix
        returnVal = self.matrix_util.perform_variable_stats_matrix(params)
        #END perform_variable_stats_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method perform_variable_stats_matrix return value ' +
                'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
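
     # Variable-stats call sketch combining the three optional tests; the
     # grouping attribute, metric, and refs are illustrative:
     #
     #   params = {
     #       'input_matrix_ref': '1/2/3',             # hypothetical ref
     #       'attribute_mapping_obj_ref': '1/2/4',    # hypothetical ref
     #       'workspace_id': 12345,
     #       'dist_metric': 'braycurtis',             # example metric
     #       'dimension': 'col',
     #       'grouping': 'treatment',                 # attribute to group by
     #       'permutations': 999,
     #       'perform_anosim': 1,
     #       'perform_permanova': 1,
     #       'perform_permdisp': 0,
     #   }
     #   report = impl.perform_variable_stats_matrix(ctx, params)[0]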

    def perform_mantel_test(self, ctx, params):
        """
        :param params: instance of type "MantelTestParams" -> structure:
           parameter "input_matrix_refs" of list of type "obj_ref" (An X/Y/Z
           style reference), parameter "workspace_id" of Long, parameter
           "dist_metric" of String, parameter "dimension" of String,
           parameter "correlation_method" of String, parameter "permutations"
           of Long, parameter "alternative_hypothesis" of String
        :returns: instance of type "MantelTestOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_mantel_test
        returnVal = self.matrix_util.perform_mantel_test(params)
        #END perform_mantel_test

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method perform_mantel_test return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
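
     # Mantel-test call sketch; note that input_matrix_refs is a *list* of
     # refs, unlike the single-matrix methods. All values are placeholders:
     #
     #   params = {
     #       'input_matrix_refs': ['1/2/3', '1/2/4'],  # hypothetical refs
     #       'workspace_id': 12345,
     #       'dist_metric': 'euclidean',
     #       'dimension': 'col',
     #       'correlation_method': 'pearson',
     #       'permutations': 999,
     #       'alternative_hypothesis': 'two-sided',    # example value
     #   }
     #   report = impl.perform_mantel_test(ctx, params)[0]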

    def file_to_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "FileToAttributeMappingParams"
            (input_shock_id and input_file_path are alternative input params)
           -> structure: parameter "input_shock_id" of String, parameter
           "input_file_path" of String, parameter "output_ws_id" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "FileToAttributeMappingOutput" ->
           structure: parameter "attribute_mapping_ref" of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN file_to_attribute_mapping
        logging.info(
            "Starting 'file_to_attribute_mapping' with params:{}".format(
                params))
        self.attr_util.validate_params(params,
                                       ("output_ws_id", "output_obj_name"),
                                       ('input_shock_id', 'input_file_path'))
        result = self.attr_util.file_to_attribute_mapping(params)
        #END file_to_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method file_to_attribute_mapping return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
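
     # Call sketch showing the alternative inputs validated above: provide
     # either input_shock_id or input_file_path, plus both output fields.
     # The path and id are hypothetical:
     #
     #   params = {
     #       'input_file_path': '/kb/module/work/tmp/attributes.xlsx',
     #       'output_ws_id': 12345,
     #       'output_obj_name': 'my_attribute_mapping',
     #   }
     #   result = impl.file_to_attribute_mapping(ctx, params)[0]
     #   am_ref = result['attribute_mapping_ref']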

    def file_to_fbamodel_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "FileToAttributeMappingParams"
            (input_shock_id and input_file_path are alternative input params)
           -> structure: parameter "input_shock_id" of String, parameter
           "input_file_path" of String, parameter "output_ws_id" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "FileToAttributeMappingOutput" ->
           structure: parameter "attribute_mapping_ref" of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN file_to_fbamodel_attribute_mapping
        logging.info(
            "Starting 'file_to_fbamodel_attribute_mapping' with params:{}".
            format(params))
        self.attr_util.validate_params(params,
                                       ("output_ws_id", "output_obj_name"),
                                       ('input_shock_id', 'input_file_path'))
        params['import_fbamodel_attri_mapping'] = True
        result = self.attr_util.file_to_attribute_mapping(params)
        #END file_to_fbamodel_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError(
                'Method file_to_fbamodel_attribute_mapping return value ' +
                'result is not type dict as required.')
        # return the results
        return [result]

    def update_matrix_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "UpdateMatrixAMParams" -> structure:
           parameter "staging_file_subdir_path" of String, parameter
           "dimension" of String, parameter "input_matrix_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name"
           of String, parameter "output_am_obj_name" of String, parameter
           "output_matrix_obj_name" of String
        :returns: instance of type "UpdateMatrixAMOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "new_attribute_mapping_ref" of type
           "obj_ref" (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN update_matrix_attribute_mapping
        logging.info(
            "Starting 'update_matrix_attribute_mapping' with params:{}".format(
                params))
        self.attr_util.validate_params(
            params, ("staging_file_subdir_path", "dimension", "workspace_name",
                     "output_am_obj_name", "input_matrix_ref",
                     "output_matrix_obj_name"))
        returnVal = self.attr_util.update_matrix_attribute_mapping(params)
        #END update_matrix_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method update_matrix_attribute_mapping return value ' +
                'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
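
     # Update sketch; staging_file_subdir_path points at a user-staged file,
     # and all six params are required by the validation above. Values are
     # illustrative:
     #
     #   params = {
     #       'staging_file_subdir_path': 'updated_attributes.xlsx',
     #       'dimension': 'row',
     #       'input_matrix_ref': '1/2/3',          # hypothetical ref
     #       'workspace_name': 'my_workspace',
     #       'output_am_obj_name': 'my_updated_am',
     #       'output_matrix_obj_name': 'my_updated_matrix',
     #   }
     #   output = impl.update_matrix_attribute_mapping(ctx, params)[0]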

    def attribute_mapping_to_tsv_file(self, ctx, params):
        """
        :param params: instance of type "AttributeMappingToTsvFileParams" ->
           structure: parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "destination_dir" of String
        :returns: instance of type "AttributeMappingToTsvFileOutput" ->
           structure: parameter "file_path" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN attribute_mapping_to_tsv_file
        logging.info(
            "Starting 'attribute_mapping_to_tsv_file' with params:{}".format(
                params))
        self.attr_util.validate_params(params,
                                       ("destination_dir", "input_ref"))
        am_id, result = self.attr_util.to_tsv(params)
        #END attribute_mapping_to_tsv_file

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError(
                'Method attribute_mapping_to_tsv_file return value ' +
                'result is not type dict as required.')
        # return the results
        return [result]

    def export_attribute_mapping_tsv(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_attribute_mapping_tsv
        logging.info(
            "Starting 'export_attribute_mapping_tsv' with params:{}".format(
                params))
        self.attr_util.validate_params(params, ("input_ref", ))
        params['destination_dir'] = self.scratch
        am_id, files = self.attr_util.to_tsv(params)
        result = self.attr_util.export(files['file_path'], am_id,
                                       params['input_ref'])
        #END export_attribute_mapping_tsv

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError(
                'Method export_attribute_mapping_tsv return value ' +
                'result is not type dict as required.')
        # return the results
        return [result]
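
     # The export_* methods here all follow the same pattern: validate the
     # single input_ref, stage files under scratch, then package them into a
     # shock node. A hedged call sketch with a hypothetical ref:
     #
     #   result = impl.export_attribute_mapping_tsv(
     #       ctx, {'input_ref': '1/2/3'})[0]
     #   shock_id = result['shock_id']   # handle to the packaged TSV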

    def export_attribute_mapping_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_attribute_mapping_excel
        logging.info(
            "Starting 'export_attribute_mapping_excel' with params:{}".format(
                params))
        self.attr_util.validate_params(params, ("input_ref", ))
        params['destination_dir'] = self.scratch
        am_id, files = self.attr_util.to_excel(params)
        result = self.attr_util.export(files['file_path'], am_id,
                                       params['input_ref'])
        #END export_attribute_mapping_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError(
                'Method export_attribute_mapping_excel return value ' +
                'result is not type dict as required.')
        # return the results
        return [result]

    def export_cluster_set_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_cluster_set_excel
        logging.info(
            "Starting 'export_cluster_set_excel' with params:{}".format(
                params))
        self.attr_util.validate_params(params, ("input_ref", ))
        params['destination_dir'] = self.scratch
        cs_id, files = self.attr_util.to_excel(params)
        result = self.attr_util.export(files['file_path'], cs_id,
                                       params['input_ref'])
        #END export_cluster_set_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_cluster_set_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_corr_matrix_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_corr_matrix_excel
        logging.info(
            "Starting 'export_corr_matrix_excel' with params:{}".format(
                params))
        result = self.corr_util.export_corr_matrix_excel(params)
        #END export_corr_matrix_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_corr_matrix_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_pca_matrix_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_pca_matrix_excel
        result = self.pca_util.export_pca_matrix_excel(params)
        #END export_pca_matrix_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_pca_matrix_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_amplicon_set_tsv(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_amplicon_set_tsv
        result = self.biom_util.export_amplicon_set_tsv(params)
        #END export_amplicon_set_tsv

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_amplicon_set_tsv return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def compute_correlation_matrix(self, ctx, params):
        """
         compute_correlation_matrix: compute a correlation matrix on the input matrix
        :param params: instance of type "CompCorrParams" (Input of the
           compute_correlation_matrix function input_obj_ref: object
            reference of a matrix workspace_name: workspace name that objects
            will be saved to corr_matrix_name: correlation matrix object name
           dimension: compute correlation on column or row, one of ['col',
           'row'] method: correlation method, one of ['pearson', 'kendall',
           'spearman'] plot_corr_matrix: plot correlation matrix in report,
           default False plot_scatter_matrix: plot scatter matrix in report,
            default False compute_significance: also compute significance in
            addition to the correlation matrix) -> structure: parameter
           "input_obj_ref" of type "obj_ref" (An X/Y/Z style reference),
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object), parameter "corr_matrix_name" of String,
           parameter "dimension" of String, parameter "method" of String,
           parameter "plot_corr_matrix" of type "boolean" (A boolean - 0 for
           false, 1 for true.), parameter "plot_scatter_matrix" of type
           "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "compute_significance" of type "boolean" (A boolean - 0 for false,
           1 for true.)
        :returns: instance of type "CompCorrOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "corr_matrix_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN compute_correlation_matrix
        returnVal = self.corr_util.compute_correlation_matrix(params)
        #END compute_correlation_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method compute_correlation_matrix return value ' +
                'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
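
     # Correlation call sketch; method must be one of pearson, kendall, or
     # spearman per the docstring. The ref and names are placeholders:
     #
     #   params = {
     #       'input_obj_ref': '1/2/3',         # hypothetical matrix ref
     #       'workspace_name': 'my_workspace',
     #       'corr_matrix_name': 'my_corr_matrix',
     #       'dimension': 'col',
     #       'method': 'spearman',
     #       'plot_corr_matrix': 1,
     #       'plot_scatter_matrix': 0,
     #       'compute_significance': 1,
     #   }
     #   output = impl.compute_correlation_matrix(ctx, params)[0]
     #   corr_ref = output['corr_matrix_obj_ref']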

    def compute_correlation_across_matrices(self, ctx, params):
        """
        compute_correlation_across_matrices: compute correlation matrix across matrices
        :param params: instance of type "CompCorrMetriceParams" (Input of the
           compute_correlation_across_matrices function matrix_ref_1: object
           reference of a matrix matrix_ref_2: object reference of a matrix
            workspace_name: workspace name that objects will be saved to
           corr_matrix_name: correlation matrix object name dimension:
           compute correlation on column or row, one of ['col', 'row']
           method: correlation method, one of ['pearson', 'kendall',
           'spearman'] plot_corr_matrix: plot correlation matrix in report,
            default False compute_significance: also compute significance in
            addition to the correlation matrix) -> structure: parameter
           "matrix_ref_1" of type "obj_ref" (An X/Y/Z style reference),
           parameter "matrix_ref_2" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of type "workspace_name"
           (workspace name of the object), parameter "corr_matrix_name" of
           String, parameter "dimension" of String, parameter "method" of
           String, parameter "plot_corr_matrix" of type "boolean" (A boolean
           - 0 for false, 1 for true.), parameter "compute_significance" of
           type "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "corr_threshold" of Double
        :returns: instance of type "CompCorrOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "corr_matrix_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN compute_correlation_across_matrices
        returnVal = self.corr_util.compute_correlation_across_matrices(params)
        #END compute_correlation_across_matrices

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method compute_correlation_across_matrices return value ' +
                'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
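
     # Cross-matrix correlation sketch; corr_threshold can prune weak pairs
     # before saving. Both refs are hypothetical:
     #
     #   params = {
     #       'matrix_ref_1': '1/2/3',
     #       'matrix_ref_2': '1/2/4',
     #       'workspace_name': 'my_workspace',
     #       'corr_matrix_name': 'my_cross_corr',
     #       'dimension': 'col',
     #       'method': 'pearson',
     #       'plot_corr_matrix': 0,
     #       'compute_significance': 1,
     #       'corr_threshold': 0.5,
     #   }
     #   output = impl.compute_correlation_across_matrices(ctx, params)[0]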

    def build_network(self, ctx, params):
        """
         build_network: filter a correlation matrix and build a network
        :param params: instance of type "BuildNetworkParams" (Input of the
           build_network function corr_matrix_ref: CorrelationMatrix object
            workspace_name: workspace name that objects will be saved to
            network_obj_name: Network object name filter_on_threshold:
            dictionary that holds the filter-on-threshold params. params in
            filter_on_threshold: coefficient_threshold: correlation
           coefficient threshold (select pairs with greater correlation
           coefficient)) -> structure: parameter "corr_matrix_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name"
           of type "workspace_name" (workspace name of the object), parameter
           "network_obj_name" of String, parameter "filter_on_threshold" of
           mapping from String to String
        :returns: instance of type "BuildNetworkOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "network_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN build_network
        returnVal = self.network_util.build_network(params)
        #END build_network

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method build_network return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
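
     # Network-building sketch; filter_on_threshold carries the
     # coefficient_threshold documented above, passed as a string per the
     # String-to-String mapping. The ref is hypothetical:
     #
     #   params = {
     #       'corr_matrix_ref': '1/2/3',       # CorrelationMatrix ref
     #       'workspace_name': 'my_workspace',
     #       'network_obj_name': 'my_network',
     #       'filter_on_threshold': {'coefficient_threshold': '0.6'},
     #   }
     #   net_ref = impl.build_network(ctx, params)[0]['network_obj_ref']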

    def run_pca(self, ctx, params):
        """
        run_pca: PCA analysis on matrix
        :param params: instance of type "PCAParams" (Input of the run_pca
           function input_obj_ref: object reference of a matrix
           workspace_name: the name of the workspace pca_matrix_name: name of
           PCA (KBaseExperiments.PCAMatrix) object dimension: compute PCA on
            column or row, one of ['col', 'row'] n_components: number of
            components (default 2) attribute_mapping_obj_ref: associated
            attribute mapping object reference scale_size_by: used in the
            PCA plot to scale data point size color_marker_by: used in the
            PCA plot to group data points)
           -> structure: parameter "input_obj_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "workspace_name" of String,
           parameter "pca_matrix_name" of String, parameter "dimension" of
           String, parameter "n_components" of Long, parameter
           "attribute_mapping_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "scale_size_by" of mapping from String to
           String, parameter "color_marker_by" of mapping from String to
           String
         :returns: instance of type "PCAOutput" (Output of the run_pca function
           pca_ref: PCA object reference (as KBaseExperiments.PCAMatrix data
           type) report_name: report name generated by KBaseReport
           report_ref: report reference generated by KBaseReport) ->
           structure: parameter "pca_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN run_pca
        returnVal = self.pca_util.run_pca(params)
        #END run_pca

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method run_pca return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
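
     # PCA call sketch; scale_size_by and color_marker_by each take an
     # attribute name to drive the plot, and the mapping keys used here are
     # assumptions rather than documented names:
     #
     #   params = {
     #       'input_obj_ref': '1/2/3',              # hypothetical ref
     #       'workspace_name': 'my_workspace',
     #       'pca_matrix_name': 'my_pca_matrix',
     #       'dimension': 'col',
     #       'n_components': 2,
     #       'attribute_mapping_obj_ref': '1/2/4',  # hypothetical ref
     #       'scale_size_by': {'attribute': 'abundance'},    # assumed key
     #       'color_marker_by': {'attribute': 'treatment'},  # assumed key
     #   }
     #   pca_ref = impl.run_pca(ctx, params)[0]['pca_ref']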

    def view_matrix(self, ctx, params):
        """
         view_matrix: generate a report for the matrix viewer
        :param params: instance of type "ViewMatrixParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of String, parameter
           "with_attribute_info" of type "boolean" (A boolean - 0 for false,
           1 for true.)
        :returns: instance of type "ViewMatrixOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN view_matrix
        returnVal = self.data_table_util.view_matrix_as_table(params)
        #END view_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method view_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
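
     # Viewer sketch; with_attribute_info toggles whether row/column
     # attributes are folded into the rendered table. Values illustrative:
     #
     #   params = {
     #       'input_matrix_ref': '1/2/3',      # hypothetical matrix ref
     #       'workspace_name': 'my_workspace',
     #       'with_attribute_info': 1,
     #   }
     #   report = impl.view_matrix(ctx, params)[0]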

    def build_chemical_abundance_template(self, ctx, params):
        """
        :param params: instance of type "ChemAbunTempParams" -> structure:
           parameter "workspace_name" of String, parameter "workspace_id" of
           Long, parameter "sample_set_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "chemical_data_included" of mapping from
           String to Long, parameter "chemical_ids_included" of mapping from
           String to Long
        :returns: instance of type "ViewMatrixOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN build_chemical_abundance_template
        returnVal = self.template_util.build_chemical_abundance_template(
            params)
        #END build_chemical_abundance_template

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError(
                'Method build_chemical_abundance_template return value ' +
                'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
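
     # Template-builder sketch; the two mappings select which chemical data
     # and id columns appear in the template, and the key names used here
     # are assumptions rather than documented values:
     #
     #   params = {
     #       'workspace_name': 'my_workspace',
     #       'workspace_id': 12345,
     #       'sample_set_ref': '1/2/3',                      # hypothetical ref
     #       'chemical_data_included': {'units': 1},         # assumed key
     #       'chemical_ids_included': {'compound_name': 1},  # assumed key
     #   }
     #   report = impl.build_chemical_abundance_template(ctx, params)[0]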

    def status(self, ctx):
        #BEGIN_STATUS
        returnVal = {
            'state': "OK",
            'message': "",
            'version': self.VERSION,
            'git_url': self.GIT_URL,
            'git_commit_hash': self.GIT_COMMIT_HASH
        }
        #END_STATUS
        return [returnVal]