def setUpClass(cls):
    cls.token = environ.get('KB_AUTH_TOKEN', None)
    config_file = environ.get('KB_DEPLOYMENT_CONFIG', None)
    cls.cfg = {}
    config = ConfigParser()
    config.read(config_file)
    for nameval in config.items('GenericsAPI'):
        cls.cfg[nameval[0]] = nameval[1]
    # Getting username from Auth profile for token
    authServiceUrl = cls.cfg['auth-service-url']
    auth_client = _KBaseAuth(authServiceUrl)
    user_id = auth_client.get_user(cls.token)
    # WARNING: don't call any logging methods on the context object,
    # it'll result in a NoneType error
    cls.ctx = MethodContext(None)
    cls.ctx.update({'token': cls.token,
                    'user_id': user_id,
                    'provenance': [{'service': 'GenericsAPI',
                                    'method': 'please_never_use_it_in_production',
                                    'method_params': []}],
                    'authenticated': 1})
    cls.wsURL = cls.cfg['workspace-url']
    cls.shockURL = cls.cfg['shock-url']
    cls.wsClient = workspaceService(cls.wsURL)
    cls.serviceImpl = GenericsAPI(cls.cfg)
    cls.scratch = cls.cfg['scratch']
    cls.callback_url = os.environ['SDK_CALLBACK_URL']
    cls.matrix_util = MatrixUtil(cls.cfg)
    cls.dfu = DataFileUtil(cls.callback_url)
    cls.hs = HandleService(url=cls.cfg['handle-service-url'],
                           token=cls.token)

    suffix = int(time.time() * 1000)
    cls.wsName = "test_GenericsAPI_" + str(suffix)
    ret = cls.wsClient.create_workspace({'workspace': cls.wsName})
    cls.wsId = ret[0]

    small_file = os.path.join(cls.scratch, 'test.txt')
    with open(small_file, "w") as f:
        f.write("empty content")
    cls.test_shock = cls.dfu.file_to_shock({'file_path': small_file,
                                            'make_handle': True})
    cls.handles_to_delete = []
    cls.nodes_to_delete = []
    cls.handles_to_delete.append(cls.test_shock['handle']['hid'])
    cls.nodes_to_delete.append(cls.test_shock['shock_id'])

    cls.prepare_data()
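# A matching tearDownClass would release the resources created above. The
# sketch below is illustrative only, assuming the usual KBase SDK test
# cleanup pattern; `delete_shock_node` is a hypothetical helper that would
# issue an authenticated DELETE against each shock node:
#
#     @classmethod
#     def tearDownClass(cls):
#         if hasattr(cls, 'wsName'):
#             cls.wsClient.delete_workspace({'workspace': cls.wsName})
#         if hasattr(cls, 'nodes_to_delete'):
#             for node in cls.nodes_to_delete:
#                 cls.delete_shock_node(node)
#         if hasattr(cls, 'handles_to_delete'):
#             cls.hs.delete_handles(cls.hs.hids_to_handles(cls.handles_to_delete))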
class BiomUtil:

    def _mkdir_p(self, path):
        """
        _mkdir_p: make directory for given path
        """
        if not path:
            return
        try:
            os.makedirs(path)
        except OSError as exc:
            if exc.errno == errno.EEXIST and os.path.isdir(path):
                pass
            else:
                raise

    def _process_params(self, params):
        logging.info('start validating import_matrix_from_biom params')

        # check for required parameters
        for p in ['obj_type', 'matrix_name', 'workspace_id', 'scale',
                  'amplicon_type', 'sequencing_technology',
                  'sequencing_instrument', 'target_gene', 'target_subfragment',
                  'taxon_calling']:
            if p not in params:
                raise ValueError('"{}" parameter is required, but missing'.format(p))

        # check that sequencing_technology and sequencing_instrument match
        sequencing_technology = params.get('sequencing_technology')
        sequencing_instrument = params.get('sequencing_instrument')
        if sequencing_technology not in SEQ_INSTRUMENTS_MAP:
            raise ValueError('Unexpected sequencing technology: {}'.format(
                sequencing_technology))
        expected_instruments = SEQ_INSTRUMENTS_MAP.get(sequencing_technology)
        if sequencing_instrument not in expected_instruments:
            raise ValueError(
                'Please select a sequencing instrument among {} for {}'.format(
                    expected_instruments, sequencing_technology))

        # check that target_gene and target_subfragment match
        target_gene = params.get('target_gene')
        target_subfragment = list(set(params.get('target_subfragment')))
        params['target_subfragment'] = target_subfragment
        if target_gene not in TARGET_GENE_SUBFRAGMENT_MAP:
            raise ValueError('Unexpected target gene: {}'.format(target_gene))
        expected_subfragments = TARGET_GENE_SUBFRAGMENT_MAP.get(target_gene)
        if not set(target_subfragment) <= set(expected_subfragments):
            raise ValueError(
                'Please select target subfragments among {} for {}'.format(
                    expected_subfragments, target_gene))

        # check taxon_calling
        taxon_calling = params.get('taxon_calling')
        taxon_calling_method = list(set(taxon_calling.get('taxon_calling_method')))
        params['taxon_calling_method'] = taxon_calling_method

        if 'denoising' in taxon_calling_method:
            denoise_method = taxon_calling.get('denoise_method')
            sequence_error_cutoff = taxon_calling.get('sequence_error_cutoff')
            if not (denoise_method and sequence_error_cutoff):
                raise ValueError(
                    'Please provide denoise_method and sequence_error_cutoff')
            params['denoise_method'] = denoise_method
            params['sequence_error_cutoff'] = sequence_error_cutoff

        if 'clustering' in taxon_calling_method:
            clustering_method = taxon_calling.get('clustering_method')
            clustering_cutoff = taxon_calling.get('clustering_cutoff')
            if not (clustering_method and clustering_cutoff):
                raise ValueError(
                    'Please provide clustering_method and clustering_cutoff')
            params['clustering_method'] = clustering_method
            params['clustering_cutoff'] = clustering_cutoff

        obj_type = params.get('obj_type')
        if obj_type not in self.matrix_types:
            raise ValueError('Unknown matrix object type: {}'.format(obj_type))

        scale = params.get('scale')
        if scale not in SCALE_TYPES:
            raise ValueError('Unknown scale type: {}'.format(scale))

        biom_file = None
        tsv_file = None
        fasta_file = None
        # copy so the module-level default list is not mutated by the += below
        metadata_keys = list(DEFAULT_META_KEYS)
        input_local_file = params.get('input_local_file', False)

        if params.get('taxonomic_abundance_tsv') and params.get('taxonomic_fasta'):
            tsv_file = params.get('taxonomic_abundance_tsv')
            fasta_file = params.get('taxonomic_fasta')

            if not (tsv_file and fasta_file):
                raise ValueError('missing TSV or FASTA file')

            if not input_local_file:
                tsv_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': tsv_file}).get('copy_file_path')
                fasta_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': fasta_file}).get('copy_file_path')

            metadata_keys_str = params.get('metadata_keys')
            if metadata_keys_str:
                metadata_keys += [x.strip() for x in metadata_keys_str.split(',')]
            mode = 'tsv_fasta'
        elif params.get('biom_fasta'):
            biom_fasta = params.get('biom_fasta')
            biom_file = biom_fasta.get('biom_file_biom_fasta')
            fasta_file = biom_fasta.get('fasta_file_biom_fasta')

            if not (biom_file and fasta_file):
                raise ValueError('missing BIOM or FASTA file')

            if not input_local_file:
                biom_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': biom_file}).get('copy_file_path')
                fasta_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': fasta_file}).get('copy_file_path')
            mode = 'biom_fasta'
        elif params.get('tsv_fasta'):
            tsv_fasta = params.get('tsv_fasta')
            tsv_file = tsv_fasta.get('tsv_file_tsv_fasta')
            fasta_file = tsv_fasta.get('fasta_file_tsv_fasta')

            if not (tsv_file and fasta_file):
                raise ValueError('missing TSV or FASTA file')

            if not input_local_file:
                tsv_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': tsv_file}).get('copy_file_path')
                fasta_file = self.dfu.download_staging_file(
                    {'staging_file_subdir_path': fasta_file}).get('copy_file_path')

            metadata_keys_str = tsv_fasta.get('metadata_keys_tsv_fasta')
            if metadata_keys_str:
                metadata_keys += [x.strip() for x in metadata_keys_str.split(',')]
            mode = 'tsv_fasta'
        else:
            raise ValueError('missing valid file group type in parameters')

        return (biom_file, tsv_file, fasta_file, mode, list(set(metadata_keys)))

    def _validate_fasta_file(self, df, fasta_file):
        logging.info('start validating FASTA file')
        try:
            fasta_dict = SeqIO.index(fasta_file, "fasta")
        except Exception:
            raise ValueError('Cannot parse file. Please provide a valid FASTA file')

        matrix_ids = df.index
        file_ids = fasta_dict.keys()

        unmatched_ids = set(matrix_ids) - set(file_ids)
        if unmatched_ids:
            raise ValueError(
                'FASTA file does not have [{}] OTU id'.format(unmatched_ids))

    def _file_to_amplicon_data(self, biom_file, tsv_file, fasta_file, mode, refs,
                               matrix_name, workspace_id, scale, description,
                               metadata_keys=None):

        amplicon_data = refs

        if mode.startswith('biom'):
            logging.info('start parsing BIOM file for matrix data')
            table = biom.load_table(biom_file)
            observation_metadata = table._observation_metadata
            sample_metadata = table._sample_metadata

            matrix_data = {'row_ids': table._observation_ids.tolist(),
                           'col_ids': table._sample_ids.tolist(),
                           'values': table.matrix_data.toarray().tolist()}

            logging.info('start building attribute mapping object')
            amplicon_data.update(self.get_attribute_mapping(
                "row", observation_metadata, matrix_data, matrix_name, refs,
                workspace_id))
            amplicon_data.update(self.get_attribute_mapping(
                "col", sample_metadata, matrix_data, matrix_name, refs,
                workspace_id))

            amplicon_data['attributes'] = {}
            for k in ('create_date', 'generated_by'):
                val = getattr(table, k)
                if not val:
                    continue
                if isinstance(val, bytes):
                    amplicon_data['attributes'][k] = val.decode('utf-8')
                else:
                    amplicon_data['attributes'][k] = str(val)
        elif mode.startswith('tsv'):
            observation_metadata = None
            sample_metadata = None
            try:
                logging.info('start parsing TSV file for matrix data')
                reader = pd.read_csv(tsv_file, sep=None, iterator=True)
                inferred_sep = reader._engine.data.dialect.delimiter
                df = pd.read_csv(tsv_file, sep=inferred_sep, index_col=0)
            except Exception:
                raise ValueError('Cannot parse file. Please provide a valid TSV file')
            else:
                self._validate_fasta_file(df, fasta_file)

            metadata_df = None
            if metadata_keys:
                shared_metadata_keys = list(set(metadata_keys) & set(df.columns))
                if mode == 'tsv' and 'consensus_sequence' not in shared_metadata_keys:
                    raise ValueError('TSV file does not include consensus_sequence')
                if shared_metadata_keys:
                    metadata_df = df[shared_metadata_keys]
                    df.drop(columns=shared_metadata_keys, inplace=True)

            try:
                df = df.astype(float)
            except ValueError:
                err_msg = 'Found some non-float values. A matrix may only contain numeric values\n'
                err_msg += 'Please list any non-numeric column names in the Metadata Keys field'
                raise ValueError(err_msg)
            df.fillna(0, inplace=True)
            df.index = df.index.astype('str')
            df.columns = df.columns.astype('str')
            matrix_data = {'row_ids': df.index.tolist(),
                           'col_ids': df.columns.tolist(),
                           'values': df.values.tolist()}

            logging.info('start building attribute mapping object')
            amplicon_data.update(self.get_attribute_mapping(
                "row", observation_metadata, matrix_data, matrix_name, refs,
                workspace_id, metadata_df=metadata_df))
            amplicon_data.update(self.get_attribute_mapping(
                "col", sample_metadata, matrix_data, matrix_name, refs,
                workspace_id))

            amplicon_data['attributes'] = {}
        else:
            raise ValueError(
                'error parsing _file_to_amplicon_data, mode: {}'.format(mode))

        amplicon_data.update({'data': matrix_data})

        amplicon_data['search_attributes'] = [f'{k}|{v}' for k, v in
                                              amplicon_data['attributes'].items()]

        amplicon_data['scale'] = scale
        if description:
            amplicon_data['description'] = description

        return amplicon_data

    def get_attribute_mapping(self, axis, metadata, matrix_data, matrix_name,
                              refs, workspace_id, metadata_df=None):
        mapping_data = {}
        axis_ids = matrix_data[f'{axis}_ids']
        if refs.get('sample_set_ref') and axis == 'col':
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[f'{axis}_attributemapping_ref'] = \
                self._sample_set_to_attribute_mapping(
                    axis_ids, refs.get('sample_set_ref'), name, workspace_id)
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif refs.get(f'{axis}_attributemapping_ref'):
            am_data = self.dfu.get_objects(
                {'object_refs': [refs[f'{axis}_attributemapping_ref']]}
            )['data'][0]['data']
            unmatched_ids = set(axis_ids) - set(am_data['instances'].keys())
            if unmatched_ids:
                name = "Column" if axis == 'col' else "Row"
                raise ValueError(
                    f"The following {name} IDs from the uploaded matrix do not match "
                    f"the supplied {name} attribute mapping: {', '.join(unmatched_ids)}"
                    f"\nPlease verify the input data or upload an Excel file with a "
                    f"{name} mapping tab.")
            else:
                mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif metadata:
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[f'{axis}_attributemapping_ref'] = \
                self._metadata_to_attribute_mapping(
                    axis_ids, metadata, name, workspace_id)
            # if coming from a BIOM file, metadata and axis IDs are guaranteed to match
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}
        elif metadata_df is not None:
            name = matrix_name + "_{}_attributes".format(axis)
            mapping_data[f'{axis}_attributemapping_ref'] = \
                self._meta_df_to_attribute_mapping(
                    axis_ids, metadata_df, name, workspace_id)
            mapping_data[f'{axis}_mapping'] = {x: x for x in axis_ids}

        return mapping_data

    def _meta_df_to_attribute_mapping(self, axis_ids, metadata_df, obj_name, ws_id):
        data = {'ontology_mapping_method': "TSV file", 'instances': {}}
        metadata_df = metadata_df.astype(str)
        attribute_keys = metadata_df.columns.tolist()
        data['attributes'] = [{'attribute': key, 'source': 'upload'}
                              for key in attribute_keys]

        if 'taxonomy' in attribute_keys:
            data['attributes'].append({'attribute': 'parsed_user_taxonomy',
                                       'source': 'upload'})

        for axis_id in axis_ids:
            data['instances'][axis_id] = metadata_df.loc[axis_id].tolist()
            if 'taxonomy' in attribute_keys:
                taxonomy_index = attribute_keys.index('taxonomy')
                taxonomy_str = metadata_df.loc[axis_id].tolist()[taxonomy_index]
                parsed_user_taxonomy = self.taxon_util.process_taxonomic_str(
                    taxonomy_str)
                data['instances'][axis_id].append(parsed_user_taxonomy)

        logging.info('start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id": ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": data,
                "name": obj_name
            }]
        })[0]

        return f'{info[6]}/{info[0]}/{info[4]}'

    def _sample_set_to_attribute_mapping(self, axis_ids, sample_set_ref,
                                         obj_name, ws_id):

        am_data = self.sampleservice_util.sample_set_to_attribute_mapping(
            sample_set_ref)

        unmatched_ids = set(axis_ids) - set(am_data['instances'].keys())
        if unmatched_ids:
            name = "Column"
            raise ValueError(
                f"The following {name} IDs from the uploaded matrix do not match "
                f"the supplied {name} attribute mapping: {', '.join(unmatched_ids)}"
                f"\nPlease verify the input data or upload an Excel file with a "
                f"{name} mapping tab.")

        logging.info('start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id": ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": am_data,
                "name": obj_name
            }]
        })[0]

        return f'{info[6]}/{info[0]}/{info[4]}'

    def _metadata_to_attribute_mapping(self, instances, metadata, obj_name, ws_id):
        data = {'ontology_mapping_method': "BIOM file", 'instances': {}}
        sample_set = metadata[0:min(len(metadata), 25)]
        metadata_keys = sorted(set((k for m_dict in sample_set for k in m_dict)))
        data['attributes'] = [{'attribute': key, 'source': 'upload'}
                              for key in metadata_keys]
        for inst, meta in zip(instances, metadata):
            data['instances'][inst] = [str(meta[attr]) for attr in metadata_keys]

        logging.info('start saving AttributeMapping object: {}'.format(obj_name))
        info = self.dfu.save_objects({
            "id": ws_id,
            "objects": [{
                "type": "KBaseExperiments.AttributeMapping",
                "data": data,
                "name": obj_name
            }]
        })[0]

        return f'{info[6]}/{info[0]}/{info[4]}'

    def _generate_visualization_content(self, output_directory, heatmap_dir,
                                        data_df, top_heatmap_dir, top_percent,
                                        display_count):

        row_data_summary = data_df.T.describe().round(2).to_string()
        col_data_summary = data_df.describe().round(2).to_string()

        tab_def_content = ''
        tab_content = ''

        viewer_name = 'data_summary'
        tab_def_content += '''\n<div class="tab">\n'''
        tab_def_content += '''\n<button class="tablinks" '''
        tab_def_content += '''onclick="openTab(event, '{}')"'''.format(viewer_name)
        tab_def_content += ''' id="defaultOpen"'''
        tab_def_content += '''>Matrix Statistics</button>\n'''

        tab_content += '''\n<div id="{}" class="tabcontent" style="overflow:auto">'''.format(
            viewer_name)
        tab_content += '''\n<h5>Amplicon Matrix Size: {} x {}</h5>'''.format(
            len(data_df.index), len(data_df.columns))
        tab_content += '''\n<h5>Row Aggregating Statistics</h5>'''
        html = '''\n<pre class="tab">''' + str(row_data_summary).replace(
            "\n", "<br>") + "</pre>"
        tab_content += html
        tab_content += '''\n<br>'''
        tab_content += '''\n<hr style="height:2px;border-width:0;color:gray;background-color:gray">'''
        tab_content += '''\n<br>'''
        tab_content += '''\n<h5>Column Aggregating Statistics</h5>'''
        html = '''\n<pre class="tab">''' + str(col_data_summary).replace(
            "\n", "<br>") + "</pre>"
        tab_content += html
        tab_content += '\n</div>\n'

        if top_heatmap_dir:
            viewer_name = 'TopHeatmapViewer'
            tab_def_content += '''\n<button class="tablinks" '''
            tab_def_content += '''onclick="openTab(event, '{}')"'''.format(viewer_name)
            tab_def_content += '''>Top {}% ({} Rows) Heatmap</button>\n'''.format(
                round(top_percent, 2), display_count)

            heatmap_report_files = os.listdir(top_heatmap_dir)

            heatmap_index_page = None
            for heatmap_report_file in heatmap_report_files:
                if heatmap_report_file.endswith('.html'):
                    heatmap_index_page = heatmap_report_file

                shutil.copy2(os.path.join(top_heatmap_dir, heatmap_report_file),
                             output_directory)

            if heatmap_index_page:
                tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                    viewer_name)
                msg = 'Top {} percent of matrix sorted by sum of abundance values.'.format(
                    round(top_percent, 2))
                tab_content += '''<p style="color:red;" >{}</p>'''.format(msg)

                tab_content += '\n<iframe height="1300px" width="100%" '
                tab_content += 'src="{}" '.format(heatmap_index_page)
                tab_content += 'style="border:none;"></iframe>'
                tab_content += '\n</div>\n'
            else:
                tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                    viewer_name)
                tab_content += '''\n<p style="color:red;" >'''
                tab_content += '''Heatmap is too large to be displayed.</p>\n'''
                tab_content += '\n</div>\n'

        viewer_name = 'MatrixHeatmapViewer'
        tab_def_content += '''\n<button class="tablinks" '''
        tab_def_content += '''onclick="openTab(event, '{}')"'''.format(viewer_name)
        tab_def_content += '''>Matrix Heatmap</button>\n'''

        heatmap_report_files = os.listdir(heatmap_dir)

        heatmap_index_page = None
        for heatmap_report_file in heatmap_report_files:
            if heatmap_report_file.endswith('.html'):
                heatmap_index_page = heatmap_report_file

            shutil.copy2(os.path.join(heatmap_dir, heatmap_report_file),
                         output_directory)

        if heatmap_index_page:
            tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                viewer_name)

            tab_content += '\n<iframe height="1300px" width="100%" '
            tab_content += 'src="{}" '.format(heatmap_index_page)
            tab_content += 'style="border:none;"></iframe>'
            tab_content += '\n</div>\n'
        else:
            tab_content += '''\n<div id="{}" class="tabcontent">'''.format(
                viewer_name)
            tab_content += '''\n<p style="color:red;" >'''
            tab_content += '''Heatmap is too large to be displayed.</p>\n'''
            tab_content += '\n</div>\n'

        tab_def_content += '\n</div>\n'

        return tab_def_content + tab_content

    def _generate_heatmap_html_report(self, data):

        logging.info('Start generating heatmap report page')

        data_df = pd.DataFrame(data['values'], index=data['row_ids'],
                               columns=data['col_ids'])
        result_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        self._mkdir_p(result_directory)
        tsv_file_path = os.path.join(
            result_directory, 'heatmap_data_{}.tsv'.format(str(uuid.uuid4())))
        data_df.to_csv(tsv_file_path)

        if data_df.index.size < 10000:
            heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path': tsv_file_path,
                'cluster_data': True})['html_dir']
        else:
            logging.info('Original matrix is too large. Skip clustering data in report.')
            heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path': tsv_file_path,
                'cluster_data': False})['html_dir']

        top_heatmap_dir = None
        top_percent = 100
        display_count = 200  # rough count of items to display
        if len(data_df.index) > 1000:
            top_percent = min(display_count / data_df.index.size * 100, 100)
            top_heatmap_dir = self.report_util.build_heatmap_html({
                'tsv_file_path': tsv_file_path,
                'sort_by_sum': True,
                'top_percent': top_percent})['html_dir']

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        logging.info('Start generating html report in {}'.format(output_directory))

        html_report = list()

        self._mkdir_p(output_directory)
        result_file_path = os.path.join(output_directory,
                                        'matrix_viewer_report.html')

        visualization_content = self._generate_visualization_content(
            output_directory, heatmap_dir, data_df, top_heatmap_dir,
            top_percent, display_count)

        with open(result_file_path, 'w') as result_file:
            with open(os.path.join(os.path.dirname(__file__), 'templates',
                                   'matrix_template.html'),
                      'r') as report_template_file:
                report_template = report_template_file.read()
                report_template = report_template.replace(
                    '<p>Visualization_Content</p>', visualization_content)
                result_file.write(report_template)

        report_shock_id = self.dfu.file_to_shock({'file_path': output_directory,
                                                  'pack': 'zip'})['shock_id']

        html_report.append({'shock_id': report_shock_id,
                            'name': os.path.basename(result_file_path),
                            'label': os.path.basename(result_file_path),
                            'description': 'HTML summary report for Import Amplicon Matrix App'
                            })

        return html_report

    def _generate_report(self, matrix_obj_ref, new_row_attr_ref,
                         new_col_attr_ref, workspace_id, data=None):
        """
        _generate_report: generate summary report
        """
        objects_created = [{'ref': matrix_obj_ref,
                            'description': 'Imported Amplicon Matrix'}]

        if new_row_attr_ref:
            objects_created.append({'ref': new_row_attr_ref,
                                    'description': 'Imported Amplicons(Row) Attribute Mapping'})

        if new_col_attr_ref:
            objects_created.append({'ref': new_col_attr_ref,
                                    'description': 'Imported Samples(Column) Attribute Mapping'})

        if data:
            output_html_files = self._generate_heatmap_html_report(data)

            report_params = {'message': '',
                             'objects_created': objects_created,
                             'workspace_id': workspace_id,
                             'html_links': output_html_files,
                             'direct_html_link_index': 0,
                             'html_window_height': 1400,
                             'report_object_name': 'import_matrix_from_biom_' + str(uuid.uuid4())}
        else:
            report_params = {'message': '',
                             'objects_created': objects_created,
                             'workspace_id': workspace_id,
                             'report_object_name': 'import_matrix_from_biom_' + str(uuid.uuid4())}

        kbase_report_client = KBaseReport(self.callback_url, token=self.token)
        output = kbase_report_client.create_extended_report(report_params)

        report_output = {'report_name': output['name'],
                         'report_ref': output['ref']}

        return report_output

    def __init__(self, config):
        self.callback_url = config['SDK_CALLBACK_URL']
        self.scratch = config['scratch']
        self.token = config['KB_AUTH_TOKEN']
        self.dfu = DataFileUtil(self.callback_url)
        self.report_util = kb_GenericsReport(self.callback_url)
        self.data_util = DataUtil(config)
        self.sampleservice_util = SampleServiceUtil(config)
        self.attr_util = AttributesUtil(config)
        self.matrix_util = MatrixUtil(config)
        self.taxon_util = TaxonUtil(config)
        self.matrix_types = [x.split(".")[1].split('-')[0]
                             for x in self.data_util.list_generic_types()]
        self.taxon_wsname = config['taxon-workspace-name']
        self.kbse = KBaseSearchEngine(config['search-url'])
        self.taxon_cache = dict()

    def fetch_sequence(self, matrix_ref):
        logging.info('start to fetch consensus sequence')

        input_matrix_obj = self.dfu.get_objects(
            {'object_refs': [matrix_ref]})['data'][0]
        input_matrix_info = input_matrix_obj['info']
        matrix_name = input_matrix_info[1]
        matrix_type = input_matrix_info[2]
        matrix_data = input_matrix_obj['data']

        if 'KBaseMatrices.AmpliconMatrix' not in matrix_type:
            raise ValueError('Unexpected data type: {}'.format(matrix_type))

        handle = matrix_data.get('sequencing_file_handle')
        if not handle:
            raise ValueError('Missing sequencing_file_handle from the matrix object')

        output_directory = os.path.join(self.scratch, str(uuid.uuid4()))
        logging.info('Start generating consensus sequence file in {}'.format(
            output_directory))
        self._mkdir_p(output_directory)

        matrix_fasta_file = self.dfu.shock_to_file(
            {'handle_id': handle,
             'file_path': self.scratch}).get('file_path')

        try:
            logging.info('start parsing FASTA file')
            fasta_dict = SeqIO.index(matrix_fasta_file, "fasta")
        except Exception:
            raise ValueError('Cannot parse file. Please provide a valid FASTA file')

        row_ids = matrix_data['data']['row_ids']

        fasta_file_path = os.path.join(
            output_directory, matrix_name + '_consensus_sequence.fasta')

        with open(fasta_file_path, 'w') as f:
            for row_id in row_ids:
                consensus_sequence = str(fasta_dict.get(row_id).seq)
                f.write('>' + str(row_id) + '\n')
                f.write(consensus_sequence + '\n')

        return fasta_file_path

    def import_matrix_from_biom(self, params):
        """
        arguments:
        obj_type: a type in KBaseMatrices
        matrix_name: matrix object name
        workspace_id: workspace id matrix object to be saved to
        input_shock_id: file shock id
        or
        input_file_path: absolute file path
        or
        input_staging_file_path: staging area file path

        optional arguments:
        col_attributemapping_ref: column AttributeMapping reference
        row_attributemapping_ref: row AttributeMapping reference
        genome_ref: genome reference
        matrix_obj_ref: Matrix reference
        """

        (biom_file, tsv_file, fasta_file, mode,
         metadata_keys) = self._process_params(params)

        workspace_id = params.get('workspace_id')
        matrix_name = params.get('matrix_name')
        obj_type = params.get('obj_type')
        scale = params.get('scale')
        description = params.get('description')

        refs = {k: v for k, v in params.items() if "_ref" in k}

        amplicon_data = self._file_to_amplicon_data(biom_file, tsv_file,
                                                    fasta_file, mode, refs,
                                                    matrix_name, workspace_id,
                                                    scale, description,
                                                    metadata_keys)

        for key in ['amplicon_type', 'amplification', 'extraction', 'target_gene',
                    'target_subfragment', 'pcr_primers', 'library_kit',
                    'library_layout', 'library_screening_strategy',
                    'sequencing_center', 'sequencing_date',
                    'sequencing_technology', 'sequencing_instrument',
                    'sequencing_quality_filter_cutoff', 'read_length_cutoff',
                    'read_pairing', 'barcode_error_rate',
                    'chimera_detection_and_removal', 'taxon_calling_method',
                    'denoise_method', 'sequence_error_cutoff',
                    'clustering_method', 'clustering_cutoff', 'sample_set_ref',
                    'reads_set_ref']:
            if params.get(key):
                amplicon_data[key] = params[key]

        new_row_attr_ref = None
        if not params.get('row_attributemapping_ref'):
            new_row_attr_ref = amplicon_data.get('row_attributemapping_ref')

        new_col_attr_ref = None
        if not params.get('col_attributemapping_ref'):
            new_col_attr_ref = amplicon_data.get('col_attributemapping_ref')

        if fasta_file:
            logging.info('start saving consensus sequence file to shock: {}'.format(
                fasta_file))
            handle_id = self.dfu.file_to_shock({'file_path': fasta_file,
                                                'make_handle': True})['handle']['hid']
            amplicon_data['sequencing_file_handle'] = handle_id

        logging.info('start saving Matrix object: {}'.format(matrix_name))
        matrix_obj_ref = self.data_util.save_object({
            'obj_type': 'KBaseMatrices.{}'.format(obj_type),
            'obj_name': matrix_name,
            'data': amplicon_data,
            'workspace_id': workspace_id})['obj_ref']

        if params.get('sample_set_ref'):
            self.matrix_util._link_matrix_to_samples(matrix_obj_ref,
                                                     amplicon_data,
                                                     params['sample_set_ref'])

        returnVal = {'matrix_obj_ref': matrix_obj_ref}

        report_output = self._generate_report(matrix_obj_ref, new_row_attr_ref,
                                              new_col_attr_ref, workspace_id,
                                              data=amplicon_data['data'])

        returnVal.update(report_output)

        return returnVal
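# Illustrative parameter sketch for BiomUtil.import_matrix_from_biom (all
# values are hypothetical; the required keys and the taxon_calling structure
# follow _process_params above, and the biom_fasta file-group keys follow the
# branch that handles it):
#
#     params = {
#         'obj_type': 'AmpliconMatrix',
#         'matrix_name': 'my_amplicon_matrix',
#         'workspace_id': 12345,
#         'scale': 'raw',
#         'description': 'demo import',
#         'amplicon_type': '16S',
#         'sequencing_technology': 'Illumina',
#         'sequencing_instrument': 'Illumina MiSeq',
#         'target_gene': '16S',
#         'target_subfragment': ['V4'],
#         'taxon_calling': {'taxon_calling_method': ['clustering'],
#                           'clustering_method': 'UCLUST',
#                           'clustering_cutoff': 0.97},
#         'biom_fasta': {'biom_file_biom_fasta': 'data.biom',
#                        'fasta_file_biom_fasta': 'seqs.fasta'},
#     }
#     ret = biom_util.import_matrix_from_biom(params)
#     matrix_ref = ret['matrix_obj_ref']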
class GenericsAPI:
    '''
    Module Name:
    GenericsAPI

    Module Description:

    '''

    ######## WARNING FOR GEVENT USERS ####### noqa
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    ######################################### noqa
    VERSION = "1.0.8"
    GIT_URL = "git@github.com:Tianhao-Gu/GenericsAPI.git"
    GIT_COMMIT_HASH = "e5a7c9fc2952bf44ebf8ec76d92322f00b606b3e"

    #BEGIN_CLASS_HEADER
    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self.config = config
        self.config['SDK_CALLBACK_URL'] = os.environ['SDK_CALLBACK_URL']
        self.config['KB_AUTH_TOKEN'] = os.environ['KB_AUTH_TOKEN']
        self.scratch = config['scratch']

        self.attr_util = AttributesUtil(self.config)
        self.matrix_util = MatrixUtil(self.config)
        self.corr_util = CorrelationUtil(self.config)
        self.data_util = DataUtil(self.config)
        self.network_util = NetworkUtil(self.config)
        self.biom_util = BiomUtil(self.config)
        self.pca_util = PCAUtil(self.config)
        self.data_table_util = DataTableUtil(self.config)
        self.template_util = TemplateUtil(self.config)

        logging.basicConfig(format='%(created)s %(levelname)s: %(message)s',
                            level=logging.INFO)
        #END_CONSTRUCTOR
        pass

    def fetch_data(self, ctx, params):
        """
        fetch_data: fetch generics data as pandas dataframe for a generics
        data object
        :param params: instance of type "FetchDataParams" (Input of the
           fetch_data function obj_ref: generics object reference Optional
           arguments: generics_module: the generics data module to be
           retrieved from e.g. for a given data type like below: typedef
           structure { FloatMatrix2D data; condition_set_ref
           condition_set_ref; } SomeGenericsMatrix; generics_module should
           be {'data': 'FloatMatrix2D', 'condition_set_ref':
           'condition_set_ref'}) -> structure: parameter "obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter
           "generics_module" of mapping from String to String
        :returns: instance of type "FetchDataReturn" (Output of the
           fetch_data function data_matrix: a pandas dataframe in json
           format) -> structure: parameter "data_matrix" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN fetch_data
        returnVal = self.data_util.fetch_data(params)
        #END fetch_data

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method fetch_data return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def export_matrix(self, ctx, params):
        """
        :param params: instance of type "ExportParams" (Input of the
           export_matrix function obj_ref: generics object reference
           Optional arguments: generics_module: select the generics data to
           be retrieved from e.g. for a given data type like below: typedef
           structure { FloatMatrix2D data; condition_set_ref
           condition_set_ref; } SomeGenericsMatrix; and only 'FloatMatrix2D'
           is needed generics_module should be {'data': 'FloatMatrix2D'})
           -> structure: parameter "obj_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "generics_module" of mapping from
           String to String
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN export_matrix
        returnVal = self.matrix_util.export_matrix(params)
        #END export_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method export_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
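    # Illustrative usage sketch (not part of the generated server code):
    # fetch_data returns the matrix as a JSON-encoded pandas DataFrame, so a
    # caller might round-trip it as below. Here `api` is an instantiated
    # GenericsAPI, `ctx` a MethodContext, and the reference '1/2/3' is
    # hypothetical:
    #
    #     ret = api.fetch_data(ctx, {'obj_ref': '1/2/3'})[0]
    #     df = pd.read_json(ret['data_matrix'])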
    def validate_data(self, ctx, params):
        """
        validate_data: validate data
        :param params: instance of type "ValidateParams" (Input of the
           validate_data function obj_type: obj type e.g.:
           'KBaseMatrices.ExpressionMatrix-1.1' data: data to be validated)
           -> structure: parameter "obj_type" of String, parameter "data" of
           mapping from String to String
        :returns: instance of type "ValidateOutput" -> structure: parameter
           "validated" of type "boolean" (A boolean - 0 for false, 1 for
           true.), parameter "failed_constraint" of mapping from String to
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN validate_data
        returnVal = self.data_util.validate_data(params)
        #END validate_data

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method validate_data return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def import_matrix_from_excel(self, ctx, params):
        """
        import_matrix_from_excel: import matrix object from excel
        :param params: instance of type "ImportMatrixParams" (Input of the
           import_matrix_from_excel function obj_type: a type in
           KBaseMatrices input_shock_id: file shock id input_file_path:
           absolute file path input_staging_file_path: staging area file
           path matrix_name: matrix object name description: optional, a
           description of the matrix workspace_name: workspace name matrix
           object to be saved to optional: col_attributemapping_ref: column
           AttributeMapping reference row_attributemapping_ref: row
           AttributeMapping reference genome_ref: genome reference
           diff_expr_matrix_ref: DifferentialExpressionMatrix reference
           biochemistry_ref: (for ChemicalAbundanceMatrix) reads_set_ref:
           (raw data for AmpliconMatrix) sample_set_ref: SampleSet object
           reference) -> structure: parameter "obj_type" of String,
           parameter "input_shock_id" of String, parameter
           "input_file_path" of String, parameter "input_staging_file_path"
           of String, parameter "matrix_name" of String, parameter
           "amplicon_set_name" of String, parameter "scale" of String,
           parameter "description" of String, parameter "workspace_name" of
           type "workspace_name" (workspace name of the object), parameter
           "genome_ref" of type "obj_ref" (An X/Y/Z style reference),
           parameter "col_attributemapping_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "row_attributemapping_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter
           "diff_expr_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "biochemistry_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "reads_set_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "sample_set_ref"
           of type "obj_ref" (An X/Y/Z style reference), parameter "unit"
           of String, parameter "type" of String
        :returns: instance of type "ImportMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN import_matrix_from_excel
        returnVal = self.matrix_util.import_matrix_from_excel(params)
        #END import_matrix_from_excel

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method import_matrix_from_excel return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
    def import_matrix_from_biom(self, ctx, params):
        """
        import_matrix_from_biom: import matrix object from BIOM file format
        :param params: instance of type "ImportOTUParams" -> structure:
           parameter "obj_type" of String, parameter
           "taxonomic_abundance_tsv" of String, parameter "taxonomic_fasta"
           of String, parameter "input_local_file" of String, parameter
           "matrix_name" of String, parameter "amplicon_set_name" of
           String, parameter "scale" of String, parameter "description" of
           String, parameter "workspace_name" of type "workspace_name"
           (workspace name of the object), parameter "genome_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter
           "col_attributemapping_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "row_attributemapping_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter
           "diff_expr_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "biochemistry_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "reads_set_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "sample_set_ref"
           of type "obj_ref" (An X/Y/Z style reference), parameter
           "metadata_keys" of list of String, parameter "extraction_kit" of
           String, parameter "amplicon_type" of String, parameter
           "target_gene_region" of String, parameter
           "forward_primer_sequence" of String, parameter
           "reverse_primer_sequence" of String, parameter
           "sequencing_platform" of String, parameter "sequencing_run" of
           String, parameter "sequencing_kit" of String, parameter
           "sequencing_quality_filter_cutoff" of String, parameter
           "clustering_cutoff" of Double, parameter "clustering_method" of
           String
        :returns: instance of type "ImportMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN import_matrix_from_biom
        returnVal = self.biom_util.import_matrix_from_biom(params)
        #END import_matrix_from_biom

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method import_matrix_from_biom return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def save_object(self, ctx, params):
        """
        save_object: validate data constraints and save matrix object
        :param params: instance of type "SaveObjectParams" (Input of the
           import_matrix_from_excel function obj_type: saving object data
           type obj_name: saving object name data: data to be saved
           workspace_name: workspace name matrix object to be saved to) ->
           structure: parameter "obj_type" of String, parameter "obj_name"
           of String, parameter "data" of mapping from String to String,
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object)
        :returns: instance of type "SaveObjectOutput" -> structure:
           parameter "obj_ref" of type "obj_ref" (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN save_object
        returnVal = self.data_util.save_object(params)
        #END save_object

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method save_object return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
    def search_matrix(self, ctx, params):
        """
        search_matrix: generate a HTML report that allows users to select
        feature ids
        :param params: instance of type "MatrixSelectorParams" (Input of the
           search_matrix function matrix_obj_ref: object reference of a
           matrix workspace_name: workspace name objects to be saved to) ->
           structure: parameter "matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "workspace_name" of type
           "workspace_name" (workspace name of the object)
        :returns: instance of type "MatrixSelectorOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN search_matrix
        returnVal = self.matrix_util.search_matrix(params)
        #END search_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method search_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def filter_matrix(self, ctx, params):
        """
        filter_matrix: create sub-matrix based on input filter_ids
        :param params: instance of type "MatrixFilterParams" (Input of the
           filter_matrix function matrix_obj_ref: object reference of a
           matrix workspace_name: workspace name objects to be saved to
           filter_ids: string of column or row ids that result matrix
           contains filtered_matrix_name: name of newly created filtered
           matrix object) -> structure: parameter "matrix_obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name"
           of type "workspace_name" (workspace name of the object),
           parameter "filtered_matrix_name" of String, parameter
           "remove_ids" of String, parameter "dimension" of String
        :returns: instance of type "MatrixFilterOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "matrix_obj_refs" of list of type "obj_ref"
           (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN filter_matrix
        returnVal = self.matrix_util.filter_matrix(params)
        #END filter_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method filter_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
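    # Sketch of a filter_matrix call (values are hypothetical; per the
    # docstring above, `remove_ids` is a comma-separated string of row or
    # column IDs and `dimension` selects which axis to filter):
    #
    #     params = {'matrix_obj_ref': '1/2/3',
    #               'workspace_name': 'my_workspace',
    #               'filtered_matrix_name': 'filtered_matrix',
    #               'remove_ids': 'OTU_1,OTU_2',
    #               'dimension': 'row'}
    #     ret = api.filter_matrix(ctx, params)[0]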
    def standardize_matrix(self, ctx, params):
        """
        standardize_matrix: standardize a matrix
        :param params: instance of type "StandardizeMatrixParams" (Input of
           the standardize_matrix function input_matrix_ref: object
           reference of a matrix workspace_name: workspace name objects to
           be saved to with_mean: center data before scaling with_std:
           scale data to unit variance new_matrix_name: name of newly
           created matrix object) -> structure: parameter
           "input_matrix_ref" of type "obj_ref" (An X/Y/Z style reference),
           parameter "workspace_name" of type "workspace_name" (workspace
           name of the object), parameter "with_mean" of type "boolean" (A
           boolean - 0 for false, 1 for true.), parameter "with_std" of
           type "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "dimension" of String, parameter "new_matrix_name" of String
        :returns: instance of type "StandardizeMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN standardize_matrix
        returnVal = self.matrix_util.standardize_matrix(params)
        #END standardize_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method standardize_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def transform_matrix(self, ctx, params):
        """
        :param params: instance of type "TransformMatrixParams" ->
           structure: parameter "input_matrix_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "workspace_name" of type
           "workspace_name" (workspace name of the object), parameter
           "workspace_id" of Long, parameter "new_matrix_name" of String,
           parameter "abundance_filtering_params" of mapping from String to
           String, parameter "standardization_params" of mapping from
           String to String, parameter "ratio_transformation_params" of
           mapping from String to String, parameter
           "perform_relative_abundance" of type "boolean" (A boolean - 0
           for false, 1 for true.)
        :returns: instance of type "TransformMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN transform_matrix
        returnVal = self.matrix_util.transform_matrix(params)
        #END transform_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method transform_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
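    # Sketch of a transform_matrix call chaining relative abundance with
    # standardization (hypothetical values; parameter names follow the
    # TransformMatrixParams docstring above):
    #
    #     params = {'input_matrix_ref': '1/2/3',
    #               'workspace_id': 12345,
    #               'new_matrix_name': 'transformed_matrix',
    #               'perform_relative_abundance': 1,
    #               'standardization_params': {'with_mean': 1, 'with_std': 1}}
    #     ret = api.transform_matrix(ctx, params)[0]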
    def perform_rarefy(self, ctx, params):
        """
        :param params: instance of type "RarefyMatrixParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_id" of Long, parameter
           "new_matrix_name" of String, parameter "seed_number" of Long,
           parameter "dimension" of String
        :returns: instance of type "RarefyMatrixOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An
           X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_rarefy
        returnVal = self.matrix_util.perform_rarefy(params)
        #END perform_rarefy

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method perform_rarefy return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def perform_variable_stats_matrix(self, ctx, params):
        """
        :param params: instance of type "VariableStatsParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "attribute_mapping_obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_id"
           of Long, parameter "dist_metric" of String, parameter
           "dimension" of String, parameter "grouping" of String, parameter
           "permutations" of Long, parameter "perform_anosim" of type
           "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "perform_permanova" of type "boolean" (A boolean - 0 for false,
           1 for true.), parameter "perform_permdisp" of type "boolean" (A
           boolean - 0 for false, 1 for true.)
        :returns: instance of type "VariableStatsOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_variable_stats_matrix
        returnVal = self.matrix_util.perform_variable_stats_matrix(params)
        #END perform_variable_stats_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method perform_variable_stats_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def perform_mantel_test(self, ctx, params):
        """
        :param params: instance of type "MantelTestParams" -> structure:
           parameter "input_matrix_refs" of list of type "obj_ref" (An
           X/Y/Z style reference), parameter "workspace_id" of Long,
           parameter "dist_metric" of String, parameter "dimension" of
           String, parameter "correlation_method" of String, parameter
           "permutations" of Long, parameter "alternative_hypothesis" of
           String
        :returns: instance of type "MantelTestOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN perform_mantel_test
        returnVal = self.matrix_util.perform_mantel_test(params)
        #END perform_mantel_test

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method perform_mantel_test return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
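    # Sketch of a perform_mantel_test call (hypothetical values; per the
    # MantelTestParams docstring above, two matrix references are compared
    # under a chosen distance metric and correlation method):
    #
    #     params = {'input_matrix_refs': ['1/2/3', '1/4/5'],
    #               'workspace_id': 12345,
    #               'dist_metric': 'euclidean',
    #               'dimension': 'col',
    #               'correlation_method': 'pearson',
    #               'permutations': 999,
    #               'alternative_hypothesis': 'two-sided'}
    #     ret = api.perform_mantel_test(ctx, params)[0]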
    def file_to_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "FileToAttributeMappingParams"
           (input_shock_id and input_file_path - alternative input params,)
           -> structure: parameter "input_shock_id" of String, parameter
           "input_file_path" of String, parameter "output_ws_id" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "FileToAttributeMappingOutput" ->
           structure: parameter "attribute_mapping_ref" of type "obj_ref"
           (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN file_to_attribute_mapping
        logging.info("Starting 'file_to_attribute_mapping' with params:{}".format(
            params))
        self.attr_util.validate_params(params,
                                       ("output_ws_id", "output_obj_name"),
                                       ('input_shock_id', 'input_file_path'))
        result = self.attr_util.file_to_attribute_mapping(params)
        #END file_to_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method file_to_attribute_mapping return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def file_to_fbamodel_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "FileToAttributeMappingParams"
           (input_shock_id and input_file_path - alternative input params,)
           -> structure: parameter "input_shock_id" of String, parameter
           "input_file_path" of String, parameter "output_ws_id" of String,
           parameter "output_obj_name" of String
        :returns: instance of type "FileToAttributeMappingOutput" ->
           structure: parameter "attribute_mapping_ref" of type "obj_ref"
           (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN file_to_fbamodel_attribute_mapping
        logging.info("Starting 'file_to_fbamodel_attribute_mapping' with params:{}".format(
            params))
        self.attr_util.validate_params(params,
                                       ("output_ws_id", "output_obj_name"),
                                       ('input_shock_id', 'input_file_path'))
        params['import_fbamodel_attri_mapping'] = True
        result = self.attr_util.file_to_attribute_mapping(params)
        #END file_to_fbamodel_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method file_to_fbamodel_attribute_mapping return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
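    # Sketch of a file_to_attribute_mapping call (hypothetical values;
    # exactly one of input_shock_id / input_file_path is supplied, per the
    # validate_params call above):
    #
    #     params = {'input_file_path': '/kb/module/work/tmp/attributes.tsv',
    #               'output_ws_id': 12345,
    #               'output_obj_name': 'my_attribute_mapping'}
    #     ret = api.file_to_attribute_mapping(ctx, params)[0]
    #     am_ref = ret['attribute_mapping_ref']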
    def update_matrix_attribute_mapping(self, ctx, params):
        """
        :param params: instance of type "UpdateMatrixAMParams" -> structure:
           parameter "staging_file_subdir_path" of String, parameter
           "dimension" of String, parameter "input_matrix_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name"
           of String, parameter "output_am_obj_name" of String, parameter
           "output_matrix_obj_name" of String
        :returns: instance of type "UpdateMatrixAMOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "new_matrix_obj_ref" of type "obj_ref" (An
           X/Y/Z style reference), parameter "new_attribute_mapping_ref" of
           type "obj_ref" (An X/Y/Z style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN update_matrix_attribute_mapping
        logging.info("Starting 'update_matrix_attribute_mapping' with params:{}".format(
            params))
        self.attr_util.validate_params(params, ("staging_file_subdir_path",
                                                "dimension", "workspace_name",
                                                "output_am_obj_name",
                                                "input_matrix_ref",
                                                "output_matrix_obj_name"))
        returnVal = self.attr_util.update_matrix_attribute_mapping(params)
        #END update_matrix_attribute_mapping

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method update_matrix_attribute_mapping return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def attribute_mapping_to_tsv_file(self, ctx, params):
        """
        :param params: instance of type "AttributeMappingToTsvFileParams" ->
           structure: parameter "input_ref" of type "obj_ref" (An X/Y/Z
           style reference), parameter "destination_dir" of String
        :returns: instance of type "AttributeMappingToTsvFileOutput" ->
           structure: parameter "file_path" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN attribute_mapping_to_tsv_file
        logging.info("Starting 'attribute_mapping_to_tsv_file' with params:{}".format(
            params))
        self.attr_util.validate_params(params, ("destination_dir", "input_ref"))
        am_id, result = self.attr_util.to_tsv(params)
        #END attribute_mapping_to_tsv_file

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method attribute_mapping_to_tsv_file return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_attribute_mapping_tsv(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_attribute_mapping_tsv
        logging.info("Starting 'export_attribute_mapping_tsv' with params:{}".format(
            params))
        self.attr_util.validate_params(params, ("input_ref",))
        params['destination_dir'] = self.scratch
        am_id, files = self.attr_util.to_tsv(params)
        result = self.attr_util.export(files['file_path'], am_id,
                                       params['input_ref'])
        #END export_attribute_mapping_tsv

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_attribute_mapping_tsv return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
    def export_attribute_mapping_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_attribute_mapping_excel
        logging.info("Starting 'export_attribute_mapping_excel' with params:{}".format(
            params))
        self.attr_util.validate_params(params, ("input_ref",))
        params['destination_dir'] = self.scratch
        am_id, files = self.attr_util.to_excel(params)
        result = self.attr_util.export(files['file_path'], am_id,
                                       params['input_ref'])
        #END export_attribute_mapping_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_attribute_mapping_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_cluster_set_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_cluster_set_excel
        logging.info("Starting 'export_cluster_set_excel' with params:{}".format(
            params))
        self.attr_util.validate_params(params, ("input_ref",))
        params['destination_dir'] = self.scratch
        cs_id, files = self.attr_util.to_excel(params)
        result = self.attr_util.export(files['file_path'], cs_id,
                                       params['input_ref'])
        #END export_cluster_set_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_cluster_set_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_corr_matrix_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_corr_matrix_excel
        logging.info("Starting 'export_corr_matrix_excel' with params:{}".format(
            params))
        result = self.corr_util.export_corr_matrix_excel(params)
        #END export_corr_matrix_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_corr_matrix_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def export_pca_matrix_excel(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_pca_matrix_excel
        result = self.pca_util.export_pca_matrix_excel(params)
        #END export_pca_matrix_excel

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_pca_matrix_excel return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]
    def export_amplicon_set_tsv(self, ctx, params):
        """
        :param params: instance of type "ExportObjectParams" -> structure:
           parameter "input_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        :returns: instance of type "ExportOutput" -> structure: parameter
           "shock_id" of String
        """
        # ctx is the context object
        # return variables are: result
        #BEGIN export_amplicon_set_tsv
        result = self.biom_util.export_amplicon_set_tsv(params)
        #END export_amplicon_set_tsv

        # At some point might do deeper type checking...
        if not isinstance(result, dict):
            raise ValueError('Method export_amplicon_set_tsv return value ' +
                             'result is not type dict as required.')
        # return the results
        return [result]

    def compute_correlation_matrix(self, ctx, params):
        """
        compute_correlation_matrix: compute a correlation matrix on the rows
        or columns of an input matrix
        :param params: instance of type "CompCorrParams" (Input of the
           compute_correlation_matrix function input_obj_ref: object
           reference of a matrix workspace_name: workspace name objects to
           be saved to corr_matrix_name: correlation matrix object name
           dimension: compute correlation on column or row, one of ['col',
           'row'] method: correlation method, one of ['pearson', 'kendall',
           'spearman'] plot_corr_matrix: plot correlation matrix in report,
           default False plot_scatter_matrix: plot scatter matrix in
           report, default False compute_significance: also compute
           Significance in addition to correlation matrix) -> structure:
           parameter "input_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of type "workspace_name"
           (workspace name of the object), parameter "corr_matrix_name" of
           String, parameter "dimension" of String, parameter "method" of
           String, parameter "plot_corr_matrix" of type "boolean" (A
           boolean - 0 for false, 1 for true.), parameter
           "plot_scatter_matrix" of type "boolean" (A boolean - 0 for
           false, 1 for true.), parameter "compute_significance" of type
           "boolean" (A boolean - 0 for false, 1 for true.)
        :returns: instance of type "CompCorrOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "corr_matrix_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN compute_correlation_matrix
        returnVal = self.corr_util.compute_correlation_matrix(params)
        #END compute_correlation_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method compute_correlation_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
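    # Sketch of a compute_correlation_matrix call (hypothetical values;
    # options mirror the CompCorrParams docstring above):
    #
    #     params = {'input_obj_ref': '1/2/3',
    #               'workspace_name': 'my_workspace',
    #               'corr_matrix_name': 'my_corr_matrix',
    #               'dimension': 'row',
    #               'method': 'pearson',
    #               'plot_corr_matrix': 1,
    #               'compute_significance': 1}
    #     ret = api.compute_correlation_matrix(ctx, params)[0]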
    def compute_correlation_matrix(self, ctx, params):
        """
        compute_correlation_matrix: compute a correlation matrix on the input matrix

        :param params: instance of type "CompCorrParams" (Input of the
           compute_correlation_matrix function
           input_obj_ref: object reference of a matrix
           workspace_name: workspace name objects to be saved to
           corr_matrix_name: correlation matrix object name
           dimension: compute correlation on column or row, one of ['col', 'row']
           method: correlation method, one of ['pearson', 'kendall', 'spearman']
           plot_corr_matrix: plot correlation matrix in report, default False
           plot_scatter_matrix: plot scatter matrix in report, default False
           compute_significance: also compute significance in addition to the
           correlation matrix) -> structure: parameter "input_obj_ref" of type
           "obj_ref" (An X/Y/Z style reference), parameter "workspace_name" of
           type "workspace_name" (workspace name of the object), parameter
           "corr_matrix_name" of String, parameter "dimension" of String,
           parameter "method" of String, parameter "plot_corr_matrix" of type
           "boolean" (A boolean - 0 for false, 1 for true.), parameter
           "plot_scatter_matrix" of type "boolean" (A boolean - 0 for false, 1
           for true.), parameter "compute_significance" of type "boolean" (A
           boolean - 0 for false, 1 for true.)
        :returns: instance of type "CompCorrOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "corr_matrix_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN compute_correlation_matrix
        returnVal = self.corr_util.compute_correlation_matrix(params)
        #END compute_correlation_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method compute_correlation_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def compute_correlation_across_matrices(self, ctx, params):
        """
        compute_correlation_across_matrices: compute correlation matrix across matrices

        :param params: instance of type "CompCorrMetriceParams" (Input of the
           compute_correlation_across_matrices function
           matrix_ref_1: object reference of a matrix
           matrix_ref_2: object reference of a matrix
           workspace_name: workspace name objects to be saved to
           corr_matrix_name: correlation matrix object name
           dimension: compute correlation on column or row, one of ['col', 'row']
           method: correlation method, one of ['pearson', 'kendall', 'spearman']
           plot_corr_matrix: plot correlation matrix in report, default False
           compute_significance: also compute significance in addition to the
           correlation matrix) -> structure: parameter "matrix_ref_1" of type
           "obj_ref" (An X/Y/Z style reference), parameter "matrix_ref_2" of
           type "obj_ref" (An X/Y/Z style reference), parameter
           "workspace_name" of type "workspace_name" (workspace name of the
           object), parameter "corr_matrix_name" of String, parameter
           "dimension" of String, parameter "method" of String, parameter
           "plot_corr_matrix" of type "boolean" (A boolean - 0 for false, 1
           for true.), parameter "compute_significance" of type "boolean" (A
           boolean - 0 for false, 1 for true.), parameter "corr_threshold" of
           Double
        :returns: instance of type "CompCorrOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String,
           parameter "corr_matrix_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN compute_correlation_across_matrices
        returnVal = self.corr_util.compute_correlation_across_matrices(params)
        #END compute_correlation_across_matrices

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method compute_correlation_across_matrices return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
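    # Example parameter dict for the correlation methods above, built strictly
    # from the documented fields; all reference and name values are
    # hypothetical:
    #
    #   corr_params = {'input_obj_ref': '1/2/3',
    #                  'workspace_name': 'my_workspace',
    #                  'corr_matrix_name': 'test_corr_matrix',
    #                  'dimension': 'row',          # or 'col'
    #                  'method': 'pearson',         # 'kendall' / 'spearman'
    #                  'plot_corr_matrix': 1,
    #                  'plot_scatter_matrix': 0,
    #                  'compute_significance': 1}
    #   ret = impl.compute_correlation_matrix(ctx, corr_params)[0]
    #   corr_ref = ret['corr_matrix_obj_ref']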
    def build_network(self, ctx, params):
        """
        build_network: filter a correlation matrix and build a network from it

        :param params: instance of type "BuildNetworkParams" (Input of the
           build_network function
           corr_matrix_ref: CorrelationMatrix object
           workspace_name: workspace name objects to be saved to
           network_obj_name: Network object name
           filter_on_threshold: dictionary that holds filter-on-threshold params
           params in filter_on_threshold:
           coefficient_threshold: correlation coefficient threshold (select
           pairs with a greater correlation coefficient)) -> structure:
           parameter "corr_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of type "workspace_name"
           (workspace name of the object), parameter "network_obj_name" of
           String, parameter "filter_on_threshold" of mapping from String to
           String
        :returns: instance of type "BuildNetworkOutput" -> structure:
           parameter "report_name" of String, parameter "report_ref" of
           String, parameter "network_obj_ref" of type "obj_ref" (An X/Y/Z
           style reference)
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN build_network
        returnVal = self.network_util.build_network(params)
        #END build_network

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method build_network return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def run_pca(self, ctx, params):
        """
        run_pca: PCA analysis on a matrix

        :param params: instance of type "PCAParams" (Input of the run_pca
           function
           input_obj_ref: object reference of a matrix
           workspace_name: the name of the workspace
           pca_matrix_name: name of the PCA (KBaseExperiments.PCAMatrix) object
           dimension: compute PCA on column or row, one of ['col', 'row']
           n_components: number of components (default 2)
           attribute_mapping_obj_ref: associated attribute_mapping_obj_ref
           scale_size_by: used for the PCA plot to scale data size
           color_marker_by: used for the PCA plot to group data) -> structure:
           parameter "input_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of String, parameter
           "pca_matrix_name" of String, parameter "dimension" of String,
           parameter "n_components" of Long, parameter
           "attribute_mapping_obj_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "scale_size_by" of mapping from String to
           String, parameter "color_marker_by" of mapping from String to
           String
        :returns: instance of type "PCAOutput" (Output of the run_pca function
           pca_ref: PCA object reference (as KBaseExperiments.PCAMatrix data
           type)
           report_name: report name generated by KBaseReport
           report_ref: report reference generated by KBaseReport) ->
           structure: parameter "pca_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "report_name" of String, parameter
           "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN run_pca
        returnVal = self.pca_util.run_pca(params)
        #END run_pca

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method run_pca return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]
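    # Likewise for build_network and run_pca; field names follow the
    # docstrings above, and reference values are hypothetical. Note that
    # filter_on_threshold maps String to String, so the cutoff is passed as a
    # string:
    #
    #   net_ret = impl.build_network(ctx, {
    #       'corr_matrix_ref': corr_ref,
    #       'workspace_name': 'my_workspace',
    #       'network_obj_name': 'test_network',
    #       'filter_on_threshold': {'coefficient_threshold': '0.6'}})[0]
    #
    #   pca_ret = impl.run_pca(ctx, {
    #       'input_obj_ref': '1/2/3',
    #       'workspace_name': 'my_workspace',
    #       'pca_matrix_name': 'test_pca',
    #       'dimension': 'row',
    #       'n_components': 2})[0]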
    def view_matrix(self, ctx, params):
        """
        view_matrix: generate a report for the matrix viewer

        :param params: instance of type "ViewMatrixParams" -> structure:
           parameter "input_matrix_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "workspace_name" of String, parameter
           "with_attribute_info" of type "boolean" (A boolean - 0 for false, 1
           for true.)
        :returns: instance of type "ViewMatrixOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN view_matrix
        returnVal = self.data_table_util.view_matrix_as_table(params)
        #END view_matrix

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method view_matrix return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def build_chemical_abundance_template(self, ctx, params):
        """
        :param params: instance of type "ChemAbunTempParams" -> structure:
           parameter "workspace_name" of String, parameter "workspace_id" of
           Long, parameter "sample_set_ref" of type "obj_ref" (An X/Y/Z style
           reference), parameter "chemical_data_included" of mapping from
           String to Long, parameter "chemical_ids_included" of mapping from
           String to Long
        :returns: instance of type "ViewMatrixOutput" -> structure: parameter
           "report_name" of String, parameter "report_ref" of String
        """
        # ctx is the context object
        # return variables are: returnVal
        #BEGIN build_chemical_abundance_template
        # NOTE: self.template_util is expected to be initialized in the
        # constructor alongside the other *_util helpers.
        returnVal = self.template_util.build_chemical_abundance_template(params)
        #END build_chemical_abundance_template

        # At some point might do deeper type checking...
        if not isinstance(returnVal, dict):
            raise ValueError('Method build_chemical_abundance_template return value ' +
                             'returnVal is not type dict as required.')
        # return the results
        return [returnVal]

    def status(self, ctx):
        #BEGIN_STATUS
        returnVal = {'state': "OK",
                     'message': "",
                     'version': self.VERSION,
                     'git_url': self.GIT_URL,
                     'git_commit_hash': self.GIT_COMMIT_HASH}
        #END_STATUS
        return [returnVal]
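    # Finally, a sketch for the matrix viewer report; `matrix_ref` is
    # hypothetical, and with_attribute_info uses the module's 0/1 boolean
    # convention:
    #
    #   view_ret = impl.view_matrix(ctx, {'input_matrix_ref': matrix_ref,
    #                                     'workspace_name': 'my_workspace',
    #                                     'with_attribute_info': 1})[0]
    #   print(view_ret['report_name'], view_ret['report_ref'])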