def _read_inputs(biom_table: biom.Table, phylogeny_fp: NewickFormat,
                 meta_data: NumericMetadataColumn = None):
    """Align a feature table with optional metadata and load its phylogeny.

    Parameters
    ----------
    biom_table : biom.Table
        Feature table whose sample axis is reordered to match the labels.
    phylogeny_fp : NewickFormat
        Phylogeny handed to ``_prune_features_from_phylogeny``.
    meta_data : NumericMetadataColumn, optional
        Per-sample labels. When given, the "balancing" strategy is selected
        and labels come from ``_sort_metada``; otherwise the "augmentation"
        strategy is selected and every sample gets the label ``1.0``.

    Returns
    -------
    tuple
        ``(_table, y, _tree, generate_strategy, pruned_phylogeny_fp)``: the
        reordered and observation-mapped table, the label series, the
        dendropy tree read from the pruned phylogeny, the strategy name and
        the path of the pruned phylogeny.

    Raises
    ------
    ValueError
        If the sample IDs of the final table do not match the expected
        sample order.
    """
    # Explicit ``None`` test: the argument is optional, and relying on the
    # truthiness of a metadata object is fragile.
    if meta_data is not None:
        generate_strategy = "balancing"
        # presumably returns a DataFrame-like ``meta`` plus the table
        # restricted to the matching samples -- verify in _sort_metada.
        meta, biom_table = _sort_metada(meta_data, biom_table)
        y = meta.iloc[:, 0]
        samples = meta.index
    else:
        generate_strategy = "augmentation"
        samples = biom_table.ids('sample')
        y = pd.Series(data=np.ones((len(samples),)), index=samples)

    _table_tmp = biom_table.sort_order(axis='sample', order=samples)
    _table = _map_observations(_table_tmp)
    pruned_phylogeny_fp = _prune_features_from_phylogeny(_table, phylogeny_fp)
    _tree = dendropy.Tree.get(path=str(pruned_phylogeny_fp),
                              preserve_underscores=False,
                              schema="newick",
                              rooting='default-rooted')

    table_samples = _table.ids('sample')
    # ``array_equal`` also copes with ID arrays of different lengths, where
    # an elementwise ``!=`` would not yield a per-sample result.
    if not np.array_equal(np.asarray(samples), np.asarray(table_samples)):
        difference = sorted(set(samples) ^ set(table_samples), key=str)
        raise ValueError(
            "The samples IDs in meta data and biom table are not the same! "
            "The difference is: {}. Please double check.".format(difference))

    return _table, y, _tree, generate_strategy, pruned_phylogeny_fp
def _write_jsonp(path, callback, payload):
    """Write ``payload`` as JSON wrapped in a call to ``callback`` (JSONP)."""
    with open(path, 'w') as fh:
        fh.write('{}({});'.format(callback, json.dumps(payload)))


def plot(output_dir, table: biom.Table, metadata: q2.Metadata,
         case_where: str, control_where: str,
         feature_tree: skbio.TreeNode = None):
    """Write the static plot assets and its JSONP data files to ``output_dir``.

    Parameters
    ----------
    output_dir
        Destination directory; the bundled ``assets/dist`` tree is copied
        into it and a ``data`` sub-directory is created for the data files.
    table : biom.Table
        Feature table. It is not modified; filtering happens on a copy.
    metadata : q2.Metadata
        Sample metadata, restricted to the samples present in ``table``.
    case_where, control_where : str
        Expressions passed to ``metadata.get_ids`` to select the case and
        control samples.
    feature_tree : skbio.TreeNode, optional
        Tree over the features. When omitted an empty ``TreeNode`` is used.

    Notes
    -----
    Two files are written into ``<output_dir>/data``:
    ``packed_table.jsonp`` (``LOAD_PACKED_TABLE(...)``) and ``tree.jsonp``
    (``LOAD_TREE(...)``).
    """
    # NOTE: a former debug dump of the tree to '/tmp/tree.nwk' was removed;
    # it raised AttributeError whenever ``feature_tree`` was None (the
    # default) and wrote to a hard-coded, predictable path.
    copy_tree(os.path.join(PLOT, 'assets', 'dist'), output_dir)
    data_dir = os.path.join(output_dir, 'data')
    os.makedirs(data_dir, exist_ok=True)

    metadata = metadata.filter_ids(table.ids(axis='sample'))
    case_samples = sorted(metadata.get_ids(case_where))
    control_samples = sorted(metadata.get_ids(control_where))
    sample_order = case_samples + control_samples

    # Filter a copy so the caller's table is left untouched; the copy can
    # then safely be trimmed in place.
    table = table.filter(sample_order, inplace=False)
    table.remove_empty('observation')
    features = list(table.ids(axis='observation'))

    if feature_tree is not None:
        feature_tree = shear_no_prune(feature_tree, features)
    else:
        feature_tree = TreeNode()

    tree_data = tree_to_array(feature_tree)
    # Entries with zero children are presumably the tips -- verify against
    # tree_to_array. ``lookup`` maps each such array position (as a string)
    # to its rank among them.
    idx, = np.where(np.asarray(tree_data['children']) == 0)
    tree_data['lookup'] = {str(node): rank for rank, node in enumerate(idx)}
    tip_order = np.asarray(tree_data['names'])[idx]

    table = table.sort_order(tip_order, axis='observation')
    table = table.sort_order(sample_order, axis='sample')

    _write_jsonp(os.path.join(data_dir, 'packed_table.jsonp'),
                 'LOAD_PACKED_TABLE', table_to_b64pa(table))
    _write_jsonp(os.path.join(data_dir, 'tree.jsonp'),
                 'LOAD_TREE', tree_data)
def _read_inputs(biom_table: biom.Table,
                 meta_data: NumericMetadataColumn = None):
    """Align a feature table with optional per-sample metadata.

    Parameters
    ----------
    biom_table : biom.Table
        Feature table whose sample axis is reordered to match the labels.
    meta_data : NumericMetadataColumn, optional
        Per-sample labels. When given, labels and the table come from
        ``_sort_metada``; otherwise every sample gets the label ``1.0``.

    Returns
    -------
    tuple
        ``(_table, y)``: the table sorted into the label order, and the
        labels indexed by sample ID.

    Raises
    ------
    ValueError
        If the sample IDs of the sorted table do not match the expected
        sample order.
    """
    # Explicit ``None`` test: the argument is optional, and relying on the
    # truthiness of a metadata object is fragile.
    if meta_data is not None:
        # presumably returns a DataFrame-like ``meta`` plus the table
        # restricted to the matching samples -- verify in _sort_metada.
        meta, biom_table = _sort_metada(meta_data, biom_table)
        y = meta.iloc[:, 0]
        samples = meta.index
    else:
        samples = biom_table.ids('sample')
        # NOTE(review): this branch yields a one-column DataFrame while the
        # metadata branch yields a column selected with ``iloc``; the type
        # is kept as-is so existing callers are not affected.
        y = pd.DataFrame(data=np.ones(len(samples)), index=samples)

    _table = biom_table.sort_order(axis='sample', order=samples)

    table_samples = _table.ids('sample')
    # ``array_equal`` also copes with ID arrays of different lengths, where
    # an elementwise ``!=`` would not yield a per-sample result.
    if not np.array_equal(np.asarray(samples), np.asarray(table_samples)):
        difference = sorted(set(samples) ^ set(table_samples), key=str)
        raise ValueError(
            "The samples IDs in meta data and biom table are not the same! "
            "The difference is: {}. Please double check.".format(difference))

    return _table, y