def filter_values(table: biom.Table, th: float) -> biom.Table:
    """Zero out low-abundance features within each sample of a table.

    Parameters
    ----------
    table : biom.Table
        Table to filter. It is processed through ``transform`` and
        ``remove_empty`` calls whose return values are discarded, and the
        very same object is returned.
    th : float
        Threshold. A value greater than 1 is used directly as the cutoff;
        any other value is multiplied by the sum of the sample's data to
        obtain the cutoff for that sample.

    Returns
    -------
    biom.Table
        The ``table`` argument, after values below the per-sample cutoff
        were set to 0 and empty observations were removed.
    """
    def zero_below_cutoff(data, id_, md):
        # Pick the cutoff for this sample: absolute when th > 1,
        # otherwise proportional to the sample's own total.
        if th > 1:
            cutoff = th
        else:
            cutoff = data.sum() * th
        data[data < cutoff] = 0
        return data

    table.transform(zero_below_cutoff, axis='sample')
    # Zeroing values may leave observations with no data at all.
    table.remove_empty(axis='observation')
    return table
def round_biom(table: biom.Table):
    """Round a BIOM table's data to integers and drop empty observations in
    place.

    Parameters
    ----------
    table : biom.Table
        BIOM table to round.

    Notes
    -----
    This function will not drop empty samples.

    Each observation's data vector is passed element-wise through `intize`.
    Empty data vectors are returned unchanged instead of being handed to the
    vectorized function.
    """
    # NOTE(review): SOURCE also contains a later ``round_biom(table,
    # digits=None)``; if both definitions sit in the same module, the later
    # one replaces this one at import time.
    f = np.vectorize(intize)

    def round_values(data, id_, md):
        # np.vectorize infers its output dtype from the first element and
        # raises ValueError on size-0 input when `otypes` is not set, so an
        # empty vector is passed through untouched.
        if np.size(data) == 0:
            return data
        return f(data)

    table.transform(round_values, axis='observation')
    table.remove_empty(axis='observation')
def round_biom(table: biom.Table, digits=None):
    """Round a BIOM table's data and drop empty observations in place.

    Parameters
    ----------
    table : biom.Table
        BIOM table to round.
    digits : int, optional
        Digits after the decimal point. Forwarded unchanged to `rounder` as
        its ``digits`` keyword argument.

    Notes
    -----
    This function will not drop empty samples.

    Each observation's data vector is passed element-wise through `rounder`.
    Empty data vectors are returned unchanged instead of being handed to the
    vectorized function.
    """
    # NOTE(review): SOURCE also contains an earlier ``round_biom(table)``
    # without ``digits``; if both definitions sit in the same module, this
    # later one is the definition that remains bound.
    f = np.vectorize(partial(rounder, digits=digits))

    def round_values(data, id_, md):
        # np.vectorize infers its output dtype from the first element and
        # raises ValueError on size-0 input when `otypes` is not set, so an
        # empty vector is passed through untouched.
        if np.size(data) == 0:
            return data
        return f(data)

    table.transform(round_values, axis='observation')
    table.remove_empty(axis='observation')
def plot(output_dir, table: biom.Table, metadata: q2.Metadata,
         case_where: str, control_where: str,
         feature_tree: skbio.TreeNode = None):
    """Write the static assets and JSONP data files for the plot.

    The contents of ``PLOT/assets/dist`` are copied into `output_dir`, a
    ``data`` sub-directory is created, and two files are written into it:
    ``packed_table.jsonp`` (the output of `table_to_b64pa`, wrapped in
    ``LOAD_PACKED_TABLE(...);``) and ``tree.jsonp`` (the output of
    `tree_to_array` plus a ``lookup`` entry, wrapped in ``LOAD_TREE(...);``).

    Parameters
    ----------
    output_dir : str
        Directory receiving the assets and the ``data`` sub-directory.
        ``data`` must not exist yet because it is created with ``os.mkdir``.
    table : biom.Table
        Feature table. It is filtered to the selected samples and stripped
        of empty observations through calls whose return values are
        discarded.
    metadata : q2.Metadata
        Sample metadata; restricted to the sample IDs of `table` before the
        selections are evaluated.
    case_where : str
        Passed to ``metadata.get_ids`` to select the case samples.
    control_where : str
        Passed to ``metadata.get_ids`` to select the control samples.
    feature_tree : skbio.TreeNode, optional
        Tree passed through `shear_no_prune` with the remaining feature IDs.
        When omitted, an empty ``TreeNode()`` is used instead.
    """
    # The tree is optional, so only dump it when one was actually given;
    # calling ``.write`` unconditionally raised AttributeError for the
    # default ``feature_tree=None``.
    # NOTE(review): this fixed-path dump looks like a debugging leftover
    # (nothing in this function reads it back, and '/tmp' is a shared,
    # platform-specific location) - confirm and consider removing it.
    if feature_tree is not None:
        with open('/tmp/tree.nwk', 'w') as fh:
            feature_tree.write(fh)

    copy_tree(os.path.join(PLOT, 'assets', 'dist'), output_dir)
    data_dir = os.path.join(output_dir, 'data')
    os.mkdir(data_dir)

    metadata = metadata.filter_ids(table.ids(axis='sample'))
    case_samples = sorted(metadata.get_ids(case_where))
    control_samples = sorted(metadata.get_ids(control_where))
    sample_order = case_samples + control_samples

    table.filter(sample_order)
    table.remove_empty('observation')
    features = list(table.ids(axis='observation'))

    if feature_tree is not None:
        feature_tree = shear_no_prune(feature_tree, features)
    else:
        feature_tree = TreeNode()

    tree_data = tree_to_array(feature_tree)
    # Positions whose 'children' entry is 0 are used as the tips; 'lookup'
    # maps each such position (as a string key) to its rank among them.
    idx, = np.where(np.asarray(tree_data['children']) == 0)
    tree_data['lookup'] = dict(zip(map(str, idx), range(len(idx))))
    tip_order = np.asarray(tree_data['names'])[idx]

    # Observations follow the tip order; samples are cases then controls.
    table = table.sort_order(tip_order, axis='observation')
    table = table.sort_order(sample_order, axis='sample')

    with open(os.path.join(data_dir, 'packed_table.jsonp'), 'w') as fh:
        fh.write('LOAD_PACKED_TABLE(')
        fh.write(json.dumps(table_to_b64pa(table)))
        fh.write(');')

    with open(os.path.join(data_dir, 'tree.jsonp'), 'w') as fh:
        fh.write('LOAD_TREE(')
        fh.write(json.dumps(tree_data))
        fh.write(');')
def simple_plot(output_dir, table: biom.Table, feature_tree: skbio.TreeNode,
                metadata: q2.Metadata, case_where: str, control_where: str,
                n_transects: int = 10, stratify_by: str = None,
                mode: str = 'max'):
    """Render layered comparison figures for a series of tree transects.

    Files written below `output_dir`:

    - ``layers/original.png`` and ``layers/original.h.png`` for the
      uncollapsed table,
    - ``layers/T_<distance>.png`` and ``layers/T_<distance>.h.png`` for every
      transect,
    - ``layers/trajectory.png`` and ``layers/bg.png``,
    - whatever `write_ranks` writes into ``ranks/`` for every transect,
    - ``collapsed_groups.tsv`` with one column per transect,
    - ``index.html`` rendered from ``TEMPLATE``.

    Parameters
    ----------
    output_dir : str
        Target directory; ``layers`` and ``ranks`` are created inside it with
        ``os.mkdir`` and therefore must not exist yet.
    table : biom.Table
        Feature table. It is filtered to the selected samples and stripped
        of empty observations through calls whose return values are
        discarded.
    feature_tree : skbio.TreeNode
        Tree passed through `shear_no_prune` with the remaining feature IDs.
        Falsy branch lengths are then set to 0 on its nodes.
    metadata : q2.Metadata
        Sample metadata; restricted to the sample IDs of `table` first.
    case_where : str
        Passed to ``metadata.get_ids`` to select the case samples.
    control_where : str
        Passed to ``metadata.get_ids`` to select the control samples.
    n_transects : int
        Number of evenly spaced transect distances between 0 and the first
        entry of the tree's ``distances``. Lowered to the number of unique
        distances when that is smaller.
    stratify_by : str, optional
        Forwarded to `comparisons`.
    mode : str
        Forwarded to `tree_to_array`.
    """
    print("Data extracted")
    layer_dir = os.path.join(output_dir, 'layers')
    rank_dir = os.path.join(output_dir, 'ranks')
    for directory in (layer_dir, rank_dir):
        os.mkdir(directory)

    def save_layer(fig, filename):
        # Each layer is stored as a transparent image and the figure is
        # closed immediately afterwards.
        fig.savefig(os.path.join(layer_dir, filename), transparent=True)
        plt.close(fig)

    metadata = metadata.filter_ids(table.ids(axis='sample'))
    case_samples = sorted(metadata.get_ids(case_where))
    control_samples = sorted(metadata.get_ids(control_where))
    get_pairs = comparisons(metadata, control_samples, case_samples,
                            stratify_by)

    table.filter(case_samples + control_samples)
    table.remove_empty('observation')
    feature_tree = shear_no_prune(
        feature_tree, list(table.ids(axis='observation')))
    print("Extraneous features removed")

    # Nodes with a missing (or zero) branch length get an explicit 0.
    for node in feature_tree.traverse():
        if not node.length:
            node.length = 0
    tree = tree_to_array(feature_tree, mode)
    print("Tree index created")

    tree_length = tree['distances'][0]  # root of tree
    possible_transects = len(np.unique(np.asarray(tree['distances'])))
    if n_transects > possible_transects:
        n_transects = possible_transects
        print("Only %d transects exist, using that instead" % n_transects)

    transects = list(np.linspace(0, tree_length, num=n_transects))
    print("Will transect at: %s" % ", ".join(str(t) for t in transects))

    figure_gen = prepare_plot(tree_length)
    figure_gen.send(None)  # initialize co-routine

    # First layer: the table as it is, before any collapsing.
    points, _ = pairwise_components(table, get_pairs())
    color_fig, highlight_fig, color = figure_gen.send((points, None))
    save_layer(color_fig, 'original.png')
    save_layer(highlight_fig, 'original.h.png')
    colors = [color]

    rank_files = []
    collapsed_groups = pd.DataFrame()
    for distance in transects:
        collapsed_table, collapsed_counts, groups = group_by_transect(
            table, tree, distance)
        collapsed_groups[groups.name] = groups
        print("Table collapsed at transect %s" % distance)

        points, ranks = pairwise_components(collapsed_table, get_pairs())
        rank_files.append(
            write_ranks(rank_dir, collapsed_counts, ranks, distance))

        color_fig, highlight_fig, color = figure_gen.send((points, distance))
        colors.append(color)
        save_layer(color_fig, 'T_%s.png' % distance)
        save_layer(highlight_fig, 'T_%s.h.png' % distance)

    print("Finalizing visualization")
    # A (None, None) message makes the co-routine yield the trajectory
    # figure; advancing it once more yields the background figure.
    trajectory = figure_gen.send((None, None))
    save_layer(trajectory, 'trajectory.png')
    background = next(figure_gen)
    save_layer(background, 'bg.png')

    with open(os.path.join(output_dir, 'collapsed_groups.tsv'), 'w') as fh:
        collapsed_groups.to_csv(fh, sep='\t')

    with open(os.path.join(output_dir, 'index.html'), 'w') as fh:
        template = Environment(loader=BaseLoader).from_string(TEMPLATE)
        layer_names = (['original']
                       + ['T_%s' % d for d in transects]
                       + ['trajectory'])
        layer_colors = [to_hex(c) for c in colors] + ['red']
        fh.write(template.render({
            'legend': list(zip(layer_names, layer_colors)),
            'filenames': rank_files,
        }))