def test_parse_newick_singlenode_bug(self):
    """A newick string holding a single node must be rejected."""
    # Regression test for
    # https://github.com/wasade/improved-octo-waddle/issues/29
    single_node_nwk = 'i:1;'

    # This edge case is deliberately unsupported: expect a ValueError.
    self.assertRaises(ValueError, parse_newick, single_node_nwk)
def test_parse_newick_no_semicolon_bug(self):
    """A newick string without the closing semicolon must be rejected."""
    # Regression test for
    # https://github.com/wasade/improved-octo-waddle/issues/26
    unterminated = "((h:1, i:1, j:1, k:1, l: 1),(e:1,f:1),(n:1,o:1,p:1))a:1"
    self.assertRaises(ValueError, parse_newick, unterminated)

    # The same tree, terminated and followed by a newline, has to parse
    # without raising.
    terminated = "((h:1, i:1, j:1, k:1, l: 1),(e:1,f:1),(n:1,o:1,p:1))a:1;\n"
    parse_newick(terminated)
def test_all_nonroot_branchlengths_zero(self):
    """validate_tree rejects trees whose non-root branches are all zero."""
    # The two inputs differ only in the root's length (0 vs. 1); both are
    # expected to fail validation with the same message.
    zero_length_newicks = (
        '((b:0)a:0)root:0;',
        '((b:0)a:0)root:1;',
    )
    for newick in zero_length_newicks:
        parsed = parse_newick(newick)
        self.assertRaisesRegex(
            ValueError, "must have a positive length",
            validate_tree, parsed)
def setUp(self):
    """Build the tree, table, metadata and ordination fixtures."""
    sample_ids = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
    axis_ids = ['PC1', 'PC2', 'PC3']

    self.tree = self.mock_tree_from_nwk()
    self.bp_tree = from_skbio_treenode(self.tree)

    # Written sample-by-row, then transposed before being handed to biom.
    counts = np.array([[1, 2, 0, 4],
                       [8, 7, 0, 5],
                       [1, 0, 0, 0],
                       [0, 0, 0, 0]])
    self.table = biom.Table(counts.T, ['a', 'b', 'e', 'd'],
                            list(sample_ids))

    sample_columns = {
        "Metadata1": [0, 0, 0, 1],
        "Metadata2": [0, 0, 0, 0],
        "Metadata3": [1, 2, 3, 4],
        "Metadata4": ["abc", "def", "ghi", "jkl"]
    }
    self.sample_metadata = pd.DataFrame(sample_columns,
                                        index=list(self.table.ids()))

    # Greengenes taxonomy annotations taken from the moving pictures
    # taxonomy.qza file; the confidences are made up.
    taxonomy = [
        ("k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
         "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
         "s__"),
        ("k__Bacteria; p__Proteobacteria; "
         "c__Gammaproteobacteria; o__Pasteurellales; "
         "f__Pasteurellaceae; g__; s__"),
        ("k__Bacteria; p__Bacteroidetes; c__Bacteroidia; "
         "o__Bacteroidales; f__Bacteroidaceae; g__Bacteroides; "
         "s__uniformis"),
    ]
    self.feature_metadata = pd.DataFrame(
        {"Taxonomy": taxonomy, "Confidence": [0.95, 0.8, 0]},
        index=["e", "h", "a"])

    split_result = split_taxonomy(self.feature_metadata)
    self.split_tax_fm, self.taxcols = split_result
    self.tip_md = self.split_tax_fm.loc[["a", "e"]]
    self.int_md = self.split_tax_fm.loc[["h"]]

    # This is designed to match the shearing that's done in the core test
    # for --p-shear-to-table
    self.shorn_tree = parse_newick(
        "(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;")

    self.exp_split_fm_cols = [
        "Level 1", "Level 2", "Level 3", "Level 4", "Level 5", "Level 6",
        "Level 7", "Confidence"
    ]

    # Ordination fixture
    eigvals = pd.Series([0.50, 0.25, 0.25], index=list(axis_ids))
    proportion_explained = pd.Series([15.5, 12.2, 8.8],
                                     index=list(axis_ids))
    coordinates = [[0.1, 0.2, 0.3],
                   [0.2, 0.3, 0.4],
                   [0.3, 0.4, 0.5],
                   [0.4, 0.5, 0.6]]
    samples_df = pd.DataFrame(coordinates, index=list(sample_ids),
                              columns=list(axis_ids))
    self.ordination = OrdinationResults(
        'PCoA', 'Principal Coordinate Analysis', eigvals, samples_df,
        proportion_explained=proportion_explained)
def test_match_inputs_feature_metadata_duplicate_name_internal_node(self):
    """Tests that feature metadata for internal nodes with duplicate names
    is preserved.

    In the JS interface, there are two options for coloring nodes by
    feature metadata: 1) just coloring tips (and propagating clades with
    uniform feature metadata upwards), or 2) coloring all nodes with
    feature metadata, which can include internal nodes. In 2), internal
    nodes with the same name will have the same feature metadata color.
    """
    # Slightly modified version of self.tree with duplicate internal node
    # names (i and g)
    t = parse_newick('(((a:1,e:2)i:1,b:2)g:1,(:1,d:3)g:2)i:1;')
    fm = self.feature_metadata.copy()
    fm.index = ["a", "g", "i"]
    f_table, f_sample_metadata, t_fm, i_fm = tools.match_inputs(
        t, self.table, self.sample_metadata, fm
    )
    # (check that we didn't mess up the table / sample metadata matching by
    # accident). The table fixture is a biom.Table, not a DataFrame, so it
    # is compared with assertEqual like the sibling match_inputs tests;
    # assert_frame_equal only accepts DataFrames.
    self.assertEqual(f_table, self.table)
    assert_frame_equal(f_sample_metadata, self.sample_metadata)

    split_fm = split_taxonomy(fm)
    # Main point of this test: all of the feature metadata should have been
    # kept, even though g and i were both duplicate node names.
    assert_frame_equal(t_fm, split_fm.loc[["a"]])
    assert_frame_equal(i_fm, split_fm.loc[["g", "i"]], check_like=True)
def test_to_array(self):
    """to_skbio_treearray output matches the hand-written expectation."""
    tree = parse_newick(
        '(((a:1,b:2,c:3)x:4,(d:5)y:6)z:7,(e:8,f:9)z:10);')

    # Expected arrays, one entry per node id (0..10).
    expected_children = np.array([[4, 0, 2],
                                  [5, 3, 3],
                                  [8, 4, 5],
                                  [9, 6, 7],
                                  [10, 8, 9]], dtype=np.uint32)
    expected_lengths = np.array([1, 2, 3, 5, 4, 6, 8, 9, 7, 10, 0.0],
                                dtype=np.double)
    expected_names = np.array(['a', 'b', 'c', 'd', 'x', 'y', 'e', 'f',
                               'z', 'z', None])
    # node id -> "is this node a tip?"
    expected_is_tip = dict(enumerate([True, True, True, True, False, False,
                                      True, True, False, False, False]))

    result = to_skbio_treearray(tree)
    observed_id_index = result['id_index']

    npt.assert_equal(result['child_index'], expected_children)
    npt.assert_equal(result['length'], expected_lengths)
    self.assertEqual(observed_id_index.keys(), expected_is_tip.keys())
    npt.assert_equal(result['name'], expected_names)

    for node_id in observed_id_index:
        self.assertEqual(observed_id_index[node_id].is_tip(),
                         expected_is_tip[node_id])
def test_parse_newick_with_commas(self):
    """A comma inside a quoted name must not be treated as a separator."""
    # bug: comma is getting interpreted even if in quotes
    in_ = "(('foo,bar':1,baz:2)x:3)r;"
    exp = skbio.TreeNode.read([in_])
    obs = to_skbio_treenode(parse_newick(in_))
    # The parsed tree is compared against scikit-bio's reading of the same
    # string. (Debug ascii_art() prints were removed: they only added noise
    # to the test run and the sibling quoting tests do not print.)
    self.assertEqual(obs.compare_subsets(exp), 0.0)
def test_write_newick_edges(self):
    """Edge numbers survive a write_newick / parse_newick round trip."""

    def roundtrip(newick):
        # Parse, write with the third argument set to True (as in the
        # original test), re-parse what was written and hand back the
        # scikit-bio view of it. write_newick's return value is not used.
        buf = io.StringIO()
        write_newick(parse_newick(newick), buf, True)
        return to_skbio_treenode(parse_newick(buf.getvalue()))

    test_a = '((foo"bar":1{0},baz:2{1})x:3{2})r;'
    test_b = "(((a)b)c,((d)e)f)r;"

    # Explicit edge numbers in the input are expected to be kept.
    obs = roundtrip(test_a)
    self.assertEqual(obs.find('foo"bar"').edge_num, 0)
    self.assertEqual(obs.find('baz').edge_num, 1)
    self.assertEqual(obs.find('x').edge_num, 2)

    # With no edge numbers in the input, every node is expected to
    # report 0.
    obs = roundtrip(test_b)
    for o in obs.traverse():
        self.assertEqual(o.edge_num, 0)
def test_nonroot_negative_branchlengths(self):
    """validate_tree rejects trees containing a negative branch length."""
    negative_length_newicks = (
        '((b:-1)a:1)root:1;',
        '((b:100)a:-100)root:0;',
        '(b:1,c:-1)a:2;',
        '((b:-1)a:0)root;',
    )
    for newick in negative_length_newicks:
        parsed = parse_newick(newick)
        self.assertRaisesRegex(
            ValueError, "must have nonnegative lengths",
            validate_tree, parsed)
def test_write_newick_cases(self):
    """write_newick reproduces each input, as judged by _compare_newick."""
    tests = [
        '((foo"bar":1,baz:2)x:3)r;',
        "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;",
        "(((a)b)c,((d)e)f)r;",
        "((a,(b,c):5)'d','e; foo':10,((f))g)r;"
    ]
    for test in tests:
        # subTest keeps going after a failure, so every broken case is
        # reported instead of only the first one.
        with self.subTest(newick=test):
            buf = io.StringIO()
            # The return value of write_newick is not needed; only the
            # text written into the buffer is checked.
            write_newick(parse_newick(test), buf, False)
            self._compare_newick(buf.getvalue(), test)
def test_parse_newick_unnamed_singledesc(self):
    """Parse a tree containing an unnamed node with a single child."""
    newick = "((a,b)c,d,(e))r;"
    expected_parens = np.asarray(
        [1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0], dtype=bool)
    # One entry per parenthesis position.
    expected_names = ['r', 'c', 'a', None, 'b', None, None,
                      'd', None, None, 'e', None, None, None]

    parsed = parse_newick(newick)

    npt.assert_equal(parsed.B, expected_parens)
    for position, expected_name in enumerate(expected_names):
        self.assertEqual(parsed.name(position), expected_name)
def test_parse_newick_singledesc(self):
    """Parse a tree made of chains of single-child nodes."""
    newick = "(((a)b)c,((d)e)f)r;"
    expected_parens = np.asarray(
        [1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0], dtype=bool)
    # One entry per parenthesis position.
    expected_names = ['r', 'c', 'b', 'a', None, None, None,
                      'f', 'e', 'd', None, None, None, None]

    parsed = parse_newick(newick)

    npt.assert_equal(parsed.B, expected_parens)
    for position, expected_name in enumerate(expected_names):
        self.assertEqual(parsed.name(position), expected_name)
def test_to_skbio_treenode_with_edge_numbers(self):
    """Edge numbers in the newick string show up on the skbio nodes."""
    # Tree from
    # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
    # but without edge labels
    newick = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
    root = to_skbio_treenode(parse_newick(newick))

    # (node name, expected edge number) for every named node
    named_expectations = (('A', 0), ('B', 1), ('D', 3), ('C', 4))
    for node_name, edge_number in named_expectations:
        self.assertEqual(root.find(node_name).edge_num, edge_number)

    # The root has no name in the input; check it directly.
    self.assertEqual(root.edge_num, 5)
def test_validate_tree_duplicate_internal_node_names(self):
    """validate_tree warns when internal node names are repeated."""
    duplicated_name_newicks = (
        # Two non-root internal nodes have same name
        '((a:1,b:3)c:2,(d:2,e:3)c:5)r:2;',
        # Two internal nodes (one of which is the root) have same name
        '((a:1,b:3)c:2,(d:2,e:3)f:5)c:2;',
    )
    for newick in duplicated_name_newicks:
        parsed = parse_newick(newick)
        self.assertWarnsRegex(
            TreeFormatWarning,
            "Internal node names in the tree are not unique",
            validate_tree, parsed)
def test_parse_newick_name_with_semicolon(self):
    """A quoted name containing a semicolon is parsed as one name."""
    newick = "((a,(b,c):5)'d','e; foo':10,((f))g)r;"
    expected_parens = np.asarray(
        [1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0],
        dtype=bool)
    # Names and lengths, one entry per parenthesis position.
    expected_names = ['r', 'd', 'a', None, None, 'b', None, 'c', None, None,
                      None, 'e; foo', None, 'g', None, 'f', None, None,
                      None, None]
    expected_lengths = [0, 0, 0, 0, 5, 0, 0, 0, 0, 0,
                        0, 10, 0, 0, 0, 0, 0, 0, 0, 0]

    parsed = parse_newick(newick)

    npt.assert_equal(parsed.B, expected_parens)
    expectations = zip(expected_names, expected_lengths)
    for position, (name, length) in enumerate(expectations):
        self.assertEqual(parsed.name(position), name)
        self.assertEqual(parsed.length(position), length)
def test_parse_newick_singledesc(self):
    """Check structure and names for chains of single-child nodes."""
    tree_text = "(((a)b)c,((d)e)f)r;"
    structure = [1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0]
    # Expected name at each parenthesis position
    names = [
        'r', 'c', 'b', 'a', None, None, None,
        'f', 'e', 'd', None, None, None, None,
    ]

    bp_tree = parse_newick(tree_text)
    npt.assert_equal(bp_tree.B, np.asarray(structure, dtype=bool))

    for idx, wanted in enumerate(names):
        found = bp_tree.name(idx)
        self.assertEqual(found, wanted)
def test_parse_newick_unnamed_singledesc(self):
    """Check structure and names when a single-child node is unnamed."""
    tree_text = "((a,b)c,d,(e))r;"
    structure = [1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0]
    # Expected name at each parenthesis position
    names = [
        'r', 'c', 'a', None, 'b', None, None,
        'd', None, None, 'e', None, None, None,
    ]

    bp_tree = parse_newick(tree_text)
    npt.assert_equal(bp_tree.B, np.asarray(structure, dtype=bool))

    for idx, wanted in enumerate(names):
        found = bp_tree.name(idx)
        self.assertEqual(found, wanted)
def test_parse_newick(self):
    """Parse a small tree with a mix of named/unnamed nodes and lengths."""
    newick = "((a:2,b):1,(c:4,d)y:20,e)r;"
    expected_parens = np.asarray(
        [1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0], dtype=bool)
    # Names and lengths, one entry per parenthesis position.
    expected_names = ['r', None, 'a', None, 'b', None, None, 'y',
                      'c', None, 'd', None, None, 'e', None, None]
    expected_lengths = [0, 1, 2, 0, 0, 0, 0, 20,
                        4, 0, 0, 0, 0, 0, 0, 0]

    parsed = parse_newick(newick)

    npt.assert_equal(parsed.B, expected_parens)
    expectations = zip(expected_names, expected_lengths)
    for position, (name, length) in enumerate(expectations):
        self.assertEqual(parsed.name(position), name)
        self.assertEqual(parsed.length(position), length)
def test_validate_tree_overlapping_tip_and_internal_node_names(self):
    """validate_tree rejects a tip name reused by an internal node."""
    overlapping_newicks = (
        # Tip overlaps with non-root internal node
        '((a:1,b:3)a:2,d:5)e:2;',
        # Tip overlaps with root node
        '((a:1,b:3)c:2,d:5)a:2;',
        # Tip overlaps with both non-root and root internal nodes
        '((a:1,b:3)a:2,d:5)a:2;',
    )
    expected_message = (
        "Tip names in the tree cannot overlap with internal node names"
    )
    for newick in overlapping_newicks:
        parsed = parse_newick(newick)
        self.assertRaisesRegex(
            ValueError, expected_message, validate_tree, parsed)
def test_validate_tree(self):
    """validate_tree accepts self.tree and leaves it unchanged."""
    validate_tree(self.tree)

    # Compare the validated tree against a fresh parse of self.nwk, walking
    # both in postorder (postorderselect is 1-based here).
    validated = self.tree
    reference = parse_newick(self.nwk)
    self.assertEqual(len(validated), len(reference))

    last_rank = len(reference)
    for rank in range(1, last_rank + 1):
        validated_node = validated.postorderselect(rank)
        reference_node = reference.postorderselect(rank)

        self.assertEqual(validated_node, reference_node)
        self.assertEqual(validated.length(validated_node),
                         reference.length(reference_node))
        self.assertEqual(validated.name(validated_node),
                         reference.name(reference_node))
def test_parse_newick_name_with_semicolon(self):
    """Check structure, names and lengths with a ';' inside a name."""
    tree_text = "((a,(b,c):5)'d','e; foo':10,((f))g)r;"
    structure = [1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
                 0, 1, 0, 1, 1, 1, 0, 0, 0, 0]
    # Expected name / length at each parenthesis position
    names = [
        'r', 'd', 'a', None, None, 'b', None, 'c', None, None,
        None, 'e; foo', None, 'g', None, 'f', None, None, None, None,
    ]
    lengths = [0, 0, 0, 0, 5, 0, 0, 0, 0, 0,
               0, 10, 0, 0, 0, 0, 0, 0, 0, 0]

    bp_tree = parse_newick(tree_text)
    npt.assert_equal(bp_tree.B, np.asarray(structure, dtype=bool))

    for idx, pair in enumerate(zip(names, lengths)):
        wanted_name, wanted_length = pair
        self.assertEqual(bp_tree.name(idx), wanted_name)
        self.assertEqual(bp_tree.length(idx), wanted_length)
def test_parse_newick(self):
    """Check structure, names and lengths for a small mixed tree."""
    tree_text = "((a:2,b):1,(c:4,d)y:20,e)r;"
    structure = [1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0]
    # Expected name / length at each parenthesis position
    names = [
        'r', None, 'a', None, 'b', None, None, 'y',
        'c', None, 'd', None, None, 'e', None, None,
    ]
    lengths = [0, 1, 2, 0, 0, 0, 0, 20, 4, 0, 0, 0, 0, 0, 0, 0]

    bp_tree = parse_newick(tree_text)
    npt.assert_equal(bp_tree.B, np.asarray(structure, dtype=bool))

    for idx, pair in enumerate(zip(names, lengths)):
        wanted_name, wanted_length = pair
        self.assertEqual(bp_tree.name(idx), wanted_name)
        self.assertEqual(bp_tree.length(idx), wanted_length)
def plot(output_dir: str, tree: NewickFormat, feature_table: pd.DataFrame,
         sample_metadata: qiime2.Metadata, pcoa: OrdinationResults = None,
         feature_metadata: qiime2.Metadata = None,
         ignore_missing_samples: bool = False,
         filter_missing_features: bool = False,
         number_of_features: int = 5,
         filter_unobserved_features_from_phylogeny: bool = True) -> None:
    """Build the Empress visualization and write it into ``output_dir``.

    The tree is parsed from the first line of the file behind ``tree``, the
    metadata objects are converted to DataFrames, and the resulting Empress
    page is written to ``empress.html`` together with its support files and
    a rendered ``index.html``.

    When ``pcoa`` carries feature vectors, ``pcoa.features`` is replaced
    (on the object passed in) by its ``number_of_features`` rows with the
    largest Euclidean distance from the origin.
    """
    has_biplot_features = pcoa is not None and pcoa.features is not None
    if has_biplot_features:
        # Keep only the top N most important features, ranked by the
        # magnitude of each feature vector (copied from q2-emperor).
        ranked = pcoa.features.copy()
        origin = np.zeros_like(ranked.columns)
        ranked['importance'] = ranked.apply(euclidean, axis=1,
                                            args=(origin, ))
        ranked = ranked.sort_values('importance', ascending=False)
        ranked = ranked.drop(['importance'], axis=1)
        pcoa.features = ranked[:number_of_features].copy()

    sample_metadata = sample_metadata.to_dataframe()
    if feature_metadata is not None:
        feature_metadata = feature_metadata.to_dataframe()

    # str(tree) is the path to the actual newick file
    with open(str(tree)) as newick_file:
        newick_text = newick_file.readline()
    t = parse_newick(newick_text)

    viz = Empress(
        tree=t,
        table=feature_table,
        sample_metadata=sample_metadata,
        feature_metadata=feature_metadata,
        ordination=pcoa,
        ignore_missing_samples=ignore_missing_samples,
        filter_missing_features=filter_missing_features,
        filter_unobserved_features_from_phylogeny=(
            filter_unobserved_features_from_phylogeny))

    html_path = os.path.join(output_dir, 'empress.html')
    with open(html_path, 'w') as html_file:
        html_file.write(str(viz))

    viz.copy_support_files(output_dir)

    q2templates.render(os.path.join(TEMPLATES, 'index.html'), output_dir)
def get_bp(newickfmt):
    """Loads a bp.BP tree from a QIIME 2 NewickFormat object.

    Only the first line of the underlying file is read and parsed.

    This function, along with save_viz(), was moved here from _plot.py so
    it could be reused between different Empress commands.

    Parameters
    ----------
    newickfmt : q2_types.tree.NewickFormat

    Returns
    -------
    bp.BP
    """
    # str(newickfmt) is used as the path of the newick file.
    with open(str(newickfmt)) as treefile:
        newick_text = treefile.readline()
    # The file is already closed by the time the text is parsed.
    return parse_newick(newick_text)
def test_parse_newick_complex(self):
    """Parse a larger tree mixing polytomies, lengths and single children."""
    newick = "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;"
    expected_parens = np.asarray(
        [1, 1, 1, 1, 0, 1, 0, 0, 1, 0,
         1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
         1, 0, 1, 0, 0, 0, 0, 1, 0, 0], dtype=bool)
    # Names and lengths, one entry per parenthesis position.
    expected_names = ['r', 'l', 'c', 'a', None, 'b', None, None, 'd', None,
                      None, 'e', None, None, 'k', 'f', None, 'g', None, 'j',
                      'h', None, 'i', None, None, None, None, 'm', None,
                      None]
    expected_lengths = [0, 0, 6, 1, 0, 2.5, 0, 0, 8, 0,
                        0, 0, 0, 0, 1.2, 0, 0, 0, 0, 1,
                        1, 0, 2, 0, 0, 0, 0, 2, 0, 0]

    parsed = parse_newick(newick)

    npt.assert_equal(parsed.B, expected_parens)
    expectations = zip(expected_names, expected_lengths)
    for position, (name, length) in enumerate(expectations):
        self.assertEqual(parsed.name(position), name)
        self.assertEqual(parsed.length(position), length)
def test_match_inputs_feature_metadata_root_metadata_allowed(self):
    """Tests that feature metadata for the root node is preserved."""
    # Slightly modified version of self.tree where root has a name (i)
    named_root_tree = parse_newick(
        '(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;')
    feature_md = self.feature_metadata.copy()
    feature_md.index = ["a", "g", "i"]

    matched = tools.match_inputs(
        named_root_tree, self.table, self.sample_metadata, feature_md)
    out_table, out_sample_md, tip_md, internal_md = matched

    # Sanity check: the table and sample metadata come back unchanged.
    self.assertEqual(out_table, self.table)
    assert_frame_equal(out_sample_md, self.sample_metadata)

    expected_md = split_taxonomy(feature_md)
    # Main point of this test: all of the feature metadata should have been
    # kept, since a, g, and i are all included in the tree (i in particular
    # is important to verify, since it's the root)
    assert_frame_equal(tip_md, expected_md.loc[["a"]])
    assert_frame_equal(internal_md, expected_md.loc[["g", "i"]],
                       check_like=True)
def test_match_inputs_feature_metadata_only_internal_node_metadata(self):
    """Tests that feature metadata only for internal nodes is allowed."""
    # Slightly modified version of self.tree where root has a name (i)
    named_root_tree = parse_newick(
        '(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2)i:1;')
    feature_md = self.feature_metadata.copy()
    feature_md.index = ["h", "g", "i"]

    matched = tools.match_inputs(
        named_root_tree, self.table, self.sample_metadata, feature_md)
    out_table, out_sample_md, tip_md, internal_md = matched

    self.assertEqual(out_table, self.table)
    assert_frame_equal(out_sample_md, self.sample_metadata)

    expected_md = split_taxonomy(feature_md)
    # 1) Check that tip metadata is empty
    self.assertEqual(len(tip_md.index), 0)
    # 2) Check that internal node metadata was preserved
    assert_frame_equal(internal_md, expected_md.loc[feature_md.index],
                       check_like=True)
    # 3) Check that columns on both DFs are identical
    for metadata_df in (tip_md, internal_md):
        self.assertListEqual(list(metadata_df.columns),
                             self.exp_split_fm_cols)
def check_and_process_files(output_dir, tree_file, feature_metadata):
    """Initial checks and processing of files for standalone CLI plotting.

    Parameters
    ----------
    output_dir : str
        Directory the plot will be written to; it must not exist yet.
    tree_file : str
        Path to a newick file. Only its first line is parsed.
    feature_metadata : str or None
        Path to a tab-separated feature metadata file whose first column
        is used as the index, or None to skip feature metadata.

    Returns
    -------
    tree
        Whatever parse_newick returns for the first line of ``tree_file``.
    pd.DataFrame or None
        The loaded feature metadata, or None if no path was given.

    Raises
    ------
    OSError
        If ``output_dir`` already exists as a directory.
    """
    # Refuse to touch an existing directory before reading any input.
    if os.path.isdir(output_dir):
        raise OSError("Output directory already exists!")

    with open(str(tree_file), "r") as newick_file:
        first_line = newick_file.readline()
    parsed_tree = parse_newick(first_line)

    loaded_metadata = feature_metadata
    if loaded_metadata is not None:
        loaded_metadata = pd.read_csv(loaded_metadata, sep="\t",
                                      index_col=0)

    return parsed_tree, loaded_metadata
def test_parse_newick_simple_edge_numbers(self):
    """Edge numbers are parsed and can be looked up in both directions."""
    # Tree from
    # https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
    # but without edge labels
    numbered_newick = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
    # skbio doesn't know about edge numbers, so the reference tree is the
    # same newick without them.
    plain_newick = '((A:.01, B:.01)D:.01, C:.01);'

    parsed = parse_newick(numbered_newick)
    observed_tree = to_skbio_treenode(parsed)
    reference_tree = skbio.TreeNode.read([plain_newick])
    self.assertEqual(observed_tree.compare_rfd(reference_tree), 0)

    # (index passed to edge(), edge number) pairs
    index_to_edge = ((2, 0), (4, 1), (1, 3), (7, 4), (0, 5))

    for index, edge_number in index_to_edge:
        self.assertEqual(parsed.edge(index), edge_number)

    # ...and the reverse lookup must agree.
    for index, edge_number in index_to_edge:
        self.assertEqual(parsed.edge_from_number(edge_number), index)
def test_parse_newick_complex(self):
    """Check structure, names and lengths for a larger, irregular tree."""
    tree_text = "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;"
    structure = [
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1,
        1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0,
    ]
    # Expected name / length at each parenthesis position
    names = [
        'r', 'l', 'c', 'a', None, 'b', None, None, 'd', None,
        None, 'e', None, None, 'k', 'f', None, 'g', None, 'j',
        'h', None, 'i', None, None, None, None, 'm', None, None,
    ]
    lengths = [
        0, 0, 6, 1, 0, 2.5, 0, 0, 8, 0, 0, 0, 0, 0, 1.2,
        0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 2, 0, 0,
    ]

    bp_tree = parse_newick(tree_text)
    npt.assert_equal(bp_tree.B, np.asarray(structure, dtype=bool))

    for idx, pair in enumerate(zip(names, lengths)):
        wanted_name, wanted_length = pair
        self.assertEqual(bp_tree.name(idx), wanted_name)
        self.assertEqual(bp_tree.length(idx), wanted_length)
def setUp(self):
    """Store one newick string plus its parse_newick and skbio readings."""
    newick = "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;"
    self.tstr = newick
    # Both objects are built from the same text.
    self.bp = parse_newick(newick)
    self.sktn = skbio.TreeNode.read(StringIO(newick))
def setUp(self):
    """Build the trees, tables, metadata and ordinations for the tests."""
    all_samples = ['Sample1', 'Sample2', 'Sample3', 'Sample4']
    axes = ['PC1', 'PC2', 'PC3']

    self.tree = parse_newick('(((a:1,e:2):1,b:2)g:1,(:1,d:3)h:2):1;')
    self.pruned_tree = TreeNode.read(
        StringIO('(((a:1)EmpressNode0:1,b:2)g:1,(d:3)h:2)EmpressNode1:1;'))

    # Test table/metadata (mostly) adapted from Qurro. Counts are written
    # sample-by-row and transposed before being handed to biom.
    counts = np.array([[1, 2, 0, 4],
                       [8, 7, 0, 5],
                       [1, 0, 0, 0],
                       [0, 0, 1, 0]])
    self.table = biom.Table(counts.T, ['a', 'b', 'e', 'd'],
                            list(all_samples))

    # Same samples, but features h/i/j/k instead of a/b/e/d.
    unrelated_counts = np.array([[5, 2, 0, 2],
                                 [2, 3, 0, 1],
                                 [5, 2, 0, 0],
                                 [4, 5, 0, 4]])
    self.unrelated_table = biom.Table(unrelated_counts.T,
                                      ['h', 'i', 'j', 'k'],
                                      list(all_samples))

    self.sample_metadata = pd.DataFrame(
        {
            "Metadata1": [0, 0, 0, 1],
            "Metadata2": [0, 0, 0, 0],
            "Metadata3": [1, 2, 3, 4],
            "Metadata4": ["abc", "def", "ghi", "jkl"]
        },
        index=list(self.table.ids()))

    self.feature_metadata = pd.DataFrame(
        {
            "fmdcol1": ["asdf", "ghjk"],
            "fmdcol2": ["qwer", "tyui"]
        },
        index=["a", "h"])

    # Three-sample, three-feature versions of the table and metadata.
    filtered_counts = np.array([[1, 2, 4], [8, 7, 5], [1, 0, 0]])
    self.filtered_table = biom.Table(filtered_counts.T,
                                     ['a', 'b', 'd'],
                                     ['Sample1', 'Sample2', 'Sample3'])
    self.filtered_sample_metadata = pd.DataFrame(
        {
            "Metadata1": [0, 0, 0],
            "Metadata2": [0, 0, 0],
            "Metadata3": [1, 2, 3],
            "Metadata4": ["abc", "def", "ghi"]
        },
        index=["Sample1", "Sample2", "Sample3"])

    # Ordination fixtures share eigenvalues, sample coordinates and
    # proportion explained; they differ only in their feature vectors.
    eigvals = pd.Series(np.array([0.50, 0.25, 0.25]), index=list(axes))
    proportion_explained = pd.Series([15.5, 12.2, 8.8], index=list(axes))
    coordinates = np.array([[0.1, 0.2, 0.3],
                            [0.2, 0.3, 0.4],
                            [0.3, 0.4, 0.5],
                            [0.4, 0.5, 0.6]])
    samples_df = pd.DataFrame(coordinates, index=list(all_samples),
                              columns=list(axes))

    def build_ordination(**extra):
        return skbio.OrdinationResults(
            'PCoA', 'Principal Coordinate Analysis', eigvals, samples_df,
            proportion_explained=proportion_explained, **extra)

    self.pcoa = build_ordination()

    # Feature ids prefixed with 'f.' ('f.Sample1', 'f.Sample2')
    features = np.abs(samples_df.copy() / 2.0).iloc[:2, :]
    features.index = 'f.' + features.index
    self.biplot_no_matches = build_ordination(features=features)

    # Feature ids 'a' and 'h'
    features = np.abs(samples_df / 2.0).iloc[:2, :]
    features.index = pd.Index(['a', 'h'])
    self.biplot = build_ordination(features=features)

    self.biplot_tree = parse_newick(
        '(((y:1,z:2):1,b:2)g:1,(:1,d:3)h:2):1;')
    biplot_counts = np.array([[1, 2], [8, 7], [1, 0], [0, 3]])
    self.biplot_table = biom.Table(biplot_counts.T, ['y', 'z'],
                                   list(all_samples))

    self.files_to_remove = []
    self.maxDiff = None
def main(tree, table, sample_metadata, feature_metadata, ordination,
         ignore_missing_samples, filter_extra_samples, filter_missing_features,
         shear_to_table, number_of_features):
    """Generate a development plot

    If no arguments are provided the moving pictures dataset will be loaded,
    and a tandem plot will be generated. Alternatively, the user can input a
    new dataset.
    """
    # NOTE: the docstring above is left untouched on purpose; it may be
    # surfaced as command-line help, so the behavior notes live in comments.
    #
    # tree, table and sample_metadata must be given together:
    #   * none of them given -> load the moving pictures data
    #   * all of them given  -> load the user's files
    #   * only some given    -> ValueError
    # The first check used to be "any of them is None", which silently
    # replaced a partial input with the default dataset and made the
    # ValueError branch unreachable.
    required = (tree, table, sample_metadata)

    # by default load the moving pictures data (tandem plot)
    if all(arg is None for arg in required):
        tree, table, sample_metadata, feature_metadata, ordination = \
            load_mp_data()
        filter_extra_samples = True
    # otherwise require a tree, table and sample metadata
    elif all(arg is not None for arg in required):
        tree = q2.Artifact.load(tree)
        table = q2.Artifact.load(table)
        sample_metadata = q2.Metadata.load(sample_metadata)

        if feature_metadata is not None:
            feature_metadata = q2.Artifact.load(feature_metadata).view(
                q2.Metadata)

        if ordination is not None:
            ordination = q2.Artifact.load(ordination)
    else:
        raise ValueError('Tree, table and sample metadata are required!')

    # Only the first line of the newick file is parsed.
    with open(str(tree.view(NewickFormat))) as f:
        tree = parse_newick(f.readline())

    table = table.view(biom.Table)
    sample_metadata = sample_metadata.to_dataframe()
    # Feature metadata is optional when the user supplies their own data,
    # so only convert it when it is present.
    if feature_metadata is not None:
        feature_metadata = feature_metadata.to_dataframe()

    if ordination is not None:
        ordination = ordination.view(OrdinationResults)

        if ordination.features is not None:
            # select the top N most important features based on the vector's
            # magnitude (copied from q2-emperor)
            feats = ordination.features.copy()
            # in cases where the axes are all zero there might be all-NA
            # columns
            feats.fillna(0, inplace=True)
            origin = np.zeros_like(feats.columns)
            feats['importance'] = feats.apply(euclidean, axis=1,
                                              args=(origin, ))
            feats.sort_values('importance', inplace=True, ascending=False)
            feats.drop(['importance'], inplace=True, axis=1)
            ordination.features = feats[:number_of_features].copy()

    # These two lines fetch the JS files for both apps directly from the
    # installation directory - this makes testing/developing easier
    empress_resources = pkg_resources.resource_filename(
        'empress', 'support_files')
    emperor_resources = get_emperor_support_files_dir()

    viz = Empress(table=table, tree=tree, ordination=ordination,
                  feature_metadata=feature_metadata,
                  sample_metadata=sample_metadata,
                  resource_path=empress_resources,
                  ignore_missing_samples=ignore_missing_samples,
                  filter_extra_samples=filter_extra_samples,
                  filter_missing_features=filter_missing_features,
                  shear_to_table=shear_to_table)

    if ordination is not None:
        viz._emperor.base_url = emperor_resources

    with open('development-page.html', 'w') as f:
        f.write(viz.make_empress())
def test_parse_newick_nested_quotes(self):
    """Double quotes inside a node name must be kept by the parser."""
    # bug: quotes are removed
    newick = '((foo"bar":1,baz:2)x:3)r;'
    reference_tree = skbio.TreeNode.read([newick])
    parsed_tree = to_skbio_treenode(parse_newick(newick))

    # A subset distance of 0.0 against scikit-bio's reading of the same
    # string is the pass criterion.
    subset_distance = parsed_tree.compare_subsets(reference_tree)
    self.assertEqual(subset_distance, 0.0)
def test_parse_newick_with_parens(self):
    """Parentheses inside a quoted name must not open or close a clade."""
    # bug: parens are getting interpreted even if in quotes
    newick = "(('foo(b)ar':1,baz:2)x:3)r;"
    reference_tree = skbio.TreeNode.read([newick])
    parsed_tree = to_skbio_treenode(parse_newick(newick))

    # A subset distance of 0.0 against scikit-bio's reading of the same
    # string is the pass criterion.
    subset_distance = parsed_tree.compare_subsets(reference_tree)
    self.assertEqual(subset_distance, 0.0)