def test_parse_newick_with_commas(self):
    # Guards against the bug where a comma is interpreted as a separator
    # even though it sits inside a quoted node name.
    in_ = "(('foo,bar':1,baz:2)x:3)r;"

    # skbio's own reader provides the reference tree for the same string
    exp = skbio.TreeNode.read([in_])
    obs = to_skbio_treenode(parse_newick(in_))

    # the test requires a subset distance of exactly 0.0 between the
    # parsed tree and the reference
    self.assertEqual(obs.compare_subsets(exp), 0.0)
def test_write_newick_edges(self):
    def roundtrip(newick):
        # parse -> write into an in-memory buffer -> parse again, and
        # return the result as an skbio tree
        stream = io.StringIO()
        # NOTE(review): the third positional argument presumably asks
        # write_newick to emit edge numbers -- confirm against its
        # signature
        write_newick(parse_newick(newick), stream, True)
        stream.seek(0)
        return to_skbio_treenode(parse_newick(stream.read()))

    with_edges = '((foo"bar":1{0},baz:2{1})x:3{2})r;'
    without_edges = "(((a)b)c,((d)e)f)r;"

    # explicit edge numbers must come back attached to the same nodes
    tree = roundtrip(with_edges)
    self.assertEqual(tree.find('foo"bar"').edge_num, 0)
    self.assertEqual(tree.find('baz').edge_num, 1)
    self.assertEqual(tree.find('x').edge_num, 2)

    # a tree without any edge annotation must report 0 on every node
    tree = roundtrip(without_edges)
    for node in tree.traverse():
        self.assertEqual(node.edge_num, 0)
def test_parse_jplace_multiple_per_fragment(self):
    # Fragments kept from the fixture, mapped to the extra placement row
    # appended to them (None: keep the fragment untouched).
    extra_placements = {
        '82': None,
        # tied on like_weight_ratio but lower pendant
        '99': [
            309, 0.04520741687623886, 1, 0.11020044356641526,
            0.00550337922097477
        ],
        # tied higher like_weight_ratio
        '55': [
            138, 0.09563944060686769, 10, 0.014593217782258146,
            0.04537214236560885
        ],
    }

    # ...adjust jplace data: drop every other fragment and inject the
    # additional placements
    payload = json.loads(self.jplacedata)
    kept = []
    for placement in payload['placements']:
        fragment = placement['n'][0]
        if fragment not in extra_placements:
            continue
        addition = extra_placements[fragment]
        if addition is not None:
            placement['p'].append(addition)
        kept.append(placement)
    payload['placements'] = kept

    # expected frame: one row per placement, original and injected alike
    expected = pd.DataFrame(
        [
            [
                "82", 361, 0.01013206496780672, 1, 0.02652932626620403,
                0.039354548684623215
            ],
            [
                "99", 308, 0.04520741687623886, 1, 0.11020044356641526,
                0.06550337922097477
            ],
            [
                "99", 309, 0.04520741687623886, 1, 0.11020044356641526,
                0.00550337922097477
            ],
            [
                "55", 139, 0.09563944060686769, 1, 0.014593217782258146,
                0.04537214236560885
            ],
            [
                "55", 138, 0.09563944060686769, 10, 0.014593217782258146,
                0.04537214236560885
            ],
        ],
        columns=[
            'fragment', 'edge_num', 'likelihood', 'like_weight_ratio',
            'distal_length', 'pendant_length'
        ],
    )

    obs_df, obs_tree = parse_jplace(json.dumps(payload))
    obs_tree = to_skbio_treenode(obs_tree)

    pdt.assert_frame_equal(obs_df, expected)
    self.assertEqual(obs_tree.compare_rfd(self.tree), 0)
def test_to_skbio_treenode(self):
    converted = to_skbio_treenode(self.bp)

    # walk both trees in lockstep and compare node by node
    for got, want in zip(converted.traverse(), self.sktn.traverse()):
        # a missing length on the reference is expected to come back as
        # 0.0, except on the root where it stays None
        want_length = want.length
        if want_length is None and not want.is_root():
            want_length = 0.0

        self.assertEqual(got.length, want_length)
        self.assertEqual(got.name, want.name)

    # the overall topology must render identically as well
    self.assertEqual(converted.ascii_art(), self.sktn.ascii_art())
def test_to_skbio_treenode_with_edge_numbers(self):
    # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
    # but without edge labels
    # 0 1 2 3 4 5 6 7 8 9
    # 1 1 1 0 1 0 0 1 0 0
    newick = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
    tree = to_skbio_treenode(parse_newick(newick))

    # every named node must carry the edge number from its {N} annotation
    for name, edge_num in (('A', 0), ('B', 1), ('D', 3), ('C', 4)):
        self.assertEqual(tree.find(name).edge_num, edge_num)

    # the unnamed root is annotated with {5}
    self.assertEqual(tree.edge_num, 5)
def test_place_jplace_square_braces(self):
    # rewrite the edge annotations of the fixture tree from {N} to [N]
    # and store the modified document back on the test case
    payload = json.loads(self.jplacedata)
    payload['tree'] = re.sub(r"{(\d+)}", r"[\1]", payload['tree'])
    self.jplacedata = json.dumps(payload)

    _, parsed = parse_jplace(self.jplacedata)
    parsed = to_skbio_treenode(parsed)

    # same topology as the reference tree ...
    self.assertEqual(parsed.compare_rfd(self.tree), 0)
    # ... and every non-root node carries a non-negative edge number
    for node in parsed.traverse(include_self=False):
        self.assertTrue(node.edge_num >= 0)
def test_parse_jplace_simple(self):
    # one expected row per fragment in the unmodified jplace fixture
    expected = pd.DataFrame(
        [
            [
                "82", 361, 0.01013206496780672, 1, 0.02652932626620403,
                0.039354548684623215
            ],
            [
                "99", 308, 0.04520741687623886, 1, 0.11020044356641526,
                0.06550337922097477
            ],
            [
                "43", 309, 0.04054866161921744, 1, 0.010712923050783987,
                0.020946988900520196
            ],
            [
                "195", 277, 0.01918907908397749, 1, 0.03065741838803451,
                0.04513513498399864
            ],
            [
                "162", 55, 0.01758935282545493, 1, 0.0033199487685078776,
                0.05388735804976052
            ],
            [
                "56", 81, 0.2366882303770561, 1, 0.04172580852519453,
                0.0007060238727097983
            ],
            [
                "91", 105, 0.0001863393767883581, 1, 0.04578898721138839,
                0.08655004339151215
            ],
            [
                "174", 89, 0.01216463967379211, 1, 0.04707020642820376,
                0.045206727542450205
            ],
            [
                "5", 143, 0.012162345471765756, 1, 0.023797389484252734,
                0.10447375403452556
            ],
            [
                "55", 139, 0.09563944060686769, 1, 0.014593217782258146,
                0.04537214236560885
            ],
        ],
        columns=[
            'fragment', 'edge_num', 'likelihood', 'like_weight_ratio',
            'distal_length', 'pendant_length'
        ],
    )

    obs_df, obs_tree = parse_jplace(self.jplacedata)
    obs_tree = to_skbio_treenode(obs_tree)

    pdt.assert_frame_equal(obs_df, expected)
    self.assertEqual(obs_tree.compare_rfd(self.tree), 0)
def subset_and_write_table_tree(
    otu_size: int,
    sample_size: int,
    density: float,
    rep: int,
    seed: int,
    table: biom.Table,
    tree: bp.BP,
    output_dir: str,
) -> None:
    """Write one random table subset and its matching sheared tree.

    A sub-table is drawn with ``get_random_subtable`` and saved as
    ``<name>.biom``; ``tree`` is sheared to the observation IDs of that
    sub-table and saved as ``<name>.newick``. ``<name>`` encodes
    ``otu_size``, ``sample_size``, ``rep``, ``seed`` and ``density`` and
    is placed inside ``output_dir``.

    ``otu_size``, ``sample_size``, ``seed`` and ``density`` are forwarded
    to ``get_random_subtable``; ``rep`` only appears in the file name.
    """
    # shared stem of both output files
    label = 'otu_size-{}--sample_size-{}--rep-{}--seed-{}--density-{}' \
        .format(otu_size, sample_size, rep, seed, density)
    prefix = os.path.join(output_dir, label)

    # draw the sub-table and tag it with the same label
    subtable = get_random_subtable(
        table,
        otu_size=otu_size,
        sample_size=sample_size,
        seed=seed,
        density=density,
    )
    subtable.table_id = label

    with biom_open(prefix + '.biom', 'w') as handle:
        subtable.to_hdf5(handle, "subset: " + label)

    # shear the tree down to the observations left in the sub-table
    kept_otus = set(subtable.ids('observation'))
    subtree = bp.to_skbio_treenode(tree.shear(kept_otus))

    # replace missing branch lengths with 0 before writing
    for node in subtree.traverse():
        if node.length is None:
            node.length = 0

    subtree.write(prefix + '.newick')
def _validate_and_match_data(self, ignore_missing_samples,
                             filter_missing_features,
                             filter_unobserved_features_from_phylogeny):
    """Prepare the tree and match it against the table and metadata.

    Optionally shears ``self.tree`` to the columns of ``self.table``,
    stores the tree's ``B`` attribute as a list in ``self._bp_tree``,
    converts the tree via ``to_skbio_treenode`` and ``Tree.from_tree``,
    fills in missing node names, and finally stores the result of
    ``match_inputs`` in ``self.table``, ``self.samples``, ``self.tip_md``
    and ``self.int_md``.

    ``ignore_missing_samples`` and ``filter_missing_features`` are passed
    through to ``match_inputs`` unchanged.
    """
    # remove unobserved features from the phylogeny
    if filter_unobserved_features_from_phylogeny:
        observed_features = set(self.table.columns)
        self.tree = self.tree.shear(observed_features)

    # extract the balanced parentheses before the tree is converted
    self._bp_tree = list(self.tree.B)

    skbio_tree = to_skbio_treenode(self.tree)
    self.tree = Tree.from_tree(skbio_tree)
    fill_missing_node_names(self.tree)

    # self.table is laid out with sample IDs on the index and feature IDs
    # on the columns. It is transposed for tools.match_inputs(), and the
    # transposed table returned from there is what the rest of this
    # visualizer keeps using.
    matched = match_inputs(
        self.tree,
        self.table.T,
        self.samples,
        self.features,
        ignore_missing_samples,
        filter_missing_features,
    )
    self.table, self.samples, self.tip_md, self.int_md = matched
def test_parse_newick_simple_edge_numbers(self):
    # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
    # but without edge labels
    # 0 1 2 3 4 5 6 7 8 9
    # 1 1 1 0 1 0 0 1 0 0
    annotated = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
    # skbio doesn't know about edge numbers
    plain = '((A:.01, B:.01)D:.01, C:.01);'

    obs = parse_newick(annotated)
    obs_sk = to_skbio_treenode(obs)
    reference = skbio.TreeNode.read([plain])
    self.assertEqual(obs_sk.compare_rfd(reference), 0)

    # (index, edge number) pairs; the indices are presumably positions in
    # the parenthesis vector sketched above -- TODO confirm
    index_to_edge = ((2, 0), (4, 1), (1, 3), (7, 4), (0, 5))

    # forward lookup: index -> edge number
    for index, edge_number in index_to_edge:
        self.assertEqual(obs.edge(index), edge_number)

    # reverse lookup: edge number -> index
    for index, edge_number in index_to_edge:
        self.assertEqual(obs.edge_from_number(edge_number), index)
def test_parse_newick_nested_quotes(self):
    # bug: quotes are removed
    newick = '((foo"bar":1,baz:2)x:3)r;'

    # reference tree from skbio's own reader for the same string
    reference = skbio.TreeNode.read([newick])

    parsed = parse_newick(newick)
    converted = to_skbio_treenode(parsed)

    self.assertEqual(converted.compare_subsets(reference), 0.0)
def test_parse_newick_with_parens(self):
    # bug: parens are getting interpreted even if in quotes
    newick = "(('foo(b)ar':1,baz:2)x:3)r;"

    # reference tree from skbio's own reader for the same string
    reference = skbio.TreeNode.read([newick])

    parsed = parse_newick(newick)
    converted = to_skbio_treenode(parsed)

    self.assertEqual(converted.compare_subsets(reference), 0.0)