def test_parse_newick_with_commas(self):
    # Regression check: a comma inside a quoted label was being interpreted
    # by the parser; the result must agree with skbio's reading of the
    # same string.
    newick = "(('foo,bar':1,baz:2)x:3)r;"
    expected = skbio.TreeNode.read([newick])
    observed = to_skbio_treenode(parse_newick(newick))
    # dump both trees (observed first) so a mismatch can be eyeballed
    for tree in (observed, expected):
        print(tree.ascii_art())
    self.assertEqual(observed.compare_subsets(expected), 0.0)
Exemple #2
0
 def test_parse_newick_with_commas(self):
     # Regression check: a comma inside a quoted label was being interpreted
     # by the parser; the result must agree with skbio's reading of the
     # same string.
     newick = "(('foo,bar':1,baz:2)x:3)r;"
     expected = skbio.TreeNode.read([newick])
     observed = to_skbio_treenode(parse_newick(newick))
     # dump both trees (observed first) so a mismatch can be eyeballed
     for tree in (observed, expected):
         print(tree.ascii_art())
     self.assertEqual(observed.compare_subsets(expected), 0.0)
    def test_write_newick_edges(self):
        # Edge numbers given in {N} braces must survive a write/re-parse
        # cycle; a tree written from input without them reads back with
        # edge_num 0 on every node.
        def round_trip(newick):
            # write_newick's third argument is passed as True here;
            # presumably it turns on edge-number output -- TODO confirm
            stream = io.StringIO()
            write_newick(parse_newick(newick), stream, True)
            stream.seek(0)
            return to_skbio_treenode(parse_newick(stream.read()))

        numbered = round_trip('((foo"bar":1{0},baz:2{1})x:3{2})r;')
        for label, edge in (('foo"bar"', 0), ('baz', 1), ('x', 2)):
            self.assertEqual(numbered.find(label).edge_num, edge)

        unnumbered = round_trip("(((a)b)c,((d)e)f)r;")
        for node in unnumbered.traverse():
            self.assertEqual(node.edge_num, 0)
    def test_parse_jplace_multiple_per_fragment(self):
        # Gives fragments "99" and "55" a second placement in the fixture,
        # drops every fragment other than "82", "99" and "55", and checks
        # the parsed frame against the expected rows below.
        columns = [
            'fragment', 'edge_num', 'likelihood', 'like_weight_ratio',
            'distal_length', 'pendant_length'
        ]

        # Extra placements injected below; values are listed in the same
        # order as the expected columns that follow 'fragment'.
        # tied on like_weight_ratio but lower pendant
        extra_99 = [
            309, 0.04520741687623886, 1, 0.11020044356641526,
            0.00550337922097477
        ]
        # same likelihood, higher like_weight_ratio
        extra_55 = [
            138, 0.09563944060686769, 10, 0.014593217782258146,
            0.04537214236560885
        ]

        expected_rows = [
            ["82", 361, 0.01013206496780672, 1, 0.02652932626620403,
             0.039354548684623215],
            ["99", 308, 0.04520741687623886, 1, 0.11020044356641526,
             0.06550337922097477],
            ["99"] + extra_99,
            ["55", 139, 0.09563944060686769, 1, 0.014593217782258146,
             0.04537214236560885],
            ["55"] + extra_55,
        ]
        exp_df = pd.DataFrame(expected_rows, columns=columns)

        # ...adjust jplace data
        payload = json.loads(self.jplacedata)
        kept = []
        for placement in payload['placements']:
            fragment = placement['n'][0]
            if fragment == '82':
                pass  # kept as-is
            elif fragment == '99':
                placement['p'].append(list(extra_99))
            elif fragment == '55':
                placement['p'].append(list(extra_55))
            else:
                continue  # every other fragment is dropped
            kept.append(placement)
        payload['placements'] = kept

        exp_tree = self.tree
        obs_df, parsed_tree = parse_jplace(json.dumps(payload))
        obs_tree = to_skbio_treenode(parsed_tree)
        pdt.assert_frame_equal(obs_df, exp_df)
        self.assertEqual(obs_tree.compare_rfd(exp_tree), 0)
Exemple #5
0
    def test_to_skbio_treenode(self):
        # Walks the converted tree and the reference tree in lockstep,
        # comparing branch lengths and names, then compares the drawings.
        converted = to_skbio_treenode(self.bp)
        reference = self.sktn
        for got, want in zip(converted.traverse(), reference.traverse()):
            if want.length is not None:
                expected_length = want.length
            elif want.is_root():
                # a reference root without a length stays without one
                expected_length = None
            else:
                # other reference nodes without a length are expected as 0.0
                expected_length = 0.0
            self.assertEqual(got.length, expected_length)
            self.assertEqual(got.name, want.name)

        self.assertEqual(converted.ascii_art(), reference.ascii_art())
    def test_to_skbio_treenode(self):
        # Walks the converted tree and the reference tree in lockstep,
        # comparing branch lengths and names, then compares the drawings.
        converted = to_skbio_treenode(self.bp)
        reference = self.sktn
        for got, want in zip(converted.traverse(), reference.traverse()):
            if want.length is not None:
                expected_length = want.length
            elif want.is_root():
                # a reference root without a length stays without one
                expected_length = None
            else:
                # other reference nodes without a length are expected as 0.0
                expected_length = 0.0
            self.assertEqual(got.length, expected_length)
            self.assertEqual(got.name, want.name)

        self.assertEqual(converted.ascii_art(), reference.ascii_art())
Exemple #7
0
 def test_to_skbio_treenode_with_edge_numbers(self):
     # Edge numbers written in {N} braces must be exposed as edge_num on
     # the converted nodes, including the root.
     #
     # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
     # but without edge labels
     # 0 1 2 3 4 5 6 7 8 9
     # 1 1 1 0 1 0 0 1 0 0
     newick = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
     tree = to_skbio_treenode(parse_newick(newick))
     for name, edge in (('A', 0), ('B', 1), ('D', 3), ('C', 4)):
         self.assertEqual(tree.find(name).edge_num, edge)
     # the root carries the trailing {5}
     self.assertEqual(tree.edge_num, 5)
    def test_place_jplace_square_braces(self):
        # Rewrites the fixture tree so edge numbers use [N] instead of {N}
        # and checks that parsing still yields the expected topology and
        # non-negative edge numbers.
        payload = json.loads(self.jplacedata)
        payload['tree'] = re.sub(r"{(\d+)}", r"[\1]", payload['tree'])
        # leave the fixture attribute holding the rewritten JSON text
        self.jplacedata = json.dumps(payload)

        exp_tree = self.tree
        _, parsed_tree = parse_jplace(self.jplacedata)
        obs_tree = to_skbio_treenode(parsed_tree)
        self.assertEqual(obs_tree.compare_rfd(exp_tree), 0)
        # the node the traversal starts from is excluded from this check
        for node in obs_tree.traverse(include_self=False):
            self.assertTrue(node.edge_num >= 0)
 def test_parse_jplace_simple(self):
     # Parses the unmodified jplace fixture and compares both the placement
     # frame and the tree against the expected values.
     columns = [
         'fragment', 'edge_num', 'likelihood', 'like_weight_ratio',
         'distal_length', 'pendant_length'
     ]
     # one row per fragment, in the order the frame is expected to list them
     expected_rows = [
         ["82", 361, 0.01013206496780672, 1, 0.02652932626620403,
          0.039354548684623215],
         ["99", 308, 0.04520741687623886, 1, 0.11020044356641526,
          0.06550337922097477],
         ["43", 309, 0.04054866161921744, 1, 0.010712923050783987,
          0.020946988900520196],
         ["195", 277, 0.01918907908397749, 1, 0.03065741838803451,
          0.04513513498399864],
         ["162", 55, 0.01758935282545493, 1, 0.0033199487685078776,
          0.05388735804976052],
         ["56", 81, 0.2366882303770561, 1, 0.04172580852519453,
          0.0007060238727097983],
         ["91", 105, 0.0001863393767883581, 1, 0.04578898721138839,
          0.08655004339151215],
         ["174", 89, 0.01216463967379211, 1, 0.04707020642820376,
          0.045206727542450205],
         ["5", 143, 0.012162345471765756, 1, 0.023797389484252734,
          0.10447375403452556],
         ["55", 139, 0.09563944060686769, 1, 0.014593217782258146,
          0.04537214236560885],
     ]
     exp_df = pd.DataFrame(expected_rows, columns=columns)
     exp_tree = self.tree

     obs_df, parsed_tree = parse_jplace(self.jplacedata)
     obs_tree = to_skbio_treenode(parsed_tree)
     pdt.assert_frame_equal(obs_df, exp_df)
     self.assertEqual(obs_tree.compare_rfd(exp_tree), 0)
def subset_and_write_table_tree(
    otu_size: int,
    sample_size: int,
    density: float,
    rep: int,
    seed: int,
    table: biom.Table,
    tree: bp.BP,
    output_dir: str,
) -> None:
    """Write one random table subset and its matching sheared tree to disk.

    Both outputs share a base name built from the subset parameters and are
    placed in ``output_dir``: ``<name>.biom`` for the table and
    ``<name>.newick`` for the tree.

    Parameters
    ----------
    otu_size, sample_size, density, seed
        Forwarded unchanged to ``get_random_subtable``; also part of the
        output file name.
    rep
        Only used as part of the output file name.
    table
        Table the random subset is drawn from.
    tree
        Tree that is sheared down to the subset's observation ids.
    output_dir
        Directory that the generated file name is joined onto.

    """
    # prepare output info
    name_template = 'otu_size-{}--sample_size-{}--rep-{}--seed-{}--density-{}'
    subset_name = name_template.format(
        otu_size, sample_size, rep, seed, density)
    path_prefix = os.path.join(output_dir, subset_name)

    # draw the subset and label it with the same name as its files
    subset = get_random_subtable(
        table,
        otu_size=otu_size,
        sample_size=sample_size,
        seed=seed,
        density=density,
    )
    subset.table_id = subset_name

    # write out biom table
    with biom_open(path_prefix + '.biom', 'w') as handle:
        subset.to_hdf5(handle, "subset: " + subset_name)

    # shear the tree down to the observations present in the subset
    kept_ids = set(subset.ids('observation'))
    sheared = bp.to_skbio_treenode(tree.shear(kept_ids))

    # give every node an explicit length before writing (None becomes 0)
    for node in sheared.traverse():
        if node.length is None:
            node.length = 0
    sheared.write(path_prefix + '.newick')
Exemple #11
0
    def _validate_and_match_data(self, ignore_missing_samples,
                                 filter_missing_features,
                                 filter_unobserved_features_from_phylogeny):
        # Prepares self.tree for the visualizer and aligns the table and
        # metadata with it via match_inputs(). The three flags are passed
        # through: the last one controls shearing here, the first two are
        # forwarded to match_inputs().

        # remove unobserved features from the phylogeny
        if filter_unobserved_features_from_phylogeny:
            observed_features = set(self.table.columns)
            self.tree = self.tree.shear(observed_features)

        # extract balance parenthesis (must happen before self.tree is
        # replaced by the converted tree below)
        self._bp_tree = list(self.tree.B)

        skbio_tree = to_skbio_treenode(self.tree)
        self.tree = Tree.from_tree(skbio_tree)
        fill_missing_node_names(self.tree)

        # Note that the feature table held in self.table at this point is
        # set up such that the index describes sample IDs and the columns
        # describe feature IDs. We transpose this table before sending it
        # to tools.match_inputs() and keep using the table it returns for
        # the rest of this visualizer.
        matched = match_inputs(
            self.tree, self.table.T, self.samples, self.features,
            ignore_missing_samples, filter_missing_features)
        self.table, self.samples, self.tip_md, self.int_md = matched
    def test_parse_newick_simple_edge_numbers(self):
        # Checks both directions of the mapping between the index passed to
        # edge() and the edge number written in braces in the newick string.
        #
        # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009
        # but without edge labels
        # 0 1 2 3 4 5 6 7 8 9
        # 1 1 1 0 1 0 0 1 0 0
        newick = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};'
        # skbio doesn't know about edge numbers, so the reference omits them
        plain_newick = '((A:.01, B:.01)D:.01, C:.01);'

        parsed = parse_newick(newick)
        observed = to_skbio_treenode(parsed)
        expected = skbio.TreeNode.read([plain_newick])
        self.assertEqual(observed.compare_rfd(expected), 0)

        # (index passed to edge(), edge number from the braces)
        index_and_number = ((2, 0), (4, 1), (1, 3), (7, 4), (0, 5))
        for index, number in index_and_number:
            self.assertEqual(parsed.edge(index), number)
        for index, number in index_and_number:
            self.assertEqual(parsed.edge_from_number(number), index)
Exemple #13
0
 def test_parse_newick_nested_quotes(self):
     # Regression check: double quotes embedded in a label were being
     # removed; the parsed tree must agree with skbio's reading of the
     # same string.
     newick = '((foo"bar":1,baz:2)x:3)r;'
     expected = skbio.TreeNode.read([newick])
     observed = to_skbio_treenode(parse_newick(newick))
     difference = observed.compare_subsets(expected)
     self.assertEqual(difference, 0.0)
Exemple #14
0
 def test_parse_newick_with_parens(self):
     # Regression check: parentheses inside a quoted label were being
     # interpreted by the parser; the parsed tree must agree with skbio's
     # reading of the same string.
     newick = "(('foo(b)ar':1,baz:2)x:3)r;"
     expected = skbio.TreeNode.read([newick])
     observed = to_skbio_treenode(parse_newick(newick))
     difference = observed.compare_subsets(expected)
     self.assertEqual(difference, 0.0)
 def test_parse_newick_nested_quotes(self):
     # Regression check: double quotes embedded in a label were being
     # removed; the parsed tree must agree with skbio's reading of the
     # same string.
     newick = '((foo"bar":1,baz:2)x:3)r;'
     expected = skbio.TreeNode.read([newick])
     observed = to_skbio_treenode(parse_newick(newick))
     difference = observed.compare_subsets(expected)
     self.assertEqual(difference, 0.0)
 def test_parse_newick_with_parens(self):
     # Regression check: parentheses inside a quoted label were being
     # interpreted by the parser; the parsed tree must agree with skbio's
     # reading of the same string.
     newick = "(('foo(b)ar':1,baz:2)x:3)r;"
     expected = skbio.TreeNode.read([newick])
     observed = to_skbio_treenode(parse_newick(newick))
     difference = observed.compare_subsets(expected)
     self.assertEqual(difference, 0.0)