예제 #1
0
 def testMeanNodeAgeSummarizationOnMCCT(self):
     """Check node ages summarized from the support trees onto the target tree.

     The support trees are read into a ``TreeArray`` (with ``self.burnin``
     passed as ``tree_offset``), their splits are summarized onto the
     target tree with ``set_edge_lengths="median-age"``, and the result is
     compared against the expected tree: both must have the same
     bipartition encoding and, for every bipartition, (almost) equal ages
     on the node at the head of the corresponding edge.

     NOTE(review): the method name says "Mean" while the summarization
     requested is "median-age" -- confirm which statistic the expected
     tree file was generated with.
     """
     support_trees = dendropy.TreeArray(ignore_node_ages=False)
     support_trees.read_from_path(
         self.support_trees_path,
         "nexus",
         tree_offset=self.burnin,
     )
     shared_namespace = support_trees.taxon_namespace

     # All trees share the array's taxon namespace so that bipartitions
     # are comparable across them.
     target_tree = dendropy.Tree.get_from_path(
         self.target_tree_path,
         schema="nexus",
         taxon_namespace=shared_namespace,
     )
     support_trees.summarize_splits_on_tree(
         tree=target_tree,
         is_bipartitions_updated=False,
         set_edge_lengths="median-age",
     )

     expected_tree = dendropy.Tree.get_from_path(
         self.expected_tree_path,
         schema="nexus",
         taxon_namespace=shared_namespace,
     )
     expected_tree.encode_bipartitions()
     expected_tree.calc_node_ages()

     # Topologies must match before node ages can be compared edge by edge.
     self.assertEqual(expected_tree.bipartition_encoding,
                      target_tree.bipartition_encoding)

     expected_edges = expected_tree.bipartition_edge_map
     observed_edges = target_tree.bipartition_edge_map
     for bipartition in expected_tree.bipartition_encoding:
         expected_node = expected_edges[bipartition].head_node
         observed_node = observed_edges[bipartition].head_node
         self.assertAlmostEqual(observed_node.age, expected_node.age)
예제 #2
0
def bootstrap_support(input_tree, replicate_trees, output_tree):
    """ Calculate support for tree with replicates covering the same taxon set.

    The input tree and all replicates are read as unrooted Newick trees.
    Support for each split of the input tree is summarized from the
    replicates as a percentage and written into the labels of the internal
    nodes: ``"<support>"`` for unlabeled nodes, ``"<support>:<label>"`` for
    nodes that already carry a label. The annotated tree is written to
    `output_tree` in Newick format with the rooting token suppressed.

    Parameters
    ----------
    input_tree : str
      Tree inferred from complete data.
    replicate_trees : iterable
      Files containing replicate trees.
    output_tree: str
      Name of output tree with support values.
    """

    # read tree and bootstrap replicates as unrooted, and
    # calculate bootstrap support
    orig_tree = dendropy.Tree.get_from_path(input_tree,
                                            schema='newick',
                                            rooting="force-unrooted",
                                            preserve_underscores=True)
    # NOTE(review): this sets a plain attribute that nothing in this
    # function reads; kept in case other code relies on it -- confirm.
    orig_tree.bipartitions = True
    orig_tree.encode_bipartitions()

    rep_trees = dendropy.TreeArray(taxon_namespace=orig_tree.taxon_namespace,
                                   is_rooted_trees=False,
                                   ignore_edge_lengths=True,
                                   ignore_node_ages=True,
                                   use_tree_weights=False)

    rep_trees.read_from_files(files=replicate_trees,
                              schema='newick',
                              rooting="force-unrooted",
                              preserve_underscores=True,
                              taxon_namespace=orig_tree.taxon_namespace)

    # Bipartitions were encoded above, so they are flagged as up to date.
    rep_trees.summarize_splits_on_tree(orig_tree,
                                       is_bipartitions_updated=True,
                                       add_support_as_node_attribute=True,
                                       support_as_percentages=True)

    for node in orig_tree.internal_nodes():
        # Round to the nearest integer instead of truncating: a percentage
        # computed in floating point can land just below the intended value
        # (e.g. 0.57 * 100 == 56.99999999999999), which int() alone would
        # report as 56.
        support = str(int(round(node.support)))
        if node.label:
            node.label = support + ':' + node.label
        else:
            node.label = support

    # The tree is left unrooted; no re-rooting is performed before writing.
    orig_tree.write_to_path(output_tree,
                            schema='newick',
                            suppress_rooting=True,
                            unquoted_underscores=True)
예제 #3
0
def bootstrap_support(input_tree, replicate_trees, output_tree):
    """ Calculate support for tree with replicates covering the same taxon set.

    The input tree and all replicates are read as unrooted Newick trees.
    Support for each split of the input tree is summarized from the
    replicates as a percentage and written into the labels of the internal
    nodes. Nodes that already carry a label have it taken apart with
    ``parse_label`` and rebuilt with ``create_label`` using the new support
    value; unlabeled nodes are labeled with the support rounded to the
    nearest integer. The annotated tree is written to `output_tree` in
    Newick format with the rooting token suppressed.

    Parameters
    ----------
    input_tree : str
      Tree inferred from complete data.
    replicate_trees : iterable
      Files containing replicate trees.
    output_tree: str
      Name of output tree with support values.
    """

    import dendropy

    # read tree and bootstrap replicates as unrooted, and
    # calculate bootstrap support
    orig_tree = dendropy.Tree.get_from_path(input_tree,
                                            schema='newick',
                                            rooting="force-unrooted",
                                            preserve_underscores=True)
    # NOTE(review): this sets a plain attribute that nothing in this
    # function reads; kept in case other code relies on it -- confirm.
    orig_tree.bipartitions = True
    orig_tree.encode_bipartitions()

    rep_trees = dendropy.TreeArray(taxon_namespace=orig_tree.taxon_namespace,
                                   is_rooted_trees=False,
                                   ignore_edge_lengths=True,
                                   ignore_node_ages=True,
                                   use_tree_weights=False)

    rep_trees.read_from_files(files=replicate_trees,
                              schema='newick',
                              rooting="force-unrooted",
                              preserve_underscores=True,
                              taxon_namespace=orig_tree.taxon_namespace)

    # Bipartitions were encoded above, so they are flagged as up to date.
    rep_trees.summarize_splits_on_tree(orig_tree,
                                       is_bipartitions_updated=True,
                                       add_support_as_node_attribute=True,
                                       support_as_percentages=True)

    for node in orig_tree.internal_nodes():
        if node.label:
            # Any support value already stored in the label is discarded and
            # replaced by the freshly computed one.
            _, taxon, aux_info = parse_label(node.label)
            node.label = create_label(node.support, taxon, aux_info)
        else:
            # Round to the nearest integer instead of truncating: a
            # percentage computed in floating point can land just below the
            # intended value (e.g. 0.57 * 100 == 56.99999999999999), which
            # int() alone would report as 56.
            node.label = str(int(round(node.support)))

    orig_tree.write_to_path(output_tree,
                            schema='newick',
                            suppress_rooting=True,
                            unquoted_underscores=True)
예제 #4
0
 def test_add_tree(self):
     """Add each source tree to a fresh ``TreeArray`` and verify the result.

     The array is created on the source trees' taxon namespace, populated
     one tree at a time via ``add_tree``, and then handed to
     ``verify_tree_array`` together with the source trees.
     """
     source_trees = self.get_trees()
     populated = dendropy.TreeArray(
         taxon_namespace=source_trees.taxon_namespace)
     for source_tree in source_trees:
         populated.add_tree(source_tree)
     self.verify_tree_array(populated, source_trees)
예제 #5
0
    def testVariants(self):
        """Check topology and bipartition-encoding frequencies of a TreeArray.

        Runs every combination of weighted/unweighted, multifurcating or
        not, and rooted/unrooted trees (with a tree offset of 100). For each
        combination a regime is obtained from ``self.get_regime``, its tree
        string is read into a ``TreeArray``, and two things are checked:

        * the frequency of every topology reported by ``topologies()``
          matches the regime's expected frequency for that topology's
          bipartition encoding;
        * the frequencies from ``bipartition_encoding_frequencies()`` match
          the expected ones for every source tree that was actually
          sampled, and are (almost) zero for source trees with an
          ``actual_count`` of 0, if they are reported at all.

        Every assertion message names the regime being tested, since all
        combinations run inside a single test method.
        """
        either = (False, True)
        for tree_offset, is_weighted, is_multifurcating, is_rooted in itertools.product(
                (100, ), either, either, either):
            regime = (
                "is_rooted={}, is_multifurcating={}, is_weighted={}, "
                "tree_offset={}".format(
                    is_rooted, is_multifurcating, is_weighted, tree_offset))

            source_trees, bipartition_encoding_freqs, test_trees_string = self.get_regime(
                is_rooted=is_rooted,
                is_multifurcating=is_multifurcating,
                is_weighted=is_weighted,
                tree_offset=tree_offset)
            ta = dendropy.TreeArray(
                is_rooted_trees=is_rooted,
                use_tree_weights=is_weighted,
                taxon_namespace=source_trees.taxon_namespace,
            )
            ta.read_from_string(test_trees_string,
                                "newick",
                                tree_offset=tree_offset,
                                store_tree_weights=is_weighted)

            # Topology frequencies, keyed by each topology's bipartitions.
            for tree in ta.topologies():
                b = frozenset(tree.encode_bipartitions())
                observed = tree.frequency
                expected = bipartition_encoding_freqs[b]
                self.assertAlmostEqual(
                    observed,
                    expected,
                    msg="topology frequency {} != expected {} ({})".format(
                        observed, expected, regime))

            # Bipartition-encoding frequencies, keyed by source tree key.
            calculated_bipartition_encoding_freqs = ta.bipartition_encoding_frequencies()
            for tree in source_trees:
                if tree.actual_count == 0:
                    # An unsampled tree may legitimately be absent from the
                    # results; if present, its frequency must be zero.
                    if tree.key in calculated_bipartition_encoding_freqs:
                        observed = calculated_bipartition_encoding_freqs[tree.key]
                        self.assertAlmostEqual(
                            observed,
                            0,
                            msg="unsampled tree has frequency {} ({})".format(
                                observed, regime))
                    continue
                f1 = bipartition_encoding_freqs[tree.key]
                f2 = calculated_bipartition_encoding_freqs[tree.key]
                self.assertAlmostEqual(
                    f1,
                    f2,
                    msg="expected frequency {} != calculated {} ({})".format(
                        f1, f2, regime))