Ejemplo n.º 1
0
    def tree_from_splits(self,
            split_distribution,
            min_freq=0.5,
            include_edge_lengths=True):
        """Returns a consensus tree from splits in `split_distribution`.

        Split frequencies are read from
        `split_distribution.weighted_split_frequencies` when
        `self.weighted_splits` is true, and from
        `split_distribution.split_frequencies` otherwise.

        A split is a candidate for the consensus tree when:

            - `min_freq` is None (every split is a candidate), or
            - its frequency is strictly greater than `min_freq`, or
            - both `min_freq` and its frequency are within 1e-7 of 1.0
              (so that a strict consensus tolerates rounding error).

        Candidates are handed to `treesplit.tree_from_splits` in order of
        descending frequency.

        If `include_edge_lengths` is True, every edge of the consensus tree
        whose split has an entry in `split_distribution.split_edge_lengths`
        gets its length set to the mean of the recorded lengths, or to None
        if no lengths were recorded for that split. The sample variance is
        computed by `mean_and_sample_variance` but is not stored anywhere.

        Raises an Exception if `self.support_as_edge_lengths` and
        `include_edge_lengths` are both true, since the edge length cannot
        carry both the support value and the summarized length.
        """
        taxon_set = split_distribution.taxon_set
        if self.weighted_splits:
            split_freqs = split_distribution.weighted_split_frequencies
        else:
            split_freqs = split_distribution.split_frequencies
        is_rooted = split_distribution.is_rooted
        if self.support_as_edge_lengths and include_edge_lengths:
            raise Exception("Cannot map support as edge lengths if edge lengths are to be set on consensus tree")

        def _almost_one(x):
            return abs(x - 1.0) <= 0.0000001

        # Both flags are loop invariants; evaluating them once also keeps
        # `freq > min_freq` from ever being reached when min_freq is None.
        accept_all = min_freq is None
        strict_consensus = (not accept_all) and _almost_one(min_freq)

        # `items()` works on both Python 2 and Python 3 mappings.
        to_try_to_add = [
            (freq, s)
            for s, freq in split_freqs.items()
            if accept_all
            or freq > min_freq
            or (strict_consensus and _almost_one(freq))
        ]
        # Highest-frequency splits first; ties fall back to comparing the
        # split values themselves, as tuples are compared element-wise.
        to_try_to_add.sort(reverse=True)
        splits_for_tree = [s for _freq, s in to_try_to_add]

        con_tree = treesplit.tree_from_splits(splits=splits_for_tree,
                taxon_set=taxon_set,
                is_rooted=is_rooted)
        treesplit.encode_splits(con_tree)

        # Only touch split_edge_lengths when edge lengths were requested.
        if include_edge_lengths:
            recorded_lengths = split_distribution.split_edge_lengths
        else:
            recorded_lengths = None

        for node in con_tree.postorder_node_iter():
            split = node.edge.split_bitmask
            if split in split_freqs:
                self.map_split_support_to_node(node=node, split_support=split_freqs[split])
            if recorded_lengths is not None and split in recorded_lengths:
                edges = recorded_lengths[split]
                if len(edges) > 0:
                    mean, _var = mean_and_sample_variance(edges)
                    node.edge.length = mean
                else:
                    node.edge.length = None

        return con_tree
Ejemplo n.º 2
0
 def testReferenceTree(self):
     # Round-trip check: a tree rebuilt from the splits of a reference tree
     # must have a symmetric difference of zero from that reference tree.
     # (Kept as comments rather than a docstring so the name reported by the
     # unittest runner does not change.)
     ref_tree_list = datagen.reference_tree_list()
     for ref_tree in ref_tree_list:
         # Populates ref_tree.split_edges, which is read on the next line.
         treesplit.encode_splits(ref_tree)
         splits = ref_tree.split_edges.keys()
         t_tree = treesplit.tree_from_splits(splits=splits,
                 taxon_set=ref_tree_list.taxon_set,
                 is_rooted=ref_tree.is_rooted)
         self.assertEqual(ref_tree.symmetric_difference(t_tree), 0)
Ejemplo n.º 3
0
 def testReferenceTree(self):
     # Rebuild every reference tree from its own set of splits and require
     # that the rebuilt tree is topologically identical to the original
     # (symmetric difference of zero). Comments are used instead of a
     # docstring so the unittest runner's reported test name is unaffected.
     ref_tree_list = datagen.reference_tree_list()
     for ref_tree in ref_tree_list:
         # encode_splits must run first: it fills in ref_tree.split_edges.
         treesplit.encode_splits(ref_tree)
         splits = ref_tree.split_edges.keys()
         t_tree = treesplit.tree_from_splits(splits=splits,
                 taxon_set=ref_tree_list.taxon_set,
                 is_rooted=ref_tree.is_rooted)
         self.assertEqual(ref_tree.symmetric_difference(t_tree), 0)
Ejemplo n.º 4
0
 def calc_tree_freqs(self, taxon_set, is_rooted=False):
     """
     Returns an OrderedDict that maps a tree to a tuple,
     (raw number of occurrences, proportion of total trees counted).

     One tree is built with `treesplit.tree_from_splits` for each topology
     hash reported by `self.calc_hash_freqs()`, using the given `taxon_set`
     and `is_rooted` values. Entries are inserted in the iteration order of
     that mapping (presumably descending proportion of occurrence; this is
     decided by `calc_hash_freqs`, not here).
     """
     def _tree_for(split_hash):
         return treesplit.tree_from_splits(splits=split_hash,
             taxon_set=taxon_set,
             is_rooted=is_rooted)

     return OrderedDict(
         (_tree_for(split_hash), (count, freq))
         for split_hash, (count, freq) in self.calc_hash_freqs().items())
Ejemplo n.º 5
0
    def testUltrametricTrees(self):
        # For each rooted tree read from a NEXUS file, rebuild the tree from
        # its splits and require a symmetric difference of zero between the
        # source tree and the rebuilt one.
        for tree_file in ("pythonidae.beast.summary.tre",
                          "primates.beast.mcct.medianh.tre"):
            source_path = pathmap.tree_source_path(tree_file)
            ref_tree = dendropy.Tree.get_from_path(
                source_path, "nexus", as_rooted=True)
            # Fills in ref_tree.split_edges, which is read just below.
            treesplit.encode_splits(ref_tree)
            rebuilt = treesplit.tree_from_splits(
                splits=ref_tree.split_edges.keys(),
                taxon_set=ref_tree.taxon_set,
                is_rooted=ref_tree.is_rooted)
            treesplit.encode_splits(rebuilt)
            self.assertEqual(ref_tree.symmetric_difference(rebuilt), 0)
Ejemplo n.º 6
0
    def testUltrametricTrees(self):
        # Round-trip check on rooted trees loaded from NEXUS files: the tree
        # reconstructed from a source tree's splits must not differ from it.
        nexus_names = (
                "pythonidae.beast.summary.tre",
                "primates.beast.mcct.medianh.tre",
                )

        for nexus_name in nexus_names:
            nexus_path = pathmap.tree_source_path(nexus_name)
            original = dendropy.Tree.get_from_path(nexus_path,
                    "nexus",
                    as_rooted=True)
            # Splits must be encoded before split_edges can be read.
            treesplit.encode_splits(original)
            split_keys = original.split_edges.keys()
            reconstructed = treesplit.tree_from_splits(splits=split_keys,
                    taxon_set=original.taxon_set,
                    is_rooted=original.is_rooted)
            treesplit.encode_splits(reconstructed)
            self.assertEqual(original.symmetric_difference(reconstructed), 0)