Exemple #1
0
 def test_scored_trees_collection_write(self):
     """writes a tree collection"""
     sct = ScoredTreeCollection(self.rooted_trees_lengths)
     # The first positional parameter of TemporaryDirectory is ``suffix``,
     # so the parent directory has to be given by keyword as ``dir``.
     with TemporaryDirectory(dir=".") as dirname:
         out = pathlib.Path(dirname) / "collection.trees"
         sct.write(out)
         # the write must actually have produced the requested file
         self.assertTrue(out.exists())
Exemple #2
0
    def test_consensus_from_scored_trees_collection_ii(self):
        """consensus of conflicting trees should be the unresolved tree"""
        # each of the (up to three) trees is paired with the same score of 1
        scores = [1] * 3
        expected_newick = "(a,b,c,d);"

        unrooted = ScoredTreeCollection(
            list(zip(scores, self.unrooted_conflicting_trees))
        )
        consensus = unrooted.get_consensus_trees()[0]
        self.assertTrue(consensus.same_topology(Tree(expected_newick)))

        rooted = ScoredTreeCollection(
            list(zip(scores, self.rooted_conflicting_trees))
        )
        consensus = rooted.get_consensus_trees(method="rooted")[0]
        self.assertTrue(consensus.same_topology(Tree(expected_newick)))
Exemple #3
0
    def test_consensus_tree_branch_lengths(self):
        """branch lengths on the consensus tree should be averaged correctly"""

        def edge_over_a_and_c(tree):
            # The edge whose children are named exactly "a" and "c";
            # None when the tree has no such edge.
            wanted = {"a", "c"}
            for candidate in tree.get_edge_vector(include_root=False):
                if {child.name for child in candidate.children} == wanted:
                    return candidate
            return None

        def check_ac_length(consensus, reference):
            # the a/c edge must have the same length (within 1e-9) in both
            found = edge_over_a_and_c(consensus).length
            expected = edge_over_a_and_c(reference).length
            self.assertTrue(abs(found - expected) < 1e-9)

        # unrooted case
        collection = ScoredTreeCollection(self.unrooted_trees_lengths)
        consensus = collection.get_consensus_tree()
        reference = self.unrooted_trees_lengths[0][1]
        # For a consistent comparison, root the consensus on tip "d" and order
        # it by the reference tree's tip names.
        ordering = reference.get_tip_names()
        consensus = consensus.rooted_with_tip("d").sorted(ordering)
        check_ac_length(consensus, reference)

        # rooted case
        collection = ScoredTreeCollection(self.rooted_trees_lengths)
        consensus = collection.get_consensus_tree(method="rooted")
        reference = self.rooted_trees_lengths[0][1]
        check_ac_length(consensus, reference)
Exemple #4
0
 def results2output(self, results):
     """Package ``results`` as a ScoredTreeCollection and return it."""
     collection = ScoredTreeCollection(results)
     return collection
Exemple #5
0
def gnj(dists, keep=None, dkeep=0, ui=None):
    """Build trees by repeatedly taking the best (and most diverse) joins.

    Arguments:
        - dists: dict of (name1, name2): distance.  An object providing a
          ``to_dict()`` method is converted with it first.
        - keep: number of best partial trees to keep at each iteration,
          and therefore to return.  Same as Q parameter in original GNJ paper.
          Defaults to 5 times the number of names.
        - dkeep: number of diverse partial trees to keep at each iteration,
          and therefore to return.  Same as D parameter in original GNJ paper.
        - ui: optional progress reporter; its
          ``display(msg=..., progress=...)`` method is called as trees are
          selected.  When None, no progress is reported.
    Result:
        - a ScoredTreeCollection made from the sorted list of
          (tree length, tree) tuples
    """
    try:
        dists = dists.to_dict()
    except AttributeError:
        # no to_dict() available: use dists as given
        pass

    (names, d) = distance_dict_to_2D(dists)

    if keep is None:
        keep = len(names) * 5
    all_keep = keep + dkeep

    # For recognising duplicate topologies, encode partitions (ie: edges) as
    # frozensets of tip names, which should be quickly comparable.
    arbitrary_anchor = names[0]
    all_tips = frozenset(names)

    def encode_partition(tips):
        # Always report the side of the split that contains the anchor tip,
        # so both descriptions of one edge give the same frozenset.
        included = frozenset(tips)
        if arbitrary_anchor not in included:
            included = all_tips - included
        return included
        # could also convert to long int, or cache, would be faster?

    tips = [frozenset([n]) for n in names]
    nodes = [LightweightTreeTip(name) for name in names]
    star_tree = PartialTree(d, nodes, tips, 0.0)
    star_tree.topology = frozenset([])
    trees = [star_tree]

    # Progress display auxiliary code
    template = " size %%s/%s  trees %%%si" % (len(names), len(str(all_keep)))
    total_work = 0
    max_candidates = 1
    total_work_before = {}
    for size in range(len(names), 3, -1):
        total_work_before[size] = total_work
        max_candidates = min(all_keep, max_candidates * size * (size - 1) // 2)
        total_work += max_candidates

    def _show_progress():
        # ui defaults to None; with no reporter there is nothing to update.
        if ui is None:
            return
        # Reads the current ``size`` and ``next_trees`` of the main loop below.
        t = len(next_trees)
        work_done = total_work_before[size] + t
        ui.display(msg=template % (size, t), progress=work_done / total_work)

    for size in range(len(names), 3, -1):
        # Generator of candidate joins, best first.
        # Note that with dkeep>0 this generator is used up a bit at a time
        # by 2 different interrupted 'for' loops below.
        candidates = uniq_neighbour_joins(trees, encode_partition)

        # First take up to 'keep' best ones
        next_trees = []
        _show_progress()
        for pair in candidates:
            next_trees.append(pair)
            if len(next_trees) == keep:
                break
        _show_progress()

        # The very best one is used as an anchor for measuring the
        # topological distance to others
        best_topology = next_trees[0].topology
        prior_td = [len(best_topology ^ tree.topology) for tree in trees]

        # Maintain a separate queue of joins for each possible
        # topological distance
        max_td = (max(prior_td) + 1) // 2
        queue = [deque() for _ in range(max_td + 1)]
        queued = 0

        # Now take up to dkeep joins, an equal number of the best at each
        # topological distance, while not calculating any more TDs than
        # necessary.
        # Distances are looked up by object identity of the parent tree.
        prior_td = {id(tree): td for tree, td in zip(trees, prior_td)}
        target_td = 1
        while (candidates or queued) and len(next_trees) < all_keep:
            if candidates and not queue[target_td]:
                for pair in candidates:
                    # A join whose new partition is absent from the best
                    # topology moves one step further from it, otherwise one
                    # step closer.
                    diff = pair.new_partition not in best_topology
                    td = (prior_td[id(pair.tree)] + (1 if diff else -1)) // 2
                    # equiv, slower: td = len(best_topology ^ topology) // 2
                    queue[td].append(pair)
                    queued += 1
                    if td == target_td:
                        break
                else:
                    # generator exhausted: only the queues remain to be drained
                    candidates = None
            if queue[target_td]:
                next_trees.append(queue[target_td].popleft())
                queued -= 1
                _show_progress()

            # cycle through the distances 1..max_td
            target_td = target_td % max_td + 1

        trees = [pair.joined() for pair in next_trees]

    result = [tree.asScoreTreeTuple() for tree in trees]
    result.sort()
    return ScoredTreeCollection(result)