Exemple #1
0
    def test_consensus_from_scored_trees_collection_ii(self):
        """strict consensus should handle conflicting trees"""
        def first_consensus(trees, **consensus_args):
            # Give each of the three trees the same score of 1, then take
            # the first consensus tree the collection reports.
            equally_scored = zip([1]*3, trees)
            collection = ScoredTreeCollection(equally_scored)
            return collection.getConsensusTrees(**consensus_args)[0]

        # Unrooted input, default consensus method.
        unrooted = first_consensus(self.unrooted_conflicting_trees)
        self.assertTrue(unrooted.sameTopology(Tree("(a,b,c,d);")))

        # Rooted input, explicitly requesting the 'rooted' method.
        rooted = first_consensus(self.rooted_conflicting_trees,
                                 method='rooted')
        self.assertTrue(rooted.sameTopology(Tree("(a,b,c,d);")))
Exemple #2
0
    def test_consensus_tree_branch_lengths(self):
        """consensus trees should average branch lengths properly"""
        def find_ac_edge(tree):
            # First non-root edge whose children are named exactly 'a' and
            # 'c'; falls through to None when no edge matches.
            for node in tree.getEdgeVector(include_root=False):
                child_names = set([child.Name for child in node.Children])
                if child_names == set('ac'):
                    return node

        def check_ac_length(scored_trees, **consensus_args):
            # The a/c edge of the consensus tree must have (within 1e-9) the
            # same length as the a/c edge of the first tree in the input.
            collection = ScoredTreeCollection(scored_trees)
            consensus = collection.getConsensusTree(**consensus_args)
            reference = scored_trees[0][1]
            difference = (find_ac_edge(consensus).Length
                          - find_ac_edge(reference).Length)
            self.assertTrue(abs(difference) < 1e-9)

        check_ac_length(self.unrooted_trees_lengths)
        check_ac_length(self.rooted_trees_lengths, method='rooted')
Exemple #3
0
    def test_consensus_from_scored_trees_collection_ii(self):
        """strict consensus should handle conflicting trees"""
        # Unrooted case: three conflicting trees, each given a score of 1,
        # using the default consensus method.
        unrooted_scored = zip([1] * 3, self.unrooted_conflicting_trees)
        unrooted_collection = ScoredTreeCollection(unrooted_scored)
        unrooted_consensus = unrooted_collection.getConsensusTrees()[0]
        expected_unrooted = Tree("(a,b,c,d);")
        self.assertTrue(unrooted_consensus.sameTopology(expected_unrooted))

        # Rooted case: same check, requesting the 'rooted' method.
        rooted_scored = zip([1] * 3, self.rooted_conflicting_trees)
        rooted_collection = ScoredTreeCollection(rooted_scored)
        rooted_consensus = rooted_collection.getConsensusTrees(
            method='rooted')[0]
        expected_rooted = Tree("(a,b,c,d);")
        self.assertTrue(rooted_consensus.sameTopology(expected_rooted))
Exemple #4
0
    def test_consensus_tree_branch_lengths(self):
        """consensus trees should average branch lengths properly"""
        def ac_edge(tree):
            # First non-root edge whose children are named exactly 'a' and
            # 'c', or None when the tree has no such edge.
            matches = (
                edge for edge in tree.getEdgeVector(include_root=False)
                if set([kid.Name for kid in edge.Children]) == set('ac'))
            return next(matches, None)

        # Unrooted trees, default consensus method: compare the a/c edge of
        # the consensus with the a/c edge of the first input tree.
        unrooted_consensus = ScoredTreeCollection(
            self.unrooted_trees_lengths).getConsensusTree()
        unrooted_reference = self.unrooted_trees_lengths[0][1]
        unrooted_gap = abs(
            ac_edge(unrooted_consensus).Length
            - ac_edge(unrooted_reference).Length)
        self.assertTrue(unrooted_gap < 1e-9)

        # Rooted trees, 'rooted' consensus method: same comparison.
        rooted_consensus = ScoredTreeCollection(
            self.rooted_trees_lengths).getConsensusTree(method='rooted')
        rooted_reference = self.rooted_trees_lengths[0][1]
        rooted_gap = abs(
            ac_edge(rooted_consensus).Length
            - ac_edge(rooted_reference).Length)
        self.assertTrue(rooted_gap < 1e-9)
Exemple #5
0
def gnj(dists, keep=None, dkeep=0, ui=None):
    """Join neighbours iteratively, keeping several partial trees per round.

    Arguments:
        - dists: dict of (name1, name2): distance
        - keep: number of best partial trees to keep at each iteration,
          and therefore to return.  Same as Q parameter in original GNJ paper.
          Defaults to 5 times the number of names.
        - dkeep: number of diverse partial trees to keep at each iteration,
          and therefore to return.  Same as D parameter in original GNJ paper.
        - ui: optional progress reporter.  Only its
          display(msg=..., progress=...) method is used.  When None, no
          progress is reported.
    Result:
        - a ScoredTreeCollection built from the sorted list of
          (tree length, tree) tuples
    """

    (names, d) = distanceDictTo2D(dists)

    if keep is None:
        keep = len(names) * 5
    all_keep = keep + dkeep

    # For recognising duplicate topologies, encode partitions (ie: edges) as
    # frozensets of tip names, which should be quickly comparable.
    arbitrary_anchor = names[0]
    all_tips = frozenset(names)

    def encode_partition(tips):
        # Always describe a split by the side containing the anchor tip, so
        # the two complementary tip sets of one edge encode identically.
        included = frozenset(tips)
        if arbitrary_anchor not in included:
            included = all_tips - included
        return included
        # could also convert to long int, or cache, would be faster?

    tips = [frozenset([n]) for n in names]
    nodes = [LightweightTreeTip(name) for name in names]
    star_tree = PartialTree(d, nodes, tips, 0.0)
    star_tree.topology = frozenset([])
    trees = [star_tree]

    # Progress display auxiliary code
    template = ' size %%s/%s  trees %%%si' % (len(names), len(str(all_keep)))
    total_work = 0
    max_candidates = 1
    total_work_before = {}
    for L in range(len(names), 3, -1):
        total_work_before[L] = total_work
        max_candidates = min(all_keep, max_candidates*L*(L-1)//2)
        total_work += max_candidates

    def _show_progress():
        # 'ui' defaults to None; without a reporter there is nothing to do.
        if ui is None:
            return
        # Reads 'L' and 'next_trees' from the main loop below at call time.
        t = len(next_trees)
        work_done = total_work_before[L] + t
        ui.display(msg=template % (L, t), progress=work_done/total_work)

    for L in range(len(names), 3, -1):
        # Generator of candidate joins, best first.
        # Note that with dkeep>0 this generator is used up a bit at a time
        # by 2 different interrupted 'for' loops below.
        candidates = uniq_neighbour_joins(trees, encode_partition)

        # First take up to 'keep' best ones
        next_trees = []
        _show_progress()
        for pair in candidates:
            next_trees.append(pair)
            if len(next_trees) == keep:
                break
        _show_progress()

        # The very best one is used as an anchor for measuring the
        # topological distance to others
        best_topology = next_trees[0].topology
        prior_td = [len(best_topology ^ tree.topology) for tree in trees]

        # Maintain a separate queue of joins for each possible
        # topological distance
        max_td = (max(prior_td) + 1) // 2
        queue = [deque() for g in range(max_td+1)]
        queued = 0

        # Now take up to dkeep joins, an equal number of the best at each
        # topological distance, while not calculating any more TDs than
        # necessary.
        prior_td = dict(zip(map(id, trees), prior_td))
        target_td = 1
        while (candidates or queued) and len(next_trees) < all_keep:
            if candidates and not queue[target_td]:
                # Pull candidates until one lands in the wanted queue; the
                # others are parked in the queue for their own distance.
                for pair in candidates:
                    diff = pair.new_partition not in best_topology
                    td = (prior_td[id(pair.tree)] + [-1,+1][diff]) // 2
                    # equiv, slower: td = len(best_topology ^ topology) // 2
                    queue[td].append(pair)
                    queued += 1
                    if td == target_td:
                        break
                else:
                    # Generator exhausted: only queued joins remain.
                    candidates = None
            if queue[target_td]:
                next_trees.append(queue[target_td].popleft())
                queued -= 1
                _show_progress()

            # Cycle the target distance through 1..max_td.
            target_td = target_td % max_td + 1

        trees = [pair.joined() for pair in next_trees]

    result = [tree.asScoreTreeTuple() for tree in trees]
    result.sort()
    return ScoredTreeCollection(result)
 def results2output(self, results):
     """Return ``results`` wrapped in a ScoredTreeCollection."""
     collection = ScoredTreeCollection(results)
     return collection