def test_consensus_from_scored_trees_collection_ii(self):
    """consensus of conflicting trees should be the unresolved (a,b,c,d) tree

    Runs the same check on the unrooted fixture with the default consensus
    method and on the rooted fixture with method='rooted'. Every input tree
    is paired with the same score of 1.
    """
    scenarios = (
        (self.unrooted_conflicting_trees, {}),
        (self.rooted_conflicting_trees, {'method': 'rooted'}),
    )
    for conflicting_trees, consensus_options in scenarios:
        collection = ScoredTreeCollection(zip([1] * 3, conflicting_trees))
        # Only the first consensus tree returned is inspected.
        first_consensus = collection.getConsensusTrees(**consensus_options)[0]
        self.assertTrue(first_consensus.sameTopology(Tree("(a,b,c,d);")))
def test_consensus_tree_branch_lengths(self):
    """consensus trees should average branch lengths properly

    The edge whose children are named 'a' and 'c' must have the same
    Length (within 1e-9) in the consensus tree as in the tree stored at
    index [0][1] of the fixture, which is taken as the majority tree.
    """
    wanted_children = set('ac')

    def find_ac_edge(tree):
        """Return the first non-root edge whose children are a and c."""
        for candidate in tree.getEdgeVector(include_root=False):
            child_names = set(child.Name for child in candidate.Children)
            if child_names == wanted_children:
                return candidate
        return None

    def check_lengths_agree(scored_trees, **consensus_options):
        collection = ScoredTreeCollection(scored_trees)
        consensus = collection.getConsensusTree(**consensus_options)
        majority = scored_trees[0][1]
        gap = find_ac_edge(consensus).Length - find_ac_edge(majority).Length
        self.assertTrue(abs(gap) < 1e-9)

    check_lengths_agree(self.unrooted_trees_lengths)
    check_lengths_agree(self.rooted_trees_lengths, method='rooted')
def test_consensus_from_scored_trees_collection_ii(self):
    """consensus of conflicting trees should be the unresolved (a,b,c,d) tree

    Checked first for the unrooted fixture with the default method, then
    for the rooted fixture with method='rooted'.
    """
    equal_scores = [1] * 3

    # Default consensus method on the unrooted conflicting trees.
    unrooted_collection = ScoredTreeCollection(
        zip(equal_scores, self.unrooted_conflicting_trees))
    unrooted_consensus = unrooted_collection.getConsensusTrees()[0]
    self.assertTrue(unrooted_consensus.sameTopology(Tree("(a,b,c,d);")))

    # 'rooted' consensus method on the rooted conflicting trees.
    rooted_collection = ScoredTreeCollection(
        zip(equal_scores, self.rooted_conflicting_trees))
    rooted_consensus = rooted_collection.getConsensusTrees(method='rooted')[0]
    self.assertTrue(rooted_consensus.sameTopology(Tree("(a,b,c,d);")))
def test_consensus_tree_branch_lengths(self):
    """consensus trees should average branch lengths properly

    Compares the Length of the edge joining 'a' and 'c' in the consensus
    tree with the same edge in the tree at index [0][1] of each fixture.
    """
    tolerance = 1e-9

    def get_ac(tree):
        """Return the first non-root edge whose children are a and c."""
        target = set('ac')
        for edge in tree.getEdgeVector(include_root=False):
            names = set(child.Name for child in edge.Children)
            if names != target:
                continue
            return edge

    # Unrooted trees, default consensus method.
    unrooted = self.unrooted_trees_lengths
    consensus = ScoredTreeCollection(unrooted).getConsensusTree()
    majority = unrooted[0][1]
    consensus_length = get_ac(consensus).Length
    majority_length = get_ac(majority).Length
    self.assertTrue(abs(consensus_length - majority_length) < tolerance)

    # Rooted trees, 'rooted' consensus method.
    rooted = self.rooted_trees_lengths
    consensus = ScoredTreeCollection(rooted).getConsensusTree(method='rooted')
    majority = rooted[0][1]
    consensus_length = get_ac(consensus).Length
    majority_length = get_ac(majority).Length
    self.assertTrue(abs(consensus_length - majority_length) < tolerance)
def gnj(dists, keep=None, dkeep=0, ui=None):
    """Join neighbours step by step, keeping several partial trees per step.

    Arguments:
        - dists: dict of (name1, name2): distance
        - keep: number of best partial trees to keep at each iteration,
          and therefore to return. Same as Q parameter in original GNJ
          paper. Defaults to 5 times the number of names.
        - dkeep: number of diverse partial trees to keep at each iteration,
          and therefore to return. Same as D parameter in original GNJ
          paper.
        - ui: object whose display(msg=..., progress=...) method is called
          to report progress. When None, progress reporting is skipped.

    Result:
        - a ScoredTreeCollection built from the sorted list of the
          asScoreTreeTuple() values of the final trees
    """
    (names, d) = distanceDictTo2D(dists)

    if keep is None:
        keep = len(names) * 5
    all_keep = keep + dkeep

    # For recognising duplicate topologies, encode partitions (ie: edges) as
    # frozensets of tip names, which should be quickly comparable.
    arbitrary_anchor = names[0]
    all_tips = frozenset(names)

    def encode_partition(tips):
        # Always describe a partition by the side holding the anchor tip, so
        # the two complementary tip sets of one edge encode identically.
        included = frozenset(tips)
        if arbitrary_anchor not in included:
            included = all_tips - included
        # could also convert to long int, or cache, would be faster?
        return included

    tips = [frozenset([n]) for n in names]
    nodes = [LightweightTreeTip(name) for name in names]
    star_tree = PartialTree(d, nodes, tips, 0.0)
    star_tree.topology = frozenset([])
    trees = [star_tree]

    # Progress display auxiliary code
    template = ' size %%s/%s trees %%%si' % (len(names), len(str(all_keep)))
    total_work = 0
    max_candidates = 1
    total_work_before = {}
    for L in range(len(names), 3, -1):
        total_work_before[L] = total_work
        max_candidates = min(all_keep, max_candidates*L*(L-1)//2)
        total_work += max_candidates

    def _show_progress():
        # ui defaults to None; without a reporter there is nothing to update.
        if ui is None:
            return
        # Reads the current L and next_trees of the main loop below.
        t = len(next_trees)
        work_done = total_work_before[L] + t
        ui.display(msg=template % (L, t), progress=work_done/total_work)

    for L in range(len(names), 3, -1):
        # Generator of candidate joins, best first.
        # Note that with dkeep>0 this generator is used up a bit at a time
        # by 2 different interrupted 'for' loops below.
        candidates = uniq_neighbour_joins(trees, encode_partition)

        # First take up to 'keep' best ones
        next_trees = []
        _show_progress()
        for pair in candidates:
            next_trees.append(pair)
            if len(next_trees) == keep:
                break
        _show_progress()

        # The very best one is used as an anchor for measuring the
        # topological distance to others
        best_topology = next_trees[0].topology
        prior_td = [len(best_topology ^ tree.topology) for tree in trees]

        # Maintain a separate queue of joins for each possible
        # topological distance
        max_td = (max(prior_td) + 1) // 2
        queue = [deque() for _ in range(max_td+1)]
        queued = 0

        # Now take up to dkeep joins, an equal number of the best at each
        # topological distance, while not calculating any more TDs than
        # necessary.
        prior_td = dict(zip(map(id, trees), prior_td))
        target_td = 1
        while (candidates or queued) and len(next_trees) < all_keep:
            if candidates and not queue[target_td]:
                for pair in candidates:
                    diff = pair.new_partition not in best_topology
                    # A new partition absent from the best topology adds one
                    # difference; one already present there removes one.
                    step = 1 if diff else -1
                    td = (prior_td[id(pair.tree)] + step) // 2
                    # equiv, slower: td = len(best_topology ^ topology) // 2
                    queue[td].append(pair)
                    queued += 1
                    if td == target_td:
                        break
                else:
                    # Loop finished without a break: no candidates remain.
                    candidates = None
            if queue[target_td]:
                next_trees.append(queue[target_td].popleft())
                queued -= 1
                _show_progress()

            # Cycle the target distance through 1..max_td.
            target_td = target_td % max_td + 1

        trees = [pair.joined() for pair in next_trees]

    result = [tree.asScoreTreeTuple() for tree in trees]
    result.sort()
    return ScoredTreeCollection(result)
def results2output(self, results):
    """Wrap *results* in a ScoredTreeCollection and return it."""
    collection = ScoredTreeCollection(results)
    return collection