def decompose_trees(tree, max_subset_size):
    """Repeatedly bisect ``tree`` at centroid edges into bounded-size subtrees.

    Work proceeds in rounds.  Every tree of the current round is split in two
    with ``bisect_tree(..., breaking_edge_style="centroid")``.  A half whose
    ``n_leaves`` exceeds ``max_subset_size`` is queued for the next round; a
    half with at least 5 leaves that fits the limit is collected as finished.

    Args:
        tree: The tree to decompose; it is handed straight to ``bisect_tree``.
        max_subset_size: Largest leaf count a finished subtree may have.

    Returns:
        list: The finished subtrees, in the order they were collected.

    Raises:
        SystemExit: Raised through ``sys.exit`` when a half that needs no
            further splitting has fewer than 5 leaves.
    """
    finished = []
    pending = [tree]
    while pending:
        # Start a new round: consume the queue built during the previous one.
        current_round, pending = pending, []
        for candidate in current_round:
            first, second = bisect_tree(candidate, breaking_edge_style="centroid")
            # Both leaf counts are read before either half is acted upon.
            halves = (
                (first, first.n_leaves, "T1 has fewer than 5 leaves!"),
                (second, second.n_leaves, "T2 has fewer than 5 leaves!"),
            )
            for half, leaf_count, too_small_message in halves:
                if leaf_count > max_subset_size:
                    pending.append(half)
                elif leaf_count >= 5:
                    finished.append(half)
                else:
                    sys.exit(too_small_message)
    return finished
def _do_test_longest(self, t, level="1"): if t.n_leaves < 3: return before_br_len = [ e.length for e in t._tree.preorder_edge_iter() if e.length ] _LOG.debug("code=%s\n before = %s" % (level, t.compose_newick())) _LOG.debug(" after len(before_br_len) = %d" % (len(before_br_len))) num_real_edges_before = len(before_br_len) if len(t._tree.seed_node.child_nodes()) < 3: num_real_edges_before -= 1 t1, t2 = bisect_tree(t, 'longest') after_1_br_len = [ e.length for e in t1._tree.preorder_edge_iter() if e.length ] after_2_br_len = [ e.length for e in t2._tree.preorder_edge_iter() if e.length ] num_branches_1 = len(after_1_br_len) num_branches_2 = len(after_2_br_len) if num_branches_2 == 0: num_branches_2 = 1 if num_branches_1 == 0: num_branches_1 = 1 expected_diff = 3 if num_branches_2 == 1: expected_diff -= 2 if num_branches_1 == 1: expected_diff -= 2 # cherries are rooted, so they make 1 edge look like 2 if len(t1._tree.seed_node.child_nodes()) == 2: num_branches_1 -= 1 if len(t2._tree.seed_node.child_nodes()) == 2: num_branches_2 -= 1 _LOG.debug(" after 1 = %s" % (t1.compose_newick())) _LOG.debug(" after num_branches_1 = %d" % (num_branches_1)) _LOG.debug(" after 2 = %s" % (t2.compose_newick())) _LOG.debug(" after num_branches_2 = %d" % (num_branches_2)) #self.assertEqual(len(before_br_len), expected_diff + num_branches_1 + num_branches_2) before_br_len.sort(reverse=True) before_br_len.pop(0) before_sum = sum(before_br_len) after_sum = sum(after_1_br_len) + sum(after_2_br_len) diff = before_sum - after_sum self.assertTrue(abs(diff) < TOL) if t1.n_leaves > 2: nl = level + ".1" self._do_test_longest(t1, level=nl) if t2.n_leaves > 2: nl = level + ".2" self._do_test_longest(t2, level=nl)
def _do_test_longest(self, t, level="1"): if t.n_leaves < 3: return before_br_len = [e.length for e in t._tree.preorder_edge_iter() if e.length] _LOG.debug("code=%s\n before = %s" % (level, t.compose_newick())) _LOG.debug(" after len(before_br_len) = %d" % (len(before_br_len))) num_real_edges_before = len(before_br_len) if len(t._tree.seed_node.child_nodes()) < 3: num_real_edges_before -= 1 t1, t2 = bisect_tree(t, 'longest') after_1_br_len = [e.length for e in t1._tree.preorder_edge_iter() if e.length] after_2_br_len = [e.length for e in t2._tree.preorder_edge_iter() if e.length] num_branches_1 = len(after_1_br_len) num_branches_2 = len(after_2_br_len) if num_branches_2 == 0: num_branches_2 = 1 if num_branches_1 == 0: num_branches_1 = 1 expected_diff = 3 if num_branches_2 == 1: expected_diff -= 2 if num_branches_1 == 1: expected_diff -= 2 # cherries are rooted, so they make 1 edge look like 2 if len(t1._tree.seed_node.child_nodes()) == 2: num_branches_1 -= 1 if len(t2._tree.seed_node.child_nodes()) == 2: num_branches_2 -= 1 _LOG.debug(" after 1 = %s" % (t1.compose_newick())) _LOG.debug(" after num_branches_1 = %d" % (num_branches_1)) _LOG.debug(" after 2 = %s" % (t2.compose_newick())) _LOG.debug(" after num_branches_2 = %d" % (num_branches_2)) #self.assertEqual(len(before_br_len), expected_diff + num_branches_1 + num_branches_2) before_br_len.sort(reverse=True) before_br_len.pop(0) before_sum = sum(before_br_len) after_sum = sum(after_1_br_len) + sum(after_2_br_len) diff = before_sum - after_sum self.assertTrue(abs(diff) < TOL) if t1.n_leaves > 2: nl = level+ ".1" self._do_test_longest(t1, level=nl) if t2.n_leaves > 2: nl = level+ ".2" self._do_test_longest(t2, level=nl)
def decompose_trees(tree, max_subset_size):
    """Split ``tree`` at centroid edges until every piece fits the size limit.

    Each tree on the work list is bisected with
    ``bisect_tree(..., breaking_edge_style="centroid")``.  A half with more
    than ``max_subset_size`` leaves is scheduled for another split; a half
    with at least 5 leaves that fits the limit is added to the result.

    Args:
        tree: The tree to decompose; it is passed directly to ``bisect_tree``.
        max_subset_size: Maximum number of leaves allowed in a returned
            subtree.

    Returns:
        list: The subtrees that needed no further splitting, in the order
        they were collected.

    Raises:
        SystemExit: Raised through ``sys.exit`` when a half that needs no
            further splitting has fewer than 5 leaves.
    """
    next_trees = [tree]
    done_trees = []
    while next_trees:
        trees, next_trees = next_trees, []
        for current in trees:
            t1, t2 = bisect_tree(current, breaking_edge_style="centroid")
            # NOTE: no per-half diameter is computed here.  Building a full
            # phylogenetic distance matrix and scanning all taxon pairs for
            # every split is quadratic in the number of taxa, and nothing in
            # this function consumes the result.
            halves = (
                (t1, t1.n_leaves, "T1 has fewer than 5 leaves!"),
                (t2, t2.n_leaves, "T2 has fewer than 5 leaves!"),
            )
            for half, n_leaves, too_small_message in halves:
                if n_leaves > max_subset_size:
                    next_trees.append(half)
                elif n_leaves >= 5:
                    done_trees.append(half)
                else:
                    sys.exit(too_small_message)
    return done_trees
def decompose_phylogeny(phy, max_size, min_size):
    """Break ``phy`` into subtrees by repeated midpoint bisection.

    Trees are taken from the end of a work list and split with
    ``bisect_tree(..., breaking_edge_style='midpoint', max_size=max_size)``.
    A deep copy of each half goes back on the work list when its
    ``count_leaves()`` is greater than ``min_size``; otherwise the copy is
    collected into the result.

    Args:
        phy: The tree to start from.
        max_size: Forwarded unchanged to ``bisect_tree``.
        min_size: Leaf-count threshold; halves with more leaves than this
            are split again.

    Returns:
        list: Deep copies of every half whose leaf count did not exceed
        ``min_size``, in the order they were collected.
    """
    finished = []
    stack = [phy]
    while stack:
        current = stack.pop()
        left, right = bisect_tree(
            tree=current, breaking_edge_style='midpoint', max_size=max_size
        )
        for half in (left, right):
            # Still above the threshold -> split again; otherwise it is done.
            destination = stack if half.count_leaves() > min_size else finished
            destination.append(deepcopy(half))
    return finished
def main(args):
    """Bisect the input tree once and write each half's leaf labels to disk.

    Reads a Newick tree from ``args.input_tree_file``, resolves polytomies,
    wraps the tree in ``PhylogeneticTree`` and splits it with
    ``bisect_tree``.  The leaf names of the two halves are written, joined by
    newlines and without a trailing newline, to ``A.lab`` and ``B.lab``
    inside ``args.output``.

    Args:
        args: Object with two attributes: ``input_tree_file`` (path of the
            Newick file) and ``output`` (directory that receives the label
            files; it is not created here).
    """
    import os

    # Step 1: Decompose tree
    tree = dendropy.Tree.get(path=args.input_tree_file, schema="newick")
    tree.resolve_polytomies(limit=2, update_bipartitions=True)
    tree = PhylogeneticTree(tree)
    t1, t2 = bisect_tree(tree)

    # Step 2: Write out leaf subsets
    for subtree, filename in ((t1, "A.lab"), (t2, "B.lab")):
        labels = subtree.leaf_node_names()
        # os.path.join keeps an empty output directory relative instead of
        # pointing at the filesystem root, and accepts path-like values.
        with open(os.path.join(args.output, filename), "w") as f:
            f.write("\n".join(labels))
def _do_test_centroid(self, t, level="1"): if t.n_leaves < 5: return t.calc_splits() t1, t2 = bisect_tree(t, 'centroid') assert t1.n_leaves + t2.n_leaves == t.n_leaves # indent = level.count(".") # print("==============\nInput tree has %s leaf nodes." % t.n_leaves) # print("Subtree 1 tree has %s leaf nodes." % t1.n_leaves) # print("Subtree 2 tree has %s leaf nodes." % t2.n_leaves) # print("==============\n") if t1.n_leaves > 2: nl = level + ".1" self._do_test_centroid(t1, level=nl) if t2.n_leaves > 2: nl = level + ".2" self._do_test_centroid(t2, level=nl)
def _do_test_centroid(self, t, level="1"): if t.n_leaves < 5: return t.calc_splits() t1, t2 = bisect_tree(t, 'centroid') assert t1.n_leaves + t2.n_leaves == t.n_leaves # indent = level.count(".") # print("==============\nInput tree has %s leaf nodes." % t.n_leaves) # print("Subtree 1 tree has %s leaf nodes." % t1.n_leaves) # print("Subtree 2 tree has %s leaf nodes." % t2.n_leaves) # print("==============\n") if t1.n_leaves > 2: nl = level+ ".1" self._do_test_centroid(t1, level=nl) if t2.n_leaves > 2: nl = level+ ".2" self._do_test_centroid(t2, level=nl)