def testReroot(self):
    """Move every leaf to the outgroup position and check the splits stay put.

    A reference tree and a differently shaped tree are first compared
    (symmetric difference expected to be 2).  Then, for each of the labels
    t1..t6, the working tree is re-rooted with ``to_outgroup_position``,
    written out with ``str``, re-parsed, and compared against the reference
    (symmetric difference expected to be 0).
    """
    newick = "((t5,t6),((t4,(t2,t1)),t3));"
    parsed = dataio.trees_from_newick([newick])
    tree = parsed.trees_blocks[0][0]
    taxa_block = parsed.taxa_blocks[0]

    def read_with_splits(text):
        # Parse against the shared taxa block, then encode the splits.
        loaded = dataio.trees_from_newick(
            [text], taxa_block=taxa_block).trees_blocks[0][0]
        encode_splits(loaded)
        return loaded

    ref = read_with_splits(newick)
    o_tree = read_with_splits("((t2, t1),((t4,(t5,t6)),t3));")
    self.assertEqual(symmetric_difference(o_tree, ref), 2)

    for index in xrange(1, 7):
        leaf_name = "t%d" % index
        nd = tree.find_taxon_node(lambda taxon: taxon.label == leaf_name)
        tree.to_outgroup_position(nd)
        # Round-trip through the string form before comparing splits.
        r_tree = read_with_splits(str(tree))
        self.assertEqual(symmetric_difference(r_tree, ref), 0)
def testCollapseClade(self):
    """Collapse clades one at a time and check the newick string after each.

    The first tree is collapsed at the first root child, at the first child
    of the third root child, at the third root child and finally at the
    root.  A second, fully nested tree is then collapsed at its root in a
    single step.
    """
    source = ["(t5,t6,((t4,(t2,t1)),t3));"]
    tree = dataio.trees_from_newick(source).trees_blocks[0][0]
    encode_splits(tree)
    root = tree.seed_node
    # The child list is captured once, before anything is collapsed.
    top_level = root.child_nodes()

    # Step 1: first root child; the string is expected to be unchanged.
    collapse_clade(top_level[0])
    tree.debug_check_tree(splits=True)
    self.assertEqual(str(tree), "(t5,t6,((t4,(t2,t1)),t3))")

    # Step 2: first child of the third root child.
    third = top_level[2]
    inner = third.child_nodes()[0]
    collapse_clade(inner)
    tree.debug_check_tree(logger_obj=_LOG)
    self.assertEqual(str(tree), "(t5,t6,((t4,t2,t1),t3))")

    # Step 3: the third root child itself.
    collapse_clade(third)
    tree.debug_check_tree(logger_obj=_LOG)
    self.assertEqual(str(tree), "(t5,t6,(t4,t2,t1,t3))")

    # Step 4: the root.
    collapse_clade(root)
    # NOTE(review): the check is invoked twice back-to-back; the second
    # call looks redundant -- confirm before removing.
    tree.debug_check_tree(logger_obj=_LOG)
    tree.debug_check_tree(logger_obj=_LOG)
    self.assertEqual(str(tree), "(t5,t6,t4,t2,t1,t3)")

    # Fresh tree: collapsing at the root in one go.
    nested = ["((t5,t6),((t4,(t2,t1)),t3));"]
    tree = dataio.trees_from_newick(nested).trees_blocks[0][0]
    collapse_clade(tree.seed_node)
    tree.debug_check_tree(logger_obj=_LOG)
    self.assertEqual(str(tree), "(t5,t6,t4,t2,t1,t3)")
def testRerootSplits(self):
    """Re-root a tree at the root's second child and check the clade masks.

    The tree is re-rooted with ``reroot_at(..., splits=True,
    delete_deg_two=False)``.  Afterwards the test checks that:

    * the new seed node is the chosen child and carries the full mask that
      the old root's edge had before re-rooting;
    * the old root now hangs below the new root;
    * the old root's edge mask equals the chosen child's former mask, either
      directly or after complementing it within the full mask.
    """
    newick = "((Athrotaxi,(Callitris,(Juniperusc,Libocedrus))),(((((((Basichlsac,(Mougeotisp,Lamprothma)),Thuidium),(Petalaphy,Haplomitr2)),((Botrychbit,(Vittarifle,((Dicksonant,((Polypodapp,Oleandrapi),Dennstasam)),Azollacaro))),Angiopteri)),Isoetesmel),((Sagittari,(Calochort,(Tacca,(Calathea,Ravenala)))),((Nelumbo,((((((Verbena,((Thunbergi,Acanthus),(Proboscid,Harpogoph))),Asclepias),Menyanthe),(Phyllonom,(Chamaedap,Pyrola))),((((Mirabilus,Pisum),Circaea),((Rheinward,Octomeles),Greyia)),Dudleya)),Phoradend)),(((Liriodchi,Annona),Gyrocarpu),Illicium)))),(Pseudotsu,(Agathisova,Agathismac))));"
    d = dataio.trees_from_newick([newick])
    tree = d.trees_blocks[0][0]
    taxa_block = d.taxa_blocks[0]
    # NOTE(review): ``ref`` is parsed and encoded but never compared below;
    # kept because the extra parse against the shared taxa block may be
    # intentional -- confirm.
    ref = dataio.trees_from_newick(
        [newick], taxa_block=taxa_block).trees_blocks[0][0]
    encode_splits(tree)
    encode_splits(ref)

    r = tree.seed_node
    curr_n = r.child_nodes()[1]
    # Masks are recorded before re-rooting so they can be compared afterwards.
    former_mask = curr_n.edge.clade_mask
    tm = r.edge.clade_mask

    tree.reroot_at(curr_n, splits=True, delete_deg_two=False)

    new_root = tree.seed_node
    self.assertEqual(tm, new_root.edge.clade_mask)
    self.assertTrue(new_root is curr_n)
    self.assertTrue(r.parent_node is curr_n)
    # Complement of the old root's mask, restricted to the bits in ``tm``.
    flipped = (~(r.edge.clade_mask)) & tm
    self.assertTrue(
        (former_mask == r.edge.clade_mask) or (flipped == former_mask))
def testConflict(self):
    """Run ``kernelOfTest`` on two three-tree inputs over the labels 1..6.

    The first input is checked as parsed, then fifty more times after each
    freshly parsed tree has been passed through ``randomly_reorient_tree``
    with a shared ``DebuggingRandom``.  The second input is checked once.
    """
    def load(newicks):
        # Every data set gets its own TaxaBlock with the labels "1".."6".
        taxa = TaxaBlock([str(label) for label in xrange(1, 7)])
        blocks = trees_from_newick(newicks, taxa_block=taxa).trees_blocks
        return [block[0] for block in blocks]

    o = [
        '(1,5,(2,((3,6),4)))',
        '(2,1,(3,(6,4)))',
    ]
    m = [o[0], o[1], '(1,5,(2,(3,6,4)))']
    self.kernelOfTest(load(list(m)))

    rng = DebuggingRandom()
    for _ in xrange(50):
        trees = load(list(m))
        for t in trees:
            randomly_reorient_tree(t, rng=rng)
        self.kernelOfTest(trees)

    o = [
        '(1,5,(3,((2,6),4)))',
        '(2,1,(3,(6,4)))',
    ]
    n = [o[0], o[1], '((1,5),2,3,6,4)']
    self.kernelOfTest(load(n))
def dofour_five_compat(self, four_taxon_newick, five_taxon_newick):
    """Run ``kernelOfTest`` on a four-taxon / five-taxon newick combination.

    The trees are parsed and checked in two different input orders; the
    five-taxon newick appears twice in each input list.
    """
    orderings = (
        [five_taxon_newick, four_taxon_newick, five_taxon_newick],
        # make sure that the behavior is not order dependent
        [four_taxon_newick, five_taxon_newick, five_taxon_newick],
    )
    for newicks in orderings:
        dataset = trees_from_newick(newicks)
        self.kernelOfTest([block[0] for block in dataset.trees_blocks])
def testSymmDiff(self):
    """Check that two given six-taxon trees have a symmetric difference of 2."""
    parsed = dataio.trees_from_newick(["((t5,t6),((t4,(t2,t1)),t3));"])
    ref = parsed.trees_blocks[0][0]
    taxa_block = parsed.taxa_blocks[0]
    encode_splits(ref)

    # The second tree is parsed against the first tree's taxa block.
    other = dataio.trees_from_newick(
        ["((t1,t2),((t4,(t5,t6)),t3));"],
        taxa_block=taxa_block).trees_blocks[0][0]
    encode_splits(other)

    distance = treedists.symmetric_difference(other, ref)
    self.assertEqual(distance, 2)
def testEuclideanDist(self):
    """Compare ``treedists.euclidean_distance`` for all pairs of four trees.

    Each pair's distance is checked against a stored value with
    ``assert_approx_equal``.
    """
    newicks = [
        "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);",
        "((t5:2.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);",
        "((t5:0.161175,t6:0.161175):0.392293,((t2:0.075411,(t4:0.104381,t1:0.075411):1):0.065840,t3:0.170221):0.383247);",
        "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):0.028969):0.065840,t3:0.170221):0.383247);",
    ]
    parsed = dataio.trees_from_newick(newicks)
    tree_list = [block[0] for block in parsed.trees_blocks]
    for tree in tree_list:
        encode_splits(tree)

    # (index of first tree, index of second tree, expected distance)
    expectations = (
        (0, 1, 2.0),
        (0, 2, math.sqrt(2.0)),
        (0, 3, 0.97103099999999998),
        (1, 2, math.sqrt(6.0)),
        (1, 3, 2.2232636377544162),
        (2, 3, 1.000419513484718),
    )
    for first, second, expected in expectations:
        assert_approx_equal(
            treedists.euclidean_distance(tree_list[first], tree_list[second]),
            expected)
def testCladeMasks(self):
    """Check ``split_to_list`` and ``count_bits`` on two clade masks.

    The masks come from the root edge and from the edge of the root's first
    child, after ``encode_splits`` has been run on the tree.
    """
    dataset = dataio.trees_from_newick([
        "((t5:0.161175,t6:0.161175):0.392293,((t4:0.104381,(t2:0.075411,t1:0.075411):1):0.065840,t3:0.170221):0.383247);",
    ])
    tree_list = [block[0] for block in dataset.trees_blocks]
    for tree in tree_list:
        _LOG.debug(tree.get_indented_form())
        encode_splits(tree)
        _LOG.debug(tree.get_indented_form(splits=True))
        tree.debug_check_tree(splits=True, logger_obj=_LOG)

    root = tree_list[0].seed_node
    root_edge = root.edge
    # (keyword arguments for split_to_list, expected list)
    root_cases = (
        ({}, list(range(6))),
        ({"one_based": True}, list(range(1, 7))),
        ({"mask": 21, "one_based": True}, [1, 3, 5]),
        ({"mask": 21}, [0, 2, 4]),
    )
    for kwargs, expected in root_cases:
        self.assertEqual(
            split_to_list(root_edge.clade_mask, **kwargs), expected)
    self.assertEqual(count_bits(root_edge.clade_mask), 6)

    child_edge = root.child_nodes()[0].edge
    child_cases = (
        ({}, [0, 1]),
        ({"one_based": True}, [1, 2]),
        ({"mask": 0x15, "one_based": True}, [1]),
        ({"mask": 0x15}, [0]),
    )
    for kwargs, expected in child_cases:
        self.assertEqual(
            split_to_list(child_edge.clade_mask, **kwargs), expected)
    self.assertEqual(count_bits(child_edge.clade_mask), 2)
def testOrderDependent(self):
    """Run ``kernelOfTest`` on three overlapping trees in several input orders.

    The final element of each input list is a fourth newick string: an
    eight-leaf star tree in the first run, and the ``expected`` string in
    the other two.
    """
    def first_trees(newicks):
        # Parse the strings and keep the first tree of every block.
        blocks = trees_from_newick(newicks).trees_blocks
        return [block[0] for block in blocks]

    o = ['(1,5,(2,(3,4)))', '(2,4,(3,(6,7)))', '(3,4,(6,(7,8)))']
    self.kernelOfTest(
        first_trees([o[0], o[2], o[1], '(1,2,3,4,5,6,7,8)']))

    expected = '(1,5,(2,((3,(6,(7,8))),4)))'
    self.kernelOfTest(first_trees(o + [expected]))

    # Same inputs again with the first three trees in reverse order.
    self.kernelOfTest(first_trees(o[::-1] + [expected]))
def testThree(self):
    """Run ``kernelOfTest`` on four input trees plus ``expected``, in two orders.

    The four inputs are used in the listed order and then reversed; the
    ``expected`` newick is appended last both times.
    """
    o = [
        '(Athrotaxi,(Liriodchi,Nelumbo),Sagittari);',
        '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));',
        '(Athrotaxi,((((((((Verbena,((Thunbergi,Acanthus),(Proboscid,Harpogoph))),Asclepias),Menyanthe),(Phyllonom,(Chamaedap,Pyrola))),((((Mirabilus,Pisum),Circaea),((Rheinward,Octomeles),Greyia)),Dudleya)),Phoradend),Nelumbo),Liriodchi),Sagittari);',
        '(Athrotaxi,((((Liriodchi,Annona),Gyrocarpu),Illicium),Nelumbo),((((Ravenala,Calathea),Tacca),Calochort),Sagittari));',
    ]
    expected = '((Athrotaxi,(Callitris,(Juniperusc,Libocedrus))),(((((((Basichlsac,(Mougeotisp,Lamprothma)),Thuidium),(Petalaphy,Haplomitr2)),((Botrychbit,(Vittarifle,((Dicksonant,((Polypodapp,Oleandrapi),Dennstasam)),Azollacaro))),Angiopteri)),Isoetesmel),((Sagittari,(Calochort,(Tacca,(Calathea,Ravenala)))),((Nelumbo,((((((Verbena,((Thunbergi,Acanthus),(Proboscid,Harpogoph))),Asclepias),Menyanthe),(Phyllonom,(Chamaedap,Pyrola))),((((Mirabilus,Pisum),Circaea),((Rheinward,Octomeles),Greyia)),Dudleya)),Phoradend)),(((Liriodchi,Annona),Gyrocarpu),Illicium)))),(Pseudotsu,(Agathisova,Agathismac))));'

    for inputs in (o, o[::-1]):
        dataset = trees_from_newick(inputs + [expected])
        self.kernelOfTest([block[0] for block in dataset.trees_blocks])
def testPolytomy(self):
    """Run ``kernelOfTest`` on two resolved trees followed by a flat star tree."""
    newicks = [
        '(Athrotaxi,(Liriodchi,Nelumbo2),Sagittari2);',
        '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));',
        '(Athrotaxi,Liriodchi,Nelumbo2,Sagittari2,Basichlsac,Lamprothma,Mougeotisp,Haplomitr2,Petalaphy,Angiopteri,Azollacaro,Dennstasam,Oleandrapi,Polypodapp,Dicksonant,Vittarifle,Botrychbit,Isoetesmel,Agathismac,Agathisova,Pseudotsu,Libocedrus,Juniperusc,Callitris,Nelumbo,Sagittari,Thuidium);',
    ]
    blocks = trees_from_newick(newicks).trees_blocks
    self.kernelOfTest([block[0] for block in blocks])
def testInsertPath(self):
    """Run ``kernelOfTest`` on a five-leaf tree plus two copies of a deeper tree."""
    newicks = [
        '(((1,2),3),4,5);',
        '(1,2,(3,(7,(8,(9,(4,5))))));',
        '(1,2,(3,(7,(8,(9,(4,5))))));',
    ]
    blocks = trees_from_newick(newicks).trees_blocks
    self.kernelOfTest([block[0] for block in blocks])
def testMultiEdgeCollision(self):
    """Run ``kernelOfTest`` on two nested trees and a partly unresolved third."""
    newicks = [
        '(1,2,(3,(4,(5,6))));',
        '(1,2,(3,(7,(8,6))));',
        '(1,2,(3,(4,5,6,7,8)));',
    ]
    blocks = trees_from_newick(newicks).trees_blocks
    self.kernelOfTest([block[0] for block in blocks])
def testCollapseEdge(self):
    """Collapse the first root child's edge and check the newick string."""
    newicks = ["((t5,t6),((t4,(t2,t1)),t3));"]
    tree = dataio.trees_from_newick(newicks).trees_blocks[0][0]

    # Sanity check of the starting topology as rendered by str().
    self.assertEqual(str(tree), "((t5,t6),((t4,(t2,t1)),t3))")

    first_child = tree.seed_node.child_nodes()[0]
    collapse_edge(first_child.edge)
    tree.debug_check_tree(logger_obj=_LOG)
    self.assertEqual(str(tree), "(t5,t6,((t4,(t2,t1)),t3))")
def testRandomlyReorient(self):
    """Reorient one copy of a tree fifty times and compare it with the other.

    After every ``randomly_reorient_tree`` call the changed tree is checked
    with ``debug_check_tree`` and its symmetric difference to the untouched
    copy must be 0.
    """
    n = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
    dataset = dataio.trees_from_newick([n, n])
    ref, changing = [block[0] for block in dataset.trees_blocks]
    rng = DebuggingRandom()
    encode_splits(ref)
    encode_splits(changing)

    for _ in xrange(50):
        randomly_reorient_tree(changing, rng=rng, splits=True)
        # NOTE(review): ``n`` ends with ';' -- if str(tree) omits the
        # terminator this inequality can never fail; confirm the intent.
        self.assertNotEqual(str(changing), n)
        changing.debug_check_tree(logger_obj=_LOG, splits=True)
        if symmetric_difference(ref, changing) == 0:
            continue
        self.fail("\n%s\n!=\n%s" % (str(ref), str(changing)))
def pairwiseMerger(self, tree1, tree2):
    """Return ``str()`` of the strict consensus merge of two newick trees.

    Both strings are parsed together with ``trees_from_newick``; the first
    tree of each resulting block is merged, with ``self.useGordons`` passed
    on as ``gordons_supertree``.
    """
    blocks = trees_from_newick((tree1, tree2)).trees_blocks
    merged = strict_consensus_merge(
        [block[0] for block in blocks],
        gordons_supertree=self.useGordons)
    return str(merged)
help="Specify to use the Gordon's strict consensus") (options, args) = parser.parse_args() if len(args) == 0: sys.exit("Expecting a filename as an argument") format = options.format.upper() trees = [] if format == "NEXUS" or format == "NEXML": for fn in args: fo = open(fn, "rU") d = dataset_from_file(fo, format=format) t = [] for tb in d.trees_blocks: t.extend(tb) trees.extend(t) elif format == "PHYLIP" or format == "NEWICK": newicks = [] for f in args: fo = open(f, "rU") for line in fo: l = line.strip() if l: newicks.append(l) dataset = trees_from_newick(newicks) trees = [i[0] for i in dataset.trees_blocks] else: sys.exit("Unknown format %s" % format) o = strict_consensus_merge(trees, gordons_supertree=options.gordons) sys.stdout.write("%s;\n" % str(o))