def runTest(self):
    """Check that two fixed six-taxon Newick trees have a symmetric difference of 2."""
    reference = dendropy.Tree(
        stream=StringIO("((t5,t6),((t4,(t2,t1)),t3));"),
        schema="newick")
    # The second tree is parsed against the reference tree's taxon set.
    shared_taxa = reference.taxon_set
    encode_splits(reference)

    other = dendropy.Tree(
        stream=StringIO("((t1,t2),((t4,(t5,t6)),t3));"),
        schema="newick",
        taxon_set=shared_taxa)
    encode_splits(other)

    distance = treecalc.symmetric_difference(other, reference)
    self.assertEqual(distance, 2)
def kernelOfTest(self, trees):
    """Merge every tree but the last and require the result to match the last.

    `trees` is a sequence whose final element is the expected result; all
    preceding elements are handed to `inplace_strict_consensus_merge`.
    The test fails when the symmetric difference between the merged tree
    and the expected tree is non-zero.
    """
    expected = trees[-1]
    to_merge = trees[:-1]
    _LOG.debug('input = %s' % str(to_merge))

    merged = inplace_strict_consensus_merge(to_merge)
    encode_splits(merged)
    encode_splits(expected)

    if symmetric_difference(expected, merged) == 0:
        return
    self.fail("\n%s\n!=\n%s" % (str(merged), str(expected)))
def runTest(self):
    """Exercise `treemanip.collapse_conflicting` on three input/expected tree pairs.

    The tree list holds six trees: each even-indexed tree is the input and
    the tree that follows it is the expected result after collapsing with
    the given target split bitmask.
    """
    taxon_set = dendropy.TaxonSet([str(label) for label in range(1, 6)])
    tree_list = dendropy.TreeList(
        stream=StringIO(""" (5,((4,3),2),1); (5,(4,3,2),1); (5,((4,3),2),1); (5,(4,3),2,1); (5,((4,3),2),1); (5,4,3,2,1); """),
        schema="newick",
        taxon_set=taxon_set)

    # (index of input tree, index of expected tree, target split bitmask)
    cases = (
        (0, 1, 0xA),
        (2, 3, 0x3),
        (4, 5, 0x5),
    )
    for input_index, expected_index, split_to_target in cases:
        tree = tree_list[input_index]
        expected_tree = tree_list[expected_index]

        treesplit.encode_splits(tree)
        # The bitmask on the seed node's edge is passed as the full-taxa mask.
        all_cm = tree.seed_node.edge.split_bitmask
        treemanip.collapse_conflicting(tree.seed_node, split_to_target, all_cm)

        # Splits are re-encoded after the manipulation, before comparing.
        treesplit.encode_splits(tree)
        treesplit.encode_splits(expected_tree)
        self.assertEqual(
            treecalc.symmetric_difference(tree, expected_tree), 0)
def compare_trees(tree_filename1, tree_filename2):
    """Compare the trees stored in two files and return a dict of summary metrics.

    Parameters:
        tree_filename1: path of the first tree file.
        tree_filename2: path of the second tree file.

    Returns a dict with these keys:
        'nBSD'          -- euclidean_distance between the two trees
        'SDD'           -- symmetric_difference between the two trees
        'RBD'           -- robinson_foulds_distance between the two trees
        'edgeDelta1/2'  -- max minus min of each tree's patristic distances
        'edgeStd1/2'    -- np.std of each tree's patristic distances
        'SumBranchLen1/2' -- sum of the non-None edge lengths of each tree
    """
    from dendropy import Tree, TreeList
    from dendropy.treecalc import symmetric_difference, euclidean_distance, robinson_foulds_distance as rbd, PatristicDistanceMatrix as pdm

    # The original body called `g` without defining it (and imported `Tree`
    # without using it). The loader below mirrors the simpler sibling
    # `compare_trees`, which reads Newick files.
    # NOTE(review): confirm 'newick' is the intended schema for these inputs.
    def g(path):
        return Tree.get_from_path(path, 'newick')

    c = TreeList([g(tree_filename1), g(tree_filename2)])
    first, second = c[0], c[1]

    pp1 = pdm(first).distances()
    pp2 = pdm(second).distances()

    # Nodes without an edge length are left out of the branch-length sums.
    sumbl1 = sum(n.edge_length for n in first.nodes() if n.edge_length is not None)
    sumbl2 = sum(n.edge_length for n in second.nodes() if n.edge_length is not None)

    return {
        'nBSD': euclidean_distance(first, second),
        'SDD': symmetric_difference(first, second),
        'RBD': rbd(first, second),
        'edgeDelta1': max(pp1) - min(pp1),
        'edgeStd1': np.std(pp1),
        'edgeDelta2': max(pp2) - min(pp2),
        'edgeStd2': np.std(pp2),
        'SumBranchLen1': sumbl1,
        'SumBranchLen2': sumbl2,
    }
def testConsensus(self):
    """Compare the 50% consensus of `self.tree_list` against `self.mb_con_tree`.

    The two trees must have zero symmetric difference and the same number
    of split edges; where both trees label the head node of a split's edge,
    the labels must agree as floats to two decimal places.
    """
    consensus = self.tree_list.consensus(
        min_freq=0.50,
        trees_splits_encoded=False,
        support_label_decimals=2)
    consensus.update_splits()

    topology_distance = treecalc.symmetric_difference(self.mb_con_tree, consensus)
    self.assertEqual(topology_distance, 0)
    self.assertEqual(len(consensus.split_edges), len(self.mb_con_tree.split_edges))

    # Read here but not used by any later statement in this test.
    sd = self.tree_list.split_distribution

    for split in self.mb_con_tree.split_edges:
        reference_edge = self.mb_con_tree.split_edges[split]
        consensus_edge = consensus.split_edges[split]
        # Only edges labelled in both trees are compared.
        if not reference_edge.head_node.label or not consensus_edge.head_node.label:
            continue
        expected_support = float(reference_edge.head_node.label)
        observed_support = round(float(consensus_edge.head_node.label), 2)
        self.assertAlmostEqual(expected_support, observed_support, 2)
def runTest(self):
    """Randomly reorient a tree 50 times and check that its splits never change.

    Two copies of the same Newick tree are parsed; one is kept as the
    reference while the other is repeatedly passed to
    `treemanip.randomly_reorient_tree`. After every reorientation the
    string form must differ from the source Newick string, the tree must
    pass `debug_check_tree`, and its symmetric difference to the reference
    must be zero.
    """
    newick = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'

    # A first parse supplies the taxon set used for the two working trees.
    seed_tree = dendropy.TreeList(stream=StringIO(newick), schema="newick")[0]
    trees = dendropy.TreeList(
        stream=StringIO(newick + newick),
        schema="newick",
        encode_splits=True,
        taxon_set=seed_tree.taxon_set)
    reference = trees[0]
    reoriented = trees[1]

    rng = RepeatedRandom()
    for _ in xrange(50):
        treemanip.randomly_reorient_tree(reoriented, rng=rng, splits=True)
        self.assertNotEqual(str(reoriented), newick)
        reoriented.debug_check_tree(logger_obj=_LOG, splits=True)
        if treecalc.symmetric_difference(reference, reoriented) == 0:
            continue
        self.fail("\n%s\n!=\n%s" % (str(reference), str(reoriented)))
def unique_trees(tree_list, mcmc_trees, format, burnin=0, taxonset=None):
    """Extend `tree_list` with the topologically new trees from an MCMC sample.

    Each tree yielded by `tree_iter(mcmc_trees, format, burnin, taxonset)`
    is compared with the trees already in `tree_list`. A tree whose
    symmetric difference to some stored tree is 0 is counted as redundant;
    otherwise it is appended to `tree_list` (the list is modified in place).

    Returns a tuple `(tree_list, redundant_count)`.
    """
    n_redundant = 0
    for candidate in tree_iter(mcmc_trees, format, burnin, taxonset):
        # any() stops at the first matching topology, like the original break.
        already_known = any(
            treecalc.symmetric_difference(candidate, known) == 0
            for known in tree_list)
        if already_known:
            n_redundant += 1
        else:
            tree_list.append(candidate)
    return tree_list, n_redundant
def unique_trees(tree_list, mcmc_trees, format, burnin=0, taxonset=None):
    """Collect the non-redundant topologies of an MCMC tree sample.

    Trees come from `tree_iter(mcmc_trees, format, burnin, taxonset)`.
    A sampled tree is redundant when its symmetric difference to any tree
    already held in `tree_list` equals 0; non-redundant trees are appended
    to `tree_list` in place.

    Returns `(tree_list, redundant_count)`.
    """
    duplicates_seen = 0
    for sampled in tree_iter(mcmc_trees, format, burnin, taxonset):
        is_duplicate = False
        for kept in tree_list:
            if treecalc.symmetric_difference(sampled, kept) == 0:
                is_duplicate = True
                break
        if is_duplicate:
            duplicates_seen += 1
            continue
        tree_list.append(sampled)
    return tree_list, duplicates_seen
def print_distances(tree_list, mle, uniq_flag=False):
    """Print the symmetric difference between the MLE tree and each tree in a file.

    Parameters:
        tree_list: path of the tree file; it is opened here and read with
            schema 'nexus'.
        mle: passed to `get_mle_tree`, which returns the reference tree and
            the taxon set used to parse `tree_list`.
        uniq_flag: when true, trees whose distance to the MLE tree is
            greater than 0 are collected.

    Returns a tuple `(uniq_trees, len(uniq_trees))`; `uniq_trees` stays
    empty when `uniq_flag` is false.
    """
    mle_tree, taxa = get_mle_tree(mle)
    distances = []
    uniq_trees = dendropy.TreeList()

    # The context manager closes the tree file even if parsing raises; the
    # original left the handle open.
    with open(tree_list, 'rU') as tree_stream:
        trees = tree_source_iter(stream=tree_stream, schema='nexus', taxon_set=taxa)
        for count, t in enumerate(trees, 1):
            dist = treecalc.symmetric_difference(mle_tree, t)
            print("Distance between MLE tree and tree %i: %i" % (count, dist))
            distances.append(dist)
            if uniq_flag and dist > 0:
                uniq_trees.append(t)

    # With no trees read there is no mean to report; skipping the line avoids
    # the ZeroDivisionError the original raised on an empty file.
    if distances:
        # NOTE(review): '%d' truncates the mean to an integer -- kept as-is so
        # the printed output is unchanged.
        print("Mean symmetric distance between MLE and tree list: %d" \
            % float(sum(distances)/len(distances)))
    return uniq_trees, len(uniq_trees)
def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Return the pairwise symmetric differences after collapsing short edges.

    Every internal edge whose length is not None and is below
    `edge_len_threshold` is collapsed before the trees are compared.
    With `copy_trees` true the work is done on `copy.copy` copies of the
    trees; otherwise the supplied trees are collapsed in place and only
    their `is_rooted` flag is restored on exit.

    Returns an n-by-n list of lists of symmetric differences. When fewer
    than two trees are given, a flat list of zeros (one per tree) is
    returned instead.
    """
    if copy_trees:
        tree_list = [copy.copy(tree) for tree in trees_to_compare]
    else:
        tree_list = list(trees_to_compare)
    n_trees = len(tree_list)
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, '\n'.join(str(tree) for tree in tree_list)))
    if n_trees < 2:
        return [0] * n_trees

    original_rooting = []
    for tree in tree_list:
        encode_splits(tree)
        # Collect first, collapse afterwards, so the tree is not modified
        # while its edges are being iterated.
        short_edges = [
            edge
            for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal)
            if edge.length is not None and edge.length < edge_len_threshold
        ]
        for edge in short_edges:
            collapse_edge(edge)
        original_rooting.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        encode_splits(tree)

    sd_mat = [[0] * n_trees for _ in xrange(n_trees)]
    # Fill the upper triangle and mirror each value into the lower triangle.
    for i in xrange(n_trees - 1):
        for j in xrange(i + 1, n_trees):
            sd = symmetric_difference(tree_list[i], tree_list[j])
            sd_mat[i][j] = sd
            sd_mat[j][i] = sd

    if not copy_trees:
        for was_rooted, tree in itertools.izip(original_rooting, tree_list):
            tree.is_rooted = was_rooted
    return sd_mat
def print_distances(tree_list, mle, uniq_flag=False):
    """Report how far each tree in a file is from the MLE tree.

    Parameters:
        tree_list: path of the tree file, opened here and parsed with
            schema 'nexus'.
        mle: argument for `get_mle_tree`, which supplies the reference tree
            and the taxon set used when reading `tree_list`.
        uniq_flag: if true, trees with a distance greater than 0 are kept.

    Prints one line per tree plus a final mean line, and returns
    `(uniq_trees, len(uniq_trees))`.
    """
    mle_tree, taxa = get_mle_tree(mle)
    distances = []
    uniq_trees = dendropy.TreeList()

    # Open the file in a `with` block so the handle is always released; the
    # original passed an anonymous open() that was never closed.
    with open(tree_list, 'rU') as handle:
        source = tree_source_iter(stream=handle, schema='nexus', taxon_set=taxa)
        for count, t in enumerate(source, 1):
            dist = treecalc.symmetric_difference(mle_tree, t)
            print("Distance between MLE tree and tree %i: %i" % (count, dist))
            distances.append(dist)
            if uniq_flag and dist > 0:
                uniq_trees.append(t)

    # Guard the mean: an empty tree file used to raise ZeroDivisionError.
    if distances:
        # NOTE(review): '%d' drops the fractional part of the mean; left
        # unchanged to preserve the existing output format.
        print("Mean symmetric distance between MLE and tree list: %d" \
            % float(sum(distances)/len(distances)))
    return uniq_trees, len(uniq_trees)
def testConsensus(self):
    """Verify the majority-rule consensus of `self.tree_list` against `self.mb_con_tree`.

    Requires zero symmetric difference, equal numbers of split edges, and,
    for every split whose edge head nodes are labelled in both trees,
    labels that match as floats to two decimal places.
    """
    con_tree = self.tree_list.consensus(min_freq=0.50,
                                        trees_splits_encoded=False,
                                        support_label_decimals=2)
    con_tree.update_splits()

    self.assertEqual(treecalc.symmetric_difference(self.mb_con_tree, con_tree), 0)
    n_built = len(con_tree.split_edges)
    n_reference = len(self.mb_con_tree.split_edges)
    self.assertEqual(n_built, n_reference)

    # Accessed but not consulted by the checks below.
    sd = self.tree_list.split_distribution

    for split in self.mb_con_tree.split_edges:
        mb_edge = self.mb_con_tree.split_edges[split]
        built_edge = con_tree.split_edges[split]
        both_labelled = mb_edge.head_node.label and built_edge.head_node.label
        if both_labelled:
            mb_support = float(mb_edge.head_node.label)
            built_support = round(float(built_edge.head_node.label), 2)
            self.assertAlmostEqual(mb_support, built_support, 2)
def long_branch_symmdiff(trees_to_compare, edge_len_threshold, copy_trees=False, rooted=False):
    """Compute a symmetric-difference matrix over trees with short edges collapsed.

    Internal edges with a length that is not None and is smaller than
    `edge_len_threshold` are collapsed first. If `copy_trees` is true the
    trees are copied with `copy.copy` beforehand; if it is false the
    caller's trees keep their collapsed edges on return, and only their
    `is_rooted` attribute is put back.

    Returns a square list-of-lists matrix, or a flat list of zeros (one
    entry per tree) when fewer than two trees are supplied.
    """
    tree_list = [copy.copy(t) for t in trees_to_compare] if copy_trees else list(trees_to_compare)
    n_trees = len(tree_list)
    tree_dump = '\n'.join([str(t) for t in tree_list])
    _LOG.debug('%d Trees to compare:\n%s\n' % (n_trees, tree_dump))
    if n_trees < 2:
        return [0] * n_trees

    saved_rooting = []
    for tree in tree_list:
        encode_splits(tree)
        # Edges are gathered before any collapse so iteration sees the
        # unmodified tree.
        doomed = []
        for edge in tree.preorder_edge_iter(filter_fn=Edge.is_internal):
            length = edge.length
            if length is None or length >= edge_len_threshold:
                continue
            doomed.append(edge)
        for edge in doomed:
            collapse_edge(edge)
        saved_rooting.append(tree.is_rooted)
        tree.is_rooted = bool(rooted)
        encode_splits(tree)

    sd_mat = []
    for _ in xrange(n_trees):
        sd_mat.append([0] * n_trees)

    # Each unordered pair is compared once and stored symmetrically.
    for row, tree_one in enumerate(tree_list):
        for col in xrange(row + 1, n_trees):
            distance = symmetric_difference(tree_one, tree_list[col])
            sd_mat[row][col] = distance
            sd_mat[col][row] = distance

    if not copy_trees:
        for flag, tree in itertools.izip(saved_rooting, tree_list):
            tree.is_rooted = flag
    return sd_mat
def compare_trees(tree_filename1, tree_filename2):
    """Return the symmetric difference between the trees in two Newick files.

    Both paths are read with `Tree.get_from_path(path, 'newick')`, the two
    trees are placed in a `TreeList`, and the list's members are compared.
    """
    from dendropy import Tree, TreeList
    from dendropy.treecalc import symmetric_difference

    paths = (tree_filename1, tree_filename2)
    # NOTE(review): the trees are compared via the TreeList rather than
    # directly -- presumably so both share the list's taxon set; confirm.
    pair = TreeList([Tree.get_from_path(path, 'newick') for path in paths])
    first = pair[0]
    second = pair[1]
    return symmetric_difference(first, second)
#! /usr/bin/env python

import dendropy
from dendropy import multi_tree_source_iter
from dendropy import treecalc

# Script: print the mean symmetric difference between an MLE tree and the
# trees read from four MCMC tree files, all parsed against one taxon set.

distances = []
taxa = dendropy.TaxonSet()
mle_tree = dendropy.Tree.get_from_path(
    'pythonidae.mle.nex',
    'nexus',
    taxon_set=taxa)

mcmc_tree_file_paths = [
    'pythonidae.mb.run%d.t' % run_number
    for run_number in (1, 2, 3, 4)
]

# Trees are consumed one at a time from the multi-file iterator.
distances.extend(
    treecalc.symmetric_difference(mle_tree, mcmc_tree)
    for mcmc_tree in multi_tree_source_iter(
        mcmc_tree_file_paths,
        schema='nexus',
        taxon_set=taxa))

print("Mean symmetric distance between MLE and MCMC trees: %d"
      % float(sum(distances) / len(distances)))
#! /usr/bin/env python

import dendropy
from dendropy import tree_source_iter
from dendropy import treecalc

# Script: print the mean symmetric difference between an MLE tree and the
# trees of one MCMC tree file, read with tree_offset=200.

distances = []
taxa = dendropy.TaxonSet()
mle_tree = dendropy.Tree.get_from_path('pythonidae.mle.nex', 'nexus', taxon_set=taxa)

# The MCMC file is opened in a `with` block so it is closed once iteration
# finishes or fails; the original passed an open() result that was never
# closed.
with open('pythonidae.mcmc.nex', 'rU') as mcmc_stream:
    for mcmc_tree in tree_source_iter(
            stream=mcmc_stream,
            schema='nexus',
            taxon_set=taxa,
            tree_offset=200):
        distances.append(treecalc.symmetric_difference(mle_tree, mcmc_tree))

print("Mean symmetric distance between MLE and MCMC trees: %d"
    % float(sum(distances)/len(distances)))
def symmetric_difference(tree_str1, tree_str2):
    """Return the symmetric difference between two trees given as Newick strings.

    Returns -1 when either argument is None or the empty string. Otherwise
    both strings are parsed as Newick trees against one fresh `TaxonSet`
    and the result of `treecalc.symmetric_difference` is returned.
    """
    for newick in (tree_str1, tree_str2):
        if newick is None or newick == "":
            return -1

    shared_taxa = TaxonSet()
    first_tree = Tree(stream=StringIO(tree_str1), schema='newick', taxon_set=shared_taxa)
    second_tree = Tree(stream=StringIO(tree_str2), schema='newick', taxon_set=shared_taxa)
    return treecalc.symmetric_difference(first_tree, second_tree)
def compare_trees(tree_filename1, tree_filename2):
    """Compare the trees stored in two files using three distance measures.

    Parameters:
        tree_filename1: path of the first tree file.
        tree_filename2: path of the second tree file.

    Returns a dict with keys 'nBSD' (euclidean_distance), 'SDD'
    (symmetric_difference) and 'RBD' (robinson_foulds_distance), each
    computed between the two loaded trees.
    """
    from dendropy import Tree, TreeList
    from dendropy.treecalc import symmetric_difference, euclidean_distance, robinson_foulds_distance as rbd

    # The original body called `g` without defining it (and imported `Tree`
    # without using it). The loader below mirrors the simpler sibling
    # `compare_trees`, which reads Newick files.
    # NOTE(review): confirm 'newick' is the intended schema for these inputs.
    def g(path):
        return Tree.get_from_path(path, 'newick')

    c = TreeList([g(tree_filename1), g(tree_filename2)])
    first, second = c[0], c[1]
    return {
        'nBSD': euclidean_distance(first, second),
        'SDD': symmetric_difference(first, second),
        'RBD': rbd(first, second),
    }