def tred(counter, Namelist, finlist, Non_CleanList_Done):
    """Return the symmetric differences of two inferred trees to a reference tree.

    The original and the noise-reduced alignments are written as FASTA files
    (``fasta_orig`` / ``fasta_noise``), passed through the external
    ``fastprot`` program (output stored in ``fastprot_orig`` /
    ``fastprot_noise``) and turned into Newick trees by the external ``fnj``
    program.  Both trees are compared with the reference tree read from the
    file named by ``sys.argv[2]``.

    Args:
        counter: Not used by this function; kept so existing callers keep
            working.
        Namelist: Sequence names; one FASTA record is written per name.
        finlist: Noise-reduced sequences, indexed like ``Namelist``.
        Non_CleanList_Done: Original sequences, indexed like ``Namelist``.

    Returns:
        Tuple ``(original, noise)``: the symmetric difference between the
        reference tree and the tree built from the original alignment, and
        between the reference tree and the tree built from the noise-reduced
        alignment.
    """
    # One FASTA record per sequence: a blank line, ">name", then the sequence.
    with open('fasta_orig', "w") as fo:
        for n, name in enumerate(Namelist):
            fo.write('\n' + '>' + name + '\n' + Non_CleanList_Done[n])
    with open('fasta_noise', 'w') as fn:
        for n, name in enumerate(Namelist):
            fn.write('\n' + '>' + name + '\n' + finlist[n])

    # universal_newlines=True makes check_output return text instead of bytes,
    # so the output can be written to text-mode files on Python 3 as well.
    with open('fastprot_orig', 'w') as fpo:
        fpo.write(check_output(["fastprot", 'fasta_orig'],
                               universal_newlines=True))
    with open('fastprot_noise', 'w') as fpn:
        fpn.write(check_output(["fastprot", 'fasta_noise'],
                               universal_newlines=True))

    # The Newick output of fnj is kept in memory; no temporary files needed.
    newick_orig = check_output(["fnj", "-O", "newick", "fastprot_orig"],
                               universal_newlines=True)
    newick_noise = check_output(["fnj", "-O", "newick", "fastprot_noise"],
                                universal_newlines=True)

    # All three trees must share one taxon namespace to be comparable.
    tns = dendropy.TaxonNamespace()
    with open(sys.argv[2], 'r') as ref_file:
        t1 = dendropy.Tree.get(file=ref_file,
                               schema="newick",
                               tree_offset=0,
                               taxon_namespace=tns)
    t2 = dendropy.Tree.get(data=newick_orig,
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)
    t3 = dendropy.Tree.get(data=newick_noise,
                           schema="newick",
                           tree_offset=0,
                           taxon_namespace=tns)
    t1.encode_bipartitions()
    t2.encode_bipartitions()
    t3.encode_bipartitions()

    original = treecompare.symmetric_difference(t1, t2)
    noise = treecompare.symmetric_difference(t1, t3)
    return original, noise
def perform_comparsions(treelist, comp_tree):
    """Compare every tree in ``treelist`` with ``comp_tree``.

    Each tree is migrated to the taxon namespace of ``comp_tree``, its
    symmetric difference to ``comp_tree`` is divided by twice the number of
    nodes of ``comp_tree``, and the resulting value is printed and stored.

    Args:
        treelist: Iterable of trees to compare; the trees are modified in
            place by the taxon-namespace migration.
        comp_tree: The tree every other tree is compared against.

    Returns:
        A ``pandas.DataFrame`` with a single column ``'RF'`` holding one
        scaled distance per tree, in iteration order.
    """
    df = pd.DataFrame(columns=['RF'])
    total_diffs = 2 * len(comp_tree.nodes())
    for et in treelist:
        et.migrate_taxon_namespace(comp_tree.taxon_namespace)
        # Compute the distance once and reuse it for both the table and the
        # console output.
        scaled_distance = treecompare.symmetric_difference(et, comp_tree) / total_diffs
        df.loc[len(df)] = scaled_distance
        print(scaled_distance)
    return df
def runTest(self):
    """Check ``collapse_conflicting`` against three expected collapsed trees.

    Six Newick trees over the taxa "1".."5" are parsed; they form three
    (source, expected) pairs.  For each pair the source tree's seed node is
    asked to collapse everything that conflicts with a target bipartition,
    after which the symmetric difference to the expected tree must be 0.
    """
    taxon_namespace = dendropy.TaxonNamespace([str(i + 1) for i in range(5)])
    tree_list = dendropy.TreeList.get_from_stream(
        StringIO(
            " (5,((4,3),2),1);"
            " (5,(4,3,2),1);"
            " (5,((4,3),2),1);"
            " (5,(4,3),2,1);"
            " (5,((4,3),2),1);"
            " (5,4,3,2,1); "),
        schema="newick",
        taxon_namespace=taxon_namespace)

    # (source tree index, expected tree index, target bitmask,
    #  whether to verify that the bipartition was compiled)
    cases = (
        (0, 1, 0xA, True),
        (2, 3, 0x3, False),
        (4, 5, 0x5, False),
    )
    for source_idx, expected_idx, bitmask, check_compiled in cases:
        tree = tree_list[source_idx]
        expected_tree = tree_list[expected_idx]
        tree.encode_bipartitions()
        tree_leafset_bitmask = tree.seed_node.edge.bipartition._leafset_bitmask
        # The keyword is spelled ``compile_bipartition`` in all three cases;
        # the first call previously passed a misspelled keyword.
        bipartition_to_target = dendropy.Bipartition(
            bitmask=bitmask,
            tree_leafset_bitmask=tree_leafset_bitmask,
            compile_bipartition=True)
        if check_compiled:
            assert bipartition_to_target._lowest_relevant_bit is not None
        tree.seed_node.collapse_conflicting(bipartition_to_target)
        tree.encode_bipartitions()
        expected_tree.encode_bipartitions()
        self.assertEqual(
            treecompare.symmetric_difference(tree, expected_tree), 0)
def tree_comparison(collapsed_tree, hogtree, phyml_tree, gene_tree_congruence, report):
    """Compute a gene-tree congruence metric and write a text report.

    The metric is ``1 - diff / (n1 + n2)`` where ``diff`` is the symmetric
    difference (unweighted RF distance) between the collapsed gene tree and
    the HOG tree, and ``n1`` / ``n2`` are their internal-node counts.

    Args:
        collapsed_tree: Path of the Newick file with the collapsed gene tree.
        hogtree: Path of the Newick file with the HOG tree.
        phyml_tree: Path of the Newick file with the uncollapsed gene tree.
        gene_tree_congruence: Output path; receives the metric on one line.
        report: Output path; receives the trees, ASCII plots and statistics.

    Raises:
        ZeroDivisionError: If both compared trees report zero internal nodes.
    """
    print("Calculating gene tree congruence metric...")

    # Read every input once; the text is used both for parsing and for the
    # report, and no file handle is left open.
    with open(collapsed_tree, 'r') as collapsedtreefile:
        collapsed_genetree = collapsedtreefile.read()
    with open(hogtree, 'r') as hogtreefile:
        hog_genetree = hogtreefile.read()
    with open(phyml_tree, 'r') as phymltreefile:
        phyml_genetree = phymltreefile.read()

    tns = dendropy.TaxonNamespace()
    tree1 = dendropy.Tree.get(data=collapsed_genetree, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(data=hog_genetree, schema='newick', taxon_namespace=tns)
    tree3 = dendropy.Tree.get(data=phyml_genetree, schema='newick', taxon_namespace=tns)

    # Same as the unweighted RF distance.
    diff = treecompare.symmetric_difference(tree1, tree2, is_bipartitions_updated=False)

    # Number of internal nodes in each tree.
    tree1_node_num = len(tree1.internal_nodes())
    tree2_node_num = len(tree2.internal_nodes())
    tree3_node_num = len(tree3.internal_nodes())

    hog_metric = 1 - diff / (tree1_node_num + tree2_node_num)
    num_nodes_collapsed = tree3_node_num - tree1_node_num

    with open(gene_tree_congruence, 'w') as outfile1:
        outfile1.write(str(hog_metric) + "\n")

    collapsedplot = tree1.as_ascii_plot()
    hogplot = tree2.as_ascii_plot()
    report_text = "".join([
        "HOG tree:\n", str(hog_genetree), "\n", str(hogplot),
        "\n\nUncollapsed Gene tree:\n", str(phyml_genetree),
        "\n\nCollapsed Gene tree:\n", str(collapsed_genetree),
        "\n", str(collapsedplot),
        "\n\n# nodes collapsed: ", str(num_nodes_collapsed),
        "\n\nSymmetric distance: ", str(diff),
        "\n\n# Nodes (Hogtree): ", str(tree2_node_num),
        "\n\n# Nodes (Uncollaped Gene tree): ", str(tree3_node_num),
        "\n\n# Nodes (Collapsed Gene tree): ", str(tree1_node_num),
        "\n\nHOG Tree Congruence:", str(hog_metric),
        "\n\n",
    ])
    with open(report, 'w') as outfile2:
        outfile2.write(report_text)
def check(self, title, src_prefix):
    """Assert that DendroPy-pruned trees match the PAUP-pruned reference trees.

    Loads ``<src_prefix>.dendropy-pruned.nex`` and
    ``<src_prefix>.paup-pruned.nex`` into the same taxon namespace and checks,
    tree list by tree list and tree by tree, that the symmetric difference
    between corresponding trees is 0.  ``title`` is only used in debug logs.
    """
    shared_taxa = dendropy.TaxonNamespace()
    dendropy_path = pathmap.tree_source_path(src_prefix + ".dendropy-pruned.nex")
    input_ds = dendropy.DataSet.get_from_path(
        src=dendropy_path,
        schema='nexus',
        attached_taxon_namespace=shared_taxa)
    input_taxa = input_ds.taxon_namespaces[0]

    paup_path = pathmap.tree_source_path(src_prefix + ".paup-pruned.nex")
    output_ds = dendropy.DataSet.get_from_path(
        src=paup_path,
        schema='nexus',
        taxon_namespace=input_taxa)

    for set_idx, _ in enumerate(input_ds.tree_lists):
        candidate_trees = input_ds.tree_lists[set_idx]
        reference_trees = output_ds.tree_lists[set_idx]
        for tree_idx, candidate in enumerate(candidate_trees):
            progress = (title,
                        set_idx + 1, len(input_ds.tree_lists),
                        tree_idx + 1, len(candidate_trees))
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
            reference = reference_trees[tree_idx]
            distance = treecompare.symmetric_difference(candidate, reference)
            self.assertEqual(distance, 0)
def compare_trees(tree1_str, tree2_str):
    """Report whether two Newick strings describe the same tree topology.

    Both strings are parsed into a shared taxon namespace and compared by
    symmetric difference: the number of splits found in one tree but not in
    the other, commonly called the Robinson-Foulds distance.  The trees are
    considered the same when that distance is 0.

    Args:
        tree1_str: First tree as a Newick string.
        tree2_str: Second tree as a Newick string.

    Returns:
        A dict with the keys ``'status'`` (200 on success, 500 on failure),
        ``'message'`` (``"Success"`` or the text of the exception) and
        ``'are_same_tree'`` (``True``/``False``; always ``False`` when the
        comparison failed).
    """
    # Default for the failure path, so the response can always be built.
    areSame = False
    try:
        tns = dendropy.TaxonNamespace()
        tree1 = dendropy.Tree.get(data=tree1_str, schema="newick", taxon_namespace=tns)
        tree2 = dendropy.Tree.get(data=tree2_str, schema="newick", taxon_namespace=tns)
        tree1.encode_bipartitions()
        tree2.encode_bipartitions()
        areSame = treecompare.symmetric_difference(tree1, tree2) == 0
        status = 200
        message = "Success"
    except Exception as e:
        # Boundary handler: any failure becomes a 500 response instead of
        # propagating to the caller.
        message = str(e)
        status = 500
    return {'status': status, 'message': message, 'are_same_tree': areSame}
def calcDistance(self):
    """Load the two selected trees, compute their distances and show them.

    Does nothing unless both ``self.path1`` and ``self.path2`` are non-empty.
    The extension of each path (without the dot) is passed to DendroPy as the
    schema name.  The results are stored in ``self.symDist``,
    ``self.fpnDist``, ``self.eucDist`` and ``self.rfDist``; the Euclidean and
    Robinson-Foulds values are written to ``self.res1`` and ``self.res2``.
    """
    if self.path1 == '' or self.path2 == '':
        return

    self.fileEx1 = os.path.splitext(self.path1)[1][1:]
    self.fileEx2 = os.path.splitext(self.path2)[1][1:]

    tns = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(self.path1, self.fileEx1, taxon_namespace=tns)
    self.tree2 = dendropy.Tree.get_from_path(self.path2, self.fileEx2, taxon_namespace=tns)
    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()

    # Compute the distances.  The false positives/negatives are computed once
    # and reused for the console output.
    self.fpnDist = treecompare.false_positives_and_negatives(self.tree1, self.tree2)
    print(self.fpnDist)
    self.symDist = treecompare.symmetric_difference(self.tree1, self.tree2)
    self.eucDist = treecompare.euclidean_distance(self.tree1, self.tree2)
    self.rfDist = treecompare.robinson_foulds_distance(self.tree1, self.tree2)

    # Show the results.
    self.res1.setText(str(self.eucDist))
    self.res2.setText(str(self.rfDist))
def compare_trees(expected, estimated):
    """Return the symmetric difference between an estimated and an expected tree.

    ``expected`` is the path of a Newick file and ``estimated`` the path of a
    Nexus file; the estimated tree is loaded into the taxon namespace of the
    expected tree so that the two can be compared.
    """
    # TODO: branch lengths are hard to test (e.g. via the Euclidean distance);
    # figure out how.
    reference = dendropy.Tree.get_from_path(expected, "newick")
    shared_taxa = reference.taxon_namespace
    candidate = dendropy.Tree.get_from_path(
        estimated, "nexus", taxon_namespace=shared_taxa)
    distance = treecompare.symmetric_difference(candidate, reference)
    return distance
def symmetric_difference(tree1, tree2):
    """Deprecated alias: warn, then forward to ``treecompare.symmetric_difference``."""
    notice = "Deprecated since DendroPy 4: The 'dendropy.treecalc.symmetric_difference()' function has moved to 'dendropy.calculate.treecompare.symmetric_difference()'."
    legacy_usage = "from dendropy import treecalc\nd = treecalc.symmetric_difference(...)"
    current_usage = "from dendropy.calculate import treecompare\nd = treecompare.symmetric_difference(...)"
    deprecate.dendropy_deprecation_warning(
        preamble=notice,
        old_construct=legacy_usage,
        new_construct=current_usage)
    result = treecompare.symmetric_difference(tree1=tree1, tree2=tree2)
    return result
def testTrees(self):
    """Rebuild each reference tree from its bipartition encoding and compare.

    For every listed Nexus file the tree is read with the given rooting, its
    bipartitions are encoded, a new tree is constructed from that encoding,
    and the symmetric difference between the two trees must be 0.
    """
    # (file name, rooting keyword, rooted flag); the flag is not used below.
    cases = [
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
        ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
        ("pythonidae.beast.summary.tre", "force-rooted", True),
        ("primates.beast.mcct.medianh.tre", "force-rooted", True),
    ]
    for case in cases:
        tree_file, rooting = case[0], case[1]
        source_path = pathmap.tree_source_path(tree_file)
        ref_tree = dendropy.Tree.get_from_path(source_path, "nexus", rooting=rooting)
        encoding = ref_tree.encode_bipartitions()
        rebuilt = dendropy.Tree.from_bipartition_encoding(
            encoding,
            taxon_namespace=ref_tree.taxon_namespace,
            is_rooted=ref_tree.is_rooted)
        _LOG.debug("--\n File: {} ({})".format(tree_file, ref_tree.is_rooted))
        _LOG.debug(" Original: {}".format(ref_tree.as_string("newick")))
        _LOG.debug("Reconstructed: {}".format(rebuilt.as_string("newick")))
        distance = treecompare.symmetric_difference(ref_tree, rebuilt)
        self.assertEqual(distance, 0)
def rf_distance_dualbros_orig(dualbros_file, original_file):
    """
    Compare a dualbros-rooted tree with the correctly rooted original tree.

    :param dualbros_file: output file of the dualbros rooting; holds an info
        section, a separator line of dashes, and the rooted newick string.
    :param original_file: newick file with the correctly rooted tree; must use
        the same taxon names as the dualbros output.
    :return: symmetric difference (RF distance) between the two trees, both
        treated as rooted. Per the original author: 0 signifies a correct
        root; 1 or 2 is the usual result for an incorrect root.
    """
    with open(dualbros_file, "r") as handle:
        raw_text = handle.read()
    _info, newick_text = raw_text.split("------------------------")

    # The original gene tree.
    original = dp.Tree.get_from_path(original_file, schema="newick")
    original.is_rooted = True

    # The tree produced by dualbros.
    computed = dp.Tree.get_from_string(newick_text, schema="newick")
    computed.is_rooted = True

    # Both trees have to use the same taxon namespace to be comparable.
    computed.migrate_taxon_namespace(original.taxon_namespace)
    distance = tc.symmetric_difference(original, computed)
    return distance


# Example call:
# print(rf_distance_dualbros_orig("sample_output/tree4.txt", "../final-project-src/AllSimulatedDatasets/R-025-HI-NR/formatted_for_optroot/tree4"))
def compare_rf_distance(ref, method_types, testset_dir, num_testset, size_testset, size_phy, out_filename=None, silent=False):
    """Write normalized RF distances of several methods against a reference.

    For test set ``i`` (1-based directory ``<testset_dir>/<i>``) the trees
    ``<j>_<ref>.nwk`` are the references and ``<j>_<method>.nwk`` the trees of
    each method.  Every symmetric difference is divided by
    ``2 * size_phy - 6``.  A per-test-set CSV (one row per method) is written
    into each test-set directory, and a summary CSV holding the per-method
    averages of every test set is written into ``testset_dir``.

    Args:
        ref: Name used in the reference tree file names.
        method_types: Method names used in the tree file names.
        testset_dir: Directory containing the numbered test-set directories.
        num_testset: Number of test sets.
        size_testset: Number of trees per test set.
        size_phy: Value used in the normalizer ``2 * size_phy - 6``.
        out_filename: CSV file name; defaults to ``'RF_dist.csv'``.
        silent: If true, per-method averages are not printed.
    """
    if out_filename is None:
        out_filename = 'RF_dist.csv'

    # All trees share one taxon namespace.
    shared_taxa = dendropy.TaxonNamespace()
    summary_path = os.path.join(testset_dir, out_filename)
    normalizer = 2 * size_phy - 6

    averages_per_testset = []
    for set_idx in range(num_testset):
        set_name = str(set_idx + 1)
        print('testset[{}] Calculating RF distance ...'.format(set_idx + 1))

        ref_trees = []
        for tree_idx in range(size_testset):
            ref_path = os.path.join(testset_dir, set_name,
                                    '%d_%s.nwk' % (tree_idx + 1, ref))
            ref_trees.append(
                dendropy.Tree.get(path=ref_path,
                                  schema='newick',
                                  taxon_namespace=shared_taxa))

        averages = []
        set_result_path = os.path.join(testset_dir, set_name, out_filename)
        with open(set_result_path, 'wt') as f_write:
            f_write.write('reference: {}\n'.format(ref))
            for method_type in method_types:
                running_sum = 0
                row = [method_type]
                for tree_idx, ref_tree in enumerate(ref_trees):
                    method_path = os.path.join(
                        testset_dir, set_name,
                        '%d_%s.nwk' % (tree_idx + 1, method_type))
                    method_tree = dendropy.Tree.get(path=method_path,
                                                    schema='newick',
                                                    taxon_namespace=shared_taxa)
                    rf_dist = treecompare.symmetric_difference(
                        ref_tree, method_tree) / normalizer
                    running_sum += rf_dist
                    row.append(str(rf_dist))
                f_write.write('{}\n'.format(','.join(row)))
                if not silent:
                    print('{}: {}'.format(method_type,
                                          running_sum / size_testset))
                averages.append('{:.3f}'.format(running_sum / size_testset))
        averages_per_testset.append(averages)

    # Transpose so that each entry holds one method's averages over all sets.
    averages_per_method = [list(column) for column in zip(*averages_per_testset)]
    with open(summary_path, 'wt') as f_write:
        for idx, method_type in enumerate(method_types):
            f_write.write('{},{}\n'.format(
                method_type, ','.join(averages_per_method[idx])))
def symmetric_difference(tree1, tree2):
    """Deprecated alias: warn, then forward to ``treecompare.symmetric_difference``."""
    warning_parts = {
        "preamble": "Deprecated since DendroPy 4: The 'dendropy.treecalc.symmetric_difference()' function has moved to 'dendropy.calculate.treecompare.symmetric_difference()'.",
        "old_construct": "from dendropy import treecalc\nd = treecalc.symmetric_difference(...)",
        "new_construct": "from dendropy.calculate import treecompare\nd = treecompare.symmetric_difference(...)",
    }
    deprecate.dendropy_deprecation_warning(**warning_parts)
    return treecompare.symmetric_difference(tree1=tree1, tree2=tree2)
def tree_compare(tempdir):
    """Compare the normal and the reduced tree with the reference tree.

    Reads ``ref.tree``, ``normal_tree`` and ``red_tree`` (all Newick) from
    ``tempdir`` into one shared taxon namespace.

    Returns:
        Tuple ``(distance_normal, distance_reduced)``: the symmetric
        difference of the reference tree to the normal tree and to the
        reduced tree.
    """
    shared_taxa = dendropy.TaxonNamespace()
    reference = Tree.get_from_path(
        tempdir + "/ref.tree", "newick", taxon_namespace=shared_taxa)
    normal = Tree.get_from_path(
        tempdir + "/normal_tree", "newick", taxon_namespace=shared_taxa)
    reduced = Tree.get_from_path(
        tempdir + "/red_tree", "newick", taxon_namespace=shared_taxa)

    for loaded in (reference, normal, reduced):
        loaded.encode_bipartitions()

    distance_normal = treecompare.symmetric_difference(reference, normal)
    distance_reduced = treecompare.symmetric_difference(reference, reduced)
    return distance_normal, distance_reduced
def _gene_label(path):
    """Derive the display name of a gene from the path of its tree file."""
    label = path
    # Applied in this order; dots are removed first, so the file extension is
    # left as a bare "txt" that the third step strips.
    for old, new in ((".", ""), ("\\", ""), ("txt", ""), ("_", " "), ("tree", "")):
        label = label.replace(old, new)
    return label


def main():
    """Print the unweighted RF distance between every pair of tree files.

    Every ``*.txt`` file in the current directory is expected to hold a Newick
    tree on its first line.  For each pair of files a label is printed, the
    two trees are parsed into a common taxon namespace, and their symmetric
    difference is printed.  Finally the list of all scores and the list of all
    labels are printed.
    """
    files = glob.glob('./*.txt')
    labels = []
    scores = []
    for path_1, path_2 in combinations(files, 2):
        label = _gene_label(path_1) + "vs " + _gene_label(path_2)
        print(label)

        # Only the first line of each file is parsed; the files are closed
        # as soon as it has been read.
        with open(path_1) as infile_1:
            s1 = str(infile_1.readline())
        with open(path_2) as infile_2:
            s2 = str(infile_2.readline())

        # Both trees of a pair must share one taxon namespace.
        tns = dendropy.TaxonNamespace()
        tree1 = dendropy.Tree.get(data=s1,
                                  schema='newick',
                                  preserve_underscores=True,
                                  suppress_internal_node_taxa=False,
                                  taxon_namespace=tns)
        tree2 = dendropy.Tree.get(data=s2,
                                  schema='newick',
                                  preserve_underscores=True,
                                  suppress_internal_node_taxa=False,
                                  taxon_namespace=tns)

        # Unweighted Robinson-Foulds distance.
        score = treecompare.symmetric_difference(tree1, tree2)
        print("Comparing tree ", path_1, " and ", path_2, ": ", score)
        labels.append(label)
        scores.append(score)
    print(scores)
    print(labels)
def compare_trees(expected, estimated):
    """Return the symmetric difference between an estimated and an expected tree.

    ``expected`` is the path of a Newick file, ``estimated`` the path of a
    Nexus file.  The estimated tree reuses the taxon namespace of the expected
    tree so the two can be compared.
    """
    # TODO: branch lengths are hard to test (e.g. via the Euclidean distance);
    # figure out how.
    exp_tree = dendropy.Tree.get_from_path(expected, "newick")
    load_options = {"taxon_namespace": exp_tree.taxon_namespace}
    est_tree = dendropy.Tree.get_from_path(estimated, "nexus", **load_options)
    return treecompare.symmetric_difference(est_tree, exp_tree)
def robinson_foulds(self, tree1, tree2, taxa_list):
    """Calculate the Robinson-Foulds (symmetric difference) distance of two trees.

    The trees are first passed through ``self._read_trees`` together with
    ``taxa_list``.  The raw distance is normalized by ``2 * (n - 3)``, where
    ``n`` is the number of leaves of the first tree.

    Returns:
        Tuple ``(rf, normalized_rf)``.  ``normalized_rf`` is ``0.0`` when the
        first tree has three or fewer leaves, because the normalizer is then
        zero or negative and the ratio is undefined.
    """
    tree1, tree2 = self._read_trees(tree1, tree2, taxa_list)
    rf = treecompare.symmetric_difference(tree1, tree2)

    num_taxa = sum(1 for _ in tree1.leaf_node_iter())
    max_rf = 2 * (num_taxa - 3)
    # Guard the boundary: exactly three leaves used to raise
    # ZeroDivisionError here.
    normalized_rf = float(rf) / max_rf if max_rf > 0 else 0.0
    return rf, normalized_rf
def test_sum_of_credibilities(self):
    """Check the tree array's sum-of-split-support scores and its best tree.

    The scores are verified independently against a split distribution built
    from a fresh set of trees; tree 73 is expected to score highest (30.89)
    and to be topologically identical to the tree the array reports.
    """
    tree_array = self.trees.as_tree_array(is_rooted_trees=True)
    # Independent source of truth for the per-tree scores.
    split_dist = self.get_trees().split_distribution(
        is_bipartitions_updated=False)

    scores, max_idx = tree_array.calculate_sum_of_split_supports()
    self.assertEqual(len(scores), len(self.trees))
    for score, tree in zip(scores, self.trees):
        expected_score = split_dist.sum_of_split_support_on_tree(tree)
        self.assertAlmostEqual(score, expected_score)

    self.assertEqual(max_idx, 73)
    self.assertAlmostEqual(scores[max_idx], 30.89)

    expected_best = self.trees[73]
    reported_best = tree_array.maximum_sum_of_split_support_tree()
    distance = treecompare.symmetric_difference(expected_best, reported_best)
    self.assertEqual(distance, 0)
def all_dist_among_trees_sym(treeDict):
    """Return the symmetric differences of all unordered pairs of trees.

    The result is a flat list with one Robinson-Foulds (symmetric difference)
    value per pair of keys of ``treeDict``, in the order produced by
    ``itertools.combinations``.  Each comparison works on deep copies of the
    two trees.
    """
    name_pairs = combinations(treeDict.keys(), 2)
    return [
        treecompare.symmetric_difference(deepcopy(treeDict[first_name]),
                                         deepcopy(treeDict[second_name]))
        for first_name, second_name in name_pairs
    ]
def testPrunedThenEncoding(self):
    """Prune the first tree to the second tree's leaf set, then compare them."""
    newick_source = StringIO('''(a,b,c,(d,e)); (b,d,(c,e));''')
    first, second = dendropy.TreeList.get_from_stream(newick_source, schema='newick')

    # Prune tree 1 down to the leaf set of tree 2; this removes taxon "a".
    taxa_to_keep = {leaf.taxon for leaf in second.leaf_nodes()}
    leaves_to_drop = []
    for leaf in first.leaf_nodes():
        if leaf.taxon not in taxa_to_keep:
            leaves_to_drop.append(leaf)
    for leaf in leaves_to_drop:
        first.prune_subtree(leaf)

    # The trees are now (b,c,(d,e)) and (b,d,(c,e)), so the symmetric
    # difference is 2.
    distance = treecompare.symmetric_difference(first, second)
    self.assertEqual(2, distance)
def main():
    """Report whether the first two trees read from two Nexus files are identical.

    The files named by ``sys.argv[1]`` and ``sys.argv[2]`` are read, in that
    order, into one ``TreeList``.  The trees at positions 0 and 1 of that list
    are compared by symmetric difference, and the verdict is printed.
    """
    treefile1 = sys.argv[1]
    treefile2 = sys.argv[2]

    treelist = TreeList()
    # Let DendroPy open the files itself: no handle is leaked, and the 'rU'
    # mode (removed in Python 3.11) is no longer needed.
    treelist.read(path=treefile1, schema="nexus")
    treelist.read(path=treefile2, schema="nexus")

    # Single-argument print() behaves the same on Python 2 and Python 3.
    if treecompare.symmetric_difference(treelist[0], treelist[1]) == 0:
        print("trees are identical")
    else:
        print("trees are NOT identical")
def verify_resolve_polytomies(self, tree_string, rng):
    """Check that resolving polytomies is valid and can be undone by collapsing.

    The tree parsed from ``tree_string`` gets every edge length set to 100 and
    its polytomies resolved using ``rng``.  Afterwards every internal node
    must have two children (three for the seed node of an unrooted tree), the
    tree must differ from a freshly parsed copy, and collapsing the
    unweighted edges must make the two trees identical again.
    """
    tree = dendropy.Tree.get_from_string(tree_string, "newick")
    if "&U" not in tree_string:
        assert tree.is_rooted
    else:
        assert not tree.is_rooted

    for node in tree:
        node.edge.length = 100
    tree.resolve_polytomies(rng=rng)
    tree.encode_bipartitions()
    tree._debug_check_tree(
        check_bipartitions=True,
        unique_bipartition_edge_mapping=True)

    for node in tree:
        child_count = len(node._child_nodes)
        if node is tree.seed_node and not tree.is_rooted:
            self.assertEqual(child_count, 3)
        elif child_count > 0:
            self.assertEqual(child_count, 2)

    unresolved = dendropy.Tree.get_from_string(
        tree_string, "newick", taxon_namespace=tree.taxon_namespace)
    distance_resolved = treecompare.symmetric_difference(tree, unresolved)
    self.assertNotEqual(distance_resolved, 0)

    tree.collapse_unweighted_edges()
    distance_collapsed = treecompare.symmetric_difference(tree, unresolved)
    self.assertEqual(distance_collapsed, 0)
def dist_among_trees_sym(treeDict):
    """Return the full matrix of symmetric differences between all trees.

    The result is a dict of dicts: ``result[name1][name2]`` holds the
    symmetric difference between the trees stored under ``name1`` and
    ``name2`` in ``treeDict``.  Every ordered pair, including a tree with
    itself, is computed.
    """
    return {
        row_name: {
            column_name: treecompare.symmetric_difference(
                treeDict[row_name], treeDict[column_name])
            for column_name in treeDict.keys()
        }
        for row_name in treeDict.keys()
    }
def runTest(self):
    """Random reorientation must change the string form but not the topology.

    The same Newick tree is loaded twice into one taxon namespace.  One copy
    is randomly reoriented 50 times; after each step its string form must
    differ from the source string, its internal structure must pass the debug
    check, and its symmetric difference to the untouched copy must be 0.
    """
    newick = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
    seed_tree = dendropy.TreeList.get_from_stream(StringIO(newick), schema="newick")[0]
    pair = dendropy.TreeList.get_from_stream(
        StringIO(newick + newick),
        schema="newick",
        taxon_namespace=seed_tree.taxon_namespace)
    ref, changing = pair[0], pair[1]
    rng = MockRandom()
    for _ in range(50):
        changing.randomly_reorient(rng=rng, update_bipartitions=True)
        self.assertNotEqual(str(changing), newick)
        changing._debug_check_tree(logger_obj=_LOG, check_bipartitions=True)
        distance = treecompare.symmetric_difference(
            ref, changing, is_bipartitions_updated=False)
        if distance == 0:
            continue
        self.fail("\n{}\n!=\n{}\nRF={}".format(str(ref), str(changing), distance))
def test_sum_of_credibilities(self):
    """Verify per-tree split-support sums and the maximum-support tree.

    Scores from the tree array are cross-checked against a split distribution
    computed from a separately obtained set of trees.  The best tree is
    expected at index 73 with a score of 30.89, and the tree returned by the
    array must have symmetric difference 0 to ``self.trees[73]``.
    """
    ta = self.trees.as_tree_array(is_rooted_trees=True)
    # Used for independent verification of the scores.
    reference_distribution = self.get_trees().split_distribution(
        is_bipartitions_updated=False)

    scores, best_index = ta.calculate_sum_of_split_supports()
    self.assertEqual(len(scores), len(self.trees))
    for computed, tree in zip(scores, self.trees):
        self.assertAlmostEqual(
            computed,
            reference_distribution.sum_of_split_support_on_tree(tree))

    self.assertEqual(best_index, 73)
    self.assertAlmostEqual(scores[best_index], 30.89)

    known_best = self.trees[73]
    array_best = ta.maximum_sum_of_split_support_tree()
    self.assertEqual(
        treecompare.symmetric_difference(known_best, array_best), 0)
def testConsensus(self):
    """Compare the computed 50% consensus tree with the stored reference tree.

    The topology must match exactly (symmetric difference 0 and the same
    number of bipartitions).  Where both trees carry a support label on the
    node below a shared bipartition, the labels must agree to two decimals.
    """
    con_tree = self.tree_list.consensus(
        min_freq=0.50,
        is_bipartitions_updated=False,
        support_label_decimals=2)
    con_tree.encode_bipartitions()

    reference = self.mb_con_tree
    self.assertEqual(treecompare.symmetric_difference(reference, con_tree), 0)
    self.assertEqual(
        len(con_tree.bipartition_encoding),
        len(reference.bipartition_encoding))

    for bipartition in reference.bipartition_encoding:
        reference_edge = reference.bipartition_edge_map[bipartition]
        computed_edge = con_tree.bipartition_edge_map[bipartition]
        reference_label = reference_edge.head_node.label
        computed_label = computed_edge.head_node.label
        if not (reference_label and computed_label):
            continue
        expected_support = float(reference_label)
        computed_support = round(float(computed_label), 2)
        self.assertAlmostEqual(expected_support, computed_support, 2)
def compute_dist_matrix(self, dendropy=False, weighted=False, resolve=True, overwrite=False):
    """Return the pairwise distance matrix of the stored trees, cached in HDF5.

    The matrix is computed when the file ``self.h5name`` has no
    ``/dist_matrix`` node or when ``overwrite`` is true; it is then stored
    under ``/dist_matrix``.  Otherwise the stored matrix is loaded.

    Args:
        dendropy: If true, distances are computed with DendroPy; otherwise
            with ``self.compare_trees``.  This flag used to be ignored,
            because importing the ``dendropy`` package inside the function
            rebound the parameter to the (always truthy) module, so the
            DendroPy branch was taken unconditionally.
        weighted: DendroPy branch only.  If true, use
            ``treecompare.euclidean_distance``; otherwise
            ``treecompare.symmetric_difference``.
        resolve: Not used by this method.
        overwrite: If true, recompute and replace a stored matrix.

    Returns:
        A square ``numpy`` array with one row and column per tree.
    """
    # Capture the flag under a name that the package import below cannot
    # rebind.
    use_dendropy = dendropy

    db = tables.open_file(self.h5name, mode="a")
    try:
        trees, _intvals = self.grab_trees(db)
        if "/dist_matrix" not in db or overwrite:
            D = np.zeros((len(trees), len(trees)))
            if not use_dendropy:
                for n in range(len(trees) - 1):
                    for nn in range(n + 1, len(trees)):
                        D[n, nn] = self.compare_trees(trees[n], trees[nn])
                        D[nn, n] = D[n, nn]
            else:
                import dendropy as dendropy_lib
                from dendropy.calculate import treecompare

                T = dendropy_lib.TreeList([
                    dendropy_lib.Tree.get(data=t.write(), schema='newick')
                    for t in trees
                ])
                if weighted:
                    metric = treecompare.euclidean_distance
                else:
                    metric = treecompare.symmetric_difference
                for n in range(len(trees) - 1):
                    for nn in range(n + 1, len(trees)):
                        w_rf = metric(T[n], T[nn])
                        D[n, nn] = w_rf
                        D[nn, n] = w_rf
            # Drop a stale matrix only if one is actually stored, so that
            # overwrite=True also works on a file without a cached matrix.
            if overwrite and "/dist_matrix" in db:
                db.remove_node("/", "dist_matrix")
            db.create_array("/", "dist_matrix", D)
        else:
            D = np.array([
                np.array(row)
                for row in db.get_node("/dist_matrix", classname="Array")
            ])
        db.flush()
    finally:
        # Close the file even when computing or storing the matrix fails.
        db.close()
    return D
def distance(file_path, file_format, file_path2):
    """Print four distance measures between the trees stored in two files.

    Both files are read with the schema ``file_format`` into one shared taxon
    namespace.  The symmetric difference, the Robinson-Foulds distance, the
    false positives and negatives, and the Euclidean distance are printed.
    """
    shared_taxa = dendropy.TaxonNamespace()
    first = dendropy.Tree.get_from_path(
        file_path, file_format, taxon_namespace=shared_taxa)
    second = dendropy.Tree.get_from_path(
        file_path2, file_format, taxon_namespace=shared_taxa)

    # Everything is computed up front, then reported.
    symmetric = treecompare.symmetric_difference(first, second)
    euclidean = treecompare.euclidean_distance(first, second)
    positives_negatives = treecompare.false_positives_and_negatives(first, second)
    robinson_foulds = treecompare.robinson_foulds_distance(first, second)

    report = (
        ("Symetric difference: ", symmetric),
        ("Robinson Foulds distance: ", robinson_foulds),
        ("False positives and negatives: ", positives_negatives),
        ("Euclidean distance: ", euclidean),
    )
    for caption, value in report:
        print(caption, value)
def runTest(self):
    """Random rotation must change the string form but keep root and topology.

    Two copies of the same Newick tree are loaded.  One copy is randomly
    rotated 50 times; after each step its string form must differ from the
    source string, its seed node must be unchanged, the debug check must
    pass, and its symmetric difference to the untouched copy must be 0.
    """
    newick = '(Basichlsac,(Lamprothma,Mougeotisp),(((Haplomitr2,Petalaphy),((Angiopteri,(((Azollacaro,((Dennstasam,(Oleandrapi,Polypodapp)),Dicksonant)),Vittarifle),Botrychbit)),(Isoetesmel,((((Agathismac,Agathisova),Pseudotsu),(((Libocedrus,Juniperusc),Callitris),Athrotaxi)),((Liriodchi,Nelumbo),Sagittari))))),Thuidium));'
    pair = dendropy.TreeList.get_from_stream(StringIO(newick + newick), schema="newick")
    ref, changing = pair[0], pair[1]
    rng = MockRandom()
    ref.encode_bipartitions()
    changing.encode_bipartitions()
    root_before = changing.seed_node
    for _ in range(50):
        changing.randomly_rotate(rng=rng)
        self.assertNotEqual(str(changing), newick)
        self.assertEqual(root_before, changing.seed_node)
        changing._debug_check_tree(logger_obj=_LOG, check_bipartitions=True)
        distance = treecompare.symmetric_difference(ref, changing)
        if distance != 0:
            self.fail("\n%s\n!=\n%s" % (str(ref), str(changing)))
def testConsensus(self):
    """Check the 50% consensus of ``self.tree_list`` against ``self.mb_con_tree``.

    Both trees must have the same bipartitions (symmetric difference 0, equal
    bipartition counts).  For each bipartition of the reference tree whose
    head node is labelled in both trees, the labels, read as floats, must
    match to two decimal places.
    """
    consensus_options = dict(
        min_freq=0.50,
        is_bipartitions_updated=False,
        support_label_decimals=2)
    con_tree = self.tree_list.consensus(**consensus_options)
    con_tree.encode_bipartitions()

    topology_distance = treecompare.symmetric_difference(self.mb_con_tree, con_tree)
    self.assertEqual(topology_distance, 0)
    computed_count = len(con_tree.bipartition_encoding)
    expected_count = len(self.mb_con_tree.bipartition_encoding)
    self.assertEqual(computed_count, expected_count)

    for split in self.mb_con_tree.bipartition_encoding:
        expected_node = self.mb_con_tree.bipartition_edge_map[split].head_node
        computed_node = con_tree.bipartition_edge_map[split].head_node
        if expected_node.label and computed_node.label:
            self.assertAlmostEqual(
                float(expected_node.label),
                round(float(computed_node.label), 2),
                2)
def tree_comparison(collapsed_tree, hogtree, phyml_tree, gene_tree_congruence, report):
    """Compute a gene-tree congruence metric and write a text report.

    The metric is ``1 - diff / (n1 + n2)`` where ``diff`` is the symmetric
    difference (unweighted RF distance) between the collapsed gene tree and
    the HOG tree, and ``n1`` / ``n2`` are their internal-node counts.

    Args:
        collapsed_tree: Path of the Newick file with the collapsed gene tree.
        hogtree: Path of the Newick file with the HOG tree.
        phyml_tree: Path of the Newick file with the uncollapsed gene tree.
        gene_tree_congruence: Output path; receives the metric on one line.
        report: Output path; receives the trees, ASCII plots and statistics.

    Raises:
        ZeroDivisionError: If both compared trees report zero internal nodes.
    """
    print("Calculating gene tree congruence metric...")

    def read_text(path):
        # Read a whole input file and close it right away.
        with open(path, 'r') as handle:
            return handle.read()

    # Each input is read once; the text serves both the parser and the report.
    collapsed_genetree = read_text(collapsed_tree)
    hog_genetree = read_text(hogtree)
    phyml_genetree = read_text(phyml_tree)

    tns = dendropy.TaxonNamespace()
    tree1 = dendropy.Tree.get(data=collapsed_genetree, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(data=hog_genetree, schema='newick', taxon_namespace=tns)
    tree3 = dendropy.Tree.get(data=phyml_genetree, schema='newick', taxon_namespace=tns)

    # Same as the unweighted RF distance.
    diff = treecompare.symmetric_difference(
        tree1, tree2, is_bipartitions_updated=False)

    # Number of internal nodes in each tree.
    tree1_node_num = len(tree1.internal_nodes())
    tree2_node_num = len(tree2.internal_nodes())
    tree3_node_num = len(tree3.internal_nodes())

    hog_metric = 1 - diff / (tree1_node_num + tree2_node_num)
    num_nodes_collapsed = tree3_node_num - tree1_node_num

    with open(gene_tree_congruence, 'w') as outfile1:
        outfile1.write(str(hog_metric) + "\n")

    collapsedplot = tree1.as_ascii_plot()
    hogplot = tree2.as_ascii_plot()
    with open(report, 'w') as outfile2:
        outfile2.write(
            "HOG tree:\n" + str(hog_genetree) + "\n" + str(hogplot) +
            "\n\nUncollapsed Gene tree:\n" + str(phyml_genetree) +
            "\n\nCollapsed Gene tree:\n" + str(collapsed_genetree) + "\n" +
            str(collapsedplot) + "\n\n# nodes collapsed: " +
            str(num_nodes_collapsed) + "\n\nSymmetric distance: " +
            str(diff) + "\n\n# Nodes (Hogtree): " + str(tree2_node_num) +
            "\n\n# Nodes (Uncollaped Gene tree): " + str(tree3_node_num) +
            "\n\n# Nodes (Collapsed Gene tree): " + str(tree1_node_num) +
            "\n\nHOG Tree Congruence:" + str(hog_metric) + "\n\n")
def main(tree_path_1, tree_path_2):
    """Print leaf counts and three distance measures for two trees.

    Both trees are loaded with ``read_tree`` into one shared taxon namespace
    and have their bipartitions encoded before any distance is computed.
    """
    shared_taxa = dendropy.TaxonNamespace()
    first = read_tree(tree_path_1, shared_taxa)
    second = read_tree(tree_path_2, shared_taxa)
    for loaded in (first, second):
        loaded.encode_bipartitions()

    leaves_first = len(first.leaf_nodes())
    print("Number of leaves in tree 1: ", leaves_first)
    leaves_second = len(second.leaf_nodes())
    print("Number of leaves in tree 2: ", leaves_second)

    unweighted = treecompare.symmetric_difference(first, second)
    print("Unweighted Robinson-Fould distance: ", unweighted)
    weighted = treecompare.weighted_robinson_foulds_distance(first, second)
    print("Weighted Robinson-Fould distance: ", weighted)
    euclidean = treecompare.euclidean_distance(first, second)
    print("Euclidean distance: ", euclidean)
def dist_tree_all(treeFiles, treeTrueFile):
    """Compare every tree in ``treeFiles`` with the true tree.

    Three distances are computed per tree: the symmetric difference, the
    weighted Robinson-Foulds distance, and the weighted Robinson-Foulds
    distance after both trees have been rescaled to a total length of 1.

    Args:
        treeFiles: Iterable of paths to Newick tree files.
        treeTrueFile: Path of the Newick file holding the true tree.

    Returns:
        Tuple ``(distRf, distRfScaled, distSym)`` of lists, each with one
        entry per file in ``treeFiles``.
    """
    treeTrue = dendropy.Tree.get_from_path(treeTrueFile, schema='newick')
    # DendroPy only compares trees that reference the same TaxonNamespace, so
    # every tree below is loaded into the namespace of the true tree.
    taxa = treeTrue.taxon_namespace

    treeTrueTotalLength = treeTrue.length()
    treeTrueScaled = deepcopy(treeTrue)
    treeTrueScaled.scale_edges(1. / treeTrueTotalLength)

    distRf = []
    distRfScaled = []
    distSym = []
    for treeFile in treeFiles:
        tree = dendropy.Tree.get_from_path(
            treeFile, schema='newick', taxon_namespace=taxa)
        distSym.append(treecompare.symmetric_difference(treeTrue, tree))
        distRf.append(
            treecompare.weighted_robinson_foulds_distance(treeTrue, tree))
        # Rescale in place only after the unscaled distances are taken.
        tree.scale_edges(1. / tree.length())
        distRfScaled.append(
            treecompare.weighted_robinson_foulds_distance(
                treeTrueScaled, tree))
    return distRf, distRfScaled, distSym
def evaluate(ref, file_name):
    """Infer a tree from an alignment and compare it with a reference tree.

    The external FastPhylo programs ``fastprot`` and ``fnj`` are run on
    ``file_name``; the resulting Newick tree is compared with the Newick tree
    stored in ``ref``.  Both trees are loaded into the module-level taxon
    namespace ``tns``.

    Args:
        ref: Path of the Newick file with the reference tree.
        file_name: Path of the alignment file handed to ``fastprot``.

    Returns:
        The symmetric difference between the reference tree and the inferred
        tree.

    Raises:
        OSError: If ``fastprot`` or ``fnj`` cannot be started.
    """
    import subprocess

    # Two temporary files hold the intermediate results: the fastprot output
    # and the fnj output.
    distance_fd, distance_path = tempfile.mkstemp()
    os.close(distance_fd)  # fastprot writes to the file by path
    tree_fd, tree_path = tempfile.mkstemp()
    tree_stream = os.fdopen(tree_fd)
    try:
        # Argument lists avoid the shell, so paths containing spaces or shell
        # metacharacters are passed through unchanged.
        subprocess.call(["fastprot", "-m", "-o", distance_path, file_name])
        subprocess.call(["fnj", "-O", "newick", "-m", "FNJ",
                         "-o", tree_path, distance_path])

        # Use DendroPy to compare the trees.
        in_tree = Tree.get_from_stream(tree_stream,
                                       schema='newick',
                                       taxon_namespace=tns)
    finally:
        # Release the descriptor and remove both temporary files.
        tree_stream.close()
        os.remove(distance_path)
        os.remove(tree_path)

    ref_tree = Tree.get_from_path(ref, schema='newick', taxon_namespace=tns)
    sym_diff = treecompare.symmetric_difference(ref_tree, in_tree)
    return sym_diff
def testTrees(self):
    """Round-trip each test tree through its bipartition encoding.

    Every listed Nexus tree is read with the given rooting, encoded into
    bipartitions and rebuilt from that encoding; the rebuilt tree must have a
    symmetric difference of 0 to the tree it was derived from.
    """
    tree_files = (
        ("dendropy-test-trees-n33-unrooted-x100a.nexus", "force-unrooted", False),
        ("dendropy-test-trees-multifurcating-unrooted.nexus", "force-unrooted", False),
        ("pythonidae.beast.summary.tre", "force-rooted", True),
        ("primates.beast.mcct.medianh.tre", "force-rooted", True),
    )
    # The third element of each entry is not used by the checks below.
    for tree_file, rooting, _is_rooted in tree_files:
        ref_tree = dendropy.Tree.get_from_path(
            pathmap.tree_source_path(tree_file),
            "nexus",
            rooting=rooting)
        rebuild_options = {
            "taxon_namespace": ref_tree.taxon_namespace,
            "is_rooted": ref_tree.is_rooted,
        }
        t_tree = dendropy.Tree.from_bipartition_encoding(
            ref_tree.encode_bipartitions(), **rebuild_options)

        file_message = "--\n File: {} ({})".format(tree_file, ref_tree.is_rooted)
        _LOG.debug(file_message)
        original_message = " Original: {}".format(ref_tree.as_string("newick"))
        _LOG.debug(original_message)
        rebuilt_message = "Reconstructed: {}".format(t_tree.as_string("newick"))
        _LOG.debug(rebuilt_message)

        self.assertEqual(treecompare.symmetric_difference(ref_tree, t_tree), 0)
def testMidpointRooting(self):
    """Midpoint-root randomly rooted trees and compare with the expected trees.

    After rerooting, each tree must have symmetric difference 0 to its
    expected counterpart, and every edge except the one leading to the seed
    node must have the expected length to three decimal places.
    """
    taxa = dendropy.TaxonNamespace()
    randomly_rooted_path = pathmap.tree_source_path(
        'pythonidae.random.bd0301.randomly-rooted.tre')
    midpoint_rooted_path = pathmap.tree_source_path(
        'pythonidae.random.bd0301.midpoint-rooted.tre')
    test_trees = dendropy.TreeList.get_from_path(
        randomly_rooted_path, "nexus",
        taxon_namespace=taxa, rooting="force-rooted")
    expected_trees = dendropy.TreeList.get_from_path(
        midpoint_rooted_path, "nexus",
        taxon_namespace=taxa, rooting="force-rooted")

    for idx, test_tree in enumerate(test_trees):
        expected_tree = expected_trees[idx]
        test_tree.reroot_at_midpoint(update_bipartitions=True)
        distance = treecompare.symmetric_difference(test_tree, expected_tree)
        self.assertEqual(distance, 0)
        for bipartition in test_tree.bipartition_encoding:
            # The edge above the seed node is not compared.
            if test_tree.bipartition_edge_map[bipartition].head_node is not test_tree.seed_node:
                actual_length = test_tree.bipartition_edge_map[bipartition].length
                expected_length = expected_tree.bipartition_edge_map[bipartition].length
                self.assertAlmostEqual(actual_length, expected_length, 3)
def calcDistance(self):
    """Load the two selected trees, compute their distances and show them.

    Nothing happens unless both ``self.path1`` and ``self.path2`` are
    non-empty.  Each path's extension (without the dot) is handed to DendroPy
    as the schema name.  Results are stored in ``self.symDist``,
    ``self.fpnDist``, ``self.eucDist`` and ``self.rfDist``; the Euclidean and
    Robinson-Foulds values are shown in ``self.res1`` and ``self.res2``.
    """
    if self.path1 == '' or self.path2 == '':
        return

    self.fileEx1 = os.path.splitext(self.path1)[1][1:]
    self.fileEx2 = os.path.splitext(self.path2)[1][1:]

    tns = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(
        self.path1, self.fileEx1, taxon_namespace=tns)
    self.tree2 = dendropy.Tree.get_from_path(
        self.path2, self.fileEx2, taxon_namespace=tns)
    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()

    # Compute the distances.  The false positives/negatives are evaluated a
    # single time and reused for the console output.
    self.fpnDist = treecompare.false_positives_and_negatives(
        self.tree1, self.tree2)
    print(self.fpnDist)
    self.symDist = treecompare.symmetric_difference(
        self.tree1, self.tree2)
    self.eucDist = treecompare.euclidean_distance(
        self.tree1, self.tree2)
    self.rfDist = treecompare.robinson_foulds_distance(
        self.tree1, self.tree2)

    # Show the results.
    self.res1.setText(str(self.eucDist))
    self.res2.setText(str(self.rfDist))
def check(self, title, src_prefix):
    """Assert that DendroPy-pruned trees equal their PAUP-pruned counterparts.

    Reads ``<src_prefix>.dendropy-pruned.nex`` and
    ``<src_prefix>.paup-pruned.nex`` into the same taxon namespace and
    requires a symmetric difference of 0 for every pair of trees at the same
    (tree list, tree) position. ``title`` is only used in debug log messages.
    """
    tns = dendropy.TaxonNamespace()
    dendropy_pruned_path = pathmap.tree_source_path(
        src_prefix + ".dendropy-pruned.nex")
    input_ds = dendropy.DataSet.get_from_path(
        src=dendropy_pruned_path,
        schema='nexus',
        attached_taxon_namespace=tns)
    input_taxa = input_ds.taxon_namespaces[0]
    paup_pruned_path = pathmap.tree_source_path(
        src_prefix + ".paup-pruned.nex")
    output_ds = dendropy.DataSet.get_from_path(
        src=paup_pruned_path,
        schema='nexus',
        taxon_namespace=input_taxa)
    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        for tree_idx, src_tree in enumerate(src_trees):
            progress = (title,
                        set_idx+1, len(input_ds.tree_lists),
                        tree_idx+1, len(src_trees))
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % progress)
            ref_tree = ref_trees[tree_idx]
            distance = treecompare.symmetric_difference(src_tree, ref_tree)
            self.assertEqual(distance, 0)
def is_concordant(G, S, sp_to_genes):
    """Return True if tree ``G`` collapses onto tree ``S`` without conflict.

    ``sp_to_genes`` maps each leaf label of ``S`` to the taxon labels in ``G``
    that belong to it. Working on ``dendropy.Tree(G)``, the MRCA of every such
    label group must have exactly that group as its leaves; otherwise the
    result is False. Each MRCA is then stripped of its children and relabelled
    with the taxon that ``G``'s namespace holds for the ``S`` leaf label.
    The collapsed tree is concordant when its symmetric difference to ``S``
    is 0.
    """
    G2 = dendropy.Tree(G)
    for species_leaf in S.leaf_nodes():
        species_label = species_leaf.taxon.label
        genes = sp_to_genes[species_label]
        mrca = G2.mrca(taxon_labels=genes)
        clade_labels = {leaf.taxon.label for leaf in mrca.leaf_nodes()}
        if clade_labels != set(genes):
            return False
        # Collapse the clade into a single leaf carrying the species taxon.
        for child in get_children(mrca):
            mrca.remove_child(child)
        mrca.taxon = G.taxon_namespace.get_taxon(label=species_label)
    # maybe update bipartitions before?
    S.encode_bipartitions()
    G2.encode_bipartitions()
    return treecompare.symmetric_difference(G2, S) == 0
def check(self, title, src_prefix, to_retain=False):
    """Prune (or retain) taxa on each source tree and compare with PAUP's result.

    Args:
        title: Label used only in debug log messages.
        src_prefix: Common prefix of the data files: ``.pre-pruned.nex``,
            ``.paup-pruned.nex`` and ``.retained_taxa.txt`` or
            ``.pruned_taxa.txt``.
        to_retain: If true, the index file lists the taxa to keep and
            ``retain_taxa`` is applied; otherwise it lists the taxa to drop
            and ``prune_taxa`` is applied.

    Row ``i`` of the index file holds whitespace-separated integer indexes
    into the taxon namespace of tree list ``i``. Each processed tree must have
    a symmetric difference of 0 to the reference tree at the same position.
    """
    input_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".pre-pruned.nex"),
        schema='nexus')
    tns1 = dendropy.TaxonNamespace()
    input_ds.attach_taxon_namespace(tns1)
    input_taxa = input_ds.taxon_namespaces[0]
    output_ds = dendropy.DataSet.get_from_path(
        src=pathmap.tree_source_path(src_prefix + ".paup-pruned.nex"),
        schema='nexus',
        taxon_namespace=input_taxa)
    tns2 = dendropy.TaxonNamespace()
    output_ds.attach_taxon_namespace(tns2)

    taxa_suffix = ".retained_taxa.txt" if to_retain else ".pruned_taxa.txt"
    # Read the indexes up front so the file is closed even when an assertion
    # below fails.
    with open(pathmap.tree_source_path(src_prefix + taxa_suffix), "r") as taxf:
        taxon_idxs_list = [[int(i) for i in row.split()] for row in taxf]

    for set_idx, src_trees in enumerate(input_ds.tree_lists):
        ref_trees = output_ds.tree_lists[set_idx]
        taxon_idxs = taxon_idxs_list[set_idx]
        sub_taxa = [src_trees.taxon_namespace[i] for i in taxon_idxs]
        for tree_idx, src_tree in enumerate(src_trees):
            _LOG.debug("%s Set %d/%d, Tree %d/%d" % (title, set_idx+1, len(input_ds.tree_lists), tree_idx+1, len(src_trees)))
            ref_tree = ref_trees[tree_idx]
            if to_retain:
                src_tree.retain_taxa(sub_taxa)
            else:
                src_tree.prune_taxa(sub_taxa)
            self.assertEqual(treecompare.symmetric_difference(src_tree, ref_tree), 0)
def compare_to_strain_tree(self, rf=True):
    """Compare every tree from ``self.grab_trees()[0]`` with the strain tree.

    With ``rf`` true, the strain tree file and each tree (re-serialised via
    its ``write()`` method) are parsed as newick by DendroPy, collected in one
    ``TreeList``, and scored with ``treecompare.symmetric_difference``.
    Otherwise the strain tree is loaded with ete3 and each tree is scored with
    ``self.compare_trees``.

    Returns a NumPy array with one distance per tree.
    """
    import dendropy
    from dendropy.calculate import treecompare
    from ete3 import Tree

    if not rf:
        Ref = Tree(self.strainTree, format=1)
        trees = self.grab_trees()[0]
        d = np.zeros(len(trees))
        for n, tree in enumerate(trees):
            d[n] = self.compare_trees(Ref, tree)
        return d

    with open(self.strainTree, 'r') as ref:
        Ref = dendropy.Tree.get(file=ref, schema="newick")
    converted = [
        dendropy.Tree.get(data=t.write(), schema='newick')
        for t in self.grab_trees()[0]
    ]
    # Index 0 is the reference; the remaining entries are the compared trees.
    T = dendropy.TreeList([Ref] + converted)
    d = np.zeros(len(T) - 1)
    for slot in range(len(d)):
        d[slot] = treecompare.symmetric_difference(T[0], T[slot + 1])
    return d
def tred(counter, Namelist, finlist, Non_CleanList_Done,
         reference_tree_path='asymmetric_0.5.tree'):
    """Print the symmetric difference between an alignment's tree and the original tree.

    The alignment is written to ``fa_sekvens`` in FASTA format, ``fastprot``
    is run on it (output saved to ``fastprot_text``), and ``fnj`` builds a
    newick tree that is saved to a file named ``str(counter)``. That tree is
    then compared with the reference tree.

    Args:
        counter: Used (as ``str(counter)``) for the name of the tree file.
        Namelist: Sequence names, one per alignment row.
        finlist: Not used by this function.
        Non_CleanList_Done: Sequences, indexed in parallel with ``Namelist``.
        reference_tree_path: Newick file holding the original tree.

    Returns:
        None; the distance is printed.
    """
    # Write the alignment in FASTA format.
    with open('fa_sekvens', "w") as sh:
        for n, name in enumerate(Namelist):
            sh.write('\n' + '>' + name + '\n' + Non_CleanList_Done[n])

    # check_output returns bytes on Python 3, so the tool output is written
    # in binary mode; this works unchanged on Python 2, where it is a str.
    with open('fastprot_text', 'wb') as ph:
        # Runs the command "fastprot" on file "fa_sekvens".
        ph.write(check_output(["fastprot", 'fa_sekvens']))

    tree_path = str(counter)
    with open(tree_path, 'wb') as kd:
        # Creates the tree with "fnj"; the file is named after the counter.
        kd.write(check_output(["fnj", "-O", "newick", "fastprot_text"]))

    tns = dendropy.TaxonNamespace()
    # Both trees are read through context managers so the handles are closed.
    with open(reference_tree_path, 'r') as ref_handle:
        t1 = dendropy.Tree.get(
            file=ref_handle,
            schema="newick",
            tree_offset=0,
            taxon_namespace=tns)  # the original tree
    with open(tree_path, 'r') as tree_handle:
        t2 = dendropy.Tree.get(
            file=tree_handle,
            schema="newick",
            tree_offset=0,
            taxon_namespace=tns)  # the tree built from the alignment
    # Compute each tree's bipartition encoding before comparing.
    t1.encode_bipartitions()
    t2.encode_bipartitions()
    print(treecompare.symmetric_difference(t1, t2))
    return
def calculateDistance(self):
    """Load the two selected trees, compute four distances and display them.

    Does nothing unless both ``self.path1`` and ``self.path2`` are non-empty.
    Each file's extension (without the dot) is passed to DendroPy as the
    schema name, and both trees are read into one shared taxon namespace.

    Stores ``symDist``, ``fpnDist``, ``eucDist`` and ``rfDist`` on ``self``,
    prints the false positives/negatives result, and writes the four values
    to the ``dist1Value`` .. ``dist4Value`` widgets.
    """
    if self.path1 == '' or self.path2 == '':
        return

    # Get the file extensions.
    # NOTE(review): the extension is used verbatim as the schema, so this
    # presumably only works for files named like "*.newick" / "*.nexus".
    self.fileExtension1 = os.path.splitext(self.path1)[1][1:]
    self.fileExtension2 = os.path.splitext(self.path2)[1][1:]

    # Open the tree files.
    tns = dendropy.TaxonNamespace()
    self.tree1 = dendropy.Tree.get_from_path(
        self.path1, self.fileExtension1, taxon_namespace=tns)
    self.tree2 = dendropy.Tree.get_from_path(
        self.path2, self.fileExtension2, taxon_namespace=tns)
    self.tree1.encode_bipartitions()
    self.tree2.encode_bipartitions()

    # Calculate the distances. False positives/negatives are computed once
    # and the same result is both printed and stored.
    self.fpnDist = treecompare.false_positives_and_negatives(
        self.tree1, self.tree2)
    print(self.fpnDist)
    self.symDist = treecompare.symmetric_difference(self.tree1, self.tree2)
    self.eucDist = treecompare.euclidean_distance(self.tree1, self.tree2)
    # robinson_foulds_distance() is deprecated in DendroPy 4 and simply
    # forwards to the weighted variant, so call that one directly.
    self.rfDist = treecompare.weighted_robinson_foulds_distance(
        self.tree1, self.tree2)

    # Show the distances.
    self.dist1Value.setText(str(self.eucDist))
    self.dist2Value.setText(str(self.rfDist))
    self.dist3Value.setText(str(self.symDist))
    self.dist4Value.setText(str(self.fpnDist))
# Compare same-named nexus tree files found under the two directories and
# tabulate, per shared file, the symmetric difference and the tree lengths.
ilist = find_files(top=o_file, filename_filter=ext)
olist = find_files(top=i_file, filename_filter=ext)
split1 = [os.path.split(file)[1] for file in ilist]
split2 = [os.path.split(file)[1] for file in olist]
# Base name -> full path in the second listing, so each file from the first
# listing is compared with its counterpart rather than with itself.
counterpart_by_name = dict(zip(split2, olist))

RF = []
TLdiff = []
T1L = []         # tree length of every file in ilist
T2L = []         # tree length of each counterpart (shared files only)
shared_T1L = []  # tree length of the ilist file, aligned with shared_files
shared_files = []
for file in ilist:
    tree1 = dendropy.Tree.get_from_path(file, 'nexus')
    TL1 = tree1.length()
    T1L.append(TL1)
    counterpart = counterpart_by_name.get(os.path.split(file)[1])
    if counterpart is None:
        continue
    shared_files.append(file)
    tree2 = dendropy.Tree.get_from_path(
        counterpart, 'nexus', taxon_namespace=tree1.taxon_namespace)
    TL2 = tree2.length()
    T2L.append(TL2)
    shared_T1L.append(TL1)
    TLdiff.append(TL1 - TL2)
    RF.append(treecompare.symmetric_difference(tree1, tree2))

df = pd.DataFrame(shared_files)
df['RF'] = RF
df['TLDiff'] = TLdiff
# One length per row; assigning the last scalar would repeat a single value.
df['TL1'] = shared_T1L
df['TL2'] = T2L
print(df)
df.to_csv('c.csv')
path = file_path+'/'+onlyfiles[j]
with open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+path, 'r') as fil:
    lines_list = fil.readlines()
test = SeqDic(lines_list)  # If this does not work we do not have a FASTA file

##
# Makes a newick tree from the unreduced file and checks whether the
# reference tree is recovered.
##
# NOTE(review): the command is built by string concatenation and run through
# the shell; `path` must not contain shell metacharacters.
line = 'cat /home/4/u1we1f44/Documents/appbio15/projekt/data/'+path+' | fastprot -I fasta -O phylip | fnj -I phylip -O "newick" -o "Treeout.txt"'
os.system(line)
TreePath = file_path+'/'+RefTree
# Read both trees through context managers so the handles are closed.
with open('/home/4/u1we1f44/Documents/appbio15/projekt/data/'+TreePath, 'r') as ref_handle:
    t1 = Tree.get(file=ref_handle, schema="newick", tree_offset=0)
with open('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt', 'r') as out_handle:
    t2 = Tree.get(file=out_handle, schema="newick", tree_offset=0,
                  taxon_namespace=t1.taxon_namespace)
t1.encode_bipartitions()
t2.encode_bipartitions()
if treecompare.symmetric_difference(t1, t2) == 0:
    NotFixedCount += 1
# Counted and cleaned up whether or not the reference tree was recovered.
Total += 1
os.remove('/home/4/u1we1f44/Documents/appbio15/projekt/src/Treeout.txt')

##
# Makes a temporary file holding the data with the noise columns removed.
# A newick tree is then made from it to check whether the reference tree is
# recovered.
##
os.system("touch temp.fa")
tempf = open('temp.fa','w')
seq_dic = MoreThan2(lines_list)
for key in seq_dic:
    tempf.write('>'+key+'\n'+seq_dic[key]+'\n')
# Read the first line of each tree file and print their unweighted
# Robinson-Foulds distance.
#f = open("results.txt", "a")
#file1=open("/home/ubuntu/PhD_Study/Research/Simulated_Datasets/Taxa/t200","r")
with open(
        "/mnt/e/PhD_Study/FromLinux/PhD_Study/Research/Simulated_Datasets/Indelible/indelible_tests/height/h2.0/t10.mt",
        "r") as file1:
    s1 = file1.readline()
#name="RAxML_bestTree.t200BS"
#file2name="/home/ubuntu/PhD_Study/Research/Simulated_Datasets/Taxa/outputs/" + name
#file2 = open(file2name, "r")
with open(
        "/mnt/e/PhD_Study/FromLinux/PhD_Study/Research/Simulated_Datasets/Indelible/indelible_tests/height/h2.0/t10.fast",
        "r") as file2:
    s2 = file2.readline()

# establish common taxon namespace
tns = dendropy.TaxonNamespace()
# ensure all trees loaded use common namespace
try:
    tree1 = dendropy.Tree.get(data=s1, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(data=s2, schema='newick', taxon_namespace=tns)
    ## Unweighted Robinson-Foulds distance
    rf_distance = treecompare.symmetric_difference(tree1, tree2)
except Exception:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
    print("Input model tree problem")
else:
    print(rf_distance)
    #f.write(name)
    #f.write("\n")
    #f.write(rf_distance.__str__())
    #f.write("\n")
import dendropy
from dendropy.calculate import treecompare

# Print the symmetric difference between the quicktree and rapidnj trees.
# Both trees are read into the same taxon namespace.
tns = dendropy.TaxonNamespace()
tree_paths = (
    "/home/mys/Documents/git/AiBToS-Project1/input/clustalo-quicktree.newick",
    "/home/mys/Documents/git/AiBToS-Project1/input/clustalo-rapidnj.newick",
)
tree1, tree2 = [
    dendropy.Tree.get(path=tree_path, schema='newick', taxon_namespace=tns)
    for tree_path in tree_paths
]
for loaded_tree in (tree1, tree2):
    loaded_tree.encode_bipartitions()
distance = treecompare.symmetric_difference(tree1, tree2)
print(distance)
#! /usr/bin/env python

import dendropy
from dendropy.calculate import treecompare

# Report the mean symmetric difference between the MLE tree and every tree
# in the MCMC run files. All trees share one taxon namespace.
distances = []
taxa = dendropy.TaxonNamespace()
mle_tree = dendropy.Tree.get(
    path='pythonidae.mle.nex',
    schema='nexus',
    taxon_namespace=taxa)
mcmc_tree_file_paths = [
    'pythonidae.mb.run1.t',
    'pythonidae.mb.run2.t',
    'pythonidae.mb.run3.t',
    'pythonidae.mb.run4.t',
]
for mcmc_tree in dendropy.Tree.yield_from_files(
        files=mcmc_tree_file_paths,
        schema='nexus',
        taxon_namespace=taxa):
    distances.append(treecompare.symmetric_difference(mle_tree, mcmc_tree))
# Convert before dividing so Python 2 does not floor the result, and format
# as a float: "%d" would truncate the mean to an integer.
mean_distance = float(sum(distances)) / len(distances)
print("Mean symmetric distance between MLE and MCMC trees: %.2f" % mean_distance)
# Write the unweighted Robinson-Foulds distance from tree1 to tree2 and to
# tree3 into result_fast and result_raxml respectively, or "tree error" to
# both when the trees cannot be processed.
tns = dendropy.TaxonNamespace()
try:
    # ensure all trees loaded use common namespace
    tree1 = dendropy.Tree.get(data=s1, schema='newick', taxon_namespace=tns)
    tree2 = dendropy.Tree.get(data=s2, schema='newick', taxon_namespace=tns)
    tree3 = dendropy.Tree.get(data=s3, schema='newick', taxon_namespace=tns)
    ## Unweighted Robinson-Foulds distance
    rf_fast = treecompare.symmetric_difference(tree1, tree2)
    rf_raxml = treecompare.symmetric_difference(tree1, tree3)
    result_fast.write(str(rf_fast) + "\n")
    result_raxml.write(str(rf_raxml) + "\n")
except Exception:
    # Still best-effort, but no longer swallows KeyboardInterrupt/SystemExit.
    result_fast.write("tree error" + "\n")
    result_raxml.write("tree error" + "\n")
result_fast.write("\n\n")
result_raxml.write("\n\n")
import dendropy
from dendropy.calculate import treecompare

# The same newick string is read into two tree lists that share one taxon
# namespace, so the two resulting trees can be compared directly.
tree_str1 = "((A,B),C);"

tree_list1 = dendropy.TreeList()
tree_list1.read(data=tree_str1, schema="newick")

shared_namespace = tree_list1.taxon_namespace
tree_list2 = dendropy.TreeList(taxon_namespace=shared_namespace)
tree_list2.read(data=tree_str1, schema="newick")

# Results in: 0
first_tree, second_tree = tree_list1[0], tree_list2[0]
print(treecompare.symmetric_difference(first_tree, second_tree))
import argparse
import sys

import dendropy
from dendropy.calculate import treecompare


def _read_last_line(path):
    """Return the last line of the file at ``path`` ('' for an empty file)."""
    last_line = ''
    # The context manager closes the file as soon as it has been read.
    with open(path) as handle:
        for line in handle:
            last_line = line
    return last_line


# Usage: <script> REFTREE TREE
# Writes the symmetric difference between the two newick trees to stdout.
parser = argparse.ArgumentParser()
parser.add_argument('reftree')
parser.add_argument('tree')
args = parser.parse_args()

# Both trees must share one taxon namespace to be comparable.
tns = dendropy.TaxonNamespace()
tree1 = dendropy.Tree.get(data=_read_last_line(args.reftree), schema='newick',
                          taxon_namespace=tns)  # reference tree
tree2 = dendropy.Tree.get(data=_read_last_line(args.tree), schema='newick',
                          taxon_namespace=tns)  # original or noise reduced tree

n = treecompare.symmetric_difference(tree1, tree2)  # numerical value of the symmetric difference
sys.stdout.write(str(n)+'\n')  # requires the `sys` import above
T_seq_list = dendropy.TreeList(taxon_namespace=T_H_list.taxon_namespace)
T_seq_list.read(data=T_seq_string, schema="newick")
T_F_list = dendropy.TreeList(taxon_namespace=T_H_list.taxon_namespace)
T_F_list.read(data=T_F_string, schema="newick")

# Every list is created on T_H_list's taxon namespace, so the first tree of
# each list can be compared with the first tree of any other list.
#print(T_H_list[0])
#print(T_DMC_list[0])

# Calculating symmetric differences (unweighted Robinson-Foulds).
# The symmetric difference is the number of splits found in one of the trees
# but not the other.
# print() is called with a single parenthesised argument so the output is
# identical on Python 2 and Python 3.
print("Symmetric difference between T_H and T_DMC: " + str(treecompare.symmetric_difference(T_H_list[0], T_DMC_list[0])))
print("Symmetric difference between T_H and T_seq: " + str(treecompare.symmetric_difference(T_H_list[0], T_seq_list[0])))
print("Symmetric difference between T_H with T_F: " + str(treecompare.symmetric_difference(T_H_list[0], T_F_list[0])))
print("Symmetric difference between T_DMC with T_seq: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_seq_list[0])))
print("Symmetric difference between T_DMC with T_F: " + str(treecompare.symmetric_difference(T_DMC_list[0], T_F_list[0])))
print("Symmetric difference between T_seq with T_F: " + str(treecompare.symmetric_difference(T_seq_list[0], T_F_list[0])))

# Calculating the weighted Robinson-Foulds distances.
# This is the weighted symmetric difference: the sum of the absolute
# differences in branch lengths for equivalent splits between two trees.
# It takes edge lengths into account, and therefore yields a non-zero answer
# for trees with identical relationships but different branch lengths.
# Original author's note: this is why the unweighted distance between T_H and
# T_seq is 0 while the weighted distance is > 0.
print("Robinson-Foulds distance between T_H and T_DMC: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_DMC_list[0])))
print("Robinson-Foulds distance between T_H and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_H and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_H_list[0], T_F_list[0])))
print("Robinson-Foulds distance between T_DMC and T_seq: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_seq_list[0])))
print("Robinson-Foulds distance between T_DMC and T_F: " + str(treecompare.weighted_robinson_foulds_distance(T_DMC_list[0], T_F_list[0])))
import dendropy
from dendropy.calculate import treecompare

s1 = "(a,(b,(c,d)));"
s2 = "(a,(d,(b,c)));"

# One taxon namespace is created first and handed to every tree that is
# loaded, so the trees are comparable.
tns = dendropy.TaxonNamespace()
tree1, tree2 = [
    dendropy.Tree.get(data=newick_text, schema='newick', taxon_namespace=tns)
    for newick_text in (s1, s2)
]

## Unweighted Robinson-Foulds distance
unweighted_rf = treecompare.symmetric_difference(tree1, tree2)
print(unweighted_rf)
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import dendropy
from dendropy.calculate import treecompare

# Report the mean symmetric difference between the MLE tree and every tree
# in the MCMC run files. All trees share one taxon namespace.
distances = []
taxa = dendropy.TaxonNamespace()
mle_tree = dendropy.Tree.get(
    path='pythonidae.mle.nex',
    schema='nexus',
    taxon_namespace=taxa)
mcmc_tree_file_paths = [
    'pythonidae.mb.run1.t',
    'pythonidae.mb.run2.t',
    'pythonidae.mb.run3.t',
    'pythonidae.mb.run4.t',
]
for mcmc_tree in dendropy.Tree.yield_from_files(
        files=mcmc_tree_file_paths,
        schema='nexus',
        taxon_namespace=taxa):
    distances.append(treecompare.symmetric_difference(mle_tree, mcmc_tree))
# Convert before dividing so Python 2 does not floor the result, and format
# as a float: "%d" would truncate the mean to an integer.
mean_distance = float(sum(distances)) / len(distances)
print("Mean symmetric distance between MLE and MCMC trees: %.2f" % mean_distance)