# Default base directory of the assignment data. This is the value that was
# previously hard-coded inside main(); it is kept as the default so existing
# callers of main() behave exactly as before.
_DEFAULT_DATA_DIRECTORY = '/Users/filipbergentoft/Desktop/Github/DD2434/Assignment 2/2_4/'


def _read_newick_tree(path, taxon_namespace):
    """Read the Newick file at *path* into a DendroPy tree.

    All trees that are going to be compared must share *taxon_namespace*.
    """
    with open(path, 'r') as input_file:
        newick_str = input_file.read()
    return dendropy.Tree.get(data=newick_str, schema="newick",
                             taxon_namespace=taxon_namespace)


def _inferred_to_dendropy(topology, taxon_namespace):
    """Turn one inferred topology array into a DendroPy tree.

    The array is loaded into the project's ``Tree`` class and its ``newick``
    attribute is then parsed by DendroPy on *taxon_namespace*.
    """
    inferred = Tree()
    inferred.load_tree_from_direct_arrays(topology)
    return dendropy.Tree.get(data=inferred.newick, schema="newick",
                             taxon_namespace=taxon_namespace)


def _print_rf(tree_a, tree_b):
    """Print the Robinson-Foulds (symmetric difference) distance of two trees."""
    print("\tRF distance: \t",
          dendropy.calculate.treecompare.symmetric_difference(tree_a, tree_b))


def main(directory=_DEFAULT_DATA_DIRECTORY):
    """Demonstrate DendroPy tree generation, loading and RF comparison.

    Steps:
      1. Simulate two random birth-death trees and print them.
      2. Print the RF distance of a tree to itself and to the other tree.
      3. Load three reference trees from Newick files and compare them pairwise.
      4. Load the inferred topologies from the EM result file, convert them to
         DendroPy trees and compare every reference tree with every inferred
         tree.

    Args:
        directory: Base directory that contains the ``data/q2_4`` folder.
            Defaults to the location that used to be hard-coded here.
    """
    import os  # local import: only needed to assemble the data file paths

    treecompare = dendropy.calculate.treecompare

    print("Hello World!")
    print(
        "This file demonstrates the usage of the DendroPy module and its functions."
    )

    print("\n1. Tree Generations\n")
    print("\n1.1. Create two random birth-death trees and print them:\n")
    # If you want to compare two trees, make sure you specify the same Taxon Namespace!
    tns = dendropy.TaxonNamespace()
    num_leaves = 5
    t1 = dendropy.simulate.treesim.birth_death_tree(
        birth_rate=1.0, death_rate=0.5, num_extant_tips=num_leaves,
        taxon_namespace=tns)
    t2 = dendropy.simulate.treesim.birth_death_tree(
        birth_rate=1.0, death_rate=0.2, num_extant_tips=num_leaves,
        taxon_namespace=tns)
    for label, tree in ((1, t1), (2, t2)):
        print("\tTree {}: ".format(label), tree.as_string("newick"))
        tree.print_plot()

    print("\n2. Compare Trees\n")
    print(
        "\n2.1. Compare tree with itself and print Robinson-Foulds (RF) distance:\n"
    )
    _print_rf(t1, t1)
    print(
        "\n2.2. Compare different trees and print Robinson-Foulds (RF) distance:\n"
    )
    _print_rf(t1, t2)

    print("\n3. Load Trees from Newick Files and Compare:\n")
    print("\n3.1 Load trees from Newick files:\n")
    # A fresh namespace shared by all loaded and inferred trees below.
    tns = dendropy.TaxonNamespace()
    data_dir = os.path.join(directory, "data", "q2_4")
    true_trees = []
    for index in range(3):
        filename = os.path.join(
            data_dir,
            "q2_4_tree_mixture.pkl_tree_{}_newick.txt".format(index))
        tree = _read_newick_tree(filename, tns)
        print("\tTree {}: ".format(index), tree.as_string("newick"))
        tree.print_plot()
        true_trees.append(tree)

    print("\n3.2 Compare trees and print Robinson-Foulds (RF) distance:\n")
    # Every unordered pair once: (0, 1), (0, 2), (1, 2).
    for position, first in enumerate(true_trees):
        for second in true_trees[position + 1:]:
            _print_rf(first, second)

    print("\n4. Load Inferred Trees")
    # This is the result you have.
    filename = os.path.join(data_dir, "q2_4_own_results_em_topology.npy")
    topology_list = np.load(filename)
    print(topology_list.shape)
    print(topology_list)

    inferred_trees = []
    for index in range(len(true_trees)):
        tree = _inferred_to_dendropy(topology_list[index], tns)
        print("\tInferred Tree {}: ".format(index), tree.as_string("newick"))
        tree.print_plot()
        inferred_trees.append(tree)

    print("\n4.2 Compare trees and print Robinson-Foulds (RF) distance:\n")
    for index, reference in enumerate(true_trees):
        print("\tt{} vs inferred trees".format(index))
        for inferred in inferred_trees:
            _print_rf(reference, inferred)

    print("\nInvestigate")
    reference = true_trees[0]
    _print_rf(reference, inferred_trees[0])
    # These two calls do not return an RF distance, so they get their own
    # labels instead of the former (misleading) "RF distance".
    for inferred in inferred_trees[:2]:
        print("\tMissing bipartitions: \t",
              treecompare.find_missing_bipartitions(reference, inferred))
        print("\tFalse positives and negatives: \t",
              treecompare.false_positives_and_negatives(reference, inferred))
def _tree_from_newick_file(path, taxon_namespace):
    """Read the Newick file at *path* into a DendroPy tree on *taxon_namespace*."""
    with open(path, 'r') as input_file:
        newick_str = input_file.read()
    return dendropy.Tree.get(data=newick_str, schema="newick",
                             taxon_namespace=taxon_namespace)


def _tree_from_topology_file(path, taxon_namespace):
    """Load a saved topology array from *path* and convert it to a DendroPy tree.

    The array is loaded into the project's ``Tree`` class and its ``newick``
    attribute is then parsed by DendroPy on *taxon_namespace*.
    """
    topology = np.load(path)
    inferred = Tree()
    inferred.load_tree_from_direct_arrays(np.array(topology))
    return dendropy.Tree.get(data=inferred.newick, schema="newick",
                             taxon_namespace=taxon_namespace)


def _show_rf(tree_a, tree_b):
    """Print the Robinson-Foulds (symmetric difference) distance of two trees."""
    print("\tRF distance: \t",
          dendropy.calculate.treecompare.symmetric_difference(tree_a, tree_b))


def _run_rf_analysis(prefix, num_clusters):
    """Print the DendroPy demo and the RF analysis for all clusters.

    Loads the ``num_clusters`` reference trees (Newick files) and the
    ``num_clusters`` inferred trees (per-cluster topology files) whose names
    start with *prefix*, then prints the RF distance between every pair of
    reference trees and between every reference tree and every inferred tree.
    """
    print("Hello World!")
    print(
        "This file demonstrates the usage of the DendroPy module and its functions."
    )

    print("\n1. Tree Generations\n")
    print("\n1.1. Create two random birth-death trees and print them:\n")
    tns = dendropy.TaxonNamespace()
    num_leaves = 5
    t1 = dendropy.simulate.treesim.birth_death_tree(
        birth_rate=1.0, death_rate=0.5, num_extant_tips=num_leaves,
        taxon_namespace=tns)
    t2 = dendropy.simulate.treesim.birth_death_tree(
        birth_rate=1.0, death_rate=0.2, num_extant_tips=num_leaves,
        taxon_namespace=tns)
    for label, tree in ((1, t1), (2, t2)):
        print("\tTree {}: ".format(label), tree.as_string("newick"))
        tree.print_plot()

    print("\n2. Compare Trees\n")
    print(
        "\n2.1. Compare tree with itself and print Robinson-Foulds (RF) distance:\n"
    )
    _show_rf(t1, t1)
    print(
        "\n2.2. Compare different trees and print Robinson-Foulds (RF) distance:\n"
    )
    _show_rf(t1, t2)

    print("\n3. Load Trees from Newick Files and Compare:\n")
    print("\n3.1 Load trees from Newick files:\n")
    # If you want to compare two trees, make sure you specify the same Taxon Namespace!
    tns = dendropy.TaxonNamespace()
    true_trees = []
    for k in range(num_clusters):
        tree = _tree_from_newick_file(
            "{}.pkl_tree_{}_newick.txt".format(prefix, k), tns)
        print("\tTree {}: ".format(k), tree.as_string("newick"))
        tree.print_plot()
        true_trees.append(tree)

    print("\n3.2 Compare trees and print Robinson-Foulds (RF) distance:\n")
    # Every unordered pair of reference trees exactly once.
    for position, first in enumerate(true_trees):
        for second in true_trees[position + 1:]:
            _show_rf(first, second)

    inferred_trees = []
    for k in range(num_clusters):
        # Per-cluster topology file; presumably written by the per-cluster
        # save_results(...) calls in main() - confirm the "_em_topology.npy"
        # suffix against save_results.
        tree = _tree_from_topology_file(
            "{}{}_results_em_topology.npy".format(prefix, k), tns)
        print("\tInferred Tree {}: ".format(k), tree.as_string("newick"))
        tree.print_plot()
        inferred_trees.append(tree)

    print("\n4.2 Compare trees and print Robinson-Foulds (RF) distance:\n")
    # All clusters take part in the comparison (the last reference/inferred
    # tree used to be loaded but never compared).
    for k, reference in enumerate(true_trees):
        print("\tt{} vs inferred trees".format(k))
        for inferred in inferred_trees:
            _show_rf(reference, inferred)


def main():
    """Run EM on the q_2_5 samples, save the results and compare with the truth.

    Steps:
      1. Load the samples from the tab-separated text file.
      2. Run ``em_algorithm`` with a fixed seed and number of clusters.
      3. Save the combined and the per-cluster results, print them, and plot
         the estimated (log-)likelihood against the log-likelihood obtained
         with the ground-truth parameters.
      4. Print the Robinson-Foulds analysis between the reference trees and
         the inferred trees.
    """
    import copy  # local import: used to keep the EM estimate untouched

    print("\n1. Load samples from txt file.\n")

    prefix = 'q_2_5_tm_10node_20sample_4clusters'
    default_sample_filename = prefix + '.pkl_samples.txt'
    default_output_filename = prefix + '_results'
    default_num_clusters = 4
    default_seed_val = 42
    default_real_values_filename = prefix + ".pkl_tree_0_topology.npy"

    samples = np.loadtxt(default_sample_filename, delimiter="\t",
                         dtype=np.int32)
    num_samples, num_nodes = samples.shape
    print("\tnum_samples: ", num_samples, "\tnum_nodes: ", num_nodes)
    print("\tSamples: \n", samples)

    print("\n2. Run EM Algorithm.\n")
    loglikelihood, topology_array, theta_array = em_algorithm(
        default_seed_val, samples, num_clusters=default_num_clusters)

    print("\n3. Save, print and plot the results.\n")
    print("top array 0 = ", topology_array[0, :])
    save_results(loglikelihood, topology_array, theta_array,
                 default_output_filename)
    # One additional result set per cluster; the RF analysis reads these back.
    for k in range(default_num_clusters):
        save_results(loglikelihood, topology_array[k, :], theta_array[k, :],
                     "{}{}_results".format(prefix, k))

    for i in range(default_num_clusters):
        print("\n\tCluster: ", i)
        print("\tTopology: \t", topology_array[i])
        print("\tTheta: \t", theta_array[i])

    # Ground-truth parameters. theta_expected starts as a deep copy of the
    # estimate so it has the same layout, but filling it no longer overwrites
    # theta_array itself (it used to be an alias of the EM result).
    theta_expected = copy.deepcopy(theta_array)
    topology_expected = np.zeros((default_num_clusters, samples.shape[1]))
    pi_expected = np.load(prefix + ".pkl_pi.npy")
    for k in range(default_num_clusters):
        theta_expected[k][:] = np.load(
            "{}.pkl_tree_{}_theta.npy".format(prefix, k))
        topology_expected[k, :] = np.load(
            "{}.pkl_tree_{}_topology.npy".format(prefix, k))

    # The ground-truth log-likelihood does not depend on the EM iteration, so
    # it is evaluated once and repeated to give a flat reference line.
    # NOTE(review): assumes log_likelihood is deterministic and free of side
    # effects - confirm against its definition.
    expected_value = log_likelihood(default_num_clusters, samples,
                                    theta_expected, topology_expected,
                                    pi_expected)
    loglikelihood_expected = np.full(len(loglikelihood), expected_value,
                                     dtype=float)
    print("loglikelihood_expected = ", loglikelihood_expected[0])

    plt.figure(figsize=(8, 3))
    plt.subplot(121)
    plt.plot(np.exp(loglikelihood_expected), label='Expected')
    plt.plot(np.exp(loglikelihood), label='Estimated')
    plt.ylabel("Likelihood of Mixture")
    plt.xlabel("Iterations")
    plt.subplot(122)
    plt.plot(loglikelihood_expected, label='Expected')
    plt.plot(loglikelihood, label='Estimated')
    plt.ylabel("Log-Likelihood of Mixture")
    plt.xlabel("Iterations")
    plt.legend(loc=(1.04, 0))
    plt.show()

    print("\n4. Retrieve real results and compare.\n")
    if default_real_values_filename != "":
        print("\tComparing the results with real values...")

        print("\t4.1. Make the Robinson-Foulds distance analysis.\n")
        _run_rf_analysis(prefix, default_num_clusters)

        print("\t4.2. Make the likelihood comparison.\n")