def run_method(method, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Simulate HKY sequences on ``tree`` and score one reconstruction method.

    Sequences are generated with ``generation.simulate_sequences`` under an
    HKY model, the tree is reconstructed with the selected method, and the
    result is compared to ``tree`` with ``reconstruct_tree.compare_trees``.
    Progress and scores are printed as a side effect.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR+NJ"``,
            ``"STR+SNJ"`` or ``"STR+RaXML"``.
        tree: Reference tree passed as ``tree_model`` to the simulator and
            used as the ground truth for scoring.
        m: First positional argument of ``simulate_sequences``
            (presumably the sequence length -- confirm against the simulator).
        kappa: ``kappa`` parameter of the HKY sequence model.
        mutation_rate: Forwarded to ``simulate_sequences``.
        threshold: Forwarded as ``threshhold`` (sic, the callee's spelling)
            to the STR variants; ignored by the direct methods.
        verbose: Forwarded to the STR variants.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is
        the wall-clock time of the reconstruction call only and ``RF``/``F1``
        are the two values returned by ``compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    direct_methods = ("RaXML", "SNJ", "NJ")
    spectral_methods = ("STR+NJ", "STR+SNJ", "STR+RaXML")
    # Fail fast: previously an unknown name ran the whole simulation and then
    # crashed with UnboundLocalError on ``tree_rec``.
    if method not in direct_methods + spectral_methods:
        raise ValueError(
            "Unknown method %r; expected one of %s"
            % (method, ", ".join(direct_methods + spectral_methods)))

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa=kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA")
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer starts after the method object is built, so
    # only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(observations, taxa_meta, raxml_args=raxml_args)
    elif method == "SNJ":
        snj = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = snj(observations, taxa_meta)
    elif method == "NJ":
        nj = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = nj(observations, taxa_meta)
    else:
        # The three STR variants differ only in the inner reconstruction
        # class and in whether RAxML arguments are forwarded.
        inner_method = {
            "STR+NJ": reconstruct_tree.NeighborJoining,
            "STR+SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR+RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR+RaXML" else {}
        spectral_method = reconstruct_tree.STDR(inner_method, similarity)
        start_time = time.time()
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            similarity,
            taxa_metadata=taxa_meta,
            threshhold=threshold,
            min_split=5,
            verbose=verbose,
            **extra_kwargs)
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
reconstruct_tree.JC_similarity_matrix) nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix) treesvd = reconstruct_tree.TreeSVD() methods = [snj, treesvd] #results = compare_methods.experiment([reference_tree], jc, N_vec, methods=methods,\ # mutation_rates = [mutation_rate], reps_per_tree=num_reps) df = pd.DataFrame( columns=['method', 'runtime', 'RF', 'n', 'cherries_ref', 'cherries_res']) for i in np.arange(num_reps): print(i) reference_tree = utils.unrooted_pure_kingman_tree(num_taxa) ch_ref = cherry_count_for_tree(reference_tree) for n in N_vec: observations, taxa_meta = generation.simulate_sequences( n, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, alphabet="DNA") # Tree svd t_s = time.time() tree_svd_rec = treesvd(observations, taxa_meta) runtime_treesvd = time.time() - t_s RF_svd, F1 = reconstruct_tree.compare_trees(tree_svd_rec, reference_tree) ch_svd = cherry_count_for_tree(tree_svd_rec) df = df.append( { 'method': 'treesvd', 'runtime': runtime_treesvd, 'RF': RF_svd,
# Parameters for a single HKY simulation + STR/RAxML reconstruction run.
threshold = 32
# A Jukes-Cantor model (generation.Jukes_Cantor(), with
# mutation_rate = [jc.p2t(0.95)]) was the earlier configuration.
hky = generation.HKY(kappa=2)
mutation_rate = 0.05

# Reference tree. Earlier experiments used utils.unrooted_birth_death_tree
# (birth_rate=1) or utils.lopsided_tree, optionally forcing every edge
# length to 1 via preorder_edge_iter().
np.random.seed(0)
reference_tree = utils.balanced_binary(num_taxa)

# Re-seed so the simulated sequences do not depend on how the tree was built.
np.random.seed(0)
t0 = time.time()
observations, meta = generation.simulate_sequences(
    N,
    tree_model=reference_tree,
    seq_model=hky,
    mutation_rate=mutation_rate,
    rng=np.random,
    alphabet='DNA',
)
print("gen time: ", time.time() - t0)

# NOTE(review): `str` must be a module alias bound elsewhere in the file
# (it shadows the builtin) -- confirm against the import block.
spectral_method = str.STR(
    reconstruct_tree.RAxML,
    reconstruct_tree.HKY_similarity_matrix,
    threshold=threshold,
    merge_method="least_square",
    num_gaps=1,
    min_split=5,
    verbose=False,
)

# Timer for the reconstruction step that follows. A cProfile.run(...) wrapper
# around deep_spectral_tree_reconstruction (JC similarity, threshhold=8,
# min_split=3, output "temp1.prof") was used here for profiling.
t0 = time.time()
def run_method(method, size, run_num, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Run STDR+RAxML on simulated JC sequences and dump artefacts to disk.

    A per-run output folder is (re)created, then this function writes into it:
    the true tree, the reconstructed tree, the taxon labels, and a JC distance
    matrix prefixed with ``size`` on its first line. Subtrees are written by
    the reconstruction itself through ``subtree_filename``.

    Args:
        method: Label used only in the output folder name and in the printed
            / returned summary; the reconstruction pipeline itself is fixed.
        size: Value written as the first line of the distance file
            (presumably the number of taxa -- confirm with the caller).
        run_num: Run index, used in the output folder name.
        tree: Reference tree; written to disk, used as ``tree_model`` for the
            simulation and as ground truth for scoring.
        m: First positional argument of ``simulate_sequences``; also part of
            the folder name.
        kappa: Unused here (sequences are simulated under Jukes-Cantor).
        mutation_rate: Forwarded to ``simulate_sequences``.
        threshold: Forwarded as ``threshhold`` (sic, the callee's spelling);
            also part of the folder name.
        verbose: Forwarded to the reconstruction.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is
        the wall-clock time of the reconstruction call and ``RF``/``F1`` are
        the two values returned by ``reconstruct_tree.compare_trees``.
    """
    subtree_folder = "/gpfs/ysm/scratch60/morgan_levine/mw957/tree_merge_test/seqlen_" + str(
        m) + "_" + method + "_" + str(threshold) + "_" + str(run_num) + "/"
    # Start from an empty folder so stale subtree files never mix with new ones.
    if os.path.exists(subtree_folder):
        shutil.rmtree(subtree_folder)
    # makedirs (not mkdir): also creates missing parent directories instead
    # of failing with FileNotFoundError on a fresh scratch area.
    os.makedirs(subtree_folder)

    tree.write(path=subtree_folder + "true_tree.txt", schema="newick")
    # "%s" placeholder is left unformatted; it is passed on as-is below.
    subtree_filename = subtree_folder + "subtree_%s.txt"

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.Jukes_Cantor(),
        mutation_rate=mutation_rate,
        alphabet="DNA")
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    spectral_method = reconstruct_tree.STDR(
        reconstruct_tree.RAxML, reconstruct_tree.JC_similarity_matrix)
    start_time = time.time()
    # NOTE(review): RAxML is asked for --HKY85 although the data are
    # simulated under Jukes-Cantor -- confirm this is intended.
    tree_rec = spectral_method.deep_spectral_tree_reconstruction(
        observations,
        reconstruct_tree.JC_similarity_matrix,
        taxa_metadata=taxa_meta,
        threshhold=threshold,
        raxml_args="-T 2 --HKY85 -c 1",
        min_split=5,
        verbose=verbose,
        subtree_filename=subtree_filename)
    runtime = time.time() - start_time
    tree_rec.write(path=subtree_folder + "STDR_tree.txt", schema="newick")

    distance = reconstruct_tree.JC_distance_matrix(observations, taxa_meta)
    distance_pd = pd.DataFrame(distance)
    taxa_list = [x.label for x in taxa_meta]
    with open(subtree_folder + 'taxa.txt', 'w') as f:
        for item in taxa_list:
            f.write("%s\n" % item)

    distance_pd.index = taxa_list
    # NOTE(review): the file is named "HKY_distance" but holds JC distances;
    # the name is kept because downstream tooling may rely on it.
    distance_path = subtree_folder + "HKY_distance.txt"
    distance_pd.to_csv(distance_path, sep="\t", header=False)
    # Prepend ``size`` as a header line by rewriting the file.
    with open(distance_path, 'r') as original:
        data = original.read()
    with open(distance_path, 'w') as modified:
        modified.write(str(size) + "\n" + data)

    # accuracy of the STDR method
    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
# Compare the two subtree-merging strategies of the spectral reconstruction
# on lopsided trees with Jukes-Cantor sequences.
jc = generation.Jukes_Cantor()
mutation_rate = [jc.p2t(0.95)]
num_itr = 2  # 0

# Alternative reference trees used previously:
# utils.unrooted_birth_death_tree(num_taxa, birth_rate=1), optionally with
# every edge length forced to 1, or utils.balanced_binary(num_taxa).

merging_method_list = ['least_square', 'angle']
# One independent score list per merging strategy.
RF = {name: [] for name in merging_method_list}
F1 = {name: [] for name in merging_method_list}

for merge_method in merging_method_list:
    for i in range(num_itr):
        # A fresh tree and fresh sequences are drawn for every repetition.
        reference_tree = utils.lopsided_tree(num_taxa)
        observations, taxa_meta = generation.simulate_sequences(
            N,
            tree_model=reference_tree,
            seq_model=jc,
            mutation_rate=mutation_rate,
        )

        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            reconstruct_tree.NeighborJoining,
            reconstruct_tree.JC_similarity_matrix,
        )
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            reconstruct_tree.JC_similarity_matrix,
            taxa_metadata=taxa_meta,
            threshhold=16,
            merge_method=merge_method,
        )

        RF_i, F1_i = reconstruct_tree.compare_trees(tree_rec, reference_tree)
        RF[merge_method].append(RF_i)
        F1[merge_method].append(F1_i)