def run_method(method, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Simulate HKY sequences on ``tree``, reconstruct a tree with ``method``, and score it.

    Args:
        method: Name of the reconstruction method. One of ``"RaXML"``,
            ``"SNJ"``, ``"NJ"``, ``"STR+NJ"``, ``"STR+SNJ"``, ``"STR+RaXML"``.
        tree: Reference tree. It is the ``tree_model`` for the simulation and
            the ground truth passed to ``reconstruct_tree.compare_trees``.
        m: First positional argument of ``generation.simulate_sequences``
            (presumably the sequence length -- TODO confirm).
        kappa: Forwarded to ``generation.HKY``.
        mutation_rate: Forwarded to ``generation.simulate_sequences``.
        threshold: Forwarded to the ``STR+*`` variants under the keyword
            ``threshhold`` (sic -- that is the spelling this code has always
            passed to ``deep_spectral_tree_reconstruction``). The other
            methods do not use it, but it is still printed and returned.
        verbose: Forwarded to the ``STR+*`` variants only.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]``. ``runtime`` is the
        wall-clock time in seconds of the reconstruction call alone; the
        simulation and the construction of the reconstruction object are not
        included.

    Raises:
        ValueError: If ``method`` is not one of the supported names. This is
            checked before the (potentially expensive) simulation starts.
    """
    # STR+* variants: name of the reconstruct_tree class handed to STDR as
    # the base reconstruction algorithm.
    stdr_base_names = {
        "STR+NJ": "NeighborJoining",
        "STR+SNJ": "SpectralNeighborJoining",
        "STR+RaXML": "RAxML",
    }
    direct_methods = ("RaXML", "SNJ", "NJ")

    # Fail fast: previously an unknown name ran the whole simulation and then
    # crashed on an unbound ``tree_rec`` when comparing trees.
    if method not in direct_methods and method not in stdr_base_names:
        raise ValueError(
            "Unknown method %r; expected one of %s"
            % (method, ", ".join(direct_methods + tuple(stdr_base_names)))
        )

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa=kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA",
    )
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer is restarted *after* the reconstruction object
    # has been built, so only the reconstruction call itself is timed.
    if method == "RaXML":
        reconstructor = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = reconstructor(observations, taxa_meta, raxml_args=raxml_args)
    elif method in ("SNJ", "NJ"):
        if method == "SNJ":
            algorithm = reconstruct_tree.SpectralNeighborJoining
        else:
            algorithm = reconstruct_tree.NeighborJoining
        reconstructor = algorithm(similarity)
        start_time = time.time()
        tree_rec = reconstructor(observations, taxa_meta)
    else:
        base_algorithm = getattr(reconstruct_tree, stdr_base_names[method])
        # Only the RAxML-backed variant receives RAxML command-line arguments.
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR+RaXML" else {}
        spectral_method = reconstruct_tree.STDR(base_algorithm, similarity)
        start_time = time.time()
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            similarity,
            taxa_metadata=taxa_meta,
            threshhold=threshold,
            min_split=5,
            verbose=verbose,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
# Simulate characters on the H3N2 tree under HKY85 (kappa = 2).
# N is the first argument of simulate_discrete_chars
# (presumably the number of characters per taxon -- TODO confirm).
N = 1000
data_HKY = simulate_discrete_chars(N, H3N2_tree, Hky85(kappa=2), mutation_rate=0.1)

# One row of state symbols per taxon, in taxon-namespace order.
ch_list = [
    [state.symbol for state in data_HKY[taxon]]
    for taxon in data_HKY.taxon_namespace
]
ch_arr = np.array(ch_list)

# Mean of the element-wise equality for every ordered pair of rows
# (including each row with itself), flattened into a 1-D array.
identical = np.array(
    [np.mean(row_a == row_b) for row_a, row_b in product(ch_arr, repeat=2)]
)

# Disabled profiling of the similarity-matrix computation:
#start_time = time.time()
#cProfile.run('S = HKY_similarity_matrix(ch_arr)')
#compute_s_time = time.time() - start_time
#print("--- %s seconds ---" % compute_s_time)

threshold = 128

# The timer starts before the STDR object is built, so construction is
# included in the reported runtime.
t1 = time.time()
spectral_method = reconstruct_tree.STDR(
    reconstruct_tree.RAxML,
    reconstruct_tree.HKY_similarity_matrix,
)
tree_rec = spectral_method.deep_spectral_tree_reconstruction(
    ch_arr,
    reconstruct_tree.HKY_similarity_matrix,
    taxon_namespace=H3N2_tree.taxon_namespace,
    threshhold=threshold,
    min_split=30,
)
runtime = time.time() - t1

Deep_nj_RF, Deep_nj_RF_F1 = reconstruct_tree.compare_trees(tree_rec, H3N2_tree)

# NOTE(review): the label below says "SNJ", but the STDR object above is
# built with reconstruct_tree.RAxML -- confirm which label is intended.
print("SNJ: ")
print("RF = ", Deep_nj_RF)
print("F1% = ", Deep_nj_RF_F1)
print("runtime = ", runtime)
print("")
def run_method(method, size, run_num, tree, m=300, kappa=2, mutation_rate=0.05,
               threshold=None, verbose=False,
               output_root="/gpfs/ysm/scratch60/morgan_levine/mw957/tree_merge_test/"):
    """Run STDR + RAxML on Jukes-Cantor data simulated on ``tree`` and dump artefacts to disk.

    A fresh run folder ``<output_root>/seqlen_<m>_<method>_<threshold>_<run_num>``
    is created (any existing folder of that name is deleted first) and filled
    with:

    * ``true_tree.txt``     -- ``tree`` in Newick format,
    * ``subtree_%s.txt``    -- template passed to the reconstruction as
      ``subtree_filename`` (what gets written is decided by
      ``deep_spectral_tree_reconstruction``),
    * ``STDR_tree.txt``     -- the reconstructed tree in Newick format,
    * ``taxa.txt``          -- one taxon label per line,
    * ``HKY_distance.txt``  -- first line ``size``, then one tab-separated row
      per taxon: the label followed by that taxon's row of the matrix
      returned by ``reconstruct_tree.JC_distance_matrix``.

    Args:
        method: Label only. It appears in the folder name, the printed output
            and the returned row; the reconstruction is always STDR with
            RAxML regardless of its value.
        size: Written verbatim as the first line of the distance file
            (presumably the number of taxa -- TODO confirm with callers).
        run_num: Run identifier used in the folder name.
        tree: Reference tree; simulation model and ground truth for scoring.
        m: First positional argument of ``generation.simulate_sequences``;
            also embedded in the folder name after ``seqlen_``.
        kappa: Unused here (the sequence model is Jukes-Cantor); kept so the
            signature stays compatible with existing callers.
        mutation_rate: Forwarded to ``generation.simulate_sequences``.
        threshold: Forwarded as ``threshhold`` (sic -- the keyword spelling
            this code has always passed to the reconstruction).
        verbose: Forwarded to the reconstruction.
        output_root: Directory under which the run folder is created. It
            defaults to the previously hard-coded cluster path, and missing
            parent directories are now created.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time in seconds of the reconstruction call only.
    """
    run_name = "seqlen_%s_%s_%s_%s" % (m, method, threshold, run_num)
    subtree_folder = os.path.join(output_root, run_name)

    # Start every run from an empty folder.
    if os.path.exists(subtree_folder):
        shutil.rmtree(subtree_folder)
    # makedirs (not mkdir) so a missing output_root does not abort the run.
    os.makedirs(subtree_folder)

    tree.write(path=os.path.join(subtree_folder, "true_tree.txt"), schema="newick")
    subtree_filename = os.path.join(subtree_folder, "subtree_%s.txt")

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.Jukes_Cantor(),
        mutation_rate=mutation_rate,
        alphabet="DNA",
    )
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    spectral_method = reconstruct_tree.STDR(
        reconstruct_tree.RAxML, reconstruct_tree.JC_similarity_matrix)
    # Timer restarts here so that only the reconstruction is measured.
    start_time = time.time()
    # NOTE(review): RAxML is asked for --HKY85 although the data are simulated
    # under Jukes-Cantor -- left unchanged, confirm this is intended.
    tree_rec = spectral_method.deep_spectral_tree_reconstruction(
        observations,
        reconstruct_tree.JC_similarity_matrix,
        taxa_metadata=taxa_meta,
        threshhold=threshold,
        raxml_args="-T 2 --HKY85 -c 1",
        min_split=5,
        verbose=verbose,
        subtree_filename=subtree_filename,
    )
    runtime = time.time() - start_time
    tree_rec.write(path=os.path.join(subtree_folder, "STDR_tree.txt"), schema="newick")

    distance = reconstruct_tree.JC_distance_matrix(observations, taxa_meta)
    taxa_list = [taxon.label for taxon in taxa_meta]
    with open(os.path.join(subtree_folder, "taxa.txt"), "w") as taxa_file:
        taxa_file.writelines("%s\n" % label for label in taxa_list)

    distance_pd = pd.DataFrame(distance)
    distance_pd.index = taxa_list
    # NOTE(review): the file is named "HKY_distance" but holds JC distances;
    # the name is kept because downstream tools may look for it.
    distance_path = os.path.join(subtree_folder, "HKY_distance.txt")
    # Write the leading ``size`` line and the matrix in one pass instead of
    # writing the CSV, reading it back and rewriting it. newline="" is what
    # pandas expects for a text handle; os.linesep keeps the first line's
    # terminator consistent with the rows pandas writes.
    with open(distance_path, "w", newline="", encoding="utf-8") as distance_file:
        distance_file.write(str(size) + os.linesep)
        distance_pd.to_csv(distance_file, sep="\t", header=False)

    # accuracy of the STDR method
    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]