def run_method(method, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Simulate sequences on ``tree``, reconstruct the tree with ``method`` and score it.

    Parameters
    ----------
    method : str
        One of "RaXML", "SNJ", "NJ", "STR+NJ", "STR+SNJ", "STR+RaXML".
    tree :
        Reference tree; passed to ``generation.simulate_sequences`` as
        ``tree_model`` and to ``reconstruct_tree.compare_trees`` as the
        ground truth.
    m : int
        First positional argument of ``generation.simulate_sequences``
        (presumably the sequence length -- confirm against that module).
    kappa :
        Forwarded to ``generation.HKY(kappa=...)``.
    mutation_rate :
        Forwarded to ``generation.simulate_sequences``.
    threshold :
        Forwarded as ``threshhold`` to the STR+* methods; ignored otherwise.
    verbose : bool
        Forwarded to the STR+* methods; ignored otherwise.

    Returns
    -------
    list
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time of the reconstruction call only and ``RF``/``F1`` are
        the two values returned by ``reconstruct_tree.compare_trees``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR+NJ", "STR+SNJ", "STR+RaXML")
    # Fail fast: without this check an unknown name would run the whole
    # simulation and then die with an UnboundLocalError on ``tree_rec``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa=kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA",
    )
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer starts only after the reconstructor object has
    # been built, so ``runtime`` measures the reconstruction call itself.
    if method == "RaXML":
        reconstructor = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = reconstructor(observations, taxa_meta, raxml_args=raxml_args)
    elif method == "SNJ":
        reconstructor = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(observations, taxa_meta)
    elif method == "NJ":
        reconstructor = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(observations, taxa_meta)
    else:
        # The three STR+* variants differ only in the inner algorithm and in
        # whether RAxML arguments are passed along.
        inner_algorithm = {
            "STR+NJ": reconstruct_tree.NeighborJoining,
            "STR+SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR+RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR+RaXML" else {}
        spectral_method = reconstruct_tree.STDR(inner_algorithm, similarity)
        start_time = time.time()
        # ``threshhold`` (sic) is the keyword name used by the called API.
        tree_rec = spectral_method.deep_spectral_tree_reconstruction(
            observations,
            similarity,
            taxa_metadata=taxa_meta,
            threshhold=threshold,
            min_split=5,
            verbose=verbose,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
def run_method(method, tree, seqs, threshold=None):
    """Reconstruct ``tree`` from the given sequences with ``method`` and score the result.

    Parameters
    ----------
    method : str
        One of "RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML".
    tree :
        Reference tree. Its ``taxon_namespace`` fixes the row order of the
        character array, and it is the ground truth passed to
        ``reconstruct_tree.compare_trees``.
    seqs :
        Mapping indexed by taxon label; each entry is iterated and the
        ``.symbol`` of every element is collected.
    threshold :
        Forwarded as ``threshhold`` to the "STR + *" methods; ignored otherwise.

    Returns
    -------
    list
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time of the reconstruction call only and ``RF``/``F1`` are
        the two values returned by ``reconstruct_tree.compare_trees``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    # Fail fast: without this check an unknown name would only surface later
    # as an UnboundLocalError on ``start_time``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    namespace = tree.taxon_namespace
    labels = [taxon.label for taxon in namespace]
    ch_arr = np.array([[state.symbol for state in seqs[label]] for label in labels])
    # Replace every "U" symbol with "T" before building the DNA matrix.
    ch_arr[ch_arr == "U"] = "T"

    ch_dendro = dendropy.DnaCharacterMatrix()
    ch_dendro.taxon_namespace = namespace
    for row, _label in enumerate(labels):
        ch_dendro.new_sequence(namespace[row], ch_arr[row, :].tolist())

    similarity = reconstruct_tree.HKY_similarity_matrix_missing_data
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer starts only after the reconstructor object has
    # been built, so ``runtime`` measures the reconstruction call itself.
    if method == "RaXML":
        reconstructor = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = reconstructor(ch_dendro, raxml_args=raxml_args)
    elif method == "SNJ":
        reconstructor = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(ch_arr, namespace)
    elif method == "NJ":
        reconstructor = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(ch_arr, namespace)
    else:
        # The three "STR + *" variants differ only in the inner algorithm and
        # in whether RAxML arguments are passed along.
        inner_algorithm = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_algorithm, similarity
        )
        start_time = time.time()
        # ``deep_spectral_tree_reonstruction`` and ``threshhold`` (sic) are the
        # names used by the called API; they are kept exactly as they were.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
def run_method(method, tree, threshold=None):
    """Simulate HKY85 characters on ``tree``, reconstruct it with ``method`` and score it.

    Data come from ``simulate_discrete_chars(1000, tree, Hky85(kappa=2),
    mutation_rate=0.1)``.

    Parameters
    ----------
    method : str
        One of "RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML".
    tree :
        Reference tree; used for the simulation, supplies the taxon namespace
        handed to the reconstruction methods, and is the ground truth passed
        to ``reconstruct_tree.compare_trees``.
    threshold :
        Forwarded as ``threshhold`` to the "STR + *" methods; ignored otherwise.

    Returns
    -------
    list
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time of the reconstruction call only and ``RF``/``F1`` are
        the two values returned by ``reconstruct_tree.compare_trees``.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    supported = ("RaXML", "SNJ", "NJ", "STR + NJ", "STR + SNJ", "STR + RaXML")
    # Fail fast: without this check an unknown name would run the simulation
    # and then die with an UnboundLocalError on ``start_time``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported))
        )

    data_HKY = simulate_discrete_chars(1000, tree, Hky85(kappa=2), mutation_rate=0.1)
    # One row of symbols per taxon, in the order of the simulated matrix's namespace.
    ch_arr = np.array(
        [[state.symbol for state in data_HKY[taxon]] for taxon in data_HKY.taxon_namespace]
    )

    similarity = reconstruct_tree.HKY_similarity_matrix
    raxml_args = "-T 2 --HKY85 -c 1"

    # In every branch the timer starts only after the reconstructor object has
    # been built, so ``runtime`` measures the reconstruction call itself.
    if method == "RaXML":
        reconstructor = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = reconstructor(data_HKY, raxml_args=raxml_args)
    elif method == "SNJ":
        reconstructor = reconstruct_tree.SpectralNeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(ch_arr, tree.taxon_namespace)
    elif method == "NJ":
        reconstructor = reconstruct_tree.NeighborJoining(similarity)
        start_time = time.time()
        tree_rec = reconstructor(ch_arr, tree.taxon_namespace)
    else:
        # The three "STR + *" variants differ only in the inner algorithm and
        # in whether RAxML arguments are passed along.
        inner_algorithm = {
            "STR + NJ": reconstruct_tree.NeighborJoining,
            "STR + SNJ": reconstruct_tree.SpectralNeighborJoining,
            "STR + RaXML": reconstruct_tree.RAxML,
        }[method]
        extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
        spectral_method = reconstruct_tree.SpectralTreeReconstruction(
            inner_algorithm, similarity
        )
        start_time = time.time()
        # ``deep_spectral_tree_reonstruction`` and ``threshhold`` (sic) are the
        # names used by the called API; they are kept exactly as they were.
        tree_rec = spectral_method.deep_spectral_tree_reonstruction(
            ch_arr,
            similarity,
            taxon_namespace=tree.taxon_namespace,
            threshhold=threshold,
            min_split=5,
            **extra_kwargs
        )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
import generation from dendropy.interop import raxml from dendropy.model.discrete import simulate_discrete_chars, Jc69 from dendropy.calculate.treecompare import symmetric_difference import pandas as pd import numpy as np #num_taxa = [128,256,512,1024,2048,4096] num_taxa = np.arange(200, 2000, 200) n_itr = 10 #num_taxa = [8,16,32] N = 1000 jc = generation.Jukes_Cantor() mutation_rate = jc.p2t(0.95) nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix) raxml = reconstruct_tree.RAxML() #tree_list = [utils.balanced_binary(m) for m in num_taxa] df = pd.DataFrame(columns=['method', 'runtime', 'RF', 'm']) for m in num_taxa: for n_itr in range(n_itr): reference_tree = utils.unrooted_pure_kingman_tree(m) observations, taxa_meta = generation.simulate_sequences( N, tree_model=reference_tree, seq_model=jc, mutation_rate=mutation_rate, alphabet="DNA") # NJ time_s = time.time()