def run_method(method, tree, m=300, kappa=2, mutation_rate=0.05, threshold=None, verbose=False):
    """Simulate DNA sequences on ``tree``, rebuild the tree with ``method`` and score it.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR+NJ"``,
            ``"STR+SNJ"`` or ``"STR+RaXML"`` (no spaces around ``+``).
        tree: Reference tree; used as the simulation model and as the ground
            truth passed to ``reconstruct_tree.compare_trees``.
        m: First positional argument of ``generation.simulate_sequences``
            (presumably the number of simulated sites -- confirm there).
        kappa: ``kappa`` parameter of the ``generation.HKY`` sequence model.
        mutation_rate: Forwarded to ``generation.simulate_sequences``.
        threshold: Forwarded as ``threshhold`` to the STR variants; unused by
            the other methods.
        verbose: Forwarded to the STR variants; unused by the other methods.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time in seconds of the reconstruction call only and
        ``RF``/``F1`` are the two values returned by ``compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Method name -> name of the reconstruct_tree class that implements it.
    direct_classes = {"SNJ": "SpectralNeighborJoining", "NJ": "NeighborJoining"}
    str_base_classes = {
        "STR+NJ": "NeighborJoining",
        "STR+SNJ": "SpectralNeighborJoining",
        "STR+RaXML": "RAxML",
    }
    supported = ("RaXML", *direct_classes, *str_base_classes)
    # Fail fast: without this check an unknown name would run the whole
    # simulation and then die with an UnboundLocalError on ``tree_rec``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    raxml_args = "-T 2 --HKY85 -c 1"

    start_time = time.time()
    observations, taxa_meta = generation.simulate_sequences(
        m,
        tree_model=tree,
        seq_model=generation.HKY(kappa=kappa),
        mutation_rate=mutation_rate,
        alphabet="DNA",
    )
    runtime = time.time() - start_time
    print("Simulation took %s seconds" % runtime)

    # In every branch the timer starts after the reconstruction object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        tree_rec = raxml_HKY(observations, taxa_meta, raxml_args=raxml_args)
    else:
        similarity = reconstruct_tree.HKY_similarity_matrix
        if method in direct_classes:
            reconstructor = getattr(reconstruct_tree, direct_classes[method])(similarity)
            start_time = time.time()
            tree_rec = reconstructor(observations, taxa_meta)
        else:
            base_method = getattr(reconstruct_tree, str_base_classes[method])
            spectral_method = reconstruct_tree.STDR(base_method, similarity)
            # Only the RAxML-backed variant receives extra command-line arguments.
            extra_kwargs = {"raxml_args": raxml_args} if method == "STR+RaXML" else {}
            start_time = time.time()
            tree_rec = spectral_method.deep_spectral_tree_reconstruction(
                observations,
                similarity,
                taxa_metadata=taxa_meta,
                threshhold=threshold,  # keyword spelled as in the original call
                min_split=5,
                verbose=verbose,
                **extra_kwargs
            )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
) and node.parent_node.child_nodes()[1].is_leaf(): cherry_count += 1 cherry_count = cherry_count / 2 return cherry_count num_taxa = 20 num_reps = 10 N_vec = np.arange(100, 400, 50) #reference_tree = utils.balanced_binary(num_taxa) jc = generation.Jukes_Cantor(num_classes=2) mutation_rate = jc.p2t(0.9) snj = reconstruct_tree.SpectralNeighborJoining( reconstruct_tree.JC_similarity_matrix) nj = reconstruct_tree.NeighborJoining(reconstruct_tree.JC_similarity_matrix) treesvd = reconstruct_tree.TreeSVD() methods = [snj, treesvd] #results = compare_methods.experiment([reference_tree], jc, N_vec, methods=methods,\ # mutation_rates = [mutation_rate], reps_per_tree=num_reps) df = pd.DataFrame( columns=['method', 'runtime', 'RF', 'n', 'cherries_ref', 'cherries_res']) for i in np.arange(num_reps): print(i) reference_tree = utils.unrooted_pure_kingman_tree(num_taxa) ch_ref = cherry_count_for_tree(reference_tree) for n in N_vec: observations, taxa_meta = generation.simulate_sequences( n, tree_model=reference_tree,
def run_method(method, tree, threshold=None):
    """Simulate HKY85 characters on ``tree``, rebuild the tree with ``method`` and score it.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``,
            ``"STR + SNJ"`` or ``"STR + RaXML"`` (with spaces around ``+``).
        tree: Reference tree; used for the simulation, for its
            ``taxon_namespace`` and as the ground truth passed to
            ``reconstruct_tree.compare_trees``.
        threshold: Forwarded as ``threshhold`` to the STR variants; unused by
            the other methods.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time in seconds of the reconstruction call only and
        ``RF``/``F1`` are the two values returned by ``compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Method name -> name of the reconstruct_tree class that implements it.
    direct_classes = {"SNJ": "SpectralNeighborJoining", "NJ": "NeighborJoining"}
    str_base_classes = {
        "STR + NJ": "NeighborJoining",
        "STR + SNJ": "SpectralNeighborJoining",
        "STR + RaXML": "RAxML",
    }
    supported = ("RaXML", *direct_classes, *str_base_classes)
    # Fail fast: without this check an unknown name would run the simulation
    # and then die with an UnboundLocalError on ``start_time``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    raxml_args = "-T 2 --HKY85 -c 1"

    data_HKY = simulate_discrete_chars(1000, tree, Hky85(kappa=2), mutation_rate=0.1)
    # One row of state symbols per taxon, in taxon-namespace order.
    ch_arr = np.array(
        [[x.symbol for x in data_HKY[t]] for t in data_HKY.taxon_namespace])

    # In every branch the timer starts after the reconstruction object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        # RAxML is given the character matrix itself, not the symbol array.
        tree_rec = raxml_HKY(data_HKY, raxml_args=raxml_args)
    else:
        similarity = reconstruct_tree.HKY_similarity_matrix
        if method in direct_classes:
            reconstructor = getattr(reconstruct_tree, direct_classes[method])(similarity)
            start_time = time.time()
            tree_rec = reconstructor(ch_arr, tree.taxon_namespace)
        else:
            base_method = getattr(reconstruct_tree, str_base_classes[method])
            spectral_method = reconstruct_tree.SpectralTreeReconstruction(
                base_method, similarity)
            # Only the RAxML-backed variant receives extra command-line arguments.
            extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
            start_time = time.time()
            # NOTE(review): method and keyword names ("reonstruction",
            # "threshhold") are spelled exactly as in the original call --
            # confirm they match the reconstruct_tree API in use.
            tree_rec = spectral_method.deep_spectral_tree_reonstruction(
                ch_arr,
                similarity,
                taxon_namespace=tree.taxon_namespace,
                threshhold=threshold,
                min_split=5,
                **extra_kwargs
            )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]
def run_method(method, tree, seqs, threshold=None):
    """Rebuild ``tree`` from the given sequences with ``method`` and score the result.

    Args:
        method: One of ``"RaXML"``, ``"SNJ"``, ``"NJ"``, ``"STR + NJ"``,
            ``"STR + SNJ"`` or ``"STR + RaXML"`` (with spaces around ``+``).
        tree: Reference tree; supplies the ``taxon_namespace`` and is the
            ground truth passed to ``reconstruct_tree.compare_trees``.
        seqs: Mapping indexed by taxon label whose values iterate over items
            exposing a ``symbol`` attribute.
        threshold: Forwarded as ``threshhold`` to the STR variants; unused by
            the other methods.

    Returns:
        ``[method, str(threshold), runtime, RF, F1]`` where ``runtime`` is the
        wall-clock time in seconds of the reconstruction call only and
        ``RF``/``F1`` are the two values returned by ``compare_trees``.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    # Method name -> name of the reconstruct_tree class that implements it.
    direct_classes = {"SNJ": "SpectralNeighborJoining", "NJ": "NeighborJoining"}
    str_base_classes = {
        "STR + NJ": "NeighborJoining",
        "STR + SNJ": "SpectralNeighborJoining",
        "STR + RaXML": "RAxML",
    }
    supported = ("RaXML", *direct_classes, *str_base_classes)
    # Fail fast: without this check an unknown name would build all the inputs
    # and then die with an UnboundLocalError on ``start_time``.
    if method not in supported:
        raise ValueError(
            "Unknown method %r; expected one of: %s" % (method, ", ".join(supported)))

    raxml_args = "-T 2 --HKY85 -c 1"

    # One row of state symbols per taxon, in taxon-namespace order.
    ch_arr = np.array(
        [[x.symbol for x in seqs[taxon.label]] for taxon in tree.taxon_namespace])
    # Rewrite every "U" symbol as "T" before building the DNA matrix.
    ch_arr[ch_arr == "U"] = "T"

    # Rebuild a DNA character matrix that shares the tree's taxon namespace.
    ch_dendro = dendropy.DnaCharacterMatrix()
    ch_dendro.taxon_namespace = tree.taxon_namespace
    for taxon, row in zip(tree.taxon_namespace, ch_arr):
        ch_dendro.new_sequence(taxon, row.tolist())

    # In every branch the timer starts after the reconstruction object has
    # been built, so only the reconstruction call itself is measured.
    if method == "RaXML":
        raxml_HKY = reconstruct_tree.RAxML()
        start_time = time.time()
        # RAxML is given the character matrix itself, not the symbol array.
        tree_rec = raxml_HKY(ch_dendro, raxml_args=raxml_args)
    else:
        similarity = reconstruct_tree.HKY_similarity_matrix_missing_data
        if method in direct_classes:
            reconstructor = getattr(reconstruct_tree, direct_classes[method])(similarity)
            start_time = time.time()
            tree_rec = reconstructor(ch_arr, tree.taxon_namespace)
        else:
            base_method = getattr(reconstruct_tree, str_base_classes[method])
            spectral_method = reconstruct_tree.SpectralTreeReconstruction(
                base_method, similarity)
            # Only the RAxML-backed variant receives extra command-line arguments.
            extra_kwargs = {"raxml_args": raxml_args} if method == "STR + RaXML" else {}
            start_time = time.time()
            # NOTE(review): method and keyword names ("reonstruction",
            # "threshhold") are spelled exactly as in the original call --
            # confirm they match the reconstruct_tree API in use.
            tree_rec = spectral_method.deep_spectral_tree_reonstruction(
                ch_arr,
                similarity,
                taxon_namespace=tree.taxon_namespace,
                threshhold=threshold,
                min_split=5,
                **extra_kwargs
            )
    runtime = time.time() - start_time

    RF, F1 = reconstruct_tree.compare_trees(tree_rec, tree)
    print(method)
    if threshold is not None:
        print(threshold)
    print("--- %s seconds ---" % runtime)
    print("RF = ", RF)
    print("F1% = ", F1)
    return [method, str(threshold), runtime, RF, F1]