def part12_cat(num_taxa_s_cat, Ns_cat):
    """Run Part 1.2 (NJ vs. SNJ) on lopsided trees of several sizes.

    One lopsided tree is built per entry of ``num_taxa_s_cat`` and the whole
    list is handed to ``compare_methods.experiment`` together with the
    module-level ``sequence_model`` and ``mutation_rates``.

    Args:
        num_taxa_s_cat: Iterable of taxa counts; one tree is built per entry.
        Ns_cat: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns for this setup.
    """
    print("Starting Part 12")

    # Both reconstruction methods are driven by the same JC similarity matrix.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
    ]

    # Part 1.2 varies the number of taxa: one tree per requested size.
    trees = []
    for taxa_count in num_taxa_s_cat:
        trees.append(spectraltree.lopsided_tree(taxa_count))

    return compare_methods.experiment(
        tree_list=trees,
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
    )
def part11(num_taxa, Ns_bin):
    """Run Part 1.1 (NJ vs. SNJ) on a single balanced binary tree.

    Args:
        num_taxa: Taxa count passed to ``spectraltree.balanced_binary``.
        Ns_bin: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns. The module-level
        ``sequence_model`` and ``mutation_rates`` are used for the run.
    """
    print("Starting Part 11")

    # Both reconstruction methods are driven by the same JC similarity matrix.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
    ]

    # Part 1.1 keeps the tree fixed and varies N.
    reference = spectraltree.balanced_binary(num_taxa)

    return compare_methods.experiment(
        tree_list=[reference],
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
    )
def part21_king(num_taxa_king, Ns_king):
    """Run Part 2.1 (SNJ, NJ and RG) on a single pure-Kingman tree.

    Args:
        num_taxa_king: Taxa count passed to
            ``spectraltree.unrooted_pure_kingman_tree``.
        Ns_king: Forwarded unchanged as ``Ns`` (presumably the sample sizes
            to sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns. The module-level
        ``sequence_model`` and ``mutation_rates`` are used for the run.
    """
    print("Starting Part 21 for kingman tree")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is not part of this comparison.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1 keeps the tree fixed and varies N.
    reference = spectraltree.unrooted_pure_kingman_tree(num_taxa_king)

    return compare_methods.experiment(
        tree_list=[reference],
        sequence_model=sequence_model,
        Ns=Ns_king,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
def part21_bin(num_taxa_bin, Ns_bin):
    """Run Part 2.1 (SNJ, NJ and RG) on a single balanced binary tree.

    Args:
        num_taxa_bin: Taxa count passed to ``spectraltree.balanced_binary``.
        Ns_bin: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns. The module-level
        ``sequence_model`` and ``mutation_rates`` are used for the run.
    """
    print("Starting Part 21 for binary tree")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is not part of this comparison.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1 keeps the tree fixed and varies N.
    reference = spectraltree.balanced_binary(num_taxa_bin)

    return compare_methods.experiment(
        tree_list=[reference],
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
def part11_cat(num_taxa, Ns_cat):
    """Run Part 1.1 (NJ vs. SNJ) on a single lopsided tree and save results.

    Args:
        num_taxa: Taxa count passed to ``spectraltree.lopsided_tree``.
        Ns_cat: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns. The module-level
        ``sequence_model`` and ``mutation_rates`` are used for the run, and
        ``savepath``/``folder`` are passed so the experiment can persist its
        output.
    """
    # NOTE(review): this banner is identical to the one printed by part11,
    # and the savepath below says "res12" although this is Part 1.1 --
    # confirm both strings are intended.
    print("Starting Part 11")

    # Both reconstruction methods are driven by the same JC similarity matrix.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
    ]

    # Part 1.1 keeps the tree fixed and varies N.
    reference = spectraltree.lopsided_tree(num_taxa)

    return compare_methods.experiment(
        tree_list=[reference],
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        verbose=True,
        savepath='20200821_res12_cat_SNJPAPER',
        folder='./experiments/snj_paper_experiments/results/',
    )
def part22_cat(num_taxa_s_cat, Ns_cat):
    """Run Part 2.2 (SNJ, NJ and RG) on lopsided trees of several sizes.

    One lopsided (caterpillar) tree is built per entry of ``num_taxa_s_cat``
    and the whole list is handed to ``compare_methods.experiment`` together
    with the module-level ``sequence_model`` and ``mutation_rates``.

    Args:
        num_taxa_s_cat: Iterable of taxa counts; one tree is built per entry.
        Ns_cat: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns for this setup.
    """
    print("Starting Part 22 for caterpillar trees")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is deliberately not instantiated: it was never added to
    # the list of compared methods, so building it was dead work.
    methods = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # NOTE(review): part21_cat runs the same tree family with "DNA" --
    # confirm that "Binary" is intended here.
    alphabet = "Binary"

    # Part 2.2 varies the number of taxa: one tree per requested size.
    cat_trees = [spectraltree.lopsided_tree(num_taxa) for num_taxa in num_taxa_s_cat]

    return compare_methods.experiment(
        tree_list=cat_trees,
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )
def part21_cat(num_taxa_cat, Ns_cat):
    """Run Part 2.1 (SNJ, NJ and RG) on a single lopsided tree.

    Args:
        num_taxa_cat: Taxa count passed to ``spectraltree.lopsided_tree``.
        Ns_cat: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns. The module-level
        ``sequence_model`` and ``mutation_rates`` are used for the run.
    """
    print("Starting Part 21 for caterpillar tree")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is not part of this comparison.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]

    # Part 2.1 keeps the tree fixed and varies N.
    reference = spectraltree.lopsided_tree(num_taxa_cat)

    return compare_methods.experiment(
        tree_list=[reference],
        sequence_model=sequence_model,
        Ns=Ns_cat,
        methods=contenders,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet="DNA",
        verbose=True,
    )
def test_hky(self):
    """SNJ with the HKY similarity must recover ``self.reference_tree``.

    Sequences of length 5000 are simulated on the reference tree under an
    HKY model (kappa=2) with a fixed seed, then reconstructed with spectral
    neighbor joining; the test passes when both topologies are equal.
    """
    model = spectraltree.HKY(kappa=2)
    observations, meta = spectraltree.simulate_sequences(
        seq_len=5000,
        tree_model=self.reference_tree,
        seq_model=model,
        mutation_rate=0.1,
        rng=default_rng(345),
        alphabet="DNA",
    )

    # The HKY similarity is built from the taxa metadata of the simulation.
    similarity = spectraltree.HKY_similarity_matrix(meta)
    reconstruct = spectraltree.SpectralNeighborJoining(similarity)
    recovered = reconstruct(observations, meta)

    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def test_jukes_cantor(self):
    """SNJ with the JC similarity must recover ``self.reference_tree``.

    Sequences of length 5000 are simulated on the reference tree under a
    Jukes-Cantor model with a fixed seed, then reconstructed with spectral
    neighbor joining; the test passes when both topologies are equal.
    """
    model = spectraltree.Jukes_Cantor()
    observations, meta = spectraltree.simulate_sequences(
        seq_len=5000,
        tree_model=self.reference_tree,
        seq_model=model,
        mutation_rate=0.1,
        rng=default_rng(345),
        alphabet="DNA",
    )

    reconstruct = spectraltree.SpectralNeighborJoining(
        spectraltree.JC_similarity_matrix
    )
    recovered = reconstruct(observations, meta)

    self.assertTrue(spectraltree.topos_equal(self.reference_tree, recovered))
def part22_king(num_taxa_s_king, Ns_king):
    """Run Part 2.2 (SNJ, NJ and RG) on pure-Kingman trees of several sizes.

    One tree is built with ``spectraltree.unrooted_pure_kingman_tree`` per
    entry of ``num_taxa_s_king`` and the whole list is handed to
    ``compare_methods.experiment`` together with the module-level
    ``sequence_model`` and ``mutation_rates``.

    Args:
        num_taxa_s_king: Iterable of taxa counts; one tree is built per entry.
        Ns_king: Forwarded unchanged as ``Ns`` (presumably the sample sizes
            to sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns for this setup.
    """
    print("Starting Part 22 for kingman trees")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is deliberately not instantiated: it was never added to
    # the list of compared methods, so building it was dead work.
    methods = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]
    alphabet = "DNA"

    # Part 2.2 varies the number of taxa: one tree per requested size.
    king_trees = [
        spectraltree.unrooted_pure_kingman_tree(num_taxa)
        for num_taxa in num_taxa_s_king
    ]

    return compare_methods.experiment(
        tree_list=king_trees,
        sequence_model=sequence_model,
        Ns=Ns_king,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )
def part22_bin(num_taxa_s_bin, Ns_bin):
    """Run Part 2.2 (SNJ, NJ and RG) on balanced binary trees of several sizes.

    One tree is built with ``spectraltree.balanced_binary`` per entry of
    ``num_taxa_s_bin`` and the whole list is handed to
    ``compare_methods.experiment`` together with the module-level
    ``sequence_model`` and ``mutation_rates``.

    Args:
        num_taxa_s_bin: Iterable of taxa counts; one tree is built per entry.
        Ns_bin: Forwarded unchanged as ``Ns`` (presumably the sample sizes to
            sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns for this setup.
    """
    print("Starting Part 22 for binary trees")

    # SNJ and NJ take the JC similarity matrix; RG takes the JC distance
    # matrix. CLRG is deliberately not instantiated: it was never added to
    # the list of compared methods, so building it was dead work.
    methods = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.RG(spectraltree.JC_distance_matrix),
    ]
    alphabet = "DNA"

    # Part 2.2 varies the number of taxa: one tree per requested size.
    bin_trees = [spectraltree.balanced_binary(num_taxa) for num_taxa in num_taxa_s_bin]

    return compare_methods.experiment(
        tree_list=bin_trees,
        sequence_model=sequence_model,
        Ns=Ns_bin,
        methods=methods,
        mutation_rates=mutation_rates,
        reps_per_tree=5,
        alphabet=alphabet,
        verbose=True,
    )
def part31_king(num_taxa_king, Ns_king):
    """Run Part 3.1 (SNJ, NJ, Forrest and TreeSVD) on a single tree.

    Unlike the earlier parts, this run reads the module-level
    ``sequence_model2`` and ``mutation_rates2`` and uses the "Binary"
    alphabet.

    Args:
        num_taxa_king: Taxa count passed to
            ``spectraltree.unrooted_birth_death_tree``.
        Ns_king: Forwarded unchanged as ``Ns`` (presumably the sample sizes
            to sweep -- confirm against ``compare_methods.experiment``).

    Returns:
        Whatever ``compare_methods.experiment`` returns for this setup.
    """
    print("Starting Part 31")

    # RG and CLRG are not part of this comparison.
    contenders = [
        spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix),
        spectraltree.Forrest(),
        spectraltree.TreeSVD(),
    ]

    # Part 3.1 keeps the tree fixed and varies N.
    # NOTE(review): the function name says "king", but the tree is produced
    # by unrooted_birth_death_tree -- confirm which model is intended.
    birth_death_tree = spectraltree.unrooted_birth_death_tree(
        num_taxa_king, birth_rate=1
    )

    return compare_methods.experiment(
        tree_list=[birth_death_tree],
        sequence_model=sequence_model2,
        Ns=Ns_king,
        methods=contenders,
        mutation_rates=mutation_rates2,
        reps_per_tree=5,
        alphabet="Binary",
        verbose=True,
    )
seq_model=jc, mutation_rate=mutation_rate, rng=np.random, alphabet='DNA') spectral_method = spectraltree.SpectralTreeReconstruction( spectraltree.NeighborJoining, spectraltree.JC_similarity_matrix) tree_rec = spectral_method.deep_spectral_tree_reconstruction( observations, spectraltree.JC_similarity_matrix, taxa_metadata=meta, threshhold=8, min_split=3, merge_method="least_square", verbose=False) str_cherry_count = cherry_count_for_tree(tree_rec) print("STR Cherry count:", str_cherry_count) ########################################## snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix) tree_rec = snj(observations, meta) snj_cherry_count = cherry_count_for_tree(tree_rec) print("SNJ Cherry count:", snj_cherry_count) ###############################################3 nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix) tree_rec = nj(observations, meta) nj_cherry_count = cherry_count_for_tree(tree_rec) print("NJ Cherry count:", nj_cherry_count)
#alpha_vec = [5,10] #for alpha in alpha_vec: # gamma_vec = np.random.gamma(alpha,1/alpha,(1,N)) # hist, bin_edges = np.histogram(gamma_vec,20) # bin_centers = 0.5*(bin_edges[0:-1]+bin_edges[1:]) # plt.plot(bin_centers,hist,'s') #plt.show() #a=1 #plt.hist(gamma_vec, bins='auto') num_itr = 5 num_taxa = 256 jc = spectraltree.Jukes_Cantor() nj = spectraltree.NeighborJoining(spectraltree.JC_similarity_matrix) snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix) reference_tree = spectraltree.balanced_binary(num_taxa) N_vec = np.arange(100, 1000, 100) gamma_shape_vec = [5, 10, 15, 20] df = pd.DataFrame(columns=['method', 'RF', 'n', 'gamma_shape']) base_rate = jc.p2t(0.9) for i in range(num_itr): for gamma_shape in gamma_shape_vec: #gamma_vec = np.random.gamma(alpha,1/alpha,max(N_vec)) observations, taxa_meta = spectraltree.simulate_sequences_gamma( max(N_vec), reference_tree, jc, base_rate,