def test_threshold_partition(self):
    """Check STDR reconstruction against the reference tree for two ``num_gaps`` values.

    Sequences are simulated once on ``self.reference_tree`` under a
    Jukes-Cantor model with a fixed seed.  The tree is then reconstructed
    with ``num_gaps=4`` and with ``num_gaps=1`` (``threshhold=35`` in both
    runs), and each result must be at RF distance 0 from the reference.
    """
    model = spectraltree.Jukes_Cantor()
    sequences, metadata = spectraltree.simulate_sequences(
        seq_len=1000,
        tree_model=self.reference_tree,
        seq_model=model,
        mutation_rate=model.p2t(0.95),
        rng=np.random.default_rng(321),
    )
    stdr = spectraltree.STDR(
        spectraltree.NeighborJoining,
        spectraltree.JC_similarity_matrix,
    )

    # ``threshhold`` (sic) is the keyword spelling accepted by the callee.
    for gaps in (4, 1):
        rebuilt = stdr.deep_spectral_tree_reconstruction(
            sequences,
            spectraltree.JC_similarity_matrix,
            taxa_metadata=metadata,
            num_gaps=gaps,
            threshhold=35,
        )
        rf_distance, _ = spectraltree.compare_trees(rebuilt, self.reference_tree)
        self.assertEqual(rf_distance, 0)
def test_adjacency_matrix_to_tree(self):
    """Check that a tree built from a fixed adjacency matrix matches ``self.tree``.

    The 7x7 symmetric 0/1 matrix is converted with
    ``spectraltree.adjacency_matrix_to_tree`` and the result must be at
    RF distance 0 from ``self.tree``.
    """
    # Undirected edges of the 7-node graph: nodes 0-3 have a single
    # neighbour each, nodes 4-6 connect them together.
    edges = ((0, 6), (1, 5), (2, 4), (3, 4), (4, 5), (5, 6))
    adjacency = np.zeros((7, 7), dtype=int)
    for u, v in edges:
        adjacency[u, v] = 1
        adjacency[v, u] = 1

    num_taxa = len(self.taxa1)
    rebuilt = spectraltree.adjacency_matrix_to_tree(adjacency, num_taxa, self.taxa1)

    rf_distance, _ = spectraltree.compare_trees(self.tree, rebuilt)
    self.assertEqual(rf_distance, 0)
def test_jukes_cantor(self):
    """Check that RG with JC distances recovers the ``balanced_binary(8)`` topology.

    Binary sequences of length 10,000 are simulated with a two-class
    Jukes-Cantor model and a fixed seed.  The tree recovered by
    ``spectraltree.RG`` must have the same topology as the reference.
    """
    # Alternative reference trees left commented out by the original author:
    # spectraltree.unrooted_birth_death_tree(...) and spectraltree.lopsided_tree(...).
    true_tree = spectraltree.balanced_binary(8)

    binary_jc = spectraltree.Jukes_Cantor(num_classes=2)
    sequences, metadata = spectraltree.simulate_sequences(
        seq_len=10_000,
        tree_model=true_tree,
        seq_model=binary_jc,
        mutation_rate=binary_jc.p2t(0.98),
        rng=default_rng(678),
        alphabet="Binary",
    )

    reconstruct = spectraltree.RG(spectraltree.JC_distance_matrix)
    recovered_tree = reconstruct(sequences, metadata)

    scores = spectraltree.compare_trees(true_tree, recovered_tree)
    print("(RF distance,F1 score):", scores)

    # Original author's note on this check: "this doesn't work very well".
    topologies_match = spectraltree.topos_equal(true_tree, recovered_tree)
    self.assertTrue(topologies_match)
def test_angle_least_square(self):
    """Check STDR on ``lopsided_tree(128)`` with both merge methods.

    Adapted from the deep spectral tree reconstruction test.  For each of
    the ``'least_square'`` and ``'angle'`` merge methods the simulation and
    reconstruction are run twice, and the mean RF distance to the reference
    tree must be 0.  F1 scores are collected but not asserted on.
    """
    seq_len = 1000
    model = spectraltree.Jukes_Cantor()
    rate = model.p2t(0.95)
    repetitions = 2
    target_tree = spectraltree.lopsided_tree(128)

    methods = ('least_square', 'angle')
    rf_by_method = {name: [] for name in methods}
    f1_by_method = {name: [] for name in methods}

    for name in methods:
        for _ in range(repetitions):
            # Every simulation call receives a freshly created generator
            # seeded with 789.
            sequences, metadata = spectraltree.simulate_sequences(
                seq_len=seq_len,
                tree_model=target_tree,
                seq_model=model,
                mutation_rate=rate,
                rng=np.random.default_rng(789),
            )
            stdr = spectraltree.STDR(
                spectraltree.NeighborJoining,
                spectraltree.JC_similarity_matrix,
            )
            # ``threshhold`` (sic) is the keyword spelling accepted by the callee.
            rebuilt = stdr.deep_spectral_tree_reconstruction(
                sequences,
                spectraltree.JC_similarity_matrix,
                taxa_metadata=metadata,
                threshhold=16,
                merge_method=name,
            )
            rf_distance, f1_score = spectraltree.compare_trees(rebuilt, target_tree)
            rf_by_method[name].append(rf_distance)
            f1_by_method[name].append(f1_score)

    for checked in ('angle', 'least_square'):
        self.assertEqual(np.mean(rf_by_method[checked]), 0)
# Diameter of G with edge direction ignored; stored in every result row below.
d = G.diameter(directed=False)
for delta in delta_vec:
    # Map delta to a mutation rate through the model's p2t()
    # (jc is presumably a Jukes-Cantor model -- confirm against its definition).
    mutation_rate = jc.p2t(delta)
    # Simulate once at the largest requested length; each N below reuses the
    # first N columns of this single simulation.
    observations, taxa_meta = spectraltree.simulate_sequences(
        max(N_vec),
        tree_model=reference_tree,
        seq_model=jc,
        mutation_rate=mutation_rate,
        alphabet="DNA")
    for N in N_vec:
        # SNJ: time the reconstruction on the first N columns.
        t_s = time.time()
        tree_snj = snj(observations[:, :N], taxa_meta)
        runtime_snj = time.time() - t_s
        # Only the RF value is recorded; F1 is computed but not stored.
        RF_snj, F1 = spectraltree.compare_trees(tree_snj, reference_tree)
        # NOTE(review): if df is a pandas DataFrame, DataFrame.append was
        # removed in pandas 2.0 and this call needs pd.concat there --
        # confirm the pandas version this script is run with.
        df = df.append(
            {
                'method': 'SNJ',
                'runtime': runtime_snj,
                'RF': RF_snj,
                'm': m,
                'diameter': d,
                'N': N,
                'delta': delta
            },
            ignore_index=True)
        # NJ: same timing pattern on the same N-column prefix.
        t_s = time.time()
        tree_nj = nj(observations[:, :N], taxa_meta)
# NOTE(review): the message mentions HKY, but only the JC simulation is visible
# in this span; "Genration" is a typo in the emitted text.
print("Genration observations by JC and HKY")
observationsJC, metaJC = spectraltree.simulate_sequences(
    N,
    tree_model=reference_tree,
    seq_model=jc,
    mutation_rate=mutation_rate,
    alphabet="DNA")

#################################
## SNJ - Jukes_Cantor
#################################
# The timer starts before the similarity computation, so the reported time
# covers the similarity matrix, metricNearness, both reconstructions and both
# tree comparisons.
t0 = _t()
snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
sim = spectraltree.JC_similarity_matrix(observationsJC, metaJC)
# Clip from below at 1e-16 so the logarithm never sees zero or negative values.
inside_log = np.clip(sim, a_min=1e-16, a_max=None)
# Turn similarities into distances via -log.
dis = -np.log(inside_log)
# Pass the distances through metricNearness
# (presumably a projection onto valid metrics -- confirm against that module).
disC = np.array(metricNearness.metricNearness(dis))
# Map the corrected distances back to similarities.
simC = np.exp(-disC)
# Reconstruct from the raw similarity and from the corrected one.
tree_rec = snj.reconstruct_from_similarity(sim, taxa_metadata=metaJC)
tree_recC = snj.reconstruct_from_similarity(simC, taxa_metadata=metaJC)
# The "C" suffix marks results obtained from the corrected similarity.
RFC, F1C = spectraltree.compare_trees(tree_recC, reference_tree)
RF, F1 = spectraltree.compare_trees(tree_rec, reference_tree)
print("###################")
print("SNJ - Jukes_Cantor:")
print("time:", _t() - t0)
print("RF = ", RF, " F1% = ", F1)
print("RFC = ", RFC, " F1C% = ", F1C)
print("")
alphabet="DNA") # observations, taxa_meta = spectraltree.simulate_sequences(max(N_vec), tree_model=reference_tree, seq_model=jc, mutation_rate=base_rate, alphabet="DNA") #observations,taxa_meta = generate_sequences_gamma(reference_tree,num_taxa,jc, # gamma_vec,basic_rate) for n in N_vec: print('iteration: ', i, 'gamma_shape: ', gamma_shape, ' n: ', n) # estimate distance with hetero via raxml dist_raxml = raxml_gamma_corrected_distance_matrix( observations[:, :n], taxa_meta) # NJ - estimate tree with heterogeneity tree_nj_het = spectraltree.NeighborJoining( lambda x: x).reconstruct_from_similarity( np.exp(-dist_raxml), taxa_meta) RF, F1 = spectraltree.compare_trees(tree_nj_het, reference_tree) df = df.append( { 'method': 'NJ-het', 'RF': RF, 'n': n, 'gamma_shape': gamma_shape }, ignore_index=True) # NJ - estimate tree via standard distance #tree_nj_standard = nj(observations[:,:n],taxa_meta) #RF,F1 = spectraltree.compare_trees(tree_nj_standard, reference_tree) #df = df.append({'method': 'NJ', 'RF': RF,'n': n,'gamma_shape':gamma_shape}, ignore_index=True) #SNJ - estimate tree with heterogeneity