Ejemplo n.º 1
0
    def test_threshold_partition(self):
        """Check that STDR reproduces ``self.reference_tree`` for two ``num_gaps`` values.

        One set of Jukes-Cantor sequences is simulated from the reference
        tree with a fixed seed.  The STDR reconstruction is then run twice on
        that same data (``num_gaps=4`` and ``num_gaps=1``, both with
        ``threshhold=35``); each result must have an RF value of 0 against
        the reference tree.
        """
        model = spectraltree.Jukes_Cantor()
        sequences, metadata = spectraltree.simulate_sequences(
            seq_len=1000,
            tree_model=self.reference_tree,
            seq_model=model,
            mutation_rate=model.p2t(0.95),
            rng=np.random.default_rng(321))

        stdr = spectraltree.STDR(spectraltree.NeighborJoining,
                                 spectraltree.JC_similarity_matrix)

        # Same observations and threshold for both runs; only num_gaps varies.
        for gap_count in (4, 1):
            recovered = stdr.deep_spectral_tree_reconstruction(
                sequences,
                spectraltree.JC_similarity_matrix,
                taxa_metadata=metadata,
                num_gaps=gap_count,
                threshhold=35)  # keyword spelling (sic) kept exactly as before
            rf_distance, _ = spectraltree.compare_trees(
                recovered, self.reference_tree)
            self.assertEqual(rf_distance, 0)
Ejemplo n.º 2
0
    def test_adjacency_matrix_to_tree(self):
        """Convert a fixed 7-node adjacency matrix and compare with ``self.tree``.

        The matrix is symmetric with 0/1 entries.  It is passed to
        ``spectraltree.adjacency_matrix_to_tree`` together with
        ``len(self.taxa1)`` and ``self.taxa1``; the resulting tree must have
        an RF value of 0 against ``self.tree``.
        """
        # Undirected edges of the graph; nodes 0-3 each have a single
        # neighbour, nodes 4-6 connect them.
        edges = [(0, 6), (1, 5), (2, 4), (3, 4), (4, 5), (5, 6)]
        adjacency = np.zeros((7, 7), dtype=int)
        for u, v in edges:
            adjacency[u, v] = 1
            adjacency[v, u] = 1

        converted = spectraltree.adjacency_matrix_to_tree(
            adjacency, len(self.taxa1), self.taxa1)

        rf_distance, _ = spectraltree.compare_trees(self.tree, converted)
        self.assertEqual(rf_distance, 0)
Ejemplo n.º 3
0
    def test_jukes_cantor(self):
        """Run RG with JC distances on simulated binary data and check topology.

        Sequences of length 10_000 are simulated (seeded) from the tree
        returned by ``spectraltree.balanced_binary(8)`` under a two-class
        Jukes-Cantor model.  The RG reconstruction is printed alongside its
        comparison scores and must be topologically equal to the source tree.
        """
        true_tree = spectraltree.balanced_binary(8)

        binary_jc = spectraltree.Jukes_Cantor(num_classes=2)
        sequences, metadata = spectraltree.simulate_sequences(
            seq_len=10_000,
            tree_model=true_tree,
            seq_model=binary_jc,
            mutation_rate=binary_jc.p2t(0.98),
            rng=default_rng(678),
            alphabet="Binary")

        reconstruct = spectraltree.RG(spectraltree.JC_distance_matrix)
        estimated_tree = reconstruct(sequences, metadata)

        # Diagnostic output only; the assertion below is the actual check.
        scores = spectraltree.compare_trees(true_tree, estimated_tree)
        print("(RF distance,F1 score):", scores)

        # Caveat carried over from the original author: this check
        # "doesn't work very well".
        self.assertTrue(spectraltree.topos_equal(true_tree, estimated_tree))
Ejemplo n.º 4
0
    def test_angle_least_square(self):
        """Check both STDR merge methods on a lopsided tree.

        For each of the ``'least_square'`` and ``'angle'`` merge methods the
        simulate / reconstruct / compare cycle is repeated twice on the tree
        from ``spectraltree.lopsided_tree(128)``.  The mean RF value per
        method must be 0.  F1 values are collected but not asserted on.
        """
        # Adapted from the deep-spectral-tree-reconstruction test.
        seq_len = 1000
        model = spectraltree.Jukes_Cantor()
        rate = model.p2t(0.95)
        repetitions = 2
        lopsided = spectraltree.lopsided_tree(128)

        merge_methods = ('least_square', 'angle')
        rf_by_method = {name: [] for name in merge_methods}
        f1_by_method = {name: [] for name in merge_methods}

        for merge_method in merge_methods:
            for _ in range(repetitions):
                # NOTE(review): the generator is rebuilt from the same seed on
                # every pass, so -- assuming simulate_sequences draws all of
                # its randomness from `rng` -- each repetition sees identical
                # data. Confirm whether that is intended.
                sequences, metadata = spectraltree.simulate_sequences(
                    seq_len=seq_len,
                    tree_model=lopsided,
                    seq_model=model,
                    mutation_rate=rate,
                    rng=np.random.default_rng(789))
                stdr = spectraltree.STDR(
                    spectraltree.NeighborJoining,
                    spectraltree.JC_similarity_matrix)
                recovered = stdr.deep_spectral_tree_reconstruction(
                    sequences,
                    spectraltree.JC_similarity_matrix,
                    taxa_metadata=metadata,
                    threshhold=16,  # keyword spelling (sic) kept as before
                    merge_method=merge_method)
                rf_value, f1_value = spectraltree.compare_trees(
                    recovered, lopsided)
                rf_by_method[merge_method].append(rf_value)
                f1_by_method[merge_method].append(f1_value)

        self.assertEqual(np.mean(rf_by_method['angle']), 0)
        self.assertEqual(np.mean(rf_by_method['least_square']), 0)
Ejemplo n.º 5
0
    # Fragment of a benchmark routine: for every delta in delta_vec and every
    # length N in N_vec, time each reconstruction method and append one result
    # row per method to `df`.
    # Diameter of G computed with directed=False; stored in every result row.
    d = G.diameter(directed=False)
    for delta in delta_vec:
        # Convert delta into a mutation rate using the model's p2t helper.
        mutation_rate = jc.p2t(delta)
        # Simulate once per delta using max(N_vec) (presumably the sequence
        # length -- confirm against simulate_sequences' signature); the inner
        # loop reuses the first N columns of these observations.
        observations, taxa_meta = spectraltree.simulate_sequences(
            max(N_vec),
            tree_model=reference_tree,
            seq_model=jc,
            mutation_rate=mutation_rate,
            alphabet="DNA")
        for N in N_vec:

            # SNJ
            # Wall-clock time covers only the reconstruction call itself.
            t_s = time.time()
            tree_snj = snj(observations[:, :N], taxa_meta)
            runtime_snj = time.time() - t_s
            # The second returned value (F1) is not used in the visible code.
            RF_snj, F1 = spectraltree.compare_trees(tree_snj, reference_tree)
            # NOTE(review): if `df` is a pandas DataFrame, DataFrame.append was
            # removed in pandas 2.0; pandas.concat is the replacement.
            # NOTE(review): `m` is not defined in the visible part of this
            # routine -- confirm where it comes from.
            df = df.append(
                {
                    'method': 'SNJ',
                    'runtime': runtime_snj,
                    'RF': RF_snj,
                    'm': m,
                    'diameter': d,
                    'N': N,
                    'delta': delta
                },
                ignore_index=True)

            #NJ
            # Same timing pattern as above; the remainder of this section is
            # outside the visible fragment.
            t_s = time.time()
            tree_nj = nj(observations[:, :N], taxa_meta)
# Script section: simulate DNA sequences under the `jc` model and compare SNJ
# reconstructions from the raw similarity matrix and from a corrected one.
# N, reference_tree, jc, mutation_rate, _t and metricNearness are defined
# outside this section.
print("Genration observations by JC and HKY")

# Only the JC simulation is visible here; any HKY counterpart is elsewhere.
observationsJC, metaJC = spectraltree.simulate_sequences(
    N,
    tree_model=reference_tree,
    seq_model=jc,
    mutation_rate=mutation_rate,
    alphabet="DNA")

#################################
## SNJ - Jukes_Cantor
#################################
# Start of the timed section (assumes _t is a clock function -- confirm).
t0 = _t()
snj = spectraltree.SpectralNeighborJoining(spectraltree.JC_similarity_matrix)
sim = spectraltree.JC_similarity_matrix(observationsJC, metaJC)
# Clip from below at 1e-16 so the logarithm is never taken of a value <= 0.
inside_log = np.clip(sim, a_min=1e-16, a_max=None)
# Similarity -> distance via the negative logarithm.
dis = -np.log(inside_log)
# Corrected distances from the metricNearness helper (presumably the nearest
# matrix satisfying the triangle inequality -- verify against that module).
disC = np.array(metricNearness.metricNearness(dis))
# Corrected distance -> similarity (inverse of the transform above).
simC = np.exp(-disC)
# Reconstruct once from the raw similarity and once from the corrected one.
tree_rec = snj.reconstruct_from_similarity(sim, taxa_metadata=metaJC)
tree_recC = snj.reconstruct_from_similarity(simC, taxa_metadata=metaJC)
# The "C" suffix marks results obtained from the corrected similarity.
RFC, F1C = spectraltree.compare_trees(tree_recC, reference_tree)
RF, F1 = spectraltree.compare_trees(tree_rec, reference_tree)

print("###################")
print("SNJ - Jukes_Cantor:")
# The reported time spans everything since t0: similarity computation, the
# metricNearness correction, both reconstructions and both comparisons.
print("time:", _t() - t0)
print("RF = ", RF, "    F1% = ", F1)
print("RFC = ", RFC, "    F1C% = ", F1C)
print("")
            alphabet="DNA")
        # observations, taxa_meta = spectraltree.simulate_sequences(max(N_vec), tree_model=reference_tree, seq_model=jc, mutation_rate=base_rate, alphabet="DNA")
        #observations,taxa_meta = generate_sequences_gamma(reference_tree,num_taxa,jc,
        #    gamma_vec,basic_rate)
        # For each length n in N_vec, reconstruct a tree from the first n
        # columns of `observations` and append one result row per method.
        # `i` and `gamma_shape` come from enclosing code that is not visible
        # in this fragment.
        for n in N_vec:
            print('iteration: ', i, 'gamma_shape: ', gamma_shape, ' n: ', n)

            # Estimate pairwise distances that account for rate heterogeneity
            # via RAxML (helper defined outside this fragment).
            dist_raxml = raxml_gamma_corrected_distance_matrix(
                observations[:, :n], taxa_meta)

            # NJ - estimate tree with heterogeneity
            # The similarity is supplied directly as exp(-distance), so the
            # NeighborJoining instance is built with an identity function in
            # place of a similarity estimator.
            tree_nj_het = spectraltree.NeighborJoining(
                lambda x: x).reconstruct_from_similarity(
                    np.exp(-dist_raxml), taxa_meta)
            # F1 is not used in the visible code; only RF is recorded.
            RF, F1 = spectraltree.compare_trees(tree_nj_het, reference_tree)
            # NOTE(review): if `df` is a pandas DataFrame, DataFrame.append was
            # removed in pandas 2.0; pandas.concat is the replacement.
            df = df.append(
                {
                    'method': 'NJ-het',
                    'RF': RF,
                    'n': n,
                    'gamma_shape': gamma_shape
                },
                ignore_index=True)

            # NJ - estimate tree via standard distance
            # (disabled: the baseline below is commented out)
            #tree_nj_standard = nj(observations[:,:n],taxa_meta)
            #RF,F1 = spectraltree.compare_trees(tree_nj_standard, reference_tree)
            #df = df.append({'method': 'NJ', 'RF': RF,'n': n,'gamma_shape':gamma_shape}, ignore_index=True)

            #SNJ - estimate tree with heterogeneity