def test_with_recurrent_mutations(self):
    # Many mutations at a single site must be preserved by simplify.
    # (actually with only ONE site, at 0.0)
    pop_size = 10
    num_generations = 100
    tables = wf_sim(
        N=pop_size,
        ngens=num_generations,
        deep_history=False,
        seed=self.random_seed,
    )
    tables.sort()
    # num_sites=1, mu=10: every mutation is placed on the same site.
    ts = tsutil.jukes_cantor(
        tables.tree_sequence(), 1, 10, seed=self.random_seed
    )
    tables = ts.tables
    self.assertEqual(tables.sites.num_rows, 1)
    self.assertGreater(tables.mutations.num_rows, 0)

    # Before simplify: one site means haplotypes of length one.
    for haplotype in ts.haplotypes():
        self.assertEqual(len(haplotype), 1)

    # After simplify: nothing should have been emptied out.
    tables.sort()
    tables.simplify()
    self.assertGreater(tables.nodes.num_rows, 0)
    self.assertGreater(tables.edges.num_rows, 0)
    self.assertEqual(tables.sites.num_rows, 1)
    self.assertGreater(tables.mutations.num_rows, 0)

    simplified = tables.tree_sequence()
    self.assertEqual(simplified.sample_size, pop_size)
    for haplotype in simplified.haplotypes():
        self.assertEqual(len(haplotype), simplified.num_sites)
def test_jukes_cantor_balanced_ternary_multiroot(self):
    # Balanced arity-3 tree on 50 leaves, decapitated at a third of its
    # edges (presumably leaving several roots -- see tsutil.decapitate),
    # then mutated and verified with and without jiggled samples.
    balanced_ts = tskit.Tree.generate_balanced(50, arity=3).tree_sequence
    cut_ts = tsutil.decapitate(balanced_ts, balanced_ts.num_edges // 3)
    mutated_ts = tsutil.jukes_cantor(cut_ts, 15, 2, seed=3)
    self.verify(mutated_ts)
    assert mutated_ts.num_sites > 1
    self.verify(tsutil.jiggle_samples(mutated_ts))
def test_jukes_cantor_n20_felsenstein_matrix(self):
    # 4x4 cost matrix: 0 on the diagonal, cost 1 between states 0<->2 and
    # 1<->3, and cost 2.5 for every other change.
    cost_matrix = np.array(
        [
            [0, 2.5, 1, 2.5],
            [2.5, 0, 2.5, 1],
            [1, 2.5, 0, 2.5],
            [2.5, 1, 2.5, 0],
        ]
    )
    base_ts = msprime.simulate(20, random_seed=1)
    mutated_ts = tsutil.jukes_cantor(base_ts, 5, 2, seed=1)
    self.verify_jukes_cantor(mutated_ts, cost_matrix)
def test_with_recurrent_mutations(self):
    # Recurrent mutations on a single site must survive simplification
    # through the table-based msprime API.
    # (actually with only ONE site, at 0.0)
    N = 10
    ngens = 100
    tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)
    msprime.sort_tables(**tables.asdict())
    ts = msprime.load_tables(**tables.asdict())
    # num_sites=1, mu=10: all mutations land on the same site.
    ts = tsutil.jukes_cantor(ts, 1, 10, seed=self.random_seed)
    tables = ts.tables
    self.assertEqual(tables.sites.num_rows, 1)
    self.assertGreater(tables.mutations.num_rows, 0)
    # Node flags are a bitfield: test the sample bit instead of comparing
    # the whole flags word, so nodes with extra bits set are not dropped.
    is_sample = (tables.nodes.flags & msprime.NODE_IS_SAMPLE) != 0
    samples = np.where(is_sample)[0].astype(np.int32)
    # before simplify
    for h in ts.haplotypes():
        self.assertEqual(len(h), 1)
    # after simplify
    msprime.simplify_tables(
        samples=samples,
        nodes=tables.nodes,
        edges=tables.edges,
        sites=tables.sites,
        mutations=tables.mutations,
    )
    self.assertGreater(tables.nodes.num_rows, 0)
    self.assertGreater(tables.edges.num_rows, 0)
    self.assertEqual(tables.sites.num_rows, 1)
    self.assertGreater(tables.mutations.num_rows, 0)
    ts = msprime.load_tables(**tables.asdict())
    self.assertEqual(ts.sample_size, N)
    for hap in ts.haplotypes():
        self.assertEqual(len(hap), ts.num_sites)
def test_with_mutations(self):
    # Run wf_sim, overlay mutations with tsutil.jukes_cantor (10 sites,
    # mu=0.1), then check that simplifying down to the sample nodes keeps
    # nodes, edges, sites and mutations, and that every haplotype has one
    # entry per site.
    N = 10
    ngens = 100
    tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)
    tables.sort()
    ts = tables.tree_sequence()
    ts = tsutil.jukes_cantor(ts, 10, 0.1, seed=self.random_seed)
    tables = ts.tables
    assert tables.sites.num_rows > 0
    assert tables.mutations.num_rows > 0
    # Node flags are a bitfield: test the sample bit instead of comparing
    # the whole flags word, so nodes with extra bits set are not dropped.
    is_sample = (tables.nodes.flags & tskit.NODE_IS_SAMPLE) != 0
    samples = np.where(is_sample)[0].astype(np.int32)
    tables.sort()
    tables.simplify(samples)
    # Each table is checked once (the original repeated nodes/edges twice).
    assert tables.nodes.num_rows > 0
    assert tables.edges.num_rows > 0
    assert tables.sites.num_rows > 0
    assert tables.mutations.num_rows > 0
    ts = tables.tree_sequence()
    assert ts.sample_size == N
    for hap in ts.haplotypes():
        assert len(hap) == ts.num_sites
def test_with_recurrent_mutations(self):
    # Many mutations at a single site must be preserved by simplify.
    # (actually with only ONE site, at 0.0)
    pop_size = 10
    num_generations = 100
    tables = wf_sim(
        N=pop_size,
        ngens=num_generations,
        deep_history=False,
        seed=self.random_seed,
    )
    tables.sort()
    # num_sites=1, mu=10: every mutation is placed on the same site.
    ts = tsutil.jukes_cantor(
        tables.tree_sequence(), 1, 10, seed=self.random_seed
    )
    tables = ts.tables
    assert tables.sites.num_rows == 1
    assert tables.mutations.num_rows > 0

    # Before simplify: one site means haplotypes of length one.
    for haplotype in ts.haplotypes():
        assert len(haplotype) == 1

    # After simplify: nothing should have been emptied out.
    tables.sort()
    tables.simplify()
    assert tables.nodes.num_rows > 0
    assert tables.edges.num_rows > 0
    assert tables.sites.num_rows == 1
    assert tables.mutations.num_rows > 0

    simplified = tables.tree_sequence()
    assert simplified.sample_size == pop_size
    for haplotype in simplified.haplotypes():
        assert len(haplotype) == simplified.num_sites
def test_with_mutations(self):
    # Run wf_sim, overlay mutations with tsutil.jukes_cantor (10 sites,
    # mu=0.1), then check that simplifying down to the sample nodes keeps
    # nodes, edges, sites and mutations, and that every haplotype has one
    # entry per site.
    N = 10
    ngens = 100
    tables = wf_sim(N=N, ngens=ngens, deep_history=False, seed=self.random_seed)
    tables.sort()
    ts = msprime.load_tables(**tables.asdict())
    ts = tsutil.jukes_cantor(ts, 10, 0.1, seed=self.random_seed)
    tables = ts.tables
    self.assertGreater(tables.sites.num_rows, 0)
    self.assertGreater(tables.mutations.num_rows, 0)
    # Node flags are a bitfield: test the sample bit instead of comparing
    # the whole flags word, so nodes with extra bits set are not dropped.
    is_sample = (tables.nodes.flags & msprime.NODE_IS_SAMPLE) != 0
    samples = np.where(is_sample)[0].astype(np.int32)
    tables.sort()
    tables.simplify(samples)
    # Each table is checked once (the original repeated nodes/edges twice).
    self.assertGreater(tables.nodes.num_rows, 0)
    self.assertGreater(tables.edges.num_rows, 0)
    self.assertGreater(tables.sites.num_rows, 0)
    self.assertGreater(tables.mutations.num_rows, 0)
    ts = msprime.load_tables(**tables.asdict())
    self.assertEqual(ts.sample_size, N)
    for hap in ts.haplotypes():
        self.assertEqual(len(hap), ts.num_sites)
def create_data(length):
    """
    Return a 10-sample tree sequence of the given sequence length after
    passing it through ``tsutil.jukes_cantor`` with ``length`` sites.

    Asserts that the result has exactly ``length`` sites before returning.
    """
    simulated = msprime.simulate(
        sample_size=10,
        length=length,
        mutation_rate=1e-2,
        random_seed=123,
    )
    mutated = tsutil.jukes_cantor(simulated, length, 1.0, seed=123)
    assert mutated.num_sites == length
    return mutated
def test_silent_mutations(self):
    # Count mutations whose derived state equals that of their parent
    # mutation, and require more than 20 of them.
    ts = tsutil.jukes_cantor(msprime.simulate(50, random_seed=1), 5, 2, seed=2)
    num_silent = sum(
        1
        for mut in ts.mutations()
        # -1 means the mutation has no parent mutation to compare against.
        if mut.parent != -1
        and ts.mutation(mut.parent).derived_state == mut.derived_state
    )
    assert num_silent > 20
def get_wf_sims(seed):
    """
    Return a list of tree sequences from ``wf_sim`` with mutations added
    by ``tsutil.jukes_cantor``, one per combination of population size,
    survival, mutation rate and number of sites.
    """
    # Same iteration order as four nested loops: N outermost, nloci innermost.
    param_grid = [
        (pop_size, survival, mu, num_sites)
        for pop_size in [5, 10, 20]
        for survival in [0.0, 0.5, 0.9]
        for mu in [0.01, 1.0]
        for num_sites in [1, 2, 3]
    ]
    wf_sims = []
    for pop_size, survival, mu, num_sites in param_grid:
        # The number of generations is set equal to the population size.
        tables = wf_sim(N=pop_size, ngens=pop_size, survival=survival, seed=seed)
        tables.sort()
        mutated = tsutil.jukes_cantor(
            tables.tree_sequence(), num_sites=num_sites, mu=mu, seed=seed
        )
        wf_sims.append(mutated)
    return wf_sims
def get_wf_sims(self, seed):
    """
    Yield example tree sequences produced by the WF simulator, one per
    combination of population size, survival, mutation rate and number
    of sites. Each is checked with ``verify_simulation`` before it is
    yielded.
    """
    # Same iteration order as four nested loops: N outermost, nloci innermost.
    param_grid = [
        (pop_size, survival, mu, num_sites)
        for pop_size in [5, 10, 20]
        for survival in [0.0, 0.5, 0.9]
        for mu in [0.01, 1.0]
        for num_sites in [1, 2, 3]
    ]
    for pop_size, survival, mu, num_sites in param_grid:
        # The number of generations is set equal to the population size.
        tables = wf_sim(N=pop_size, ngens=pop_size, survival=survival, seed=seed)
        msprime.sort_tables(**tables.asdict())
        loaded = msprime.load_tables(**tables.asdict())
        mutated = tsutil.jukes_cantor(
            loaded, num_sites=num_sites, mu=mu, seed=seed
        )
        self.verify_simulation(mutated, ngens=pop_size)
        yield mutated
def test_jukes_cantor_n50_internal_samples(self):
    # 50-sample simulation with 5 sites (mu=2); verified after the samples
    # are reassigned by tsutil.jiggle_samples (presumably onto internal
    # nodes, going by the test name).
    base_ts = msprime.simulate(50, random_seed=1)
    mutated_ts = tsutil.jukes_cantor(base_ts, 5, 2, seed=2)
    jiggled_ts = tsutil.jiggle_samples(mutated_ts)
    self.verify(jiggled_ts)
def test_jukes_cantor_n50(self):
    # 50-sample simulation with 5 sites (mu=2), verified as-is.
    base_ts = msprime.simulate(50, random_seed=1)
    mutated_ts = tsutil.jukes_cantor(base_ts, 5, 2, seed=2)
    self.verify(mutated_ts)
def test_jukes_cantor_n20_simple_matrix(self):
    # Unit-cost matrix: every change costs 1, staying in a state costs 0.
    cost_matrix = np.ones((4, 4))
    np.fill_diagonal(cost_matrix, 0)
    base_ts = msprime.simulate(20, random_seed=1)
    mutated_ts = tsutil.jukes_cantor(base_ts, 5, 2, seed=1)
    self.verify_jukes_cantor(mutated_ts, cost_matrix)
def test_jukes_cantor_n15_multiroot(self):
    # 15-sample simulation decapitated at a third of its edges (presumably
    # leaving several roots -- see tsutil.decapitate), then mutated with
    # 15 sites (mu=2) and verified.
    base_ts = msprime.simulate(15, random_seed=1)
    cut_ts = tsutil.decapitate(base_ts, base_ts.num_edges // 3)
    mutated_ts = tsutil.jukes_cantor(cut_ts, 15, 2, seed=3)
    self.verify(mutated_ts)
def test_jukes_cantor_balanced_ternary_internal_samples(self):
    # Balanced arity-3 tree on 27 leaves with 5 sites (mu=2); verified
    # after the samples are reassigned by tsutil.jiggle_samples.
    balanced_ts = tskit.Tree.generate_balanced(27, arity=3).tree_sequence
    mutated_ts = tsutil.jukes_cantor(balanced_ts, 5, 2, seed=1)
    assert mutated_ts.num_sites > 1
    jiggled_ts = tsutil.jiggle_samples(mutated_ts)
    self.verify(jiggled_ts)
def test_jukes_cantor_n_15(self):
    # 15-sample simulation with 10 sites (mu=0.1), verified against the
    # ACGT allele set.
    base_ts = msprime.simulate(15, mutation_rate=2, random_seed=2)
    mutated_ts = tsutil.jukes_cantor(base_ts, num_sites=10, mu=0.1, seed=10)
    self.verify(mutated_ts, tskit.ALLELES_ACGT)
def test_jukes_cantor_n_8_high_recombination(self):
    # 8-sample simulation with recombination_rate=20, 20 sites (mu=5),
    # verified against the ACGT allele set.
    base_ts = msprime.simulate(8, recombination_rate=20, random_seed=2)
    mutated_ts = tsutil.jukes_cantor(base_ts, num_sites=20, mu=5, seed=4)
    self.verify(mutated_ts, tskit.ALLELES_ACGT)
def test_jukes_cantor_balanced_ternary(self):
    # Balanced arity-3 tree on 27 leaves with 10 sites (mu=0.1), verified
    # against the ACGT allele set.
    balanced_tree = tskit.Tree.generate_balanced(27, arity=3)
    mutated_ts = tsutil.jukes_cantor(
        balanced_tree.tree_sequence, num_sites=10, mu=0.1, seed=10
    )
    self.verify(mutated_ts, tskit.ALLELES_ACGT)
def test_jukes_cantor_n50_multiroot(self):
    # 50-sample simulation decapitated at half of its edges (presumably
    # leaving several roots -- see tsutil.decapitate), then mutated with
    # 5 sites (mu=2) and verified.
    base_ts = msprime.simulate(50, random_seed=1)
    cut_ts = tsutil.decapitate(base_ts, base_ts.num_edges // 2)
    mutated_ts = tsutil.jukes_cantor(cut_ts, 5, 2, seed=2)
    self.verify(mutated_ts)
def test_jukes_cantor_leaf_polytomy_n5(self):
    # 5-leaf tree selected by rank (7, 0) -- per the test name, one with a
    # polytomy at the leaves -- mutated with 5 sites (mu=2) and verified.
    ranked_tree = tskit.Tree.unrank(5, (7, 0))
    mutated_ts = tsutil.jukes_cantor(ranked_tree.tree_sequence, 5, 2, seed=1)
    assert mutated_ts.num_sites > 2
    self.verify(mutated_ts)