Beispiel #1
0
 def _run_msprime_coalescent_stats(self, args):
     """
     Runs the mspms simulation described by the command line string
     ``args`` and returns a pandas DataFrame with one row per replicate.

     The columns are ``t`` (simulation time divided by 4), ``num_trees``
     (breakpoints + 1), ``ca_events``, ``re_events`` and one
     ``mig_events_<k>`` column per cell of the flattened migration event
     matrix.
     """
     print("\t msprime:", args)
     runner = cli.get_mspms_runner(args.split())
     simulator = runner.get_simulator()
     # Each call uses a freshly drawn seed for the simulator.
     seed = random.randint(1, 2**32 - 1)
     simulator.set_random_generator(msprime.RandomGenerator(seed))
     num_populations = simulator.get_num_populations()
     num_replicates = runner.get_num_replicates()

     times = []
     tree_counts = []
     ca_counts = []
     re_counts = []
     migration_rows = []
     for _ in range(num_replicates):
         simulator.reset()
         simulator.run()
         tree_counts.append(simulator.get_num_breakpoints() + 1)
         # Divide by 4 to convert to coalescent units.
         times.append(simulator.get_time() / 4)
         ca_counts.append(simulator.get_num_common_ancestor_events())
         re_counts.append(simulator.get_num_recombination_events())
         # Flatten the migration event matrix row by row.
         flattened = []
         for matrix_row in simulator.get_num_migration_events():
             flattened.extend(matrix_row)
         migration_rows.append(flattened)

     columns = {
         "t": times,
         "num_trees": tree_counts,
         "ca_events": ca_counts,
         "re_events": re_counts
     }
     for cell in range(num_populations**2):
         column_name = "mig_events_{}".format(cell)
         columns[column_name] = [flat[cell] for flat in migration_rows]
     return pd.DataFrame(columns)
Beispiel #2
0
 def test_recombination_n100(self):
     """
     Verifies a full-ARG simulation of 100 samples with recombination
     rate 0.2 and a generator seeded with 100.
     """
     simulator = msprime.simulator_factory(
         100,
         recombination_rate=0.2,
         record_full_arg=True,
         random_generator=msprime.RandomGenerator(100))
     self.verify(simulator)
Beispiel #3
0
def test():
    """
    Collects mutation counts from two sources and returns them as a pair
    of lists.

    The first list holds, for each of 1000 ``wfrec(100, 100, 1000, 100)``
    tree sequences, the number of mutations after generating mutations at
    rate 100 / 4000 with a shared generator seeded with 84. The second
    list holds the number of mutations in each of 1000 replicates of
    ``msprime.simulate`` with 100 samples and mutation rate 25.
    """
    np.random.seed(42)
    msp_rng = msprime.RandomGenerator(84)
    forward_counts = []
    for replicate in range(1000):
        # Progress indicator.
        print(replicate)
        ts = wfrec(100, 100, 1000, 100)
        site_table = msprime.SiteTable()
        mutation_table = msprime.MutationTable()
        generator = msprime.MutationGenerator(msp_rng, 100. / 4000.)
        generator.generate(
            ts.tables.nodes, ts.tables.edges, site_table, mutation_table)
        # Rebuild the tree sequence so it carries the new mutations.
        ts = ts.load_tables(
            nodes=ts.tables.nodes,
            edges=ts.tables.edges,
            sites=site_table,
            mutations=mutation_table)
        forward_counts.append(ts.num_mutations)

    coalescent_replicates = msprime.simulate(
        100, recombination_rate=0, mutation_rate=25, num_replicates=1000)
    coalescent_counts = [
        replicate_ts.num_mutations for replicate_ts in coalescent_replicates]
    return forward_counts, coalescent_counts
Beispiel #4
0
 def test_multimerger(self):
     """
     Verifies a full-ARG simulation that includes an instantaneous
     bottleneck, allowing multiple mergers in the verification.
     """
     bottleneck = msprime.InstantaneousBottleneck(
         time=0.1, population=0, strength=5)
     simulator = msprime.simulator_factory(
         100,
         recombination_rate=0.1,
         record_full_arg=True,
         random_generator=msprime.RandomGenerator(1234),
         demographic_events=[bottleneck])
     self.verify(simulator, multiple_mergers=True)
Beispiel #5
0
 def run_replicate(self, j, treefile):
     """
     Runs one simulation replicate, writes its output to ``treefile`` and
     records the replicate's statistics on this object.

     ``j`` is the index used for the per-replicate statistics. When
     ``self.generate_trees`` is set the tree sequence is dumped to
     ``treefile``; when ``self.generate_haplotypes`` is set the haplotypes
     are written to ``treefile`` as text, one per line.
     """
     uniform_map = msprime.RecombinationMap.uniform_map(
         self.num_loci, self.recombination_rate, self.num_loci)
     sim = msprime.simulator_factory(
         self.sample_size,
         recombination_map=uniform_map,
         Ne=self.effective_population_size)
     sim.run()

     # Mutations are only generated when haplotypes are requested.
     mutation_rate = self.mutation_rate if self.generate_haplotypes else 0
     mutation_seed = random.randint(1, 2**31)
     mutation_generator = msprime.MutationGenerator(
         msprime.RandomGenerator(mutation_seed), mutation_rate)
     tree_sequence = sim.get_tree_sequence(mutation_generator)

     if self.generate_trees:
         tree_sequence.dump(treefile)
     if self.generate_haplotypes:
         # Opened in "w" mode, so this replaces anything dumped above.
         with open(treefile, "w") as output:
             for haplotype in tree_sequence.haplotypes():
                 print(haplotype, file=output)

     self.tree_file_size[j] = os.path.getsize(treefile)
     self.used_memory[j] = sim.used_memory
     self.num_trees[j] = sim.num_breakpoints
     # NOTE(review): stored without the [j] index, so every replicate
     # overwrites the previous value - confirm this is intended.
     self.num_multiple_re_events = sim.num_multiple_recombination_events
     self.num_re_events[j] = sim.num_recombination_events
     self.num_ca_events[j] = sim.num_common_ancestor_events
     self.num_records[j] = tree_sequence.num_edges
     self.num_nodes[j] = tree_sequence.num_nodes
     # NOTE(review): also stored without the [j] index - confirm.
     self.num_records_per_tree = get_mean_records_per_tree(tree_sequence)
Beispiel #6
0
 def verify_simulation(self, n, m, r):
     """
     Runs a simulation of ``n`` samples over a uniform recombination map
     built from ``m`` and ``r`` and checks the simulator's counters and
     the resulting tree sequence.
     """
     uniform_map = msprime.RecombinationMap.uniform_map(m, r, num_loci=m)
     rng = msprime.RandomGenerator(1)
     sim = msprime.simulator_factory(
         n, recombination_map=uniform_map, random_generator=rng)
     self.assertEqual(sim.random_generator, rng)
     sim.run()
     self.assertEqual(sim.num_breakpoints, len(sim.breakpoints))
     self.assertGreater(sim.time, 0)
     self.assertGreater(sim.num_avl_node_blocks, 0)
     self.assertGreater(sim.num_segment_blocks, 0)
     self.assertGreater(sim.num_node_mapping_blocks, 0)
     tree_sequence = sim.get_tree_sequence()
     # The simulation time must equal the largest node time; 0.0 is the
     # floor used when no node has a positive time.
     node_times = [node.time for node in tree_sequence.nodes()]
     latest_node_time = max([0.0] + node_times)
     self.assertEqual(sim.time, latest_node_time)
     self.assertGreater(sim.num_common_ancestor_events, 0)
     self.assertGreaterEqual(sim.num_recombination_events, 0)
     self.assertGreaterEqual(sim.total_num_migration_events, 0)
     self.assertGreaterEqual(sim.num_multiple_recombination_events, 0)
     self.verify_sparse_trees(tree_sequence)
     self.verify_dump_load(tree_sequence)
Beispiel #7
0
    def __init__(self,
                 sample_size=1,
                 num_loci=1,
                 scaled_recombination_rate=0,
                 num_replicates=1,
                 migration_matrix=None,
                 population_configurations=None,
                 demographic_events=None,
                 scaled_mutation_rate=0,
                 print_trees=False,
                 precision=3,
                 random_seeds=None,
                 scaled_gene_conversion_rate=0,
                 gene_conversion_track_length=1,
                 hotspots=None):
        """
        Stores the run parameters and builds the simulator, recombination
        map, random generator and mutation generator used for the run.

        When ``random_seeds`` is None a fresh set of seeds is obtained from
        ``generate_seeds()``; the seeds are reduced to a single seed with
        ``get_single_seed`` for the msprime random generator.
        """
        self._sample_size = sample_size
        self._num_loci = num_loci
        self._num_replicates = num_replicates
        self._recombination_rate = scaled_recombination_rate
        self._mutation_rate = scaled_mutation_rate
        self._precision = precision
        self._print_trees = print_trees

        # For strict ms compatibility we want num_loci non-recombining
        # loci, hence the discrete uniform map when no hotspots are given.
        if hotspots is not None:
            self._recomb_map = hotspots_to_recomb_map(
                hotspots, self._recombination_rate, num_loci)
        else:
            self._recomb_map = msprime.RecombinationMap.uniform_map(
                num_loci, self._recombination_rate, discrete=True)

        # When population configurations are supplied, the overall sample
        # size must not be passed to the simulator as well.
        if population_configurations is not None:
            simulator_sample_size = None
        else:
            simulator_sample_size = self._sample_size

        # msprime measures time in generations for a given Ne, whereas ms
        # uses coalescent time; an Ne of 1/4 makes the two compatible.
        self._simulator = msprime.simulator_factory(
            Ne=0.25,
            sample_size=simulator_sample_size,
            recombination_map=self._recomb_map,
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events,
            gene_conversion_rate=scaled_gene_conversion_rate,
            gene_conversion_track_length=gene_conversion_track_length)

        # Resolve the ms-style seeds and derive the single msprime seed.
        ms_seeds = generate_seeds() if random_seeds is None else random_seeds
        self._random_generator = msprime.RandomGenerator(
            get_single_seed(ms_seeds))
        self._ms_random_seeds = ms_seeds
        self._simulator.random_generator = self._random_generator
        # The mutation generator shares the simulator's random generator.
        self._mutation_generator = msprime.MutationGenerator(
            self._random_generator, self._mutation_rate)
Beispiel #8
0
def make_tree_add_mutations(nodes, edges, mutrate):
    """
    Generates mutations at rate ``mutrate`` on the given node and edge
    tables, using a generator with the fixed seed 42.

    Returns a tuple of the tree sequence loaded from the tables and the
    site table that was filled in.
    """
    site_table = msprime.SiteTable()
    mutation_table = msprime.MutationTable()
    generator = msprime.MutationGenerator(
        msprime.RandomGenerator(42), mutrate)
    generator.generate(nodes, edges, site_table, mutation_table)
    tree_sequence = msprime.load_tables(
        nodes=nodes,
        edgesets=edges,
        sites=site_table,
        mutations=mutation_table)
    return (tree_sequence, site_table)
Beispiel #9
0
    def test_hudson(self):
        """
        Checks that a simulation with the default model and one with
        model="hudson" produce the same event counts for the same seed,
        and that neither rejects any common ancestor events.
        """
        threshold = 20

        def run_simulation(**model_args):
            # Both runs share the sample size, recombination rate and seed.
            simulator = msprime.simulator_factory(
                sample_size=10, recombination_rate=10, **model_args)
            simulator.random_generator = msprime.RandomGenerator(2)
            simulator.run()
            return simulator

        default_sim = run_simulation()
        self.assertGreater(default_sim.num_common_ancestor_events, threshold)
        self.assertGreater(default_sim.num_recombination_events, threshold)
        self.assertEqual(default_sim.num_rejected_common_ancestor_events, 0)

        hudson_sim = run_simulation(model="hudson")
        self.assertEqual(
            hudson_sim.num_common_ancestor_events,
            default_sim.num_common_ancestor_events)
        self.assertEqual(
            hudson_sim.num_recombination_events,
            default_sim.num_recombination_events)
        self.assertEqual(hudson_sim.num_rejected_common_ancestor_events, 0)
Beispiel #10
0
 def test_no_recombination(self):
     """
     Checks that, without recombination, simplifying the verified tree
     sequence leaves its node and edge tables unchanged.
     """
     simulator = msprime.simulator_factory(
         10,
         random_generator=msprime.RandomGenerator(1),
         record_full_arg=True)
     ts = self.verify(simulator)
     simplified_ts = ts.simplify()
     original_tables = ts.tables
     simplified_tables = simplified_ts.tables
     self.assertEqual(original_tables.nodes, simplified_tables.nodes)
     self.assertEqual(original_tables.edges, simplified_tables.edges)
Beispiel #11
0
 def test_smc_variants(self):
     """
     Checks that both SMC models produce more than ``threshold`` common
     ancestor and recombination events and reject at least one common
     ancestor event.
     """
     threshold = 20
     for model_name in ("smc", "smc_prime"):
         simulator = msprime.simulator_factory(
             sample_size=10, recombination_rate=5, model=model_name)
         simulator.random_generator = msprime.RandomGenerator(3)
         simulator.run()
         self.assertGreater(simulator.num_common_ancestor_events, threshold)
         self.assertGreater(simulator.num_recombination_events, threshold)
         self.assertGreater(
             simulator.num_rejected_common_ancestor_events, 0)
Beispiel #12
0
def write_vcf(chrom):
    """
    Generates mutations on the tree sequence stored for ``chrom`` and
    writes the result as a haploid VCF.

    The tree file path, VCF path and mutation rate are looked up by
    ``chrom`` in the module-level ``args``; the mutation seed comes from
    the module-level ``seeds``. Progress messages go to the module-level
    ``logfile``.

    Returns True once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    # The output file is opened up front, as before, but inside a context
    # manager so the handle is flushed and closed even if a later step
    # raises.
    with open(vcf_path, "w") as vcf:
        mut_rate = args.mut_rate[chrom]
        seed = seeds[chrom]
        logfile.write("Simulating mutations on " + treefile + "\n")
        ts = msprime.load(treefile)
        tables = ts.dump_tables()
        rng = msprime.RandomGenerator(seed)
        mutgen = msprime.MutationGenerator(rng, mut_rate)
        # generate() fills the site and mutation tables in place.
        mutgen.generate(
            tables.nodes, tables.edges, tables.sites, tables.mutations)
        logfile.write("Saving to " + vcf_path + "\n")
        mutated_ts = msprime.load_tables(**tables.asdict())
        mutated_ts.write_vcf(vcf, ploidy=1)
    return True
Beispiel #13
0
 def __init__(self,
              sample_size=1,
              num_loci=1,
              scaled_recombination_rate=0,
              num_replicates=1,
              migration_matrix=None,
              population_configurations=None,
              demographic_events=None,
              scaled_mutation_rate=0,
              print_trees=False,
              precision=3,
              random_seeds=None):
     """
     Stores the run parameters and builds the simulator and random
     generator used for the run.

     When ``random_seeds`` is None a fresh set of seeds is obtained from
     ``generate_seeds()``; the seeds are reduced to a single seed with
     ``get_single_seed`` for the msprime random generator.
     """
     self._sample_size = sample_size
     self._num_loci = num_loci
     self._num_replicates = num_replicates
     self._precision = precision
     self._print_trees = print_trees
     # The simulator works with unscaled per-generation rates. The scaled
     # rates are divided by 4 to cancel the factor introduced when they
     # were calculated; no Ne is passed to the simulator below, so no
     # further rescaling is applied here.
     self._recombination_rate = scaled_recombination_rate / 4
     self._mutation_rate = scaled_mutation_rate / 4

     # For strict ms compatibility we want num_loci non-recombining loci.
     uniform_map = msprime.RecombinationMap.uniform_map(
         num_loci, self._recombination_rate, num_loci)

     # When population configurations are supplied, the overall sample
     # size must not be passed to the simulator as well.
     if population_configurations is not None:
         simulator_sample_size = None
     else:
         simulator_sample_size = self._sample_size
     self._simulator = msprime.simulator_factory(
         sample_size=simulator_sample_size,
         recombination_map=uniform_map,
         population_configurations=population_configurations,
         migration_matrix=migration_matrix,
         demographic_events=demographic_events)

     # Resolve the ms-style seeds and derive the single msprime seed.
     ms_seeds = generate_seeds() if random_seeds is None else random_seeds
     self._random_generator = msprime.RandomGenerator(
         get_single_seed(ms_seeds))
     self._ms_random_seeds = ms_seeds
     self._simulator.set_random_generator(self._random_generator)
def write_vcf(chrom):
    """
    Generates mutations on the tree sequence stored for ``chrom`` and
    writes the result as a haploid VCF.

    The tree file path, VCF path and mutation rate are looked up by
    ``chrom`` in the module-level ``args``; the mutation seed comes from
    the module-level ``seeds``. Progress messages go to the module-level
    ``logfile``.

    Returns True once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    # The output file is opened up front, as before, but inside a context
    # manager so the handle is flushed and closed even if a later step
    # raises.
    with open(vcf_path, "w") as vcf:
        mut_rate = args.mut_rate[chrom]
        seed = seeds[chrom]
        logfile.write("Simulating mutations on " + treefile + "\n")
        ts = msprime.load(treefile)
        rng = msprime.RandomGenerator(seed)
        nodes = msprime.NodeTable()
        edgesets = msprime.EdgesetTable()
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        migrations = msprime.MigrationTable()
        # Migrations are dumped alongside nodes and edgesets but are not
        # passed on to load_tables below.
        ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)
        mutgen = msprime.MutationGenerator(rng, mut_rate)
        # generate() fills the site and mutation tables in place.
        mutgen.generate(nodes, edgesets, sites, mutations)
        logfile.write("Saving to " + vcf_path + "\n")
        mutated_ts = msprime.load_tables(nodes=nodes,
                                         edgesets=edgesets,
                                         sites=sites,
                                         mutations=mutations)
        mutated_ts.write_vcf(vcf, ploidy=1)

    return True
# Write the samples of the simplified tree sequence as text.
minimal_ts.dump_samples_text(samples_file)

# Log a timestamped progress marker and flush it to the log immediately.
logfile.write("Simplified; now writing to treefile (if specified).\n")
logfile.write(time.strftime('%X %x %Z') + "\n")
logfile.write("----------\n")
logfile.flush()

# The tree file is optional; only dump when a path was given.
if args.treefile is not None:
    minimal_ts.dump(args.treefile)

mut_seed = args.seed
logfile.write("Generating mutations with seed " + str(mut_seed) + "\n")
logfile.flush()

# Dump the node and edgeset tables of the simplified tree sequence, then
# generate mutations on them into the (initially empty) site and mutation
# tables.
rng = msprime.RandomGenerator(mut_seed)
nodes = msprime.NodeTable()
edgesets = msprime.EdgesetTable()
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
minimal_ts.dump_tables(nodes=nodes, edgesets=edgesets)
mutgen = msprime.MutationGenerator(rng, args.mut_rate)
mutgen.generate(nodes, edgesets, sites, mutations)

# print(nodes, file=logfile)
# print(edgesets, file=logfile)
# print(sites, file=logfile)
# print(mutations, file=logfile)

mutated_ts = msprime.load_tables(nodes=nodes,
                                 edgesets=edgesets,
Beispiel #16
0
import fwdpy11_arg_example.evolve_arg as ea
import msprime
import numpy as np
import sys

# Command line arguments, in order: population size, rho, theta,
# simplification (gc) interval, random seed.
N = int(sys.argv[1])
rho = float(sys.argv[2])
theta = float(sys.argv[3])
gc_interval = int(sys.argv[4])
seed = int(sys.argv[5])
# Run the forward simulation with mu=0.0; mutations are generated
# afterwards by msprime, further down.
simplifier, atracker, tsim = ea.evolve_track_wrapper(popsize=N,
                                                     rho=rho,
                                                     seed=seed,
                                                     gc_interval=gc_interval,
                                                     mu=0.0)

print(tsim, simplifier.times)
# Seed numpy so the random sample drawn below is reproducible.
np.random.seed(seed)

# Get a sample of size n = 10
# The sample is drawn without replacement from the 2 * N node ids
# 0 .. 2N - 1.
msprime.simplify_tables(np.random.choice(2 * N, 10, replace=False).tolist(),
                        nodes=simplifier.nodes,
                        edgesets=simplifier.edgesets)
msp_rng = msprime.RandomGenerator(seed)
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
# The mutation rate handed to msprime is theta / (4 * N).
# NOTE(review): the trailing "rho = theta" remark is the original
# author's; its meaning is not evident from this script - confirm.
mutgen = msprime.MutationGenerator(msp_rng,
                                   theta / float(4 * N))  # rho = theta
mutgen.generate(simplifier.nodes, simplifier.edgesets, sites, mutations)
# Report the number of rows in the site table.
print(sites.num_rows)
Beispiel #17
0
        # Use fwdpy11
        wf.evolve(rng, pop, params)
        # Get a sample
        s = fwdpy11.sampling.sample_separate(rng, pop, args.nsam)
    else:
        # Use this module
        simplifier, atracker, tsim = evolve_track(
            rng, pop, params, args.gc, True, args.seed, args.async, args.queue, args.qsize, args.wthreads)
        # Take times from simplifier before they change.
        times = simplifier.times
        times['fwd_sim_runtime'] = [tsim]
        times['N'] = [args.popsize]
        times['theta'] = [args.theta]
        times['rho'] = [args.rho]
        times['simplify_interval'] = [args.gc]
        d = pd.DataFrame(times)
        d.to_csv(args.outfile1, sep='\t', index=False, compression='gzip')
        # Simplify the genealogy down to a sample,
        # And throw mutations onto that sample
        msprime.simplify_tables(np.random.choice(2 * args.popsize, args.nsam,
                                                 replace=False).tolist(),
                                nodes=simplifier.nodes,
                                edges=simplifier.edges)
        msp_rng = msprime.RandomGenerator(args.seed)
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        mutgen = msprime.MutationGenerator(
            msp_rng, args.theta / float(4 * args.popsize))
        mutgen.generate(simplifier.nodes,
                        simplifier.edges, sites, mutations)
Beispiel #18
0
 def test_mutation_generator_unsupported(self):
     """
     Checks that passing ``mutation_generator`` to ``msprime.simulate``
     raises ValueError.
     """
     sample_size = 10
     generator = msprime.MutationGenerator(msprime.RandomGenerator(1), 1)
     with self.assertRaises(ValueError):
         msprime.simulate(sample_size, mutation_generator=generator)
Beispiel #19
0
 def test_random_seed(self):
     """
     Checks that the simulator keeps the random generator it was given
     and that the generator reports the seed it was built with.
     """
     expected_seed = 12345
     generator = msprime.RandomGenerator(expected_seed)
     simulator = msprime.simulator_factory(10, random_generator=generator)
     self.assertEqual(generator, simulator.random_generator)
     self.assertEqual(generator.get_seed(), expected_seed)