def test():
    """Collect mutation counts from ``wfrec`` and from ``msprime.simulate``.

    First, 1000 replicates of ``wfrec(100, 100, 1000, 100)`` are run; each
    one has mutations generated onto its node/edge tables at rate
    ``100 / 4000`` and its resulting mutation count is recorded.  Second,
    1000 replicates are drawn from ``msprime.simulate`` (sample size 100,
    recombination rate 0, mutation rate 25) and their mutation counts are
    recorded as well.

    Returns:
        tuple: ``(S, S2)`` -- the list of counts from the ``wfrec``
        replicates followed by the list of counts from ``msprime.simulate``.
    """
    np.random.seed(42)
    # A single msprime generator is created once and reused by every
    # replicate, so its stream keeps advancing across iterations.
    shared_rng = msprime.RandomGenerator(84)

    wfrec_counts = []
    for replicate in range(1000):
        print(replicate)
        ts = wfrec(100, 100, 1000, 100)
        site_table = msprime.SiteTable()
        mutation_table = msprime.MutationTable()
        generator = msprime.MutationGenerator(shared_rng, 100. / 4000.)
        generator.generate(
            ts.tables.nodes, ts.tables.edges, site_table, mutation_table)
        # Rebuild the tree sequence so that it carries the new mutations.
        ts = ts.load_tables(
            nodes=ts.tables.nodes,
            edges=ts.tables.edges,
            sites=site_table,
            mutations=mutation_table)
        wfrec_counts.append(ts.num_mutations)

    coalescent_counts = [
        replicate_ts.num_mutations
        for replicate_ts in msprime.simulate(
            100, recombination_rate=0, mutation_rate=25, num_replicates=1000)
    ]
    return wfrec_counts, coalescent_counts
def run_replicate(self, j, treefile):
    """Run one simulation replicate and record its statistics.

    A uniform recombination map is built from ``self.num_loci`` and
    ``self.recombination_rate``, the simulation is run, and a tree sequence
    is extracted using a freshly seeded mutation generator.  Depending on
    ``self.generate_trees`` / ``self.generate_haplotypes`` the result is
    written to ``treefile`` as a dumped tree sequence and/or as one
    haplotype per line.

    Args:
        j: Index of this replicate in the per-replicate statistic arrays.
        treefile: Path of the output file; its size on disk is recorded.
    """
    uniform_map = msprime.RecombinationMap.uniform_map(
        self.num_loci, self.recombination_rate, self.num_loci)
    simulator = msprime.simulator_factory(
        self.sample_size,
        recombination_map=uniform_map,
        Ne=self.effective_population_size)
    simulator.run()

    # Mutations are only wanted when haplotypes are going to be written.
    rate = self.mutation_rate if self.generate_haplotypes else 0
    mutgen_rng = msprime.RandomGenerator(random.randint(1, 2**31))
    tree_sequence = simulator.get_tree_sequence(
        msprime.MutationGenerator(mutgen_rng, rate))

    if self.generate_trees:
        tree_sequence.dump(treefile)
    if self.generate_haplotypes:
        # Opened in "w" mode, so this replaces any tree dump written above.
        with open(treefile, "w") as out:
            for haplotype in tree_sequence.haplotypes():
                print(haplotype, file=out)

    self.tree_file_size[j] = os.path.getsize(treefile)
    self.used_memory[j] = simulator.used_memory
    self.num_trees[j] = simulator.num_breakpoints
    # NOTE(review): unlike its neighbours this attribute is assigned without
    # the [j] index -- confirm whether a per-replicate value was intended.
    self.num_multiple_re_events = simulator.num_multiple_recombination_events
    self.num_re_events[j] = simulator.num_recombination_events
    self.num_ca_events[j] = simulator.num_common_ancestor_events
    self.num_records[j] = tree_sequence.num_edges
    self.num_nodes[j] = tree_sequence.num_nodes
    # NOTE(review): also stored un-indexed; only the last replicate survives.
    self.num_records_per_tree = get_mean_records_per_tree(tree_sequence)
def __init__(
        self, sample_size=1, num_loci=1, scaled_recombination_rate=0,
        num_replicates=1, migration_matrix=None,
        population_configurations=None, demographic_events=None,
        scaled_mutation_rate=0, print_trees=False, precision=3,
        random_seeds=None, scaled_gene_conversion_rate=0,
        gene_conversion_track_length=1, hotspots=None):
    """Set up the simulator, random generator and mutation generator.

    Args:
        sample_size: Overall sample size; only forwarded to the simulator
            when ``population_configurations`` is None.
        num_loci: Number of loci used to build the recombination map.
        scaled_recombination_rate: Stored as-is and used for the map.
        num_replicates: Stored on the instance.
        migration_matrix: Forwarded to ``msprime.simulator_factory``.
        population_configurations: Forwarded to the simulator factory.
        demographic_events: Forwarded to the simulator factory.
        scaled_mutation_rate: Stored as-is; used by the mutation generator.
        print_trees: Stored on the instance.
        precision: Stored on the instance.
        random_seeds: ms-style seeds; generated via ``generate_seeds()``
            when None.
        scaled_gene_conversion_rate: Forwarded as ``gene_conversion_rate``.
        gene_conversion_track_length: Forwarded to the simulator factory.
        hotspots: When given, the recombination map is built with
            ``hotspots_to_recomb_map`` instead of a uniform map.
    """
    self._sample_size = sample_size
    self._num_loci = num_loci
    self._num_replicates = num_replicates
    self._recombination_rate = scaled_recombination_rate
    self._mutation_rate = scaled_mutation_rate
    self._precision = precision
    self._print_trees = print_trees

    # For strict ms compatibility the map describes num_loci discrete,
    # non-recombining loci.
    if hotspots is not None:
        self._recomb_map = hotspots_to_recomb_map(
            hotspots, self._recombination_rate, num_loci)
    else:
        self._recomb_map = msprime.RecombinationMap.uniform_map(
            num_loci, self._recombination_rate, discrete=True)

    # The overall sample size must be withheld as soon as any population
    # configurations are specified.
    if population_configurations is None:
        overall_sample_size = self._sample_size
    else:
        overall_sample_size = None

    # msprime measures time in generations for a given Ne, whereas ms works
    # in coalescent time units; using Ne = 1/4 reconciles the two.
    self._simulator = msprime.simulator_factory(
        Ne=0.25,
        sample_size=overall_sample_size,
        recombination_map=self._recomb_map,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        gene_conversion_rate=scaled_gene_conversion_rate,
        gene_conversion_track_length=gene_conversion_track_length)

    # Resolve the ms-style seeds and collapse them into one msprime seed.
    if random_seeds is None:
        ms_seeds = generate_seeds()
    else:
        ms_seeds = random_seeds
    self._random_generator = msprime.RandomGenerator(get_single_seed(ms_seeds))
    self._ms_random_seeds = ms_seeds
    # The simulator and the mutation generator share one random generator.
    self._simulator.random_generator = self._random_generator
    self._mutation_generator = msprime.MutationGenerator(
        self._random_generator, self._mutation_rate)
def make_tree_add_mutations(nodes, edges, mutrate, seed=42):
    """Generate mutations on the given tables and load a tree sequence.

    Args:
        nodes: Node table describing the genealogy.
        edges: Edge table describing the genealogy.
        mutrate: Mutation rate handed to ``msprime.MutationGenerator``.
        seed: Seed for the msprime random generator.  Defaults to 42, the
            value that was previously hard-coded, so existing callers get
            exactly the same mutations as before.

    Returns:
        tuple: ``(tree_sequence, sites)`` -- the tree sequence loaded from
        the tables together with the populated site table.
    """
    rng = msprime.RandomGenerator(seed)
    mutations = msprime.MutationTable()
    sites = msprime.SiteTable()
    msprime.MutationGenerator(rng, mutrate).generate(
        nodes, edges, sites, mutations)
    # NOTE(review): the edge table is passed under the ``edgesets=`` keyword
    # although the parameter is named ``edges`` -- confirm this matches the
    # msprime version this code targets.
    tree_sequence = msprime.load_tables(
        nodes=nodes, edgesets=edges, sites=sites, mutations=mutations)
    return (tree_sequence, sites)
def write_vcf(chrom):
    """Add mutations to one chromosome's tree sequence and write it as VCF.

    Reads the per-chromosome settings from the module-level ``args``
    (``tree_file``, ``vcffile``, ``mut_rate``) and ``seeds``, and reports
    progress through the module-level ``logfile``.

    Args:
        chrom: Key/index selecting the chromosome in ``args`` and ``seeds``.

    Returns:
        bool: Always True once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    # The output file is still opened up front (as before), but through a
    # context manager so it is flushed and closed on success *and* on error
    # instead of being left to the garbage collector.
    with open(vcf_path, "w") as vcf:
        mut_rate = args.mut_rate[chrom]
        seed = seeds[chrom]
        logfile.write("Simulating mutations on" + treefile + "\n")
        ts = msprime.load(treefile)
        tables = ts.dump_tables()
        rng = msprime.RandomGenerator(seed)
        mutgen = msprime.MutationGenerator(rng, mut_rate)
        mutgen.generate(
            tables.nodes, tables.edges, tables.sites, tables.mutations)
        logfile.write("Saving to" + vcf_path + "\n")
        mutated_ts = msprime.load_tables(**tables.asdict())
        mutated_ts.write_vcf(vcf, ploidy=1)
    return True
def __init__(
        self, sample_size=1, num_loci=1, scaled_recombination_rate=0,
        num_replicates=1, migration_matrix=None,
        population_configurations=None, demographic_events=None,
        scaled_mutation_rate=0, print_trees=False, precision=3,
        random_seeds=None):
    """Set up the simulator, random generator and mutation generator.

    Args:
        sample_size: Overall sample size; only forwarded to the simulator
            when ``population_configurations`` is None.
        num_loci: Number of loci used to build the recombination map.
        scaled_recombination_rate: Scaled rate; stored divided by 4.
        num_replicates: Stored on the instance.
        migration_matrix: Forwarded to ``msprime.simulator_factory``.
        population_configurations: Forwarded to the simulator factory.
        demographic_events: Forwarded to the simulator factory.
        scaled_mutation_rate: Scaled rate; stored divided by 4.
        print_trees: Stored on the instance.
        precision: Stored on the instance.
        random_seeds: ms-style seeds; generated via ``generate_seeds()``
            when None.
    """
    self._sample_size = sample_size
    self._num_loci = num_loci
    self._num_replicates = num_replicates
    self._precision = precision
    self._print_trees = print_trees

    # Rates are kept unscaled, per generation.  Dividing by 4 cancels the
    # factor that was introduced when the scaled rates were calculated.
    # NOTE(review): the original comment relies on Ne = 1, but no Ne is
    # passed to the factory below -- presumably the default; confirm.
    self._recombination_rate = scaled_recombination_rate / 4
    self._mutation_rate = scaled_mutation_rate / 4

    # For strict ms compatibility the map describes num_loci
    # non-recombining loci.
    uniform_map = msprime.RecombinationMap.uniform_map(
        num_loci, self._recombination_rate, num_loci)

    # The overall sample size must be withheld as soon as any population
    # configurations are specified.
    if population_configurations is None:
        overall_sample_size = self._sample_size
    else:
        overall_sample_size = None

    self._simulator = msprime.simulator_factory(
        sample_size=overall_sample_size,
        recombination_map=uniform_map,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    # Resolve the ms-style seeds and collapse them into one msprime seed.
    if random_seeds is None:
        ms_seeds = generate_seeds()
    else:
        ms_seeds = random_seeds
    self._random_generator = msprime.RandomGenerator(get_single_seed(ms_seeds))
    self._ms_random_seeds = ms_seeds
    # The simulator and the mutation generator share one random generator.
    self._simulator.random_generator = self._random_generator
    self._mutation_generator = msprime.MutationGenerator(
        self._random_generator, self._mutation_rate)
def write_vcf(chrom):
    """Add mutations to one chromosome's tree sequence and write it as VCF.

    Reads the per-chromosome settings from the module-level ``args``
    (``tree_file``, ``vcffile``, ``mut_rate``) and ``seeds``, and reports
    progress through the module-level ``logfile``.

    Args:
        chrom: Key/index selecting the chromosome in ``args`` and ``seeds``.

    Returns:
        bool: Always True once the VCF has been written.
    """
    treefile = args.tree_file[chrom]
    vcf_path = args.vcffile[chrom]
    # The output file is still opened up front (as before), but through a
    # context manager so it is flushed and closed on success *and* on error
    # instead of being left to the garbage collector.
    with open(vcf_path, "w") as vcf:
        mut_rate = args.mut_rate[chrom]
        seed = seeds[chrom]
        logfile.write("Simulating mutations on" + treefile + "\n")
        ts = msprime.load(treefile)
        rng = msprime.RandomGenerator(seed)
        # Empty tables: three are filled from the loaded tree sequence, the
        # site and mutation tables are filled by the mutation generator.
        nodes = msprime.NodeTable()
        edgesets = msprime.EdgesetTable()
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        migrations = msprime.MigrationTable()
        ts.dump_tables(nodes=nodes, edgesets=edgesets, migrations=migrations)
        mutgen = msprime.MutationGenerator(rng, mut_rate)
        mutgen.generate(nodes, edgesets, sites, mutations)
        logfile.write("Saving to" + vcf_path + "\n")
        # The migration table is not passed on to the reloaded tree sequence.
        mutated_ts = msprime.load_tables(
            nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
        mutated_ts.write_vcf(vcf, ploidy=1)
    return True
# Push out pending log output, then optionally save the mutation-free trees.
logfile.flush()
if args.treefile is not None:
    minimal_ts.dump(args.treefile)

mut_seed = args.seed
logfile.write("Generating mutations with seed " + str(mut_seed) + "\n")
logfile.flush()

# Copy the genealogy out of the tree sequence into fresh node/edgeset tables.
nodes = msprime.NodeTable()
edgesets = msprime.EdgesetTable()
minimal_ts.dump_tables(nodes=nodes, edgesets=edgesets)

# Empty site/mutation tables for the generator to fill in.
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
rng = msprime.RandomGenerator(mut_seed)
mutgen = msprime.MutationGenerator(rng, args.mut_rate)
mutgen.generate(nodes, edgesets, sites, mutations)

# Reassemble a tree sequence that now includes the generated mutations; the
# mutation-free original is no longer needed after this point.
mutated_ts = msprime.load_tables(
    nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations)
del minimal_ts
logfile.write("Generated mutations!\n")
recombination_rate=opts['recomb_rate'], population_configurations=pops, migration_matrix=migr_init, demographic_events=migr_change) logfile.write(" done simulating! Generating mutations.\n") logfile.write(time.strftime(' %X %x %Z\n')) logfile.flush() rng = msprime.RandomGenerator(mut_seed) nodes = msprime.NodeTable() edgesets = msprime.EdgesetTable() sites = msprime.SiteTable() mutations = msprime.MutationTable() minimal_ts.dump_tables(nodes=nodes, edgesets=edgesets) mutgen = msprime.MutationGenerator(rng, opts['mut_rate']) mutgen.generate(nodes, edgesets, sites, mutations) mutated_ts = msprime.load_tables(nodes=nodes, edgesets=edgesets, sites=sites, mutations=mutations) del ts logfile.write(" done generating mutations! Writing out data.\n") logfile.write(time.strftime(' %X %x %Z\n')) logfile.flush() mutated_ts.dump(opts['treefile']) mutated_ts.write_vcf(open(opts['vcffile'], 'w'), ploidy=1)
# Usage: <script> N rho theta gc_interval seed
#
# Evolves a population while tracking its genealogy, simplifies the tables
# down to a random sample of 10 nodes, generates mutations on that sample
# and prints the number of sites.
import sys

import msprime
import numpy as np

import fwdpy11_arg_example.evolve_arg as ea

N, rho, theta, gc_interval, seed = (
    int(sys.argv[1]),
    float(sys.argv[2]),
    float(sys.argv[3]),
    int(sys.argv[4]),
    int(sys.argv[5]),
)

# mu=0.0: mutations are generated afterwards by msprime.
simplifier, atracker, tsim = ea.evolve_track_wrapper(
    popsize=N, rho=rho, seed=seed, gc_interval=gc_interval, mu=0.0)
print(tsim, simplifier.times)

# Draw a sample of size n = 10 from the 2N nodes and simplify down to it.
np.random.seed(seed)
sample_ids = np.random.choice(2 * N, 10, replace=False).tolist()
msprime.simplify_tables(
    sample_ids, nodes=simplifier.nodes, edgesets=simplifier.edgesets)

# Generate mutations on the simplified tables at rate theta / (4N).
msp_rng = msprime.RandomGenerator(seed)
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
mutgen = msprime.MutationGenerator(msp_rng, theta / float(4 * N))
mutgen.generate(simplifier.nodes, simplifier.edgesets, sites, mutations)
print(sites.num_rows)
# Use fwdpy11 wf.evolve(rng, pop, params) # Get a sample s = fwdpy11.sampling.sample_separate(rng, pop, args.nsam) else: # Use this module simplifier, atracker, tsim = evolve_track( rng, pop, params, args.gc, True, args.seed, args.async, args.queue, args.qsize, args.wthreads) # Take times from simplifier before they change. times = simplifier.times times['fwd_sim_runtime'] = [tsim] times['N'] = [args.popsize] times['theta'] = [args.theta] times['rho'] = [args.rho] times['simplify_interval'] = [args.gc] d = pd.DataFrame(times) d.to_csv(args.outfile1, sep='\t', index=False, compression='gzip') # Simplify the genealogy down to a sample, # And throw mutations onto that sample msprime.simplify_tables(np.random.choice(2 * args.popsize, args.nsam, replace=False).tolist(), nodes=simplifier.nodes, edges=simplifier.edges) msp_rng = msprime.RandomGenerator(args.seed) sites = msprime.SiteTable() mutations = msprime.MutationTable() mutgen = msprime.MutationGenerator( msp_rng, args.theta / float(4 * args.popsize)) mutgen.generate(simplifier.nodes, simplifier.edges, sites, mutations)
x = msprime.load_tables(nodes=nt, edges=es)

# Print the root time of every tree in the sequence.
# (Original author's note: this is where things go badly.)
MRCAS = [t.get_time(t.get_root()) for t in x.trees()]
print(MRCAS)

# Next: generate mutations on a random sample of size nsam.
nsam_samples = np.random.choice(2 * popsize, nsam, replace=False)

# Reviewer note (PLR): these copies serve no purpose -- their contents are
# overwritten by dump_tables() below.  Kept to preserve behaviour.
nt_s = nt.copy()
es_s = es.copy()

# Reviewer note (PLR): TreeSequence.simplify() *returns* the simplified tree
# sequence and leaves x unmodified.  The same could be done purely on tables.
xs = x.simplify(nsam_samples.tolist())
xs.dump_tables(nodes=nt_s, edges=es_s)

# Generate mutations on the simplified tables at rate theta / (4 * popsize).
sites = msprime.SiteTable()
mutations = msprime.MutationTable()
msp_rng = msprime.RandomGenerator(seed)
mutgen = msprime.MutationGenerator(msp_rng, theta / float(4 * popsize))
mutgen.generate(nt_s, es_s, sites, mutations)

# x is rebound to the simplified tree sequence that carries the mutations.
x = msprime.load_tables(
    nodes=nt_s, edges=es_s, sites=sites, mutations=mutations)
print(sites.num_rows)
def test_mutation_generator_unsupported(self):
    # Passing an explicit mutation generator to simulate() is expected to be
    # rejected with a ValueError.
    sample_size = 10
    generator = msprime.MutationGenerator(msprime.RandomGenerator(1), 1)
    self.assertRaises(
        ValueError,
        msprime.simulate,
        sample_size,
        mutation_generator=generator)