def test_simulate_with_mask(self):
    """Check that both engines honour exclusion and inclusion masks.

    A 1000 bp HomSap contig with a high mutation rate and zero
    recombination is simulated twice per engine: once with the first half
    of the contig excluded and once with only the first half included.
    Every mutation must fall outside the excluded interval, respectively
    inside the included interval.
    """
    species = stdpopsim.get_species("HomSap")
    L = 1000
    midpoint = L // 2
    contig = species.get_contig(length=L)
    contig.mutation_rate = 1e-3
    contig.recombination_map = msprime.RateMap.uniform(L, 0)
    samples = [msprime.SampleSet(2, population=0, ploidy=1)]
    model = stdpopsim.PiecewiseConstantSize(100)

    def mutation_positions(tree_seq):
        # Genomic position of the site carrying each mutation.
        return np.array(
            [tree_seq.site(mut.site).position for mut in tree_seq.mutations()]
        )

    for engine_name in ("msprime", "slim"):
        engine = stdpopsim.get_engine(engine_name)

        # Exclusion mask over [0, L/2): mutations may only appear at or
        # beyond the midpoint. The inclusion mask is cleared first.
        contig.inclusion_mask = None
        contig.exclusion_mask = np.array([[0, midpoint]])
        ts = engine.simulate(
            demographic_model=model, contig=contig, samples=samples
        )
        assert np.all(mutation_positions(ts) >= midpoint)

        # Inclusion mask over [0, L/2): mutations may only appear before
        # the midpoint. The exclusion mask is cleared first.
        contig.exclusion_mask = None
        contig.inclusion_mask = np.array([[0, midpoint]])
        ts = engine.simulate(
            demographic_model=model, contig=contig, samples=samples
        )
        assert np.all(mutation_positions(ts) < midpoint)
def get_samples(self, *args):
    """
    Returns a list of :class:`msprime.SampleSet` objects, with the number
    of samples from each population determined by the positional arguments.

    The i-th positional argument is the number of samples to draw from
    ``self.populations[i]``. For instance, ``model.get_samples(2, 5, 7)``
    requests two samples from the model's first population, five from the
    second and seven from the third, and returns one ``SampleSet`` per
    argument. Each ``SampleSet`` is created with ``ploidy=1`` and with
    ``time`` set to the population's ``sampling_time``.

    The number of arguments must be less than or equal to the number of
    "sampling" populations, ``model.num_sampling_populations``; populations
    for which no argument is given contribute no ``SampleSet``.

    :raises ValueError: if a positive number of samples is requested from
        a population whose ``allow_samples`` is false. A count of zero for
        such a population is silently skipped.
    """
    samples = []
    for pop_index, n in enumerate(args):
        population = self.populations[pop_index]
        if population.allow_samples:
            samples.append(
                msprime.SampleSet(
                    num_samples=n,
                    population=pop_index,
                    time=population.sampling_time,
                    ploidy=1,  # Avoid breaking too much at once.
                )
            )
        elif n > 0:
            # f-string so the offending population index is actually
            # interpolated into the message.
            raise ValueError(
                f"Samples requested from non-sampling population {pop_index}"
            )
    return samples
def test_wf_hudson_ancient_samples(self):
    """Check node times when DTWF hands over to Hudson with ancient samples.

    Twenty samples taken at times 0..19 are simulated under a discrete-time
    Wright-Fisher model for ``t`` generations followed by the Hudson
    coalescent. Non-sample node times inside the DTWF phase must be whole
    numbers, while those after the switch must not be.
    """
    t = 10
    n = 20
    sample_sets = [
        msprime.SampleSet(1, time=when, population=0) for when in range(n)
    ]
    models = [msprime.DiscreteTimeWrightFisher(duration=t), "hudson"]
    ts = msprime.sim_ancestry(
        sample_sets,
        population_size=10,
        model=models,
        random_seed=2,
    )

    # The first tree must have fully coalesced.
    assert ts.first().num_roots == 1

    # Times of the nodes that have no flags set.
    node_table = ts.tables.nodes
    internal_times = node_table.time[node_table.flags == 0]

    # Strictly inside the DTWF phase: integer generation times.
    in_dtwf = internal_times[(internal_times > 0) & (internal_times < t)]
    assert in_dtwf.shape[0] > 0
    assert np.all(np.floor(in_dtwf) == in_dtwf)

    # After the switch to Hudson: continuous (non-integer) times.
    in_hudson = internal_times[internal_times > t]
    assert in_hudson.shape[0] > 0
    assert np.all(np.floor(in_hudson) != in_hudson)
def _discoal_str_to_msprime(args):
    """Translate a discoal command line into an msprime ancestry simulation.

    ``args`` is the discoal argument string: three positional integers
    (sample size, number of replicates, sequence length) followed by
    option flags. The recognised flags are ``-t`` (theta), ``-r`` (rho),
    ``-x`` (sweep site as a fraction of the sequence), ``-ws``/``-wd``/
    ``-wn`` (time of the sweep), ``-a`` (alpha) and ``-N`` (reference
    size, default 1e6). Any other token is ignored.

    Both ``-t`` and ``-r`` must be supplied, because the scaling step
    divides them; theta must scale to a mutation rate of exactly 0 since
    only ancestry is simulated here. When ``-a`` is given, ``-x`` and one
    of the sweep-time flags must be supplied as well.

    Returns the replicates produced by ``msprime.sim_ancestry`` (an
    iterator over the msprime tree sequences).

    Raises ``ValueError`` if the command line uses population size
    changes (``-en``), multiple populations (``-m``/``-p``) or
    splits/admixture (``-ea``/``-ed``).
    """
    # split() without an argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so stray spaces cannot yield empty tokens
    # that break or shift the positional arguments.
    tokens = args.split()
    # positional args
    sample_size = int(tokens[0])
    nreps = int(tokens[1])
    seq_length = int(tokens[2])
    # parse discoal command line for params
    # init ones we definitely need for comparison
    theta = rho = alpha = sweep_site = sweep_mod_time = None
    refsize = 1e6
    for i in range(3, len(tokens)):
        flag = tokens[i]
        # pop size change case
        if flag == "-en":
            raise ValueError(
                "sweeps with population size changes remain unimplemented")
        # migration rate case
        if flag in ("-m", "-p"):
            raise ValueError(
                "sweeps with multiple populations remain unimplemented")
        # split or admixture case
        if flag in ("-ea", "-ed"):
            raise ValueError("sweeps with splits or admixture not supported")
        # sweep params
        if flag == "-x":
            sweep_site = float(tokens[i + 1])
        if flag in ("-ws", "-wd", "-wn"):
            sweep_mod_time = float(tokens[i + 1])
        if flag == "-a":
            alpha = float(tokens[i + 1])
        if flag == "-N":
            refsize = float(tokens[i + 1])
        # coalescent params
        if flag == "-t":
            theta = float(tokens[i + 1])
        if flag == "-r":
            rho = float(tokens[i + 1])

    mod_list = []
    if alpha is not None:
        # sweep model
        s = alpha / (2 * refsize)
        mod = msprime.SweepGenicSelection(
            position=np.floor(sweep_site * seq_length),
            start_frequency=1.0 / (2 * refsize),
            end_frequency=1.0 - (1.0 / (2 * refsize)),
            s=s * 2,  # discoal fitness model is 1, 1+s, 1+2s
            dt=1e-6,
        )
        # Neutral coalescent until the sweep time, then the sweep itself.
        mod_list.append(msprime.StandardCoalescent(duration=sweep_mod_time))
        mod_list.append(mod)
        # if an event is defined from discoal line
        # best thing to do is rescale to Ne=0.25
        # so that time scale are consistent
        # see note at msprime/cli.py line 626
        # and following for alternate solution
        if sweep_mod_time > 0:
            refsize = 0.25
            mod.s = alpha / refsize
    # append final model
    mod_list.append("hudson")
    # scale theta and rho
    recomb_rate = rho / (4 * refsize * (seq_length - 1))
    mu = theta / (4 * refsize * seq_length)
    # We're only interested in ancestry sim here.
    assert mu == 0
    replicates = msprime.sim_ancestry(
        [msprime.SampleSet(sample_size, ploidy=1)],
        population_size=refsize,
        model=mod_list,
        recombination_rate=recomb_rate,
        sequence_length=seq_length,
        discrete_genome=False,
        num_replicates=nreps,
    )
    return replicates
####################
# Run
# Simulate an ancestral history for 3 diploid samples under the coalescent
# with recombination on a seqlength region with parameters defined above.

# Species tree in Newick form: "true" and "query" split Tqt ago, and their
# ancestor joins "false" at Ttf.
newick = f"((true:{Tqt},query:{Tqt}):{Ttf - Tqt},false:{Ttf})"
demography = msprime.Demography.from_species_tree(
    newick,
    time_units="myr",
    initial_size=pop_size,
    generation_time=generation_length,
)

# One sample per population, each taken at its own sampling time.
ts = msprime.sim_ancestry(
    samples=[
        msprime.SampleSet(1, population=pop_name, time=sample_age)
        for pop_name, sample_age in (
            ("true", true_age),
            ("query", query_age),
            ("false", false_age),
        )
    ],
    demography=demography,
    recombination_rate=recomb_rate,
    ploidy=2,
    sequence_length=seqlength,
    random_seed=123456,
)

# Default mutation model is msprime.JC69.
mts = msprime.sim_mutations(ts, rate=mutation_rate, random_seed=5678)

# Create arbitrary sequence of same length, because msprime doesn't bother
# to simulate non-variable sites.
bases = ["A", "C", "T", "G"]
bgseq = random.choices(bases, k=seqlength)