def __init__(self):
    """Configure a single-population, three-epoch demographic history.

    Going backwards in time the population has size ``N_R`` until ``t_1``,
    size ``N_B`` between ``t_1`` and ``t_2``, and size ``N_A`` before
    ``t_2``. The attributes set are ``population_configurations``,
    ``demographic_events`` and ``migration_matrix``.
    """
    # Parameter values from "Simulating Data" section
    # these are assumptions, not estimates
    N_ref = 100000
    t_1_coal = 0.5
    t_2_coal = 5.0

    # estimates from the ANN
    N_R = 544200
    N_B = 145300
    N_A = 652700

    # Times are provided in 4N_ref generations, so we convert into generations.
    # generation_time = 10 / year
    t_1 = t_1_coal * 4 * N_ref
    # msprime event times are measured from the present, so the second
    # change happens after BOTH epochs have elapsed, not after t_2_coal alone.
    # NOTE(review): this treats t_2_coal as the length of the second epoch;
    # confirm against the publication.
    t_2 = (t_1_coal + t_2_coal) * 4 * N_ref

    # Single population in this model
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_R),
    ]
    self.demographic_events = [
        # Size change at bottleneck (back in time; BIT)
        msprime.PopulationParametersChange(
            time=t_1, initial_size=N_B, population_id=0),
        # Size change at recovery (BIT)
        msprime.PopulationParametersChange(
            time=t_2, initial_size=N_A, population_id=0),
    ]
    self.migration_matrix = [[0]]
def HuberTwoEpoch():
    """Build the QC copy of the single-population two-epoch model.

    The population has size ``N_2`` from the present back to ``T_2``
    generations ago and size ``N_ANC`` before that.

    :return: A ``stdpopsim.DemographicModel`` describing the model.
    """
    model_id = "QC-African2Epoch_1H18"
    populations = [
        stdpopsim.Population(id="SouthMiddleAtlas", description="A. thalina"),
    ]

    # Present-day and ancestral population sizes.
    N_2 = 100218
    N_ANC = 746148
    # Time (generations ago) at which the size switches to N_ANC.
    T_2 = 568344

    present_day = msprime.PopulationConfiguration(
        initial_size=N_2, metadata=populations[0].asdict())
    switch_to_ancestral = msprime.PopulationParametersChange(
        time=T_2, initial_size=N_ANC, population_id=0)
    # The same mapping object is listed twice (presumably once per epoch).
    name_to_index = {"SouthMiddleAtlas": 0}

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=populations,
        population_configurations=[present_day],
        demographic_events=[switch_to_ancestral],
        population_id_map=[name_to_index] * 2,
    )
def test_single_growth_rate(self):
    """Step the low-level simulator through one growth-rate change.

    A single population grows/shrinks exponentially until ``end_time`` and
    is constant afterwards; the population state is checked in both time
    slices reported by ``debug_demography``.
    """
    # Set out our values in units of generations and absolute sizes.
    Ne = 1000
    growth_rate = -0.01
    end_time = 20
    end_size = Ne * math.exp(-growth_rate * end_time)

    def snapshot(sim):
        # Rebuild Population objects from the low-level configuration dicts.
        return [
            msprime.Population(**config)
            for config in sim.get_population_configuration()
        ]

    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=Ne, growth_rate=growth_rate),
        ],
        demographic_events=[
            msprime.PopulationParametersChange(time=end_time, growth_rate=0),
        ],
    )
    ll_sim = simulator.create_ll_instance()

    # First time slice: runs up to the parameter change.
    first_slice_end = ll_sim.debug_demography()
    self.assertEqual(end_time, first_slice_end)
    first_pops = snapshot(ll_sim)
    self.assertEqual(len(first_pops), 1)
    growing = first_pops[0]
    self.assertEqual(growing.growth_rate, growth_rate)
    self.assertEqual(growing.initial_size, Ne)
    self.assertEqual(growing.get_size(end_time), end_size)

    # Now fast forward to the next time slice.
    second_slice_end = ll_sim.debug_demography()
    self.assertTrue(math.isinf(second_slice_end))
    constant = snapshot(ll_sim)[0]
    self.assertEqual(constant.growth_rate, 0)
    self.assertEqual(constant.initial_size, end_size)
    self.assertEqual(constant.get_size(10), end_size)
def step_geno(ss_each=ss * 2, tmove=100):
    """Simulate replicates where all demes merge into the last one at ``tmove``.

    Reads the module-level ``d`` (number of demes), ``mig_mat`` and ``args``
    (``npop``, ``mu``, ``rho``, ``length``, ``seed``).

    :param ss_each: Haploid sample size for each deme. The default is
        evaluated once, when the function is defined, from the module-level
        ``ss``.
    :param tmove: Time (generations ago) at which every lineage is moved
        into deme ``d - 1`` and all migration rates are set to 0.
    :return: Whatever ``msprime.simulate`` returns for 100 replicates.
    """
    deme_configs = [
        msprime.PopulationConfiguration(sample_size=ss_each) for _ in range(d)
    ]

    # At tmove, move all lineages into the last deme ...
    sink = d - 1
    events = []
    for source_deme in range(sink):
        events.append(
            msprime.MassMigration(
                time=tmove, source=source_deme, destination=sink,
                proportion=1.0))
    # ... and switch off migration among demes.
    events.append(msprime.MigrationRateChange(time=tmove, rate=0))

    replicates = msprime.simulate(
        Ne=args.npop,
        population_configurations=deme_configs,
        migration_matrix=mig_mat,
        mutation_rate=args.mu,
        recombination_rate=args.rho,
        length=args.length,
        demographic_events=events,
        num_replicates=100,
        random_seed=args.seed,
    )
    return replicates
def _5pop_test_demog(N=1000):
    """Build a five-population demographic model used for testing.

    :param N: Initial size given to every population.
    :return: A ``stdpopsim.DemographicModel`` with id ``"5pop_test"``.
    """
    populations = [
        stdpopsim.Population(f"pop{i}", f"Population {i}") for i in range(5)
    ]
    pop_config = [
        msprime.PopulationConfiguration(initial_size=N, metadata=pop.asdict())
        for pop in populations
    ]

    # All rates are zero except the single entry at row 1, column 2.
    mig_mat = [[0] * 5 for _ in range(5)]
    mig_mat[1][2] = 1e-5

    # Events are listed in increasing time; at time 200 the mass migration
    # precedes the migration-rate reset.
    dem_events = [
        msprime.MassMigration(
            time=100, source=0, destination=1, proportion=0.1),
        msprime.MassMigration(time=200, source=3, destination=2),
        msprime.MigrationRateChange(time=200, rate=0),
        msprime.MassMigration(time=300, source=1, destination=0),
        msprime.MassMigration(
            time=400, source=2, destination=4, proportion=0.1),
        msprime.MassMigration(time=600, source=2, destination=0),
        msprime.MassMigration(time=700, source=4, destination=0),
    ]

    return stdpopsim.DemographicModel(
        id="5pop_test",
        description="5pop_test",
        long_description="5pop_test",
        populations=populations,
        generation_time=1,
        population_configurations=pop_config,
        demographic_events=dem_events,
        migration_matrix=mig_mat,
    )
def _afr_2epoch():
    """Build the single-population two-epoch model ``African2Epoch_1H18``.

    The population has size ``N_0`` from the present back to ``t_1``
    generations ago and size ``N_A`` before that.

    :return: A ``stdpopsim.DemographicModel`` describing the model.
    """
    # Present-day size, ancestral size, and time of the size change.
    N_0 = 100218
    N_A = 746148
    t_1 = 568344

    africa = stdpopsim.Population(
        id="Africa", description="Arabidopsis thaliana African population")
    populations = [africa]

    citation = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL})

    long_description = (
        " Model estimated from site frequency spectrum of synonymous SNPs "
        "from African samples using Williamson et al. (2005) methodology. "
    )

    return stdpopsim.DemographicModel(
        id="African2Epoch_1H18",
        description="African two epoch model",
        long_description=long_description,
        populations=populations,
        citations=[citation],
        generation_time=1,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=N_0, metadata=populations[0].asdict()),
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_1, initial_size=N_A, population_id=0),
        ],
    )
def __init__(self):
    """Configure a single-population, three-epoch demographic history.

    Going backwards in time the size is ``N_3`` until ``T_3``, ``N_2``
    until ``T_2 + T_3``, and ``N_ANC`` before that. Reads
    ``self.populations[0]`` for the population metadata.
    """
    # Population sizes for the three epochs.
    N_3 = 203077
    N_2 = 24076
    N_ANC = 161744

    # Epoch lengths; the second event time is the sum of both.
    T_3 = 14534
    T_2 = 7420
    first_change = T_3
    second_change = T_2 + T_3

    self.population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=N_3, metadata=self.populations[0].asdict()),
    ]
    self.migration_matrix = [[0]]
    self.demographic_events = [
        msprime.PopulationParametersChange(
            time=first_change, initial_size=N_2, population_id=0),
        msprime.PopulationParametersChange(
            time=second_change, initial_size=N_ANC, population_id=0),
    ]
def __init__(self, NA, N1, N2, T, M12, M21):
    """Configure a two-population split model with an ancestral population.

    Populations 0 and 1 exchange migrants and, at time ``T``, all of their
    lineages are moved into population 2. Reads ``self.populations[0..2]``
    for the population metadata.

    :param NA: Initial size of population 2 (the mass-migration target).
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param T: Time of the two mass migrations into population 2.
    :param M12: Migration-matrix entry at row 0, column 1.
    :param M21: Migration-matrix entry at row 1, column 0.
    """
    # Index in this tuple == msprime population index.
    sizes_by_index = (N1, N2, NA)
    self.population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=size, metadata=self.populations[index].asdict())
        for index, size in enumerate(sizes_by_index)
    ]

    # Population 2 neither sends nor receives migrants.
    self.migration_matrix = [
        [0, M12, 0],
        [M21, 0, 0],
        [0, 0, 0],
    ]

    # Population 0 is merged first, then population 1, both at time T.
    self.demographic_events = [
        msprime.MassMigration(
            time=T, source=source, destination=2, proportion=1)
        for source in (0, 1)
    ]
def _sma_1pop():
    """Build the piecewise-constant model ``SouthMiddleAtlas_1D17``.

    One ``PopulationParametersChange`` is created per entry of ``times``,
    setting the size to the matching entry of ``sizes``; the size at time
    zero is ``sizes[0]``.

    :return: A ``stdpopsim.DemographicModel`` describing the model.
    """
    # the size during the interval times[k] to times[k+1] = sizes[k]
    times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100])
    sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al 2017)
    # Overwrite entries 0..7 with the size at index 8 (~40Kya) ...
    sizes[:8] = sizes[8]
    # ... and entries 30..31 with the size at index 30 (~1.6Mya).
    sizes[30:32] = sizes[30]

    demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0)
        for size, when in zip(sizes, times)
    ]

    populations = [
        stdpopsim.Population(
            id="SouthMiddleAtlas",
            description="Arabidopsis Thaliana South Middle Atlas population"),
    ]

    citation = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL})

    long_description = (
        " This model comes from MSMC using two randomly sampled homozygous "
        "individuals (Khe32 and Ifr4) from the South Middle Atlas region from "
        "the Middle Atlas Mountains in Morocco. The model is estimated with 32 "
        "time periods. Because estimates from the recent and ancient past are "
        "less accurate, we set the population size in the first 7 time periods "
        "equal to the size at the 8th time period and the size during last 2 "
        "time periods equal to the size in the 30th time period. \n"
    )

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description=long_description,
        populations=populations,
        citations=[citation],
        generation_time=1,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=sizes[0], metadata=populations[0].asdict()),
        ],
    )
def test_population_configurations(self):
    """Check how ``simulator_factory`` handles population configurations.

    Covers rejection of non-configuration arguments, agreement between the
    simulator's populations and 1-9 supplied configurations, and the
    single-population default.
    """
    def f(configs):
        return msprime.simulator_factory(population_configurations=configs)

    for bad_type in [10, ["sdf"], "sdfsd"]:
        self.assertRaises(TypeError, f, bad_type)

    # Just test the basic equalities here. The actual
    # configuration options are tested elsewhere.
    samples_per_population = 5
    for N in range(1, 10):
        pop_configs = [
            msprime.PopulationConfiguration(
                samples_per_population, initial_size=5)
            for _ in range(N)
        ]
        sim = msprime.simulator_factory(
            population_configurations=pop_configs)
        sim_pops = sim.demography.populations
        self.assertEqual(len(sim_pops), len(pop_configs))
        for pop, pop_config in zip(sim_pops, pop_configs):
            self.assertEqual(pop.initial_size, pop_config.initial_size)
            self.assertEqual(pop.growth_rate, pop_config.growth_rate)
        self.assertEqual(len(sim.samples), samples_per_population * N)
        self.assertEqual(len(sim.population_configuration), N)

    # The default is a single population
    default_sim = msprime.simulator_factory(10)
    self.assertEqual(len(default_sim.population_configuration), 1)
def test_sample_size_population_configuration(self):
    """Check the samples derived from per-population sample sizes.

    For 1-4 populations, all-zero sample sizes must raise ``ValueError``
    and two samples per population must produce two time-zero samples for
    each population, in population order.
    """
    per_population = 2
    for d in range(1, 5):
        # Zero sample size is always an error
        empty_configs = [msprime.PopulationConfiguration(0) for _ in range(d)]
        self.assertRaises(
            ValueError, msprime.simulator_factory,
            population_configurations=empty_configs)

        configs = [
            msprime.PopulationConfiguration(per_population) for _ in range(d)
        ]
        sim = msprime.simulator_factory(population_configurations=configs)
        self.assertEqual(len(sim.samples), per_population * d)

        expected = [
            msprime.Sample(population=j, time=0)
            for j in range(d)
            for _ in range(per_population)
        ]
        self.assertEqual(sim.samples, expected)
def simulate_trees(self, **kwargs):
    """Simulate trees for this demography with ``msprime.simulate``.

    Populations are numbered by their position in ``self.sampled_pops``.
    Every event in ``self._G.graph["events"]`` is converted with
    ``get_msprime_event``; events that convert to ``None`` are skipped.

    :param kwargs: Extra keyword arguments forwarded to
        ``msprime.simulate``.
    :return: The result of ``msprime.simulate``.
    """
    # Normalise the sampling times to one value per sampled population:
    # None means "all sampled at time 0", a scalar is broadcast.
    sampled_t = self.sampled_t
    if sampled_t is None:
        sampled_t = 0.0
    sampled_t = np.array(sampled_t) * np.ones(len(self.sampled_pops))

    pops = {p: i for i, p in enumerate(self.sampled_pops)}

    demographic_events = []
    for e in self._G.graph["events"]:
        e = e.get_msprime_event(self._G.graph["params"], pops)
        if e is not None:
            demographic_events.append(e)

    # Use the normalised local times here. Zipping over the raw
    # self.sampled_t fails when it is None or a scalar.
    samples = [
        msprime.Sample(population=pops[p], time=t)
        for p, t, n in zip(self.sampled_pops, sampled_t, self.sampled_n)
        for _ in range(n)
    ]

    return msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration() for _ in range(len(pops))
        ],
        # NOTE(review): the factor of 4 presumably converts between the
        # model's and msprime's size scaling -- confirm.
        Ne=self.default_N / 4,
        demographic_events=demographic_events,
        samples=samples,
        **kwargs)
def migration_example():
    """Simulate two sampled populations that merge into a third at ``t``.

    Populations 0 and 1 each contribute ``n // 2`` samples; population 2 is
    unsampled and receives all lineages from both at time ``t``.

    :return: The tree sequence from ``msprime.simulate`` (seed 1).
    """
    n = 10
    t = 1
    half = n // 2

    sampled = [msprime.PopulationConfiguration(half) for _ in range(2)]
    unsampled_target = msprime.PopulationConfiguration(0)
    population_configurations = sampled + [unsampled_target]

    # Population 0 is merged first, then population 1, both at time t.
    demographic_events = [
        msprime.MassMigration(time=t, source=source, destination=2)
        for source in (0, 1)
    ]

    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=1)
def exp_decline(self, N0=100, N1=1000, T=1000):
    """
    One population model with exponential decline in population size.
    Used for testing that growth rates are handled appropriately.

    :param N0: Population size at time zero.
    :param N1: Population size set at time ``T``, constant from then on.
    :param T: Time of the parameter change.
    :return: A ``stdpopsim.DemographicModel`` with id ``"exp_decline"``.
    """
    pop0 = stdpopsim.models.Population(id="pop0", description="")

    # Growth rate passed to msprime for the interval [0, T).
    r = math.log(N0 / N1) / T
    recent = msprime.PopulationConfiguration(
        initial_size=N0,
        growth_rate=r,
        metadata=pop0.asdict(),
    )
    # At T the size is pinned to N1 and growth is switched off.
    freeze = msprime.PopulationParametersChange(
        time=T, initial_size=N1, growth_rate=0, population_id=0)

    label = "exp_decline"
    return stdpopsim.DemographicModel(
        id=label,
        description=label,
        long_description=label,
        populations=[pop0],
        generation_time=1,
        population_configurations=[recent],
        demographic_events=[freeze],
    )
def recapitate(self, recombination_rate, keep_first_generation=False,
               population_configurations=None, **kwargs):
    """
    Return a "recapitated" tree sequence: msprime runs a coalescent
    simulation from the "top" of this tree sequence so that any uncoalesced
    lineages can coalesce.

    To make this possible the first generation of the SLiM simulation has
    been recorded in the tree sequence. Those individuals are not currently
    marked as samples, so this process (or ``simplify()``) removes any that
    are not needed. Set ``keep_first_generation`` to True to keep them,
    although this makes more work here.

    ``Ne`` is not set automatically and so defaults to ``1.0``; you probably
    want to set it explicitly. Migration is not set up automatically either:
    if there are uncoalesced lineages in more than one population you must
    pass a migration matrix to allow coalescence. In both cases remember
    that population IDs in ``tskit`` begin at 0, so a SLiM simulation with
    populations ``p1`` and ``p2`` gives a tree sequence with three
    populations (no nodes are assigned to population 0). A migration rate
    of 1.0 between ``p1`` and ``p2`` therefore needs the migration matrix::

        [[0.0, 0.0, 0.0],
         [0.0, 0.0, 1.0],
         [0.0, 1.0, 0.0]]

    :param float recombination_rate: The recombination rate - only a
        constant recombination rate is allowed.
    :param bool keep_first_generation: Whether to keep the individuals (and
        genomes) corresponding to the first SLiM generation in the
        resulting tree sequence.
    :param list population_configurations: See :meth:`msprime.simulate()`
        for this argument; if not provided, each population will have zero
        growth rate and the same effective population size.
    :param dict kwargs: Any other arguments to :meth:`msprime.simulate()`.
    """
    # Constant rate across the whole sequence, one locus per unit of length.
    recombination_map = msprime.RecombinationMap(
        positions=[0.0, self.sequence_length],
        rates=[recombination_rate, 0.0],
        num_loci=int(self.sequence_length))

    if population_configurations is None:
        population_configurations = [
            msprime.PopulationConfiguration()
            for _ in range(self.num_populations)
        ]

    start_ts = self._mark_first_generation() if keep_first_generation else self

    recapitated = msprime.simulate(
        from_ts=start_ts,
        population_configurations=population_configurations,
        recombination_map=recombination_map,
        start_time=self.slim_generation,
        **kwargs)
    return SlimTreeSequence.load_tables(recapitated.tables)
def _recap_and_rescale(self, ts, seed, recap_epoch, contig, mutation_rate,
                       slim_scaling_factor):
    """
    Apply post-SLiM transformations to ``ts``: rescale node and migration
    times, recapitate, simplify, and add neutral mutations.

    :param ts: Tree sequence produced by SLiM.
    :param seed: Seed for the generator that supplies the recapitation and
        mutation seeds.
    :param recap_epoch: Provides ``populations`` (each with ``start_size``
        and ``growth_rate``) and ``migration_matrix`` for recapitation.
    :param contig: Provides ``recombination_map.mean_recombination_rate``.
    :param mutation_rate: Rate passed to ``msprime.mutate``.
    :param slim_scaling_factor: Multiplier applied to times and to
        ``slim_generation``.
    :return: A ``pyslim.SlimTreeSequence`` with mutations added.
    """
    # Node times come from SLiM generation numbers, which may have been
    # divided by a scaling factor for computational tractability.
    tables = ts.dump_tables()
    tables.nodes.time *= slim_scaling_factor
    tables.migrations.time *= slim_scaling_factor
    ts = pyslim.SlimTreeSequence.load_tables(tables)
    ts.slim_generation *= slim_scaling_factor

    # Draw the two derived seeds in a fixed order: recapitation, mutation.
    rng = random.Random(seed)
    recap_seed = rng.randrange(1, 2**32)
    mutation_seed = rng.randrange(1, 2**32)

    population_configurations = []
    for pop in recap_epoch.populations:
        population_configurations.append(
            msprime.PopulationConfiguration(
                initial_size=pop.start_size, growth_rate=pop.growth_rate))

    mean_rate = contig.recombination_map.mean_recombination_rate
    ts = ts.recapitate(
        recombination_rate=mean_rate,
        population_configurations=population_configurations,
        migration_matrix=recap_epoch.migration_matrix,
        random_seed=recap_seed)

    ts = self._simplify_remembered(ts)

    # Add neutral mutations.
    mutated = msprime.mutate(
        ts, rate=mutation_rate, keep=True, random_seed=mutation_seed)
    return pyslim.SlimTreeSequence(mutated)
def __init__(self):
    """Configure a single-population piecewise-constant size history.

    Sets ``times``, ``sizes``, ``generation_time``, ``demographic_events``,
    ``migration_matrix`` and ``population_configurations``. One size change
    is created per entry of ``times``; the size at time zero is
    ``sizes[0]``.
    """
    super().__init__()

    # the size during the interval times[k] to times[k+1] = sizes[k]
    self.times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100])
    self.sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana(Durvasula et al 2017)
    # Overwrite entries 0..7 with the size at index 8 (~40Kya) ...
    self.sizes[:8] = self.sizes[8]
    # ... and entries 30..31 with the size at index 30 (~1.6Mya).
    self.sizes[30:32] = self.sizes[30]

    # generation time is 1 year
    self.generation_time = 1

    self.demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0)
        for when, size in zip(self.times, self.sizes)
    ]
    self.migration_matrix = [[0]]
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=self.sizes[0]),
    ]
def two_bin(NA, N1, N2, Ts, M1, M2):
    """Variance of between-population pairwise differences under a split model.

    Two populations of 50 samples each exchange migrants at rate ``M1``
    until ``Ts / 2`` generations ago and at rate ``M2`` before that; at
    ``Ts`` all lineages of population 1 are moved into population 0.

    :param NA: Passed to ``msprime.simulate`` as ``Ne``.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration; the rate change is at ``Ts / 2``.
    :param M1: Migration rate (both directions) before the rate change,
        looking backwards from the present.
    :param M2: Migration rate (both directions) from ``Ts / 2`` onwards.
    :return: ``np.var`` of the number of differing sites over every
        (population 0 haplotype, population 1 haplotype) pair.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=50, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2),
    ]
    migration_matrix = [[0, M1], [M1, 0]]
    demographic_events = [
        msprime.MigrationRateChange(
            time=Ts / 2, rate=M2, matrix_index=(0, 1)),
        msprime.MigrationRateChange(
            time=Ts / 2, rate=M2, matrix_index=(1, 0)),
        msprime.MassMigration(
            time=Ts, source=1, destination=0, proportion=1.0),
    ]

    replicates = 1
    length = 100000
    sim = msprime.simulate(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        mutation_rate=1e-7,
        recombination_rate=1e-8,
        length=length,
        num_replicates=replicates)

    pairwise_diff = []
    for s in sim:
        s0 = len(s.get_samples(0))
        s1 = len(s.get_samples(1))
        # assumes haplotypes are ordered population 0 first, then
        # population 1 -- TODO confirm
        haps = list(s.haplotypes())
        h0 = haps[:s0]
        # Take all s1 haplotypes of population 1; the previous upper bound
        # of s0 + s1 - 1 silently dropped the last one.
        h1 = haps[s0:s0 + s1]
        for hap0 in h0:
            for hap1 in h1:
                pairwise_diff.append(
                    sum(1 for a, b in zip(hap0, hap1) if a != b))
    return np.var(np.array(pairwise_diff))
def _afr_3epoch():
    """Build the single-population three-epoch model ``African3Epoch_1S16``.

    Going backwards in time the size is ``N_R`` until ``t_1``, ``N_B``
    until ``t_2``, and ``N_A`` before that.

    :return: A ``stdpopsim.DemographicModel`` describing the model.
    """
    model_id = "African3Epoch_1S16"
    description = "Three epoch African population"
    long_description = (
        " The three epoch (modern, bottleneck, ancestral) model estimated "
        "for a single African Drosophila Melanogaster population from "
        "Sheehan and Song (2016). Population sizes are estimated by a deep "
        "learning model trained on simulation data. NOTE: Due to differences "
        "in coalescence units between PSMC (2N) and msms (4N) the number of "
        "generations were doubled from PSMC estimates when simulating data "
        "from msms in the original publication. We have faithfully "
        "represented the published model here. "
    )
    populations = [_afr_population]
    citations = [
        stdpopsim.Citation(
            author="Sheehan and Song",
            year=2016,
            doi="https://doi.org/10.1371/journal.pcbi.1004845",
            reasons={stdpopsim.CiteReason.DEM_MODEL},
        )
    ]
    generation_time = _species.generation_time

    # Parameter values from "Simulating Data" section
    # these are assumptions, not estimates
    N_ref = 100000
    t_1_coal = 0.5
    t_2_coal = 5.0

    # estimates from the ANN
    N_R = 544200
    N_B = 145300
    N_A = 652700

    # Times are provided in 4N_ref generations, so we convert into generations.
    # generation_time = 10 / year
    # The second event time stacks the second interval on top of the first.
    t_1 = t_1_coal * 4 * N_ref
    t_2 = (t_1_coal + t_2_coal) * 4 * N_ref

    recent = msprime.PopulationConfiguration(
        initial_size=N_R, metadata=populations[0].asdict())
    # Size change at bottleneck (back in time; BIT)
    to_bottleneck = msprime.PopulationParametersChange(
        time=t_1, initial_size=N_B, population_id=0)
    # Size change at recovery (BIT)
    to_ancestral = msprime.PopulationParametersChange(
        time=t_2, initial_size=N_A, population_id=0)

    return stdpopsim.DemographicModel(
        id=model_id,
        description=description,
        long_description=long_description,
        populations=populations,
        citations=citations,
        generation_time=generation_time,
        population_configurations=[recent],
        demographic_events=[to_bottleneck, to_ancestral],
    )
class TestNoQCWarning(unittest.TestCase):
    """CLI behaviour for a demographic model that has no QC implementation.

    A fake model is registered on the ``EscCol`` species for each test and
    removed afterwards. The tests check that the CLI emits
    ``QCMissingWarning`` and that the model's citations are not written.
    """

    species = stdpopsim.get_species("EscCol")
    model = stdpopsim.DemographicModel(
        id="FakeModel",
        description="FakeModel",
        long_description="FakeModel",
        citations=[
            stdpopsim.Citation(
                author="Farnsworth",
                year=3000,
                doi="https://doi.org/10.1000/xyz123",
                reasons={stdpopsim.CiteReason.DEM_MODEL},
            )
        ],
        generation_time=10,
        populations=[stdpopsim.Population("Omicronians", "Popplers, etc.")],
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=1000)
        ],
    )

    def setUp(self):
        """Register the fake model on the species."""
        self.species.add_demographic_model(self.model)

    def tearDown(self):
        """Remove the fake model again so other tests do not see it."""
        self.species.demographic_models.remove(self.model)

    def verify_noQC_warning(self, cmd):
        """Run the CLI with ``cmd`` and require a ``QCMissingWarning``."""
        argv = cmd.split()
        with self.assertWarns(stdpopsim.QCMissingWarning):
            capture_output(stdpopsim.cli.stdpopsim_main, argv)

    def test_noQC_warning(self):
        self.verify_noQC_warning("EscCol -d FakeModel -D 10 -L 10")

    def test_noQC_warning_quiet(self):
        self.verify_noQC_warning("-q EscCol -d FakeModel -D 10 -L 10")

    def verify_noQC_citations_not_written(self, cmd):
        """Run the CLI with ``cmd`` and require that no citation is shown.

        Checks stdout, stderr and the captured log records, in that order.
        """
        # Non-QCed models shouldn't be used in publications, so citations
        # shouldn't be offered to the user.
        with self.assertLogs() as logs:
            out, err = capture_output(
                stdpopsim.cli.stdpopsim_main, cmd.split())
        log_output = "\n".join(logs.output)
        for citation in self.model.citations:
            for text in (out, err, log_output):
                self.assertFalse(citation.author in text)
                self.assertFalse(citation.doi in text)

    def test_noQC_citations_not_written(self):
        self.verify_noQC_citations_not_written(
            "EscCol -d FakeModel -D 10 -L 10")

    def test_noQC_citations_not_written_verbose(self):
        self.verify_noQC_citations_not_written(
            "-vv EscCol -d FakeModel -D 10 -L 10")
def test_population_configuration_initial_size(self):
    """Check the low-level representation of a size-only configuration.

    With only ``initial_size`` given, ``sample_size`` must be ``None`` and
    the low-level dict must hold the size divided by ``Ne`` and a zero
    growth rate.
    """
    reference_sizes = [1, 10, 1e6]
    for initial_size in [1, 10, 1000]:
        conf = msprime.PopulationConfiguration(initial_size=initial_size)
        self.assertIsNone(conf.sample_size)
        for Ne in reference_sizes:
            actual = conf.get_ll_representation(Ne)
            expected = {
                "initial_size": initial_size / Ne,
                "growth_rate": 0,
            }
            self.assertEqual(actual, expected)
def test_populations(self):
    """Check that mutating at rate 0 leaves a two-population simulation intact.

    The source simulation records migrations; after ``msprime.mutate`` the
    topology and provenance are verified with the class helpers and the
    site and mutation tables must be equal.
    """
    two_pops = [msprime.PopulationConfiguration(10) for _ in range(2)]
    ts = msprime.simulate(
        population_configurations=two_pops,
        migration_matrix=[[0, 1], [1, 0]],
        record_migrations=True,
        random_seed=1)
    mutated = msprime.mutate(ts, 0)

    before = ts.dump_tables()
    self.assertEqual(len(before.populations), 2)
    self.assertGreater(len(before.migrations), 0)

    after = mutated.dump_tables()
    self.verify_topology(before, after)
    self.verify_provenance(before, after)
    self.assertEqual(before.sites, after.sites)
    self.assertEqual(before.mutations, after.mutations)
def migration_simulation_cline(r_rate):
    """Simulate a linear chain of 20 demes with nearest-neighbour migration.

    Twenty samples are taken from deme 1 and twenty from deme 19; every
    other deme is unsampled. Each deme exchanges migrants at rate ``m``
    only with its immediate neighbours in the chain.

    :param r_rate: Recombination rate passed to ``msprime.simulate``.
    :return: The result of ``msprime.simulate``.
    """
    n_demes = 20
    m = 0.1 / 3.

    def unsampled_then_sampled():
        # An empty deme followed by a deme holding 20 samples.
        return [
            msprime.PopulationConfiguration(sample_size=0),
            msprime.PopulationConfiguration(sample_size=20),
        ]

    # Allocate the initial sample: one (empty, sampled) pair at each end of
    # the chain, with unsampled demes filling the middle.
    leading_pair = unsampled_then_sampled()
    trailing_pair = unsampled_then_sampled()
    n_middle = n_demes - len(leading_pair) * 2
    middle = [
        msprime.PopulationConfiguration(sample_size=0)
        for _ in range(n_middle)
    ]
    population_configurations = leading_pair + middle + trailing_pair

    # Migration happens only between adjacent demes; every other entry
    # (including the diagonal) is zero.
    migration_matrix = [
        [m if abs(row - col) == 1 else 0 for col in range(n_demes)]
        for row in range(n_demes)
    ]

    forest = msprime.simulate(
        Ne=500,
        population_configurations=population_configurations,
        length=10000,
        migration_matrix=migration_matrix,
        recombination_rate=r_rate)
    return forest
def test_population_configuration(self):
    """Simulate two populations with a model change at time 10 and verify it.

    The resulting tree sequence is passed to ``self.verify``.
    """
    model_change = msprime.SimulationModelChange(time=10)
    ts = msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration(5),
            msprime.PopulationConfiguration(5),
        ],
        migration_matrix=[[0, 1], [1, 0]],
        demographic_events=[model_change],
    )
    self.verify(ts)
def test_constant_single_pop(self):
    """Check the rate for one constant-size population of 10000.

    Every value returned by ``get_rates`` over 100 generations must equal
    ``1 / 2 / 10000``.
    """
    gens = 100
    single_pop = [msprime.PopulationConfiguration(initial_size=10000)]
    no_migration = [[0]]
    no_events = []

    rate = demography.coalescence_rates.get_rates(
        0, 0, single_pop, no_migration, no_events, gens)

    expected = 1./2/10000
    self.assertTrue(np.all(rate == expected))
def test_population_construction_popconfig_metadata(self):
    """Check that populations are derived from configuration metadata.

    No ``populations`` argument is given to ``DemographicModel``; its first
    population must match the ``Population`` whose ``asdict()`` was used as
    the configuration metadata.
    """
    source_pop = stdpopsim.Population(id="A", description="Pop A")
    config = msprime.PopulationConfiguration(
        initial_size=1, growth_rate=0.03, metadata=source_pop.asdict())

    dm = stdpopsim.DemographicModel(
        id="",
        description="",
        long_description="",
        generation_time=1,
        population_configurations=[config])

    self.assertEqual(dm.populations[0].asdict(), source_pop.asdict())
def migration(N_haps):
    """Build msprime inputs for a symmetric two-deme migration model.

    :param N_haps: Indexable with 0 and 1; deme ``k`` gets a sample size of
        ``2 * N_haps[k]``.
    :return: Tuple ``(population_configurations, migration_matrix)``.
    """
    # M is the overall symmetric migration rate, d is the number of demes.
    M = 0.2
    d = 2

    # Rescale M into the per-generation value used by msprime.
    m = M / (4 * (d - 1))

    # Allocate the initial sample, one configuration per deme.
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=2 * N_haps[deme])
        for deme in range(d)
    ]

    # Same rate in both directions; diagonal elements must be zero.
    migration_matrix = [
        [0, m],
        [m, 0],
    ]

    return population_configurations, migration_matrix
def two_bins(NA, N1, N2, Ts, M1, M2):
    """Variance of the mutation count over 500000 simulated replicates.

    Only population 1 is sampled (50 samples). The migration-matrix entry
    (0, 1) is ``M2`` until ``Ts / 2`` generations ago and ``M1`` before
    that; at ``Ts`` all lineages of population 1 are moved into
    population 0.

    :param NA: Passed to ``msprime.simulate`` as ``Ne``.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration; the rate change is at ``Ts / 2``.
    :param M1: Rate for matrix entry (0, 1) from ``Ts / 2`` onwards.
    :param M2: Rate for matrix entry (0, 1) before the rate change,
        looking backwards from the present.
    :return: One-element array holding ``np.var`` of the per-replicate
        mutation counts.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=0, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2),
    ]
    migration_matrix = [
        [0, M2],
        [0, 0],
    ]
    demographic_events = [
        msprime.MigrationRateChange(
            time=Ts / 2, rate=M1, matrix_index=(0, 1)),
        msprime.MassMigration(
            time=Ts, source=1, destination=0, proportion=1.0),
    ]

    replicates = 500000
    sim = msprime.simulate(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        mutation_rate=1e-7,
        recombination_rate=1e-8,
        length=100000,
        num_replicates=replicates)

    # Per-replicate statistics. NOTE(review): diversity and ld_variance are
    # filled in but only segregating feeds the return value.
    diversity = np.zeros(replicates)
    segregating = np.zeros(replicates)
    ld_variance = np.zeros(replicates)
    for index, tree_seq in enumerate(sim):
        diversity[index] = tree_seq.get_pairwise_diversity()
        segregating[index] = tree_seq.get_num_mutations()
        r2 = msprime.LdCalculator(tree_seq).get_r2_matrix()
        ld_variance[index] = np.var(r2)

    return np.array([np.var(segregating)])
def test_annotate_errors(self):
    """Check that ``pyslim.annotate_defaults`` rejects invalid input.

    Covers bad ``slim_generation`` and ``model_type`` values on every
    msprime example, then tree sequences whose samples cannot be paired
    into diploids: an odd count, mixed populations, and mixed times.
    """
    # Each pair is (model_type, slim_generation) and must raise ValueError.
    bad_arguments = [
        ("WF", 0),
        ("WF", 4.4),
        ("foo", 4),
        ([], 4),
    ]
    for ts in self.get_msprime_examples():
        for model_type, slim_generation in bad_arguments:
            with self.assertRaises(ValueError):
                _ = pyslim.annotate_defaults(
                    ts, model_type=model_type,
                    slim_generation=slim_generation)

    # odd number of samples
    ts = msprime.simulate(3)
    with self.assertRaisesRegex(ValueError, "diploid"):
        _ = pyslim.annotate_defaults(
            ts, model_type="WF", slim_generation=1)

    # inconsistent populations for diploids
    ts = msprime.simulate(
        population_configurations=[
            msprime.PopulationConfiguration(sample_size=3),
            msprime.PopulationConfiguration(sample_size=1),
        ],
        migration_matrix=[[0.0, 1.0], [1.0, 0.0]])
    with self.assertRaisesRegex(ValueError, "more than one population"):
        _ = pyslim.annotate_defaults(
            ts, model_type="WF", slim_generation=1)

    # inconsistent times for diploids
    samples = [
        msprime.Sample(population=0, time=sample_time)
        for sample_time in (0, 0, 0, 1)
    ]
    ts = msprime.simulate(samples=samples)
    with self.assertRaisesRegex(ValueError, "more than one time"):
        _ = pyslim.annotate_defaults(
            ts, model_type="WF", slim_generation=1)
def __init__(self):
    """Configure a single-population piecewise-constant size history.

    Sets ``times``, ``sizes``, ``generation_times``, ``demographic_events``,
    ``population_configurations`` and ``migration_matrix``. One size change
    is created per entry of ``times``; the size at time zero is
    ``sizes[0]``.
    """
    # Both of the following are directly
    # converted from MSMC output scaled by A.Thaliana
    # mutation rate 7e-9 and 1 generation
    # per year.
    self.times = np.array([
        6.990000e+02, 2.796000e+03, 6.068000e+03, 9.894000e+03,
        1.437000e+04, 1.960600e+04, 2.573000e+04, 3.289400e+04,
        4.127500e+04, 5.107700e+04, 6.254400e+04, 7.595800e+04,
        9.164800e+04, 1.100010e+05, 1.314710e+05, 1.565840e+05,
        1.859600e+05, 2.203240e+05, 2.605200e+05, 3.075400e+05,
        3.625410e+05, 4.268790e+05, 5.021390e+05, 5.901730e+05,
        6.931510e+05, 8.136100e+05, 9.545170e+05, 1.119341e+06,
        1.312147e+06, 1.537686e+06, 1.801500e+06, 2.110100e+06
    ])
    self.sizes = np.array([
        4.2252426e+07, 4.2252426e+07, 6.0323000e+04, 7.2174000e+04,
        4.0591000e+04, 2.1158000e+04, 2.1442000e+04, 3.9942000e+04,
        7.8908000e+04, 1.1113200e+05, 1.1074500e+05, 9.6283000e+04,
        8.7661000e+04, 8.3932000e+04, 8.3829000e+04, 9.1813000e+04,
        1.1164400e+05, 1.4345600e+05, 1.8157100e+05, 2.1733100e+05,
        2.4140000e+05, 2.4698400e+05, 2.3859300e+05, 2.2822200e+05,
        2.1775200e+05, 1.9801900e+05, 1.6521000e+05, 1.2179600e+05,
        1.2179600e+05, 7.3989000e+04, 7.3989000e+04, 7.3989000e+04
    ])

    # The first 8 epochs are "masked" to the Ne at index 8 (~40kya) due to
    # the limitations of MSMC to infer population size in this range.
    # Similarly, entries 30 and 31 are set to the value at index 30.
    #
    # Durvasula et al 2017 shows that MSMC has power in A.Thaliana
    # between 40kya and 1.6Mya.
    self.sizes[:8] = self.sizes[8]
    self.sizes[30:32] = self.sizes[30]

    # NOTE(review): attribute is spelled ``generation_times`` (plural);
    # confirm this is the name consumers read.
    self.generation_times = 1.0

    self.demographic_events = []
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=self.sizes[0]),
    ]
    for when, size in zip(self.times, self.sizes):
        self.demographic_events.append(
            msprime.PopulationParametersChange(
                time=when, initial_size=size, population_id=0))
    self.migration_matrix = [[0]]