def LiStephanTwoPopulation():
    """
    Build the two-population (AFR, EUR) model ``QC-OutOfAfrica_2L06``.

    Population 0 is Africa and population 1 is Europe. Going backwards in
    time, the European population is set to its founder size at ``T_EE``,
    is merged into Africa at ``T_E_A``, and the African population is set
    to its pre-expansion size at ``T_A0``.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-OutOfAfrica_2L06"
    generation_time = 0.1  # 10 generations per year

    # African sizes, from the section "Demographic History of the
    # African Population".
    N_A0 = 8.603e6  # modern African pop. size
    N_A1 = N_A0 / 5.0  # African pop. size before expansion

    # European sizes, from the section "Demographic History of the
    # European Population".
    N_E0 = 1.075e6  # modern European pop. size
    N_E1 = 2.2e3  # European founder pop. size

    # Times, from the sections "Demographic History of the * Population",
    # converted to generations.
    T_A0 = 6e4 / generation_time  # time of 1st expansion in African pop.
    T_E_A = 15.8e3 / generation_time  # European/African divergence time
    T_EE = T_E_A - 340 / generation_time  # time of European pop. re-expansion

    sampled_pops = [
        stdpopsim.Population("AFR", ""),
        stdpopsim.Population("EUR", ""),
    ]

    # Population sizes at T=0; index 0 is Africa, index 1 is Europe.
    present_day = [
        msprime.PopulationConfiguration(initial_size=N_A0, growth_rate=0),
        msprime.PopulationConfiguration(initial_size=N_E0, growth_rate=0),
    ]

    # Demographic events, listed working backwards in time.
    history = [
        # OOA bottleneck
        msprime.PopulationParametersChange(
            time=T_EE, initial_size=N_E1, population_id=1
        ),
        # E and A coalesce
        msprime.MassMigration(time=T_E_A, source=1, destination=0, proportion=1.0),
        # Pre-expansion Africa
        msprime.PopulationParametersChange(
            time=T_A0, initial_size=N_A1, population_id=0
        ),
    ]

    # Four independent, identical name -> index mappings
    # (presumably one per epoch: three event times give four epochs).
    id_maps = [{"AFR": 0, "EUR": 1} for _ in range(4)]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=generation_time,
        populations=sampled_pops,
        population_configurations=present_day,
        demographic_events=history,
        population_id_map=id_maps,
        mutation_rate=1.450e-9,
    )
def LockePongo():
    """
    Build the two-population orangutan model ``QC-TwoSpecies_2L11``.

    This is a split-migration style model with exponential growth or decay
    allowed in each population after the split. Population 0 is Bornean and
    population 1 is Sumatran.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-TwoSpecies_2L11"

    # They assumed a generation time of 20 years and a mutation rate of
    # 2e-8 per bp per gen.
    generation_time = 20

    # Parameters given in Table S21-2.
    Ne = 17934  # size used for the merged population at time T
    s = 0.592  # fraction of Ne assigned to the Bornean branch at the split
    NB0 = s * Ne
    NS0 = (1 - s) * Ne
    NBF = 8805  # Bornean size at time 0
    NSF = 37661  # Sumatran size at time 0
    mSB = 0.395 / 2 / Ne
    mBS = 0.239 / 2 / Ne
    T = 403149 / generation_time  # split time, in generations

    # Exponential rates that take each branch from its size at the split to
    # its size at time 0 over T generations.
    rB = np.log(NBF / NB0) / T
    rS = np.log(NSF / NS0) / T

    species_pops = [
        stdpopsim.Population("Bornean", ""),
        stdpopsim.Population("Sumatran", ""),
    ]

    # pop 0 is Bornean, pop 1 is Sumatran
    present_day = [
        msprime.PopulationConfiguration(initial_size=NBF, growth_rate=rB),
        msprime.PopulationConfiguration(initial_size=NSF, growth_rate=rS),
    ]

    # All three events happen at time T: merge, turn off migration, then
    # change size and growth rate of the merged population.
    split_events = [
        msprime.MassMigration(source=1, destination=0, time=T, proportion=1),
        msprime.MigrationRateChange(time=T, rate=0),
        msprime.PopulationParametersChange(
            time=T, initial_size=Ne, growth_rate=0, population_id=0
        ),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=generation_time,
        populations=species_pops,
        population_configurations=present_day,
        migration_matrix=[[0, mBS], [mSB, 0]],
        demographic_events=split_events,
        population_id_map=[
            {"Bornean": 0, "Sumatran": 1},
            {"Bornean": 0, "Sumatran": 1},
        ],
    )
def _5pop_test_demog(N=1000):
    """
    Build a five-population demographic model used for testing.

    :param N: initial size given to every population.
    :return: a ``stdpopsim.DemographicModel`` with id ``5pop_test``.
    """
    num_pops = 5
    populations = [
        stdpopsim.Population(f"pop{i}", f"Population {i}") for i in range(num_pops)
    ]
    pop_config = [
        msprime.PopulationConfiguration(initial_size=N, metadata=pop.asdict())
        for pop in populations
    ]

    # All-zero migration matrix except for a single entry at row 1, column 2.
    mig_mat = [[0] * num_pops for _ in range(num_pops)]
    mig_mat[1][2] = 1e-5

    dem_events = [
        msprime.MassMigration(time=100, source=0, destination=1, proportion=0.1),
        msprime.MassMigration(time=200, source=3, destination=2),
        # All migration rates are set to zero from time 200 onwards.
        msprime.MigrationRateChange(time=200, rate=0),
        msprime.MassMigration(time=300, source=1, destination=0),
        msprime.MassMigration(time=400, source=2, destination=4, proportion=0.1),
        msprime.MassMigration(time=600, source=2, destination=0),
        msprime.MassMigration(time=700, source=4, destination=0),
    ]

    return stdpopsim.DemographicModel(
        id="5pop_test",
        description="5pop_test",
        long_description="5pop_test",
        populations=populations,
        generation_time=1,
        population_configurations=pop_config,
        demographic_events=dem_events,
        migration_matrix=mig_mat,
    )
def HuberThreeEpoch():
    """
    Build the single-population three-epoch model ``QC-African3Epoch_1H18``.

    The population has size ``N_3`` at time 0, size ``N_2`` from ``T_3``
    generations ago, and size ``N_ANC`` from ``T_2 + T_3`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-African3Epoch_1H18"

    # Epoch durations: the second size change happens at T_2 + T_3.
    T_2 = 7420
    T_3 = 14534

    # Population sizes.
    N_ANC = 161744
    N_2 = 24076
    N_3 = 203077

    atl = stdpopsim.Population(id="ATL", description="A. thalina")

    size_changes = [
        msprime.PopulationParametersChange(
            time=T_3, initial_size=N_2, population_id=0
        ),
        msprime.PopulationParametersChange(
            time=T_2 + T_3, initial_size=N_ANC, population_id=0
        ),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=[atl],
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=N_3, metadata=atl.asdict()),
        ],
        demographic_events=size_changes,
    )
def HuberTwoEpoch():
    """
    Build the single-population two-epoch model ``QC-African2Epoch_1H18``.

    The population has size ``N_2`` at time 0 and size ``N_ANC`` from
    ``T_2`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-African2Epoch_1H18"

    # Time of second epoch.
    T_2 = 568344

    # Population sizes.
    N_ANC = 746148
    N_2 = 100218

    atl = stdpopsim.Population(id="ATL", description="A. thalina")

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=[atl],
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=N_2, metadata=atl.asdict()),
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=T_2, initial_size=N_ANC, population_id=0
            ),
        ],
    )
def test_sampling_times_equal(self):
    """Check ``stdpopsim.sampling_times_equal`` on differing and identical lists."""
    no_sample_pop = stdpopsim.Population("none", "none", sampling_time=None)
    zero_sample_pop = stdpopsim.Population("zero", "zero")
    nonzero_sample_pop = stdpopsim.Population("nzero", "nzero", sampling_time=10)

    plist1 = [no_sample_pop] * 2 + [nonzero_sample_pop] + [zero_sample_pop] * 2
    plist2 = [no_sample_pop] * 4 + [nonzero_sample_pop]
    plist3 = [no_sample_pop] * 3 + [nonzero_sample_pop]

    # Pairs that must compare as NOT equal.
    mismatched = [
        ([no_sample_pop], [zero_sample_pop]),
        ([nonzero_sample_pop], [zero_sample_pop]),
        (plist1, plist2),
        (plist1, plist3),
    ]
    for left, right in mismatched:
        self.assertFalse(stdpopsim.sampling_times_equal(left, right))

    # A list always matches itself.
    self.assertTrue(stdpopsim.sampling_times_equal(plist3, plist3))
class _PiecewiseSize(stdpopsim.DemographicModel):
    """
    A copy of stdpopsim.PiecewiseConstantSize that permits growth rates.

    :param N0: initial size of the single population.
    :param growth_rate: growth rate of the single population at time 0.
    :param args: ``(time, initial_size, growth_rate)`` triples; each one
        becomes a ``PopulationParametersChange`` on population 0, in the
        order given.
    """

    id = "Piecewise"
    description = "Piecewise size population model over multiple epochs."
    citations = []
    populations = [stdpopsim.Population(id="pop0", description="Population 0")]
    author = None
    year = None
    doi = None

    def __init__(self, N0, growth_rate, *args):
        only_pop = self.populations[0]
        self.population_configurations = [
            msprime.PopulationConfiguration(
                initial_size=N0,
                growth_rate=growth_rate,
                metadata=only_pop.asdict(),
            )
        ]
        self.migration_matrix = [[0]]
        # One parameter change per (time, size, rate) triple.
        self.demographic_events = [
            msprime.PopulationParametersChange(
                time=when,
                initial_size=epoch_size,
                growth_rate=epoch_rate,
                population_id=0,
            )
            for when, epoch_size, epoch_rate in args
        ]
def _sma_1pop():
    """
    Build the ``SouthMiddleAtlas_1D17`` piecewise constant size model.

    The size during the interval ``times[k]`` to ``times[k+1]`` is
    ``sizes[k]``; the size from time 0 up to ``times[0]`` is ``sizes[0]``.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100])
    sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al 2017).
    # Overwrite entries 0..7 with the value at index 8 (times[8] = 41275,
    # i.e. ~40Kya).
    sizes[:8] = sizes[8]
    # Overwrite entries 30 and 31 with the value at index 30 (~1.6Mya).
    sizes[30:32] = sizes[30]

    # One size change per (size, time) pair, all on the single population.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=t, initial_size=sz, population_id=0)
        for sz, t in zip(sizes, times)
    ]

    sma = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population")
    citation = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL})

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description="""
            This model comes from MSMC using two randomly sampled homozygous
            individuals (Khe32 and Ifr4) from the South Middle Atlas region
            from the Middle Atlas Mountains in Morocco. The model is estimated
            with 32 time periods. Because estimates from the recent and ancient
            past are less accurate, we set the population size in the first 7
            time periods equal to the size at the 8th time period and the size
            during last 2 time periods equal to the size in the 30th time
            period.
        """,
        populations=[sma],
        citations=[citation],
        generation_time=1,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=sizes[0], metadata=sma.asdict())
        ]
    )
def _afr_2epoch():
    """
    Build the ``African2Epoch_1H18`` two-epoch model.

    The single population has size ``N_0`` at time 0 and size ``N_A`` from
    ``t_1`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    N_A = 746148
    N_0 = 100218
    t_1 = 568344

    africa = stdpopsim.Population(
        id="Africa",
        description="Arabidopsis thaliana African population")
    huber = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL})

    return stdpopsim.DemographicModel(
        id="African2Epoch_1H18",
        description="African two epoch model",
        long_description="""
            Model estimated from site frequency spectrum of synonymous
            SNPs from African samples using Williamson et al. (2005)
            methodology.
        """,
        populations=[africa],
        citations=[huber],
        generation_time=1,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=N_0, metadata=africa.asdict())
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_1, initial_size=N_A, population_id=0)
        ]
    )
def HuberTwoEpoch():
    """
    Build the single-population two-epoch model ``QC-African2Epoch_1H18``.

    The population has size ``N_2`` at time 0 and size ``N_ANC`` from
    ``T_2`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-African2Epoch_1H18"

    # Time of second epoch.
    T_2 = 568344

    # Population sizes.
    N_ANC = 746148
    N_2 = 100218

    sma = stdpopsim.Population(id="SouthMiddleAtlas", description="A. thalina")

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=[sma],
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=N_2, metadata=sma.asdict()),
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=T_2, initial_size=N_ANC, population_id=0
            ),
        ],
        # The same mapping object is listed twice.
        population_id_map=[{"SouthMiddleAtlas": 0}] * 2,
        # Huber et al say "7e-9" but then refer to Ossowski,
        # which Durvasula reported as giving 7.1e-9
        mutation_rate=7e-9,
    )
class TestNoQCWarning(unittest.TestCase):
    """
    CLI behaviour for a demographic model that is registered on the
    ``EscCol`` species only for the duration of each test: the CLI must emit
    ``QCMissingWarning`` and must not print the model's citations.
    """

    species = stdpopsim.get_species("EscCol")
    model = stdpopsim.DemographicModel(
        id="FakeModel",
        description="FakeModel",
        long_description="FakeModel",
        citations=[
            stdpopsim.Citation(
                author="Farnsworth",
                year=3000,
                doi="https://doi.org/10.1000/xyz123",
                reasons={stdpopsim.CiteReason.DEM_MODEL},
            )
        ],
        generation_time=10,
        populations=[stdpopsim.Population("Omicronians", "Popplers, etc.")],
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=1000)
        ],
    )

    def setUp(self):
        # Register the fake model before every test ...
        self.species.add_demographic_model(self.model)

    def tearDown(self):
        # ... and unregister it afterwards.
        self.species.demographic_models.remove(self.model)

    def verify_noQC_warning(self, cmd):
        """Run the CLI with ``cmd`` and require a ``QCMissingWarning``."""
        argv = cmd.split()
        with self.assertWarns(stdpopsim.QCMissingWarning):
            capture_output(stdpopsim.cli.stdpopsim_main, argv)

    def test_noQC_warning(self):
        self.verify_noQC_warning("EscCol -d FakeModel -D 10 -L 10")

    def test_noQC_warning_quiet(self):
        self.verify_noQC_warning("-q EscCol -d FakeModel -D 10 -L 10")

    def verify_noQC_citations_not_written(self, cmd):
        """
        Run the CLI with ``cmd`` and require that no citation author or DOI
        of the fake model appears in stdout, stderr or the captured logs.
        """
        # Non-QCed models shouldn't be used in publications, so citations
        # shouldn't be offered to the user.
        with self.assertLogs() as logs:
            out, err = capture_output(stdpopsim.cli.stdpopsim_main, cmd.split())
        log_output = "\n".join(logs.output)
        for citation in self.model.citations:
            for stream in (out, err, log_output):
                self.assertNotIn(citation.author, stream)
                self.assertNotIn(citation.doi, stream)

    def test_noQC_citations_not_written(self):
        self.verify_noQC_citations_not_written(
            "EscCol -d FakeModel -D 10 -L 10")

    def test_noQC_citations_not_written_verbose(self):
        self.verify_noQC_citations_not_written(
            "-vv EscCol -d FakeModel -D 10 -L 10")
def test_population_construction_popconfig_metadata(self):
    """
    A model built without ``populations`` must expose a population whose
    ``asdict()`` equals the metadata attached to the population configuration.
    """
    pop0 = stdpopsim.Population(id="A", description="Pop A")
    expected = pop0.asdict()
    config = msprime.PopulationConfiguration(
        initial_size=1, growth_rate=0.03, metadata=pop0.asdict())
    dm = stdpopsim.DemographicModel(
        id="",
        description="",
        long_description="",
        generation_time=1,
        population_configurations=[config])
    self.assertEqual(dm.populations[0].asdict(), expected)
def _sma_1pop():
    """
    Build the ``SouthMiddleAtlas_1D17`` piecewise constant size model.

    The size during the interval ``times[k]`` to ``times[k+1]`` is
    ``sizes[k]``; the size from time 0 up to ``times[0]`` is ``sizes[0]``.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
        41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
        185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
        693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500, 2110100])
    sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
        111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
        217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al 2017).
    # Overwrite entries 0..7 with the value at index 8 (times[8] = 41275,
    # i.e. ~40Kya).
    sizes[:8] = sizes[8]
    # Overwrite entries 30 and 31 with the value at index 30 (~1.6Mya).
    sizes[30:32] = sizes[30]

    # One size change per (size, time) pair, all on the single population.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=t, initial_size=sz, population_id=0)
        for sz, t in zip(sizes, times)
    ]

    sma = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population")
    citation = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL})

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        # TODO more detail here. We should at least explain that we're skipping
        # some of the estimates from MSMC because we don't think they're accurate.
        long_description="""
            Model estimated from two homozygous individuals from the South
            Middle Atlas using MSMC (TODO: more detail).
        """,
        populations=[sma],
        citations=[citation],
        generation_time=1,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=sizes[0], metadata=sma.asdict())
        ]
    )
def make_model(self):
    """
    Return a three-population ``models.DemographicModel`` for tests.

    The populations use the default sampling time, a sampling time of 10,
    and ``sampling_time=None`` respectively; every population has
    ``initial_size=1``.
    """
    # Populations to test on.
    test_pops = [
        stdpopsim.Population("pop0", "Test pop. 0"),
        stdpopsim.Population("pop1", "Test pop. 1", sampling_time=10),
        stdpopsim.Population("pop2", "Test pop. 2", sampling_time=None),
    ]
    # A model to hold the populations: one size-1 configuration each.
    return models.DemographicModel(
        id="x",
        description="y",
        long_description="z",
        populations=test_pops,
        population_configurations=[
            msprime.PopulationConfiguration(initial_size=1) for _ in test_pops
        ],
    )
class _Durvasula2017MSMC(ArabidopsisThalianaModel):
    """
    Single-population piecewise constant size model built from the
    ``times`` / ``sizes`` arrays set in ``__init__``: the size during the
    interval ``times[k]`` to ``times[k+1]`` is ``sizes[k]``.
    """

    id = "fixme"  # FIXME
    name = "Please give me a descriptive name"
    description = """
        Model estimated from two homozygous individuals from the South
        Middle Atlas using MSMC (TODO: more detail).
    """
    populations = [
        stdpopsim.Population(
            name="a_thaliana",
            description="Arabidopsis Thaliana population")
    ]
    citations = [
        stdpopsim.Citation(
            author="Durvasula et al.",
            year=2017,
            doi="TODO")  # FIXME
    ]

    def __init__(self):
        super().__init__()
        self.times = np.array([
            699, 2796, 6068, 9894, 14370, 19606, 25730, 32894,
            41275, 51077, 62544, 75958, 91648, 110001, 131471, 156584,
            185960, 220324, 260520, 307540, 362541, 426879, 502139, 590173,
            693151, 813610, 954517, 1119341, 1312147, 1537686, 1801500,
            2110100])
        self.sizes = np.array([
            42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
            78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813,
            111644, 143456, 181571, 217331, 241400, 246984, 238593, 228222,
            217752, 198019, 165210, 121796, 121796, 73989, 73989, 73989])

        # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al
        # 2017). Overwrite entries 0..7 with the value at index 8
        # (times[8] = 41275, i.e. ~40Kya) ...
        self.sizes[:8] = self.sizes[8]
        # ... and entries 30 and 31 with the value at index 30 (~1.6Mya).
        self.sizes[30:32] = self.sizes[30]

        # generation time is 1 year
        self.generation_time = 1

        # One size change per (time, size) pair on the single population.
        self.demographic_events = [
            msprime.PopulationParametersChange(
                time=t, initial_size=sz, population_id=0)
            for t, sz in zip(self.times, self.sizes)
        ]
        self.migration_matrix = [[0]]
        self.population_configurations = [
            msprime.PopulationConfiguration(initial_size=self.sizes[0])
        ]
def _african():
    """
    Build the single-population ``Africa_1T12`` model.

    Backwards in time, the population grows at rate ``r_AF`` until
    ``T_EG``, where it is set to size ``N_AF1`` with zero growth, and is
    set to size ``N_A`` at ``T_AF``.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    generation_time = 25

    # Event times, converted from years to generations.
    T_AF = 148e3 / generation_time
    T_EG = 5115 / generation_time

    # Growth rate
    r_AF = 0.0166

    # population sizes
    N_A = 7310
    N_AF1 = 14474
    # present Ne: N_AF1 grown at rate r_AF over T_EG generations
    N_AF = N_AF1 / math.exp(-r_AF * T_EG)

    afr = stdpopsim.Population(id="AFR", description="African")

    present_day = msprime.PopulationConfiguration(
        initial_size=N_AF, growth_rate=r_AF, metadata=afr.asdict())
    history = [
        msprime.PopulationParametersChange(
            time=T_EG, growth_rate=0, initial_size=N_AF1, population_id=0),
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]

    return stdpopsim.DemographicModel(
        id="Africa_1T12",
        description="African population",
        long_description="""
        The model is a simplification of the two population Tennesen et al.
        model with the European-American population removed so that we are
        modeling the African population in isolation.
    """,
        populations=[afr],
        citations=[_tennessen_et_al],
        generation_time=generation_time,
        population_configurations=[present_day],
        demographic_events=history,
    )
def _afr_3epoch():
    """
    Build the ``African3Epoch_1H18`` three-epoch model.

    The single population has size ``N_3`` at time 0, size ``N_2`` from
    ``t_3`` generations ago, and size ``N_A`` from ``t_2 + t_3``
    generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    # Values from the supplementary Table 1; the size changed as
    # N_A -> N_2 -> N_3.
    N_A = 161744
    N_2 = 24076
    N_3 = 203077
    # t_2 is time of 2nd epoch and t_3 of the third epoch.
    t_2 = 7420
    t_3 = 14534

    sma = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    huber = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )
    size_changes = [
        msprime.PopulationParametersChange(
            time=t_3, initial_size=N_2, population_id=0
        ),
        msprime.PopulationParametersChange(
            time=t_2 + t_3, initial_size=N_A, population_id=0
        ),
    ]

    return stdpopsim.DemographicModel(
        id="African3Epoch_1H18",
        description="South Middle Atlas African three epoch model",
        long_description="""
            Model estimated from site frequency spectrum of synonymous
            SNPs from African (South Middle Atlas) samples using Williamson
            et al. 2005 methodology. Values come from supplementary table 1
            of Huber et al 2018. Sizes change from N_A -> N_2 -> N_3 and t_2
            is the time of the second epoch and t_3 is the time of the 3rd
            epoch.
        """,
        populations=[sma],
        citations=[huber],
        generation_time=1,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=N_3, metadata=sma.asdict()
            )
        ],
        demographic_events=size_changes,
    )
class _TennessenOnePopAfrica(HomoSapiensModel):
    """
    Single African population model.

    Backwards in time, the population grows at rate ``r_AF`` until
    ``T_EG``, where it is set to size ``N_AF1`` with zero growth, and is
    set to size ``N_A`` at ``T_AF``. Times are converted from years with
    ``self.generation_time``, which this class does not set itself.
    """

    id = "african"
    name = "African population"
    description = """
        The model is a simplification of the two population Tennesen et al.
        model with the European-American population removed so that we are
        modeling the African population in isolation.
    """
    populations = [
        stdpopsim.Population(name="AFR", description="African"),
    ]
    citations = [_tennessen_et_al]

    def __init__(self):
        super().__init__()

        # Event times, converted from years to generations.
        T_AF = 148e3 / self.generation_time
        T_EG = 5115 / self.generation_time

        # Growth rate
        r_AF = 0.0166

        # population sizes
        N_A = 7310
        N_AF1 = 14474
        # present Ne: N_AF1 grown at rate r_AF over T_EG generations
        N_AF = N_AF1 / math.exp(-r_AF * T_EG)

        afr_meta = self.populations[0].asdict()
        self.population_configurations = [
            msprime.PopulationConfiguration(
                initial_size=N_AF, growth_rate=r_AF, metadata=afr_meta),
        ]
        self.migration_matrix = [[0]]

        stop_growth = msprime.PopulationParametersChange(
            time=T_EG, growth_rate=0, initial_size=N_AF1, population_id=0)
        ancestral_size = msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0)
        self.demographic_events = [stop_growth, ancestral_size]
def SheehanSongThreeEpic():
    """
    Build the single-population three-epoch model ``QC-African3Epoch_1S16``.

    The population has size ``N_1`` at time 0, size ``N_2`` from ``T_1``
    generations ago and size ``N_3`` from ``T_2`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-African3Epoch_1S16"

    # Model from paper https://doi.org/10.1371/journal.pcbi.1004845
    # Parameters are taken from table 7 using the average stat prediction
    # values as those were generally stated to be the best.
    N_1 = 544.2e3  # recent
    N_2 = 145.3e3  # bottleneck
    N_3 = 652.7e3  # ancestral

    # Times taken from the simulating data section based on PSMC and
    # converted to number of generations from coalescent units using the
    # baseline effective population size (scaled here by 4 * N_ref).
    N_ref = 1e5
    t_1_coal = 0.5
    t_2_coal = 5
    T_1 = t_1_coal * 4 * N_ref
    T_2 = (t_1_coal + t_2_coal) * 4 * N_ref

    # Population size at T=0.
    present_day = [
        msprime.PopulationConfiguration(initial_size=N_1, growth_rate=0),
    ]

    # Demographic events, working backwards in time.
    size_changes = [
        # Bottleneck
        msprime.PopulationParametersChange(
            time=T_1, initial_size=N_2, population_id=0
        ),
        # Ancestral population size
        msprime.PopulationParametersChange(
            time=T_2, initial_size=N_3, population_id=0
        ),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=[stdpopsim.Population("AFR", "")],
        population_configurations=present_day,
        demographic_events=size_changes,
        # The same mapping object is listed three times.
        population_id_map=[{"AFR": 0}] * 3,
    )
def test_ooa_model(self):
    """
    The catalog model ``OutOfAfrica_3G09`` must verify as equal to a model
    built from ``examples.out_of_africa()``.
    """
    correct_model = stdpopsim.get_species("HomSap").get_demographic_model(
        "OutOfAfrica_3G09")
    ooa_docs = examples.out_of_africa()
    configs = ooa_docs["population_configurations"]

    # Clear the sample sizes carried by the example's configurations.
    for pop_config in configs:
        pop_config.sample_size = None
    # One placeholder population per configuration.
    pops = [stdpopsim.Population(id=None, description=None) for _ in configs]

    local_model = stdpopsim.DemographicModel(
        id=None,
        description=None,
        long_description=None,
        generation_time=None,
        populations=pops,
        population_configurations=configs,
        migration_matrix=ooa_docs["migration_matrix"],
        demographic_events=ooa_docs["demographic_events"],
    )
    correct_model.verify_equal(local_model)
def _afr_2epoch():
    """
    Build the ``African2Epoch_1H18`` two-epoch model.

    The single population has size ``N_0`` at time 0 and size ``N_A`` from
    ``t_1`` generations ago.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    N_A = 746148
    N_0 = 100218
    t_1 = 568344

    sma = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    huber = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    return stdpopsim.DemographicModel(
        id="African2Epoch_1H18",
        description="South Middle Atlas African two epoch model",
        long_description="""
            Model estimated from site frequency spectrum of synonymous
            SNPs from African South Middle Atlas samples using Williamson
            et al. 2005 methodology. Values come from supplementary table 1
            of Huber et al 2018. Sizes change from N_A -> N_0 and t_1 is
            time of the second epoch.
        """,
        populations=[sma],
        citations=[huber],
        generation_time=1,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=N_0, metadata=sma.asdict()
            )
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_1, initial_size=N_A, population_id=0
            )
        ],
    )
stdpopsim.Citation(author="Comeron et al", doi="https://doi.org/10.1371/journal.pgen.1002905", year=2012, reasons={stdpopsim.CiteReason.GEN_MAP}) ]) _species.add_genetic_map(_gm) ########################################################### # # Demographic models # ########################################################### # population definitions that are reused. _afr_population = stdpopsim.Population( id="AFR", description="African D. melanogaster population") _eur_population = stdpopsim.Population( id="EUR", description="European D. melanogaster population") def _afr_3epoch(): id = "African3Epoch_1S16" description = "Three epoch African population" long_description = """ The three epoch (modern, bottleneck, ancestral) model estimated for a single African Drosophila Melanogaster population from Sheehan and Song (2016). Population sizes are estimated by a deep learning model trained on simulation data. NOTE: Due to differences in coalescence units between PSMC (2N) and msms (4N) the number of generations were doubled from PSMC estimates when simulating data from msms in the original publication. We have faithfully represented the published model here.
class TestNoQCWarning:
    """
    CLI behaviour for a demographic model that is registered on the
    ``EscCol`` species only while this class runs: the CLI must emit
    ``QCMissingWarning`` and must not print the model's citations.
    """

    species = stdpopsim.get_species("EscCol")
    model = stdpopsim.DemographicModel(
        id="FakeModel",
        description="FakeModel",
        long_description="FakeModel",
        citations=[
            stdpopsim.Citation(
                author="Farnsworth",
                year=3000,
                doi="https://doi.org/10.1000/xyz123",
                reasons={stdpopsim.CiteReason.DEM_MODEL},
            )
        ],
        generation_time=10,
        populations=[stdpopsim.Population("Omicronians", "Popplers, etc.")],
        population_configurations=[msprime.PopulationConfiguration(initial_size=1000)],
    )

    @classmethod
    def setup_class(cls):
        # Register the fake model once for the whole class ...
        cls.species.add_demographic_model(cls.model)

    @classmethod
    def teardown_class(cls):
        # ... and unregister it when the class is done.
        cls.species.demographic_models.remove(cls.model)

    def verify_noQC_warning(self, cmd):
        """Run the CLI with ``cmd`` and require a ``QCMissingWarning``."""
        argv = cmd.split()
        # setup_logging() interferes with pytest.warns(), so it is patched out.
        with mock.patch(
            "stdpopsim.cli.setup_logging", autospec=True
        ), pytest.warns(stdpopsim.QCMissingWarning):
            capture_output(stdpopsim.cli.stdpopsim_main, argv)

    def test_noQC_warning(self):
        self.verify_noQC_warning("EscCol -d FakeModel -D 10 -L 10")

    def test_noQC_warning_quiet(self):
        self.verify_noQC_warning("-q EscCol -d FakeModel -D 10 -L 10")

    def verify_noQC_citations_not_written(self, cmd, caplog):
        """
        Run the CLI with ``cmd`` and require that no citation author or DOI
        of the fake model appears in stdout, stderr or ``caplog.text``.
        """
        # Non-QCed models shouldn't be used in publications, so citations
        # shouldn't be offered to the user.
        out, err = capture_output(stdpopsim.cli.stdpopsim_main, cmd.split())
        log_output = caplog.text
        for citation in self.model.citations:
            for stream in (out, err, log_output):
                assert citation.author not in stream
                assert citation.doi not in stream

    # The following two tests use the "caplog" pytest fixture, which captures
    # the logging output. The caplog param is automatically passed to test_*()
    # methods by pytest, which we pass through to
    # verify_noQC_citations_not_written().

    @pytest.mark.filterwarnings("ignore::stdpopsim.QCMissingWarning")
    @pytest.mark.usefixtures("caplog")
    def test_noQC_citations_not_written(self, caplog):
        self.verify_noQC_citations_not_written(
            "EscCol -d FakeModel -D 10 -L 10", caplog
        )

    @pytest.mark.filterwarnings("ignore::stdpopsim.QCMissingWarning")
    @pytest.mark.usefixtures("caplog")
    def test_noQC_citations_not_written_verbose(self, caplog):
        self.verify_noQC_citations_not_written(
            "-vv EscCol -d FakeModel -D 10 -L 10", caplog
        )
def hominin_composite():
    """
    Build the four-population ``HomininComposite_4G20`` model.

    Populations, in index order, are YRI, CEU, Nea and Anc; Anc is created
    with ``sampling_time=None``. Backwards in time: CEU stops growing and
    takes the bottleneck size, a fraction ``m_Nea_CEU`` of CEU lineages
    moves to Nea, CEU and YRI merge into Anc, and finally Nea merges into
    Anc.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    # samples:
    # T_Altai = 115e3
    # T_Vindija = 55e3
    # n_YRI = 108
    # n_CEU = 99
    populations = [
        stdpopsim.Population(id="YRI", description="1000 Genomes YRI (Yorubans)"),
        stdpopsim.Population(
            id="CEU",
            description=(
                "1000 Genomes CEU (Utah Residents (CEPH) with Northern and "
                "Western European Ancestry"
            ),
        ),
        stdpopsim.Population(id="Nea", description="Neandertal lineage"),
        stdpopsim.Population(
            id="Anc", description="Ancestral hominins", sampling_time=None
        ),
    ]
    # Population id -> index into ``populations``.
    pop = {p.id: i for i, p in enumerate(populations)}

    generation_time = 29

    # Kuhlwilm et al. 2016
    N_YRI = 27000
    N_Nea = 3400
    N_Anc = 18500

    # Ragsdale & Gravel 2019
    N_CEU0 = 1450
    r_CEU = 0.00202
    T_CEU_exp = 31.9e3 / generation_time
    N_CEU = N_CEU0 * math.exp(r_CEU * T_CEU_exp)
    T_YRI_CEU_split = 65.7e3 / generation_time
    N_ooa_bottleneck = 1080

    # Prüfer et al. 2017
    T_Nea_human_split = 550e3 / generation_time
    T_Nea_CEU_mig = 55e3 / generation_time
    m_Nea_CEU = 0.0225

    # Metadata dicts, in the same order as ``populations``.
    meta = [p.asdict() for p in populations]
    population_configurations = [
        msprime.PopulationConfiguration(initial_size=N_YRI, metadata=meta[0]),
        msprime.PopulationConfiguration(
            initial_size=N_CEU, growth_rate=r_CEU, metadata=meta[1]
        ),
        msprime.PopulationConfiguration(initial_size=N_Nea, metadata=meta[2]),
        msprime.PopulationConfiguration(initial_size=N_Anc, metadata=meta[3]),
    ]

    # out-of-Africa bottleneck
    ooa_bottleneck = msprime.PopulationParametersChange(
        time=T_CEU_exp,
        initial_size=N_ooa_bottleneck,
        growth_rate=0,
        population_id=pop["CEU"],
    )
    # Neandertal -> CEU admixture
    nea_admixture = msprime.MassMigration(
        time=T_Nea_CEU_mig,
        proportion=m_Nea_CEU,
        source=pop["CEU"],
        destination=pop["Nea"],
    )
    # population splits
    splits = [
        msprime.MassMigration(
            time=T_YRI_CEU_split, source=pop["CEU"], destination=pop["Anc"]
        ),
        msprime.MassMigration(
            time=T_YRI_CEU_split, source=pop["YRI"], destination=pop["Anc"]
        ),
        msprime.MassMigration(
            time=T_Nea_human_split, source=pop["Nea"], destination=pop["Anc"]
        ),
    ]

    return stdpopsim.DemographicModel(
        id="HomininComposite_4G20",
        description="Four population out of Africa with Neandertal admixture",
        long_description="""
        A composite of demographic parameters from multiple sources
    """,
        populations=populations,
        citations=[
            stdpopsim.Citation(
                author="Kuhlwilm et al.",
                year=2016,
                doi="https://doi.org/10.1038/nature16544",
            ),
            stdpopsim.Citation(
                author="Prüfer et al.",
                year=2017,
                doi="https://doi.org/10.1126/science.aao1887",
            ),
            stdpopsim.Citation(
                author="Ragsdale and Gravel",
                year=2019,
                doi="https://doi.org/10.1371/journal.pgen.1008204",
            ),
        ],
        generation_time=generation_time,
        population_configurations=population_configurations,
        demographic_events=[ooa_bottleneck, nea_admixture, *splits],
    )
def hominin_composite_archaic_africa():
    """
    Build ``HomininComposite2_4G20``: the model returned by
    ``hominin_composite()`` plus one extra population, ``ArchaicAFR``.

    The populations, population configurations and demographic events of
    the base model are reused and extended. Symmetric migration at rate
    ``m_AF_arch_af`` is set between ``ArchaicAFR`` and ``YRI`` from
    ``T_arch_adm_end`` to ``T_YRI_CEU_split``, and between ``ArchaicAFR``
    and ``Anc`` from ``T_YRI_CEU_split`` to ``T_arch_afr_mig``; at
    ``T_arch_afr_split`` ``ArchaicAFR`` is merged into ``Anc``.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    dm = hominin_composite()
    id = "HomininComposite2_4G20"
    description = "HomininComposite2_4G20 plus archaic lineage in Africa"
    generation_time = 29

    # Ragsdale & Gravel 2019
    N_0 = 3600
    T_arch_afr_split = 499e3 / generation_time
    T_arch_afr_mig = 125e3 / generation_time
    T_arch_adm_end = 18.7e3 / generation_time
    m_AF_arch_af = 1.98e-5
    T_YRI_CEU_split = 65.7e3 / generation_time

    # `+` builds new lists, so the base model's own lists are not modified.
    populations = dm.populations + [
        stdpopsim.Population(
            "ArchaicAFR", "Putative Archaic Africans", sampling_time=None
        ),
    ]
    population_configurations = dm.population_configurations + [
        msprime.PopulationConfiguration(
            initial_size=N_0, metadata=populations[-1].asdict()
        ),
    ]
    # Population id -> index into the extended ``populations`` list.
    pop = {p.id: i for i, p in enumerate(populations)}
    demographic_events = dm.demographic_events + [
        # migration turned on between Yoruban and archaic African populations
        msprime.MigrationRateChange(
            time=T_arch_adm_end,
            rate=m_AF_arch_af,
            matrix_index=(pop["YRI"], pop["ArchaicAFR"]),
        ),
        msprime.MigrationRateChange(
            time=T_arch_adm_end,
            rate=m_AF_arch_af,
            matrix_index=(pop["ArchaicAFR"], pop["YRI"]),
        ),
        # YRI merges into Anc: turn off migration between YRI and ArchaicAFR.
        msprime.MigrationRateChange(
            time=T_YRI_CEU_split,
            rate=0,
            matrix_index=(pop["YRI"], pop["ArchaicAFR"]),
        ),
        msprime.MigrationRateChange(
            time=T_YRI_CEU_split,
            rate=0,
            matrix_index=(pop["ArchaicAFR"], pop["YRI"]),
        ),
        # migration turned on between African and archaic African populations
        msprime.MigrationRateChange(
            time=T_YRI_CEU_split,
            rate=m_AF_arch_af,
            matrix_index=(pop["Anc"], pop["ArchaicAFR"]),
        ),
        msprime.MigrationRateChange(
            time=T_YRI_CEU_split,
            rate=m_AF_arch_af,
            matrix_index=(pop["ArchaicAFR"], pop["Anc"]),
        ),
        # Beginning of migration between African and archaic African populations
        msprime.MigrationRateChange(
            time=T_arch_afr_mig,
            rate=0,
            matrix_index=(pop["Anc"], pop["ArchaicAFR"]),
        ),
        msprime.MigrationRateChange(
            time=T_arch_afr_mig,
            rate=0,
            matrix_index=(pop["ArchaicAFR"], pop["Anc"]),
        ),
        # Archaic African merges with moderns
        msprime.MassMigration(
            time=T_arch_afr_split,
            source=pop["ArchaicAFR"],
            destination=pop["Anc"],
            proportion=1.0,
        ),
    ]
    # Interleave base and new events by time. list.sort() is stable, so
    # events that share a time keep the relative order they have above
    # (base-model events first, then the additions in listed order).
    demographic_events.sort(key=lambda x: x.time)

    return stdpopsim.DemographicModel(
        id=id,
        description=description,
        long_description=dm.long_description,
        populations=populations,
        # Copy so that the new model does not share the base model's list.
        citations=dm.citations.copy(),
        generation_time=generation_time,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
    )
def _orangutan():
    """
    Build the two-population orangutan model "TwoSpecies_2L11".

    Population 0 is Bornean and population 1 is Sumatran. Both populations
    change size exponentially between the split and the present and exchange
    migrants; at the split time population 1 is merged into population 0,
    migration is switched off and the size is set to the ancestral size with
    zero growth.

    :return: The assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "TwoSpecies_2L11"
    summary = "Two population orangutan model"
    details = """
        The two orang-utan species, Sumatran (Pongo abelii) and Bornean
        (Pongo pygmaeus) inferred from the joint-site frequency spectrum with
        ten individuals from each population. This model is an isolation-with-
        migration model, with exponential growth or decay in each population
        after the split. The Sumatran population grows in size, while the
        Bornean population slightly declines.
        """
    model_citations = [_locke2011.because(stdpopsim.CiteReason.DEM_MODEL)]

    bornean = stdpopsim.Population(
        "Bornean", "Pongo pygmaeus (Bornean) population"
    )
    sumatran = stdpopsim.Population(
        "Sumatran", "Pongo abelii (Sumatran) population"
    )
    model_populations = [bornean, sumatran]

    # Parameters from paper.
    ancestral_size = 17934  # size before the split
    split_years = 403149  # time of split
    bornean_fraction = 0.592  # proportion of ancestral pop to branch B
    bornean_now = 8805  # present sizes
    sumatran_now = 37661

    # Split time in units of generations.
    gen_time = _species.generation_time
    split_time = split_years / gen_time

    # Sizes of the two branches at the split.
    bornean_at_split = ancestral_size * bornean_fraction
    sumatran_at_split = ancestral_size * (1 - bornean_fraction)

    # Growth rates between the split and the present.
    bornean_growth = -1 * math.log(bornean_at_split / bornean_now) / split_time
    sumatran_growth = (
        -1 * math.log(sumatran_at_split / sumatran_now) / split_time
    )

    # Migration rates.
    mig_s_b = 0.395 / 2 / ancestral_size
    mig_b_s = 0.239 / 2 / ancestral_size

    # Population IDs correspond to their indexes in the population
    # configuration array, so 0=B and 1=S.
    present_day = [
        (bornean_now, bornean_growth),
        (sumatran_now, sumatran_growth),
    ]
    configurations = [
        msprime.PopulationConfiguration(
            initial_size=size,
            growth_rate=rate,
            metadata=population.asdict(),
        )
        for population, (size, rate) in zip(model_populations, present_day)
    ]
    migration = [
        [0, mig_b_s],
        [mig_s_b, 0],
    ]

    # Merge populations and turn off migration, change to the ancestral size.
    merge = msprime.MassMigration(
        time=split_time, source=1, destination=0, proportion=1.0
    )
    stop_migration = msprime.MigrationRateChange(time=split_time, rate=0)
    reset_ancestor = msprime.PopulationParametersChange(
        time=split_time,
        initial_size=ancestral_size,
        growth_rate=0,
        population_id=0,
    )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        citations=model_citations,
        populations=model_populations,
        generation_time=gen_time,
        population_configurations=configurations,
        migration_matrix=migration,
        demographic_events=[merge, stop_migration, reset_ancestor],
    )
class TestPopulationSampling(unittest.TestCase):
    """
    Tests for population sampling and for the consistency of the population
    lists and demography debugger of every catalogued demographic model.
    """

    # Create populations to test on. The third one is built with
    # sampling_time=None; the tests below expect it not to be sampleable.
    _pop1 = stdpopsim.Population("pop0", "Test pop. 0")
    _pop2 = stdpopsim.Population("pop1", "Test pop. 1", sampling_time=10)
    _pop3 = stdpopsim.Population("pop2", "Test pop. 2", sampling_time=None)

    # Create an empty model to hold populations
    base_mod = models.DemographicModel.empty(populations=[_pop1, _pop2, _pop3])

    def test_num_sampling_populations(self):
        self.assertEqual(self.base_mod.num_sampling_populations, 2)

    def test_get_samples(self):
        test_samples = self.base_mod.get_samples(2, 1)
        self.assertEqual(len(test_samples), 3)
        # Check for error when prohibited sampling asked for
        with self.assertRaises(ValueError):
            self.base_mod.get_samples(2, 2, 1)
        # Get the population corresponding to each sample
        sample_populations = [i.population for i in test_samples]
        # Check sample populations
        self.assertEqual(sample_populations, [0, 0, 1])
        # Test sampling times
        sample_times = [i.time for i in test_samples]
        self.assertEqual(sample_times, [0, 0, 10])

    # Test that all sampling populations are specified before non-sampling
    # populations in the model.populations list
    def test_population_order(self):
        for model in stdpopsim.all_demographic_models():
            allow_sample_status = [int(p.allow_samples) for p in model.populations]
            num_sampling = sum(allow_sample_status)
            # All sampling populations must be at the start of the list
            self.assertEqual(sum(allow_sample_status[num_sampling:]), 0)

    # Test that populations are listed in the same order in model.populations
    # and model.population_configurations
    def test_population_config_order_equal(self):
        for model in stdpopsim.all_demographic_models():
            pop_ids = [pop.id for pop in model.populations]
            config_ids = [
                config.metadata["id"] for config in model.population_configurations
            ]
            # Compare the whole lists rather than zipped pairs: zip() stops at
            # the shorter list, which would hide a missing or extra
            # population configuration.
            self.assertEqual(pop_ids, config_ids)

    # Test that we are indeed getting a valid DDB back
    # admittedly a pretty bad test...
    def test_demography_debugger(self):
        for model in stdpopsim.all_demographic_models():
            ddb = model.get_demography_debugger()
            self.assertIsInstance(ddb, msprime.DemographyDebugger)

    # test for equality of ddbs: the debugger returned by the model must print
    # the same history as one built directly from the model's attributes
    def test_demography_debugger_equal(self):
        for model in stdpopsim.all_demographic_models():
            ddb1 = model.get_demography_debugger()
            ddb2 = msprime.DemographyDebugger(
                population_configurations=model.population_configurations,
                migration_matrix=model.migration_matrix,
                demographic_events=model.demographic_events,
            )
            f1 = io.StringIO()
            f2 = io.StringIO()
            ddb1.print_history(f1)
            ddb2.print_history(f2)
            self.assertEqual(f1.getvalue(), f2.getvalue())
import msprime import numpy as np import stdpopsim _species = stdpopsim.get_species("AraTha") # Some generic populations to use for qc population_sample_0 = stdpopsim.Population( "sampling_0", "Population that samples at time 0", 0 ) def Durvasula2017MSMC(): id = "QC-SouthMiddleAtlas_1D17" populations = [population_sample_0] # Both of the following are directly # converted from MSMC output scaled by A.Thaliana # mutation rate 7e-9 and 1 generation # per year. times = np.array( [ 6.990000e02, 2.796000e03, 6.068000e03, 9.894000e03, 1.437000e04, 1.960600e04,
def Durvasula2017MSMC():
    """
    Build the single-population "QC-SouthMiddleAtlas_1D17" model.

    The population size is piecewise constant: the initial size is
    ``sizes[0]`` and one size change is scheduled at every entry of ``times``
    using the entry of ``sizes`` at the same index.

    :return: A ``stdpopsim.DemographicModel`` with one population,
        "SouthMiddleAtlas".
    """
    model_id = "QC-SouthMiddleAtlas_1D17"
    populations = [stdpopsim.Population("SouthMiddleAtlas", "")]

    # Both of the following are directly
    # converted from MSMC output scaled by A.Thaliana
    # mutation rate 7e-9 and 1 generation
    # per year.
    # NOTE(review): the rate quoted above (7e-9) differs from the
    # mutation_rate passed to the model below (7.1e-9) -- confirm which one
    # was used for the scaling.
    times = np.array([
        6.990000e02, 2.796000e03, 6.068000e03, 9.894000e03,
        1.437000e04, 1.960600e04, 2.573000e04, 3.289400e04,
        4.127500e04, 5.107700e04, 6.254400e04, 7.595800e04,
        9.164800e04, 1.100010e05, 1.314710e05, 1.565840e05,
        1.859600e05, 2.203240e05, 2.605200e05, 3.075400e05,
        3.625410e05, 4.268790e05, 5.021390e05, 5.901730e05,
        6.931510e05, 8.136100e05, 9.545170e05, 1.119341e06,
        1.312147e06, 1.537686e06, 1.801500e06, 2.110100e06,
    ])
    sizes = np.array([
        4.2252426e07, 4.2252426e07, 6.0323000e04, 7.2174000e04,
        4.0591000e04, 2.1158000e04, 2.1442000e04, 3.9942000e04,
        7.8908000e04, 1.1113200e05, 1.1074500e05, 9.6283000e04,
        8.7661000e04, 8.3932000e04, 8.3829000e04, 9.1813000e04,
        1.1164400e05, 1.4345600e05, 1.8157100e05, 2.1733100e05,
        2.4140000e05, 2.4698400e05, 2.3859300e05, 2.2822200e05,
        2.1775200e05, 1.9801900e05, 1.6521000e05, 1.2179600e05,
        1.2179600e05, 7.3989000e04, 7.3989000e04, 7.3989000e04,
    ])

    # The first 8 epochs are "masked" to
    # the last Ne at 40kya due to
    # the limitations of MSMC to infer
    # population size in this range.
    #
    # Durvasula et al 2017 shows that
    # MSMC has power in A.Thaliana
    # between 40kya and 1.6Mya.
    sizes[:8] = sizes[8]

    # Similarly, the last 2 entries are set to equal the third last.
    # The third-last entry is sizes[-3]; copying from sizes[30] (the
    # second-last) would leave that entry untouched instead of masking it.
    sizes[-2:] = sizes[-3]

    population_configurations = [
        msprime.PopulationConfiguration(initial_size=sizes[0])
    ]
    # One size change per time point, paired with the size at the same index.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=t, initial_size=size, population_id=0
        )
        for t, size in zip(times, sizes)
    ]

    # One map entry per epoch: the initial epoch plus one per size change.
    # Each entry is its own dict so that no two epochs share a mutable object.
    population_id_map = [
        {"SouthMiddleAtlas": 0} for _ in range(len(times) + 1)
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=populations,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        population_id_map=population_id_map,
        mutation_rate=7.1e-9,
    )
def _HolsteinFriesan_1M13():
    """
    Build the single-population "HolsteinFriesian_1M13" demographic model.

    The population starts at size 90 with growth rate 0.0166. A first event at
    time 1 resets the size to 90 and sets a new growth rate; every later event
    changes only the growth rate.

    :return: The assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "HolsteinFriesian_1M13"
    summary = "Piecewise size changes in Holstein-Friesian cattle."
    details = """
    The piecewise-constant population size model of Holstein-Friesian cattle
    from MacLeod et al. 2013. Population sizes were estimated from inferred
    runs of homozygosity, with parameter values taken from Figure 4A by visual
    inspection of the plots.
    """
    breed = stdpopsim.Population(
        id="Holstein-Friesian", description="Holstein-Friesian"
    )
    model_citations = [_MacLeodEtAl.because(stdpopsim.CiteReason.DEM_MODEL)]

    present_day = msprime.PopulationConfiguration(
        initial_size=90, growth_rate=0.0166, metadata=breed.asdict()
    )

    # Here 'time' should be in generation notation ie. how many
    # generations ago were that Ne (effective population size)
    # and growth rate.
    # Growth rate is "per generation exponential growth rate":
    # -alpha= [ln(initial_pop_size/next_stage_pop_size)/generation_span_in_years]
    # For example: ln(90/120)/3= -0.095894024
    #
    # The first event is the only one that also sets the size.
    size_changes = [
        msprime.PopulationParametersChange(
            time=1,
            initial_size=90,
            growth_rate=-0.095894024,
            population_id=0,
        ),  # Ne 90 to 120
    ]

    # (time, growth rate) for the remaining events, which only change the
    # growth rate.
    growth_schedule = [
        (4, -0.24465639),  # Ne 120 to 250
        (7, -0.0560787),  # Ne 250 to 350
        (13, -0.1749704),  # Ne 350 to 1000
        (19, -0.0675775),  # Ne 1000 to 1500
        (25, -0.0022129),  # Ne 1500 to 2000
        (155, -0.0007438),  # Ne 2000 to 2500
        (455, -0.0016824),  # Ne 2500 to 3500
        (655, -0.0006301),  # Ne 3500 to 7000
        (1755, -0.0005945),  # Ne 7000 to 10000
        (2355, -0.0005306),  # Ne 10000 to 17000
        (3355, -0.0000434),  # Ne 17000 to 62000
        (33155, -0.0),  # Ne 62000 (model has "coalesced")
        (933155, -0.0),
    ]
    size_changes.extend(
        msprime.PopulationParametersChange(
            time=when, growth_rate=rate, population_id=0
        )
        for when, rate in growth_schedule
    )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=[breed],
        citations=model_citations,
        generation_time=_species.generation_time,
        population_configurations=[present_day],
        demographic_events=size_changes,
    )