def set_up_demography(t78, t68, t85, t54, t10, t20, t03, t93, t34, t410, ta1, ta2, ta3, ta4, f):
    """Assemble the time-sorted list of demographic events for the model.

    Args:
        t78 ... t410: Times of the population merges that define the tree
            topology (each is passed as the ``time`` of a full mass
            migration).
        ta1 ... ta4: Times of the four admixture pulses.
        f: Indexable collection of admixture proportions; entries 0, 1 and
            2 are read.

    Returns:
        A list containing the merge events, the admixture pulses and the
        two CEU size changes, sorted by their ``time`` attribute.
    """

    def pulse(when, src, dst, fraction=1):
        # Thin wrapper so each event below fits on one readable line.
        return msp.MassMigration(
            time=when, source=src, destination=dst, proportion=fraction
        )

    # Divergence of the source populations (topology of the tree).
    merges = [
        pulse(t78, PAP, AYT),
        pulse(t68, EAS, AYT),
        pulse(t85, AYT, CEU),
        pulse(t54, CEU, AFR),
        pulse(t10, DEN1, DEN0),
        pulse(t20, DEN2, DEN0),
        pulse(t03, DEN0, DEN3),
        pulse(t93, NEA, DEN3),
        pulse(t34, DEN3, AFR),
        pulse(t410, AFR, CHM),
    ]

    # Admixture times and proportions.
    # NOTE(review): f[2] is used for both the Denisovan 2 and the
    # Neanderthal pulse -- confirm this is intended.
    pulses = [
        pulse(ta1, AYT, DEN2, f[2]),  # fraction from Denisovan 2 to Ayta
        pulse(ta2, AYT, DEN1, f[1]),  # fraction from Denisovan 1 to Ayta/Papuan
        pulse(ta3, CEU, NEA, f[2]),  # fraction from Neanderthal to OoA pops
        pulse(ta4, AYT, EAS, f[0]),  # fraction from East Asian to Ayta
    ]

    # Population size changes for CEU.
    size_changes = [
        msp.PopulationParametersChange(
            time=when, initial_size=size, growth_rate=0, population_id=CEU
        )
        for when, size in ((2400, 2000), (2800, 5000))
    ]

    # Combine everything and order by event time (sorted() is stable, so
    # events sharing a time keep the order listed above).
    return sorted(merges + pulses + size_changes, key=lambda event: event.time)
def exponential_model(args, print=False):
    """Build the events for a single-population model with recent growth.

    Looking backwards in time the population starts at size N0 and changes
    exponentially with rate ``growth`` until time T1, where it has size N1
    and stays constant; at time T2 the size is set to N2.  N0 is derived as
    ``N1 / exp(-growth * T1)``.

    Args:
        args: Tuple ``(params, randomize, i, proposals)``.  ``params`` maps
            parameter names to objects exposing ``val``, ``rand()``,
            ``prop(i)`` and ``inferable``.  When ``proposals`` is truthy,
            inferable parameters use ``prop(i)`` and the rest use ``val``;
            otherwise every parameter uses ``rand()`` if ``randomize`` is
            truthy and ``val`` if not.
        print: When truthy, print the demographic history using
            ``msprime.DemographyDebugger``.  (The name shadows the builtin
            and is kept only for backward compatibility.)

    Returns:
        Tuple ``(demographic_events, mu, r)``.

    Raises:
        ValueError: If any of the required parameters is missing.
    """
    params, randomize, i, proposals = args
    necessary_params = ["mu", "r", "T1", "N1", "T2", "N2", "growth"]

    # The ``print`` argument shadows the builtin inside this function, so
    # the old ``print(...)`` warning could never run (it called a bool).
    # Fail loudly instead of falling through to a KeyError further down.
    missing = [p for p in necessary_params if p not in params]
    if missing:
        raise ValueError(
            "Invalid combination of parameters. "
            "Needed: mu | r | T1 | N1 | T2 | N2 | growth"
            " (missing: {})".format(", ".join(missing))
        )

    if proposals:
        mu, r, T1, N1, T2, N2, growth = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        mu, r, T1, N1, T2, N2, growth = [
            params[p].rand() if randomize else params[p].val
            for p in necessary_params
        ]

    # Present-day size implied by exponential change back to N1 at T1.
    N0 = N1 / math.exp(-growth * T1)

    # Time is given in generations unit (t/25)
    demographic_events = [
        msprime.PopulationParametersChange(time=0, initial_size=N0, growth_rate=growth),
        msprime.PopulationParametersChange(time=T1, initial_size=N1, growth_rate=0),
        msprime.PopulationParametersChange(time=T2, initial_size=N2),
    ]

    if print:
        debugger = msprime.DemographyDebugger(
            Ne=N0, demographic_events=demographic_events
        )
        debugger.print_history()

    return demographic_events, mu, r
def __init__(self):
    """Set up a single-population three-epoch (recent/bottleneck/ancestral) model.

    Model from paper https://doi.org/10.1371/journal.pcbi.1004845, with
    sizes taken from table 7 (average stat prediction values).  Epoch
    boundaries are given in coalescent units and converted to generations
    by multiplying by ``4 * N_ref``.
    """
    recent_size = 544.2e3
    bottleneck_size = 145.3e3
    ancestral_size = 652.7e3

    # Reference size used to turn coalescent units into generations.
    N_ref = 1e5
    recent_span_coal = 0.5
    bottleneck_span_coal = 5

    def generations(coalescent_time):
        return coalescent_time * 4 * N_ref

    bottleneck_start = generations(recent_span_coal)
    ancestral_start = generations(recent_span_coal + bottleneck_span_coal)

    # Single population, hence a 1x1 migration matrix of zeros.
    self.migration_matrix = [[0]]

    # Population size at T=0.
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=recent_size, growth_rate=0),
    ]

    # Size changes, listed going backwards in time.
    self.demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0
        )
        for when, size in (
            (bottleneck_start, bottleneck_size),  # bottleneck
            (ancestral_start, ancestral_size),  # ancestral population size
        )
    ]
def configure_demography(self):
    """Populate ``self.pc`` and ``self.demographic_events`` from ``self.demography``.

    ``self.demography`` is indexed with ``['generation']`` and ``['size']``
    and is walked row by row; every row except the last one yields a
    constant-size ``PopulationParametersChange`` at that row's generation.
    """
    self.demographic_events = []
    self.pc = [msprime.PopulationConfiguration(self.sample_size)]

    for index in self.demography.index:
        # The final row has no successor to pair with, so stop there.
        if index == self.demography.shape[0] - 1:
            break

        generations = self.demography['generation']
        sizes = self.demography['size']
        forward_time, forward_size = generations[index + 1], sizes[index + 1]
        now_time, now_size = generations[index], sizes[index]

        # NOTE(review): this exponential rate between consecutive rows is
        # computed but not used; the event below is created with
        # growth_rate=0.  Confirm whether ``g`` was meant to be passed.
        g = (math.log(now_size) - math.log(forward_size)) / (forward_time - now_time)

        self.demographic_events.append(
            msprime.PopulationParametersChange(
                time=now_time, initial_size=now_size, growth_rate=0
            )
        )
def mazet2016_3b(print_):
    """Simulate two samples under a piecewise-constant size history.

    The population starts at size 10000 and is changed to one tenth of
    that at time 10000, back to the full size at 60000 and to half of it
    at 200000.

    Args:
        print_: When truthy, print the demographic history first.

    Returns:
        The result of ``msprime.simulate`` for a 150e+6-long sequence with
        recombination and mutation rates of 2e-08.
    """
    N_0 = 10000  # initial size

    population_configurations = [
        msprime.PopulationConfiguration(sample_size=2, initial_size=N_0, growth_rate=0),
    ]

    # (time, size) of each step change, going backwards in time.
    steps = [(10000, N_0 / 10), (60000, N_0), (200000, N_0 / 2)]
    demographic_events = [
        msprime.PopulationParametersChange(time=when, initial_size=size, growth_rate=0)
        for when, size in steps
    ]

    # The debugger is built unconditionally; it is only printed on request.
    debugger = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
    )
    if print_:
        print('Demographic history:\n')
        debugger.print_history()

    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=150e+6,
        recombination_rate=2e-08,
        mutation_rate=2e-08,
    )
def test_growth_rate_and_size_change(self):
    """A change of both size and growth rate maps onto the expected low-level dict."""
    when, size, rate, population = 1024, 8192, 2, 1

    event = msprime.PopulationParametersChange(
        time=when, initial_size=size, growth_rate=rate, population_id=population
    )
    expected = dict(
        type="population_parameters_change",
        time=when,
        population_id=population,
        initial_size=size,
        growth_rate=rate,
    )

    self.assertEqual(event.get_ll_representation(1), expected)
def split_times(split_time):
    """Return the demographic events for the four-population join sequence.

    Args:
        split_time: Time of the first join; divided by the module-level
            ``gen_time`` before use.

    Returns:
        A list of three mass migrations and one size change.  The times
        ``T2``, ``T3`` and ``T4`` are read from the enclosing scope.
    """
    T1 = split_time / gen_time

    join_din_ngs = msprime.MassMigration(time=T1, source=0, destination=1, proportion=1)
    join_din_bsj = msprime.MassMigration(time=T2, source=1, destination=2, proportion=1)
    chw_bottleneck = msprime.PopulationParametersChange(
        time=T3, initial_size=13000, population_id=3
    )
    join_chw_bsj = msprime.MassMigration(time=T4, source=2, destination=3, proportion=1)

    return [join_din_ngs, join_din_bsj, chw_bottleneck, join_chw_bsj]
def __init__(self, N0, growth_rate, *args):
    """Configure a single-population model with arbitrary epochs.

    Args:
        N0: Initial population size.
        growth_rate: Initial growth rate.
        *args: Any number of ``(time, initial_size, growth_rate)`` triples;
            each becomes one ``PopulationParametersChange`` on population 0.
    """
    present_day = msprime.PopulationConfiguration(
        initial_size=N0,
        growth_rate=growth_rate,
        metadata=self.populations[0].asdict(),
    )
    self.population_configurations = [present_day]
    self.migration_matrix = [[0]]
    self.demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, growth_rate=rate, population_id=0
        )
        for when, size, rate in args
    ]
def HuberThreeEpoch():
    """Return a three-epoch, single-population ``stdpopsim.DemographicModel``.

    The population has size N_3 at present, N_2 from time T_3 and N_ANC
    from time T_2 + T_3.
    """
    model_id = "QC-African3Epoch_1H18"
    populations = [
        stdpopsim.Population(id="SouthMiddleAtlas", description="A. thalina"),
    ]

    # Epoch durations.
    T_2 = 7420
    T_3 = 14534

    # Population sizes.
    N_ANC = 161744
    N_2 = 24076
    N_3 = 203077

    present_day = msprime.PopulationConfiguration(
        initial_size=N_3, metadata=populations[0].asdict()
    )
    size_changes = [
        msprime.PopulationParametersChange(time=T_3, initial_size=N_2, population_id=0),
        msprime.PopulationParametersChange(
            time=T_2 + T_3, initial_size=N_ANC, population_id=0
        ),
    ]

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=_species.generation_time,
        populations=populations,
        population_configurations=[present_day],
        demographic_events=size_changes,
        # One map entry per epoch; the same dict object is repeated.
        population_id_map=[{"SouthMiddleAtlas": 0}] * 3,
    )
def test_different_types(self):
    """Events of different classes never compare equal, in either order."""
    events = [
        msprime.PopulationParametersChange(time=1, initial_size=1),
        msprime.MigrationRateChange(time=1, rate=1),
        msprime.MassMigration(time=1, source=1),
        msprime.SimpleBottleneck(time=1),
    ]

    for a, b in itertools.combinations(events, 2):
        # Mixed pairs are unequal both ways round ...
        for left, right in ((a, b), (b, a)):
            self.assertFalse(models.demographic_events_equal([left], [right], 1))
        # ... while each event equals itself.
        for same in (a, b):
            self.assertTrue(models.demographic_events_equal([same], [same], 1))
        # The verifying variant raises instead of returning False.
        for left, right in ((b, a), (a, b)):
            with self.assertRaises(models.UnequalModelsError):
                models.verify_demographic_events_equal([left], [right], 1)
def test_bad_extended_events(self):
    """The SLiM engine rejects invalid ``extended_events`` entries with ValueError."""
    engine = stdpopsim.get_engine("slim")
    invalid_entries = (
        msprime.PopulationParametersChange(time=0, initial_size=100),
        None,
        {},
        "",
    )

    for bad_ee in invalid_entries:
        with self.assertRaises(ValueError):
            engine.simulate(
                demographic_model=self.model,
                contig=self.contig,
                samples=self.samples,
                extended_events=[bad_ee],
                dry_run=True,
            )
def __init__(self):
    """Configure a single-population model alternating between N0 and N0/10.

    Five epochs with non-zero growth rates are followed by a final
    constant-size epoch of N0/10.
    """
    super().__init__()
    self.generation_time = 29

    N0 = 14312
    tenth = N0 / 10

    # (initial_size, time, growth_rate) for each epoch, backwards in time.
    epochs = [
        (N0, 33.333, 0.023025),
        (tenth, 133.33, -0.005756),
        (N0, 533.33, 0.0014391),
        (tenth, 2133.33, -0.00035977),
        (N0, 8533.33, 8.99448e-5),
    ]
    t_ancient = 34133.31
    n_ancient = tenth

    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=N0)
    ]
    self.migration_matrix = [[0]]

    events = [
        msprime.PopulationParametersChange(
            initial_size=size, time=when, growth_rate=rate
        )
        for size, when, rate in epochs
    ]
    # Oldest epoch: constant size.
    events.append(
        msprime.PopulationParametersChange(
            time=t_ancient, initial_size=n_ancient, growth_rate=0
        )
    )
    self.demographic_events = events
def simulate(self, **kwargs) -> "tskit.TreeSequence":
    """Run msprime under the size history stored on this object.

    One ``PopulationParametersChange`` is created per pair drawn from
    ``self.t`` and ``self.Ne[:-1]``; the resulting list replaces any
    ``demographic_events`` entry supplied by the caller.

    Args:
        **kwargs: Forwarded to ``msprime.simulate()``.

    Returns:
        Whatever ``msprime.simulate()`` returns for these arguments.
    """
    import msprime as msp

    size_changes = []
    for when, size in zip(self.t, self.Ne[:-1]):
        size_changes.append(
            msp.PopulationParametersChange(time=when, initial_size=size)
        )

    kwargs["demographic_events"] = size_changes
    return msp.simulate(**kwargs)
def simulate(args):
    """Run replicate two-population simulations with an admixture pulse.

    Args:
        args: Namespace providing ``num_chroms``, ``Ne``,
            ``admixture_time``, ``admixture_prop``, ``model`` and
            ``replicates``.

    Returns:
        The replicates produced by ``msprime.simulate`` (migrations are
        recorded).
    """
    rho = 1e-8
    all_lengths = [
        247249719, 242951149, 199501827, 191273063, 180857866, 170899992,
        158821424, 146274826, 140273252, 135374737, 134452384, 132349534,
        114142980, 106368585, 100338915, 88827254, 78774742, 76117153,
        63811651, 62435964, 46944323, 49691432,
    ]

    # Only the first ``num_chroms`` lengths are simulated.
    lengths = all_lengths[:args.num_chroms]
    positions, rates = get_positions_rates(lengths, rho)
    rec_map = msprime.RecombinationMap(
        positions, rates, num_loci=lengths[-1] + 1
    )

    # All samples are drawn from population 0; population 1 starts empty.
    pop_configs = [
        msprime.PopulationConfiguration(sample_size=args.Ne, initial_size=args.Ne),
        msprime.PopulationConfiguration(sample_size=0, initial_size=args.Ne),
    ]

    pulse_time = args.admixture_time
    events = [
        # Move a fraction of lineages into population 1 ...
        msprime.MassMigration(
            time=pulse_time, source=0, dest=1, proportion=args.admixture_prop
        ),
        # ... bring everything back one generation later ...
        msprime.MassMigration(time=pulse_time + 1, source=1, dest=0, proportion=1),
        # ... then set population 0 to size 1.0.
        msprime.PopulationParametersChange(
            time=pulse_time + 2, initial_size=1.0, population_id=0
        ),
    ]

    return msprime.simulate(
        recombination_map=rec_map,
        demographic_events=events,
        population_configurations=pop_configs,
        model=args.model,
        record_migrations=True,
        num_replicates=args.replicates,
    )
def sim_chrom(n_chrom, chrom_len, recombination_rate, mutation_rate, sample_size, Ne_ancestral, Ne_recent, Ne_switchdate, print_history=False):
    """Simulate chromosomes under a single step change in population size.

    Args:
        n_chrom, chrom_len, recombination_rate: Passed to ``make_rec_map``
            to build the recombination map.
        mutation_rate: Mutation rate passed to ``msprime.simulate``.
        sample_size: Number of samples.
        Ne_ancestral: Size the population is set to at ``Ne_switchdate``.
        Ne_recent: Size passed to ``msprime.simulate`` as ``Ne``.
        Ne_switchdate: Time of the size change.
        print_history: Accepted for backward compatibility; currently
            unused.

    Returns:
        The simulated tree sequence.  The mutation and sample counts are
        printed as a side effect.
    """
    # Recombination map.
    rec_map = make_rec_map(n_chrom, chrom_len, recombination_rate)

    demographic_events = [
        msprime.PopulationParametersChange(
            time=Ne_switchdate, initial_size=Ne_ancestral, population_id=0
        )
    ]

    tree_sequence = msprime.simulate(
        sample_size=sample_size,
        Ne=Ne_recent,
        recombination_map=rec_map,
        mutation_rate=mutation_rate,
        demographic_events=demographic_events,
    )

    shape = tree_sequence.get_num_mutations(), tree_sequence.get_sample_size()
    # Function-call form: the old print statement is a SyntaxError on
    # Python 3, and a single parenthesised argument prints the same text
    # on Python 2.
    print('num_mutations: {}, samples: {}'.format(*shape))
    return tree_sequence
def test_single_growth_rate_size_change(self):
    """One growing epoch followed by one size change is tracked by the debugger."""
    # Values are in units of generations and absolute sizes.
    Ne = 1000
    growth_rate = -0.01
    end_time = 20
    end_size = Ne * math.exp(-growth_rate * end_time)
    new_size = 4 * Ne

    simulator = msprime.simulator_factory(
        Ne=Ne,
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=Ne, growth_rate=growth_rate
            )
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=end_time, initial_size=new_size, growth_rate=0
            )
        ],
    )
    ll_sim = simulator.create_ll_instance()

    def current_populations():
        # Snapshot of the low-level population state as Population objects.
        return [
            msprime.Population(Ne=Ne, **config)
            for config in ll_sim.get_population_configuration()
        ]

    # First time slice: the growing epoch.
    ll_end_time = ll_sim.debug_demography()
    self.assertEqual(end_time, ll_end_time * 4 * Ne)
    populations = current_populations()
    self.assertEqual(len(populations), 1)
    pop = populations[0]
    self.assertEqual(pop.growth_rate, growth_rate)
    self.assertEqual(pop.initial_size, Ne)
    self.assertEqual(pop.get_size(end_time), end_size)

    # Now fast forward to the next time slice.
    ll_end_time = ll_sim.debug_demography()
    self.assertTrue(math.isinf(ll_end_time))
    pop = current_populations()[0]
    self.assertEqual(pop.growth_rate, 0)
    self.assertEqual(pop.initial_size, new_size)
    self.assertEqual(pop.get_size(10), new_size)
def _set_demography(self):
    """Read ``self.demo_file`` and build the size-change events.

    Each line is split on whitespace; field 1 is parsed as the population
    size and field 2 as the epoch duration (or the literal ``inf``).  The
    running time is advanced by the duration and a
    ``PopulationParametersChange`` with that time and size is recorded.
    The size on the first line also becomes the initial size.

    Sets ``self.pop_config`` and ``self.demography``.
    """
    demography = []
    N0 = None
    t = 0
    # Open read-only inside a context manager so the handle is always
    # closed (the file is never written to).
    with open(self.demo_file) as demo:
        for line_num, line in enumerate(demo):
            spltln = line.split()
            Nt = int(spltln[1])
            # An "inf" duration leaves ``deltat`` at the previous line's
            # value.
            if spltln[2] != "inf":
                deltat = int(spltln[2])
            if line_num == 0:
                N0 = Nt
            t += deltat
            demography.append(
                msp.PopulationParametersChange(time=t, initial_size=Nt)
            )
    self.pop_config = [msp.PopulationConfiguration(initial_size=N0)]
    self.demography = demography
def finding_nearest_neighbors():
    """Simulate three samples taken at different times and draw the first tree.

    The tree is written to ``_static/different_time_samples.svg`` with a
    rank time scale.
    """
    # All samples come from population 0, at times 0, 1 and 20.
    sampling_times = (0, 1, 20)
    samples = [msprime.Sample(0, when) for when in sampling_times]

    growth_change = msprime.PopulationParametersChange(
        time=10, growth_rate=2, population_id=0
    )
    ts = msprime.simulate(
        Ne=1e6,
        samples=samples,
        demographic_events=[growth_change],
        random_seed=42,
    )

    ts.first().draw_svg("_static/different_time_samples.svg", time_scale="rank")
def migration_example(num_replicates=1):
    """Simulate one sample per deme under a migration matrix read from disk.

    The matrix comes from ``nussear_8x.migr.tsv``.  Size changes following
    a sinusoid of period 20 are applied at times 0..99, the demographic
    history is printed, and the root time of the first tree of every
    replicate (divided by 4) is collected.

    Args:
        num_replicates: Number of replicates to simulate.
    """
    M = read_migration_matrix("nussear_8x.migr.tsv")
    # M = read_migration_matrix("nussear.migr.tsv")
    assert (M.shape[0] == M.shape[1])
    num_demes = M.shape[0]

    # Sample one individual per deme.
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=1) for _ in range(num_demes)
    ]

    # Suppose pop size changes have been sinusoidal.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=t,
            initial_size=1.0 + 0.5 * np.sin(2 * np.pi * t / 20),
            growth_rate=0,
        )
        for t in range(100)
    ]

    # Print the demographic history that was just described.
    msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=M,
        demographic_events=demographic_events,
    ).print_history()

    # Run the simulation.
    replicates = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        migration_matrix=M,
        num_replicates=num_replicates,
    )

    # Collect the scaled root time of the first tree of each replicate.
    # NOTE(review): T is filled in but not returned or used afterwards.
    T = np.zeros(num_replicates)
    for i, tree_sequence in enumerate(replicates):
        first_tree = tree_sequence.first()
        T[i] = first_tree.time(first_tree.root) / 4
def main():
    """Simulate windows under a step change in Ne and print the block SFS.

    Command-line arguments (``sys.argv[1:6]``), all integers:
        1. blockL -- block length passed to ``makeblock_ali``
        2. winL -- length of each simulated window
        3. num_replicates -- number of windows
        4. nsub -- size of each subsample
        5. nwhole -- size of the whole sample

    The output of ``multibSFSall`` is printed with every bracket and
    parenthesis replaced by a curly brace.
    """
    blockL = int(sys.argv[1])
    winL = int(sys.argv[2])
    # num_replicates is the # of windows
    num_replicates = int(sys.argv[3])
    # nsub is the size of the subsample
    nsub = int(sys.argv[4])
    # nwhole is the size of the whole sample
    nwhole = int(sys.argv[5])

    # Demographic parameters specifying a step change in Ne to Ne/scal in
    # a single population at time T_bott.
    recr = 0
    scal = 0.5
    Ne = 4.05596e5
    T_bott = 2e5
    mu = 3.46e-9

    population_configurations = [
        msprime.PopulationConfiguration(sample_size=nwhole, initial_size=Ne)
    ]
    demographic_events = [
        msprime.PopulationParametersChange(
            time=T_bott, initial_size=Ne / scal, population_id=0
        )
    ]

    # combos lists positions of subsampled inds:
    combos = list(itertools.combinations(range(nwhole), nsub))

    # this generates the actual replicates
    replicates = msprime.simulate(
        num_replicates=num_replicates,
        length=winL,
        recombination_rate=recr,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        mutation_rate=mu,
    )

    myBlockDict = makeblock_ali(replicates, blockL, winL)
    out = multibSFSall(myBlockDict, combos, nsub)
    # (A leftover debugging breakpoint() used to sit here and halted every
    # run in the debugger.)
    outmath = (
        str(out)
        .replace("[", "{")
        .replace("]", "}")
        .replace("(", "{")
        .replace(")", "}")
    )
    print(outmath)
def __init__(self):
    """Configure a single-population two-epoch model.

    The population has size N_2 at present and is set to N_ANC at time T_2.
    """
    # Population sizes.
    N_ANC = 746148
    N_2 = 100218
    # Time of second epoch.
    T_2 = 568344

    present_day = msprime.PopulationConfiguration(
        initial_size=N_2, metadata=self.populations[0].asdict()
    )
    to_ancestral = msprime.PopulationParametersChange(
        time=T_2, initial_size=N_ANC, population_id=0
    )

    self.population_configurations = [present_day]
    self.migration_matrix = [[0]]
    self.demographic_events = [to_ancestral]
def _afr_2epoch():
    """Return the ``African2Epoch_1H18`` two-epoch demographic model.

    The single population has size N_0 at present and N_A from time t_1
    onwards (backwards in time).
    """
    N_A = 746148
    N_0 = 100218
    t_1 = 568344

    south_middle_atlas = stdpopsim.Population(
        id="SouthMiddleAtlas",
        description="Arabidopsis Thaliana South Middle Atlas population",
    )
    populations = [south_middle_atlas]

    huber_2018 = stdpopsim.Citation(
        author="Huber et al.",
        year=2018,
        doi="https://doi.org/10.1038/s41467-018-05281-7",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    # Adjacent literals concatenate to the exact original text.
    long_description = (
        " Model estimated from site frequency spectrum of synonymous "
        "SNPs from African South Middle Atlas samples using "
        "Williamson et al. 2005 methodology. Values come from "
        "supplementary table 1 of Huber et al 2018. Sizes change from "
        "N_A -> N_0 and t_1 is time of the second epoch. "
    )

    return stdpopsim.DemographicModel(
        id="African2Epoch_1H18",
        description="South Middle Atlas African two epoch model",
        long_description=long_description,
        populations=populations,
        citations=[huber_2018],
        generation_time=1,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=N_0, metadata=populations[0].asdict()
            )
        ],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=t_1, initial_size=N_A, population_id=0
            )
        ],
    )
def __init__(self):
    """Configure a two-population split-with-migration model.

    Each population changes size exponentially between the split and the
    present.  Population 0 is Bornean and population 1 is Sumatran.  The
    source assumed a generation time of 20 years and a mutation rate of
    2e-8 per bp per generation; parameters are from Table S21-2.
    """
    generation_time = 20

    Ne = 17934  # ancestral size
    s = 0.592  # share of the ancestral size going to Bornean at the split
    NB0 = s * Ne
    NS0 = (1 - s) * Ne
    NBF = 8805  # present-day Bornean size
    NSF = 37661  # present-day Sumatran size
    mSB = 0.395 / 2 / Ne
    mBS = 0.239 / 2 / Ne
    T = 403149 / generation_time

    # Rates that take each population from its split size to its present size.
    rB = np.log(NBF / NB0) / T
    rS = np.log(NSF / NS0) / T

    bornean = msprime.PopulationConfiguration(initial_size=NBF, growth_rate=rB)
    sumatran = msprime.PopulationConfiguration(initial_size=NSF, growth_rate=rS)
    self.population_configurations = [bornean, sumatran]

    self.migration_matrix = [
        [0, mBS],
        [mSB, 0],
    ]

    # At the split: merge, turn off migration, change size and growth rate.
    self.demographic_events = [
        msprime.MassMigration(source=1, destination=0, time=T, proportion=1),
        msprime.MigrationRateChange(time=T, rate=0),
        msprime.PopulationParametersChange(
            time=T, initial_size=Ne, growth_rate=0, population_id=0
        ),
    ]
def _set_demography(self, scale=1.0):
    """Establish the demography, allowing for a scale parameter.

    Each line of ``self.demo_file`` is split on whitespace; field 1 is
    parsed as a population size and multiplied by ``scale``, and field 2
    is the epoch duration (or the literal ``inf``).  The running time is
    advanced by the duration and a ``PopulationParametersChange`` with
    that time and scaled size is recorded.  The scaled size on the first
    line also becomes the initial size.

    Args:
        scale: Multiplier applied once to every population size.

    Sets ``self.pop_config`` and ``self.demography``.
    """
    demography = []
    N0 = None
    t = 0
    # Open read-only inside a context manager so the handle is always
    # closed (the file is never written to).
    with open(self.demo_file) as demo:
        for line_num, line in enumerate(demo):
            spltln = line.split()
            Nt = int(spltln[1]) * scale
            # An "inf" duration leaves ``deltat`` at the previous line's
            # value.
            if spltln[2] != "inf":
                deltat = int(spltln[2])
            if line_num == 0:
                # Nt is already scaled; scaling again here made the
                # initial size disagree with the events for scale != 1.
                N0 = Nt
            t += deltat
            demography.append(
                msp.PopulationParametersChange(time=t, initial_size=Nt)
            )
    # Setting the population configurations / demography
    self.pop_config = [msp.PopulationConfiguration(initial_size=N0)]
    self.demography = demography
def exp_decline(self, N0=100, N1=1000, T=1000):
    """Build a one-population model whose size changes exponentially.

    The population has size ``N0`` at present and growth rate
    ``log(N0 / N1) / T``; at time ``T`` it is set to ``N1`` with zero
    growth.  Used for testing that growth rates are handled appropriately.
    """
    rate = math.log(N0 / N1) / T
    pop0 = stdpopsim.models._pop0

    present_day = msprime.PopulationConfiguration(
        initial_size=N0, growth_rate=rate, metadata=pop0.asdict()
    )
    to_constant = msprime.PopulationParametersChange(
        time=T, initial_size=N1, growth_rate=0, population_id=0
    )

    return stdpopsim.DemographicModel(
        id="exp_decline",
        description="exp_decline",
        long_description="exp_decline",
        populations=[stdpopsim.models._pop0],
        generation_time=1,
        population_configurations=[present_day],
        demographic_events=[to_constant],
    )
def __init__(self):
    """Configure a single-population piecewise-constant size history.

    ``sizes[k]`` is the size during the interval ``times[k]`` to
    ``times[k+1]``.  The earliest and latest estimates are overwritten
    with neighbouring values before the events are built.
    """
    self.times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894, 41275, 51077,
        62544, 75958, 91648, 110001, 131471, 156584, 185960, 220324,
        260520, 307540, 362541, 426879, 502139, 590173, 693151, 813610,
        954517, 1119341, 1312147, 1537686, 1801500, 2110100,
    ])
    self.sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813, 111644,
        143456, 181571, 217331, 241400, 246984, 238593, 228222, 217752,
        198019, 165210, 121796, 121796, 73989, 73989, 73989,
    ])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al
    # 2017): overwrite entries 0-7 with entry 8 (~40Kya) and entries 30-31
    # with entry 30 (~1.6Mya).
    self.sizes[:8] = self.sizes[8]
    self.sizes[30:32] = self.sizes[30]

    # Generation time is 1 year.
    self.generation_time = default_generation_time

    self.migration_matrix = [[0]]
    self.population_configurations = [
        msprime.PopulationConfiguration(initial_size=self.sizes[0])
    ]
    # One size change per (time, size) pair.
    self.demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0
        )
        for when, size in zip(self.times, self.sizes)
    ]
def ooa_3(
    N_A=7300,
    N_B=2100,
    N_AF=12300,
    N_EU0=1000,
    N_AS0=510,
    r_EU=0.004,
    r_AS=0.0055,
    T_AF=8800,
    T_B=5600,
    T_EU_AS=848,
    m_AF_B=25e-5,
    m_AF_EU=3e-5,
    m_AF_AS=1.9e-5,
    m_EU_AS=9.6e-5,
):
    """Build the three-population out-of-Africa model with tunable parameters.

    Args:
        N_A, N_B, N_AF: Sizes of the ancestral population, of the merged
            CEU/CHB population (from ``T_EU_AS``) and of YRI at present.
        N_EU0, N_AS0: CEU and CHB sizes at ``T_EU_AS``; their present-day
            sizes are derived from these and the growth rates.
        r_EU, r_AS: Growth rates of CEU and CHB.
        T_AF, T_B, T_EU_AS: Event times, used as given (no unit
            conversion is applied here).
        m_AF_B, m_AF_EU, m_AF_AS, m_EU_AS: Migration rates.

    Returns:
        A ``models.DemographicModel`` with populations 0=YRI, 1=CEU, 2=CHB.
    """
    model_id = "OutOfAfrica_3G09"
    summary = "Three population out-of-Africa"
    # Adjacent literals concatenate to the exact original text.
    details = (
        " The three population Out-of-Africa model from Gutenkunst et al. "
        "2009. It describes the ancestral human population in Africa, the "
        "out of Africa event, and the subsequent European-Asian population "
        "split. Model parameters are the maximum likelihood values of the "
        "various parameters given in Table 1 of Gutenkunst et al. "
    )
    populations = [_yri_population, _ceu_population, _chb_population]
    generation_time = 25

    # Present-day (diploid) sizes of the two growing populations, worked
    # out from their sizes at the split and their growth rates.
    N_EU = N_EU0 / math.exp(-r_EU * T_EU_AS)
    N_AS = N_AS0 / math.exp(-r_AS * T_EU_AS)

    # Population IDs correspond to their indexes in the population
    # configuration array: 0=YRI, 1=CEU and 2=CHB initially.
    present_day = [
        msprime.PopulationConfiguration(
            initial_size=N_AF, metadata=populations[0].asdict()
        ),
        msprime.PopulationConfiguration(
            initial_size=N_EU, growth_rate=r_EU, metadata=populations[1].asdict()
        ),
        msprime.PopulationConfiguration(
            initial_size=N_AS, growth_rate=r_AS, metadata=populations[2].asdict()
        ),
    ]

    migration = [
        [0, m_AF_EU, m_AF_AS],  # noqa
        [m_AF_EU, 0, m_EU_AS],  # noqa
        [m_AF_AS, m_EU_AS, 0],  # noqa
    ]

    # CEU and CHB merge into B with rate changes at T_EU_AS.
    events = [
        msprime.MassMigration(
            time=T_EU_AS, source=2, destination=1, proportion=1.0
        ),
        msprime.MigrationRateChange(time=T_EU_AS, rate=0),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(0, 1)
        ),
        msprime.MigrationRateChange(
            time=T_EU_AS, rate=m_AF_B, matrix_index=(1, 0)
        ),
        msprime.PopulationParametersChange(
            time=T_EU_AS, initial_size=N_B, growth_rate=0, population_id=1
        ),
    ]
    # Population B merges into YRI at T_B.
    events += [
        msprime.MassMigration(time=T_B, source=1, destination=0, proportion=1.0),
        msprime.MigrationRateChange(time=T_B, rate=0),
    ]
    # Size changes to N_A at T_AF.
    events.append(
        msprime.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0
        )
    )

    return models.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        populations=populations,
        generation_time=generation_time,
        population_configurations=present_day,
        migration_matrix=migration,
        demographic_events=events,
    )
def _sma_1pop():
    """Return the ``SouthMiddleAtlas_1D17`` piecewise-constant model.

    ``sizes[k]`` is the size during the interval ``times[k]`` to
    ``times[k+1]``.  The earliest and latest estimates are overwritten
    with neighbouring values before one size-change event is created per
    (time, size) pair.
    """
    times = np.array([
        699, 2796, 6068, 9894, 14370, 19606, 25730, 32894, 41275, 51077,
        62544, 75958, 91648, 110001, 131471, 156584, 185960, 220324,
        260520, 307540, 362541, 426879, 502139, 590173, 693151, 813610,
        954517, 1119341, 1312147, 1537686, 1801500, 2110100,
    ])
    sizes = np.array([
        42252426, 42252426, 60323, 72174, 40591, 21158, 21442, 39942,
        78908, 111132, 110745, 96283, 87661, 83932, 83829, 91813, 111644,
        143456, 181571, 217331, 241400, 246984, 238593, 228222, 217752,
        198019, 165210, 121796, 121796, 73989, 73989, 73989,
    ])

    # MSMC is accurate from 40Kya-1.6Mya for A.thaliana (Durvasula et al
    # 2017): overwrite entries 0-7 with entry 8 (~40Kya) and entries 30-31
    # with entry 30 (~1.6Mya).
    sizes[:8] = sizes[8]
    sizes[30:32] = sizes[30]

    demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0
        )
        for size, when in zip(sizes, times)
    ]

    populations = [
        stdpopsim.Population(
            id="SouthMiddleAtlas",
            description="Arabidopsis Thaliana South Middle Atlas population",
        )
    ]

    durvasula_2017 = stdpopsim.Citation(
        author="Durvasula et al.",
        year=2017,
        doi="https://doi.org/10.1073/pnas.1616736114",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )

    # Adjacent literals concatenate to the exact original text.
    long_description = (
        " This model comes from MSMC using two randomly sampled homozygous "
        "individuals (Khe32 and Ifr4) from the South Middle Atlas region "
        "from the Middle Atlas Mountains in Morocco. The model is estimated "
        "with 32 time periods. Because estimates from the recent and "
        "ancient past are less accurate, we set the population size in the "
        "first 7 time periods equal to the size at the 8th time period and "
        "the size during last 2 time periods equal to the size in the 30th "
        "time period. \n"
    )

    return stdpopsim.DemographicModel(
        id="SouthMiddleAtlas_1D17",
        description="South Middle Atlas piecewise constant size",
        long_description=long_description,
        populations=populations,
        citations=[durvasula_2017],
        generation_time=1,
        demographic_events=demographic_events,
        population_configurations=[
            msprime.PopulationConfiguration(
                initial_size=sizes[0], metadata=populations[0].asdict()
            )
        ],
    )
def create_simulation_runner(parser, arg_list):
    """
    Parses the arguments and returns a SimulationRunner instance.

    The function translates ms-style command line options into an
    ``msprime.Demography`` plus the scalar rates that ``SimulationRunner``
    takes, validating the options as it goes.

    :param parser: Object whose ``parse_args(arg_list)`` yields the parsed
        options and whose ``error(message)`` reports invalid input.
        NOTE(review): the code never returns after calling ``parser.error``,
        so it relies on that call not returning (as argparse's does) --
        confirm if a custom parser is ever passed in.
    :param arg_list: The argument list handed to ``parser.parse_args``.
    :return: The ``SimulationRunner`` built from the parsed options.
    """
    args = parser.parse_args(arg_list)
    if args.mutation_rate == 0 and not args.trees:
        parser.error("Need to specify at least one of --theta or --trees")
    # The number of loci must be a whole number: reject any value that is
    # changed by truncation to int.
    num_loci = int(args.recombination[1])
    if args.recombination[1] != num_loci:
        parser.error("Number of loci must be integer value")
    if args.recombination[0] != 0.0 and num_loci < 2:
        parser.error("Number of loci must > 1")
    r = 0.0
    # We don't scale recombination or mutation rates by the size
    # of the region.
    # r is the supplied recombination value divided by (num_loci - 1) and
    # mu is the supplied mutation value divided by num_loci.
    if num_loci > 1:
        r = args.recombination[0] / (num_loci - 1)
    mu = args.mutation_rate / num_loci

    # ms uses a ratio to define the GC rate, but if the recombination rate
    # is zero we define the gc rate directly.
    gc_param, gc_tract_length = args.gene_conversion
    gc_rate = 0
    if r == 0.0:
        if num_loci > 1:
            gc_rate = gc_param / (num_loci - 1)
    else:
        gc_rate = r * gc_param

    # Defaults for the unstructured case: one population holding every sample.
    demography = msprime.Demography.isolated_model([1])
    # Check the structure format.
    symmetric_migration_rate = 0.0
    num_populations = 1
    migration_matrix = [[0.0]]
    num_samples = [args.sample_size]
    if args.structure is not None:
        # args.structure is read as:
        #   [num_populations, n_1, ..., n_k, (optional) migration_rate]
        num_populations = convert_int(args.structure[0], parser)
        # We must have at least num_population sample_configurations
        if len(args.structure) < num_populations + 1:
            parser.error("Must have num_populations sample sizes")
        demography = msprime.Demography.isolated_model([1] * num_populations)
        num_samples = [0] * num_populations
        for j in range(num_populations):
            num_samples[j] = convert_int(args.structure[j + 1], parser)
        if sum(num_samples) != args.sample_size:
            parser.error("Population sample sizes must sum to sample_size")
        # We optionally have the overall migration_rate here
        if len(args.structure) == num_populations + 2:
            symmetric_migration_rate = convert_float(
                args.structure[num_populations + 1], parser
            )
            check_migration_rate(parser, symmetric_migration_rate)
        elif len(args.structure) > num_populations + 2:
            parser.error("Too many arguments to --structure/-I")
        if num_populations > 1:
            # Every off-diagonal entry gets rate / (num_populations - 1);
            # int(j != k) zeroes the diagonal.
            migration_matrix = [
                [
                    symmetric_migration_rate / (num_populations - 1) * int(j != k)
                    for j in range(num_populations)
                ]
                for k in range(num_populations)
            ]
    else:
        # Without a structure option, migration matrix options are rejected.
        if len(args.migration_matrix_entry) > 0:
            parser.error(
                "Cannot specify migration matrix entries without "
                "first providing a -I option"
            )
        if args.migration_matrix is not None:
            parser.error(
                "Cannot specify a migration matrix without "
                "first providing a -I option"
            )
    # A full matrix, when given, replaces the symmetric one built above;
    # individual entries are then applied on top of whichever matrix is current.
    if args.migration_matrix is not None:
        migration_matrix = convert_migration_matrix(
            parser, args.migration_matrix, num_populations
        )
    for matrix_entry in args.migration_matrix_entry:
        pop_i = convert_population_id(parser, matrix_entry[0], num_populations)
        pop_j = convert_population_id(parser, matrix_entry[1], num_populations)
        rate = matrix_entry[2]
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        check_migration_rate(parser, rate)
        migration_matrix[pop_i][pop_j] = rate

    # Set the initial demography
    # The global growth rate is applied first, so the per-population growth
    # rates below override it for the populations they name.
    if args.growth_rate is not None:
        for population in demography.populations:
            population.growth_rate = args.growth_rate
    for population_id, growth_rate in args.population_growth_rate:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].growth_rate = growth_rate
    for population_id, size in args.population_size:
        pid = convert_population_id(parser, population_id, num_populations)
        demography.populations[pid].initial_size = size

    # Events are collected as (index, event) pairs; the index is used further
    # down to check that the events were supplied in time order.
    # NOTE(review): index is presumably the option's position on the command
    # line -- confirm against the argparse actions that fill these lists.
    demographic_events = []
    # First we look at population split events. We do this differently
    # to ms, as msprime requires a fixed number of population. Therefore,
    # modify the number of populations to take into account populations
    # splits. This is a messy hack, and will probably need to be changed.
    for index, (t, population_id, proportion) in args.admixture:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        if proportion < 0 or proportion > 1:
            parser.error("Proportion value must be 0 <= p <= 1.")
        # In ms, the probability of staying in source is p and the probability
        # of moving to the new population is 1 - p.
        # The destination is the population about to be created, whose id is
        # the current value of num_populations.
        event = (index, msprime.MassMigration(t, pid, num_populations, 1 - proportion))
        demographic_events.append(event)

        num_populations += 1
        # We add another element to each row in the migration matrix
        # along with another row. All new entries are zero.
        for row in migration_matrix:
            row.append(0)
        migration_matrix.append([0 for j in range(num_populations)])
        # The new population starts with size 1 and no samples.
        demography.populations.append(msprime.Population(initial_size=1))
        num_samples.append(0)

    # Add the demographic events
    # The options that act on all populations at once (-eG, -eN, -eM, -ema)
    # are rejected whenever any admixture event was given.
    for index, (t, alpha) in args.growth_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eG")
        check_event_time(parser, t)
        demographic_events.append(
            (index, msprime.PopulationParametersChange(time=t, growth_rate=alpha))
        )
    for index, (t, population_id, alpha) in args.population_growth_rate_change:
        pid = convert_population_id(parser, population_id, num_populations)
        check_event_time(parser, t)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, growth_rate=alpha, population_id=pid
                ),
            )
        )
    # Size changes also reset the growth rate to zero.
    for index, (t, x) in args.size_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eN")
        check_event_time(parser, t)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0
                ),
            )
        )
    for index, (t, population_id, x) in args.population_size_change:
        check_event_time(parser, t)
        pid = convert_population_id(parser, population_id, num_populations)
        demographic_events.append(
            (
                index,
                msprime.PopulationParametersChange(
                    time=t, initial_size=x, growth_rate=0, population_id=pid
                ),
            )
        )
    # A population split moves all lineages from pop_i to pop_j and then
    # switches off the migration matrix entries (k, pop_i) for every k != pop_i.
    # All of these events share the index of the originating option.
    for index, (t, pop_i, pop_j) in args.population_split:
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, pop_i, num_populations)
        pop_j = convert_population_id(parser, pop_j, num_populations)
        demographic_events.append((index, msprime.MassMigration(t, pop_i, pop_j, 1.0)))
        # Migration rates from subpopulation i (M[k, i], k != i) are set to zero.
        for k in range(num_populations):
            if k != pop_i:
                event = msprime.MigrationRateChange(t, 0.0, matrix_index=(k, pop_i))
                demographic_events.append((index, event))

    # Demographic events that affect the migration matrix
    # Rejecting these options up front for a single population also keeps the
    # division by (num_populations - 1) below away from zero, given that
    # parser.error does not return.
    if num_populations == 1:
        condition = (
            len(args.migration_rate_change) > 0
            or len(args.migration_matrix_entry_change) > 0
            or len(args.migration_matrix_change) > 0
        )
        if condition:
            parser.error("Cannot change migration rates for 1 population")
    for index, (t, x) in args.migration_rate_change:
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-eM")
        check_migration_rate(parser, x)
        check_event_time(parser, t)
        # No matrix_index is passed here, unlike the per-entry changes below.
        event = msprime.MigrationRateChange(t, x / (num_populations - 1))
        demographic_events.append((index, event))
    for index, event in args.migration_matrix_entry_change:
        # event is read as (time, pop_i, pop_j, rate).
        t = event[0]
        check_event_time(parser, t)
        pop_i = convert_population_id(parser, event[1], num_populations)
        pop_j = convert_population_id(parser, event[2], num_populations)
        if pop_i == pop_j:
            parser.error("Cannot set diagonal elements in migration matrix")
        rate = event[3]
        check_migration_rate(parser, rate)
        msp_event = msprime.MigrationRateChange(t, rate, matrix_index=(pop_i, pop_j))
        demographic_events.append((index, msp_event))
    for index, event in args.migration_matrix_change:
        # event is read as [time, num_populations, matrix entries...].
        if len(event) < 3:
            parser.error("Need at least three arguments to -ma")
        if len(args.admixture) != 0:
            raise_admixture_incompatability_error(parser, "-ema")
        t = convert_float(event[0], parser)
        check_event_time(parser, t)
        if convert_int(event[1], parser) != num_populations:
            parser.error("num_populations must be equal for new migration matrix")
        matrix = convert_migration_matrix(parser, event[2:], num_populations)
        # One rate change per off-diagonal entry of the new matrix.
        for j in range(num_populations):
            for k in range(num_populations):
                if j != k:
                    msp_event = msprime.MigrationRateChange(
                        t, matrix[j][k], matrix_index=(j, k)
                    )
                    demographic_events.append((index, msp_event))

    # Order the events by (index, time), then compare that with the same list
    # re-sorted by time alone. The two differ exactly when some event with a
    # larger index has an earlier time, which is reported as an error.
    demographic_events.sort(key=lambda x: (x[0], x[1].time))
    time_sorted = sorted(demographic_events, key=lambda x: x[1].time)
    if demographic_events != time_sorted:
        parser.error("Demographic events must be supplied in non-decreasing time order")
    # Drop the indexes now that the ordering has been validated.
    demography.events = [event for _, event in demographic_events]
    demography.migration_matrix = migration_matrix

    # Adjust the population sizes so that the timescales agree. In principle
    # we could correct this with a ploidy value=0.5, but what we have here
    # seems less awful.
    # Every explicit size is halved: those carried by size-change events and
    # the initial size of each population.
    for msp_event in demography.events:
        if isinstance(msp_event, msprime.PopulationParametersChange):
            if msp_event.initial_size is not None:
                msp_event.initial_size /= 2
    for j, pop in enumerate(demography.populations):
        pop.initial_size /= 2
        pop.name = f"pop_{j}"

    runner = SimulationRunner(
        num_samples,
        demography,
        num_loci=num_loci,
        num_replicates=args.num_replicates,
        recombination_rate=r,
        mutation_rate=mu,
        gene_conversion_rate=gc_rate,
        gene_conversion_tract_length=gc_tract_length,
        precision=args.precision,
        print_trees=args.trees,
        ms_random_seeds=args.random_seeds,
        hotspots=args.hotspots,
    )
    return runner
msp.PopulationConfiguration(initial_size = Denisovasize), #3 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #4 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #5 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #6 ] demographic_events_dict = { # admixture times 1551.06896552 + random.uniform(0,1)/100000.0: msp.MassMigration(time = 1551.06896552, source = 1, destination = 5,proportion = 0.02), Denisova_admix_time/gen_time + random.uniform(0,1)/100000.0: msp.MassMigration(time = Denisova_admix_time/gen_time, source = admix_into, destination = 2 ,proportion = DenisovaProportion), # Human parameters OutOfafrica - 300 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 300, initial_size = 1305, growth_rate = 0, population_id = 1), OutOfafrica - 200 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 200, initial_size = 5000, growth_rate = 0, population_id = 1), OutOfafrica - 100 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica - 100, initial_size = 250, growth_rate = 0, population_id = 1), OutOfafrica + random.uniform(0,1)/100000.0: msp.MassMigration(time = OutOfafrica, source = 1, destination = 0, proportion = 1.0), OutOfafrica + 0.0001 + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = OutOfafrica + 0.0001, initial_size = Modernhumans, growth_rate = 0, population_id = 0), # archaic population merges parameters IntrogressingVindijaSplit/gen_time + random.uniform(0,1)/100000.0: msp.MassMigration(time = IntrogressingVindijaSplit/gen_time, source = 6, destination = 5, proportion = 1.0), 130000/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = 130000/gen_time - 0.0001, source = 5, destination = 4, proportion = 1.0), Denisova_split/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = Denisova_split/gen_time, source = 3, destination = 2, proportion = 1.0), 
DenisovaNeanderthal/gen_time - 0.0001 + random.uniform(0,1)/100000.0: msp.MassMigration(time = DenisovaNeanderthal/gen_time, source = 4, destination = 2, proportion = 1.0), # psms pop sizes 130000/gen_time + random.uniform(0,1)/100000.0: msp.PopulationParametersChange(time = 130000/gen_time + 0.0001, initial_size = Neanderthal_latersize, growth_rate = 0, population_id = 4),