def msprime_sim(sample_size, Ne, length, recombination_rate, recombination_map,
                mutation_rate, migration_matrix, demographic_events,
                num_replicates):
    """Assemble samples and population configurations, then run msprime.

    The sampling scheme is not passed in: it is read from the names
    ``modern_samples`` (a sequence of per-population sample counts),
    ``ancient_samples`` (a count) and ``time`` (the ancient sampling time),
    which must exist in the enclosing scope.

    All parameters are forwarded unchanged to ``msprime.simulate``; the
    function returns whatever that call returns.
    """
    # NOTE(review): modern_samples, ancient_samples and time are not
    # parameters of this function -- confirm they are module-level globals.

    # Present-day samples: the index into modern_samples is the population id.
    samples = [
        msprime.Sample(pop_id, 0)
        for pop_id, count in enumerate(modern_samples)
        for _ in range(count)
    ]
    # Ancient samples are all placed in population 0.
    samples.extend(msprime.Sample(0, time) for _ in range(ancient_samples))
    print(samples)

    # One default configuration per entry of modern_samples.
    population_configurations = [
        msprime.PopulationConfiguration() for _ in modern_samples
    ]
    print(population_configurations)

    simulate_kwargs = dict(
        sample_size=sample_size,
        Ne=Ne,
        length=length,
        recombination_rate=recombination_rate,
        recombination_map=recombination_map,
        mutation_rate=mutation_rate,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        samples=samples,
        num_replicates=num_replicates,
    )
    return msprime.simulate(**simulate_kwargs)
def msprime_to_dadi_simulation_OutOfAfrica(path, seed, chrom, sample_size=20):
    """Simulate two populations with msprime and save their joint SFS for dadi.

    ``chrom`` is a key into ``homo_sapiens.genome.chromosomes``.
    ``sample_size`` haploid samples are drawn at time 0 from population 0 and
    the same number from population 1 of the
    ``GutenkunstThreePopOutOfAfrica`` model. The joint site frequency
    spectrum is written to ``path`` via ``dadi.Spectrum.to_file``; nothing is
    returned.
    """
    chromosome = homo_sapiens.genome.chromosomes[chrom]
    model = homo_sapiens.GutenkunstThreePopOutOfAfrica()

    pop0_samples = [msprime.Sample(population=0, time=0)] * sample_size
    pop1_samples = [msprime.Sample(population=1, time=0)] * sample_size

    tree_sequence = msprime.simulate(
        samples=pop0_samples + pop1_samples,
        recombination_map=chromosome.recombination_map(),
        mutation_rate=chromosome.default_mutation_rate,
        random_seed=seed,
        **model.asdict())

    haplotypes = np.array(tree_sequence.genotype_matrix())

    def alt_allele_counts(haps):
        # Pair haplotypes into diploid genotypes, then keep the counts of
        # allele 1 at every site.
        genotypes = allel.HaplotypeArray(haps).to_genotypes(ploidy=2)
        return genotypes.count_alleles()[:, 1]

    # Samples were listed population 0 first, so the first sample_size
    # columns belong to population 0 and the remainder to population 1.
    counts_pop0 = alt_allele_counts(haplotypes[:, :sample_size])
    counts_pop1 = alt_allele_counts(haplotypes[:, sample_size:])

    spectrum = dadi.Spectrum(allel.joint_sfs(counts_pop0, counts_pop1))
    spectrum.to_file(path)
def get_example_base(self, num_populations=1, length=1):
    """Return a decapitated 10-sample tree sequence to use as a ``from_ts`` base.

    The simulation uses ``num_populations`` default populations with a
    migration rate of 1 between every pair of distinct populations, samples
    only from population 0, and a fixed random seed. The result of
    ``tsutil.decapitate`` at half the edge count is returned.
    """
    n_pops = num_populations
    # All-ones matrix with a zero diagonal.
    migration = np.ones((n_pops, n_pops)) - np.eye(n_pops)
    configs = [msprime.PopulationConfiguration() for _ in range(n_pops)]
    ts = msprime.simulate(
        samples=[msprime.Sample(0, 0)] * 10,
        length=length,
        random_seed=155,
        population_configurations=configs,
        migration_matrix=migration,
    )
    cut_at = ts.num_edges // 2
    return tsutil.decapitate(ts, cut_at)
def test_sample_size_population_configuration(self):
    """Sample sizes given via PopulationConfiguration produce the right samples.

    For 1 to 4 populations: a sample size of zero must raise ``ValueError``,
    and a sample size of two per population must yield two time-0 samples
    per population, in population order, both on the simulator and on its
    low-level instance.
    """
    for num_pops in range(1, 5):
        # Zero sample size is always an error
        empty_configs = [
            msprime.PopulationConfiguration(0) for _ in range(num_pops)]
        with self.assertRaises(ValueError):
            msprime.simulator_factory(population_configurations=empty_configs)

        pair_configs = [
            msprime.PopulationConfiguration(2) for _ in range(num_pops)]
        sim = msprime.simulator_factory(population_configurations=pair_configs)
        self.assertEqual(len(sim.samples), 2 * num_pops)

        expected = [
            msprime.Sample(population=pop, time=0)
            for pop in range(num_pops)
            for _ in range(2)
        ]
        self.assertEqual(sim.samples, expected)
        ll_sim = sim.create_ll_instance()
        self.assertEqual(ll_sim.get_samples(), expected)
def test_samples(self):
    """Every way of specifying samples is rejected together with ``from_ts``."""
    base_ts = self.get_example_base()

    # Positional sample size.
    with self.assertRaises(ValueError):
        msprime.simulate(2, from_ts=base_ts)

    # Keyword sample size.
    with self.assertRaises(ValueError):
        msprime.simulate(sample_size=2, from_ts=base_ts)

    # Explicit sample list.
    explicit_samples = [msprime.Sample(0, 0) for _ in range(10)]
    with self.assertRaises(ValueError):
        msprime.simulate(samples=explicit_samples, from_ts=base_ts)

    # Sample size carried by a population configuration.
    sized_configs = [msprime.PopulationConfiguration(sample_size=2)]
    with self.assertRaises(ValueError):
        msprime.simulate(
            population_configurations=sized_configs, from_ts=base_ts)
def set_up_pops(nS, tS):
    """Return the msprime sample list for the eight-population model.

    Samples are emitted group by group in this order: Denisovan 3 (Altai),
    Africa, European, East Asian, Papuan, Negrito (Ayta), Neanderthal, Chimp.
    Each group contains ``1 * nS[0]`` copies of one sample. Sampling times
    come from ``tS``: index 0 for all groups except Denisovan (index 1) and
    Neanderthal (index 2).
    """
    # NOTE(review): only nS[0] is ever read, so every population gets the
    # same sample count -- confirm the other entries of nS are meant to be
    # ignored.
    group_size = 1 * nS[0]
    sampling_plan = (
        (DEN3, tS[1]),  # Denisovan 3 (Altai)
        (AFR, tS[0]),   # Africa
        (CEU, tS[0]),   # European
        (EAS, tS[0]),   # East Asian
        (PAP, tS[0]),   # Papuan
        (AYT, tS[0]),   # Negrito (Ayta)
        (NEA, tS[2]),   # Neanderthal
        (CHM, tS[0]),   # Chimp
    )
    samples = []
    for population, sampling_time in sampling_plan:
        samples += [msp.Sample(population=population, time=sampling_time)] * group_size
    return samples
def test_samples(self):
    """Every way of specifying samples is rejected together with ``from_ts``."""
    base_ts = self.get_example_base()

    # (positional args, keyword args) for each forbidden combination.
    invalid_calls = [
        ((2,), {}),
        ((), {"sample_size": 2}),
        ((), {"samples": [msprime.Sample(0, 0) for _ in range(10)]}),
        (
            (),
            {
                "population_configurations": [
                    msprime.PopulationConfiguration(sample_size=2)
                ]
            },
        ),
    ]
    for args, kwargs in invalid_calls:
        with pytest.raises(ValueError):
            msprime.simulate(*args, from_ts=base_ts, **kwargs)
def simulate(out_path, species, model, genetic_map, seed, chrmStr,
             sample_size=20, population=0):
    """Simulate one chromosome with msprime and dump the tree sequence.

    ``sample_size`` time-0 samples are drawn from ``population``. The
    recombination map is looked up on the chromosome by ``genetic_map.name``,
    the mutation rate is the chromosome's default, and the demographic
    arguments come from ``model.asdict()``. The result is written to
    ``out_path``; progress messages go to stdout.
    """
    chromosome = species.genome.chromosomes[chrmStr]
    sample_list = [
        msp.Sample(population=population, time=0) for _ in range(sample_size)
    ]

    print("Simulating...")
    tree_sequence = msp.simulate(
        samples=sample_list,
        recombination_map=chromosome.recombination_map(genetic_map.name),
        mutation_rate=chromosome.default_mutation_rate,
        random_seed=seed,
        **model.asdict())
    tree_sequence.dump(out_path)
    print("Simulation finished!")
def test_wf_hudson_ancient_samples(self):
    """DTWF followed by the standard coalescent works with ancient samples.

    Samples are taken at times 0..n-1. The model switches from discrete-time
    Wright-Fisher to the standard coalescent at time ``t``; non-sample node
    times before ``t`` must be integers and those after ``t`` must not be.
    """
    Ne = 10
    t = 10
    n = 20
    staggered_samples = [
        msprime.Sample(time=j, population=0) for j in range(n)]
    switch_to_hudson = msprime.SimulationModelChange(
        t, msprime.StandardCoalescent(Ne))
    ts = msprime.simulate(
        samples=staggered_samples,
        model=msprime.DiscreteTimeWrightFisher(Ne),
        demographic_events=[switch_to_hudson],
        random_seed=2)

    self.assertEqual(ts.first().num_roots, 1)

    # Nodes with flags == 0 are the non-sample nodes.
    times = ts.tables.nodes.time[ts.tables.nodes.flags == 0]

    dtwf_times = times[(times > 0) & (times < t)]
    self.assertGreater(dtwf_times.shape[0], 0)
    self.assertTrue(np.all(dtwf_times == np.floor(dtwf_times)))

    coalescent_times = times[times > t]
    self.assertGreater(coalescent_times.shape[0], 0)
    self.assertTrue(np.all(coalescent_times != np.floor(coalescent_times)))
def test_wf_hudson_ancient_samples(self):
    """The ``["dtwf", (t, "hudson")]`` model list works with ancient samples.

    Samples are taken at times 0..n-1. Non-sample node times before the
    model change at ``t`` must be integers; those after it must not be.
    """
    Ne = 10
    t = 10
    n = 20
    sample_list = [msprime.Sample(time=when, population=0) for when in range(n)]
    ts = msprime.simulate(
        samples=sample_list,
        Ne=Ne,
        model=["dtwf", (t, "hudson")],
        random_seed=2,
    )
    first_tree = ts.first()
    self.assertEqual(first_tree.num_roots, 1)

    # Keep only the nodes whose flags are 0, i.e. the non-sample nodes.
    is_internal = ts.tables.nodes.flags == 0
    times = ts.tables.nodes.time[is_internal]

    in_dtwf_phase = np.logical_and(times > 0, times < t)
    dtwf_times = times[in_dtwf_phase]
    self.assertGreater(dtwf_times.shape[0], 0)
    self.assertTrue(np.all(dtwf_times == np.floor(dtwf_times)))

    in_hudson_phase = times > t
    coalescent_times = times[in_hudson_phase]
    self.assertGreater(coalescent_times.shape[0], 0)
    self.assertTrue(np.all(coalescent_times != np.floor(coalescent_times)))
def runSimulator(simNum, chrNum, seedNum, Ne4, T, Ne3, Ne1, recRate):
    """Simulate the San Nicolas demographic model and write a VCF.

    Time runs backwards from the year 2000. The single population starts at
    size ``Ne1`` and changes to 10 at time 40, ``Ne3`` at 42, ``Ne4`` at
    ``T`` and 20000 at 8012. Eight haplotypes are sampled: two at time 71,
    four at time 12 and two at time 0. The VCF is written to
    ``peak_sim<seedNum>.vcf`` with ploidy 2 and contig id ``str(chrNum)``.
    ``simNum`` is not used.
    """
    # (time, new population size), moving backwards in time
    size_changes = [(40, 10), (42, Ne3), (T, Ne4), (8012, 20000)]
    demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0)
        for when, size in size_changes
    ]

    # One individual (two haplotypes) in 1929, two individuals in 1988 and
    # one individual in 2000.
    samples = (
        [msprime.Sample(population=0, time=71)] * 2
        + [msprime.Sample(population=0, time=12)] * 4
        + [msprime.Sample(population=0, time=0)] * 2
    )

    tree_sequence = msprime.simulate(
        demographic_events=demographic_events,
        samples=samples,
        Ne=Ne1,
        length=1e7,
        recombination_rate=recRate,
        mutation_rate=2e-8,
        random_seed=seedNum)

    vcf_name = "".join(['peak_sim', str(seedNum), '.vcf'])
    with open(vcf_name, 'w') as vcf_file:
        tree_sequence.write_vcf(vcf_file, 2, str(chrNum))
def get_samples(self, *args):
    """
    Returns a list of msprime.Sample objects, one group per positional
    argument. The k-th argument is the number of samples to take from
    ``self.populations[k]``, each at that population's ``sampling_time``.

    Raises ``ValueError`` if a positive number of samples is requested from
    a population whose ``allow_samples`` is false.

    .. todo:: Add a description how the positional arguments work and
        perhaps link into a section of the tutorial showing it in action.
    """
    samples = []
    for pop_index, n in enumerate(args):
        population = self.populations[pop_index]
        if not population.allow_samples:
            # A zero request for a non-sampling population is harmless.
            if n > 0:
                raise ValueError(
                    "Samples requested from non-sampling population"
                    f" {pop_index}")
            continue
        samples += [
            msprime.Sample(pop_index, time=population.sampling_time)] * n
    return samples
def get_samples(dg, pop_ids, sample_sizes):
    """
    Build the msprime samples list for the given populations and sizes.

    Each population in ``pop_ids`` must be in ``dg.leaves``. A population's
    sampling time is the largest accumulated time over all entries returned
    by ``util.get_accumulated_times(dg)`` minus that population's own
    accumulated time. Population indexes come from
    ``get_population_configurations``.
    """
    # Only the name -> index mapping is needed here.
    _configs, index_of = get_population_configurations(dg, [], {}, 1)
    accumulated = util.get_accumulated_times(dg)
    latest = max(accumulated.values())

    samples = []
    for name, count in zip(pop_ids, sample_sizes):
        assert name in dg.leaves, "samples can only be taken from leaves"
        sampling_time = latest - accumulated[name]
        one_sample = msprime.Sample(index_of[name], time=sampling_time)
        samples += [one_sample] * count
    return samples
def test_sample_combination_errors(self):
    """Conflicting ways of specifying the samples raise ``ValueError``."""
    s = msprime.Sample(time=0.0, population=0)

    # No sample specification at all.
    with self.assertRaises(ValueError):
        msprime.simulator_factory()

    # Cannot provide sample_size with either population configurations
    # or samples
    with self.assertRaises(ValueError):
        msprime.simulator_factory(sample_size=2, samples=[s, s])
    with self.assertRaises(ValueError):
        msprime.simulator_factory(
            sample_size=2,
            population_configurations=[
                msprime.PopulationConfiguration(sample_size=2)],
        )

    # If we provide samples and population_configurations we cannot
    # have a sample size for the config.
    with self.assertRaises(ValueError):
        msprime.simulator_factory(
            samples=[s, s],
            population_configurations=[
                msprime.PopulationConfiguration(sample_size=2)],
        )
    with self.assertRaises(ValueError):
        msprime.simulator_factory(
            samples=[s, s],
            population_configurations=[
                msprime.PopulationConfiguration(sample_size=None),
                msprime.PopulationConfiguration(sample_size=2),
            ],
        )
def get_samples(self, *args):
    """
    Returns a list of msprime.Sample objects, with the number of samples
    from each population given by the positional arguments.

    For instance, ``model.get_samples(2, 5, 7)`` returns 14 samples: two
    from ``model.populations[0]``, five from ``model.populations[1]`` and
    seven from ``model.populations[2]``, each at that population's
    ``sampling_time``. Populations with no corresponding argument
    contribute no samples.

    Raises ``ValueError`` if a positive count is given for a population
    whose ``allow_samples`` is false.
    """
    collected = []
    for index, count in enumerate(args):
        source = self.populations[index]
        if source.allow_samples:
            template = msprime.Sample(index, time=source.sampling_time)
            collected.extend(template for _ in range(count))
        elif count > 0:
            raise ValueError(
                "Samples requested from non-sampling population"
                f" {index}")
    return collected
def _get_nsamples(self):
    """
    Populate ``self._samples`` for trees whose tips are not ultrametric.

    When ``self._tips_are_ultrametric`` is true, ``self._samples`` is set to
    None. Otherwise it becomes a list holding, for every tip label in tree
    order, ``self.sampledict[tip]`` copies of
    ``ms.Sample(tip_index, height)``, where ``height`` is the tip's entry in
    ``self.tip_to_heights`` truncated to an int.
    """
    if self._tips_are_ultrametric:
        self._samples = None
        return

    self._samples = []
    for tip_index, label in enumerate(self.tree.get_tip_labels()):
        tip_height = int(self.tip_to_heights[label])
        copies = self.sampledict[label]
        self._samples.extend(
            ms.Sample(tip_index, tip_height) for _ in range(copies))
def neanderthal_admixture_model(num_modern=1000, anc_pop=1, anc_num=1,
                                anc_time=900, mix_time=2000,
                                split_time=120000, f=0.03, Ne0=10000,
                                Ne1=2500, mu=1.5e-8, rho=1.0e-8,
                                length=10000000, window_size=1000000,
                                num_SNP=1, num_rep=100, coverage=False):
    """Simulate a single admixture pulse and tabulate archaic ancestry per window.

    ``num_modern`` samples are taken from population 0 at time 0 and
    ``anc_num`` samples from population ``anc_pop`` at time ``anc_time``.
    At ``mix_time`` a fraction ``f`` of population 0 moves to population 1
    (backwards in time), and at ``split_time`` population 1 merges into
    population 0.

    For every replicate, the tree covering each window midpoint is located.
    Starting from the last sample, the code walks up until the parent is at
    least ``split_time`` old and counts the leaves below that node, minus one
    for the archaic sample itself.

    Results are written tab-separated to ``outfile_s.txt`` and returned.

    Returns:
        ``(window, frequency, length)`` as three numpy arrays, or ``None``
        (after printing a message) when ``f`` is outside ``[0, 1]``.

    ``num_SNP`` and ``coverage`` are accepted but not used.
    """
    # Invalid input is reported and signalled with None rather than an
    # exception; callers may rely on that.
    if f < 0 or f > 1:
        # Parenthesised single-argument form runs on Python 2 and Python 3.
        print("Admixture fraction is not in [0,1]")
        return None

    samples = [msp.Sample(population=0, time=0)] * num_modern
    # The archaic sample(s) go last: the analysis below starts from the very
    # last leaf, so keep them last when adding more modern populations.
    samples.extend([msp.Sample(population=anc_pop, time=anc_time)] * anc_num)

    pop_config = [
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne1),
    ]
    divergence = [
        msp.MassMigration(time=mix_time, source=0, destination=1,
                          proportion=f),
        msp.MassMigration(time=split_time, source=1, destination=0,
                          proportion=1.0),
    ]
    sims = msp.simulate(samples=samples, Ne=Ne0,
                        population_configurations=pop_config,
                        demographic_events=divergence, mutation_rate=mu,
                        recombination_rate=rho, length=length,
                        num_replicates=num_rep)

    win = []
    freq = []
    leng = []
    for sim in sims:
        cur_win = 1
        cur_start = 0
        cur_end = window_size - 1
        cur_site = (cur_start + cur_end) / 2.0
        # NOTE(review): each tree is tested against one midpoint only. If a
        # tree spans two midpoints, the second midpoint lies to the left of
        # every later tree and the rest of the replicate is never recorded.
        for tree in sim.trees():
            F_int = tree.get_interval()
            if cur_site >= F_int[0] and cur_site < F_int[1]:
                cur_node = len(samples) - 1  # the very last leaf
                while tree.get_time(tree.get_parent(cur_node)) < split_time:
                    cur_node = tree.get_parent(cur_node)
                win.append(cur_win)
                # minus our lone Neanderthal
                freq.append(tree.get_num_leaves(cur_node) - 1)
                leng.append(tree.get_length())
                # Advance to the next window.
                cur_start += window_size
                cur_end += window_size
                if cur_end > length:
                    break
                cur_win += 1
                cur_site = (cur_start + cur_end) / 2.0

    # The context manager closes the file even if a write fails.
    with open('outfile_s.txt', 'w') as outfile:
        outfile.write("window\tfrequency\tlength")
        outfile.write('\n')
        for row in zip(win, freq, leng):
            outfile.write('\t'.join(str(value) for value in row))
            outfile.write('\n')

    return np.array(win), np.array(freq), np.array(leng)
"""
Example of using the stdpopsim library with msprime.

Simulates chromosome 22 under the Gutenkunst three-population
Out-of-Africa model with one sample per population and prints the number
of trees and sites.
"""
import msprime

import stdpopsim.h_sapiens as h_sap

model = h_sap.models.GutenkunstThreePopOutOfAfrica()
model.debug()

# One sample each from YRI, CEU and CHB (populations 0, 1 and 2). There's no
# point in pushing the sampling strategy into the model generation
samples = [msprime.Sample(population=pop_id, time=0) for pop_id in range(3)]

ts = msprime.simulate(
    samples=samples,
    length=h_sap.chr22.length,
    recombination_rate=h_sap.chr22.mean_recombination_rate,
    mutation_rate=h_sap.chr22.mean_mutation_rate,
    **model.asdict())

# print(ts.tables)
print("simulated:", ts.num_trees, ts.num_sites)
def run_model(self):
    """Simulate the five-population ancient-Europe model and return the result.

    Reads the recombination map from ``self.infile`` (HapMap format), builds
    the population configurations and demographic events, prints the
    demographic history through ``msprime.DemographyDebugger``, and returns
    the tree sequence from ``msprime.simulate``.

    Samples are taken from ``self.populations``: ``self.nhaps[i]`` copies at
    time ``self.sample_times[i]`` for the i-th entry. ``self.hg_mig_rate`` is
    the rate between populations 2 and 3 from ``T_Yam`` until all migration
    is switched off at ``T_europe``.
    """
    # Load recomb map
    recomb_map = msprime.RecombinationMap.read_hapmap(self.infile)

    # initial population sizes:
    N_bronze = 50000
    N_Yam = 20000
    N_baa = 10000
    N_whg = 10000
    N_ehg = 10000
    N_neo = 50000
    N_chg = 10000
    N_A = 5000  # Ancestor of WHG and EHG
    N_B = 5000  # Ancestor of CHG and Neolithic farmers

    # Time of events
    T_bronze = 150
    T_Yam = 200
    T_neo = 250
    T_baa = 275
    T_near_east = 800
    T_europe = 500
    T_basal = 1500

    # Growth rate and initial population size for present day from bronze age
    r_EU = 0.067
    N_present = N_bronze / math.exp(-r_EU * T_bronze)

    # Populations: 0=present/bronze/neolithic_farmers/Ana/B, 1=Yam/CHG,
    # 2=WHG/A, 3=EHG, 4=BAA
    num_pops = 5
    population_configurations = [
        msprime.PopulationConfiguration(
            initial_size=N_present, growth_rate=r_EU),
    ]
    population_configurations += [
        msprime.PopulationConfiguration(initial_size=size)
        for size in (N_Yam, N_whg, N_ehg, N_baa)
    ]

    # All events in one list, in the same chronological order as the
    # original per-event groups.
    demographic_events = [
        # bronze age formation
        msprime.MassMigration(
            time=T_bronze, source=0, dest=1, proportion=0.5),
        msprime.PopulationParametersChange(
            time=T_bronze, initial_size=N_neo, growth_rate=0, population=0),
        # Yamnaya formation
        msprime.MassMigration(time=T_Yam, source=1, dest=3, proportion=0.5),
        msprime.PopulationParametersChange(
            time=T_Yam, initial_size=N_chg, population=1),
        msprime.MigrationRateChange(
            time=T_Yam, rate=self.hg_mig_rate, matrix_index=(2, 3)),
        msprime.MigrationRateChange(
            time=T_Yam, rate=self.hg_mig_rate, matrix_index=(3, 2)),
        # European neolithic
        msprime.MassMigration(
            time=T_neo, source=0, dest=2, proportion=1.0 / 4.0),
        # BAA formation
        msprime.MassMigration(
            time=T_baa, source=4, dest=1, proportion=1.0 / 4.0),
        # Ana split
        msprime.MassMigration(time=276, source=4, dest=0, proportion=1),
        # hunter-gatherer split
        msprime.MassMigration(
            time=T_europe, source=3, dest=2, proportion=1),
        msprime.MigrationRateChange(time=T_europe, rate=0),
        msprime.PopulationParametersChange(
            time=T_europe, initial_size=N_A, population=2),
        # near east split
        msprime.MassMigration(
            time=T_near_east, source=1, dest=0, proportion=1),
        msprime.PopulationParametersChange(
            time=T_near_east, initial_size=N_B, population=0),
        # basal split
        msprime.MassMigration(time=T_basal, source=2, dest=0, proportion=1),
    ]

    # Define samples
    samples = []
    for i, p in enumerate(self.populations):
        one_sample = msprime.Sample(time=self.sample_times[i], population=p)
        samples.extend([one_sample] * self.nhaps[i])

    # Debugging the demography (no migration at time zero)
    migration_matrix = [[0] * num_pops for _ in range(num_pops)]
    debugger = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    debugger.print_history()

    # Simulate chromosome 3 only
    return msprime.simulate(
        recombination_map=recomb_map,
        mutation_rate=self.mutation_rate,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        samples=samples)
def twopop_pulse_migration_slim2(out_dir, seed):
    """
    Two populations with different sizes and introgression from pop2 to pop1.
    Burn-in is disabled. Time and Ne are rescaled by a factor of 10.

    Delegates to ``_twopop_IM`` with the "slim" engine, the ``_pulse_m21``
    pulse, ``slim_burn_in=0`` and ``slim_scaling_factor=10``, and returns its
    result.
    """
    return _twopop_IM("slim", out_dir, seed, pulse=_pulse_m21, slim_burn_in=0, slim_scaling_factor=10)


# 100 samples in total: list repetition interleaves a time-0 sample from
# population 0 with a time-500 sample from population 1, fifty times over.
_ancient_samples = 50 * [ msprime.Sample(0, time=0), msprime.Sample(1, time=500) ]


def twopop_ancient_samples_msprime1(out_dir, seed):
    """
    Two populations, with ancient sampling of the second population.

    Delegates to ``_twopop_IM`` with the "msprime" engine and the
    module-level ``_ancient_samples`` list, and returns its result.
    """
    return _twopop_IM("msprime", out_dir, seed, samples=_ancient_samples)


def twopop_ancient_samples_slim1(out_dir, seed):
    """
    Two populations, with ancient sampling of the second population.
    """
def __init__(self, ta, n0=1, na=1, Ne=1e4, rec_rate=1e-4, loci=2, reps=100):
    """Initialize the two-population (index 0 and index 1) model.

    Parameters
    ----------
    ta : sampling time given to the ancient samples.
    n0 : number of time-0 samples.
    na : number of samples taken at time ``ta``.
    Ne, rec_rate, loci, reps : stored unchanged on the instance.

    Both sample groups are drawn from population 1. The constructor only
    stores configuration; ``self.treeseq`` starts as None.
    """
    # Event times are given in years and divided by the generation time.
    generation_time = 25
    T_AF = 148e3 / generation_time
    T_OOA = 51e3 / generation_time
    T_EU0 = 23e3 / generation_time
    T_EG = 5115 / generation_time

    # Growth rates
    r_EU0 = 0.00307
    r_EU = 0.0195
    r_AF = 0.0166

    # population sizes
    N_A = 7310
    N_AF1 = 14474
    N_B = 1861
    N_EU0 = 1032
    N_EU1 = N_EU0 / np.exp(-r_EU0 * (T_EU0 - T_EG))

    # migration rates
    m_AF_B = 15e-5
    m_AF_EU = 2.5e-5

    # present Ne
    N_EU = N_EU1 / np.exp(-r_EU * T_EG)
    N_AF = N_AF1 / np.exp(-r_AF * T_EG)

    both_directions = ((0, 1), (1, 0))

    def symmetric_rate_change(when, rate):
        # One MigrationRateChange per direction, (0, 1) first.
        return [
            msp.MigrationRateChange(time=when, rate=rate, matrix_index=pair)
            for pair in both_directions
        ]

    events = symmetric_rate_change(T_EG, m_AF_EU)
    events += [
        msp.PopulationParametersChange(
            time=T_EG, growth_rate=r_EU0, initial_size=N_EU1,
            population_id=1),
        msp.PopulationParametersChange(
            time=T_EG, growth_rate=0, initial_size=N_AF1, population_id=0),
    ]
    events += symmetric_rate_change(T_EU0, m_AF_B)
    events += [
        msp.PopulationParametersChange(
            time=T_EU0, initial_size=N_B, growth_rate=0, population_id=1),
        msp.MassMigration(
            time=T_OOA, source=1, destination=0, proportion=1.0),
        msp.PopulationParametersChange(
            time=T_AF, initial_size=N_A, population_id=0),
    ]

    self.pop_config = [
        msp.PopulationConfiguration(initial_size=N_AF, growth_rate=r_AF),
        msp.PopulationConfiguration(initial_size=N_EU, growth_rate=r_EU),
    ]
    self.migration_matrix = [[0, m_AF_EU], [m_AF_EU, 0]]
    self.demography = events

    self.samples1 = [msp.Sample(population=1, time=0)] * n0
    self.samples2 = [msp.Sample(population=1, time=ta)] * na
    self.samples = self.samples1 + self.samples2

    self.rec_rate = rec_rate
    self.loci = loci
    self.reps = reps
    self.Ne = Ne
    self.treeseq = None
import msprime
import numpy as np
import matplotlib.pyplot as plt

# number of generations
# NOTE(review): t is not used in the visible code; the ancient samples below
# hard-code time=3 instead -- confirm the two are meant to stay in sync.
t = 3
# number of chromosomes sampled
S_dip = 50
S_hap = S_dip * 2
Ne = 200
mutation_rate = 1e-9
length = 2e8
recom_rate = 0
reps = 300
# S_hap samples at time 0 followed by S_hap samples at time 3, all from
# population 0.
samples = [msprime.Sample(population=0, time=0) for i in range(S_hap)
           ] + [msprime.Sample(population=0, time=3) for i in range(S_hap)]


def fc_variant(genotype, S_hap):
    # NOTE(review): the body reads a name `v` from the enclosing scope and
    # never uses the `genotype` parameter -- confirm which was intended.
    # allele 1 :
    # Fraction of 1-genotypes in the first S_hap samples (xi_1) and in the
    # remaining samples (yi_1).
    xi_1 = sum(v.genotypes[:S_hap]) / S_hap
    yi_1 = sum(v.genotypes[S_hap:]) / S_hap
    xi_2 = 1 - xi_1
    yi_2 = 1 - yi_1
    if xi_1 > 40 / 400 and xi_1 < 360 / 400:
        if xi_2 > -40 / 400:
            #print(xi_1,yi_1,xi_2, yi_2)
"""
Example simulation for analysis.

Simulates human chr22 under the Gutenkunst three-population Out-of-Africa
model with 20 samples from population 0 and dumps the tree sequence to
``simulated.trees``.
"""
import msprime
# import stdpopsim
from stdpopsim import homo_sapiens

chrom = homo_sapiens.genome.chromosomes["chr22"]
recomb_map = chrom.recombination_map()

model = homo_sapiens.GutenkunstThreePopOutOfAfrica()
# model.debug()

# Currently 20 samples, all from a single popn.
tmp_samples = [msprime.Sample(population=0, time=0)]
samples = tmp_samples * 20

# Reuse the map built above: previously recomb_map was never used and the
# map was constructed a second time inside this call.
ts = msprime.simulate(
    samples=samples,
    recombination_map=recomb_map,
    mutation_rate=chrom.mean_mutation_rate,
    **model.asdict())

# Hard coded output name. FIX ME
ts.dump("simulated.trees")
times = sorted(list(thinned_configs.keys())) + [-1] epochs = [(t0, t1) for t0,t1 in zip(times[:-1],times[1:])] epoch_configs = {} for e in epochs: epoch_configs[e] = thinned_configs[e[0]] return epoch_configs if "__name__" == "__main__": ## your simulation inputs here import homo_sapiens dg = homo_sapiens.ooa_gutenkunst() pop_config, mig_mat, demo_events = dg.msprime_inputs() ## set up the samples you want # in the OOA model as defined here, YRI is pop 3, CEU is 4, and CHB is 5 # let's take 10 samples from each samples = [ msprime.Sample(population=3, time=0) for i in range(10) ] + [ msprime.Sample(population=4, time=0) for i in range(10) ]+ [ msprime.Sample(population=5, time=0) for i in range(10) ] sampling_times = get_sampling_times(samples) epoch_configs = get_epochs(pop_config, mig_mat, demo_events, sampling_times)
def sim_ongoing_interval(rec_map=None, L=3e9, Ne=10000, Nadmix=500,
                         Tadmix_start=4, Tadmix_stop=12, frac_ongoing=0.05,
                         seed=None, path=None, tszip=None):
    """
    Simulate an ongoing model of admixture with the discrete-time backwards
    Wright-Fisher model.

    All samples are taken from population 2. Going backwards in time,
    migration with rate ``frac_ongoing`` at matrix index (2, 1) is switched
    on at ``Tadmix_start`` and off at ``Tadmix_stop``; at
    ``Tadmix_stop + 1`` population 2 is merged into population 0. The
    simulation ends at ``Tadmix_stop + 2``.

    rec_map = valid msprime recombination map
    L = length of genome, in base pairs (ignored if rec_map is specified)
    Ne = diploid population size for all three populations
    Nadmix = number of observed admixed individuals (2 * Nadmix samples)
    Tadmix_start, Tadmix_stop = start and end of the admixture interval
    frac_ongoing = migration rate during the interval
    seed = seed to pass to msprime.simulate
    path = file path, if given will write the ts to this path
           (NOT IMPLEMENTED)
    tszip = accepted but not used
    """
    assert Tadmix_stop > Tadmix_start, "Tadmix_stop must be greater than Tadmix_start"

    # Coerce to the integer types the simulation is given.
    Tadmix_start, Tadmix_stop = int(Tadmix_start), int(Tadmix_stop)
    Ne, Nadmix = int(Ne), int(Nadmix)

    # recombination map: a uniform map of rate 1e-8 unless one was supplied
    if rec_map:
        recomb_map = rec_map
    else:
        genome_length = int(L)
        recomb_map = msprime.RecombinationMap.uniform_map(
            genome_length, 1e-8, genome_length)

    num_pops = 3
    pop_configs = [
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0)
        for _ in range(num_pops)
    ]
    # No migration at time zero.
    mig_mat = [[0] * num_pops for _ in range(num_pops)]

    # migration during the interval Tadmix_start - Tadmix_stop
    migration_on = msprime.MigrationRateChange(
        time=Tadmix_start, rate=frac_ongoing, matrix_index=(2, 1))
    migration_off = msprime.MigrationRateChange(
        time=Tadmix_stop, rate=0, matrix_index=(2, 1))
    # founding of pop 2
    founding = msprime.MassMigration(
        time=Tadmix_stop + 1, source=2, destination=0, proportion=1.0)

    samps = [msprime.Sample(population=2, time=0)] * 2 * Nadmix

    ts_admix = msprime.simulate(
        population_configurations=pop_configs,
        migration_matrix=mig_mat,
        demographic_events=[migration_on, migration_off, founding],
        recombination_map=recomb_map,
        mutation_rate=0,
        model='dtwf',
        samples=samps,
        random_seed=seed,
        start_time=0,
        end_time=Tadmix_stop + 2,
    )
    return ts_admix
def sim_two_pulse(rec_map=None, L=1e9, Ne=10000, Nadmix=500, T1=4, T2=12,
                  frac1=.2, frac2=.2, seed=None, path=None, tszip=None):
    """Simulate a two-pulse model of admixture.

    Uses the discrete-time backwards Wright-Fisher model. All samples are
    taken from population 2. Going backwards in time, a fraction ``frac1``
    of population 2 moves to population 1 at ``T1`` and a fraction ``frac2``
    at ``T2``; at ``T2 + 1`` population 2 is merged into population 0. The
    simulation ends at ``T2 + 2``.

    rec_map = valid msprime recombination map
    L = length of genome, in base pairs (ignored if rec_map is specified)
    Ne = diploid population size for all three populations
    Nadmix = number of observed admixed diploid individuals
    T1, T2 = times of the two pulses (T2 must be greater than T1)
    frac1, frac2 = proportions moved by the two pulses
    seed = seed passed to msprime.simulate()
    path = file path, if given will write the ts to this path
    tszip = if true (and path is given) write a tszip-compressed file
    """
    assert T2 > T1, "T2 must be greater than T1"

    # convert to correct dtypes and catch problems
    T1, T2 = int(T1), int(T2)
    Ne, Nadmix = int(Ne), int(Nadmix)

    # recombination map: a uniform map of rate 1e-8 unless one was supplied
    if rec_map:
        recomb_map = rec_map
    else:
        genome_length = int(L)
        recomb_map = msprime.RecombinationMap.uniform_map(
            genome_length, 1e-8, genome_length)

    num_pops = 3
    pop_configs = [
        msprime.PopulationConfiguration(initial_size=Ne, growth_rate=0)
        for _ in range(num_pops)
    ]
    # no ongoing migration
    mig_mat = [[0] * num_pops for _ in range(num_pops)]

    # The two pulses, then the founding of population 2.
    pulses = [(T1, 1, frac1), (T2, 1, frac2), (T2 + 1, 0, 1.0)]
    admixture_events = [
        msprime.MassMigration(
            time=when, source=2, destination=target, proportion=share)
        for when, target, share in pulses
    ]

    samps = [msprime.Sample(population=2, time=0)] * 2 * Nadmix

    ts_admix = msprime.simulate(
        population_configurations=pop_configs,
        migration_matrix=mig_mat,
        demographic_events=admixture_events,
        recombination_map=recomb_map,
        mutation_rate=0,
        model='dtwf',
        samples=samps,
        random_seed=seed,
        start_time=0,
        end_time=T2 + 2,
    )

    if not path:
        return ts_admix

    if tszip:
        # save compressed ts; the module is imported under another name so
        # the tszip argument is not shadowed
        import tszip as tszip_module
        tszip_module.compress(ts_admix, path, variants_only=False)
    else:
        # save uncompressed ts
        ts_admix.dump(path)
    return ts_admix
"""
Example of using the stdpopsim library with msprime.

Simulates Drosophila melanogaster chrX under the Sheehan & Song
three-epoch model with two samples from population 0 and prints the
number of trees and sites.
"""
import msprime
import stdpopsim
from stdpopsim import drosophila_melanogaster

chrom = drosophila_melanogaster.genome.chromosomes["chrX"]
recomb_map = chrom.recombination_map()

model = drosophila_melanogaster.SheehanSongThreeEpoch()
model.debug()

samples = [msprime.Sample(population=0, time=0)] * 2

# Reuse the map built above: previously recomb_map was never used and the
# map was constructed a second time inside this call.
ts = msprime.simulate(
    samples=samples,
    recombination_map=recomb_map,
    mutation_rate=chrom.mean_mutation_rate,
    **model.asdict())
print("simulated:", ts.num_trees, ts.num_sites)
infection_size = 1
stable_pop_size = 100

# Sub-populations based on infected people. Every sub-population that should
# exist at the end of the simulation (present time) has to be listed here;
# index 0 is the source population.
pop_list = [PopSource]
sample_list = []

# Set up the end state: every population exists at the stable size and
# nothing dies within the simulated time.
for pop in range(final_num_pops):
    # print(pop)
    pop_list.append(
        msprime.PopulationConfiguration(
            initial_size=stable_pop_size, growth_rate=0))
    # The first half of the samples is historical (taken at
    # gens_list[pop] - 30) rather than contemporaneous; the rest is taken
    # at time 0. Population ids are shifted by one for the source at index 0.
    for sample in range(sample_size):
        sample_list.append(
            msprime.Sample(
                population=(pop + 1),
                time=(gens_list[pop] - 30) if sample < sample_size // 2 else 0))

# no migration between sources across time, only infection events,
# so migration matrix is zeros
M = np.zeros((final_num_pops + 1, final_num_pops + 1))

# Now get transmission events from the data. Use index as population number,
# but +1 since have fake source pop at index 0.
#for i in list_of_root_and_kids:
#    print(list_of_root_and_kids.index(i) + 1)

####--- new version with sub-pops ---####
## a simple model where independent sub-pop is infection derived from source pop
# if infected by true pop, need to state when diverged from past pop if that's the case
# Split times (years) OutOfafrica = 62000/gen_time Denisova_split = 350000 IntrogressingVindijaSplit = 90000 DenisovaNeanderthal = 420000 # Admix parameters (years and admixture proportions (in percent)) Denisova_admix_time = 45000 DenisovaProportion = 0.08 admix_into = 1 # (1 for humans, 5 for neanderthals) # Number of samples n_ingroup = 2 African_samples = 1000 samples = [msp.Sample(0, 0)]*African_samples + [msp.Sample(1, 0)]*n_ingroup + [msp.Sample(3, 80000/gen_time)]*n_ingroup + [msp.Sample(4, 120000/gen_time)]*n_ingroup + [msp.Sample(6, 60000/gen_time)]*n_ingroup population_configurations = [ msp.PopulationConfiguration(initial_size = Ne_Africa), #0 msp.PopulationConfiguration(initial_size = Ne_Europe), #1 msp.PopulationConfiguration(initial_size = Denisovasize), #2 msp.PopulationConfiguration(initial_size = Denisovasize), #3 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #4 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #5 msp.PopulationConfiguration(initial_size = Neanderthal_recentsize), #6 ] demographic_events_dict = {
def neanderthal_admixture_model(num_eu=170, num_as=394, num_nean=1,
                                anc_time=900, mix_time1=2000, mix_time2=1000,
                                mix_time3=1000, mix_time4=1000,
                                split_time_1=120000, split_time_2=2300,
                                split_time_3=1500, f1=0.022, f2=0.00,
                                f3=0.00, f4=0.00, Ne0=10000, Ne1=2500,
                                Ne2=10000, mu=1.5e-8, window_size=100000,
                                num_SNP=1, num_rep=1, coverage=False):
    """Simulate multi-pulse archaic admixture and count archaic lineages per window.

    Populations: 0 = EU, 1 = AS, 2 = Basal Eurasian (never sampled),
    3 = Neanderthal. The recombination map is read from
    ``chr1_map_5000.txt`` in the working directory.

    For every replicate, the tree covering each window midpoint is located.
    Starting from the last sample (the archaic one), the code walks up until
    the parent is at least ``split_time_1`` old and counts the EU and AS
    leaves below that node.

    Results are written tab-separated to ``outfile_map_chr1_1f.txt`` and
    returned as ``(window, frequency_EU, frequency_AS)`` numpy arrays.

    ``Ne2``, ``num_SNP`` and ``coverage`` are accepted but not used.

    Changes from the previous version:
      * A leftover debugging loop called ``trees.next()`` inside
        ``while True`` and paused on ``raw_input()``. It always ended in an
        uncaught ``StopIteration``, so the function could never finish. The
        loop, the per-tree debug prints and the pauses were removed.
      * ``print`` now uses the single-argument call form, which runs on
        Python 2 and Python 3.
      * Each output row now ends with a newline. The newline write had been
        commented out together with the ``length`` column, so all rows ran
        together on one line.
      * The output file is written inside a ``with`` block.
    """
    infile = "chr1_map_5000.txt"
    rho_map = msp.RecombinationMap.read_hapmap(infile)

    samples = [msp.Sample(population=0, time=0)] * num_eu
    # no sampling of Basal Eurasian pop
    samples.extend([msp.Sample(population=1, time=0)] * num_as)
    # The archaic sample(s) go last: the analysis below starts from the very
    # last leaf, so keep them last when adding more modern populations.
    samples.extend([msp.Sample(population=3, time=anc_time)] * num_nean)

    pop_config = [
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne0),
        msp.PopulationConfiguration(initial_size=Ne1),
    ]
    divergence = [
        # BE dilution into EU
        msp.MassMigration(time=mix_time4, source=0, destination=2,
                          proportion=f4),
        # second pulse EU
        msp.MassMigration(time=mix_time3, source=0, destination=3,
                          proportion=f3),
        # second pulse AS
        msp.MassMigration(time=mix_time2, source=1, destination=3,
                          proportion=f2),
        # EU AS split
        msp.MassMigration(time=split_time_3, source=0, destination=1,
                          proportion=1.0),
        # first pulse
        msp.MassMigration(time=mix_time1, source=1, destination=3,
                          proportion=f1),
        # BE AS split
        msp.MassMigration(time=split_time_2, source=1, destination=2,
                          proportion=1.0),
        # Neand AS split
        msp.MassMigration(time=split_time_1, source=3, destination=2,
                          proportion=1.0),
    ]
    sims = msp.simulate(samples=samples, Ne=Ne0,
                        population_configurations=pop_config,
                        demographic_events=divergence, mutation_rate=mu,
                        recombination_map=rho_map, num_replicates=num_rep)
    print("done simulating")

    win = []
    freq_EU = []
    freq_AS = []
    # Last position of the recombination map: no window may end past it.
    last = rho_map.get_positions()[-1]
    cur_sim = 0
    for sim in sims:
        cur_win = 1
        cur_start = 0
        cur_end = window_size - 1
        cur_site = (cur_start + cur_end) / 2.0
        cur_sim += 1
        print("current simulation")
        print(cur_sim)
        # NOTE(review): each tree is tested against one midpoint only. If a
        # tree spans two midpoints, the second midpoint lies to the left of
        # every later tree and the rest of the replicate is never recorded.
        for tree in sim.trees():
            F_int = tree.get_interval()
            if cur_site >= F_int[0] and cur_site < F_int[1]:
                cur_node = len(samples) - 1  # the very last leaf
                while tree.get_time(tree.get_parent(cur_node)) < split_time_1:
                    cur_node = tree.get_parent(cur_node)
                N_freq_EU = 0
                N_freq_AS = 0
                for leaf in tree.leaves(cur_node):
                    leaf_population = tree.get_population(leaf)
                    if leaf_population == 0:
                        N_freq_EU += 1
                    elif leaf_population == 1:
                        N_freq_AS += 1
                win.append(cur_win)
                freq_EU.append(N_freq_EU)
                freq_AS.append(N_freq_AS)
                # Advance to the next window.
                cur_start += window_size
                cur_end += window_size
                if cur_end > last:
                    break
                cur_win += 1
                cur_site = (cur_start + cur_end) / 2.0

    with open('outfile_map_chr1_1f.txt', 'w') as outfile:
        outfile.write("window\tfrequency_EU\tfrequency_AS")
        outfile.write('\n')
        for row in zip(win, freq_EU, freq_AS):
            outfile.write('\t'.join(str(value) for value in row))
            outfile.write('\n')

    return np.array(win), np.array(freq_EU), np.array(freq_AS)