def plot_stairwayplot_coalrate(sp_infiles, outfile,
                               model, n_samp, generation_time, species,
                               pop_id=0, steps=None):
    """Plot stairwayplot Ne estimates against the model's coalescence-rate Ne.

    :param sp_infiles: Iterable of stairwayplot output files. Each is read as
        tab-separated text after skipping 5 rows and must provide the columns
        ``year`` and ``Ne_median``.
    :param outfile: Path the figure is written to.
    :param model: Object whose ``asdict()`` returns the keyword arguments for
        :class:`msprime.DemographyDebugger`.
    :param n_samp: Number of samples placed in population ``pop_id`` when
        computing the coalescence rate.
    :param generation_time: Multiplier applied to ``steps`` before plotting
        (the x axis is labelled in years).
    :param species: Name shown in the figure title.
    :param pop_id: Index of the population to evaluate.
    :param steps: Time points passed to ``coalescence_rate_trajectory``. When
        ``None`` a unit-spaced grid is built up to 10000 past the end of the
        second-to-last epoch.
    """
    # JRA
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace requires an integer number of points; end_time is
        # normally a float, which recent numpy versions reject.
        steps = np.linspace(1, end_time, int(end_time + 1))
    num_samples = [0] * ddb.num_populations
    num_samples[pop_id] = n_samp
    coal_rate, _ = ddb.coalescence_rate_trajectory(
        steps=steps,
        num_samples=num_samples,
        double_step_validation=False)
    # asarray so that a plain list of steps is scaled element-wise.
    steps = np.asarray(steps) * generation_time

    f, ax = plt.subplots(1, 1, sharex=True, sharey=True, figsize=(7, 7))
    for infile in sp_infiles:
        nt = pandas.read_csv(infile, sep="\t", skiprows=5)
        ax.plot(nt['year'], nt['Ne_median'], alpha=0.8)
    # Drawn once, after the estimates, so the reference curve sits on top.
    ax.plot(steps, 1 / (2 * coal_rate), c="black")
    ax.set_title("stairwayplot")
    plt.suptitle(f"{species}, population id {pop_id}", fontsize=16)
    ax.set(xscale="log", yscale="log")
    ax.set_xlabel("time (years ago)")
    reference_patch = mpatches.Patch(
        color='black', label='Coalescence rate derived Ne')
    ax.legend(frameon=False, fontsize=10, handles=[reference_patch])
    ax.set_ylabel("population size")
    # 'alpha' is not a savefig option, so it is no longer passed.
    f.savefig(outfile, bbox_inches='tight')
def const0001(N_0, mig_prop, t_1, t_2, seq_length, recomb_rate, mut_rate, print_):
    """Simulate a single constant-size population with msprime.

    The signature mirrors the sibling models (originally
    ``instant_struct0001``), so ``mig_prop``, ``t_1`` and ``t_2`` are accepted
    but only echoed; they do not influence the simulation.

    :param N_0: Initial size of the only population.
    :param mig_prop: Unused by the model (printed only).
    :param t_1: Unused by the model (printed only).
    :param t_2: Unused by the model (printed only).
    :param seq_length: ``length`` passed to ``msprime.simulate``.
    :param recomb_rate: ``recombination_rate`` passed to ``msprime.simulate``.
    :param mut_rate: ``mutation_rate`` passed to ``msprime.simulate``.
    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    print('N_0 is {}'.format(N_0))
    print('mig_prop is {}, but is irrelevant here'.format(mig_prop))
    print('t_1 is {} and t_2 is {}, but these are irrelevant here'.format(t_1, t_2))
    print('seq_length is {}'.format(seq_length))
    print('recomb_rate is {}'.format(recomb_rate))
    print('mut_rate is {}'.format(mut_rate))

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=N_0, growth_rate=0),
    ]
    demographic_events = []

    # Use the demography debugger to print out the demographic history
    # that we have just described.
    dd = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        demographic_events=demographic_events)
    if print_:
        print('Demographic history:\n')
        dd.print_history()

    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=seq_length,
        recombination_rate=recomb_rate,
        mutation_rate=mut_rate)
def zigzag_model(args, print=False):
    """Zigzag demography expressed as piecewise-constant population sizes.

    Derived from the model used by Schiffels and Durbin (2014) and by
    Terhorst, Kamm, and Song (2017), with periods of exponential growth and
    decline in a single population. Here, growth rates are changed to pop
    sizes.

    Schiffels and Durbin, 2014. https://doi.org/10.1038/ng.3015

    :param args: Tuple ``(params, randomize, i, proposals)``. ``params`` maps
        parameter names to objects exposing ``val``, ``inferable``,
        ``rand()`` and ``prop(i)``.
    :param print: When truthy, print the demographic history. The name
        shadows the builtin inside this function, so the builtin must not be
        called here.
    :return: ``(demographic_events, mu, r)``.
    :raises ValueError: If a required parameter is missing from ``params``.
    """
    params, randomize, i, proposals = args
    necessary_params = [
        "mu", "r",
        "T1", "N1", "T2", "N2", "T3", "N3", "T4", "N4", "T5", "N5",
    ]

    # The original tried to report this through print(), which is the boolean
    # argument in this scope and therefore failed with a TypeError.
    missing = [p for p in necessary_params if p not in params]
    if missing:
        raise ValueError(
            "Invalid combination of parameters. Needed: "
            "mu | r | T1 | N1 | T2 | N2 | T3 | N3 | T4 | N4 | T5 | N5"
            " (missing: {})".format(", ".join(missing))
        )

    if proposals:
        values = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        values = [
            params[p].rand() if randomize else params[p].val
            for p in necessary_params
        ]
    mu, r, T1, N1, T2, N2, T3, N3, T4, N4, T5, N5 = values

    N0 = 71560
    n_ancient = N0 / 10
    t_ancient = 34133.318528

    size_changes = [
        (0, N0),
        (T1, N1),
        (T2, N2),
        (T3, N3),
        (T4, N4),
        (T5, N5),
        (t_ancient, n_ancient),
    ]
    demographic_events = [
        msprime.PopulationParametersChange(
            time=time, initial_size=size, population_id=0)
        for time, size in size_changes
    ]

    if print:
        debugger = msprime.DemographyDebugger(
            Ne=N0, demographic_events=demographic_events)
        debugger.print_history()

    return demographic_events, mu, r
def constmig20201216():
    """Simulate two populations linked by symmetric migration between T_1 and T_2.

    Values used: both populations have size 1e+04, migration rate 5e-05 in
    both directions from generation 20000 to 40000, sequence length 150e+06.
    At T_2 migration is switched off and all lineages of population 0 are
    moved into population 1. Only population 0 is sampled (sample_size=2).
    The demographic history is always printed.

    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    N_A0 = 1e+04
    N_B0 = 1e+04
    m = 5e-05
    T_1 = 20000
    T_2 = 40000
    seq_length = 150e+06

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=n_sampled, initial_size=size, growth_rate=0)
        for n_sampled, size in ((2, N_A0), (0, N_B0))
    ]
    migration_matrix = [[0, 0], [0, 0]]

    # Migration on at T_1 and off at T_2, each in both directions.
    demographic_events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((T_1, m), (T_2, 0))
        for cell in ((0, 1), (1, 0))
    ]
    demographic_events.append(
        msprime.MassMigration(time=T_2, source=0, destination=1, proportion=1))

    # Print the demographic history that was just described.
    history = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    print('Demographic history:\n')
    history.print_history()

    # The migration matrix is not passed to simulate (as in the original).
    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=seq_length,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
def constmig_m5e05():
    """Simulate two populations with symmetric migration rate 5e-05.

    Migration runs in both directions from generation 2e+04 to 6e+04. At
    T_2 it is switched off and all lineages of population 1 are moved into
    population 0. Both populations have size 1e+04; only population 0 is
    sampled (sample_size=2). The demographic history is always printed.

    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    T_1 = 2e+04
    T_2 = 6e+04
    N_A0 = 1e+04
    N_B0 = 1e+04
    m = 5e-05
    seq_length = 150e+06

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=n_sampled, initial_size=size, growth_rate=0)
        for n_sampled, size in ((2, N_A0), (0, N_B0))
    ]
    migration_matrix = [[0, 0], [0, 0]]

    # Migration on at T_1 and off at T_2, each in both directions.
    demographic_events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((T_1, m), (T_2, 0))
        for cell in ((0, 1), (1, 0))
    ]
    demographic_events.append(
        msprime.MassMigration(time=T_2, source=1, destination=0, proportion=1))

    history = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    print('Demographic history:\n')
    history.print_history()

    # The migration matrix is not passed to simulate (as in the original).
    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=seq_length,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
def bottleneck_model(args, print=False):
    """Three-epoch population size history for a single population.

    :param args: Tuple ``(params, randomize, i, proposals)``. ``params`` maps
        parameter names to objects exposing ``val``, ``inferable``,
        ``rand()`` and ``prop(i)``.
    :param print: When truthy, print the demographic history. The name
        shadows the builtin inside this function, so the builtin must not be
        called here.
    :return: ``(demographic_events, mu, r)``.
    :raises ValueError: If a required parameter is missing from ``params``.
    """
    params, randomize, i, proposals = args
    necessary_params = ["mu", "r", "N0", "T1", "N1", "T2", "N2"]

    # The original tried to report this through print(), which is the boolean
    # argument in this scope and therefore failed with a TypeError.
    missing = [p for p in necessary_params if p not in params]
    if missing:
        raise ValueError(
            "Invalid combination of parameters. Needed: "
            "mu | r | N0 | T1 | N1 | T2 | N2"
            " (missing: {})".format(", ".join(missing))
        )

    if proposals:
        values = [
            params[p].prop(i) if params[p].inferable else params[p].val
            for p in necessary_params
        ]
    else:
        values = [
            params[p].rand() if randomize else params[p].val
            for p in necessary_params
        ]
    mu, r, N0, T1, N1, T2, N2 = values

    # Infer the 3 pop sizes, where N0 = N2
    demographic_events = [
        msprime.PopulationParametersChange(time=time, initial_size=size)
        for time, size in ((0, N0), (T1, N1), (T2, N2))
    ]

    if print:
        debugger = msprime.DemographyDebugger(
            Ne=10000, demographic_events=demographic_events)
        debugger.print_history()

    return demographic_events, mu, r
def pscmatch20201214():
    """Simulate one population whose size is doubled between T_1 and T_2.

    The population starts at size 10000, changes to 1e+04 * 2 at generation
    20000 and to 1e+04 at generation 40000. Sequence length is 150e+06. The
    demographic history is always printed.

    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    N_A = 10000
    N_A0 = 1e+04
    change = 2
    T_1 = 20000
    T_2 = 40000
    seq_length = 150e+06

    # One shared description, handed to both the debugger and the simulator.
    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=N_A, growth_rate=0),
        ],
        migration_matrix=[[0]],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=T_1, initial_size=N_A0 * change),
            msprime.PopulationParametersChange(
                time=T_2, initial_size=N_A0),
        ],
    )

    # Print the demographic history that was just described.
    msprime.DemographyDebugger(**model).print_history()

    return msprime.simulate(
        length=seq_length,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        **model)
def setUp(self):
    """Create the five-population test model and a debugger built from it."""
    model = _5pop_test_demog()
    self.model = model
    self.ddb = msprime.DemographyDebugger(
        demographic_events=model.demographic_events,
        population_configurations=model.population_configurations,
        migration_matrix=model.migration_matrix,
    )
def double_m0002(N_0, mig_prop, T_1, T_2, seq_length, recomb_rate, mut_rate, print_):
    """Simulate one population whose size is rescaled between T_1 and T_2.

    Here ``mig_prop`` is the factor applied to ``N_0`` between times ``T_1``
    and ``T_2`` (use this to match contmig5e4).

    :param N_0: Initial size, restored at ``T_2``.
    :param mig_prop: Multiplier giving the size ``N_0 * mig_prop`` from ``T_1``.
    :param T_1: Time of the first size change.
    :param T_2: Time at which the size returns to ``N_0``.
    :param seq_length: ``length`` passed to ``msprime.simulate``.
    :param recomb_rate: ``recombination_rate`` passed to ``msprime.simulate``.
    :param mut_rate: ``mutation_rate`` passed to ``msprime.simulate``.
    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    # One shared description, handed to both the debugger and the simulator.
    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=N_0, growth_rate=0),
        ],
        migration_matrix=[[0]],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=T_1, initial_size=N_0 * mig_prop),
            msprime.PopulationParametersChange(
                time=T_2, initial_size=N_0),
        ],
    )

    history = msprime.DemographyDebugger(**model)
    if print_:
        history.print_history()

    return msprime.simulate(
        length=seq_length,
        recombination_rate=recomb_rate,
        mutation_rate=mut_rate,
        **model)
def struct0001(print_):
    """Simulate two populations with two successive mass migrations.

    All lineages move from population 0 to population 1 at generation 2e+04
    and back from population 1 to population 0 at generation 3e+04. Both
    populations have size 1e+04; only population 0 is sampled. The run uses
    length 3e+6 and the fixed ``random_seed=50``.

    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    N_A0 = 1e+04
    N_B0 = 1e+04
    T_A = 2e+04
    T_B = 3e+04

    # One shared description, handed to both the debugger and the simulator.
    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=N_A0, growth_rate=0),
            msprime.PopulationConfiguration(
                sample_size=0, initial_size=N_B0, growth_rate=0),
        ],
        migration_matrix=[[0, 0], [0, 0]],
        demographic_events=[
            msprime.MassMigration(time=T_A, source=0, dest=1, proportion=1),
            msprime.MassMigration(time=T_B, source=1, dest=0, proportion=1),
        ],
    )

    history = msprime.DemographyDebugger(**model)
    if print_:
        history.print_history()

    return msprime.simulate(
        length=3e+6,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        random_seed=50,
        **model)
def m0002(print_):
    """Simulate one population whose size doubles at generation 20000.

    The population starts at size 10000 and is set to ``10000 * 2`` at
    generation 20000. Sequence length is 150e+6.

    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    N_A = 10000
    T_A1 = 20000

    # One shared description, handed to both the debugger and the simulator.
    model = dict(
        population_configurations=[
            msprime.PopulationConfiguration(
                sample_size=2, initial_size=N_A, growth_rate=0),
        ],
        migration_matrix=[[0]],
        demographic_events=[
            msprime.PopulationParametersChange(
                time=T_A1, initial_size=N_A * 2),
        ],
    )

    history = msprime.DemographyDebugger(**model)
    if print_:
        history.print_history()

    return msprime.simulate(
        length=150e+6,
        recombination_rate=2e-8,
        mutation_rate=2e-8,
        **model)
def const_mig0002(print_):
    """Simulate two populations with symmetric migration rate 4e-03.

    Migration runs in both directions from generation 2e+04 to 2.5e+04. At
    T_2 it is switched off and all lineages of population 0 are moved into
    population 1. Both populations have size 1e+04; only population 0 is
    sampled (sample_size=2).

    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    N_A0 = 1e+04
    N_B0 = 1e+04
    T_1 = 2e+04
    T_2 = 2.5e+04
    m = 4e-03

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=n_sampled, initial_size=size, growth_rate=0)
        for n_sampled, size in ((2, N_A0), (0, N_B0))
    ]
    migration_matrix = [[0, 0], [0, 0]]

    # Migration on at T_1 and off at T_2, each in both directions.
    demographic_events = [
        msprime.MigrationRateChange(time=when, rate=rate, matrix_index=cell)
        for when, rate in ((T_1, m), (T_2, 0))
        for cell in ((0, 1), (1, 0))
    ]
    demographic_events.append(
        msprime.MassMigration(time=T_2, source=0, destination=1, proportion=1))

    history = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    if print_:
        print('Demographic history:\n')
        history.print_history()

    # The migration matrix is not passed to simulate (as in the original).
    return msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=150e+06,
        recombination_rate=2e-08,
        mutation_rate=2e-08)
def simulate_seqs(self):
    """Run the msprime simulation and store it on ``self.tree_sequence``.

    Builds a two-population model (local, meta) with migration set only in
    matrix cell ``[0][1]``, simulates it using ``sequence_length`` and
    ``mutation_rate`` from ``self.paramsdict``, then calls
    ``self.get_sumstats()``.
    """
    LOGGER.debug("Entering simulate_seqs - {}".format(self.name))

    ## TODO: Here we are assuming only one island and that the migration
    ## is only ever unidirectional from the mainland to the island
    migmat = [
        [0, self.stats["migration_rate"]],
        [0, 0],
    ]

    local_cfg = self._get_local_configuration()
    meta_cfg = self._get_meta_configuration()
    split_events = self._get_local_meta_split()

    ## Useful for debugging demographic events; only runs when the effective
    ## logger level is exactly 10.
    if LOGGER.getEffectiveLevel() == 10:
        history = msprime.DemographyDebugger(
            population_configurations=[local_cfg, meta_cfg],
            migration_matrix=migmat,
            demographic_events=split_events)
        ## Beware: this writes a very large amount of text to stdout.
        history.print_history()
        ## Logging this to a file was considered, but it is rarely needed and
        ## the output directory is awkward to reach from here.

    LOGGER.debug("Executing msprime - {}".format(self.name))
    params = self.paramsdict
    self.tree_sequence = msprime.simulate(
        length=params["sequence_length"],
        mutation_rate=params["mutation_rate"],
        population_configurations=[local_cfg, meta_cfg],
        migration_matrix=migmat,
        demographic_events=split_events)

    self.get_sumstats()
def _demography_debug(self):
    """Print the demographic history built from this object's settings.

    Uses ``self.pop_config`` and ``self.demography``; no migration matrix is
    supplied.
    """
    msp.DemographyDebugger(
        population_configurations=self.pop_config,
        demographic_events=self.demography,
    ).print_history()
def matching_CR(N_0, seq_length, mut_rate, recomb_rate, demographic_events_input, print_=True):
    """Simulate one population under a caller-supplied list of size changes.

    msprime model to implement population size changes. Check the events
    against the model you are matching.

    :param N_0: Initial population size. ``None`` selects 10000, the value
        that used to be hard-coded. Previously this argument was silently
        overwritten with 10000.
    :param seq_length: ``length`` passed to ``msprime.simulate``.
    :param mut_rate: ``mutation_rate`` passed to ``msprime.simulate``.
    :param recomb_rate: ``recombination_rate`` passed to ``msprime.simulate``.
    :param demographic_events_input: Demographic events to apply, passed
        unchanged to the debugger and the simulator.
    :param print_: When truthy, print the demographic history first.
    :return: Whatever ``msprime.simulate`` returns for this model.
    """
    if N_0 is None:
        N_0 = 10000

    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=2, initial_size=N_0, growth_rate=0)
    ]
    migration_matrix = [[0]]
    demographic_events = demographic_events_input

    dd = msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)
    if print_:
        dd.print_history()

    return msprime.simulate(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        length=seq_length,
        recombination_rate=recomb_rate,
        mutation_rate=mut_rate)
def austin(preNE=10000, NE=100, postNE=5000, Nsamples=20, Tend=10, Tstart=25, debug=False):
    """Build the msprime inputs for a single-population bottleneck.

    Going backwards in time the population has size ``postNE`` until
    ``Tend``, size ``NE`` from ``Tend`` to ``Tstart``, and size ``preNE``
    from ``Tstart`` on. Times are in generations and the growth rate is 0.

    :param preNE: Size set at ``Tstart``.
    :param NE: Size set at ``Tend``.
    :param postNE: Initial size of the sampled population.
    :param Nsamples: ``sample_size`` of the population.
    :param Tend: Time of the first size change.
    :param Tstart: Time of the second size change.
    :param debug: When truthy, print the demographic history and return
        ``None`` instead of the model.
    :return: ``(population_configurations, migration_matrix,
        demographic_events)``, or ``None`` when ``debug`` is truthy.
    """
    # initialize populations
    population_configurations = [
        msprime.PopulationConfiguration(
            sample_size=Nsamples, initial_size=postNE, growth_rate=0)
    ]
    migration_matrix = [[0]]

    # First the change at Tend, then the change at Tstart.
    demographic_events = [
        msprime.PopulationParametersChange(
            time=when, initial_size=size, population_id=0)
        for when, size in ((Tend, NE), (Tstart, preNE))
    ]

    if not debug:
        return population_configurations, migration_matrix, demographic_events

    msprime.DemographyDebugger(
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events).print_history()
    return
def print_debug(self, output):
    """Write the migration matrix and the demographic history to ``output``.

    :param output: File-like object receiving both pieces of text.
    """
    matrix_as_array = np.array(self.get_migration_matrix())
    print(matrix_as_array, file=output)

    history = msprime.DemographyDebugger(
        Ne=self.N_A,
        population_configurations=self.get_debug_configuration(),
        migration_matrix=self.get_migration_matrix(),
        demographic_events=self.get_demographic_events()
    )
    history.print_history(output)
def debug(self, out_file=None):
    """Print the demographic history described by this object.

    :param out_file: File-like object to write to. ``None`` (the default)
        means the current ``sys.stdout``. The stream is looked up at call
        time so that redirecting stdout after import is honoured; the old
        default captured ``sys.stdout`` when the function was defined.
    """
    if out_file is None:
        out_file = sys.stdout
    # Use the demography debugger to print out the demographic history
    # that we have just described.
    dd = msprime.DemographyDebugger(
        population_configurations=self.population_configurations,
        migration_matrix=self.migration_matrix,
        demographic_events=self.demographic_events)
    dd.print_history(out_file)
def plot_all_ne_estimates(sp_infiles, smcpp_infiles, msmc_infiles, outfile,
                          model, n_samp, generation_time, species,
                          pop_id=0, steps=None):
    """Plot smc++, stairwayplot and msmc Ne estimates in side-by-side panels.

    Every panel also shows, in black, the Ne derived from the model's
    coalescence rate. There is one msmc panel per distinct sample size, which
    is taken from the part of each msmc file name before the first ``.``.

    :param sp_infiles: stairwayplot files (tab separated, 5 rows skipped,
        columns ``year`` and ``Ne_median``).
    :param smcpp_infiles: smc++ CSV files (columns 1 and 2 are read and must
        be named ``x`` and ``y``).
    :param msmc_infiles: msmc CSV files, read the same way as smc++ files.
    :param outfile: Path the figure is written to.
    :param model: Object whose ``asdict()`` returns the keyword arguments for
        :class:`msprime.DemographyDebugger`.
    :param n_samp: Number of samples placed in population ``pop_id``.
    :param generation_time: Multiplier applied to ``steps`` before plotting.
    :param species: Name shown in the figure title.
    :param pop_id: Index of the population to evaluate.
    :param steps: Time points passed to ``coalescence_rate_trajectory``. When
        ``None`` a unit-spaced grid is built up to 10000 past the end of the
        second-to-last epoch.
    """
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace requires an integer number of points; end_time is
        # normally a float, which recent numpy versions reject.
        steps = np.linspace(1, end_time, int(end_time + 1))
    num_samples = [0] * ddb.num_populations
    num_samples[pop_id] = n_samp
    coal_rate, _ = ddb.coalescence_rate_trajectory(
        steps=steps,
        num_samples=num_samples,
        double_step_validation=False)
    # asarray so that a plain list of steps is scaled element-wise.
    steps = np.asarray(steps) * generation_time
    reference_ne = 1 / (2 * coal_rate)

    # Distinct msmc sample sizes, in increasing order.
    num_msmc = sorted(
        {int(os.path.basename(infile).split(".")[0]) for infile in msmc_infiles})
    n_panels = 2 + len(num_msmc)

    f, ax = plt.subplots(1, n_panels, sharex=True, sharey=True, figsize=(14, 7))

    for infile in smcpp_infiles:
        nt = pandas.read_csv(infile, usecols=[1, 2], skiprows=0)
        ax[0].plot(nt['x'], nt['y'], alpha=0.8)
    ax[0].plot(steps, reference_ne, c="black")
    ax[0].set_title("smc++")

    for infile in sp_infiles:
        nt = pandas.read_csv(infile, sep="\t", skiprows=5)
        ax[1].plot(nt['year'], nt['Ne_median'], alpha=0.8)
    ax[1].plot(steps, reference_ne, c="black")
    ax[1].set_title("stairwayplot")

    for i, sample_size in enumerate(num_msmc):
        panel = ax[2 + i]
        for infile in msmc_infiles:
            samp = os.path.basename(infile).split(".")[0]
            if int(samp) == sample_size:
                nt = pandas.read_csv(infile, usecols=[1, 2], skiprows=0)
                panel.plot(nt['x'], nt['y'], alpha=0.8)
        panel.plot(steps, reference_ne, c="black")
        panel.set_title(f"msmc, ({sample_size} samples)")

    plt.suptitle(f"{species}, population id {pop_id}", fontsize=16)
    for i in range(n_panels):
        ax[i].set(xscale="log")
        ax[i].set_xlabel("time (years ago)")
    reference_patch = mpatches.Patch(
        color='black', label='Coalescence rate derived Ne')
    ax[0].legend(frameon=False, fontsize=10, handles=[reference_patch])
    ax[0].set_ylabel("population size")
    # 'alpha' is not a savefig option, so it is no longer passed.
    f.savefig(outfile, bbox_inches='tight')
def _demography_debug(self):
    """Print the demographic history built from this object's settings.

    Uses ``self.pop_config``, ``self.migration_matrix`` and
    ``self.demography``.
    """
    msp.DemographyDebugger(
        population_configurations=self.pop_config,
        migration_matrix=self.migration_matrix,
        demographic_events=self.demography,
    ).print_history()
def two_bin(NA, N1, N2, Ts, M1, M2):
    """Simulate a two-population split model and print it in MS-like format.

    Population 0 (size ``N1``) is unsampled and population 1 (size ``N2``)
    contributes 50 samples. Matrix cell ``[0][1]`` holds rate ``M2`` at time
    0 and is changed to ``M1`` at ``Ts / 2``; at ``Ts`` all lineages of
    population 1 are moved into population 0. Ten replicates of length
    100000 are simulated and each is printed (for msstats): a ``//`` line,
    the number of segregating sites, the mutation positions scaled by the
    sequence length, and one line per haplotype.

    :param NA: ``Ne`` passed to the debugger and the simulator.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration.
    :param M1: Rate of matrix cell ``[0][1]`` from ``Ts / 2`` on.
    :param M2: Rate of matrix cell ``[0][1]`` before ``Ts / 2``.
    :return: ``None``; the results are printed.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=0, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2)
    ]
    migration_matrix = [[0, M2], [0, 0]]
    demographic_events = [
        msprime.MigrationRateChange(time=Ts / 2, rate=M1, matrix_index=(0, 1)),
        msprime.MassMigration(time=Ts, source=1, destination=0, proportion=1.0)
    ]

    # The debugger's output is not used; it is still constructed in case
    # construction rejects an inconsistent model (TODO confirm), and calling
    # print_history() on it is handy while debugging.
    msprime.DemographyDebugger(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    replicates = 10
    length = 100000
    sim = msprime.simulate(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        mutation_rate=1e-7,
        recombination_rate=1e-8,
        length=length,
        num_replicates=replicates)

    # for MS like output (for msstats)
    for s in sim:
        print("//")
        print("segsites: " + str(s.get_num_mutations()))
        pos = [mut.position / length for mut in s.mutations()]
        print("positions: " + " ".join(str(e) for e in pos))
        for h in s.haplotypes():
            print(h)
def test_demography_debugger_equal(self):
    """The model's debugger must print the same history as a hand-built one."""
    for model in stdpopsim.all_demographic_models():
        debuggers = (
            model.get_demography_debugger(),
            msprime.DemographyDebugger(
                population_configurations=model.population_configurations,
                migration_matrix=model.migration_matrix,
                demographic_events=model.demographic_events),
        )
        rendered = []
        for ddb in debuggers:
            buffer = io.StringIO()
            ddb.print_history(buffer)
            rendered.append(buffer.getvalue())
        from_model, from_attributes = rendered
        self.assertEqual(from_model, from_attributes)
def verify_debug(self, population_configurations, migration_matrix, demographic_events):
    """Assert that the debugger prints a non-empty history for this model.

    :param population_configurations: Passed to ``DemographyDebugger``.
    :param migration_matrix: Passed to ``DemographyDebugger``.
    :param demographic_events: Passed to ``DemographyDebugger``.
    """
    with tempfile.TemporaryFile("w+") as scratch:
        msprime.DemographyDebugger(
            population_configurations=population_configurations,
            migration_matrix=migration_matrix,
            demographic_events=demographic_events).print_history(scratch)
        # Rewind so the text just written can be read back.
        scratch.seek(0)
        debug_output = scratch.read()
    # TODO when there is better output, write some tests to
    # verify its format.
    self.assertGreater(len(debug_output), 0)
def nucleotide_diversity_ILS(paramsdict, tree, debug=False):
    """Simulate along ``tree`` with msprime and store diversity on each leaf.

    Every tip becomes a population sampled with 5 genomes at size
    ``node.Ne``. Every internal node becomes a mass migration at
    ``node.height * 1e6`` generations between the populations recorded on
    its first two descendants. After simulating, ``ts.diversity`` is
    computed per population and assigned to ``leaf.pi`` in the order
    returned by ``tree.treenode.get_leaves()``.

    :param paramsdict: Mapping providing ``sequence_length`` and
        ``mutation_rate``.
    :param tree: Tree object; its ``treenode`` is traversed and annotated
        (``msprime_idx`` on every node, ``pi`` on every leaf).
    :param debug: When truthy, print per-node information and the
        demographic history.
    :return: ``(ts, tree)``.
    """
    generation_time = 1
    population_configurations = []
    demographic_events = []
    next_pop_index = 0

    ## Walk the tree bottom-up so descendants are labelled before parents.
    for node in tree.treenode.traverse("postorder"):
        descendants = node.get_descendants()
        if len(descendants) != 0:
            ## Internal node: join the populations of the first two descendants.
            chidx = [d.msprime_idx for d in descendants]
            gens = node.height * 1e6 / generation_time
            demographic_events.append(
                msprime.MassMigration(time=gens, source=chidx[0], dest=chidx[1]))
            node.add_feature("msprime_idx", chidx[1])
            if debug:
                joined = " ".join(str(x) for x in chidx)
                print("I'm a node {} ({}) [gens {}]- {}".format(
                    node.idx, node.msprime_idx, gens, joined))
            continue

        ## Tip: one sampled population per leaf.
        population_configurations.append(
            msprime.PopulationConfiguration(sample_size=5, initial_size=node.Ne))
        node.add_feature("msprime_idx", next_pop_index)
        if debug:
            print("I'm a tip {} - {}".format(node.idx, node.msprime_idx))
        next_pop_index += 1

    ## Events are put in increasing time order.
    demographic_events = sorted(demographic_events, key=lambda event: event.time)

    if debug:
        msprime.DemographyDebugger(
            population_configurations=population_configurations,
            demographic_events=demographic_events).print_history()

    ## Do the simulation
    ts = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        length=paramsdict["sequence_length"],
        mutation_rate=paramsdict["mutation_rate"])

    ## One sample set per population, keyed by population id.
    pop_inds = {pop.id: ts.samples(pop.id) for pop in ts.populations()}
    pis = ts.diversity(list(pop_inds.values()))

    for pi, leaf in zip(pis, tree.treenode.get_leaves()):
        leaf.pi = pi

    return ts, tree
def get_demography_debugger(self):
    """
    Build a :class:`msprime.DemographyDebugger` from this model's population
    configurations, migration matrix and demographic events. See the msprime
    documentation for how to use a DemographyDebugger.

    :return: A DemographyDebugger instance for this DemographicModel.
    :rtype: msprime.DemographyDebugger
    """
    debugger_kwargs = {
        "population_configurations": self.population_configurations,
        "migration_matrix": self.migration_matrix,
        "demographic_events": self.demographic_events,
    }
    return msprime.DemographyDebugger(**debugger_kwargs)
def mad_1():
    """Simulate a two-population split per chromosome and write VCF files.

    For every index of the module-level ``muts`` a model with two
    populations (0: Cvi, 1: High Atlas) is simulated using that entry as the
    mutation rate. At ``splitTime`` all lineages of population 0 are moved
    into population 1. Each of the ``repl`` replicates is written to its own
    VCF file. Relies on the module-level names ``muts``, ``splitTime``,
    ``repl``, ``winSize`` and ``iterate``.
    """
    n_chromosomes = len(muts)
    for chrom in range(n_chromosomes):
        print(chrom)

        # Population IDs correspond to their indexes in the population
        # configuration array.
        N_start = 5000
        r_start = 0.0
        model = dict(
            population_configurations=[
                msprime.PopulationConfiguration(
                    sample_size=252, initial_size=N_start, growth_rate=r_start),
                msprime.PopulationConfiguration(
                    sample_size=16, initial_size=1e5, growth_rate=0.0),
            ],
            migration_matrix=[[0, 0], [0, 0]],
            demographic_events=[
                msprime.MassMigration(
                    time=splitTime, source=0, destination=1, proportion=1.0),
            ],
        )

        # Print the demographic history of this model.
        msprime.DemographyDebugger(Ne=N_start, **model).print_history()

        # Run the required number of replicates.
        num_replicates = repl
        replicates = msprime.simulate(
            mutation_rate=muts[chrom],
            recombination_rate=1.8e-8,
            length=winSize,
            num_replicates=num_replicates,
            **model)

        for i, tree_sequence in enumerate(replicates):
            vcf_path = "".join([
                "Path_to_working_directory/vcf/cvi_winToHa", str(splitTime),
                "_Chr", str(chrom),
                "_", str(iterate),
                "_", str(i),
                ".vcf",
            ])
            with open(vcf_path, "w") as vcf_file:
                tree_sequence.write_vcf(vcf_file, 1)
def popn_coal_rate(model, pop_id, n_samp, generation_time, steps=None):
    """
    returns tuple (coal_rate, P, steps) for pop_id
    conditional on the model and configuration

    :param model: Object whose ``asdict()`` returns the keyword arguments for
        :class:`msprime.DemographyDebugger`.
    :param pop_id: Index of the population that receives the samples.
    :param n_samp: Number of samples placed in population ``pop_id``.
    :param generation_time: Multiplier applied to the returned ``steps``.
    :param steps: Time points passed to ``coalescence_rate_trajectory``. When
        ``None`` a unit-spaced grid is built up to 10000 past the end of the
        second-to-last epoch.
    :return: ``(coal_rate, P, steps)`` with ``steps`` already multiplied by
        ``generation_time``.
    """
    ddb = msprime.DemographyDebugger(**model.asdict())
    if steps is None:
        end_time = ddb.epochs[-2].end_time + 10000
        # np.linspace requires an integer number of points; end_time is
        # normally a float, which recent numpy versions reject.
        steps = np.linspace(1, end_time, int(end_time + 1))
    num_samples = [0] * ddb.num_populations
    num_samples[pop_id] = n_samp
    coal_rate, P = ddb.coalescence_rate_trajectory(
        steps=steps,
        num_samples=num_samples,
        double_step_validation=False)
    # asarray so that a plain list of steps is scaled element-wise.
    steps = np.asarray(steps) * generation_time
    return coal_rate, P, steps
def one_bin(NA, N1, N2, Ts, M):
    """Variance of between-population pairwise differences under symmetric migration.

    Two populations (sizes ``N1`` and ``N2``, 50 samples each) exchange
    migrants at rate ``M`` in both directions; at ``Ts`` all lineages of
    population 1 are moved into population 0. One replicate of length 100000
    is simulated. Every haplotype of population 0 is compared with every
    haplotype of population 1 and the number of differing sites is recorded.

    :param NA: ``Ne`` passed to the debugger and the simulator.
    :param N1: Initial size of population 0.
    :param N2: Initial size of population 1.
    :param Ts: Time of the mass migration.
    :param M: Migration rate used in both directions.
    :return: ``np.var`` of the recorded pairwise difference counts.
    """
    population_configurations = [
        msprime.PopulationConfiguration(sample_size=50, initial_size=N1),
        msprime.PopulationConfiguration(sample_size=50, initial_size=N2)
    ]
    migration_matrix = [[0, M], [M, 0]]
    demographic_events = [
        msprime.MassMigration(time=Ts, source=1, destination=0, proportion=1.0)
    ]

    # The debugger's output is not used; it is still constructed in case
    # construction rejects an inconsistent model (TODO confirm), and calling
    # print_history() on it is handy while debugging.
    msprime.DemographyDebugger(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events)

    replicates = 1
    length = 100000
    sim = msprime.simulate(
        Ne=NA,
        population_configurations=population_configurations,
        migration_matrix=migration_matrix,
        demographic_events=demographic_events,
        mutation_rate=1e-7,
        recombination_rate=1e-8,
        length=length,
        num_replicates=replicates)

    pairwise_diff = []
    for s in sim:
        s0 = len(s.get_samples(0))
        s1 = len(s.get_samples(1))
        haps = list(s.haplotypes())
        h0 = haps[:s0]
        # The slice end is exclusive, so s0 + s1 covers all of population 1;
        # the former "- 1" silently dropped its last haplotype.
        h1 = haps[s0:s0 + s1]
        pairwise_diff.extend(
            sum(1 for a, b in zip(hap0, hap1) if a != b)
            for hap0 in h0
            for hap1 in h1)

    return np.var(np.array(pairwise_diff))
def plot_rainbow(input, output, g=30, model=None, steps=None, pop_id=1):
    """
    Creates a plot of all iterations by colour. This plot is useful for assessing convergence.

    :param str input: The full file path to an aggregated output file from :code:`smcsmc`.
    :param str output: Filepath to save the plot.
    :param int g: The length of one generation in years. This is used to scale the x axis.
    :param stdpopsim.Model model: Model for plotting. The string ``"ooa"`` selects
        ``stdpopsim.homo_sapiens.GutenkunstThreePopOutOfAfrica``.
    :param int steps: Time points at which the model's coalescence rate is evaluated.
        When ``None``, 31 points evenly spaced on a log scale are used.
    :param int pop_id: If your model includes multiple populations, which one do you want to plot?

    .. figure:: ../img/rainbow.png
        :align: center
    """
    f, ax = plt.subplots(figsize=(7, 7))
    ax.set(xscale="log", yscale="log")
    ax.set_ylim([10e2, 10e4])
    ax.set_xlim([1e3, 1e6])

    if model == "ooa":
        if stdpopsim is not None:
            model = getattr(stdpopsim.homo_sapiens, "GutenkunstThreePopOutOfAfrica")()
        else:
            print("Module stdpopsim not available - cannot go on.")
            sys.exit(1)

    if model is not None:
        import msprime  # needs to be installed.
        ddb = msprime.DemographyDebugger(**model.asdict())
        if steps is None:
            end_time = ddb.epochs[-2].end_time + 10000
            steps = np.exp(np.linspace(1, np.log(end_time), 31))
        num_samples = [0] * ddb.num_populations
        num_samples[pop_id] = 20
        coal_rate, _ = ddb.coalescence_rate_trajectory(
            steps=steps,
            num_samples=num_samples,
            double_step_validation=False)
        steps = steps * g
        ax.plot(steps, 1 / (2 * coal_rate), c="black", drawstyle='steps-pre')

    # Raw string: '\s' is not a valid escape in a normal string literal.
    nt = pd.read_csv(input, sep=r'\s+')
    nt['Start'] *= g
    # Grouping by the bare column name keeps the keys scalar, so legend
    # labels read "0", "1", ... rather than "(0,)". The loop names no longer
    # shadow the parameter ``g``.
    for iteration, rows in nt.groupby('Iter'):
        rows.plot(x='Start', y='Ne', ax=ax, drawstyle='steps-pre', label=iteration)
    ax.legend(title='EM Iterations', ncol=5)
    f.savefig(output)
def debugger(self):
    """
    A debugger to run before the simulation.

    Prints four checks: (1) whether the SLiM script contains a
    ``.treeSeqOutput(...)`` call, storing its argument in ``self.slim_out``;
    (2) the subsampling plan from ``self.populations`` and
    ``self.sample_sizes``; (3) the ancient demography used for
    recapitation; (4) the neutral mutation rate. Problems are reported by
    printing; nothing is raised for them.
    """
    # SLiM debugging
    slim = self.slim_script
    print('\nSLiM input file:', slim)

    # Test 1: is an output file saved?
    string_pre = ".treeSeqOutput("
    string_post = ")"
    found_output = False
    with open(slim, 'r') as f:
        for line in f:
            if string_pre in line and string_post in line:
                out_file = line.split(string_pre)[1].split(string_post)[0]
                # Drop the double quotes around the file name.
                self.slim_out = out_file.strip('""')
                print('SLiM output file:', self.slim_out)
                found_output = True
    if not found_output:
        print(
            "SLiM error: Oh no, your script does not produce a .trees file!\n"
            "Please ensure you include a call to 'treeSeqOutput()'\n"
            "at the end of your script.")

    # Test 2: subsampling
    populations = self.populations
    sample_sizes = self.sample_sizes
    if populations is None and sample_sizes is None:
        # This message used to be a bare string expression, never printed.
        print("No subsampling will be performed.")
    elif populations is None or sample_sizes is None:
        # Previously this case crashed on len(None).
        print(
            "Subsampling error: The list of populations to sample from\n"
            "must have the same length as the list of sample sizes.")
    else:
        if len(populations) != len(sample_sizes):
            print(
                "Subsampling error: The list of populations to sample from\n"
                "must have the same length as the list of sample sizes.")
        print("We are sampling:")
        # zip stops at the shorter list, so a length mismatch no longer
        # ends in an IndexError after the error message.
        for population, size in zip(populations, sample_sizes):
            print("-", size, "individuals from population", population)

    # Test 3: demography debugging in recapitation
    print('Ancient demography:')
    dd = msprime.DemographyDebugger(
        population_configurations=self.ancient_population_configurations,
        demographic_events=self.ancient_demographic_events)
    dd.print_history()

    # Test 4: Adding variation
    # Neutral mutations
    print('Neutral mutation rate:', self.neutral_mutation_rate)