def _mallard_black_split():
    """
    Build the North American mallard / black duck split-with-migration model.

    Two contemporary populations (``Mallard`` and ``Black_duck``) exchange
    migrants at the same rate in both directions and, going backwards in
    time, merge into a single ``Ancestral`` population at the split time.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "MallardBlackDuck_2L19"
    summary = "North American Mallard/Black Duck split"
    details = """ This is a model fit to contemporary samples of wild North American mallard and black duck, using the "split-migration" model of dadi. See Figure 6 of Lavretsky et al 2019. """

    years_per_generation = 4

    # Split time, expressed in generations rather than years.
    split_time = 632305 / years_per_generation

    black_duck_size = 1.57e6
    mallard_size = 1.37e6
    # Personal communication from Joshua Brown, 13 Apr 2021:
    # "Based on the contemporary dataset, the ancestral population size
    # for the Black duck/Mallard dadi model was 819535."
    ancestral_size = 819535

    # The migration rate is reported as 2.82 in each direction. Per the dadi
    # manual, m12 is "the fraction of individuals each generation in pop 1
    # that are new migrants from pop 2, times the 2Nref". Dividing by
    # 2 * ancestral_size converts it back to a per-generation fraction.
    migration_rate = 2.82 / (2 * ancestral_size)

    demography = msprime.Demography()
    population_specs = (
        ("Mallard", mallard_size, "Wild North American mallards"),
        ("Black_duck", black_duck_size, "Wild black ducks"),
        ("Ancestral", ancestral_size, "Ancestral population"),
    )
    for pop_name, pop_size, pop_description in population_specs:
        demography.add_population(
            initial_size=pop_size,
            name=pop_name,
            description=pop_description,
        )
    demography.set_symmetric_migration_rate(
        populations=["Mallard", "Black_duck"], rate=migration_rate
    )
    demography.add_population_split(
        time=split_time,
        derived=["Mallard", "Black_duck"],
        ancestral="Ancestral",
    )

    source_paper = stdpopsim.Citation(
        author="Lavretsky et al.",
        year=2019,
        doi="https://doi.org/10.1111/mec.15343",
        reasons={stdpopsim.CiteReason.DEM_MODEL},
    )
    return stdpopsim.DemographicModel(
        id=model_id,
        description=summary,
        long_description=details,
        citations=[source_paper],
        generation_time=years_per_generation,
        model=demography,
        mutation_rate=4.83e-9,
    )
def __init__(self, NA, N1, N2, T, M12, M21):
    """
    Set up a generic two-population isolation-with-migration model.

    :param NA: size of the ancestral population.
    :param N1: constant size of ``pop1``.
    :param N2: constant size of ``pop2``.
    :param T: time of the split, in generations ago.
    :param M12: rate passed to msprime for ``source="pop1"``,
        ``dest="pop2"``.
    :param M21: rate passed to msprime for ``source="pop2"``,
        ``dest="pop1"``.
    """
    demography = msprime.Demography()
    for pop_name, pop_size in (("pop1", N1), ("pop2", N2), ("ancestral", NA)):
        demography.add_population(initial_size=pop_size, name=pop_name)

    # FIXME These rates are BACKWARDS in time, so forwards in time they run
    # the other way around. This should be explained in the documentation
    # (and the arguments probably swapped). It is also unclear whether this
    # model is still worth keeping - what does it do that would not be
    # better done in demes/msprime?
    demography.set_migration_rate(source="pop1", dest="pop2", rate=M12)
    demography.set_migration_rate(source="pop2", dest="pop1", rate=M21)
    demography.add_population_split(
        time=T, ancestral="ancestral", derived=["pop1", "pop2"]
    )

    details = """ A generic isolation with migration model where a single ancestral population of size NA splits into two populations of constant size N1 and N2 time T generations ago, with migration rates M12 and M21 between the split populations. """
    super().__init__(
        id="IsolationWithMigration",
        description="Generic IM model",
        long_description=details,
        model=demography,
        generation_time=1,
    )
def test_all_fields(self):
    """Run ``self.verify`` on a tree sequence with metadata in most tables."""
    demography = msprime.Demography()
    for pop_name, pop_size in (("A", 10_000), ("B", 5_000), ("C", 1_000)):
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(
        time=1000, derived=["A", "B"], ancestral="C"
    )

    ancestry = msprime.sim_ancestry(
        samples={"A": 1, "B": 1},
        demography=demography,
        random_seed=42,
        record_migrations=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=1, random_seed=42)
    tables = mutated.dump_tables()

    # Every table except these two gets a JSON schema and one JSON metadata
    # document per row.
    untouched_tables = ["provenances", "edges"]
    for name, table in tables.table_name_map.items():
        if name in untouched_tables:
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        row_documents = [
            f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))
        ]
        packed, packed_offset = tskit.pack_strings(row_documents)
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)

    # Top-level (tree-sequence) metadata.
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    self.verify(tables.tree_sequence())
def __init__(
    self,
    recombination_rate: float = RHO_HUMAN,
    mutation_rate: float = MU_HUMAN,
    demographic_events: "msprime.Demography | None" = None,
    population: "int | None" = None,
    number_intervals: int = N,
    splitter=simple_split,  # TODO: annotate once the splitter contract is pinned down
    num_replicates: int = 1,
    lengt: int = L_HUMAN,
    model: str = "hudson",
    random_seed: int = 42,
    sample_size: int = 1,
):
    """
    Store the simulation settings and make sure a demography is available.

    :param recombination_rate: stored as ``self.recombination_rate``.
    :param mutation_rate: stored as ``self.mutation_rate``.
    :param demographic_events: demography to simulate under. When falsy, a
        single-population ``msprime.Demography`` (population ``"A"``) is
        built from ``population`` instead.
        NOTE(review): annotated as a Demography because that is what this
        method constructs as the fallback - confirm against callers.
    :param population: initial size of population ``"A"``; only consulted
        when ``demographic_events`` is falsy.
    :param number_intervals: stored as ``self.number_intervals``.
    :param splitter: stored as ``self.splitter``.
    :param num_replicates: stored as ``self.num_replicates``.
    :param lengt: stored as ``self.len`` (parameter name kept as-is for
        backward compatibility with keyword callers).
    :param model: stored as ``self.model``.
    :param random_seed: stored as ``self.random_seed``.
    :param sample_size: stored as ``self.sample_size``.
    :raises ValueError: if both ``demographic_events`` and ``population``
        are falsy.
    """
    self.sample_size = sample_size
    self.recombination_rate = recombination_rate
    self.mutation_rate = mutation_rate
    self.num_replicates = num_replicates

    if not demographic_events:
        if not population:
            # ValueError is still a BaseException, so handlers written
            # against the old exception type keep working, while ordinary
            # `except Exception` handlers can now see it too.
            raise ValueError(
                "Either demographic_events or population must be specified"
            )
        demographic_events = msprime.Demography()
        demographic_events.add_population(name="A", initial_size=population)

    self.demographic_events = demographic_events
    self.splitter = splitter
    self.model = model
    self.len = lengt
    self.random_seed = random_seed
    self.number_intervals = number_intervals
    # Starts empty; nothing in this method populates it.
    self._data = None
def full_ts():
    """
    Return a tree sequence that has data in every field.

    Duplicated from tskit's conftest.py, because other test suites that use
    this file will not have that fixture defined.
    """
    demography = msprime.Demography()
    for pop_name in ("A", "B", "C"):
        demography.add_population(initial_size=100, name=pop_name)
    demography.add_population_split(
        time=10, ancestral="C", derived=["A", "B"]
    )

    ts = msprime.sim_ancestry(
        {"A": 5, "B": 5},
        demography=demography,
        random_seed=1,
        sequence_length=10,
        record_migrations=True,
    )
    assert ts.num_migrations > 0
    assert ts.num_individuals > 0
    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)
    assert ts.num_mutations > 0

    tables = ts.dump_tables()

    # Rebuild the individuals table so each row has a location and parents.
    tables.individuals.clear()
    for individual in ts.individuals():
        tables.individuals.add_row(
            flags=0,
            location=[individual.id, individual.id],
            parents=[-1, -1],
        )

    # Give every table except provenances a JSON schema and per-row metadata.
    for name, table in tables.table_name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        row_labels = [f"n_{name}_{u}" for u in range(len(table))]
        packed, packed_offset = tskit.pack_strings(row_labels)
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)

    # Top-level metadata.
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = {"A": "Test metadata"}

    # Reference sequence: data, URL and its own metadata.
    reference = tables.reference_sequence
    reference.data = "A" * int(tables.sequence_length)
    reference.url = "https://example.com/sequence"
    reference.metadata_schema = tskit.MetadataSchema.permissive_json()
    reference.metadata = {"A": "Test metadata"}

    # Extra provenance rows so there are enough for the offset deletion test.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)
    return tables.tree_sequence()
def LavertskyEtAl2019TwoPop():
    """
    Build the QC version of the two-population mallard / black duck model.

    Parameters come from Fig 6 of Lavretsky et al. (2019), analysis of
    contemporary samples.

    :return: the assembled ``stdpopsim.DemographicModel``.
    """
    model_id = "QC-MallardBlackDuck_2L19"

    # 4 years per generation (page 7, section 2.6 of the paper).
    generation_time = 4

    # Estimated effective sizes. The ancestral size is not reported in the
    # paper; it was reported to Peter Ralph in personal communication.
    population_sizes = {
        "Mallard": 1.37e6,
        "Black_duck": 1.57e6,
        "Ancestral": 819535,
    }

    # Divergence time, converted to generations.
    split_time = 632305 / generation_time

    # Symmetric migration model. The reported rate is the number of migrants
    # per generation scaled by the ancestral Ne, so the simulated rate is
    # m_ij = M_ij / (2 * N_anc), with M_ij the value reported in the paper.
    migration_rate = 2.82 / (2 * population_sizes["Ancestral"])

    demography = msprime.Demography()
    for pop_name, pop_size in population_sizes.items():
        demography.add_population(
            name=pop_name, description=pop_name, initial_size=pop_size
        )
    demography.add_population_split(
        time=split_time,
        derived=["Mallard", "Black_duck"],
        ancestral="Ancestral",
    )
    for source, dest in (("Mallard", "Black_duck"), ("Black_duck", "Mallard")):
        demography.set_migration_rate(
            source=source, dest=dest, rate=migration_rate
        )

    return stdpopsim.DemographicModel(
        id=model_id,
        description=model_id,
        long_description=model_id,
        generation_time=generation_time,
        mutation_rate=4.83e-9,
        model=demography,
    )
def ts_fixture():
    """
    Return a tree sequence with data in all fields.

    Five populations are declared, but only A and B are sampled and only
    A, B and C take part in the split.
    """
    demography = msprime.Demography()
    population_sizes = (
        ("A", 10_000),
        ("B", 5_000),
        ("C", 1_000),
        ("D", 500),
        ("E", 100),
    )
    for pop_name, pop_size in population_sizes:
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(
        time=1000, derived=["A", "B"], ancestral="C"
    )

    ancestry = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demography,
        sequence_length=5,
        random_seed=42,
        record_migrations=True,
        record_provenance=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=0.001, random_seed=42)
    tables = mutated.dump_tables()

    # Re-add every individual with a location and a pair of parents.
    original_individuals = tables.individuals.copy()
    tables.individuals.clear()
    for i, individual in enumerate(original_individuals):
        relocated = individual.replace(
            location=[i, i + 1], parents=[i - 1, i - 1]
        )
        tables.individuals.append(relocated)

    # Give every table except provenances a JSON schema and per-row metadata.
    # NOTE(review): `name_map` looks like the older spelling of
    # `table_name_map` in tskit - confirm which one the pinned version expects.
    for name, table in tables.name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        row_documents = [
            f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))
        ]
        packed, packed_offset = tskit.pack_strings(row_documents)
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)

    # Top-level metadata.
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"

    # A few extra provenance rows so there are enough for testing.
    for _ in range(3):
        tables.provenances.add_row(record="A")
    return tables.tree_sequence()
def generate_demographic_events(
        population: int = None) -> 'msprime.Demography':
    """
    Build a single-population demography with random size changes.

    The number of size changes is drawn with ``np.random.randint`` and their
    times with ``np.random.exponential``; the changes are applied in
    increasing time order. Each new size is the previous size times a
    uniform random coefficient, floored at ``MIN_POPULATION_NUM``.

    :param population: initial size of population ``"A"``. When falsy, the
        size is obtained from ``give_population_size()``.
    :return: the demography with all size changes registered.
    """
    if not population:
        population = give_population_size()

    demography = msprime.Demography()
    demography.add_population(name="A", initial_size=population)

    event_count = np.random.randint(*NUMBER_OF_EVENTS_LIMITS)
    event_times = np.random.exponential(LAMBDA_EXP, size=event_count)

    current_size = population
    for event_time in sorted(event_times):
        rescaled = current_size * np.random.uniform(*POPULATION_COEFF_LIMITS)
        current_size = max(rescaled, MIN_POPULATION_NUM)
        demography.add_population_parameters_change(
            event_time, initial_size=current_size)
    return demography
def test_mixed_old_and_new_style(self):
    """
    Check that a ``demography`` cannot be combined with legacy arguments.

    Passing a ``Demography`` together with any one of the old-style
    arguments must raise ``ValueError``.
    """
    demography = msprime.Demography()
    legacy_defaults = {
        "population_configurations": None,
        "migration_matrix": None,
        "demographic_events": None,
    }
    # One old-style argument is supplied per case; the others stay None.
    legacy_cases = (
        {"population_configurations": []},
        {"migration_matrix": [[]]},
        {"demographic_events": []},
    )
    for legacy_override in legacy_cases:
        call_kwargs = {**legacy_defaults, **legacy_override}
        with self.assertRaises(ValueError):
            msprime.demography_factory(
                Ne=1,
                demography=demography,
                **call_kwargs,
            )
def generate_demographic_events_complex(
        population: int = None) -> 'msprime.Demography':
    """
    Build a single-population demography from a random walk of size changes.

    Waiting times between consecutive size changes are drawn with
    ``np.random.exponential`` and accumulated until the running time reaches
    420_000. At each event the current size is either multiplied or divided
    (chosen at random) by a uniform coefficient, then clamped to
    ``[MIN_POPULATION_NUM, MAX_POPULATION_NUM]``.

    The event that carries the running time past 420_000 is still
    registered, so the last change may lie beyond that horizon.

    :param population: initial size of population ``"A"``. When falsy, the
        size is obtained from ``give_population_size()``.
    :return: the demography with all size changes registered.
    """
    if not population:
        population = give_population_size()

    demography = msprime.Demography()
    demography.add_population(name="A", initial_size=population)

    time_horizon = 420_000
    current_size = population
    elapsed = 0
    while elapsed < time_horizon:
        # NOTE(review): lowercase `lambda_exp` is used here; confirm it is
        # really a different module-level name from an upper-case
        # `LAMBDA_EXP` constant and not a typo.
        elapsed += np.random.exponential(lambda_exp)

        # Draw order matters for reproducibility under a fixed seed:
        # the coefficient first, then the exponent (+1 multiply, -1 divide).
        coeff = (np.random.uniform(
            *POPULATION_COEFF_LIMIT_COMPLEX))**(np.random.choice([-1, 1]))
        current_size = min(
            max(current_size * coeff, MIN_POPULATION_NUM),
            MAX_POPULATION_NUM)
        demography.add_population_parameters_change(
            elapsed, initial_size=current_size)
    return demography
md["selection_coeff"] = mut_map[sid] tables.mutations.add_row(site=m.site, node=m.node, time=m.time, derived_state=m.derived_state, parent=m.parent, metadata={"mutation_list": md_list}) assert tables.mutations.num_rows == mts.num_mutations print( f"The selection coefficients range from {min(mut_map.values()):0.2e}") print(f"to {max(mut_map.values()):0.2e}.") return tables.tree_sequence() # Snakes: snake_demog = msprime.Demography() snake_demog.add_population(name="p0", initial_size=10000) snakes = msprime.sim_ancestry( samples={"p0": 300}, # number of individividuals sampled demography=snake_demog, recombination_rate=1e-8, sequence_length=sequence_length) snakes = pyslim.annotate_defaults( snakes, model_type='nonWF', slim_generation=1, ) # add mutations snakes = add_mutations(snakes,
def run_simulation(param_df):
    """Run an msprime simulation for one parameter draw.

    Parameters
    ----------
    param_df : mapping-like
        Parameter draw. This function reads ``param_df["rec_t"]``
        (recombination rate) and ``param_df["mu_t"]`` (mutation rate), and
        forwards the whole object to ``demo_config``.
        NOTE(review): the name suggests a DataFrame row - confirm.

    Returns
    -------
    None
        When the module-level ``dry_run`` flag is truthy; the demography is
        only passed to ``checkDemo``.
    tree sequence
        When ``vcf`` is truthy: a single simulated tree sequence with
        mutations.
    numpy.ndarray
        Otherwise: the per-statistic ``nanmean`` over all simulated loci.

    Notes
    -----
    Depends on module-level state that is not passed in: ``model_dt``,
    ``stats_dt``, ``ploidy``, ``hybrid_switch_over``, ``dry_run``, ``vcf``
    and ``header_len``. ``stats_dt["breakpoints"]`` is overwritten for every
    locus as a side effect.
    """
    demo_events = msp.Demography()

    # set samples sizes, here in diploids. so nsam/2
    # NOTE(review): true division yields floats; confirm msprime accepts
    # non-integer sample counts, or that the sizes are always even.
    sample_sizes = model_dt["sampleSize"]
    samples = {
        f'pop_{i}': sample_size / 2
        for i, sample_size in enumerate(sample_sizes)
    }

    # set population sizes
    init_sizes = [size * ploidy for size in model_dt["initialSize"]]
    pop_names = [f"pop_{i}" for i in range(len(init_sizes))]
    for pop_name, init in zip(pop_names, init_sizes):
        demo_events.add_population(name=pop_name, initial_size=init)

    # set migration rates from migration matrix if > 0
    mig_mat = model_dt["migmat"]
    if np.sum(mig_mat) > 0:
        off_diagonal = ~np.eye(mig_mat.shape[0], dtype=bool)
        pairwise_rates = mig_mat[off_diagonal]
        if pairwise_rates.size > 0 and np.all(
                pairwise_rates == pairwise_rates[0]):
            # One shared rate between every pair of populations.
            # `set_migration_rate` needs concrete source/dest populations,
            # so the all-pairs case uses the dedicated symmetric setter.
            demo_events.set_symmetric_migration_rate(
                populations=pop_names, rate=pairwise_rates[0])
        else:
            # zip(*mig_mat) walks the matrix column by column, so pop_m[i]
            # is mig_mat[i][p]: entry [i][p] becomes the rate for source=p,
            # dest=i.
            for p, pop_m in enumerate(zip(*mig_mat)):
                for i, m in enumerate(pop_m):
                    if p != i and m > 0:
                        demo_events.set_migration_rate(source=p,
                                                       dest=i,
                                                       rate=m)

    # build demographic command line
    demo_events = demo_config(param_df, demo_events)

    # set hybrid models
    if hybrid_switch_over:
        model_list = [
            msp.DiscreteTimeWrightFisher(duration=hybrid_switch_over),
            msp.StandardCoalescent(),
        ]
    else:
        model_list = msp.StandardCoalescent()

    # check demo
    if dry_run:
        checkDemo(demo_events)
        return None

    if vcf:
        tree = msp.sim_ancestry(samples,
                                recombination_rate=param_df["rec_t"],
                                demography=demo_events,
                                sequence_length=model_dt["contig_length"],
                                model=model_list)
        tree = msp.sim_mutations(tree, rate=param_df["mu_t"])
        return tree

    trees = msp.sim_ancestry(samples,
                             recombination_rate=param_df["rec_t"],
                             demography=demo_events,
                             num_replicates=model_dt["loci"],
                             sequence_length=model_dt["contig_length"],
                             model=model_list)

    # calc stats: one row per locus, one column per summary-statistic value
    stat_mat = np.zeros([model_dt["loci"], header_len])
    length_bp = stats_dt["length_bp"]
    pfe = stats_dt["perfixder"]
    for i, tree in enumerate(trees):
        tree = msp.sim_mutations(tree, rate=param_df["mu_t"], model="binary")
        stats_ls = []
        pos, haps, counts, bp = read_trees(tree,
                                           length_bp,
                                           pfe,
                                           seq_error=True)
        stats_dt["breakpoints"] = bp
        popsumstats = PopSumStats(pos, haps, counts, stats_dt)
        for stat in stats_dt["calc_stats"]:
            stat_fx = getattr(popsumstats, stat)
            try:
                ss = stat_fx()
            except IndexError:
                # A statistic that raises IndexError is recorded as NaNs,
                # one per entry of stats_dt["pw_quants"], so nanmean skips it.
                ss = [np.nan] * len(stats_dt["pw_quants"])
            stats_ls.extend(ss)
        stat_mat[i, :] = stats_ls

    return np.nanmean(stat_mat, axis=0)
import pyslim import tskit import msprime from IPython.display import SVG import numpy as np import subprocess import os #import util # Neutral burn in with msprime, coalescent simulation breaks = [0, 33333334, 66666667, 100000000] # the length of the genome? recomb_map = msprime.RateMap( position=breaks, rate=[1e-8, 1e-8, 1e-8]) # why do we set the recombination rate this way? demog_model = msprime.Demography() demog_model.add_population(initial_size=10000) print("Working on Snake Sim") ots = msprime.sim_ancestry( samples=1000, # number of individividuals sampled? demography=demog_model, # random_seed=5, recombination_rate=recomb_map) ots = pyslim.annotate_defaults(ots, model_type="nonWF", slim_generation=1) # this is adding anotation or metadata to all of the individuals mut_map = msprime.RateMap(position=breaks, rate=[1e-10, 1e-10, 1e-10]) # what rate(s) would I put in here mut_model = msprime.SLiMMutationModel(type=2) # mutation "m2" ots = msprime.sim_mutations( ots, rate=mut_map,