def test_no_msprime_DFE(self):
    # Simulating a non-neutral DFE with the msprime engine must fail with
    # a ValueError.
    non_neutral_type = stdpopsim.ext.MutationType(
        dominance_coeff=0.2,
        distribution_type="e",
        distribution_args=[0.1],
    )
    dfe = stdpopsim.DFE(
        id="abc",
        description="test test",
        long_description="test test 🐢",
        mutation_types=[non_neutral_type],
    )

    contig = stdpopsim.Contig.basic_contig(length=10000, mutation_rate=1e-6)
    contig.clear_genomic_mutation_types()
    # The DFE is attached to the first half of the contig only.
    first_half = np.array([[0, contig.length / 2]], dtype="int")
    contig.add_DFE(intervals=first_half, DFE=dfe)

    model = stdpopsim.PiecewiseConstantSize(1000)
    samples = model.get_samples(2)
    msprime_engine = stdpopsim.get_engine("msprime")
    with pytest.raises(ValueError):
        msprime_engine.simulate(model, contig, samples)
def OutOfAfrica_3G09_with_DFE(seed):
    """
    The Gutenkunst et al. HomSap/OutOfAfrica_3G09 model, simulated with a DFE.

    The genomic element type described by ``KimDFE()`` is added over the
    whole contig, the SLiM engine is run with the given ``seed``, and the
    engine's result is returned.
    """
    homsap = stdpopsim.get_species("HomSap")
    model = homsap.get_demographic_model("OutOfAfrica_3G09")
    contig = homsap.get_contig("chr1", length_multiplier=0.001)
    samples = model.get_samples(100, 100, 100)  # YRI, CEU, CHB

    # Neutral and deleterious mutations occur across the whole contig.
    contig_end = int(contig.recombination_map.sequence_length)
    whole_contig = np.array([[0, contig_end]])
    contig.add_genomic_element_type(intervals=whole_contig, **KimDFE())

    slim = stdpopsim.get_engine("slim")
    # Add slim_script=True to print the script instead of running it.
    return slim.simulate(
        model,
        contig,
        samples,
        seed=seed,
        slim_scaling_factor=10,
        slim_burn_in=10,
    )
def test_bad_GenerationAfter_times(self):
    # Every (start_time, end_time) pair below must make event creation or
    # the dry-run simulation raise ValueError.

    # Errors caught when the event is created.
    rejected_at_creation = [(-1, 0), (0, -1), (1, 100), (100, 100), (0, 0)]
    # Errors caught when the GenerationAfter has been calculated.
    rejected_after_calculation = [(1e-9, 0), (100 + 1e-9, 100)]

    engine = stdpopsim.get_engine("slim")
    for start_time, end_time in rejected_at_creation + rejected_after_calculation:
        with self.assertRaises(ValueError):
            draw = stdpopsim.ext.DrawMutation(
                time=self.T_mut,
                mutation_type_id=self.mut_id,
                population_id=0,
                coordinate=100,
                save=True,
            )
            condition = stdpopsim.ext.ConditionOnAlleleFrequency(
                start_time=stdpopsim.ext.GenerationAfter(start_time),
                end_time=end_time,
                mutation_type_id=self.mut_id,
                population_id=0,
                op=">",
                allele_frequency=0,
            )
            engine.simulate(
                demographic_model=self.model,
                contig=self.contig,
                samples=self.samples,
                mutation_types=self.mutation_types,
                extended_events=[draw, condition],
                dry_run=True,
            )
def test_multiple_mutation_types_in_script(self):
    # The generated script must contain one "initializeMutationType" per
    # mutation type handed to the engine.
    engine = stdpopsim.get_engine("slim")

    def script_for(types):
        # Run simulate() with slim_script=True under capture_output and
        # return the first captured value.
        script, _ = capture_output(
            engine.simulate,
            demographic_model=self.model,
            contig=self.contig,
            samples=self.samples,
            mutation_types=types,
            slim_script=True,
        )
        return script

    two_types = [
        stdpopsim.ext.MutationType(weight=1),
        stdpopsim.ext.MutationType(weight=2),
    ]
    self.assertEqual(script_for(two_types).count("initializeMutationType"), 2)

    # Ten weighted types plus one that is not converted to a substitution.
    eleven_types = [stdpopsim.ext.MutationType(weight=i) for i in range(10)]
    eleven_types.append(stdpopsim.ext.MutationType(convert_to_substitution=False))
    self.assertEqual(script_for(eleven_types).count("initializeMutationType"), 11)
def OutOfAfrica_3G09_with_DFE(seed):
    """
    The Gutenkunst et al. HomSap/OutOfAfrica_3G09 model, simulated with a DFE.

    The mutation types returned by ``KimDFE()`` are passed to the SLiM
    engine, which is run with the given ``seed``; the engine's result is
    returned.
    """
    homsap = stdpopsim.get_species("HomSap")
    model = homsap.get_demographic_model("OutOfAfrica_3G09")
    contig = homsap.get_contig("chr1", length_multiplier=0.001)
    samples = model.get_samples(100, 100, 100)  # YRI, CEU, CHB
    dfe_mutation_types = KimDFE()

    slim = stdpopsim.get_engine("slim")
    # Add slim_script=True to print the script instead of running it.
    return slim.simulate(
        model,
        contig,
        samples,
        seed=seed,
        mutation_types=dfe_mutation_types,
        slim_scaling_factor=10,
        slim_burn_in=10,
    )
def generic_Neutral(model, contig, samples, seed, engine="slim", **kwargs):
    """
    Simulate ``model`` on ``contig`` with the engine named by ``engine``.

    For the "slim" engine a burn-in of 0.1 and a scaling factor of 10 are
    passed to ``simulate``; any other engine gets no extra options.

    Returns the tuple ``(ts, (contig.origin, 0, 0, 0))`` where ``ts`` is the
    engine's result.
    """
    # NOTE(review): caller-supplied **kwargs are accepted but never forwarded
    # to the engine. Presumably this keeps the signature interchangeable with
    # sibling simulation helpers; confirm before changing.
    engine_options = {}
    if engine == "slim":
        engine_options = dict(slim_burn_in=0.1, slim_scaling_factor=10)

    simulator = stdpopsim.get_engine(engine)
    ts = simulator.simulate(model, contig, samples, seed=seed, **engine_options)
    return ts, (contig.origin, 0, 0, 0)
def test_bad_params(self):
    # Non-positive scaling factors and negative burn-in values must be
    # rejected with ValueError, even for a dry run.
    engine = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    contig = homsap.get_contig("chr1")
    model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    samples = model.get_samples(10)

    bad_options = [
        {"slim_scaling_factor": factor} for factor in (0, -1, -1e-6)
    ] + [
        {"slim_burn_in": burn_in} for burn_in in (-1, -1e-6)
    ]
    for option in bad_options:
        with self.assertRaises(ValueError):
            engine.simulate(
                demographic_model=model,
                contig=contig,
                samples=samples,
                dry_run=True,
                **option,
            )
def run_simulation(args):
    """
    Run the simulation described by the parsed command-line ``args``.

    ``species`` is not a parameter; it is resolved from the surrounding
    scope. The steps are: pick the demographic model, build samples and the
    contig, write a simulation summary, warn when the model has no QC model,
    call ``engine.simulate`` and hand any returned tree sequence to
    ``write_output``. Citations are written only for QC-complete models.

    The process exits via ``exit()`` when more sample counts are given than
    the model has sampling populations.
    """
    if args.demographic_model is not None:
        model = get_model_wrapper(species, args.demographic_model)
        qc_complete = model.qc_model is not None
    else:
        # No model requested: use a constant-size model built from the
        # species' own population size and generation time.
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.generation_time = species.generation_time
        model.citations.extend(species.population_size_citations)
        model.citations.extend(species.generation_time_citations)
        qc_complete = True

    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig = species.get_contig(
        args.chromosome,
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
        length=args.length,
        inclusion_mask=args.inclusion_mask,
        exclusion_mask=args.exclusion_mask,
    )
    engine = stdpopsim.get_engine(args.engine)

    logger.info(
        f"Running simulation model {model.id} for {species.id} on "
        f"{contig} with {len(samples)} samples using {engine.id}."
    )
    write_simulation_summary(
        engine=engine, model=model, contig=contig, samples=samples, seed=args.seed
    )

    if not qc_complete:
        qc_message = (
            f"{model.id} has not been QCed. Use at your own risk! "
            "Demographic models that have not undergone stdpopsim's "
            "Quality Control procedure may contain implementation "
            "errors, leading to differences between simulations "
            "and the model described in the original publication. "
            "More information about the QC process can be found in "
            "the developer documentation. \n"
            "https://stdpopsim.readthedocs.io/en/latest/development.html"
            "#demographic-model-review-process"
        )
        warnings.warn(stdpopsim.QCMissingWarning(qc_message))

    # Extract simulate() parameters from the CLI args: only attributes whose
    # names appear in the engine's simulate() signature are forwarded.
    accepted_params = inspect.signature(engine.simulate).parameters.keys()
    simulate_kwargs = {
        name: value for name, value in vars(args).items() if name in accepted_params
    }
    simulate_kwargs.update(demographic_model=model, contig=contig, samples=samples)
    ts = engine.simulate(**simulate_kwargs)

    summarise_usage()
    if ts is not None:
        write_output(ts, args)

    # Non-QCed models shouldn't be used in publications, so we skip the
    # "If you use this simulation in published work..." citation request.
    if qc_complete:
        write_citations(engine, model, contig, species)
    if args.bibtex_file is not None:
        write_bibtex(engine, model, contig, species, args.bibtex_file)
def test_conditioning_without_save(self):
    # Conditioning on an allele frequency when the drawn mutation was not
    # created with save=True is expected to raise SLiMException.
    draw = stdpopsim.ext.DrawMutation(
        time=self.T_mut,
        mutation_type_id=self.mut_id,
        population_id=0,
        coordinate=100,
    )
    condition = stdpopsim.ext.ConditionOnAlleleFrequency(
        start_time=stdpopsim.ext.GenerationAfter(self.T_mut),
        end_time=0,
        mutation_type_id=self.mut_id,
        population_id=0,
        op=">=",
        allele_frequency=1,
    )
    slim = stdpopsim.get_engine("slim")
    with self.assertRaises(stdpopsim.SLiMException):
        # TODO: get this to fail using dry_run=True
        slim.simulate(
            demographic_model=self.model,
            contig=self.contig,
            samples=self.samples,
            mutation_types=self.mutation_types,
            extended_events=[draw, condition],
            slim_scaling_factor=10,
            slim_burn_in=0.1,
        )
def run_simulation(args):
    """
    Run the simulation described by the parsed command-line ``args``.

    ``species`` is not a parameter; it is resolved from the surrounding
    scope. The steps are: pick the demographic model (a constant-size model
    built from the species when none was requested), build samples and the
    contig, optionally write a summary, call ``engine.simulate`` unless
    ``args.dry_run`` is set, and hand any returned tree sequence to
    ``write_output``. Citations are written unless ``args.quiet`` is set.

    The process exits via ``exit()`` when more sample counts are given than
    the model has sampling populations.
    """
    if args.demographic_model is None:
        model = stdpopsim.PiecewiseConstantSize(species.population_size)
        model.generation_time = species.generation_time
        model.citations.extend(species.population_size_citations)
        model.citations.extend(species.generation_time_citations)
    else:
        model = get_model_wrapper(species, args.demographic_model)

    if len(args.samples) > model.num_sampling_populations:
        exit(
            f"Cannot sample from more than {model.num_sampling_populations} "
            "populations"
        )
    samples = model.get_samples(*args.samples)

    contig = species.get_contig(
        args.chromosome,
        genetic_map=args.genetic_map,
        length_multiplier=args.length_multiplier,
    )
    engine = stdpopsim.get_engine(args.engine)
    logger.info(
        f"Running simulation model {model.id} for {species.id} on "
        f"{contig} with {len(samples)} samples using {engine.id}."
    )

    # vars(args) returns the Namespace's own __dict__, so updating it in
    # place would overwrite args.demographic_model and args.samples with the
    # resolved objects. Work on a copy to leave ``args`` untouched.
    kwargs = dict(vars(args))
    kwargs.update(demographic_model=model, contig=contig, samples=samples)

    if not args.quiet:
        write_simulation_summary(
            engine=engine, model=model, contig=contig, samples=samples, seed=args.seed
        )
    if not args.dry_run:
        ts = engine.simulate(**kwargs)
        summarise_usage()
        if ts is not None:
            write_output(ts, args)
    if not args.quiet:
        write_citations(engine, model, contig, species)
    if args.bibtex_file is not None:
        write_bibtex(engine, model, contig, species, args.bibtex_file)
def test_script_generation(self):
    # For three different demographic models, the output captured while
    # running simulate() with slim_script=True must mention
    # "sim.registerLateEvent".
    engine = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    contig = homsap.get_contig("chr1")

    def check_script(model, samples):
        out, _ = capture_output(
            engine.simulate,
            demographic_model=model,
            contig=contig,
            samples=samples,
            slim_script=True,
        )
        self.assertTrue("sim.registerLateEvent" in out)

    constant_size = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    constant_size_samples = constant_size.get_samples(10)
    constant_size.generation_time = homsap.generation_time
    check_script(constant_size, constant_size_samples)

    ancient_eurasia = homsap.get_demographic_model("AncientEurasia_9K19")
    check_script(ancient_eurasia, ancient_eurasia.get_samples(1, 2, 3, 4, 5, 6, 7))

    american_admixture = homsap.get_demographic_model("AmericanAdmixture_4B11")
    check_script(american_admixture, american_admixture.get_samples(10, 10, 10))
def _twopop_IM(engine_id, out_dir, seed, NA=1000, N1=500, N2=5000, T=1000,
               M12=0, M21=0, pulse=None, **sim_kwargs):
    """
    Simulate an isolation-with-migration model and time the engine call.

    An optional ``pulse`` event is appended to the model's demographic
    events, which are then re-sorted by time. Extra ``sim_kwargs`` are
    forwarded to ``engine.simulate``. The result is dumped to
    ``out_dir / f"{seed}.trees"``.

    Returns ``(out_file, elapsed)`` where ``elapsed`` is the
    ``time.perf_counter`` difference around the ``simulate`` call.
    """
    species = stdpopsim.get_species("AraTha")
    contig = species.get_contig("chr5", length_multiplier=0.01)  # ~270 kb
    model = stdpopsim.IsolationWithMigration(
        NA=NA, N1=N1, N2=N2, T=T, M12=M12, M21=M21
    )
    if pulse is not None:
        events = model.demographic_events
        events.append(pulse)
        events.sort(key=lambda event: event.time)
    model.generation_time = species.generation_time
    samples = model.get_samples(50, 50, 0)
    engine = stdpopsim.get_engine(engine_id)

    started = time.perf_counter()
    ts = engine.simulate(model, contig, samples, seed=seed, **sim_kwargs)
    elapsed = time.perf_counter() - started

    out_file = out_dir / f"{seed}.trees"
    ts.dump(out_file)
    return out_file, elapsed
def gene_with_noncoding_OutOfAfrica_3G09(seed):
    """
    Simulate a 1kb gene flanked by 1kb neutral regions under the
    HomSap/OutOfAfrica_3G09 model.

    Within genes, 30% of the total influx of mutations are neutral and 70%
    are deleterious, with the DFE from Kim et al. The SLiM engine is run
    with the given ``seed`` and its result is returned.
    """
    homsap = stdpopsim.get_species("HomSap")
    model = homsap.get_demographic_model("OutOfAfrica_3G09")
    contig = homsap.get_contig(length=3000)
    samples = model.get_samples(100, 100, 100)  # YRI, CEU, CHB

    # Within the gene, KimDFE is used; outside genomic elements,
    # neutral mutations are added with msprime.
    gene_interval = np.array([[1000, 2000]])
    contig.add_genomic_element_type(intervals=gene_interval, **KimDFE())

    slim = stdpopsim.get_engine("slim")
    # Add slim_script=True to print the script instead of running it.
    return slim.simulate(
        model,
        contig,
        samples,
        seed=seed,
        slim_scaling_factor=10,
        slim_burn_in=10,
    )
def test_save_point_creation(self):
    # A drawn mutation and two allele-frequency conditions, all created with
    # save=True, must be accepted by a dry-run simulation.
    draw = stdpopsim.ext.DrawMutation(
        time=self.T_mut,
        mutation_type_id=self.mut_id,
        population_id=0,
        coordinate=100,
        save=True,
    )
    condition_after_draw = stdpopsim.ext.ConditionOnAlleleFrequency(
        start_time=stdpopsim.ext.GenerationAfter(self.T_mut),
        end_time=0,
        mutation_type_id=self.mut_id,
        population_id=0,
        op=">",
        allele_frequency=0,
        save=True,
    )
    # Start and end both fall on the same time, T_mut // 2.
    condition_at_midpoint = stdpopsim.ext.ConditionOnAlleleFrequency(
        start_time=self.T_mut // 2,
        end_time=self.T_mut // 2,
        mutation_type_id=self.mut_id,
        population_id=0,
        op=">",
        allele_frequency=0,
        save=True,
    )
    slim = stdpopsim.get_engine("slim")
    slim.simulate(
        demographic_model=self.model,
        contig=self.contig,
        samples=self.samples,
        mutation_types=self.mutation_types,
        extended_events=[draw, condition_after_draw, condition_at_midpoint],
        dry_run=True,
    )
def simulate_stdpopsim(self, engine, species, model, pop, error_prob=None,
                       seed=None):
    """
    Simulate ``self.num_reps`` regions with stdpopsim and return them as one
    genotype array.

    For every replicate a chromosome (1-22) is drawn with probability
    proportional to ``get_chrom_size`` and simulated on a contig whose
    ``length_multiplier`` is ``self.seq_len / size``.

    :param engine: Engine id passed to ``stdpopsim.get_engine``.
    :param species: Species id passed to ``stdpopsim.get_species``.
    :param model: Demographic model id looked up on the species.
    :param pop: Population to sample from: ``'YRI'``, ``'CEU'`` or ``'CHB'``.
    :param error_prob: ``None`` (matrices are only resized), a float (same
        probability for every replicate) or a numpy array indexed by
        replicate. Float subclasses such as ``numpy.float64`` are treated
        as floats.
    :param seed: Seed for the chromosome choice only; it is not passed to
        the engine.
    :return: Array of shape ``(num_reps, num_samples, fixed_dim, 1)``.
    :raises ValueError: If ``pop`` is not one of the supported populations.
    """
    # The order for sampling from populations is ['YRI', 'CEU', 'CHB'].
    pop_order = ("YRI", "CEU", "CHB")
    if pop not in pop_order:
        # Fail early instead of hitting an unbound ``stdsamples`` later.
        raise ValueError(f"pop must be one of {pop_order}, got {pop!r}")

    stdengine = stdpopsim.get_engine(engine)
    stdspecies = stdpopsim.get_species(species)
    stdmodel = stdspecies.get_demographic_model(model)

    geno = [(i, get_chrom_size(i)) for i in range(1, 23)]
    # Sort the list by size.
    geno.sort(key=lambda a: a[1], reverse=True)

    # Running totals of the sizes, used as cumulative sampling weights.
    cum_weights = []
    running_total = 0
    for _, size in geno:
        running_total += size
        cum_weights.append(running_total)
    rng = random.Random(seed)

    sample_counts = [self.num_samples if name == pop else 0 for name in pop_order]
    stdsamples = stdmodel.get_samples(*sample_counts)

    sims = []
    for _ in range(self.num_reps):
        chrom, size = rng.choices(geno, cum_weights=cum_weights)[0]
        factor = self.seq_len / size
        stdcontig = stdspecies.get_contig(
            "chr" + str(chrom), length_multiplier=factor
        )
        sims.append(stdengine.simulate(stdmodel, stdcontig, stdsamples))

    mat = np.zeros((self.num_reps, self.num_samples, self.fixed_dim))

    # For each tree sequence output from the simulation
    for i, ts in enumerate(sims):
        if isinstance(error_prob, float):
            mat[i] = self._mutate_geno_old(ts, p=error_prob)
        elif isinstance(error_prob, np.ndarray):
            mat[i] = self._mutate_geno_old(ts, p=error_prob[i])
        else:
            # No error prob, it doesn't mutate the matrix
            mat[i] = self._resize_from_ts(ts)

    # Scale genotype matrices from [0, 1] to [-1, 1]. If we were to use
    # a generator, this scale should be done with tanh function
    if self.scale:
        mat = scale_matrix(mat)

    # Expand dimension by 1 (add channel dim). -1 stands for last axis.
    return np.expand_dims(mat, axis=-1)
def test_recombination_map(self):
    # A dry run on a contig built from the HapMapII_GRCh37 genetic map must
    # complete without raising.
    homsap = stdpopsim.get_species("HomSap")
    slim = stdpopsim.get_engine("slim")
    contig = homsap.get_contig("chr1", genetic_map="HapMapII_GRCh37")
    model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    samples = model.get_samples(10)
    simulate_args = dict(
        demographic_model=model,
        contig=contig,
        samples=samples,
        dry_run=True,
    )
    # The captured values are not inspected.
    out, _ = capture_output(slim.simulate, **simulate_args)
def test_msprime_kwargs(self):
    # Extra keyword arguments (record_full_arg, random_seed) given to the
    # msprime engine must take effect: with record_full_arg=True at least
    # one node flag equals NODE_IS_RE_EVENT.
    homsap = stdpopsim.get_species("HomSap")
    model = homsap.get_demographic_model("AshkSub_7G19")
    contig = homsap.get_contig("chr22", length_multiplier=0.01)
    samples = model.get_samples(10)
    msprime_engine = stdpopsim.get_engine("msprime")

    full_arg_ts = msprime_engine.simulate(
        model,
        contig,
        samples,
        record_full_arg=True,
        random_seed=1,
    )
    node_flags = full_arg_ts.tables.nodes.flags
    assert any(msprime.NODE_IS_RE_EVENT == node_flags)
def test_msprime_seed(self):
    # ``seed`` and ``random_seed`` must not be given together, and either
    # one alone must yield the same edge table.
    homsap = stdpopsim.get_species("HomSap")
    model = homsap.get_demographic_model("AshkSub_7G19")
    contig = homsap.get_contig("chr22", length_multiplier=0.01)
    samples = model.get_samples(10)
    msprime_engine = stdpopsim.get_engine("msprime")

    with pytest.raises(ValueError):
        msprime_engine.simulate(model, contig, samples, seed=1, random_seed=1)

    ts_from_seed = msprime_engine.simulate(model, contig, samples, seed=1)
    ts_from_random_seed = msprime_engine.simulate(
        model, contig, samples, random_seed=1
    )
    assert ts_from_seed.tables.edges == ts_from_random_seed.tables.edges
def test_register_engine(self):
    # A registered engine must be returned by get_engine() under its id.
    class MyEngine(stdpopsim.Engine):
        id = "test-engine"
        name = "test"
        citations = []

    engine1 = MyEngine()
    stdpopsim.register_engine(engine1)
    try:
        engine2 = stdpopsim.get_engine(engine1.id)
        self.assertEqual(engine1, engine2)
    finally:
        # Remove the engine to avoid possible problems with other tests.
        # Doing this in ``finally`` keeps the registry clean even when the
        # lookup or the assertion above fails.
        del stdpopsim.engines._registered_engines[engine1.id]
def test_simulate(self):
    # A small SLiM run must return 10 samples, and every tree in the result
    # must have a single root.
    aratha = stdpopsim.get_species("AraTha")
    slim = stdpopsim.get_engine("slim")
    contig = aratha.get_contig("chr5", length_multiplier=0.001)
    model = stdpopsim.PiecewiseConstantSize(aratha.population_size)
    samples = model.get_samples(10)

    ts = slim.simulate(
        demographic_model=model,
        contig=contig,
        samples=samples,
        slim_scaling_factor=10,
        slim_burn_in=0,
    )

    self.assertEqual(ts.num_samples, 10)
    single_rooted = all(tree.num_roots == 1 for tree in ts.trees())
    self.assertTrue(single_rooted)
def test_assert_min_version(self):
    # _assert_min_version() must raise RuntimeError when the reported SLiM
    # version is below the minimum, and return normally otherwise.
    get_version = "stdpopsim.slim_engine._SLiMEngine.get_version"
    slim = stdpopsim.get_engine("slim")

    # Reported version 3.4 is older than both required versions.
    with mock.patch(get_version, return_value="3.4"):
        with self.assertRaises(RuntimeError):
            slim._assert_min_version("3.5", slim.slim_path())
        with self.assertRaises(RuntimeError):
            slim._assert_min_version("4.0", None)

    # Reported version 4.0 satisfies both required versions.
    with mock.patch(get_version, return_value="4.0"):
        slim._assert_min_version("3.5", slim.slim_path())
        slim._assert_min_version("3.6", None)
def test_invalid_API_parameters(self):
    # Unknown model names given through msprime_model or
    # msprime_change_model must be rejected with ValueError.
    homsap = stdpopsim.get_species("HomSap")
    msprime_engine = stdpopsim.get_engine("msprime")
    contig = homsap.get_contig("chr20")
    model = homsap.get_demographic_model("OutOfAfrica_2T12")
    samples = model.get_samples(10)

    with self.assertRaises(ValueError):
        msprime_engine.simulate(model, contig, samples, msprime_model="notamodel")

    bad_model_changes = [(10, "notamodel")]
    with self.assertRaises(ValueError):
        msprime_engine.simulate(
            model,
            contig,
            samples,
            msprime_change_model=bad_model_changes,
        )
def _onepop_PC(engine_id, out_dir, seed, N0=1000, *size_changes, **sim_kwargs):
    """
    Simulate a single-population piecewise-constant model and time the
    engine call.

    ``N0`` and ``size_changes`` are passed to
    ``stdpopsim.PiecewiseConstantSize``; extra ``sim_kwargs`` are forwarded
    to ``engine.simulate``. The result is dumped to
    ``out_dir / f"{seed}.trees"``.

    Returns ``(out_file, elapsed)`` where ``elapsed`` is the
    ``time.perf_counter`` difference around the ``simulate`` call.
    """
    canfam = stdpopsim.get_species("CanFam")
    contig = canfam.get_contig("chr35", length_multiplier=0.01)  # ~265 kb
    model = stdpopsim.PiecewiseConstantSize(N0, *size_changes)
    model.generation_time = canfam.generation_time
    samples = model.get_samples(100)
    simulator = stdpopsim.get_engine(engine_id)

    started = time.perf_counter()
    ts = simulator.simulate(model, contig, samples, seed=seed, **sim_kwargs)
    elapsed = time.perf_counter() - started

    out_file = out_dir / f"{seed}.trees"
    ts.dump(out_file)
    return out_file, elapsed
def test_recombination_map(self):
    # With a contig built from the HapMapII_GRCh37 genetic map, the output
    # captured while running simulate() with slim_script=True must mention
    # "sim.registerLateEvent".
    homsap = stdpopsim.get_species("HomSap")
    slim = stdpopsim.get_engine("slim")
    contig = homsap.get_contig("chr1", genetic_map="HapMapII_GRCh37")
    model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
    samples = model.get_samples(10)
    model.generation_time = homsap.generation_time

    simulate_args = dict(
        demographic_model=model,
        contig=contig,
        samples=samples,
        slim_script=True,
    )
    script, _ = capture_output(slim.simulate, **simulate_args)
    self.assertTrue("sim.registerLateEvent" in script)
def homsap_DFE(model, contig, samples, seed, **kwargs):
    """
    Simulate ``model`` on ``contig`` with SLiM using the ``KimDFE()``
    mutation types.

    Extra keyword arguments are accepted but not used.

    Returns the tuple ``(ts, (contig.origin, 0, 0, 0))`` where ``ts`` is the
    engine's result.
    """
    dfe_mutation_types = KimDFE()
    slim = stdpopsim.get_engine("slim")
    simulate_options = dict(
        seed=seed,
        mutation_types=dfe_mutation_types,
        slim_burn_in=10,
        slim_scaling_factor=10,
    )
    ts = slim.simulate(model, contig, samples, **simulate_options)
    return ts, (contig.origin, 0, 0, 0)
def test_recap_and_rescale(self):
    # simulate() must give the same node, edge and mutation tables as
    # slim_simulate_no_recap() followed by recap_and_rescale(), both with
    # and without explicit mutation types.
    engine = stdpopsim.get_engine("slim")
    homsap = stdpopsim.get_species("HomSap")
    contig = homsap.get_contig("chr22", length_multiplier=0.001)
    model = homsap.get_demographic_model("OutOfAfrica_3G09")
    samples = model.get_samples(10, 10, 10)

    for with_mutation_types, seed in ((False, 1234), (True, 2345)):
        if with_mutation_types:
            mutation_types = [stdpopsim.ext.MutationType(weight=1)]
            extended_events = None
        else:
            mutation_types = None
            extended_events = []

        # Arguments shared by all three calls below.
        shared = dict(
            demographic_model=model,
            contig=contig,
            samples=samples,
            mutation_types=mutation_types,
            extended_events=extended_events,
            slim_scaling_factor=10,
            seed=seed,
        )

        ts1 = engine.simulate(slim_burn_in=0, **shared)
        ts2_headless = slim_simulate_no_recap(slim_burn_in=0, **shared)
        ts2 = engine.recap_and_rescale(ts2_headless, **shared)

        expected = ts1.dump_tables()
        actual = ts2.dump_tables()
        self.assertEqual(expected.nodes, actual.nodes)
        self.assertEqual(expected.edges, actual.edges)
        self.assertEqual(expected.mutations, actual.mutations)
def test_no_mutation_types_defined(self):
    # Drawing a mutation while passing no mutation_types to simulate() must
    # raise ValueError.
    draw = stdpopsim.ext.DrawMutation(
        time=self.T_mut,
        mutation_type_id=self.mut_id,
        population_id=0,
        coordinate=100,
    )
    slim = stdpopsim.get_engine("slim")
    with self.assertRaises(ValueError):
        slim.simulate(
            demographic_model=self.model,
            contig=self.contig,
            samples=self.samples,
            extended_events=[draw],
            dry_run=True,
        )
def test_exclusion_of_drawn_mutation(self):
    # With a zero mutation rate the only mutation is the drawn one.
    # convert.ts2mat() must include it in the matrix unless
    # exclude_mut_with_metadata=True, and report its frequency either way.
    midpoint = round(self.contig.recombination_map.get_length() / 2)
    draw = stdpopsim.ext.DrawMutation(
        time=self.T_mut,
        mutation_type_id=self.mut_id,
        population_id=0,
        coordinate=midpoint,
        save=True,
    )
    condition = stdpopsim.ext.ConditionOnAlleleFrequency(
        start_time=0,
        end_time=0,
        mutation_type_id=self.mut_id,
        population_id=0,
        op=">",
        allele_frequency=0,
    )
    mutation_free_contig = stdpopsim.Contig(
        mutation_rate=0,
        recombination_map=self.contig.recombination_map,
        genetic_map=self.contig.genetic_map,
    )
    slim = stdpopsim.get_engine("slim")
    # warnings.warn is patched for the duration of the simulation.
    with mock.patch("warnings.warn", autospec=True):
        ts = slim.simulate(
            demographic_model=self.model,
            contig=mutation_free_contig,
            samples=self.samples,
            mutation_types=self.mutation_types,
            extended_events=[draw, condition],
            slim_scaling_factor=10,
            slim_burn_in=0.1,
            seed=1,
        )
    self.assertEqual(ts.num_mutations, 1)
    expected_af = self.allele_frequency(ts)
    self.assertGreaterEqual(expected_af, 0)

    rng = np.random.default_rng(seed=31415)

    # Drawn mutation kept in the matrix.
    kept, kept_af = convert.ts2mat(ts, 32, 0, rng, exclude_mut_with_metadata=False)
    self.assertGreater(kept.sum(), 0)
    self.assertEqual(len(kept_af), 1)
    self.assertEqual(expected_af, kept_af[0])

    # Drawn mutation excluded from the matrix; its frequency is still given.
    dropped, dropped_af = convert.ts2mat(
        ts, 32, 0, rng, exclude_mut_with_metadata=True
    )
    self.assertEqual(dropped.sum(), 0)
    self.assertEqual(len(dropped_af), 1)
    self.assertEqual(expected_af, dropped_af[0])
def test_bad_extended_events(self):
    # Each of these objects, passed as the only extended event, must make
    # the dry-run simulation raise ValueError.
    invalid_events = [
        msprime.PopulationParametersChange(time=0, initial_size=100),
        None,
        {},
        "",
    ]
    slim = stdpopsim.get_engine("slim")
    for invalid_event in invalid_events:
        with self.assertRaises(ValueError):
            slim.simulate(
                demographic_model=self.model,
                contig=self.contig,
                samples=self.samples,
                extended_events=[invalid_event],
                dry_run=True,
            )
def test_draw_mutation_no_save(self):
    # A DrawMutation event created without save=True must be accepted by a
    # dry-run simulation.
    draw = stdpopsim.ext.DrawMutation(
        time=self.T_mut,
        mutation_type_id=self.mut_id,
        population_id=0,
        coordinate=100,
    )
    slim = stdpopsim.get_engine("slim")
    slim.simulate(
        demographic_model=self.model,
        contig=self.contig,
        samples=self.samples,
        mutation_types=self.mutation_types,
        extended_events=[draw],
        dry_run=True,
    )