def test_number_of_calls(self):
    """Check that ``fetch_bibtex`` is called once per distinct citation DOI.

    The scenario uses a HomSap chr22 contig with the HapMapII_GRCh37 genetic
    map, so the genetic map's citations are part of the expected set.
    """
    species = stdpopsim.get_species("HomSap")
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr22", genetic_map=genetic_map.id)
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    engine = stdpopsim.get_default_engine()

    # Every citation list that is expected to contribute a BibTeX entry.
    citation_groups = (
        genetic_map.citations,
        model.citations,
        engine.citations,
        species.genome.mutation_rate_citations,
        species.genome.recombination_rate_citations,
        species.genome.assembly_citations,
    )
    # The same reference may appear in several lists; count each DOI once.
    ncite = len({ref.doi for group in citation_groups for ref in group})

    # Patch out writing to a file, then ensure that the method is called
    # the correct number of times.
    open_patch = mock.patch("builtins.open", mock.mock_open())
    fetch_patch = mock.patch.object(stdpopsim.citations.Citation, "fetch_bibtex")
    with open_patch, open("tmp.bib", "w") as bib, fetch_patch as mock_bib:
        cli.write_bibtex(engine, model, contig, species, bib)
        self.assertEqual(mock_bib.call_count, ncite)
def test_number_of_calls(self):
    """Check that ``fetch_bibtex`` is called once per merged citation.

    The expected citation list is assembled locally with
    ``stdpopsim.Citation.merge`` and compared against both
    ``cli.get_citations`` and the number of ``fetch_bibtex`` calls made by
    ``cli.write_bibtex``.
    """
    species = stdpopsim.get_species("HomSap")
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr20", genetic_map=genetic_map.id)
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    engine = stdpopsim.get_default_engine()

    expected_cites = stdpopsim.Citation.merge(
        [stdpopsim.citations._stdpopsim_citation]
        + genetic_map.citations
        + model.citations
        + engine.citations
        + species.genome.citations
        + species.citations
    )
    ncite = len({ref.doi for ref in expected_cites})
    # After merging, every remaining citation must have its own DOI.
    assert ncite == len(expected_cites)

    cli_cites = cli.get_citations(engine, model, contig, species)
    assert len(cli_cites) == len(expected_cites)

    # Patch out writing to a file, then ensure that the method is called
    # the correct number of times.
    open_patch = mock.patch("builtins.open", mock.mock_open())
    fetch_patch = mock.patch.object(
        stdpopsim.citations.Citation, "fetch_bibtex", autospec=True
    )
    with open_patch, open("tmp.bib", "w") as bib, fetch_patch as mock_bib:
        cli.write_bibtex(engine, model, contig, species, bib)
        assert mock_bib.call_count == ncite
def generic_models_example():
    """Run one example simulation with a generic ``PiecewiseConstantSize`` model.

    Uses HomSap chr22 requested with ``length_multiplier=0.1``, a model built
    from the species' population size, 10 samples, and the default engine.
    The resulting tree sequence is bound locally and not returned.
    """
    engine = stdpopsim.get_default_engine()
    species = stdpopsim.get_species("HomSap")
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    contig = species.get_contig("chr22", length_multiplier=0.1)
    samples = model.get_samples(10)
    ts = engine.simulate(model, contig, samples)
def test_genetic_map_citations(self):
    """Run ``cli.write_citations`` for a contig that uses a genetic map.

    Asserts that nothing is written to stdout and passes the captured stderr
    to ``self.check_citations`` together with the genetic map.
    """
    engine = stdpopsim.get_default_engine()
    species = stdpopsim.get_species("HomSap")
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr22", genetic_map=genetic_map.id)

    captured_out, captured_err = capture_output(
        cli.write_citations, engine, model, contig, species
    )

    self.assertEqual(len(captured_out), 0)
    self.check_citations(engine, species, genetic_map, model, captured_err)
def test_model_citations(self):
    """Run ``cli.write_citations`` for a catalog demographic model.

    A default-constructed ``stdpopsim.Contig()`` is used and no genetic map is
    involved, so ``None`` is passed to ``self.check_citations`` in its place.
    """
    engine = stdpopsim.get_default_engine()
    contig = stdpopsim.Contig()
    species = stdpopsim.get_species("HomSap")
    model = species.get_demographic_model("OutOfAfrica_3G09")

    captured_out, captured_err = capture_output(
        cli.write_citations, engine, model, contig, species
    )

    self.assertEqual(len(captured_out), 0)
    self.check_citations(engine, species, None, model, captured_err)
def test_model_citations(self, caplog):
    """Run ``cli.write_citations`` for a catalog demographic model.

    Asserts that stdout stays empty and checks the citations against the log
    text collected in ``caplog``. No genetic map is used, so ``None`` is
    passed to ``self.check_citations`` in its place.
    """
    engine = stdpopsim.get_default_engine()
    species = stdpopsim.get_species("HomSap")
    model = species.get_demographic_model("OutOfAfrica_3G09")
    contig = species.get_contig("22")

    captured_out, _ = capture_output(
        cli.write_citations, engine, model, contig, species
    )

    assert len(captured_out) == 0
    self.check_citations(engine, species, None, model, caplog.text)
def test_genetic_map_citations(self, caplog):
    """Run ``cli.write_citations`` for a contig that uses a genetic map.

    No DFE is supplied (``None`` is passed in that position). Asserts that
    stdout stays empty and checks the citations against ``caplog.text``.
    """
    engine = stdpopsim.get_default_engine()
    species = stdpopsim.get_species("HomSap")
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr20", genetic_map=genetic_map.id)

    # The trailing None is the DFE argument of cli.write_citations.
    captured_out, _ = capture_output(
        cli.write_citations, engine, model, contig, species, None
    )

    assert len(captured_out) == 0
    self.check_citations(engine, species, genetic_map, model, caplog.text)
def test_dfe_citations(self, caplog):
    """Check that ``cli.write_citations`` reports the citation of a DFE.

    Runs ``cli.write_citations`` with the ``Gamma_K17`` DFE of HomSap and
    asserts that nothing is written to stdout, while the log text collected
    in ``caplog`` contains the DFE section header and the expected reference.
    """
    species = stdpopsim.get_species("HomSap")
    genetic_map = species.get_genetic_map("HapMapII_GRCh37")
    contig = species.get_contig("chr20", genetic_map=genetic_map.id)
    model = stdpopsim.PiecewiseConstantSize(species.population_size)
    engine = stdpopsim.get_default_engine()
    # `dfe` is bound exactly once, to the DFE. The earlier assignment of a
    # genetic map to this name was dead code and has been removed.
    dfe = species.get_dfe("Gamma_K17")

    stdout, _ = capture_output(
        cli.write_citations, engine, model, contig, species, dfe
    )

    assert len(stdout) == 0
    assert "[distribution of fitness effects]" in caplog.text
    assert "Kim et al., 2017" in caplog.text
def test_simulation_runs(self):
    """Simulate ``self.model`` once and check the population count."""
    # Original note: with a recombination_map of None, we simulate a
    # coalescent without recombination in msprime, with no mutation.
    contig = stdpopsim.Contig()

    # Two samples for each population with sampling enabled, zero otherwise.
    per_population = [
        2 if population.allow_samples else 0
        for population in self.model.populations
    ]
    samples = self.model.get_samples(*per_population)

    ts = stdpopsim.get_default_engine().simulate(self.model, contig, samples)
    self.assertEqual(ts.num_populations, self.model.num_populations)
def test_simulation_runs(self):
    """Simulate ``self.model`` once and check the population count."""
    # Original note: with a recombination_map of None, we simulate a
    # coalescent without recombination in msprime. The mutation rate is
    # taken from the model.
    contig = stdpopsim.Contig.basic_contig(
        length=100, mutation_rate=self.model.mutation_rate
    )

    # Two samples for each population with sampling enabled, zero otherwise.
    per_population = [
        2 if population.allow_samples else 0
        for population in self.model.populations
    ]
    samples = self.model.get_samples(*per_population)

    ts = stdpopsim.get_default_engine().simulate(self.model, contig, samples)
    assert ts.num_populations == self.model.num_populations
def run_chr20_ooa(
    samples, Ne, length, mutation_rate, recombination_rate, rng, seed=None
):
    """
    Simulate HomSap chr20 under OutOfAfrica_3G09 and return a random window.

    The contig uses the HapMapII_GRCh37 genetic map and the default stdpopsim
    engine. After simulating, a window of ``length`` is chosen with
    ``rng.randint`` and everything outside it is removed with
    ``keep_intervals(...).trim()``.

    ``Ne``, ``mutation_rate`` and ``recombination_rate`` are accepted but not
    used by this function.

    :param samples: Samples passed through to ``engine.simulate``.
    :param length: Length of the window to keep.
    :param rng: Object providing ``randint(low, high)``.
    :param seed: Seed forwarded to ``engine.simulate``.
    :return: The trimmed tree sequence for the chosen window.
    """
    engine = stdpopsim.get_default_engine()
    species = stdpopsim.get_species("HomSap")
    model = species.get_demographic_model("OutOfAfrica_3G09")
    contig = species.get_contig("chr20", genetic_map="HapMapII_GRCh37")

    ts = engine.simulate(model, contig, samples, seed=seed)

    window_start = rng.randint(0, ts.get_sequence_length() - length)
    window = np.array([[window_start, window_start + length]])
    return ts.keep_intervals(window).trim()
def test_register_duplicate(self):
    """Registering the default engine a second time must raise ValueError."""
    default_engine = stdpopsim.get_default_engine()
    self.assertRaises(ValueError, stdpopsim.register_engine, default_engine)
def test_get_default_engine(self):
    """Run the shared ``_test_engine`` checks on the default engine."""
    self._test_engine(stdpopsim.get_default_engine())
def stdpopsim_cli_parser():
    """Build and return the top-level ``argparse`` parser for the CLI.

    The parser has global options (version, verbosity, cache directory,
    engine), one argument group per registered engine, one sub-command per
    species added through ``add_simulate_species_parser``, and a
    ``download-genetic-maps`` sub-command.
    """
    # TODO: the CLI defined here is hierarchical and clumsy, but it's the
    # best I could figure out. It can definitely be improved!
    parser = argparse.ArgumentParser(
        description="Command line interface for stdpopsim."
    )
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=f"%(prog)s {stdpopsim.__version__}",
    )
    parser.add_argument(
        "-v",
        "--verbosity",
        action="count",
        default=0,
        help="Increase the verbosity",
    )

    cache_dir_help = (
        "Set the cache directory to the specified value. "
        "Note that this can also be set using the environment variable "
        "STDPOPSIM_CACHE. If both the environment variable and this "
        "option are set, the option takes precedence. "
        f"Default: {stdpopsim.get_cache_dir()}"
    )
    parser.add_argument(
        "-c", "--cache-dir", type=str, default=None, help=cache_dir_help
    )

    parser.add_argument(
        "-e",
        "--engine",
        default=stdpopsim.get_default_engine().id,
        choices=[e.id for e in stdpopsim.all_engines()],
        help="Specify a simulation engine.",
    )

    # Each engine adds its own options to a dedicated argument group.
    for engine in stdpopsim.all_engines():
        engine_group = parser.add_argument_group(
            f"{engine.id} specific parameters"
        )
        engine.add_arguments(engine_group)

    subparsers = parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    for species in stdpopsim.all_species():
        add_simulate_species_parser(subparsers, species)

    maps_description = (
        "Download genetic maps and store them in the cache directory. "
        "Maps are downloaded regardless of whether they are already "
        "in the cache or not. Please use the --cache-dir option to "
        "download maps to a specific directory. "
    )
    maps_parser = subparsers.add_parser(
        "download-genetic-maps",
        help="Download genetic maps",
        description=maps_description,
    )
    maps_parser.add_argument(
        "species",
        nargs="?",
        help=(
            "Download genetic maps for this species. If not specified "
            "download all known genetic maps."
        ),
    )
    maps_parser.add_argument(
        "genetic_maps",
        type=str,
        nargs="*",
        help=(
            "If specified, download these genetic maps. If no maps "
            "are provided, download all maps for this species."
        ),
    )
    maps_parser.set_defaults(runner=run_download_genetic_maps)

    return parser
def stdpopsim_cli_parser():
    """Build and return the top-level ``argparse`` parser for the CLI.

    The parser has global options (version, verbosity, cache directory,
    engine), an msprime argument group, a SLiM argument group (skipped when
    ``IS_WINDOWS`` is true), one sub-command per species added through
    ``add_simulate_species_parser``, and a ``download-genetic-maps``
    sub-command.
    """
    # TODO: the CLI defined here is hierarchical and clumsy, but it's the
    # best I could figure out. It can definitely be improved!
    top_parser = argparse.ArgumentParser(
        description="Command line interface for stdpopsim.")
    top_parser.add_argument(
        "-V", "--version", action='version',
        version='%(prog)s {}'.format(stdpopsim.__version__))
    top_parser.add_argument(
        "-v", "--verbosity", action='count', default=1,
        help="Increase the verbosity")
    top_parser.add_argument(
        "-c", "--cache-dir", type=str, default=None,
        help=("Set the cache directory to the specified value. "
              "Note that this can also be set using the environment variable "
              "STDPOPSIM_CACHE. If both the environment variable and this "
              "option are set, the option takes precedence. "
              f"Default: {stdpopsim.get_cache_dir()}"))
    top_parser.add_argument(
        "-e", "--engine", default=stdpopsim.get_default_engine().id,
        choices=[e.id for e in stdpopsim.all_engines()],
        help="Specify a simulation engine.")

    # The first entry of the msprime engine's supported models is the default.
    supported_models = stdpopsim.get_engine("msprime").supported_models
    msprime_parser = top_parser.add_argument_group(
        "msprime specific parameters")
    msprime_parser.add_argument(
        "--msprime-model", default=supported_models[0],
        choices=supported_models,
        help="Specify the simulation model used by msprime. "
             "See msprime API documentation for details.")

    # Converter for the two values of --msprime-change-model (nargs=2).
    # The mutable default `_arg_is_time` is intentional: it is shared across
    # calls and flipped at the end of each call, so successive values are
    # treated alternately as a time (converted to float) and as a model name
    # (checked against `supported_models`). Invalid values are reported
    # through `parser.error`.
    def time_or_model(arg, _arg_is_time=[True, ], parser=top_parser):
        if _arg_is_time[0]:
            try:
                arg = float(arg)
            except ValueError:
                parser.error(f"`{arg}' is not a number")
        else:
            if arg not in supported_models:
                parser.error(f"`{arg}' is not a supported model")
        # Toggle so that the next value is interpreted as the other kind.
        _arg_is_time[0] = not _arg_is_time[0]
        return arg

    msprime_parser.add_argument(
        "--msprime-change-model", metavar=("T", "MODEL"),
        type=time_or_model, default=[], action="append", nargs=2,
        help="Change to the specified simulation MODEL at generation T. "
             "This option may provided multiple times.")

    # SLiM is not available for windows.
    if not IS_WINDOWS:
        def slim_exec(path):
            # Hack to set the SLIM environment variable at parse time,
            # before get_version() can be called.
            os.environ["SLIM"] = path
            return path

        slim_parser = top_parser.add_argument_group("SLiM specific parameters")
        slim_parser.add_argument(
            "--slim-path", metavar="PATH", type=slim_exec, default=None,
            help="Full path to `slim' executable.")
        slim_parser.add_argument(
            "--slim-script", action="store_true", default=False,
            help="Write script to stdout and exit without running SLiM.")
        slim_parser.add_argument(
            "--slim-scaling-factor", metavar="Q", default=1, type=float,
            help="Rescale model parameters by Q to speed up simulation. "
                 "See SLiM manual: `5.5 Rescaling population sizes to "
                 "improve simulation performance`. "
                 "[default=%(default)s].")
        slim_parser.add_argument(
            "--slim-burn-in", metavar="X", default=10, type=float,
            help="Length of the burn-in phase, in units of N generations "
                 "[default=%(default)s].")

    # A sub-command is mandatory: one per species, plus the map downloader.
    subparsers = top_parser.add_subparsers(dest="subcommand")
    subparsers.required = True

    for species in stdpopsim.all_species():
        add_simulate_species_parser(subparsers, species)

    download_maps_parser = subparsers.add_parser(
        "download-genetic-maps",
        help="Download genetic maps",
        description=(
            "Download genetic maps and store them in the cache directory. "
            "Maps are downloaded regardless of whether they are already "
            "in the cache or not. Please use the --cache-dir option to "
            "download maps to a specific directory. "))
    download_maps_parser.add_argument(
        "species", nargs="?",
        help=("Download genetic maps for this species. If not specified "
              "download all known genetic maps."))
    download_maps_parser.add_argument(
        "genetic_maps", type=str, nargs="*",
        help=("If specified, download these genetic maps. If no maps "
              "are provided, download all maps for this species."))
    # The `runner` default is set to the function handling this sub-command.
    download_maps_parser.set_defaults(runner=run_download_genetic_maps)

    return top_parser
species = stdpopsim.get_species("DroMel")

## The desired chromosome arm is given as the first command-line argument;
## build the contig for it with the ComeronCrossover_dm6 genetic map.
chrom_arm = sys.argv[1]
contig = species.get_contig(chrom_arm, genetic_map="ComeronCrossover_dm6")

## For testing, it is good to model a small chunk of the chromosome instead:
#contig = species.get_contig(sys.argv[1], length_multiplier = 0.10)

## You can grab the genetic map out of the simulations using:
#for p, r in zip( contig.recombination_map.get_positions() , contig.recombination_map.get_rates() ):
#    print( p , r )

Ne = species.population_size / 10
#Ne = 10000

## Isolation-with-migration parameters, in the order of the header printed
## below: NA, N1, N2, T, M12, M21.
im_params = (2 * Ne, Ne, Ne, Ne, 1.5 / Ne, 1.5 / Ne)
model = stdpopsim.IsolationWithMigration(*im_params)
print("NA", "N1", "N2", "T", "M12", "M21")
print(*im_params)

## I want to simulate 20 samples from each population
samples = model.get_samples(20, 20)

## I will simulate using the default engine
engine = stdpopsim.get_default_engine()
print("running simulation")
ts = engine.simulate(model, contig, samples)

## Save the simulated data to a VCF
with open(f"drosophilaSimulated.{chrom_arm}.vcf", "w") as vcf_file:
    ts.write_vcf(vcf_file)