Example #1
0
class TestGenome(unittest.TestCase, test_species.GenomeTestMixin):
    """
    Checks on the HomSap genome definition.
    """

    genome = stdpopsim.get_species("HomSap").genome

    def test_basic_attributes(self):
        # The HomSap genome is expected to list 25 chromosomes.
        chromosome_count = len(self.genome.chromosomes)
        self.assertEqual(chromosome_count, 25)

    def test_recombination_rates(self):
        # Build a contig from the HapMapII_GRCh37 map for every chromosome
        # and check that its mean recombination rate agrees with the rate
        # stored on the chromosome itself.
        homsap = stdpopsim.get_species("HomSap")
        map_id = "HapMapII_GRCh37"
        for chromosome in self.genome.chromosomes:
            if chromosome.id != "chrY":
                contig = homsap.get_contig(chromosome.id, genetic_map=map_id)
            else:
                # Requesting chrY with this map has to emit a warning.
                with self.assertWarns(Warning):
                    contig = homsap.get_contig(
                        chromosome.id, genetic_map=map_id)
            mean_rate = contig.recombination_map.mean_recombination_rate
            self.assertAlmostEqual(chromosome.recombination_rate, mean_rate)
Example #2
0
def main():
    """
    Benchmark the stdpopsim CLI on every demographic model of a few species.

    For each species the shortest chromosome is simulated with each of the
    species' demographic models. The two values returned by ``time_cmd``
    (recorded as cpu_time and ram) and the size of the output file are
    collected; after every run the accumulated table is written to
    data/benchmark.csv and printed.
    """
    sample_count = 100
    rng_seed = 42

    columns = ("species", "model", "cpu_time", "ram", "file_size")
    data = {column: [] for column in columns}

    for species_id in ["PonAbe", "HomSap", "DroMel", "AraTha"]:
        species = stdpopsim.get_species(species_id)
        # Pick the shortest chromosome.
        by_length = sorted(species.genome.chromosomes, key=lambda c: c.length)
        shortest = by_length[0]
        assert shortest.recombination_rate > 0
        for model in species.demographic_models:
            with tempfile.NamedTemporaryFile() as out:
                arguments = (
                    f"{species_id} -d {model.id} -c {shortest.id} {sample_count} "
                    f"-s {rng_seed} -o {out.name}"
                ).split()
                cpu_time, ram = time_cmd(["stdpopsim", *arguments])
                row = {
                    "species": species.id,
                    "model": model.id,
                    "cpu_time": cpu_time,
                    "ram": ram,
                    "file_size": os.path.getsize(out.name),
                }
                for column, value in row.items():
                    data[column].append(value)

                # Rewrite the table after each run so partial results
                # are on disk while the benchmark is still going.
                df = pd.DataFrame(data)
                df.to_csv("data/benchmark.csv")
                print(df)
Example #3
0
def _twopop_IM(engine_id,
               out_dir,
               seed,
               NA=1000,
               N1=500,
               N2=5000,
               T=1000,
               M12=0,
               M21=0,
               pulse=None,
               **sim_kwargs):
    """
    Simulate a two-population isolation-with-migration model and time it.

    The model is built from NA, N1, N2, T, M12 and M21 on 1% of AraTha
    chr5. If ``pulse`` is given it is added to the model's demographic
    events, which are then re-sorted by time. The resulting tree sequence
    is dumped to ``out_dir / f"{seed}.trees"``.

    :param engine_id: id of the simulation engine to use.
    :param out_dir: directory (path-like supporting ``/``) for the output.
    :param seed: random seed; also used as the output file stem.
    :param sim_kwargs: extra keyword arguments forwarded to the engine.
    :return: tuple of (output file path, seconds spent in ``simulate``).
    """
    arabidopsis = stdpopsim.get_species("AraTha")
    contig = arabidopsis.get_contig("chr5", length_multiplier=0.01)  # ~270 kb
    im_params = dict(NA=NA, N1=N1, N2=N2, T=T, M12=M12, M21=M21)
    model = stdpopsim.IsolationWithMigration(**im_params)
    if pulse is not None:
        model.demographic_events.append(pulse)
        # Keep the events ordered by time after inserting the pulse.
        model.demographic_events.sort(key=lambda event: event.time)
    model.generation_time = arabidopsis.generation_time
    samples = model.get_samples(50, 50, 0)
    engine = stdpopsim.get_engine(engine_id)

    # Only the simulate() call itself is timed.
    started = time.perf_counter()
    ts = engine.simulate(model, contig, samples, seed=seed, **sim_kwargs)
    elapsed = time.perf_counter() - started

    out_file = out_dir / f"{seed}.trees"
    ts.dump(out_file)
    return out_file, elapsed
Example #4
0
def homsap_composite_model(length, sample_counts, seed, model=None):
    """
    Assemble species, model, contig and samples for a HomSap simulation.

    :param length: length handed to ``random_autosomal_chunk``.
    :param sample_counts: mapping from population id to number of samples.
        If it contains "Nea", the count must be exactly 4.
    :param seed: seed handed to ``random_autosomal_chunk``.
    :param model: demographic model to use. When None (the default) a fresh
        model is built with ``hominin_composite()``. Previously the default
        was evaluated once at definition time and any passed model was
        overwritten; a supplied model is now honoured.
    :return: tuple ``(species, model, contig, samples)``.
    :raises RuntimeError: if "Nea" is requested with a count other than 4.
    """
    # NOTE(review): the count checked is 4 while the message says "one
    # sample each" -- presumably one diploid individual (two samples) per
    # Neanderthal; confirm.
    if "Nea" in sample_counts and sample_counts["Nea"] != 4:
        raise RuntimeError(
            "Must have one sample each for the Vindija and Altai Neanderthals"
        )
    species = stdpopsim.get_species("HomSap")
    if model is None:
        model = hominin_composite()
    contig = random_autosomal_chunk(species, "HapMapII_GRCh37", length, seed)
    # "Nea" and populations without a sampling time get zero samples here;
    # the Neanderthal samples are appended separately below.
    samples = model.get_samples(
        *[
            sample_counts.get(p.id, 0)
            if p.id != "Nea" and p.sampling_time is not None
            else 0
            for p in model.populations
        ]
    )
    if "Nea" in sample_counts:
        # Altai and Vindija Neanderthal dates from Prüfer et al. 2017.
        T_Altai = 115e3 / model.generation_time
        T_Vindija = 55e3 / model.generation_time
        pop = {p.id: i for i, p in enumerate(model.populations)}
        nea_index = pop["Nea"]
        # Two samples at each of the two sampling times.
        samples.extend(
            msprime.Sample(nea_index, sample_time)
            for sample_time in (T_Altai, T_Altai, T_Vindija, T_Vindija)
        )
    return species, model, contig, samples
Example #5
0
 def test_number_of_calls(self):
     # Test that genetic map citations are converted.
     homsap = stdpopsim.get_species("HomSap")
     gmap = homsap.get_genetic_map("HapMapII_GRCh37")
     contig = homsap.get_contig("chr22", genetic_map=gmap.id)
     model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
     engine = stdpopsim.get_default_engine()
     citation_lists = [
         gmap.citations,
         model.citations,
         engine.citations,
         homsap.genome.mutation_rate_citations,
         homsap.genome.recombination_rate_citations,
         homsap.genome.assembly_citations,
     ]
     # Count the distinct DOIs across every citation source.
     unique_dois = set()
     for citation_list in citation_lists:
         unique_dois.update(ref.doi for ref in citation_list)
     expected_calls = len(unique_dois)
     # Replace file writing and the bibtex fetch with mocks, then make
     # sure the fetch happened once per distinct DOI.
     with mock.patch("builtins.open", mock.mock_open()):
         with open('tmp.bib', 'w') as bib:
             with mock.patch.object(
                     stdpopsim.citations.Citation,
                     "fetch_bibtex") as mock_bib:
                 cli.write_bibtex(engine, model, contig, homsap, bib)
                 self.assertEqual(mock_bib.call_count, expected_calls)
Example #6
0
def OutOfAfrica_3G09_with_DFE(seed):
    """
    Run HomSap/OutOfAfrica_3G09 (Gutenkunst et al.) in SLiM with a DFE.

    :param seed: random seed passed to the engine.
    :return: whatever ``engine.simulate`` returns for this configuration.
    """
    homsap = stdpopsim.get_species("HomSap")
    demography = homsap.get_demographic_model("OutOfAfrica_3G09")
    chr1_chunk = homsap.get_contig("chr1", length_multiplier=0.001)
    # Sample counts are given in the order YRI, CEU, CHB.
    samples = demography.get_samples(100, 100, 100)

    sim_options = dict(
        seed=seed,
        mutation_types=KimDFE(),
        slim_scaling_factor=10,
        slim_burn_in=10,
        # Add slim_script=True to print the script instead of running it.
    )

    # Simulate.
    slim = stdpopsim.get_engine("slim")
    return slim.simulate(demography, chr1_chunk, samples, **sim_options)
Example #7
0
class TestGenomeData(test_species.GenomeTestBase):
    """
    Per-chromosome rate checks for the AnoGam genome.

    Both tests are currently skipped because QC has not been done; the -1
    rates are presumably placeholders to be filled in during QC.
    """

    genome = stdpopsim.get_species("AnoGam").genome

    @pytest.mark.skip("Recombination rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        [
            ("2L", -1),
            ("2R", -1),
            ("3L", -1),
            ("3R", -1),
            ("X", -1),
            ("Mt", -1),
        ],
    )
    def test_recombination_rate(self, name, rate):
        chromosome = self.genome.get_chromosome(name)
        assert rate == pytest.approx(chromosome.recombination_rate)

    @pytest.mark.skip("Mutation rate QC not done yet")
    @pytest.mark.parametrize(
        ["name", "rate"],
        [
            ("2L", -1),
            ("2R", -1),
            ("3L", -1),
            ("3R", -1),
            ("X", -1),
            ("Mt", -1),
        ],
    )
    def test_mutation_rate(self, name, rate):
        chromosome = self.genome.get_chromosome(name)
        assert rate == pytest.approx(chromosome.mutation_rate)
Example #8
0
    def test_recombination_rates(self, chr_id):
        """
        Compare a chromosome's stored recombination rate with the mean rate
        of the contig built from the HapMapII_GRCh37 map.
        """
        # We should recast this test and just hard code in the values.
        # Tests should be *obvious* not clever.

        # recompute recombination rates from HapMapII_GRCh37 map then
        # compare the results to the current recombination rates for each chromosome
        genetic_map = "HapMapII_GRCh37"
        species = stdpopsim.get_species("HomSap")
        chrom = species.genome.get_chromosome(chr_id)
        if chr_id in ["X", "Y", "MT"]:
            with pytest.warns(stdpopsim.NonAutosomalWarning):
                contig = species.get_contig(chr_id, genetic_map=genetic_map)
        elif chr_id in ["3", "5", "7", "11", "16", "17", "18", "20"]:
            # These chromosomes are requested without any warning check.
            contig = species.get_contig(chr_id, genetic_map=genetic_map)
        else:
            # The rest of the chromosomes are currently emitting a warning about
            # the mismatch in chromosome lengths because of the fact that we're
            # on 37 for the map. This should be resolved when we start using the
            # lifted over map.
            with pytest.warns(UserWarning,
                              match="longer than chromosome length"):
                contig = species.get_contig(chr_id, genetic_map=genetic_map)
        # pytest.approx must be compared with ==. The former
        # `assert pytest.approx(a, b)` treated b as the relative tolerance
        # and asserted the truthiness of the approx object, so it could
        # never fail.
        assert chrom.recombination_rate == pytest.approx(
            contig.recombination_map.mean_rate
        )
 def test_download_over_cache(self):
     # Downloading twice in a row must leave the map cached each time.
     dromel = stdpopsim.get_species("DroMel")
     comeron_map = dromel.get_genetic_map("ComeronCrossover_dm6")
     for _ in range(2):
         comeron_map.download()
         self.assertTrue(comeron_map.is_cached())
Example #10
0
 def test_bad_genetic_map(self):
     # An unknown map id must make the CLI call exit() exactly once, with
     # a message that lists the ids of the species' genetic maps.
     homsap = stdpopsim.get_species("HomSap")
     with mock.patch("stdpopsim.cli.exit", autospec=True) as mocked_exit:
         cli.get_genetic_map_wrapper(homsap, "XXX")
         known_ids = ", ".join(gm.id for gm in homsap.genetic_maps)
         expected = f"GeneticMap 'HomSap/XXX' not in catalog ({known_ids})"
         mocked_exit.assert_called_once_with(expected)
Example #11
0
class TestGetChromosomeAnnotations(tests.CacheReadingTest):
    """
    Chromosome-level annotation lookups against the
    Ensembl_GRCh38 human GFF.
    """
    # TODO: The HomSap annotations are huge. Once we include a smaller
    # annotation set, we should instead use that, so tests are faster.
    species = stdpopsim.get_species("HomSap")
    an = species.get_annotations("Ensembl_GRCh38_gff3")

    def test_known_chromosome(self):
        table = self.an.get_chromosome_annotations("21")
        self.assertIsInstance(table, pandas.DataFrame)

    def test_known_chromosome_prefix(self):
        # The "chr"-prefixed form of the id is accepted as well.
        table = self.an.get_chromosome_annotations("chr21")
        self.assertIsInstance(table, pandas.DataFrame)

    def test_unknown_chromosome(self):
        for bad_chrom in ("", "ABD", None):
            self.assertRaises(
                ValueError, self.an.get_chromosome_annotations, bad_chrom)

    def test_get_genes(self):
        genes = self.an.get_genes_from_chromosome("21")
        self.assertIsInstance(genes, pandas.DataFrame)

    def test_get_genes_full(self):
        genes = self.an.get_genes_from_chromosome("21", full_table=True)
        self.assertIsInstance(genes, pandas.DataFrame)

    def test_bad_annot_type(self):
        # An unrecognised annotation type is rejected with ValueError.
        with self.assertRaises(ValueError):
            self.an.get_annotation_type_from_chromomosome("foo", "21")
Example #12
0
    def test_bad_params(self):
        # Zero or negative scaling factors and negative burn-in values
        # must each be rejected with ValueError.
        engine = stdpopsim.get_engine("slim")
        homsap = stdpopsim.get_species("HomSap")
        contig = homsap.get_contig("chr1")
        model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
        samples = model.get_samples(10)

        invalid_settings = [
            {"slim_scaling_factor": value} for value in (0, -1, -1e-6)
        ]
        invalid_settings += [
            {"slim_burn_in": value} for value in (-1, -1e-6)
        ]
        for bad_kwargs in invalid_settings:
            with self.assertRaises(ValueError):
                engine.simulate(
                    demographic_model=model,
                    contig=contig,
                    samples=samples,
                    dry_run=True,
                    **bad_kwargs,
                )
Example #13
0
class TestGetContig(unittest.TestCase):
    """
    Exercises ``get_contig`` on the HomSap species.
    """
    species = stdpopsim.get_species("HomSap")

    def test_length_multiplier(self):
        # The last position of the recombination map must scale with the
        # length multiplier.
        unscaled = self.species.get_contig("chr22")
        for factor in (0.125, 1.0, 2.0):
            scaled = self.species.get_contig("chr22", length_multiplier=factor)
            expected_end = (
                unscaled.recombination_map.get_positions()[-1] * factor)
            actual_end = scaled.recombination_map.get_positions()[-1]
            self.assertEqual(expected_end, actual_end)

    def test_length_multiplier_on_empirical_map(self):
        # Combining a named genetic map with a multiplier is an error.
        self.assertRaises(
            ValueError,
            self.species.get_contig,
            "chr1",
            genetic_map="HapMapII_GRCh37",
            length_multiplier=2,
        )

    def test_genetic_map(self):
        # TODO we should use a different map here so we're not hitting the cache.
        map_id = "HapMapII_GRCh37"
        contig = self.species.get_contig("chr22", genetic_map=map_id)
        recomb_map = contig.recombination_map
        self.assertIsInstance(recomb_map, msprime.RecombinationMap)
Example #14
0
 def test_get_known_genetic_map(self):
     # Each known map name resolves to a GeneticMap carrying that name.
     homsap = stdpopsim.get_species("homsap")
     for expected_name in ("HapmapII_GRCh37", "Decode_2010_sex_averaged"):
         found = homsap.get_genetic_map(expected_name)
         self.assertIsInstance(found, stdpopsim.GeneticMap)
         self.assertEqual(found.name, expected_name)
Example #15
0
 def test_bad_model(self):
     # An unknown model id must make the CLI call exit() exactly once
     # with the "not in catalog" message.
     homsap = stdpopsim.get_species("HomSap")
     expected = "DemographicModel 'HomSap/XXX' not in catalog"
     with mock.patch("stdpopsim.cli.exit", autospec=True) as mocked_exit:
         cli.get_model_wrapper(homsap, "XXX")
         mocked_exit.assert_called_once_with(expected)
 def test_known_chromosome(self):
     # The Campbell2016_CanFam3_1 map for chromosome 1 must be a RateMap
     # whose sequence length equals the chromosome length.
     canfam = stdpopsim.get_species("CanFam")
     campbell_map = canfam.get_genetic_map("Campbell2016_CanFam3_1")
     chromosome = canfam.genome.get_chromosome("1")
     rate_map = campbell_map.get_chromosome_map(chromosome.id)
     self.assertIsInstance(rate_map, msprime.RateMap)
     self.assertEqual(chromosome.length, rate_map.sequence_length)
Example #17
0
 def test_bad_genetic_map(self):
     # An unknown map id must make the CLI call exit() exactly once
     # with the "not in catalog" message.
     homsap = stdpopsim.get_species("HomSap")
     expected = "Genetic map 'HomSap/XXX' not in catalog"
     with mock.patch("stdpopsim.cli.exit", autospec=True) as mocked_exit:
         cli.get_genetic_map_wrapper(homsap, "XXX")
         mocked_exit.assert_called_once_with(expected)
Example #18
0
 def test_required_params(self):
     # Every engine's simulate() must raise TypeError when it is called
     # with only a model and a contig (no further arguments).
     species = stdpopsim.get_species("HomSap")
     model = species.get_demographic_model("AshkSub_7G19")
     # Pass the contig itself: the former trailing comma wrapped it in a
     # one-element tuple, so a TypeError could have been raised for the
     # wrong reason.
     contig = species.get_contig("chr1")
     for engine in stdpopsim.all_engines():
         with self.assertRaises(TypeError):
             engine.simulate(model, contig)
def setup_sample_file(args):
    """
    Load a sample data file and derive a recombination array for its sites.

    Reads ``args.sample_file`` (must end in ".samples"; described upstream
    as a Thousand Genomes Project sample data file) and
    ``args.genetic_map``. When a map file is given it is read as a HapMap
    file; otherwise, if the filename contains ``chr<N>``, that chromosome
    of the HapMapII_GRCh37 map is used (downloading it if not cached). With
    neither available, physical distances divided by the sequence length
    are used instead.

    :return: tuple of (sample data, recombination rate array, the filename
        without its ".samples" suffix to use as a file prefix, None).
    :raises ValueError: if the filename does not end with ".samples".
    """
    filename = args.sample_file
    hapmap_path = args.genetic_map
    suffix = ".samples"
    if not filename.endswith(suffix):
        raise ValueError("Sample data file must end with '.samples'")
    sd = tsinfer.load(filename)
    inference_pos = sd.sites_position[:][sd.sites_inference[:]]

    chrom_match = re.search(r'(chr\d+)', filename)
    if hapmap_path is None and not chrom_match:
        # No genetic map to consult: fall back to physical gaps scaled by
        # the total sequence length.
        gaps = np.diff(inference_pos)
        rho = np.concatenate(([0.0], gaps / sd.sequence_length))
    else:
        if hapmap_path is not None:
            chr_map = msprime.RecombinationMap.read_hapmap(hapmap_path)
        else:
            chrom_name = chrom_match.group(1)
            print(
                f"Using {chrom_name} from HapMapII_GRCh37 for the recombination map")
            catalog_map = stdpopsim.get_species("HomSap").get_genetic_map(
                id="HapMapII_GRCh37")
            if not catalog_map.is_cached():
                catalog_map.download()
            chr_map = catalog_map.get_chromosome_map(chrom_name)
        genetic_pos = physical_to_genetic(chr_map, inference_pos)
        rho = np.concatenate(([0.0], np.diff(genetic_pos)))

    prefix = filename[:-len(suffix)]
    return sd, rho, prefix, None
Example #20
0
class PiecewiseConstantSizeMixin:
    """
    Mixin providing a small piecewise-constant-size model, contig,
    samples and mutation types as class attributes.
    """

    species = stdpopsim.get_species("HomSap")
    contig = species.get_contig("chr22", length_multiplier=0.001)  # ~50 kb

    N0 = 1000  # size in the present
    N1 = 500  # ancestral size
    T = 500  # generations since size change occurred
    T_mut = 300  # introduce a mutation at this generation
    model = stdpopsim.PiecewiseConstantSize(N0, (T, N1))
    model.generation_time = 1
    samples = model.get_samples(100)
    mutation_types = [
        stdpopsim.ext.MutationType(convert_to_substitution=False),
    ]
    # Set to the number of mutation types defined above.
    mut_id = len(mutation_types)

    def allele_frequency(self, ts):
        """
        Return the fraction of samples that carry the single mutation in ts.
        """
        # surely there's a simpler way!
        assert ts.num_mutations == 1
        sample_nodes = ts.samples()
        mutation = next(ts.mutations())
        # Tree covering the position of the mutation's site.
        tree = ts.at(ts.site(mutation.site).position)
        carriers = sum(
            1 for node in sample_nodes
            if tree.is_descendant(node, mutation.node)
        )
        return carriers / len(sample_nodes)
Example #21
0
    def run(self):
        """
        Build the catalog nodes for the species named by the first argument.

        The top-level section holds the species title and summary, followed
        by "Genome", "Genetic Maps" and "Models" subsections with a
        transition after each of the first two.

        :return: list of ``[species_target, section]``.
        """
        species = stdpopsim.get_species(self.arguments[0])
        root_id = f"sec_catalog_{species.id}"
        species_target = self.get_target(root_id)

        def titled_subsection(suffix, title):
            # New section with id "<root_id>_<suffix>" and the given title.
            node = nodes.section(ids=[f"{root_id}_{suffix}"])
            node += nodes.title(text=title)
            return node

        section = nodes.section(ids=[root_id], names=[root_id])
        section += nodes.title(text=species.name)
        section += self.species_summary(species)

        genome_section = titled_subsection("genome", "Genome")
        genome_section += self.chromosomes_table(species)
        section += genome_section
        section += nodes.transition()

        maps_section = titled_subsection("genetic_maps", "Genetic Maps")
        maps_section += self.genetic_maps_table(species)
        for genetic_map in species.genetic_maps:
            maps_section += self.genetic_map_section(species, genetic_map)
        section += maps_section
        section += nodes.transition()

        models_section = titled_subsection("models", "Models")
        models_section += self.models_table(species)
        for demographic_model in species.demographic_models:
            models_section += self.model_section(species, demographic_model)
        section += models_section

        return [species_target, section]
Example #22
0
 def test_get_known_genetic_map(self):
     # Each known map id resolves to a GeneticMap carrying that id.
     homsap = stdpopsim.get_species("HomSap")
     for expected_id in ("HapMapII_GRCh37", "DeCodeSexAveraged_GRCh36"):
         found = homsap.get_genetic_map(expected_id)
         self.assertIsInstance(found, stdpopsim.GeneticMap)
         self.assertEqual(found.id, expected_id)
Example #23
0
    def test_script_generation(self):
        # Run the slim engine with slim_script=True for three different
        # models and check the captured output registers a late event.
        engine = stdpopsim.get_engine("slim")
        species = stdpopsim.get_species("HomSap")
        contig = species.get_contig("chr1")

        def check_script(model, samples):
            out, _ = capture_output(engine.simulate,
                                    demographic_model=model,
                                    contig=contig,
                                    samples=samples,
                                    slim_script=True)
            self.assertTrue("sim.registerLateEvent" in out)

        constant = stdpopsim.PiecewiseConstantSize(species.population_size)
        constant_samples = constant.get_samples(10)
        constant.generation_time = species.generation_time
        check_script(constant, constant_samples)

        eurasia = species.get_demographic_model("AncientEurasia_9K19")
        check_script(eurasia, eurasia.get_samples(1, 2, 3, 4, 5, 6, 7))

        admixture = species.get_demographic_model("AmericanAdmixture_4B11")
        check_script(admixture, admixture.get_samples(10, 10, 10))
Example #24
0
class TestGenomeData(test_species.GenomeTestBase):
    """
    Per-chromosome recombination and mutation rate checks for AedAeg.
    """

    genome = stdpopsim.get_species("AedAeg").genome

    @pytest.mark.parametrize(
        ["name", "rate"],
        {
            "1": 0.306e-8,
            "2": 0.249e-8,
            "3": 0.291e-8,
            "MT": 0.0
        }.items(),
    )
    def test_recombination_rate(self, name, rate):
        # pytest.approx must be compared with ==. The former
        # `assert pytest.approx(rate, value)` treated the second argument
        # as the relative tolerance and could never fail.
        assert rate == pytest.approx(
            self.genome.get_chromosome(name).recombination_rate)

    @pytest.mark.parametrize(["name", "rate"], {
        "1": 3.5e-9,
        "2": 3.5e-9,
        "3": 3.5e-9,
        "MT": 3.5e-9
    }.items())
    def test_mutation_rate(self, name, rate):
        # Same fix as above: compare against the approx object explicitly.
        assert rate == pytest.approx(
            self.genome.get_chromosome(name).mutation_rate)
Example #25
0
    def test_number_of_calls(self):
        # Test that genetic map citations are converted.
        homsap = stdpopsim.get_species("HomSap")
        gmap = homsap.get_genetic_map("HapMapII_GRCh37")
        contig = homsap.get_contig("chr20", genetic_map=gmap.id)
        model = stdpopsim.PiecewiseConstantSize(homsap.population_size)
        engine = stdpopsim.get_default_engine()

        every_citation = (
            [stdpopsim.citations._stdpopsim_citation]
            + gmap.citations
            + model.citations
            + engine.citations
            + homsap.genome.citations
            + homsap.citations
        )
        local_cites = stdpopsim.Citation.merge(every_citation)
        # After merging, every remaining citation must have its own DOI.
        ncite = len({ref.doi for ref in local_cites})
        assert ncite == len(local_cites)
        # The CLI must come up with the same number of citations.
        cli_cites = cli.get_citations(engine, model, contig, homsap)
        assert len(cli_cites) == len(local_cites)

        # Replace file writing and the bibtex fetch with mocks, then make
        # sure the fetch happened once per citation.
        with mock.patch("builtins.open", mock.mock_open()):
            with open("tmp.bib", "w") as bib:
                with mock.patch.object(
                    stdpopsim.citations.Citation, "fetch_bibtex", autospec=True
                ) as mock_bib:
                    cli.write_bibtex(engine, model, contig, homsap, bib)
                    assert mock_bib.call_count == ncite
Example #26
0
def get_models_help(species_id, model_id):
    """
    Build the help text describing demographic models of a species.

    If model_id is None, every model of the species is described.
    Otherwise it must be a string with a valid model ID and only that
    model is described. For each model the text holds its id and
    description, the wrapped long description, and the populations that
    allow samples.
    """
    species = stdpopsim.get_species(species_id)
    if model_id is not None:
        header = "\nModel description\n\n"
        wanted_ids = [model_id]
    else:
        header = f"\nAll simulation models for {species.name}\n\n"
        wanted_ids = [model.id for model in species.demographic_models]

    # TODO improve this text formatting.
    indent = " " * 4
    wrapper = textwrap.TextWrapper(initial_indent=indent,
                                   subsequent_indent=indent)
    pieces = [header]
    for wanted_id in wanted_ids:
        model = get_model_wrapper(species, wanted_id)
        pieces.append(f"{model.id}: {model.description}\n")
        pieces.append(wrapper.fill(textwrap.dedent(model.long_description)))
        pieces.append("\n\n")

        pieces.append(indent + "Populations:\n")
        # Only populations that can be sampled from are listed.
        pieces.extend(
            f"{indent * 2}{population.id}: {population.description}\n"
            for population in model.populations
            if population.allow_samples
        )
        pieces.append("\n")

    return "".join(pieces)
Example #27
0
class TestGenome(test_species.GenomeTestBase):
    """
    Checks on the HomSap genome definition.
    """

    genome = stdpopsim.get_species("HomSap").genome

    def test_basic_attributes(self):
        assert len(self.genome.chromosomes) == 25

    @pytest.mark.parametrize("chr_id",
                             [chrom.id for chrom in genome.chromosomes])
    def test_recombination_rates(self, chr_id):
        """
        Compare a chromosome's stored recombination rate with the mean rate
        of the contig built from the HapMapII_GRCh37 map.
        """
        # recompute recombination rates from HapMapII_GRCh37 map then
        # compare the results to the current recombination rates for each chromosome
        genetic_map = "HapMapII_GRCh37"
        species = stdpopsim.get_species("HomSap")
        chrom = species.genome.get_chromosome(chr_id)
        if chr_id in ["X", "Y", "MT"]:
            with pytest.warns(stdpopsim.NonAutosomalWarning):
                contig = species.get_contig(chr_id, genetic_map=genetic_map)
        elif chr_id in ["3", "5", "7", "11", "16", "17", "18", "20"]:
            # These chromosomes are requested without any warning check.
            contig = species.get_contig(chr_id, genetic_map=genetic_map)
        else:
            # The rest of the chromosomes are currently emitting a warning about
            # the mismatch in chromosome lengths because of the fact that we're
            # on 37 for the map. This should be resolved when we start using the
            # lifted over map.
            with pytest.warns(UserWarning,
                              match="longer than chromosome length"):
                contig = species.get_contig(chr_id, genetic_map=genetic_map)
        # pytest.approx must be compared with ==. The former
        # `assert pytest.approx(a, b)` treated b as the relative tolerance
        # and asserted the truthiness of the approx object, so it could
        # never fail.
        assert chrom.recombination_rate == pytest.approx(
            contig.recombination_map.mean_rate
        )
Example #28
0
class TestSpeciesData(test_species.SpeciesTestBase):
    """
    Identity and QC checks for the AnaPla species definition.
    """

    species = stdpopsim.get_species("AnaPla")

    def test_ensembl_id(self):
        ensembl_id = self.species.ensembl_id
        assert ensembl_id == "anas_platyrhynchos"

    def test_name(self):
        scientific_name = self.species.name
        assert scientific_name == "Anas platyrhynchos"

    def test_common_name(self):
        common_name = self.species.common_name
        assert common_name == "Mallard"

    # QC Tests. These tests are performed by another contributor
    # independently referring to the citations provided in the
    # species definition, filling in the appropriate values
    # and deleting the pytest "skip" annotations.

    # @pytest.mark.skip("Population size QC not done yet")
    def test_qc_population_size(self):
        population_size = self.species.population_size
        assert population_size == 156000

    # @pytest.mark.skip("Generation time QC not done yet")
    def test_qc_generation_time(self):
        generation_time = self.species.generation_time
        assert generation_time == 4
Example #29
0
class TestGenome(unittest.TestCase, test_species.GenomeTestMixin):
    """
    Checks on the Pongo abelii genome definition.
    """
    genome = stdpopsim.get_species("PonAbe").genome

    def test_basic_attributes(self):
        chromosome_count = len(self.genome.chromosomes)
        self.assertEqual(chromosome_count, 24)

    def test_chromosome_lengths(self):
        # Expected length of each chromosome, checked in this order.
        expected_lengths = {
            "chr1": 229942017,
            "chr2a": 113028656,
            "chr2b": 135000294,
            "chr3": 202140232,
            "chr4": 198332218,
            "chr5": 183952662,
            "chr6": 174210431,
            "chr7": 157549271,
            "chr8": 153482349,
            "chr9": 135191526,
            "chr10": 133410057,
            "chr11": 132107971,
            "chr12": 136387465,
            "chr13": 117095149,
            "chr14": 108868599,
            "chr15": 99152023,
            "chr16": 77800216,
            "chr17": 73212453,
            "chr18": 94050890,
            "chr19": 60714840,
            "chr20": 62736349,
            "chr21": 48394510,
            "chr22": 46535552,
            "chrX": 156195299,
        }
        for chrom_id, length in expected_lengths.items():
            chromosome = self.genome.get_chromosome(chrom_id)
            self.assertEqual(chromosome.length, length)
Example #30
0
def get_rho(ancestors, filename):
    """
    Compute the recombination array for the sites of ``ancestors``.

    The chromosome name (``chr<N>``) is taken from ``filename``. If
    ``params.genetic_map`` is set, the HapMap file
    ``params.genetic_map + chr<N> + ".txt"`` is read; otherwise that
    chromosome of the HapMapII_GRCh37 map is used (downloading it if it is
    not cached). Site positions are converted with ``physical_to_genetic``
    and the result is 0.0 followed by the successive differences.

    The former physical-distance fallback branch was removed: it could
    never run because a filename without ``chr<N>`` raises first, and it
    referred to an undefined name.

    :param ancestors: object exposing ``sites_position``.
    :param filename: name that must contain ``chr<N>``.
    :return: the recombination array.
    :raises ValueError: if ``filename`` contains no ``chr<N>``, or if any
        two consecutive sites are at zero distance.
    """
    inference_pos = ancestors.sites_position[:]

    match = re.search(r'(chr\d+)', filename)
    if match is None:
        raise ValueError("chr must be in filename")
    chrom = match.group(1)
    hapmap_prefix = params.genetic_map
    if hapmap_prefix is not None:
        print(f"Using {chrom} from GRCh38 for the recombination map")
        chr_map = msprime.RecombinationMap.read_hapmap(
            hapmap_prefix + chrom + ".txt")
    else:
        print(
            f"Using {chrom} from HapMapII_GRCh37 for the recombination map")
        catalog_map = stdpopsim.get_species("HomSap").get_genetic_map(
            id="HapMapII_GRCh37")
        if not catalog_map.is_cached():
            catalog_map.download()
        chr_map = catalog_map.get_chromosome_map(chrom)
    inference_distances = physical_to_genetic(chr_map, inference_pos)
    d = np.diff(inference_distances)
    rho = np.concatenate(([0.0], d))

    if np.any(d == 0):
        # Report the indices and positions where the distance is zero.
        w = np.where(d == 0)
        raise ValueError("Zero recombination rates at", w, inference_pos[w])

    return rho