Beispiel #1
0
 def check_citations(self, engine, species, genetic_map, model, stderr):
     """Assert that every expected citation shows up in ``stderr``.

     For each citation attached to ``engine``, ``model`` and
     ``genetic_map``, the author, the year (converted to ``str``) and the
     DOI must each occur as a substring of ``stderr``.

     When ``genetic_map`` is ``None`` a placeholder map with an empty
     citation list is built from ``species.id`` so that all three sources
     can be handled by the same loop.
     """
     if genetic_map is None:
         genetic_map = stdpopsim.GeneticMap(species.id, citations=[])
     sources = (
         (engine.citations, f"engine citation not written for {engine.id}"),
         (model.citations, f"model citation not written for {model.id}"),
         (genetic_map.citations,
          f"genetic map citation not written for {genetic_map.id}"),
     )
     for citations, assert_msg in sources:
         for citation in citations:
             # assertIn reports the missing value and the searched text on
             # failure; assertTrue(x in y) only says "False is not true".
             self.assertIn(citation.author, stderr, msg=assert_msg)
             self.assertIn(str(citation.year), stderr, msg=assert_msg)
             self.assertIn(citation.doi, stderr, msg=assert_msg)
Beispiel #2
0
 def check_citations(self, engine, species, genetic_map, model, output):
     """Assert that every expected citation shows up in ``output``.

     For each citation attached to ``engine``, ``model`` and
     ``genetic_map``, the author, the year (converted to ``str``) and the
     DOI must each occur as a substring of ``output``.

     When ``genetic_map`` is ``None`` a placeholder map with dummy
     metadata and an empty citation list is built for ``species`` so that
     all three sources can be handled by the same loop.
     """
     if genetic_map is None:
         genetic_map = stdpopsim.GeneticMap(
             species,
             id="test",
             url="http://example.com/test.tgz",
             sha256="1234",
             citations=[])
     sources = (
         (engine.citations, f"engine citation not written for {engine.id}"),
         (model.citations, f"model citation not written for {model.id}"),
         (genetic_map.citations,
          f"genetic map citation not written for {genetic_map.id}"),
     )
     for citations, assert_msg in sources:
         for citation in citations:
             # assertIn reports the missing value and the searched text on
             # failure; assertTrue(x in y) only says "False is not true".
             self.assertIn(citation.author, output, msg=assert_msg)
             self.assertIn(str(citation.year), output, msg=assert_msg)
             self.assertIn(citation.doi, output, msg=assert_msg)
import stdpopsim

# Fetch the species object with id "AraTha"; the map below is attached to it.
_species = stdpopsim.get_species("AraTha")

# Loess-averaged crossover map; its single citation is tagged as the
# genetic-map source.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="SalomeAveraged_TAIR7",
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "AraTha/salome2012_maps.tar.gz"
    ),
    sha256="49745e1cab87d59e33eacfdf66303839632d3b07883dd55a99fe1dc27b336ac6",
    file_pattern="arab_chr{id}_map_loess.txt",
    description="Crossover frequency map averaged over 17 populations",
    long_description="""
        This map is based on the study of crossover frequencies in over 7000
        plants in 17 F2 populations derived from crosses between 18 A. thaliana
        accessions. Salomé et al provide genetic maps for each of these
        populations. To get a single map for each chromosome, the Haldane map
        function distances were converted to recombination rates (cM/Mb) for
        each cross and then averaged across the 17 populations using loess.
        """,
    citations=[
        stdpopsim.Citation(
            author="Salomé et al.",
            year=2012,
            doi="https://doi.org/10.1038/hdy.2011.95",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        )
    ],
)
_species.add_genetic_map(_gm)
Beispiel #4
0
#
# Genetic maps
#
###########################################################

# Crossover map lifted over to dm6; its single citation is tagged as the
# genetic-map source.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="ComeronCrossover_dm6",
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "DroMel/comeron2012_maps.tar.gz"
    ),
    file_pattern="genetic_map_comeron2012_dm6_{id}.txt",
    description="Crossover map from meioses products of 8 lab crosses",
    long_description="""
        The crossover map from a study of 8 crosses of 12 highly
        inbred lines of D. melanogaster. This is based on the
        products of 5,860 female meioses from whole genome sequencing data.
        Recombination rates were calculated from the density of individual
        recombination events that were detected in crosses. This map was
        subsequently lifted over to the dm6 assembly.
        """,
    citations=[
        stdpopsim.Citation(
            author="Comeron et al",
            year=2012,
            doi="https://doi.org/10.1371/journal.pgen.1002905",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)

_species.add_genetic_map(_gm)

###########################################################
#
Beispiel #5
0
# There are two genetic maps available for Orangutan species: one for Pongo
# abelii (Sumatran orangutan) and one for Pongo pygmaeus (Bornean orangutan).
# Both recombination maps were inferred using LDhat in Nater et al. (2017),
# doi: 10.1016/j.cub.2017.09.047. Both recombination maps are mapped to PonAbe2.
# Recombination maps from Nater et al. were converted from rho/kbp to cM using
# Watterson's estimator of theta to estimate Ne = 41,000 (Sumatra) and
# Ne = 27,000 (Borneo). See supporting information in Nater et al. for details.

# Pongo abelii map; it cites `_nater2017` (defined outside this snippet)
# with the genetic-map reason attached.
_gm_pa = stdpopsim.GeneticMap(
    species=_species,
    id="NaterPA_PonAbe2",
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/PonAbe/"
        "NaterPA_PonAbe2.tar.gz"
    ),
    sha256="33b0162a6d945dd341bd1086d213ad1bc16949c9210d3b49d92692fd7f831ace",
    file_pattern="Nater_et_al_PA_chr{id}_PonAbe2.txt",
    description="From Nater et al. (2017) for Pongo abelii",
    long_description="""
        This genetic map is from the Nater et al. (2017) study, inferred using
        LDhat from n=15 whole-genome sequenced Sumatran orangutan individuals.
        See https://doi.org/10.1016/j.cub.2017.09.047 for more details.
        """,
    citations=[
        _nater2017.because(stdpopsim.CiteReason.GEN_MAP),
    ],
)
_species.add_genetic_map(_gm_pa)

_gm_pp = stdpopsim.GeneticMap(
    species=_species,
    id="NaterPP_PonAbe2",
    description="From Nater et al. (2017) for Pongo pygmaeus",
    long_description="""
        This genetic map is from the Nater et al. (2017) study, inferred using
Beispiel #6
0
# Both recombination maps were inferred using LDhat in Nater et al. (2017),
# doi: 10.1016/j.cub.2017.09.047. Both recombination maps are mapped to PonAbe2.
# Recombination maps from Nater et al. were converted from rho/kbp to cM using
# Watterson's estimator of theta to estimate Ne = 41,000 (Sumatra) and
# Ne = 27,000 (Borneo). See supporting information in Nater et al. for details.

# Pongo abelii map; its single citation is tagged as the genetic-map source.
_gm_pa = stdpopsim.GeneticMap(
    species=_species,
    id="NaterPA_PonAbe2",
    # Adjacent literals concatenate into one URL, keeping lines short.
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/PonAbe/"
        "NaterPA_PonAbe2.tar.gz"
    ),
    file_pattern="Nater_et_al_PA_{id}_PonAbe2.txt",
    description="From Nater et al. (2017) for Pongo abelii",
    long_description="""
        This genetic map is from the Nater et al. (2017) study, inferred using
        LDhat from n=15 whole-genome sequenced Sumatran orangutan individuals.
        See https://doi.org/10.1016/j.cub.2017.09.047 for more details.
        """,
    citations=[
        stdpopsim.Citation(
            author="Nater et al.",
            year=2017,
            doi="https://doi.org/10.1016/j.cub.2017.09.047",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)
_species.add_genetic_map(_gm_pa)

_gm_pp = stdpopsim.GeneticMap(
    species=_species,
    id="NaterPP_PonAbe2",
    description="From Nater et al. (2017) for Pongo pygmaeus",
Beispiel #7
0
import stdpopsim

# Fetch the species object with id "CanFam"; the map below is attached to it.
_species = stdpopsim.get_species("CanFam")

# Campbell et al. (2016):
# A Pedigree-Based Map of Recombination in the Domestic Dog Genome.
_CampbellEtAl = stdpopsim.Citation(
    doi="https://doi.org/10.1534/g3.116.034678",
    year=2016,
    author="Campbell et al.",
)

# Pedigree-based crossover map; cites the paper above with the genetic-map
# reason attached.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Campbell2016_CanFam3_1",
    url=(
        "https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
        "CanFam/dog_genetic_maps.tar.gz"
    ),
    sha256="585afb424615e2fb0825d807db0b10fe1c797a6dbb804ecbb3fef5e8387d194f",
    file_pattern="chr{id}_average_canFam3.1.txt",
    description="Pedigree-based crossover map from 237 individuals",
    long_description="""
        Sex-averaged crossover frequency map based on 163,400 autosomal SNPs
        genotyped in a pedigree of 237 Labrador Retriever x Greyhound crosses.
        Genotypes were phased without respect to the pedigree, using SHAPEIT2,
        recombinations were called using duoHMM, and genetic distances were
        obtained using Haldane's map function.
        """,
    citations=[
        _CampbellEtAl.because(stdpopsim.CiteReason.GEN_MAP),
    ],
)
_species.add_genetic_map(_gm)
# Fetch the species object with id "DroMel"; the map below is attached to it.
_species = stdpopsim.get_species("DroMel")

# Crossover map lifted over to dm6; its single citation is tagged as the
# genetic-map source.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="ComeronCrossover_dm6",
    url=("https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
         "DroMel/comeron2012_maps.tar.gz"),
    sha256="08185a0e3b0ad26eefe69fc6bdb8f3f599a760e11e87dd343335b33d1563f62a",
    file_pattern="genetic_map_comeron2012_dm6_chr{id}.txt",
    description="Crossover map from meioses products of 8 lab crosses",
    long_description="""
        The crossover map from a study of 8 crosses of 12 highly
        inbred lines of D. melanogaster. This is based on the
        products of 5,860 female meioses from whole genome sequencing data.
        Recombination rates were calculated from the density of individual
        recombination events that were detected in crosses. This map was
        subsequently lifted over to the dm6 assembly.
        """,
    citations=[
        stdpopsim.Citation(
            doi="https://doi.org/10.1371/journal.pgen.1002905",
            author="Comeron et al",
            year=2012,
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)
_species.add_genetic_map(_gm)
)

# Fetch the species object with id "HomSap"; the map below is attached to it.
_species = stdpopsim.get_species("HomSap")

# HapMap Phase II map on GRCh37; it cites `_hapmap2007` (defined outside
# this snippet) with the genetic-map reason attached.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="HapMapII_GRCh37",
    url=("https://stdpopsim.s3-us-west-2.amazonaws.com/genetic_maps/"
         "HomSap/HapmapII_GRCh37_RecombinationHotspots.tar.gz"),
    sha256="80f22d9e6cb0e497074ed1bc277e765fa9d8e22f21b2f66c3b10286520f6b68f",
    file_pattern="genetic_map_GRCh37_chr{id}.txt",
    description="HapMap Phase II lifted over to GRCh37",
    long_description="""
        This genetic map is from the Phase II Hapmap project
        and based on 3.1 million genotyped SNPs
        from 270 individuals across four populations (YRI, CEU, CHB and JPT).
        Genome wide recombination rates were estimated using LDHat.
        This version of the HapMap genetic map was lifted over to GRCh37
        (and adjusted in regions where the genome assembly had rearranged)
        for use in the 1000 Genomes project. Please see the README file on
        the 1000 Genomes download site for details of these adjustments.
        ftp://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/20110106_recombination_hotspots
        """,
    citations=[
        _hapmap2007.because(stdpopsim.CiteReason.GEN_MAP),
    ],
)
_species.add_genetic_map(_gm)

_gm = stdpopsim.GeneticMap(
    species=_species,
Beispiel #10
0
# Genetic maps
#
###########################################################

# Phase II HapMap genetic map lifted over to GRCh37, attached to `_species`.
_gm = stdpopsim.GeneticMap(
    species=_species,
    name="HapmapII_GRCh37",
    url=("http://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/"
         "20110106_recombination_hotspots/"
         "HapmapII_GRCh37_RecombinationHotspots.tar.gz"),
    file_pattern="genetic_map_GRCh37_{name}.txt",
    description=(
        "The Phase II HapMap Genetic map (lifted over to GRCh37) used in "
        "1000 Genomes. Please see the README for more details."),
    citations=[
        # doi 10.1038/nature06258 is the Phase II HapMap paper, authored by
        # The International HapMap Consortium (not the 1000 Genomes Project).
        stdpopsim.Citation(doi="https://doi.org/10.1038/nature06258",
                           year=2007,
                           author="The International HapMap Consortium"),
        # TODO update the citation class to accept a plain URL as well
        # for things like this. Until then the README location is carried in
        # the doi field, with no year or author.
        stdpopsim.Citation(
            doi=("<ftp://ftp-trace.ncbi.nih.gov/1000genomes"
                 "/ftp/technical/working/20110106_recombination_hotspots"
                 "/README_hapmapII_GRCh37_map>"),
            year=None,
            author=None)
    ])
_species.add_genetic_map(_gm)

_gm = stdpopsim.GeneticMap(
    species=_species,
Beispiel #11
0
# Genetic maps
#
###########################################################

# HapMap Phase II map on GRCh37; its single citation is tagged as the
# genetic-map source.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="HapMapII_GRCh37",
    url=(
        "https://ftp-trace.ncbi.nih.gov/1000genomes/ftp/technical/working/"
        "20110106_recombination_hotspots/"
        "HapmapII_GRCh37_RecombinationHotspots.tar.gz"
    ),
    file_pattern="genetic_map_GRCh37_{id}.txt",
    description="HapMap Phase II lifted over to GRCh37",
    long_description="""
        This genetic map is from the Phase II Hapmap project
        and based on 3.1 million genotyped SNPs
        from 270 individuals across four populations (YRI, CEU, CHB and JPT).
        Genome wide recombination rates were estimated using LDHat.
        This version of the HapMap genetic map was lifted over to GRCh37
        (and adjusted in regions where the genome assembly had rearranged)
        for use in the 1000 Genomes project. Please see the README file on
        the 1000 Genomes download site for details of these adjustments.
        """,
    citations=[
        stdpopsim.Citation(
            author="The International HapMap Consortium",
            year=2007,
            doi="https://doi.org/10.1038/nature06258",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)
_species.add_genetic_map(_gm)

_gm = stdpopsim.GeneticMap(
    species=_species,
#
# Genetic maps
#
###########################################################

# Loess-averaged crossover map; its single citation is tagged as the
# genetic-map source.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="SalomeAveraged_TAIR7",
    url=(
        "http://www.eeb.ucla.edu/Faculty/Lohmueller/data/"
        "uploads/salome2012_maps.tar.gz"
    ),
    file_pattern="arab_{id}_map_loess.txt",
    description="Crossover frequency map averaged over 17 populations",
    long_description="""
        This map is based on the study of crossover frequencies in over 7000
        plants in 17 F2 populations derived from crosses between 18 A. thaliana
        accessions. Salomé et al provide genetic maps for each of these
        populations. To get a single map for each chromosome, the Haldane map
        function distances were converted to recombination rates (cM/Mb) for
        each cross and then averaged across the 17 populations using loess.
        """,
    citations=[
        stdpopsim.Citation(
            author="Salomé et al.",
            year=2012,
            doi="https://doi.org/10.1038/hdy.2011.95",
            reasons={stdpopsim.CiteReason.GEN_MAP},
        ),
    ],
)
_species.add_genetic_map(_gm)

###########################################################
#
# Demographic models
stdpopsim.register_species(_species)

###########################################################
#
# Genetic maps
#
###########################################################

# Comeron et al. (2012) map lifted over to dm6, attached to `_species`.
_gm = stdpopsim.GeneticMap(
    species=_species,
    name="Comeron2012_dm6",
    url=("http://sesame.uoregon.edu/~adkern/dmel_recombination_map/"
         "comeron2012_maps.tar.gz"),
    file_pattern="genetic_map_comeron2012_dm6_{name}.txt",
    description=(
        # TODO more detail
        "Comeron et al. (2012) maps (lifted over to dm6)."),
    citations=[
        stdpopsim.Citation(
            author="Comeron et al",
            # Real DOI of the Comeron et al. (2012) paper, replacing the
            # former "FIXME" placeholder.
            doi="https://doi.org/10.1371/journal.pgen.1002905",
            year=2012)
    ])

_species.add_genetic_map(_gm)

###########################################################
#
# Demographic models
#
###########################################################
    generation_time_citations=[
        # Everyone uses 3 years because everyone else uses it.
        # It's likely higher, at least in wolves:
        # https://pubs.er.usgs.gov/publication/70187564
    ],
    population_size=13000,  # ancestral dog size
    population_size_citations=[
        _LindbladTohEtAl.because(stdpopsim.CiteReason.POP_SIZE)
    ],
)

stdpopsim.register_species(_species)

# Pedigree-based crossover map; it cites `_CampbellEtAl` (defined outside
# this snippet) with the genetic-map reason attached.
_gm = stdpopsim.GeneticMap(
    species=_species,
    id="Campbell2016_CanFam3_1",
    url=(
        "https://github.com/cflerin/dog_recombination/raw/master/"
        "dog_genetic_maps.tar.gz"
    ),
    file_pattern="{id}_average_canFam3.1.txt",
    description="Pedigree-based crossover map from 237 individuals",
    long_description="""
        Sex-averaged crossover frequency map based on 163,400 autosomal SNPs
        genotyped in a pedigree of 237 Labrador Retriever x Greyhound crosses.
        Genotypes were phased without respect to the pedigree, using SHAPEIT2,
        recombinations were called using duoHMM, and genetic distances were
        obtained using Haldane's map function.
        """,
    citations=[
        _CampbellEtAl.because(stdpopsim.CiteReason.GEN_MAP),
    ],
)
_species.add_genetic_map(_gm)
stdpopsim.register_species(_species)

###########################################################
#
# Genetic maps
#
###########################################################

# Salome et al. (2012) map averaged across crosses, attached to `_species`.
_gm = stdpopsim.GeneticMap(
    species=_species,
    name="Salome2012",
    url=(
        "http://www.eeb.ucla.edu/Faculty/Lohmueller/data/"
        "uploads/salome2012_maps.tar.gz"),
    file_pattern="arab_{name}_map_loess.txt",
    description=(
        "Genetic map from Salome 2012 averaged across population crosses. "
        "Please see this repo for details on how this was done: "
        "https://github.com/LohmuellerLab/arabidopsis_recomb_maps"),
    citations=[stdpopsim.Citation(
        # Real DOI of the Salome et al. (2012) paper, replacing the former
        # "FIXME" placeholder.
        doi="https://doi.org/10.1038/hdy.2011.95",
        author="Salome et al.",
        year=2012)]
    )
_species.add_genetic_map(_gm)


###########################################################
#
# Demographic models
#
###########################################################