def __init__(self):
    """Initialise the fixture with hard-coded placeholder metadata.

    A throwaway species backed by a genome with no chromosomes is created
    and handed to the parent initialiser together with a fixed name,
    an example.com URL and a file-name pattern.
    """
    placeholder_species = stdpopsim.Species(
        id="tesspe",
        name="Test species",
        genome=stdpopsim.Genome(chromosomes=[]),
    )
    super().__init__(
        species=placeholder_species,
        name="test_map",
        url="http://example.com/genetic_map.tar.gz",
        file_pattern="prefix_{name}.txt",
    )
def __init__(self):
    """Initialise the fixture with hard-coded placeholder metadata.

    Builds a test species whose genome has no chromosomes, then forwards it
    to the parent initialiser with a fixed id, two example.com URLs and a
    file name.
    """
    empty_genome = stdpopsim.Genome(chromosomes=[])
    test_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    super().__init__(
        species=test_species,
        id="test_annotation",
        url="http://example.com/annotation.gff.gz",
        zarr_url="http://example.com/annotation.zip",
        file_name="annotation.gff.gz",
    )
def __init__(self):
    """Initialise the fixture with hard-coded placeholder metadata.

    Builds a test species whose genome has no chromosomes, then forwards it
    to the parent initialiser with a fixed id, URL, checksum and file-name
    pattern.
    """
    test_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=stdpopsim.Genome(chromosomes=[]),
    )
    super().__init__(
        species=test_species,
        id="test_map",
        url="http://example.com/genetic_map.tar.gz",
        # The URL above does not exist, so this checksum is never verified.
        sha256="1234",
        file_pattern="prefix_{name}.txt",
    )
def __init__(self):
    """Initialise the fixture with hard-coded placeholder metadata.

    Builds a test species whose genome has no chromosomes, then forwards it
    to the parent initialiser with a fixed id, two example.com URLs, a dummy
    checksum and a description.
    """
    empty_genome = stdpopsim.Genome(chromosomes=[])
    test_species = stdpopsim.Species(
        id="TesSpe",
        name="Test species",
        common_name="Testy McTestface",
        genome=empty_genome,
    )
    super().__init__(
        species=test_species,
        id="test_annotation",
        url="http://example.com/annotation.gff.gz",
        zarr_url="http://example.com/annotation.zip",
        # Dummy value; it is not expected to be checked anywhere.
        zarr_sha256="1234",
        description="test annotation",
    )
def __init__(self):
    """Initialise the fixture with hard-coded placeholder metadata.

    Builds a test species (with an Ensembl id) whose genome has no
    chromosomes, then forwards it to the parent initialiser with fixed
    URLs, dummy checksums, a description, a file-name pattern and
    annotation source/type strings.
    """
    test_species = stdpopsim.Species(
        id="TesSpe",
        ensembl_id="test_species",
        name="Test species",
        common_name="Testy McTestface",
        genome=stdpopsim.Genome(chromosomes=[]),
    )
    super().__init__(
        species=test_species,
        id="test_annotation",
        url="http://example.com/annotation.gff.gz",
        intervals_url="http://example.com/annotation.zip",
        # Dummy value; it is not expected to be checked anywhere.
        intervals_sha256="1234",
        gff_sha256="6789",
        description="test annotation",
        file_pattern="yolo_{id}.txt",
        annotation_source="your mom",
        annotation_type="test",
    )
length=4641652, # Lapierre et al. (2016) refer to: # Genomic adaptive mutation rate: 1e-5, Perfeito et al. (2007), and # Genomic deleterious mutation rate: 2e−4, Kibota and Lynch (1996). mutation_rate=1e-5+2e-4, recombination_rate=0.0)) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! #: :class:`stdpopsim.Genome` definition for E. Coli. # Chromosome length data is based on strain K-12. _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _perfeito_et_al.because(stdpopsim.CiteReason.MUT_RATE), _kibota_and_lynch.because(stdpopsim.CiteReason.MUT_RATE), ], assembly_citations=[ _blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)]) _species = stdpopsim.Species( id="EscCol", name="Escherichia coli", common_name="E. coli", genome=_genome, generation_time=0.00003805175, # 1.0 / (525600 min/year / 20 min/gen) generation_time_citations=[ _sezonov_et_al.because(stdpopsim.CiteReason.GEN_TIME)], population_size=1.8e8, population_size_citations=[ _lapierre_et_al.because(stdpopsim.CiteReason.POP_SIZE)])
# Append one Chromosome per entry of the genome data table to the existing
# `_chromosomes` list.  All chromosomes share one mutation rate; the
# recombination rate is looked up per chromosome name.
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome definition, with one citation list per kind of parameter.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    mutation_rate_citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
    ],
    recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)],
    assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)],
)

# Species definition for cattle.
_species = stdpopsim.Species(
    id="BosTau",
    # The species epithet of a binomial name is written in lowercase.
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    generation_time_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.GEN_TIME)],
    population_size=62000,
    population_size_citations=[_MacLeodEtAl.because(stdpopsim.CiteReason.POP_SIZE)],
)
# Build one Chromosome per entry of the genome data table.  All chromosomes
# share one mutation rate; the recombination rate is looked up per
# chromosome name.
_chromosomes = []
for name, data in genome_data.data["chromosomes"].items():
    _chromosomes.append(
        stdpopsim.Chromosome(
            id=name,
            length=data["length"],
            synonyms=data["synonyms"],
            # Harland et al. (2017), sex-averaged estimate per bp per generation.
            mutation_rate=1.2e-8,
            recombination_rate=_recombination_rate_data[name],
        )
    )

# Genome definition; each citation is tagged with the reason it is cited.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    citations=[
        _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE),
        _MaEtAl.because(stdpopsim.CiteReason.REC_RATE),
        _RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY),
    ],
)

# Species definition for cattle.
_species = stdpopsim.Species(
    id="BosTau",
    ensembl_id="bos_taurus",
    # The species epithet of a binomial name is written in lowercase.
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    population_size=62000,
    citations=[_MacLeodEtAl],
)
# Populate the existing `_chromosomes` list, one entry per chromosome listed
# in the genome data table.
for name, data in genome_data.data["chromosomes"].items():
    _chromosome = stdpopsim.Chromosome(
        id=name,
        length=data["length"],
        synonyms=data["synonyms"],
        # mu=1.5e-8 per generation is the value used by Nater et al. 2017,
        # who assumed the rate is similar to that of humans and chimps.
        mutation_rate=1.5e-8,
        recombination_rate=_recombination_rate_data[name],
    )
    _chromosomes.append(_chromosome)

# Genome definition: chromosomes plus assembly metadata from the data table.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    assembly_name=genome_data.data["assembly_name"],
    assembly_accession=genome_data.data["assembly_accession"],
    citations=[_nater2017],
)

# Species definition for the Sumatran orangutan.
_species = stdpopsim.Species(
    id="PonAbe",
    ensembl_id="pongo_abelii",
    name="Pongo abelii",
    common_name="Sumatran orangutan",
    genome=_genome,
    # Locke et al. use this generation time without citing a source for it.
    generation_time=20,
    # Ancestral Ne as inferred by Locke et al.
    population_size=1.79e4,
    citations=[_locke2011],
)
name, length = line.split()[:2] _chromosomes.append(stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=7e-9, recombination_rate=200 / 124000 / 2 / 1e6)) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ stdpopsim.Citation( author="Ossowski et al.", year="2010", doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE})], recombination_rate_citations=[ stdpopsim.Citation( author="Huber et al.", year="2014", doi="https://doi.org/10.1093/molbev/msu247", reasons={stdpopsim.CiteReason.REC_RATE})], assembly_citations=[ stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", year="2007", author="Swarbreck et al.", reasons={stdpopsim.CiteReason.ASSEMBLY})]) _species = stdpopsim.Species( id="AraTha", name="Arabidopsis thaliana", common_name="A. thaliana", genome=_genome,
for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], # Nater et al. 2017 used mu=1.5e-8 per generation, based on the # assumption that it's similar to humans and chimps. mutation_rate=1.5e-8, recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _nater2017.because(stdpopsim.CiteReason.MUT_RATE) ], ) _species = stdpopsim.Species( id="PonAbe", name="Pongo abelii", common_name="Sumatran orangutan", genome=_genome, # generation time used by Locke et al. without further citation generation_time=20, generation_time_citations=[ _locke2011.because(stdpopsim.CiteReason.GEN_TIME) ], # Locke et al. inferred ancestral Ne
synonyms=data["synonyms"], # Wielgoss et al. (2011) calculated for strain REL606, # from synonymous substitutions over 40,000 generations. mutation_rate=8.9e-11, recombination_rate=0.0, ) ) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _wielgoss_et_al.because(stdpopsim.CiteReason.MUT_RATE), ], assembly_citations=[_blattner_et_al.because(stdpopsim.CiteReason.ASSEMBLY)], ) _species = stdpopsim.Species( id="EscCol", name="Escherichia coli", common_name="E. coli", # We use the K-12 strain, because the parameters we're using more # closely match this strain than the ensembl default (HUSEC2011). ensembl_id="escherichia_coli_str_k_12_substr_mg1655_gca_000005845", genome=_genome, # E. coli K-12 strain MG1655 "doubling time during steady-state growth in
stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=5.49e-9, # _SchriderEtAl de novo mutation rate recombination_rate=_recombination_rate_data[name], ) ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE), _DosSantosEtAl, _HoskinsEtAl, _ComeronEtAl.because(stdpopsim.CiteReason.REC_RATE), ], ) _species = stdpopsim.Species( id="DroMel", ensembl_id="drosophila_melanogaster", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, generation_time=0.1, population_size=1720600, citations=[_LiAndStephan],
chr22 51304566 1.4445022767788226e-08 chrX 155270560 1.164662223273842e-08 chrY 59373566 0.0 """ _chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split()[:3] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=1e-8, # WRONG!, recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome(chromosomes=_chromosomes) _species = stdpopsim.Species( id="homsap", name="H**o sapiens", genome=_genome, # TODO reference for these generation_time=25, population_size=10**4) stdpopsim.register_species(_species) ########################################################### # # Genetic maps #
stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=1.84e-9, # _Konrad et al. de-nove mutation rate, # it's not uniform and it's much better to use a mutation map. # mutation_rate=_mutation_rate_data[name], recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _genome1998, _KonradEtAl2019.because(stdpopsim.CiteReason.MUT_RATE), _KonradEtAl2017.because(stdpopsim.CiteReason.MUT_RATE), _Rockman2009.because(stdpopsim.CiteReason.REC_RATE), ], ) _species = stdpopsim.Species( id="CaeEle", ensembl_id="", name="Caenorhabditis elegans", common_name="C. elegans", genome=_genome, generation_time=0.01, # the generation time in the lab ~150 # generation per year (0.00666), it should be less in the wild population_size=10000,
for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split() _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=4e-9, # based on non-CpG sites only recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE), _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE), ], recombination_rate_citations=[ _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE) ], assembly_citations=[ _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY) ], ) _species = stdpopsim.Species( id="CanFam", name="Canis familiaris", common_name="Dog", genome=_genome, generation_time=3, generation_time_citations=[ # Everyone uses 3 years because everyone else uses it.
_chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=1.29e-8, recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[_tian2019.because(stdpopsim.CiteReason.MUT_RATE)], recombination_rate_citations=[ _hapmap2007.because(stdpopsim.CiteReason.REC_RATE) ], assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], assembly_citations=[_genome2001], ) _species = stdpopsim.Species( id="HomSap", name="H**o sapiens", common_name="Human", genome=_genome, generation_time=30, generation_time_citations=[ _tremblay2000.because(stdpopsim.CiteReason.GEN_TIME) ], population_size=10**4,
author="Nater et al.", year=2017, doi="https://doi.org/10.1016/j.cub.2017.09.047") _chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split()[:3] _chromosomes.append( stdpopsim.Chromosome(id=name, length=int(length), mutation_rate=1.5e-8, recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome(chromosomes=_chromosomes, mutation_rate_citations=[ _nater2017.because( stdpopsim.CiteReason.MUT_RATE) ]) _species = stdpopsim.Species( id="PonAbe", name="Pongo abelii", common_name="Sumatran orangutan", genome=_genome, generation_time=20, generation_time_citations=[ _locke2011.because(stdpopsim.CiteReason.GEN_TIME) ], population_size=1.79e4, population_size_citations=[ _locke2011.because(stdpopsim.CiteReason.POP_SIZE)
recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ stdpopsim.Citation( author="Ossowski et al.", year=2010, doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE}, ), stdpopsim.Citation( author="Huber et al.", year=2014, doi="https://doi.org/10.1093/molbev/msu247", reasons={stdpopsim.CiteReason.REC_RATE}, ), stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", year=2007, author="Swarbreck et al.", reasons={stdpopsim.CiteReason.ASSEMBLY}, ), ], ) stdpopsim.utils.append_common_synonyms(_genome) _species = stdpopsim.Species(
_recombination_rate_data["mitochondrion_genome"] = 0 _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=5.49e-9, # citation: _SchriderEtAl recombination_rate=_recombination_rate_data[name])) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE) ], assembly_citations=[_DosSantosEtAl]) _species = stdpopsim.Species( id="DroMel", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, generation_time=0.1, generation_time_citations=[ _LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME) ], population_size=1720600, population_size_citations=[
for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append(stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=7e-9, recombination_rate=8.1e-9)) _SwarbreckEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", year="2007", author="Swarbreck et al.", reasons={stdpopsim.CiteReason.ASSEMBLY} ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_citations=[ _SwarbreckEtAl]) _species = stdpopsim.Species( id="AraTha", name="Arabidopsis thaliana", common_name="A. thaliana", genome=_genome, generation_time=1.0, generation_time_citations=[stdpopsim.Citation( doi="https://doi.org/10.1890/0012-9658(2002)083[1006:GTINSO]2.0.CO;2", year="2002", author="Donohue", reasons={stdpopsim.CiteReason.GEN_TIME})], population_size=10**4, population_size_citations=[stdpopsim.Citation(
for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=5.49e-9, # citation: _SchriderEtAl recombination_rate=8.4e-9)) # WRONG, underestimate used in S&S! # TODO need to port this documentation somewhere. # class:`stdpopsim.Genome` definition for D. melanogaster. Chromosome length data is # based on `dm6 <https://www.ncbi.nlm.nih.gov/assembly/GCF_000001215.4/>`_. _genome = stdpopsim.Genome(chromosomes=_chromosomes, mutation_rate_citations=[ _SchriderEtAl.because( stdpopsim.CiteReason.MUT_RATE) ], assembly_citations=[_DosSantosEtAl]) _species = stdpopsim.Species( id="DroMel", name="Drosophila melanogaster", common_name="D. melanogaster", genome=_genome, generation_time=0.1, generation_time_citations=[ _LiAndStephan.because(stdpopsim.CiteReason.GEN_TIME) ], population_size=1720600, population_size_citations=[ _LiAndStephan.because(stdpopsim.CiteReason.POP_SIZE)
# Populate the existing `_chromosomes` list, one entry per chromosome listed
# in the genome data table.
for name, data in genome_data.data["chromosomes"].items():
    _chromosome = stdpopsim.Chromosome(
        id=name,
        length=data["length"],
        synonyms=data["synonyms"],
        # Sex-averaged per-bp, per-generation estimate of Harland et al. (2017).
        mutation_rate=1.2e-8,
        recombination_rate=_recombination_rate_data[name],
    )
    _chromosomes.append(_chromosome)

# Genome definition with its supporting citations.
_genome = stdpopsim.Genome(
    chromosomes=_chromosomes,
    citations=[
        _RosenEtAl,
        _HarlandEtAl,
        _MaEtAl,
    ],
)

# Species definition for cattle.
_species = stdpopsim.Species(
    id="BosTau",
    ensembl_id="bos_taurus",
    name="Bos taurus",
    common_name="Cattle",
    genome=_genome,
    generation_time=5,
    # Most recent Ne reported in _MacLeodEtAl.
    population_size=90,
    citations=[_MacLeodEtAl],
)
for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=4e-9, # based on non-CpG sites only recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE), _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE), _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE), _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY), ], ) _species = stdpopsim.Species( id="CanFam", ensembl_id="canis_familiaris", name="Canis familiaris", common_name="Dog", genome=_genome, population_size=13000, # ancestral dog size generation_time=3, citations=[
_chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=1.29e-8, recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _genome2001, _tian2019.because(stdpopsim.CiteReason.MUT_RATE), _hapmap2007.because(stdpopsim.CiteReason.REC_RATE), ], ) stdpopsim.utils.append_common_synonyms(_genome) _species = stdpopsim.Species( id="HomSap", ensembl_id="homo_sapiens", name="H**o sapiens", common_name="Human", genome=_genome, generation_time=30, population_size=10**4, citations=[