year=2011, doi="http://doi.org/10.1038/nature09687") _nater2017 = stdpopsim.Citation( author="Nater et al.", year=2017, doi="https://doi.org/10.1016/j.cub.2017.09.047") _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], # Nater et al. 2017 used mu=1.5e-8 per generation, based on the # assumption that it's similar to humans and chimps. mutation_rate=1.5e-8, recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _nater2017.because(stdpopsim.CiteReason.MUT_RATE) ], ) _species = stdpopsim.Species(
reasons={stdpopsim.CiteReason.ASSEMBLY}) _genome_wide_estimate = 8.4e-9 # WRONG, underestimate used in S&S! _recombination_rate_data = collections.defaultdict( lambda: _genome_wide_estimate) # Set some exceptions for non-recombining chrs. _recombination_rate_data["Y"] = 0 _recombination_rate_data["mitochondrion_genome"] = 0 _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=5.49e-9, # citation: _SchriderEtAl recombination_rate=_recombination_rate_data[name])) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE) ], assembly_citations=[_DosSantosEtAl]) _species = stdpopsim.Species( id="DroMel", name="Drosophila melanogaster",
"2R": 2.23458641776e-08, "3L": 1.79660308862e-08, "3R": 1.71642045777e-08, "4": 2.00579550709e-08, "X": 2.89650687913e-08, "Y": 0, "mitochondrion_genome": 0, } _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=5.49e-9, # _SchriderEtAl de novo mutation rate recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _SchriderEtAl.because(stdpopsim.CiteReason.MUT_RATE), _DosSantosEtAl, _HoskinsEtAl, _ComeronEtAl.because(stdpopsim.CiteReason.REC_RATE), ], )
_locke2011 = stdpopsim.Citation(author="Locke et al.", year=2011, doi="http://doi.org/10.1038/nature09687") _nater2017 = stdpopsim.Citation( author="Nater et al.", year=2017, doi="https://doi.org/10.1016/j.cub.2017.09.047") _chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split()[:3] _chromosomes.append( stdpopsim.Chromosome(id=name, length=int(length), mutation_rate=1.5e-8, recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome(chromosomes=_chromosomes, mutation_rate_citations=[ _nater2017.because( stdpopsim.CiteReason.MUT_RATE) ]) _species = stdpopsim.Species( id="PonAbe", name="Pongo abelii", common_name="Sumatran orangutan", genome=_genome, generation_time=20, generation_time_citations=[
chr21 34683425 0.95e-8 chr22 35308119 0.95e-8 chrX 151242693 0.95e-8 """ _locke2011 = stdpopsim.Citation( author="Locke et al.", year=2011, doi="http://doi.org/10.1038/nature09687" ) _chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split()[:3] _chromosomes.append(stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=2.0e-8, recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _locke2011.because(stdpopsim.CiteReason.MUT_RATE)]) _species = stdpopsim.Species( id="PonPyg", name="Pongo pygmaeus", common_name="Bornean orangutan", genome=_genome, generation_time=20, generation_time_citations=[ _locke2011.because(stdpopsim.CiteReason.GEN_TIME)],
_kibota_and_lynch = stdpopsim.Citation( author="Kibota and Lynch", year="1996", doi="https://doi.org/10.1038/381694a0") _blattner_et_al = stdpopsim.Citation( author="Blattner et al.", year="1997", doi="10.1126/science.277.5331.1453") _chromosomes = [] _chromosomes.append(stdpopsim.Chromosome( id=None, length=4641652, # Lapierre et al. (2016) refer to: # Genomic adaptive mutation rate: 1e-5, Perfeito et al. (2007), and # Genomic deleterious mutation rate: 2e−4, Kibota and Lynch (1996). mutation_rate=1e-5+2e-4, recombination_rate=0.0)) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! #: :class:`stdpopsim.Genome` definition for E. Coli. # Chromosome length data is based on strain K-12. _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _perfeito_et_al.because(stdpopsim.CiteReason.MUT_RATE), _kibota_and_lynch.because(stdpopsim.CiteReason.MUT_RATE), ],
_kibota_and_lynch = stdpopsim.Citation(author="Kibota and Lynch", year="1996", doi="https://doi.org/10.1038/381694a0") _blattner_et_al = stdpopsim.Citation(author="Blattner et al.", year="1997", doi="10.1126/science.277.5331.1453") _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], # Lapierre et al. (2016) refer to: # Genomic adaptive mutation rate: 1e-5, Perfeito et al. (2007), and # Genomic deleterious mutation rate: 2e−4, Kibota and Lynch (1996). mutation_rate=1e-5 + 2e-4, recombination_rate=0.0)) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! #: :class:`stdpopsim.Genome` definition for E. Coli. # Chromosome length data is based on strain K-12. _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"],
_chromosome_data = """\ chr1 30427671 chr2 19698289 chr3 23459830 chr4 18585056 chr5 26975502 """ # mutation rate from Ossowski 2010 Science # recombination value from Huber et al 2014 MBE # rho=200/Mb, assume Ne=124,000, rho=2*Ne*r _chromosomes = [] for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append(stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=7e-9, recombination_rate=200 / 124000 / 2 / 1e6)) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ stdpopsim.Citation( author="Ossowski et al.", year="2010", doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE})], recombination_rate_citations=[ stdpopsim.Citation( author="Huber et al.", year="2014", doi="https://doi.org/10.1093/molbev/msu247",
"II": 3.999342e-11, "III": 4.484974e-11, "IV": 2.417689e-11, "V": 2.722476e-11, "X": 3.447911e-11, "MtDNA": 0, } _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=1.84e-9, # _Konrad et al. de-nove mutation rate, # it's not uniform and it's much better to use a mutation map. # mutation_rate=_mutation_rate_data[name], recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _genome1998, _KonradEtAl2019.because(stdpopsim.CiteReason.MUT_RATE), _KonradEtAl2017.because(stdpopsim.CiteReason.MUT_RATE), _Rockman2009.because(stdpopsim.CiteReason.REC_RATE), ],
_chromosome_data = """\ chr1 30427671 chr2 19698289 chr3 23459830 chr4 18585056 chr5 26975502 """ # mutation rate from Ossowski 2010 Science # recombination value from Huber et al 2014 MBE # rho=200/Mb, assume Ne=124,000, rho=2*Ne*r _chromosomes = [] for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append(stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=7e-9, recombination_rate=8.1e-9)) _SwarbreckEtAl = stdpopsim.Citation( doi="https://doi.org/10.1093/nar/gkm965", year="2007", author="Swarbreck et al.", reasons={stdpopsim.CiteReason.ASSEMBLY} ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_citations=[ _SwarbreckEtAl]) _species = stdpopsim.Species(
chr2L 23513712 chr2R 25286936 chr3L 28110227 chr3R 32079331 chr4 1348131 chrY 3667352 chrM 19524 """ _chromosomes = [] for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=8.4e-9, # WRONG!, underestimate used in S&S recombination_rate=8.4e-9)) # WRONG, underestimate used in S&S! # TODO need to port this documentation somewhere. # class:`stdpopsim.Genome` definition for D. melanogaster. Chromosome length data is # based on `dm6 <https://www.ncbi.nlm.nih.gov/assembly/GCF_000001215.4/>`_. _genome = stdpopsim.Genome(chromosomes=_chromosomes) _species = stdpopsim.Species( id="dromel", name="Drosophila melanogaster", genome=_genome, # TODO reference for these generation_time=0.1,
year=2016, doi="https://doi.org/10.1126/science.aaf3161") _CampbellEtAl = stdpopsim.Citation( # A Pedigree-Based Map of Recombination in the Domestic Dog Genome. author="Campbell et al.", year=2016, doi="https://doi.org/10.1534/g3.116.034678") _chromosomes = [] for line in _chromosome_data.splitlines(): name, length, mean_rr = line.split() _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=4e-9, # based on non-CpG sites only recombination_rate=float(mean_rr))) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE), _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE), ], recombination_rate_citations=[ _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE) ], assembly_citations=[ _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY) ],
_recombination_rate_data = { str(j): _mean_recombination_rate for j in range(1, 6) } _recombination_rate_data["Mt"] = 0 _recombination_rate_data["Pt"] = 0 # JK Is this correct?? # mutation rate from Ossowski 2010 Science # recombination value from Huber et al 2014 MBE # rho=200/Mb, assume Ne=124,000, rho=2*Ne*r _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append(stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=7e-9, recombination_rate=_recombination_rate_data[name])) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ stdpopsim.Citation( author="Ossowski et al.", year="2010", doi="https://doi.org/10.1126/science.1180677", reasons={stdpopsim.CiteReason.MUT_RATE})], recombination_rate_citations=[ stdpopsim.Citation(
_wielgoss_et_al = stdpopsim.Citation( author="Wielgoss et al.", year="2011", doi="https://doi.org/10.1534/g3.111.000406" ) _blattner_et_al = stdpopsim.Citation( author="Blattner et al.", year="1997", doi="10.1126/science.277.5331.1453" ) _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], # Wielgoss et al. (2011) calculated for strain REL606, # from synonymous substitutions over 40,000 generations. mutation_rate=8.9e-11, recombination_rate=0.0, ) ) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], mutation_rate_citations=[ _wielgoss_et_al.because(stdpopsim.CiteReason.MUT_RATE),
) _tremblay2000 = stdpopsim.Citation( doi="https://doi.org/10.1086/302770", year=2000, author="Tremblay and Vézina", reasons={stdpopsim.CiteReason.GEN_TIME}, ) _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=1.29e-8, recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _genome2001, _tian2019.because(stdpopsim.CiteReason.MUT_RATE), _hapmap2007.because(stdpopsim.CiteReason.REC_RATE), ], ) stdpopsim.utils.append_common_synonyms(_genome)
author="Schrider et al.", year=2013, doi="https://doi.org/10.1534/genetics.113.151670") _DosSantosEtAl = stdpopsim.Citation(doi="https://doi.org/10.1093/nar/gku1099", year="2015", author="dos Santos et al.", reasons={stdpopsim.CiteReason.ASSEMBLY}) _chromosomes = [] for line in _chromosome_data.splitlines(): name, length = line.split()[:2] _chromosomes.append( stdpopsim.Chromosome( id=name, length=int(length), mutation_rate=5.49e-9, # citation: _SchriderEtAl recombination_rate=8.4e-9)) # WRONG, underestimate used in S&S! # TODO need to port this documentation somewhere. # class:`stdpopsim.Genome` definition for D. melanogaster. Chromosome length data is # based on `dm6 <https://www.ncbi.nlm.nih.gov/assembly/GCF_000001215.4/>`_. _genome = stdpopsim.Genome(chromosomes=_chromosomes, mutation_rate_citations=[ _SchriderEtAl.because( stdpopsim.CiteReason.MUT_RATE) ], assembly_citations=[_DosSantosEtAl]) _species = stdpopsim.Species(
# 24.35 / 2628394923 = 9.26e-9 per bp per generation. _genome_wide_recombination_rate = 9.26e-9 _recombination_rate_data = collections.defaultdict( lambda: _genome_wide_recombination_rate ) # Set some exceptions for non-recombining chrs. _recombination_rate_data["MT"] = 0 _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], # Harland et al. (2017), sex-averaged estimate per bp per generation. mutation_rate=1.2e-8, recombination_rate=_recombination_rate_data[name], ) ) _genome = stdpopsim.Genome( chromosomes=_chromosomes, mutation_rate_citations=[ _HarlandEtAl.because(stdpopsim.CiteReason.MUT_RATE), ], recombination_rate_citations=[_MaEtAl.because(stdpopsim.CiteReason.REC_RATE)], assembly_citations=[_RosenEtAl.because(stdpopsim.CiteReason.ASSEMBLY)], ) _species = stdpopsim.Species(
) _CampbellEtAl = stdpopsim.Citation( # A Pedigree-Based Map of Recombination in the Domestic Dog Genome. author="Campbell et al.", year=2016, doi="https://doi.org/10.1534/g3.116.034678", ) _chromosomes = [] for name, data in genome_data.data["chromosomes"].items(): _chromosomes.append( stdpopsim.Chromosome( id=name, length=data["length"], synonyms=data["synonyms"], mutation_rate=4e-9, # based on non-CpG sites only recombination_rate=_recombination_rate_data[name], )) _genome = stdpopsim.Genome( chromosomes=_chromosomes, assembly_name=genome_data.data["assembly_name"], assembly_accession=genome_data.data["assembly_accession"], citations=[ _SkoglundEtAl.because(stdpopsim.CiteReason.MUT_RATE), _FranzEtAl.because(stdpopsim.CiteReason.MUT_RATE), _CampbellEtAl.because(stdpopsim.CiteReason.REC_RATE), _LindbladTohEtAl.because(stdpopsim.CiteReason.ASSEMBLY), ], )
# ########################################################### _lapierre_et_al = stdpopsim.Citation( author="Lapierre et al.", year="2016", doi="https://doi.org/10.1093/molbev/msw048") _sezonov_et_al = stdpopsim.Citation(author="Sezonov et al.", year="2007", doi="https://doi.org/10.1128/JB.01368-07") _chromosomes = [] _chromosomes.append( stdpopsim.Chromosome(id=None, length=4641652, mutation_rate=1e-5 + 2e-4, recombination_rate=0.0)) # mean_conversion_rate=8.9e-11 # not implemented yet! # mean_conversion_length=542 # not implemented yet! #: :class:`stdpopsim.Genome` definition for E. Coli. # Chromosome length data is based on strain K-12. _genome = stdpopsim.Genome(chromosomes=_chromosomes) _species = stdpopsim.Species( id="EscCol", name="Escherichia coli", common_name="E. coli", genome=_genome, generation_time=0.00003805175, # 1.0 / (525600 min/year / 20 min/gen)