コード例 #1
0
ファイル: sampling.py プロジェクト: sudorook/simuPOP
def indexToID(pop, idField='ind_id', fatherField='father_id', motherField='mother_id',
              fatherIndex='father_idx', motherIndex='mother_idx', reset=False):
    '''Translate index based parent links of ``pop`` into ID based links.

    Information fields ``idField`` (default ``'ind_id'``), ``fatherField``
    (default ``'father_id'``) and ``motherField`` (default ``'mother_id'``)
    are added to ``pop``; fields that already exist are reused. Every
    individual then receives a unique ID in ``idField``, and for each
    individual whose ``fatherIndex`` / ``motherIndex`` field is not ``-1`` the
    ID of the referenced parent in the next older generation is stored in
    ``fatherField`` / ``motherField``.

    IDs come from a system-wide ID generator, so they do not have to start
    from a fixed number. Parameter ``reset`` is passed on to ``tagID``:
    ``reset=True`` restarts the ID sequence and ``reset=number`` starts it at
    the given number.
    '''
    pop.addInfoFields([idField, fatherField, motherField], -1)
    # give every individual a unique ID, stored in idField
    tagID(pop, reset=reset, infoFields=idField)
    # (index field, ID field) pairs; father is handled before mother
    parentLinks = ((fatherIndex, fatherField), (motherIndex, motherField))
    # walk from generation ancestralGens()-1 down to the present generation 0;
    # the parents of generation gen live in generation gen+1
    for gen in reversed(range(pop.ancestralGens())):
        pop.useAncestralGen(gen)
        for ind in pop.individuals():
            for indexField, parentField in parentLinks:
                if ind.info(indexField) == -1:
                    # no parent recorded through this index field
                    continue
                parent = pop.ancestor(ind.info(indexField), gen + 1)
                ind.setInfo(parent.info(idField), parentField)
コード例 #2
0
 def initialize_tuson_pop(self, chromosome_lengths, info_fields,
                          all_genotypes_handle):
     """Build the 105-individual founder population and tag it with IDs.

     :param chromosome_lengths: passed as ``loci`` to ``sim.Population``.
     :param info_fields: passed as ``infoFields`` to ``sim.Population``.
     :param all_genotypes_handle: iterable of genotypes; the i-th genotype is
         assigned to the i-th individual (pairing stops at the shorter of the
         two sequences).
     :return: the new population, with IDs assigned by
         ``sim.tagID(..., reset=True)``.
     """
     founders = sim.Population(size=105, loci=chromosome_lengths,
                               infoFields=info_fields)
     individual_genotype_pairs = zip(founders.individuals(),
                                     all_genotypes_handle)
     for member, alleles in individual_genotype_pairs:
         member.setGenotype(alleles)
     sim.tagID(founders, reset=True)
     return founders
コード例 #3
0
 def drawSample(self, pop, penet, nFamilies):
     """Evolve a tagged copy of ``pop`` for one generation and return it.

     The copy is stored in ``self.pop``. It receives the ``ind_id``,
     ``father_id`` and ``mother_id`` information fields, keeps one ancestral
     generation and is tagged with fresh IDs before mating.

     :param pop: source population; it is cloned, not modified.
     :param penet: operator used both as ``preOps`` and as a during-mating
         operator on the offspring.
     :param nFamilies: passed as ``subPopSize`` to ``sim.RandomMating``.
     :return: ``self.pop`` after one generation of evolution.
     """
     self.pop = pop.clone()
     self.pop.addInfoFields(['ind_id', 'father_id', 'mother_id'])
     self.pop.setAncestralDepth(1)
     sim.tagID(self.pop, reset=True)

     # Operators applied to every offspring, in this order.
     offspring_ops = [
         sim.MendelianGenoTransmitter(),  # transmit parental genotype
         sim.IdTagger(),                  # give the offspring a new ID
         sim.PedigreeTagger(),            # record the IDs of its parents
         penet,                           # set offspring affection status
         sim.DiscardIf(cond=self._discardTrio),
     ]
     mating = sim.RandomMating(ops=offspring_ops, subPopSize=nFamilies)
     self.pop.evolve(preOps=penet, matingScheme=mating, gen=1)
     return self.pop
コード例 #4
0
# One rate per locus, in chromosome order: 0.01 when the text form of the cM
# position ends in '.6', otherwise 0.0.
recombination_rates = []
for chromosome in cM_positions:
    for cM in chromosome:
        recombination_rates.append(0.01 if str(cM).endswith('.6') else 0.0)

# All per-chromosome position lists joined into a single flat list.
flat_cM_positions = []
for cMs in cM_positions:
    flat_cM_positions += cMs

# In[ ]:

nam = sim.loadPopulation('nam_prefounders.pop')
sim.tagID(nam, reset=True)
nam.setSubPopName('prefounders', 0)
# Every second generation from 0 to 20 maps to a sample size of 100.
sample_sizes = dict.fromkeys(range(0, 21, 2), 100)
locus_names = list(range(nam.totNumLoci()))

# The 'cM_positions' and 'chr_cM_positions' entries are currently disabled
# and therefore not part of genetic_structure.
genetic_structure = {
    'allele_names': allele_names,
    'integral_valued_loci': integral_valued_loci,
    'relative_integral_valued_loci': relative_integral_valued_loci,
    'alleles': alleles,
    'recombination_rates': recombination_rates,
}

# In[ ]:
コード例 #5
0
# Run configuration for this script.
generations = 10
replicates = 100

# map_filename, popsize, females, males, founder_filename and popst_filename
# are defined outside this excerpt.
td = TusonDrift(genetic_map_filename=map_filename, population_size=popsize,
                number_of_breeding_females=females,
                number_of_breeding_males=males,
                number_of_generations=generations,
                founder_population_filename=founder_filename,
                population_structure_filename=popst_filename)

# Recombination rates are parsed from the same genetic map file given to td.
recom_rates = parser.parse_recombination_rates(map_filename)

# Load the founder population and name its first sub-population 'tuson'.
tuson = sim.loadPopulation(td.founder_population_filename)
tuson.setSubPopName('tuson', 0)

# Assign IDs to all individuals, restarting the ID sequence.
sim.tagID(tuson, reset=True)
# tuson_meta = td.initialize_meta_population(tuson, number_of_reps=replicates)


# sites, inds = td.find_fixed_sites(tuson, 0.15, 0.03)
# Only 'numOfFixedSites' and 'fixedSites' are requested as output variables.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL,
         vars=['numOfFixedSites', 'fixedSites'])

# Read back the fixed sites that sim.stat stored on the population.
sites = tuson.dvars().fixedSites




# Assigns population structure
# NOTE(review): the meaning of 0.15 and 0.03 is not visible in this excerpt;
# the disabled find_fixed_sites call above uses the same pair -- confirm.
ps = parameterizer.PopulationStructure(tuson, td.population_structure_filename,
                                       0.15, 0.03)
コード例 #6
0
# Directory holding every input file of this run (absolute, machine specific).
input_file_prefix = '/home/vakanas/BISB/rjwlab-scripts/saegus_project/devel/magic/1478/'

# NOTE(review): the literal 'epsilon' is repeated in run_id just below.
mg = analyze.MultiGeneration('epsilon')
run_id = 'epsilon'
generations = 10
heritability = 0.7
number_of_qtl = 50
number_of_replicates = 2
# Pairs of founder IDs to cross, and the number of offspring per pair.
founders = [[2, 26], [3, 25], [4, 24], [5, 23]]
os_per_pair = 500
# One rate of 0.01 for each of 1478 entries.
recombination_rates = [0.01] * 1478
prefounders = sim.loadPopulation(input_file_prefix + 'bia_prefounders.pop')
config_file_template = input_file_prefix + 'gwas_pipeline.xml'

# Assign IDs to the prefounders, restarting the ID sequence.
sim.tagID(prefounders, reset=True)
alleles = np.array(
    pd.read_hdf(input_file_prefix + 'parameters/alleles_at_1478_loci.hdf'))

# Build number_of_replicates populations from prefounders.
rdm_populations = sim.Simulator(prefounders,
                                number_of_replicates,
                                stealPops=False)
rdm_magic = breed.MAGIC(rdm_populations, founders, recombination_rates)
# NOTE(review): prefounders is tagged a second time, now with reset=27, after
# the Simulator was already created from it -- confirm that re-tagging
# prefounders (rather than the replicate populations) is what is intended.
sim.tagID(prefounders, reset=27)
rdm_magic.generate_f_one(founders, os_per_pair)

# Allele frequencies are computed on the first replicate only.
sim.stat(rdm_populations.population(0), alleleFreq=sim.ALL_AVAIL)
af = analyze.allele_data(rdm_populations.population(0), alleles,
                         list(range(len(alleles))))
minor_alleles = np.asarray(af.minor_allele, dtype=np.int8)
rdm_magic.recombinatorial_convergence(rdm_populations, len(founders),
コード例 #7
0
ファイル: IdTagger.py プロジェクト: sudorook/simuPOP
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#

# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#

import simuPOP as sim
def _show_ids(population):
    '''Print the integer ``ind_id`` of each individual in the active generation.'''
    print([int(member.ind_id) for member in population.individuals()])


# Ten individuals with an ID field, keeping one ancestral generation.
pop = sim.Population(10, infoFields='ind_id', ancGen=1)

# Operators are created in the same order as they are handed to evolve().
initial_tagger = sim.IdTagger()
mating = sim.RandomSelection(ops=[
    sim.CloneGenoTransmitter(),
    sim.IdTagger(),
])
pop.evolve(initOps=initial_tagger, matingScheme=mating, gen=1)

_show_ids(pop)          # IDs of the offspring generation
pop.useAncestralGen(1)
_show_ids(pop)          # IDs of the parental generation
sim.tagID(pop)  # re-assign ID
_show_ids(pop)
コード例 #8
0
                             ploidy=2,
                             loci=chromosome_lengths,
                             lociNames=founder.snpID.astype(str))
# Copy the converted genotypes into the simuPOP population: the genotype at
# position i of converted_genotypes goes to the i-th individual.
for i, ind in enumerate(example_pop.individuals()):
    ind.setGenotype(converted_genotypes[i])

# Check the genotype of Parent A (index 0); can be repeated for all founders.
example_individual = example_pop.individual(0)
example_genotype = np.array(example_individual.genotype(ploidy=0, chroms=0))
print(example_genotype)

# Add the information fields required for ID and pedigree tracking.
example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id'])
sim.tagID(
    example_pop, reset=1
)  # assigns a unique ID to every founder, starting from 1; used for tracking allele origin

# Create the list of crossover probabilities from founder data measured in
# centimorgans. parse.RecomRates() is designed to read a genetic map from a
# file and then parse it; it is not currently set up to parse recombination
# rates from data that was already read in (genetic_map above).
tf = parse.RecomRates()
recom_map = tf.parse_recombination_rates('founder_key_25K_vcf_28FEB20.txt')

### Generate F1 data
### Set founders and designate number of offspring for F1
# NOTE(review): ID 3 appears in two pairs ([3, 4] and [7, 3]) -- confirm this
# is intended.
founders = [[1, 2], [3, 4], [5, 6], [7, 3]]
offspring_per_pair = 1  # one offspring per pair, 4 individuals in total

founder_chooser = breed.PairwiseIDChooser(founders, offspring_per_pair)
number_of_pairs = len(founders)
コード例 #9
0
def test_generate_operating_population():
    """Check population sizes through F1 generation and two merging rounds.

    The test loads its inputs from fixture files in the current working
    directory (``nam_prefounders_genetic_map.txt`` and the ``pf_map``,
    ``misc_gmap`` and ``uniparams`` shelves), generates the F1 offspring and
    then twice merges the breeding sub-populations and mates them through
    ``breed.SecondOrderPairIDChooser``. The population size is asserted
    after every step: 26 prefounders, 400 F1 offspring, 200 after the first
    merge and 100 after the second.

    Side effect: ``uniparams['meta_pop_sample_sizes']`` is written to the
    ``uniparams`` shelf. All shelves are closed when the test ends, whether
    it passes or fails.
    """
    # Not used below; reading it still fails fast when the map file is absent.
    genetic_map = pd.read_csv('nam_prefounders_genetic_map.txt', index_col=None,
                              sep='\t')

    # Shelves are opened one at a time so that exactly those that were opened
    # get closed again, even if a later open or an assertion fails.
    shelves = []
    try:
        for shelf_name in ('pf_map', 'misc_gmap', 'uniparams'):
            shelves.append(shelve.open(shelf_name))
        pf_map, misc_gmap, uniparams = shelves

        # Several of the values below are not used by this test; the lookups
        # are kept because a missing key raises KeyError right here.
        locus_names = uniparams['locus_names']
        pos_column = uniparams['pos_column']
        allele_names = uniparams['allele_names']
        snp_to_integer = uniparams['snp_to_integer']
        integer_to_snp = uniparams['integer_to_snp']

        alleles = misc_gmap['alleles']
        chr_cM_positions = misc_gmap['chr_cM_positions']
        cM_positions = misc_gmap['cM_positions']
        integral_valued_loci = misc_gmap['integral_valued_loci']
        relative_integral_valued_loci = misc_gmap[
            'relative_integral_valued_loci']
        recombination_rates = misc_gmap['recombination_rates']

        nam = sim.loadPopulation(uniparams['prefounder_file_name'])
        sim.tagID(nam, reset=True)
        nam.setSubPopName('maize_nam_prefounders', 0)

        selection_statistics = {
            'aggregate': {},
            'selected': {},
            'non-selected': {}
        }

        ind_names_for_gwas = {
            i: {} for i in range(uniparams['number_of_replicates'])
        }
        # Sample 100 individuals at every second generation of selection.
        uniparams['meta_pop_sample_sizes'] = {
            i: 100
            for i in range(0, uniparams['generations_of_selection'] + 1, 2)
        }

        s = simulate.Truncation(uniparams['generations_of_selection'],
                                uniparams['generations_of_random_mating'],
                                uniparams['operating_population_size'],
                                uniparams['proportion_of_individuals_saved'],
                                uniparams['overshoot_as_proportion'],
                                uniparams['individuals_per_breeding_subpop'],
                                uniparams['heritability'],
                                uniparams['meta_pop_sample_sizes'],
                                uniparams['number_of_replicates'])

        founders = uniparams['founders']
        replicated_nam = sim.Simulator(nam, rep=2, stealPops=False)
        pop = replicated_nam.extract(0)

        assert pop.popSize() == 26, "Population is too large."

        s.generate_f_one(pop, recombination_rates, founders, 100)

        assert pop.popSize() == 400, (
            "Population should have size: {} after the F_1 mating "
            "procedure.".format(len(founders) * 100))

        #pop.splitSubPop(0, [100] * 4)
        #subpop_list = list(range(pop.numSubPop()))

        # First merge: four groups of 100 offspring are paired up.
        intmd_os_struct = s.restructure_offspring(pop, 100, 4)
        snd_order = breed.SecondOrderPairIDChooser(intmd_os_struct, 1)

        pop.evolve(
            preOps=[sim.MergeSubPops()],
            matingScheme=sim.HomoMating(
                sim.PyParentsChooser(snd_order.snd_ord_id_pairs),
                sim.OffspringGenerator(ops=[
                    sim.IdTagger(),
                    sim.ParentsTagger(),
                    sim.PedigreeTagger(),
                    sim.Recombinator(rates=recombination_rates)
                ],
                    numOffspring=1),
                subPopSize=[200],
            ),
            gen=1,
        )

        # The mating scheme above requests subPopSize=[200] and the next step
        # splits the result into 2 groups of 100, so 200 is the expected size
        # (the previous expectation of 1 could never hold).
        assert pop.popSize() == 200, "Population does not have correct size after second round of mating."

        # Second merge: two groups of 100 offspring are paired up.
        second_intmd_os_struct = s.restructure_offspring(pop, 100, 2)
        third_order = breed.SecondOrderPairIDChooser(second_intmd_os_struct, 1)

        pop.evolve(
            preOps=[sim.MergeSubPops()],
            matingScheme=sim.HomoMating(
                sim.PyParentsChooser(third_order.snd_ord_id_pairs),
                sim.OffspringGenerator(ops=[
                    sim.IdTagger(),
                    sim.ParentsTagger(),
                    sim.PedigreeTagger(),
                    sim.Recombinator(rates=recombination_rates)
                ],
                    numOffspring=1),
                subPopSize=[100],
            ),
            gen=1,
        )

        assert pop.popSize() == 100, "Second merge of breeding sub-populations. Offspring population does not have " \
                                     "correct size"
    finally:
        # Close in reverse order of opening.
        for shelf in reversed(shelves):
            shelf.close()
コード例 #10
0
                       N=(8100, 8100, 7900, 900000),
                       G=(20000, 10, 370),
                       mu=1.8e-8,
                       steps=[100, 1, 10],
                       selModel='multiplicative',
                       selDist='constant',
                       selCoef=None,
                       popFile='example.pop')
# load population
# print('Loading population example.pop')
# pop = sim.loadPopulation('example.pop')

# evolve the population for a few more generations to produce pedigrees
print('Evolving the population for three generations.')
pop.addInfoFields(['ind_id', 'father_id', 'mother_id'])
sim.tagID(pop)
# save all ancestral generations during evolution
pop.setAncestralDepth(-1)
pop.evolve(matingScheme=sim.RandomMating(
    numOffspring=(sim.UNIFORM_DISTRIBUTION, 2, 4),
    ops=(sim.MendelianGenoTransmitter(), sim.IdTagger(),
         sim.PedigreeTagger())),
           gen=3)
# what is the average number of mutants in this population?
# (alleles per individual) minus (zero alleles per individual); only the
# genotype returned by pop.genotype() is counted.
avgMutants = (pop.popSize() * pop.totNumLoci() * 2. -
              pop.genotype().count(0)) / pop.popSize()
print('Average number of mutants is %.2f' % avgMutants)
#
# This contains marker information for the initial population
# The message is built from the file name that is actually written; it used
# to announce 'sample.map' while the markers went to 'pedigree.map'.
marker_file = 'pedigree.map'
print('Mutant locations are saved to file %s' % marker_file)
markers = saveMarkerInfoToFile(pop, marker_file)
コード例 #11
0
def do_forward_sims(sim_data, chrom_len, diploid_Ne, batchname, repilcates,
                    simupop_seed):
    """Export an initial population, then evolve and export replicates.

    An ``initial`` population is built from the haplotypes in ``sim_data``
    and written in three formats (``.inital.txt``, ``.inital.ped`` and
    ``.inital.map``). A simulator with ``repilcates`` populations is then
    evolved for 20 generations, and the genotypes of at most 200
    individuals per replicate are saved as ``_Frep-<rep>.geno``. All files
    go to ``./share/<batchname>/Ne-<diploid_Ne>_Chr-<n_chrom>/``.

    NOTE(review): the exported ``initial`` population is a different object
    from the population handed to the simulator; the latter is initialised
    again by ``InitGenotype`` -- confirm this is intended.

    :param sim_data: input passed to ``get_chromosome_positions`` and
        ``get_haplotypes``.
    :param chrom_len: chromosome length; the recombination intensity is
        ``1.0 / chrom_len``.
    :param diploid_Ne: number of diploid individuals per population.
    :param batchname: name of the output sub-directory below ``./share``.
    :param repilcates: number of replicate populations to evolve.
    :param simupop_seed: currently unused because seeding is disabled.
    """
    print("start getting chromosomes positions")
    chromosome_positions = get_chromosome_positions(sim_data=sim_data,
                                                    chromsome_length=chrom_len)
    print("done getting chromosomes positions")

    haplotypes = get_haplotypes(sim_data)
    loci_per_chromsome = get_loci_per_chromosome(chromosome_positions)
    n_chrom = len(loci_per_chromsome)

    # Common prefix of every output file written by this function.
    out_prefix = './share/{}/Ne-{}_Chr-{}/Ne-{}_Chr-{}'.format(
        batchname, diploid_Ne, n_chrom, diploid_Ne, n_chrom)
    # range() instead of xrange() so the code runs on Python 2 and 3 alike.
    locus_names = [
        'locus_{}'.format(x) for x in range(len(chromosome_positions))
    ]
    # Every haplotype gets the same proportion.
    haplotype_props = [1.0 / len(haplotypes)] * len(haplotypes)

    # set up the ancestral pop in simuPOP
    initial = simuPOP.Population(
        diploid_Ne,  # here is the diploid number
        loci=
        loci_per_chromsome,  # should be the number of loci on each chromosome
        lociPos=list(chromosome_positions),
        ploidy=2,
        infoFields=['father_idx', 'mother_idx', 'ind_id'],
        #alleleNames=['A', 'C', 'G', 'T'],
        lociNames=locus_names)
    simuPOP.initGenotype(initial,
                         prop=haplotype_props,
                         haplotypes=list(haplotypes))
    simuPOP.tagID(initial, reset=1)
    initial_export = get_export_genotypes(initial, initial.popSize())
    np.savetxt(out_prefix + '.inital.txt',
               initial_export,
               delimiter='\t',
               fmt='%01d')

    # map-ped
    simuPOP.utils.export(
        initial,
        format='PED',
        output=out_prefix + '.inital.ped',
        gui=False,
        idField='ind_id')
    simuPOP.utils.export(
        initial,
        format='MAP',
        output=out_prefix + '.inital.map',
        gui=False)

    # Doesn't yet work!!
    # set the seed for simuPOP
    #simuPOP.setRNG(seed=simupop_seed)
    # and in Python
    #random.seed(simupop_seed)
    # and for numpy
    #np.random.seed(simupop_seed)

    print("initalizing the forward simulator")
    simu = simuPOP.Simulator(
        simuPOP.Population(
            diploid_Ne,  # here is the diploid number
            loci=get_loci_per_chromosome(
                chromosome_positions
            ),  # should be the number of loci on each chromosome
            lociPos=list(chromosome_positions),
            ploidy=2,
            infoFields=['ind_id', 'father_idx', 'mother_idx'],
            #alleleNames=['A', 'C', 'G', 'T'],
            lociNames=locus_names),
        rep=repilcates)

    print("Start evolving {} replicates".format(repilcates))
    simu.evolve(
        initOps=[
            simuPOP.InitSex(
                sex=[simuPOP.MALE, simuPOP.FEMALE]),  # alternate sex
            simuPOP.InitGenotype(prop=haplotype_props,
                                 haplotypes=list(haplotypes))
        ],
        matingScheme=simuPOP.HomoMating(
            chooser=simuPOP.PyParentsChooser(fixedChooser),
            generator=simuPOP.OffspringGenerator(
                sexMode=(simuPOP.GLOBAL_SEQUENCE_OF_SEX, simuPOP.MALE,
                         simuPOP.FEMALE),
                ops=[
                    simuPOP.Recombinator(intensity=1.0 / chrom_len),
                    simuPOP.ParentsTagger()
                ]),
        ),
        postOps=[],
        gen=20)

    print("Done evolving {} replicates!".format(repilcates))

    # export the data
    print("Exporting data!")
    # Export 200 individuals, or the whole population when it is smaller.
    n_export = min(diploid_Ne, 200)
    for rep, pop in enumerate(simu.populations()):
        pop_genotypes = get_export_genotypes(pop, n_ind=n_export)

        np.savetxt(out_prefix + '_Frep-{}.geno'.format(rep),
                   pop_genotypes,
                   delimiter='\t',
                   fmt='%01d')
        if rep % 10 == 0:
            print("saved rep {}".format(rep))
コード例 #12
0
#!/home/vakanas/anaconda43/python3.6
import simuOpt
simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4)
import simuPOP as sim
import numpy as np
import pandas as pd
import random
import h5py
from saegus import analyze, operators, parameters
#np.set_printoptions(suppress=True, precision=5)

# Load the example population and add the information fields used below.
example_pop = sim.loadPopulation('example_pop.pop')
example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id', 'g', 'p'])
# Assign an ID to every individual.
sim.tagID(example_pop)
# Segregating/fixed site statistics, then allele frequencies at all loci;
# the results are read back through example_pop.dvars() below.
sim.stat(example_pop,
         numOfSegSites=sim.ALL_AVAIL,
         vars=['numOfSegSites', 'segSites', 'fixedSites'])
sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL)

segregating_loci = example_pop.dvars().segSites
allele_states = analyze.gather_allele_data(example_pop)
allele_frequencies = analyze.gather_allele_frequencies(example_pop,
                                                       allele_states)
# NOTE(review): column 3 of allele_states is passed to GWAS; what that column
# holds is not visible in this excerpt -- confirm against gather_allele_data.
gwas = analyze.GWAS(example_pop, np.array(segregating_loci, dtype=np.int_),
                    allele_states[:, 3], 'example')
# Output file names are passed to the formatter methods.
count_matrix = gwas.calculate_count_matrix('example_count_matrix.txt')
gwas.hapmap_formatter(hapmap_file_name='example_hapmap.txt')
eigenvalues, eigenvectors = gwas.pop_struct_eigendecomp(count_matrix)
gwas.population_structure_formatter(
    eigenvalues,
    eigenvectors,