def indexToID(pop, idField='ind_id', fatherField='father_id', motherField='mother_id',
              fatherIndex='father_idx', motherIndex='mother_idx', reset=False):
    '''This function adds information field idField (default to ``'ind_id'``)
    to population ``pop`` and assigns an unique ID to every individual in this
    Population. It then adds information fields fatherField (default to
    ``'father_id'``) and motherField (default to ``'mother_id'``) and set their
    values with IDs according to the established index based
    parents-children relationship, which is read from information fields
    fatherIndex (default to ``'father_idx'``) and motherIndex (default to
    ``'mother_idx'``). A parental index of ``-1`` is treated as "no parent
    recorded" and leaves the corresponding ID field at its initial value of
    ``-1``. Existing information fields will be used if idField, fatherField
    or motherField already exist. This function uses a system-wide ID
    generator for unique IDs, which does not have to start from zero. A
    parameter ``reset`` could be used to reset starting ID to zero (if
    ``reset=True``) or a specified number (``reset=number``).

    The function returns nothing; ``pop`` is modified in place. If ``pop``
    has at least one ancestral generation, the present generation (0) is the
    active generation when the function returns.
    '''
    pop.addInfoFields([idField, fatherField, motherField], -1)
    # set each individual's unique ID to idField
    tagID(pop, reset=reset, infoFields=idField)
    # (index field, ID field) pairs; the father is always handled before the mother
    parentFields = ((fatherIndex, fatherField), (motherIndex, motherField))
    # save each individual's parents' IDs to fatherField and motherField.
    # The oldest generation is skipped because its parents are not stored.
    for gen in range(pop.ancestralGens() - 1, -1, -1):
        pop.useAncestralGen(gen)
        for ind in pop.individuals():
            for indexField, parentField in parentFields:
                parentIdx = ind.info(indexField)
                if parentIdx != -1:
                    # parents of generation ``gen`` live in generation ``gen + 1``
                    parent = pop.ancestor(parentIdx, gen + 1)
                    ind.setInfo(parent.info(idField), parentField)
def initialize_tuson_pop(self, chromosome_lengths, info_fields, all_genotypes_handle):
    """Create the founder population and load its genotypes.

    A population of 105 individuals is created with ``chromosome_lengths``
    passed as ``loci`` and ``info_fields`` passed as ``infoFields``.
    Genotypes are taken from ``all_genotypes_handle`` in iteration order and
    assigned to the individuals one by one; pairing stops as soon as either
    the individuals or the genotypes run out. Finally every individual gets
    an ID via ``sim.tagID`` with the ID counter reset.

    :return: the initialised founder population.
    """
    founders = sim.Population(
        size=105,
        loci=chromosome_lengths,
        infoFields=info_fields,
    )
    individual_genotype_pairs = zip(founders.individuals(), all_genotypes_handle)
    for founder, alleles in individual_genotype_pairs:
        founder.setGenotype(alleles)
    sim.tagID(founders, reset=True)
    return founders
def drawSample(self, pop, penet, nFamilies):
    """Evolve a clone of ``pop`` for one generation and return the result.

    The clone is stored in ``self.pop``, so the caller's ``pop`` is not
    evolved. It receives the ``ind_id``, ``father_id`` and ``mother_id``
    information fields, has its ancestral depth set to 1, and is tagged
    with IDs (counter reset).

    ``penet`` is used twice: as the pre-mating operator and again on every
    offspring. Offspring for which ``self._discardTrio`` holds are dropped
    by ``sim.DiscardIf``. ``nFamilies`` is passed as the offspring
    ``subPopSize``.

    :return: ``self.pop`` after the single generation of random mating.
    """
    sample = pop.clone()
    self.pop = sample
    sample.addInfoFields(['ind_id', 'father_id', 'mother_id'])
    sample.setAncestralDepth(1)
    sim.tagID(sample, reset=True)

    # Operators applied, in this order, to each offspring.
    offspring_ops = [
        sim.MendelianGenoTransmitter(),  # pass genotype
        sim.IdTagger(),                  # assign new ID to offspring
        sim.PedigreeTagger(),            # record the parent of each offspring
        penet,                           # determine offspring affection status
        sim.DiscardIf(cond=self._discardTrio),
    ]
    mating_scheme = sim.RandomMating(ops=offspring_ops, subPopSize=nFamilies)
    sample.evolve(preOps=penet, matingScheme=mating_scheme, gen=1)
    return self.pop
# One recombination rate per locus, in chromosome order: 0.01 when the text
# form of the locus' cM position ends in '.6', otherwise 0.0.
recombination_rates = []
for chromosome in cM_positions:
    for cM in chromosome:
        recombination_rates.append(0.01 if str(cM).endswith('.6') else 0.0)

# All per-chromosome cM positions concatenated into one flat list.
flat_cM_positions = []
for cMs in cM_positions:
    flat_cM_positions += cMs

# In[ ]:

# Load the prefounder population, restart the ID counter and name
# sub-population 0.
nam = sim.loadPopulation('nam_prefounders.pop')
sim.tagID(nam, reset=True)
nam.setSubPopName('prefounders', 0)

# Maps every second value from 0 to 20 (inclusive) to 100.
sample_sizes = dict.fromkeys(range(0, 21, 2), 100)

locus_names = list(range(nam.totNumLoci()))

genetic_structure = {
    # 'cM_positions': cM_positions,            (currently disabled)
    # 'chr_cM_positions': chr_cM_positions,    (currently disabled)
    'allele_names': allele_names,
    'integral_valued_loci': integral_valued_loci,
    'relative_integral_valued_loci': relative_integral_valued_loci,
    'alleles': alleles,
    'recombination_rates': recombination_rates,
}

# In[ ]:
# Run-level settings for this script.
generations = 10
replicates = 100

# Bundle the file names and breeding parameters in a TusonDrift object.
# map_filename, popsize, females, males, founder_filename and popst_filename
# are not defined in this fragment and must already exist.
td = TusonDrift(genetic_map_filename=map_filename,
                population_size=popsize,
                number_of_breeding_females=females,
                number_of_breeding_males=males,
                number_of_generations=generations,
                founder_population_filename=founder_filename,
                population_structure_filename=popst_filename)

# Recombination rates are parsed from the same genetic map file.
recom_rates = parser.parse_recombination_rates(map_filename)

# Load the founder population, name sub-population 0 and assign IDs with the
# ID counter reset.
tuson = sim.loadPopulation(td.founder_population_filename)
tuson.setSubPopName('tuson', 0)
sim.tagID(tuson, reset=True)

# tuson_meta = td.initialize_meta_population(tuson, number_of_reps=replicates)
# sites, inds = td.find_fixed_sites(tuson, 0.15, 0.03)

# Request the 'numOfFixedSites' and 'fixedSites' statistics and keep the
# latter from the population's variables.
sim.stat(tuson, numOfSegSites=sim.ALL_AVAIL,
         vars=['numOfFixedSites', 'fixedSites'])
sites = tuson.dvars().fixedSites

# Assigns population structure
# NOTE(review): the meaning of 0.15 and 0.03 is not visible here - confirm
# against parameterizer.PopulationStructure.
ps = parameterizer.PopulationStructure(tuson,
                                       td.population_structure_filename,
                                       0.15, 0.03)
input_file_prefix = '/home/vakanas/BISB/rjwlab-scripts/saegus_project/devel/magic/1478/' mg = analyze.MultiGeneration('epsilon') run_id = 'epsilon' generations = 10 heritability = 0.7 number_of_qtl = 50 number_of_replicates = 2 founders = [[2, 26], [3, 25], [4, 24], [5, 23]] os_per_pair = 500 recombination_rates = [0.01] * 1478 prefounders = sim.loadPopulation(input_file_prefix + 'bia_prefounders.pop') config_file_template = input_file_prefix + 'gwas_pipeline.xml' sim.tagID(prefounders, reset=True) alleles = np.array( pd.read_hdf(input_file_prefix + 'parameters/alleles_at_1478_loci.hdf')) rdm_populations = sim.Simulator(prefounders, number_of_replicates, stealPops=False) rdm_magic = breed.MAGIC(rdm_populations, founders, recombination_rates) sim.tagID(prefounders, reset=27) rdm_magic.generate_f_one(founders, os_per_pair) sim.stat(rdm_populations.population(0), alleleFreq=sim.ALL_AVAIL) af = analyze.allele_data(rdm_populations.population(0), alleles, list(range(len(alleles)))) minor_alleles = np.asarray(af.minor_allele, dtype=np.int8) rdm_magic.recombinatorial_convergence(rdm_populations, len(founders),
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#

import simuPOP as sim


def _print_ids(population):
    """Print ``ind_id`` (as int) of every individual in the active generation."""
    print([int(ind.ind_id) for ind in population.individuals()])


pop = sim.Population(10, infoFields='ind_id', ancGen=1)

# IdTagger is used twice: once as an initial operator and once for each
# offspring during mating.
init_tagger = sim.IdTagger()
offspring_ops = [
    sim.CloneGenoTransmitter(),
    sim.IdTagger(),
]
pop.evolve(
    initOps=init_tagger,
    matingScheme=sim.RandomSelection(ops=offspring_ops),
    gen=1,
)

# IDs of the present generation ...
_print_ids(pop)
# ... and of the stored ancestral generation.
pop.useAncestralGen(1)
_print_ids(pop)
sim.tagID(pop)  # re-assign ID
_print_ids(pop)
ploidy=2, loci=chromosome_lengths, lociNames=founder.snpID.astype(str)) #Set genotypes in simuPOP populations from converted genotypes for i, ind in enumerate(example_pop.individuals()): ind.setGenotype(converted_genotypes[i]) #check genotype for Parent A (index 0), can repeat for all founders example_individual = example_pop.individual(0) example_genotype = np.array(example_individual.genotype(ploidy=0, chroms=0)) print(example_genotype) #add required info fields example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id']) sim.tagID( example_pop, reset=1 ) #assigns unique id to all founders starting from 1, will be used for tracking allele origin #Create list of crossover probabilities from founder data measured in centimorgans #parse.RecomRates() is designed to be used to read in genetic map and then parse, not currently #set to parse recombination rates from data already read in (genetic_map above) tf = parse.RecomRates() recom_map = tf.parse_recombination_rates('founder_key_25K_vcf_28FEB20.txt') ### Generate F1 data ### Set founders and designate number of offspring for F1 founders = [[1, 2], [3, 4], [5, 6], [7, 3]] offspring_per_pair = 1 #One offspring per pair, 4 individuals total founder_chooser = breed.PairwiseIDChooser(founders, offspring_per_pair) number_of_pairs = len(founders)
def test_generate_operating_population():
    """Check population sizes through the F_1 cross and two merge rounds.

    The test loads the prefounder population named in the ``uniparams``
    shelf, builds the F_1 generation with ``simulate.Truncation`` and then
    runs two rounds of pairwise mating, asserting the population size after
    each step (26 -> 400 -> 200 -> 100).

    It depends on files in the working directory:
    ``nam_prefounders_genetic_map.txt`` and the shelves ``pf_map``,
    ``misc_gmap`` and ``uniparams``. It also stores
    ``'meta_pop_sample_sizes'`` back into the ``uniparams`` shelf.
    """
    from contextlib import closing

    genetic_map = pd.read_csv('nam_prefounders_genetic_map.txt', index_col=None,
                              sep='\t')

    # The shelves are closed on every exit path, including a failed assert.
    with closing(shelve.open('pf_map')) as pf_map, \
            closing(shelve.open('misc_gmap')) as misc_gmap, \
            closing(shelve.open('uniparams')) as uniparams:

        # NOTE(review): several values loaded below are not used by any
        # assertion in this test. They are kept so that a missing key still
        # fails here - confirm whether they can be dropped.
        locus_names = uniparams['locus_names']
        pos_column = uniparams['pos_column']
        allele_names = uniparams['allele_names']
        snp_to_integer = uniparams['snp_to_integer']
        integer_to_snp = uniparams['integer_to_snp']

        alleles = misc_gmap['alleles']
        chr_cM_positions = misc_gmap['chr_cM_positions']
        cM_positions = misc_gmap['cM_positions']
        integral_valued_loci = misc_gmap['integral_valued_loci']
        relative_integral_valued_loci = misc_gmap['relative_integral_valued_loci']
        recombination_rates = misc_gmap['recombination_rates']

        nam = sim.loadPopulation(uniparams['prefounder_file_name'])
        sim.tagID(nam, reset=True)
        nam.setSubPopName('maize_nam_prefounders', 0)

        selection_statistics = {
            'aggregate': {},
            'selected': {},
            'non-selected': {}
        }

        ind_names_for_gwas = {
            i: {} for i in range(uniparams['number_of_replicates'])
        }
        # Written through to the shelf: every second generation maps to 100.
        uniparams['meta_pop_sample_sizes'] = {
            i: 100
            for i in range(0, uniparams['generations_of_selection'] + 1, 2)
        }

        s = simulate.Truncation(uniparams['generations_of_selection'],
                                uniparams['generations_of_random_mating'],
                                uniparams['operating_population_size'],
                                uniparams['proportion_of_individuals_saved'],
                                uniparams['overshoot_as_proportion'],
                                uniparams['individuals_per_breeding_subpop'],
                                uniparams['heritability'],
                                uniparams['meta_pop_sample_sizes'],
                                uniparams['number_of_replicates'])

        founders = uniparams['founders']
        replicated_nam = sim.Simulator(nam, rep=2, stealPops=False)
        pop = replicated_nam.extract(0)

        assert pop.popSize() == 26, "Population is too large."

        s.generate_f_one(pop, recombination_rates, founders, 100)

        assert pop.popSize() == 400, (
            "Population should have size: {} after the F_1 mating "
            "procedure.".format(len(founders) * 100))

        # Second round: 4 groups of 100 are paired up and produce 200 offspring.
        intmd_os_struct = s.restructure_offspring(pop, 100, 4)
        snd_order = breed.SecondOrderPairIDChooser(intmd_os_struct, 1)

        pop.evolve(
            preOps=[sim.MergeSubPops()],
            matingScheme=sim.HomoMating(
                sim.PyParentsChooser(snd_order.snd_ord_id_pairs),
                sim.OffspringGenerator(ops=[
                    sim.IdTagger(),
                    sim.ParentsTagger(),
                    sim.PedigreeTagger(),
                    sim.Recombinator(rates=recombination_rates)
                ], numOffspring=1),
                subPopSize=[200],
            ),
            gen=1,
        )

        # The mating scheme above requests subPopSize=[200]; the former
        # expected value of 1 could never match that.
        assert pop.popSize() == 200, \
            "Population does not have correct size after second round of mating."

        # Third round: 2 groups of 100 are paired up and produce 100 offspring.
        second_intmd_os_struct = s.restructure_offspring(pop, 100, 2)
        third_order = breed.SecondOrderPairIDChooser(second_intmd_os_struct, 1)

        pop.evolve(
            preOps=[sim.MergeSubPops()],
            matingScheme=sim.HomoMating(
                sim.PyParentsChooser(third_order.snd_ord_id_pairs),
                sim.OffspringGenerator(ops=[
                    sim.IdTagger(),
                    sim.ParentsTagger(),
                    sim.PedigreeTagger(),
                    sim.Recombinator(rates=recombination_rates)
                ], numOffspring=1),
                subPopSize=[100],
            ),
            gen=1,
        )

        assert pop.popSize() == 100, (
            "Second merge of breeding sub-populations. Offspring population does not have "
            "correct size")
N=(8100, 8100, 7900, 900000), G=(20000, 10, 370), mu=1.8e-8, steps=[100, 1, 10], selModel='multiplicative', selDist='constant', selCoef=None, popFile='example.pop') # load population # print('Loading population example.pop') # pop = sim.loadPopulation('example.pop') # evolve the population for a few more generations to produce pedigrees print('Evolving the population for three generations.') pop.addInfoFields(['ind_id', 'father_id', 'mother_id']) sim.tagID(pop) # save all ancestral generations during evolution pop.setAncestralDepth(-1) pop.evolve(matingScheme=sim.RandomMating( numOffspring=(sim.UNIFORM_DISTRIBUTION, 2, 4), ops=(sim.MendelianGenoTransmitter(), sim.IdTagger(), sim.PedigreeTagger())), gen=3) # what is the average number of mutants in this population? avgMutants = (pop.popSize() * pop.totNumLoci() * 2. - pop.genotype().count(0)) / pop.popSize() print(('Average number of mutants is %.2f' % avgMutants)) # # This contains marker information for the initial population print('Mutant locations are saved to file sample.map') markers = saveMarkerInfoToFile(pop, 'pedigree.map')
def do_forward_sims(sim_data, chrom_len, diploid_Ne, batchname, repilcates,
                    simupop_seed):
    """Run replicate forward simulations in simuPOP and export genotypes.

    An initial population is built from the haplotypes in ``sim_data`` and
    exported as a genotype table plus PED/MAP files. A simulator with
    ``repilcates`` copies of an equally shaped population is then evolved
    for 20 generations, and each replicate is written to its own ``.geno``
    file.

    All output goes below ``./share/<batchname>/Ne-<Ne>_Chr-<n_chrom>/``.
    This function does not create that directory.

    :param sim_data: passed to ``get_chromosome_positions`` and
        ``get_haplotypes``.
    :param chrom_len: chromosome length; also sets the recombination
        intensity to ``1.0 / chrom_len``.
    :param diploid_Ne: number of diploid individuals per population.
    :param batchname: name of the batch directory below ``./share``.
    :param repilcates: number of replicate populations to evolve.
    :param simupop_seed: currently unused; the seeding code below is
        disabled.
    :return: ``None``.
    """
    print("start getting chromosomes positions")
    chromosome_positions = get_chromosome_positions(sim_data=sim_data,
                                                    chromsome_length=chrom_len)
    print("done getting chromosomes positions")

    haplotypes = get_haplotypes(sim_data)
    loci_per_chromsome = get_loci_per_chromosome(chromosome_positions)
    n_chrom = len(loci_per_chromsome)

    # Values shared by the initial population and the simulated replicates.
    locus_names = [
        'locus_{}'.format(x) for x in range(len(chromosome_positions))
    ]
    haplotype_props = [1.0 / len(haplotypes)] * len(haplotypes)
    # Common prefix of every output file written by this function.
    out_prefix = './share/{}/Ne-{}_Chr-{}/Ne-{}_Chr-{}'.format(
        batchname, diploid_Ne, n_chrom, diploid_Ne, n_chrom)

    # set up the ancestral pop in simuPOP
    initial = simuPOP.Population(
        diploid_Ne,  # here is the diploid number
        loci=loci_per_chromsome,  # should be the number of loci on each chromosome
        lociPos=list(chromosome_positions),
        ploidy=2,
        infoFields=['father_idx', 'mother_idx', 'ind_id'],
        #alleleNames=['A', 'C', 'G', 'T'],
        lociNames=locus_names)
    simuPOP.initGenotype(initial,
                         prop=haplotype_props,
                         haplotypes=list(haplotypes))
    simuPOP.tagID(initial, reset=1)

    initial_export = get_export_genotypes(initial, initial.popSize())
    np.savetxt(out_prefix + '.inital.txt',
               initial_export,
               delimiter='\t',
               fmt='%01d')

    # map-ped
    simuPOP.utils.export(initial,
                         format='PED',
                         output=out_prefix + '.inital.ped',
                         gui=False,
                         idField='ind_id')
    simuPOP.utils.export(initial,
                         format='MAP',
                         output=out_prefix + '.inital.map',
                         gui=False)

    # Doesn't yet work!!
    # set the seed for simuPOP
    #simuPOP.setRNG(seed=simupop_seed)
    # and in Python
    #random.seed(simupop_seed)
    # and for numpy
    #np.random.seed(simupop_seed)

    print("initalizing the forward simulator")
    simu = simuPOP.Simulator(
        simuPOP.Population(
            diploid_Ne,  # here is the diploid number
            loci=loci_per_chromsome,  # should be the number of loci on each chromosome
            lociPos=list(chromosome_positions),
            ploidy=2,
            infoFields=['ind_id', 'father_idx', 'mother_idx'],
            #alleleNames=['A', 'C', 'G', 'T'],
            lociNames=locus_names),
        rep=repilcates)

    print("Start evolving {} replicates".format(repilcates))
    simu.evolve(
        initOps=[
            simuPOP.InitSex(sex=[simuPOP.MALE, simuPOP.FEMALE]),  # alternate sex
            simuPOP.InitGenotype(prop=haplotype_props,
                                 haplotypes=list(haplotypes))
        ],
        matingScheme=simuPOP.HomoMating(
            chooser=simuPOP.PyParentsChooser(fixedChooser),
            generator=simuPOP.OffspringGenerator(
                sexMode=(simuPOP.GLOBAL_SEQUENCE_OF_SEX, simuPOP.MALE,
                         simuPOP.FEMALE),
                ops=[
                    simuPOP.Recombinator(intensity=1.0 / chrom_len),
                    simuPOP.ParentsTagger()
                ]),
        ),
        postOps=[],
        gen=20)
    print("Done evolving {} replicates!".format(repilcates))

    # export the data
    print("Exporting data!")
    # Export at most 200 individuals per replicate (all of them when the
    # population is smaller than that).
    n_export = 200 if diploid_Ne >= 200 else diploid_Ne
    for rep, pop in enumerate(simu.populations()):
        pop_genotypes = get_export_genotypes(pop, n_ind=n_export)
        np.savetxt(out_prefix + '_Frep-{}.geno'.format(rep),
                   pop_genotypes,
                   delimiter='\t',
                   fmt='%01d')
        if rep % 10 == 0:
            print("saved rep {}".format(rep))
#!/home/vakanas/anaconda43/python3.6 import simuOpt simuOpt.setOptions(alleleType='short', quiet=True, numThreads=4) import simuPOP as sim import numpy as np import pandas as pd import random import h5py from saegus import analyze, operators, parameters #np.set_printoptions(suppress=True, precision=5) example_pop = sim.loadPopulation('example_pop.pop') example_pop.addInfoFields(['ind_id', 'mother_id', 'father_id', 'g', 'p']) sim.tagID(example_pop) sim.stat(example_pop, numOfSegSites=sim.ALL_AVAIL, vars=['numOfSegSites', 'segSites', 'fixedSites']) sim.stat(example_pop, alleleFreq=sim.ALL_AVAIL) segregating_loci = example_pop.dvars().segSites allele_states = analyze.gather_allele_data(example_pop) allele_frequencies = analyze.gather_allele_frequencies(example_pop, allele_states) gwas = analyze.GWAS(example_pop, np.array(segregating_loci, dtype=np.int_), allele_states[:, 3], 'example') count_matrix = gwas.calculate_count_matrix('example_count_matrix.txt') gwas.hapmap_formatter(hapmap_file_name='example_hapmap.txt') eigenvalues, eigenvectors = gwas.pop_struct_eigendecomp(count_matrix) gwas.population_structure_formatter( eigenvalues, eigenvectors,