def sampleIndividuals(pop, param):
    """Draw a random sample from one replicate and store its genotypes.

    Args:
        pop (Population): simuPOP population replicate.
        param (sequence): ``(sample size, mutation rate, population size,
            simulation ID, number of loci)``, unpacked in that order.

    Returns:
        bool: always True (PyOperator callbacks need to return true).
    """
    ssize, mutation, popsize, sim_id, num_loci = param
    replicate_vars = pop.dvars()
    popID = replicate_vars.rep
    gen = replicate_vars.gen

    sample = drawRandomSample(pop, sizes=ssize)
    # One record per sampled individual: its index in the sample plus a
    # plain-list copy of its genotype.
    samplelist = [
        dict(id=idx, genotype=list(sample.individual(idx).genotype()))
        for idx in range(ssize)
    ]
    _storeIndividualSample(popID, num_loci, ssize, gen, mutation, popsize,
                           sim_id, samplelist)
    return True
def sampleIndividuals(pop, param):
    """Sample individuals from a replicate and hand their genotypes to storage.

    Args:
        pop (Population): simuPOP population replicate.
        param (sequence): five values, in order: sample size, mutation rate,
            population size, simulation ID, number of loci.

    Returns:
        bool: always True (PyOperator callbacks need to return true).
    """
    (ssize, mutation, popsize, sim_id, num_loci) = param
    popID, gen = pop.dvars().rep, pop.dvars().gen
    sample = drawRandomSample(pop, sizes=ssize)

    def as_record(position):
        # Record for the individual at ``position`` within the sample.
        genotype_list = list(sample.individual(position).genotype())
        return dict(id=position, genotype=genotype_list)

    samplelist = [as_record(position) for position in range(ssize)]
    _storeIndividualSample(popID, num_loci, ssize, gen, mutation, popsize,
                           sim_id, samplelist)
    return True
def sampleTraitCounts(pop, param):
    """Sample trait (allele) counts for every locus and store each count.

    A random sample of ``ssize`` individuals is drawn from the replicate,
    allele statistics are computed on the sample, and one record per
    (locus, allele) pair is passed to ``_storeTraitCountSample``.

    Args:
        pop (Population): simuPOP population replicate.
        param (sequence): ``(sample size, mutation rate, population size,
            simulation ID, number of loci)``, unpacked in that order.

    Returns:
        bool: always True (PyOperator callbacks need to return true).
    """
    (ssize, mutation, popsize, sim_id, numloci) = param
    popID = pop.dvars().rep
    gen = pop.dvars().gen
    sample = drawRandomSample(pop, sizes=ssize)
    sim.stat(sample, alleleFreq=sim.ALL_AVAIL)
    for locus in range(numloci):
        alleleMap = sample.dvars().alleleNum[locus]
        # items() exists on both Python 2 and 3; iteritems() is Python 2 only
        # and raises AttributeError on Python 3.
        for allele, count in alleleMap.items():
            _storeTraitCountSample(popID, ssize, locus, gen, mutation,
                                   popsize, sim_id, allele, count)
    return True
def sampleAndExport(pop):
    """Draw a random sample from every subpopulation and export it twice.

    The sample is written in 'genepop' and 'fstat' format. Both file names
    embed the module-level ``name`` and the current generation.

    Args:
        pop: population being evolved; must provide ``subPopSizes()`` and
            ``dvars().gen``.

    Returns:
        bool: always True.
    """
    sz = pop.subPopSizes()
    # Floor-dividing by 1 keeps every subpopulation size as-is.
    new_sz = [x // 1 for x in sz]
    sample = drawRandomSample(pop, new_sz)
    # The original formatted ``name`` with %d for the .gen file but %s for
    # the .dat file. %s gives the same text for integers and does not raise
    # TypeError when ``name`` is a string, so both files now use it.
    stem = 'sim16a_mono_1of2_%s_himu_sample_%d' % (name, pop.dvars().gen)
    export(sample, format='genepop', output=stem + '.gen', adjust=0,
           gui=False)
    export(sample, format='fstat', output=stem + '.dat', gui=False)
    return True
def sampleAndExport(pop):
    """Export a 1/200 random sample of each subpopulation.

    The sample is written in 'genepop' (.gen) and 'fstat' (.dat) format,
    with the current generation number embedded in the file names.

    Args:
        pop: population being evolved.

    Returns:
        bool: always True.
    """
    per_subpop = [size // 200 for size in pop.subPopSizes()]
    sample = drawRandomSample(pop, per_subpop)
    genepop_name = 'sim15_time_mono_1of2_a_041400_sample_%d.gen' % pop.dvars().gen
    export(sample, format='genepop', output=genepop_name, adjust=0, gui=False)
    fstat_name = 'sim15_time_mono_1of2_a_041400_sample_%d.dat' % pop.dvars().gen
    export(sample, format='fstat', output=fstat_name, gui=False)
    return True
def add_to_meta_pop(self, pop):
    """Sample the replicate and append the sample to its library entry.

    The sample size is looked up in ``self.sample_size`` by the current
    generation. The size and generation are also recorded in the
    population's variables as ``ss`` and ``gen_sampled_from``.

    Args:
        pop: population replicate being evolved.

    Returns:
        bool: always True.
    """
    pop_vars = pop.dvars()
    rep_id = pop_vars.rep
    current_gen = pop_vars.gen
    draw_size = self.sample_size[current_gen]

    sampled = sampling.drawRandomSample(pop, sizes=draw_size)
    pop_vars.ss = draw_size
    pop_vars.gen_sampled_from = current_gen
    self.meta_sample_library[rep_id].append(sampled)
    return True
def add_to_meta_pop(self, pop):
    """Sample the replicate and add the individuals to its meta replicate.

    The sample size comes from ``self.sample_size`` indexed by the current
    generation. The size and the generation are stored on the population as
    ``ss`` and ``gen_sampled_from``. The sampled individuals are then added
    to ``self.meta_replicates.population(rep)``.

    Args:
        pop: population replicate being evolved.

    Returns:
        bool: always True.
    """
    state = pop.dvars()
    size_now = self.sample_size[state.gen]
    sampled = sampling.drawRandomSample(pop, sizes=size_now)

    state.ss = size_now
    state.gen_sampled_from = state.gen

    target = self.meta_replicates.population(state.rep)
    target.addIndFrom(sampled)
    return True
def sampleAlleleAndGenotypeFrequencies(pop, param):
    """Sample haplotype-class frequencies and counts per subpopulation.

    For every named subpopulation a random sample of equal size is drawn
    (the smallest of the per-subpopulation sizes ``ceil(ssize * n)``).
    Haplotype frequencies and counts over loci ``0..num_loci-1`` are
    computed, and one record per subpopulation is passed to
    ``data.storeClassFrequencySamples``.

    Args:
        pop: simuPOP population replicate.
        param (sequence): ``(ssize, mutation, popsize, sim_id, num_loci,
            fname, fcli, seed)``, unpacked in that order.

    Returns:
        bool: always True.
    """
    import simuPOP as sim
    import simuPOP.sampling as sampling

    (ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed) = param
    rep = pop.dvars().rep
    gen = pop.dvars().gen
    subpops = pop.subPopNames()
    sample_list = list()
    subpop_sizes = pop.subPopSizes()
    sample_sizes = [int(math.ceil(ssize * n)) for n in subpop_sizes]
    # Every subpopulation is sampled at the same (smallest) size.
    min_sample_size = min(sample_sizes)

    for sp_name in subpops:
        sample = sampling.drawRandomSample(pop,
                                           subPops=pop.subPopByName(sp_name),
                                           sizes=min_sample_size)
        sim.stat(sample, haploFreq=range(0, num_loci),
                 vars=['haploFreq', 'haploNum'])
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

        # dict.keys() is a non-subscriptable view on Python 3, so make it a
        # list before taking the first key.
        first_key = list(sample.dvars().haploFreq.keys())[0]
        haplotype_map = sample.dvars().haploFreq[first_key]
        haplotype_count_map = sample.dvars().haploNum[first_key]
        num_classes = len(haplotype_map)

        # Haplotype keys are joined as 'a-b-c' strings to label each class.
        class_freq = dict()
        for k, v in haplotype_map.items():
            class_freq['-'.join(str(x) for x in k)] = v
        class_count = dict()
        for k, v in haplotype_count_map.items():
            class_count['-'.join(str(x) for x in k)] = v

        # Own name for the record, so the sample population drawn above is
        # not rebound.
        record = dict(subpop=sp_name, crichness=num_classes,
                      cfreq=class_freq, ccount=class_count)
        sample_list.append(record)

    data.storeClassFrequencySamples(sim_id, gen, rep, fname, fcli, seed,
                                    ssize, popsize, mutation, sample_list)
    return True
def sampleAndExport(pop):
    """Export a 1/2000 sample per subpopulation and run the Perl converters.

    The sample is written in 'fstat' (.dat) and 'phylip' (.phy) format, then
    two Perl scripts are run on the .phy file to produce a merged .phy file
    and a .fas file.

    Args:
        pop: population being evolved.

    Returns:
        bool: always True.
    """
    sz = pop.subPopSizes()
    new_sz = [x // 2000 for x in sz]
    sample = drawRandomSample(pop, new_sz)

    gen = pop.dvars().gen
    # NOTE(review): ``iter`` is read from the enclosing scope, as in the
    # original; presumably a module-level counter that shadows the builtin.
    sample_stem = 'sim16b_4allele_mono_1of2_realmu_%d_000000_sample_%d' % (iter, gen)
    merged_stem = 'sim16b_4allele_mono_1of2_realmu_%d_000000_merged_sample_%d' % (iter, gen)
    phylip_file = sample_stem + '.phy'

    export(sample, format='fstat', output=sample_stem + '.dat', gui=False)
    export(sample, format='phylip', output=phylip_file,
           alleleNames=('A', 'C', 'G', 'T'), gui=False)

    # Each command names two files. The original format strings had four %d
    # placeholders but only two arguments, which raised TypeError ("not
    # enough arguments for format string") before any command ran.
    os.system('perl convert_diploid.pl N %s %s'
              % (phylip_file, merged_stem + '.phy'))
    os.system('perl phylip_to_fasta.pl %s %s'
              % (phylip_file, sample_stem + '.fas'))
    return True
def get_mean_r2(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    """Return the mean pairwise r2 among segregating loci of subpopulation 0.

    A population of ``n_subpops`` subpopulations of size ``Ne`` is evolved
    for ``gens`` generations under migration, then ``S`` individuals are
    sampled from the first subpopulation only (all other sample sizes are
    zero).

    Returns:
        list: one-element list holding the mean r2 for subpopulation 0.

    Raises:
        Exception: if fewer than two loci are segregating in the sample.
    """
    # pairwise migration matrix
    M = get_migration_matrix(m, n_subpops)

    # initialise a biallelic population
    n_alleles = 2
    allele_labels = [str(i) for i in range(n_alleles)]
    pop = sim.Population(size=[Ne] * n_subpops,
                         ploidy=2,
                         loci=[1] * n_loci,
                         alleleNames=allele_labels,
                         infoFields='migrate_to')
    sim.initGenotype(pop, freq=[initial_frequencies, 1 - initial_frequencies])
    sim.initSex(pop)
    print(M)

    # burn-in generations
    pop.evolve(initOps=[],
               preOps=sim.Migrator(M, mode=sim.BY_PROBABILITY),
               matingScheme=sim.RandomMating(),
               gen=gens)

    # S individuals from subpopulation 0, none from the others
    sizes = [S] + [0] * (n_subpops - 1)
    sample_pop = drawRandomSample(pop, sizes=sizes)

    # per-subpopulation allele frequencies and pairwise r2
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    locus_pairs = list(itertools.combinations(list(range(n_loci)), r=2))
    sim.stat(sample_pop, LD=locus_pairs, vars=['R2_sp'])

    r2s = []
    for sp in [0]:
        allele_freqs = sample_pop.dvars(sp).alleleFreq
        # A locus counts as segregating when its allele-0 frequency is less
        # than 0.45 away from 0.5.
        seg_alleles = []
        for k in range(n_loci):
            if np.abs(.5 - allele_freqs[k][0]) < .5 - 0.05:
                seg_alleles.append(k)
        if len(seg_alleles) < 2:
            raise Exception("<2 segregating alleles")

        r2_sum = 0
        count = 0
        for first, second in itertools.combinations(seg_alleles, r=2):
            r2_sum += sample_pop.dvars(sp).R2[first][second]
            count += 1
        r2s.append(r2_sum / count)
    return r2s
def add_to_meta_pop(self, pop):
    """Sample the population and add the individuals to the meta population.

    The sample size comes from ``self.sample_size`` indexed by the current
    generation, and the draw is made with ``subPops=sim.ALL_AVAIL``. The
    size and the generation are stored on the population as ``ss`` and
    ``gen_sampled_from``.

    Args:
        pop: population being evolved.

    Returns:
        bool: always True.
    """
    pop_vars = pop.dvars()
    generation = pop_vars.gen
    how_many = self.sample_size[generation]

    sampled = sampling.drawRandomSample(pop, sizes=how_many,
                                        subPops=sim.ALL_AVAIL)
    pop_vars.ss = how_many
    pop_vars.gen_sampled_from = generation
    self.meta_population.addIndFrom(sampled)
    return True
def _get_sub_sample(pop, size, sub_pop=None): '''Gets a subsample of individuals.''' if sub_pop is None: pop_s = pop else: pop_s = pop.extractSubPops(subPops=[sub_pop]) if size is None: return pop_s pop_s = sampling.drawRandomSample(pop_s, sizes=size) return pop_s
def calcFst(pop):
    """Calculate Fst and Gst for the whole population and a random sample.

    Statistics are computed over loci 0-4 on ``pop`` and on a random sample
    of 500 individuals per subpopulation. One summary line is printed.

    Args:
        pop: population being evolved.

    Returns:
        bool: always True.
    """
    stat(pop, structure=range(5), vars=['F_st', 'G_st'])
    sample = drawRandomSample(pop, sizes=[500] * pop.numSubPop())
    stat(sample, structure=range(5), vars=['F_st', 'G_st'])
    # Parenthesised single-argument print works the same on Python 2 and 3;
    # the bare print statement is a SyntaxError on Python 3.
    print('Gen: %3d Gst: %.6f (all), %.6f (sample) Fst: %.6f (all) %.6f (sample)'
          % (pop.dvars().gen, pop.dvars().G_st, sample.dvars().G_st,
             pop.dvars().F_st, sample.dvars().F_st))
    return True
def add_to_meta_pop(self, pop):
    """Sample the replicate, save the sample to disk, and add it to the meta replicate.

    The sample is saved as ``sampled_rep_<rep>_gen_<gen>_metapop.pop``. The
    sample size (from ``self.sample_size`` at the current generation) and
    the generation are stored on the population as ``ss`` and
    ``gen_sampled_from``.

    Args:
        pop: population replicate being evolved.

    Returns:
        bool: always True.
    """
    state = pop.dvars()
    rep_id = state.rep
    generation = state.gen
    draw_size = self.sample_size[generation]

    sampled = sampling.drawRandomSample(pop, sizes=draw_size)
    sampled_file_name = ''.join(
        ['sampled_rep_', str(rep_id), '_gen_', str(generation), '_metapop.pop'])
    sampled.save(sampled_file_name)

    state.ss = draw_size
    state.gen_sampled_from = generation
    self.meta_replicates.population(rep_id).addIndFrom(sampled)
    return True
def sampleAlleleAndGenotypeFrequencies(pop, param):
    """Sample haplotype-class frequencies and counts per subpopulation.

    Each named subpopulation is sampled at the same size (the minimum of
    ``ceil(ssize * n)`` over subpopulation sizes ``n``). Haplotype
    frequencies and counts over loci ``0..num_loci-1`` are computed on the
    sample, and the per-subpopulation records are passed to
    ``data.storeClassFrequencySamples``.

    Args:
        pop: simuPOP population replicate.
        param (sequence): ``(ssize, mutation, popsize, sim_id, num_loci,
            fname, fcli, seed)``, unpacked in that order.

    Returns:
        bool: always True.
    """
    import simuPOP as sim
    import simuPOP.sampling as sampling

    (ssize, mutation, popsize, sim_id, num_loci, fname, fcli, seed) = param
    rep = pop.dvars().rep
    gen = pop.dvars().gen
    subpops = pop.subPopNames()
    sample_list = list()
    subpop_sizes = pop.subPopSizes()
    sample_sizes = [int(math.ceil(ssize * n)) for n in subpop_sizes]
    min_sample_size = min(sample_sizes)

    for sp_name in subpops:
        sample = sampling.drawRandomSample(pop,
                                           subPops=pop.subPopByName(sp_name),
                                           sizes=min_sample_size)
        sim.stat(sample, haploFreq=range(0, num_loci),
                 vars=['haploFreq', 'haploNum'])
        sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

        # keys() returns a view on Python 3 and cannot be indexed directly;
        # listing it first keeps the "first key" lookup working on both
        # Python 2 and 3.
        keys = list(sample.dvars().haploFreq.keys())
        haplotype_map = sample.dvars().haploFreq[keys[0]]
        haplotype_count_map = sample.dvars().haploNum[keys[0]]
        num_classes = len(haplotype_map)

        # Label every haplotype class by its alleles joined with '-'.
        class_freq = dict()
        for k, v in haplotype_map.items():
            key = '-'.join(str(x) for x in k)
            class_freq[key] = v

        class_count = dict()
        for k, v in haplotype_count_map.items():
            key = '-'.join(str(x) for x in k)
            class_count[key] = v

        # Named separately from ``sample`` so the sampled population is not
        # rebound to a dict.
        summary = dict(subpop=sp_name, crichness=num_classes,
                       cfreq=class_freq, ccount=class_count)
        sample_list.append(summary)

    data.storeClassFrequencySamples(sim_id, gen, rep, fname, fcli, seed,
                                    ssize, popsize, mutation, sample_list)
    return True
def simulate(NA, N1, N2, Tbeforesplit, Taftersplit, r2loci, numLoci, K_sel,
             Mu_sel, S_sel, L_neut, Mu_neut, Nsample1, Nsample2):
    """Evolve one population through a split and sample both descendants.

    One chromosome carries an overdominant selected locus (index 0) followed
    by neutral loci. The population is resized to ``N1 + N2`` one generation
    before the split, split into sizes ``N1`` and ``N2`` at generation
    ``Tbeforesplit``, and evolved for ``Tbeforesplit + Taftersplit``
    generations. A random sample of ``Nsample1`` and ``Nsample2``
    individuals is then written to 'output.txt'.

    Returns:
        The value returned by ``pop.evolve``.
    """
    last_gen = Tbeforesplit + Taftersplit
    pop = Population(size=NA, ploidy=2, loci=[L_neut + 1],
                     infoFields='fitness')

    def getfitness(geno):
        # geno is (A1 A2) at the selected locus: homozygotes pay S_sel,
        # heterozygotes have fitness 1.
        return 1 - S_sel if geno[0] == geno[1] else 1

    init_ops = [
        InitSex(),
        # start with 5 equally frequent alleles at the selected locus
        InitGenotype(loci=0, freq=[1./5] * 5),
    ]
    pre_ops = [
        # resize the ancestral population just before the split
        sim.ResizeSubPops([0], sizes=[N1 + N2], at=Tbeforesplit - 1),
        # split the ancestral population into sizes N1 and N2
        sim.SplitSubPops(subPops=0, sizes=[N1, N2], at=Tbeforesplit),
        # overdominant selection via getfitness
        PySelector(loci=0, func=getfitness),
    ]
    # recombination between the selected locus and the neutral sequence
    mating = RandomMating(ops=[Recombinator(rates=r2loci, loci=0)])
    post_ops = [
        # K-allele mutation at the selected locus
        KAlleleMutator(k=K_sel, rates=[Mu_sel], loci=[0]),
        # mutation on the neutral sequence
        # NOTE(review): range(1, L_neut) stops at locus L_neut - 1 although
        # the chromosome has L_neut + 1 loci; confirm the last locus is
        # meant to be excluded.
        SNPMutator(u=Mu_neut, v=0, loci=range(1, L_neut)),
        # allele frequencies at the selected locus
        sim.Stat(alleleFreq=[0]),
        # print the first five allele frequencies at the last generation
        sim.PyEval(r'"%.3f\t %.3f\t %.3f\t %.3f\t %.3f\n" % (alleleFreq[0][0],alleleFreq[0][1],alleleFreq[0][2],alleleFreq[0][3],alleleFreq[0][4])', at=last_gen),
    ]

    g = pop.evolve(initOps=init_ops,
                   preOps=pre_ops,
                   matingScheme=mating,
                   postOps=post_ops,
                   gen=last_gen)

    # two random samples, one per descendant subpopulation, written to file
    sample = drawRandomSample(pop, sizes=[Nsample1, Nsample2])
    sim.utils.saveCSV(sample, filename='output.txt')
    return g
def get_mean_r2(Ne, S, n_loci, gens, repeats, n_subpops, initial_frequencies, m):
    """Return the mean pairwise r2 among segregating loci, per subpopulation.

    ``n_subpops`` subpopulations of size ``Ne`` are evolved for ``gens``
    generations with migration matrix ``get_migration_matrix(m, n_subpops)``.
    ``S`` individuals are then sampled from each subpopulation and r2 is
    averaged over all pairs of segregating loci. ``repeats`` is accepted but
    not used by this function.

    Returns:
        list: mean r2 for each subpopulation, in subpopulation order.

    Raises:
        Exception: if a subpopulation sample has fewer than two segregating
            loci.
    """
    M = get_migration_matrix(m, n_subpops)
    pop = sim.Population(size=[Ne] * n_subpops,
                         ploidy=2,
                         loci=[1] * n_loci,
                         infoFields='migrate_to')
    sim.initGenotype(pop, freq=initial_frequencies)
    init_ops = [sim.InitSex(), sim.InitGenotype(freq=initial_frequencies)]
    pop.evolve(initOps=init_ops,
               preOps=sim.Migrator(rate=M),
               matingScheme=sim.RandomMating(),
               gen=gens)

    sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)
    # per-subpopulation allele frequencies
    sim.stat(sample_pop, alleleFreq=range(0, n_loci), vars=['alleleFreq_sp'])
    # per-subpopulation r2 for every pair of loci
    all_pairs = list(combinations(list(range(n_loci)), r=2))
    sim.stat(sample_pop, LD=all_pairs, vars=['R2_sp'])

    r2s = []
    for sp in range(n_subpops):
        allele_freqs = sample_pop.dvars(sp).alleleFreq
        # Segregating: allele-0 frequency less than 0.45 away from 0.5.
        seg_alleles = []
        for k in range(n_loci):
            if np.abs(.5 - allele_freqs[k][0]) < .5 - 0.05:
                seg_alleles.append(k)
        if len(seg_alleles) < 2:
            raise Exception("<2 segregating alleles")

        r2_sum = 0
        count = 0
        for locus_a, locus_b in combinations(seg_alleles, r=2):
            r2_sum += sample_pop.dvars(sp).R2[locus_a][locus_b]
            count += 1
        r2s.append(r2_sum / count)
    return r2s
def get_FCs(Ne, S, n_loci, gens, n_subpops, initial_frequencies, m):
    '''Run the allelic-fluctuation simulation and return one FC per subpopulation.

    Two populations are initialised with the same allele frequencies: one
    with ``Ne`` individuals per subpopulation, which is evolved for ``gens``
    generations with migration, and one with ``S`` per subpopulation, which
    serves as the initial sample. After evolution ``S`` individuals are
    sampled per subpopulation. For every locus whose initial pair of
    frequencies does not satisfy ``p0**2 + p1**2 == 1``, ``fc_variant`` is
    evaluated and the values are averaged per subpopulation.

    Returns:
        list: mean FC for each subpopulation.
    '''
    subpop_loci = [1] * n_loci
    # population to evolve
    popNe = sim.Population(size=[Ne] * n_subpops, ploidy=2, loci=subpop_loci,
                           infoFields='migrate_to')
    # initial sample population
    popS = sim.Population(size=[S] * n_subpops, ploidy=2, loci=[1] * n_loci)
    sim.initGenotype(popNe, freq=initial_frequencies)
    sim.initGenotype(popS, freq=initial_frequencies)

    # allele frequencies of the initial sample
    sim.stat(popS, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    M = get_migration_matrix(m, n_subpops)
    popNe.evolve(initOps=[sim.InitSex()],
                 preOps=sim.Migrator(rate=M),
                 matingScheme=sim.RandomMating(),
                 gen=gens)

    sample_pop = drawRandomSample(popNe, sizes=[S] * n_subpops)
    sim.stat(sample_pop, alleleFreq=range(n_loci), vars=['alleleFreq_sp'])

    all_FCs = []
    for sp in range(n_subpops):
        freqs_before = popS.dvars(sp).alleleFreq
        freqs_after = sample_pop.dvars(sp).alleleFreq
        used_loci = 0
        fc_total = 0
        for locus in range(n_loci):
            init_pair = repair(freqs_before[locus])
            end_pair = repair(freqs_after[locus])
            # Skip loci whose initial squared frequencies sum to exactly 1.
            if init_pair[0]**2 + init_pair[1]**2 == 1:
                continue
            fc_total += fc_variant([init_pair[0], init_pair[1]],
                                   [end_pair[0], end_pair[1]])
            used_loci += 1
        all_FCs.append(fc_total / used_loci)
    return all_FCs
def calcFst(pop):
    """Record Fst for the population, 100 samples, and 1000 reshuffles of each.

    For each of 100 random samples (50 individuals from each of two
    subpopulations) the sample Fst is computed. The sample is then merged,
    randomly reordered and re-split into two groups of 50 a thousand times,
    with Fst recomputed each time. One tab-separated row per sample is
    accumulated and passed to ``reccord`` under the name "dataout".

    Returns:
        bool: always True.
    """
    loci = range(10)
    sim.stat(pop, structure=loci, vars=['F_st'])
    Fstpop = pop.dvars().F_st

    rows = []
    for a in range(100):
        sample = drawRandomSample(pop, sizes=[50]*2)
        sim.stat(sample, structure=range(10), vars=['F_st'])
        Fstsample = sample.dvars().F_st
        sample.addInfoFields('order')
        order = list(range(100))

        shuffled_fst = []
        for rep in range(1000):
            # The sample is reshuffled in place: merge, assign a random
            # order, sort by it, and split back into two groups of 50.
            sample.mergeSubPops()
            np.random.shuffle(order)
            sample.setIndInfo(order, field='order')
            sample.sortIndividuals('order')
            sample.splitSubPop(0, [50]*2)
            sim.stat(sample, structure=range(10), vars=['F_st'])
            shuffled_fst.append('%s\t' % sample.dvars().F_st)
        fstsim = ''.join(shuffled_fst)
        rows.append('%3d\t%.6f\t%3d\t%.6f\t%s\n'
                    % (pop.dvars().gen, Fstpop, a, Fstsample, fstsim))

    sortie = ''.join(rows)
    reccord(sortie, "dataout")
    return True
def sampleNumAlleles(pop, param):
    """Sample allele richness at every locus and store it.

    A random sample of ``ssize`` individuals is drawn from the replicate.
    For each locus, the number of alleles with a frequency entry in the
    sample is passed to ``_storeRichnessSample``.

    Args:
        pop (Population): simuPOP population replicate.
        param (sequence): ``(sample size, mutation rate, population size,
            simulation ID, number of loci)``, unpacked in that order.

    Returns:
        bool: always True (PyOperator callbacks need to return true).
    """
    ssize, mutation, popsize, sim_id, numloci = param
    replicate_vars = pop.dvars()
    popID = replicate_vars.rep
    gen = replicate_vars.gen

    sample = drawRandomSample(pop, sizes=ssize)
    sim.stat(sample, alleleFreq=sim.ALL_AVAIL)
    for locus in range(numloci):
        locus_freqs = sample.dvars().alleleFreq[locus]
        numAlleles = len(locus_freqs.values())
        _storeRichnessSample(popID, ssize, numAlleles, locus, gen, mutation,
                             popsize, sim_id)
    return True
def sampleNumAlleles(pop, param):
    """Store the per-locus allele richness of a random sample.

    Args:
        pop (Population): simuPOP population replicate.
        param (sequence): five values, in order: sample size, mutation rate,
            population size, simulation ID, number of loci.

    Returns:
        bool: always True (PyOperator callbacks need to return true).
    """
    (ssize, mutation, popsize, sim_id, numloci) = param
    popID, gen = pop.dvars().rep, pop.dvars().gen
    sample = drawRandomSample(pop, sizes=ssize)
    sim.stat(sample, alleleFreq=sim.ALL_AVAIL)

    locus = 0
    while locus < numloci:
        # Richness = number of alleles with a frequency entry at this locus.
        richness = len(sample.dvars().alleleFreq[locus].values())
        _storeRichnessSample(popID, ssize, richness, locus, gen, mutation,
                             popsize, sim_id)
        locus += 1
    return True
estimates = [] for r in range(repeats): print(r+1) # set up population pop = sim.Population(size=[Ne], ploidy=2, loci=[1]*n_loci) # evolve population pop.evolve( initOps = [sim.InitSex(), sim.InitGenotype(freq = [0.5,0.5])], matingScheme = sim.RandomMating(), gen = gens ) # take sample of size S sample_pop = drawRandomSample(pop, sizes = S) # get allele frequency sim.stat(sample_pop, alleleFreq = range(0,n_loci), vars = ['alleleFreq']) allele_freqs = sample_pop.vars()['alleleFreq'] seg_alleles = [] # find which alleles are segregating for k in allele_freqs.keys(): if (allele_freqs[k][0] > 0.04) and (allele_freqs[k][1] > 0.04): seg_alleles.append(k) # only proceed if there are 2 or more segregating alleles (to measure r2) if len(seg_alleles) < 2: pass # calculate r2 values
def simulate(NA, N1, N2, Tbeforesplit, Taftersplit, r2loci, numLoci, K_sel,
             Mu_sel, S_sel, L_neut, Mu_neut, Nsample1, Nsample2):
    """Evolve a population through a split, optionally plotting, then sample.

    One chromosome carries an overdominant selected locus (index 0) followed
    by neutral loci. The population is resized to ``N1 + N2`` one generation
    before the split, split into ``N1`` and ``N2`` at ``Tbeforesplit``, and
    evolved for ``Tbeforesplit + Taftersplit`` generations. When the
    module-level ``useRPy`` is true, the first five allele frequencies at
    the selected locus are plotted to 'slocus.png'. A random sample of
    ``Nsample1`` and ``Nsample2`` individuals is written to 'output.txt'.

    Returns:
        The value returned by ``pop.evolve``.
    """
    total_gens = Tbeforesplit + Taftersplit
    pop = Population(size=NA, ploidy=2, loci=[L_neut + 1],
                     infoFields='fitness')

    def getfitness(geno):
        # geno is (A1 A2) at the selected locus
        homozygote = geno[0] == geno[1]
        if not homozygote:
            return 1
        return 1 - S_sel

    if not useRPy:
        plotter = NoneOp()
    else:
        plotter = VarPlotter(
            'alleleFreq[0][0],alleleFreq[0][1],alleleFreq[0][2],alleleFreq[0][3],alleleFreq[0][4]',
            ylim=[0, 1],
            ylab='allele frequency',
            update=Tbeforesplit + Taftersplit - 1,
            saveAs='slocus.png')

    init_ops = [
        InitSex(),
        # ten equally frequent alleles at the selected locus
        InitGenotype(loci=0, freq=[.1] * 10),
    ]
    pre_ops = [
        # resize the ancestral population just before the split
        sim.ResizeSubPops([0], sizes=[N1 + N2], at=Tbeforesplit - 1),
        # split the ancestral population into sizes N1 and N2
        sim.SplitSubPops(subPops=0, sizes=[N1, N2], at=Tbeforesplit),
        # overdominant selection via getfitness
        PySelector(loci=0, func=getfitness),
    ]
    # recombination between the selected locus and the neutral sequence
    mating = RandomMating(ops=[Recombinator(rates=r2loci, loci=0)])
    post_ops = [
        # K-allele mutation at the selected locus
        KAlleleMutator(k=K_sel, rates=[Mu_sel], loci=[0]),
        # mutation on the neutral sequence
        # NOTE(review): range(1, L_neut) stops at locus L_neut - 1 although
        # the chromosome has L_neut + 1 loci; confirm the last locus is
        # meant to be excluded.
        SNPMutator(u=Mu_neut, v=0, loci=range(1, L_neut)),
        # allele frequencies at the selected locus, every generation
        Stat(alleleFreq=0, step=1),
        # every 100 generations print the generation and the first five
        # allele frequencies at the selected locus
        PyEval(
            r'"%.0f\t %.3f\t %.3f\t %.3f\t %.3f\t %.3f\n" % (gen, alleleFreq[0][0], alleleFreq[0][1], alleleFreq[0][2], alleleFreq[0][3], alleleFreq[0][4])',
            step=100),
        plotter,
    ]

    g = pop.evolve(initOps=init_ops,
                   preOps=pre_ops,
                   matingScheme=mating,
                   postOps=post_ops,
                   gen=total_gens)

    # two random samples, one per descendant subpopulation, written to file
    sample = drawRandomSample(pop, sizes=[Nsample1, Nsample2])
    sim.utils.saveCSV(sample, filename='output.txt')
    return g
def get_mean_r2():
    """Estimate Ne from linkage disequilibrium for each migration rate and plot the means.

    Reads the module-level settings ``ms``, ``n_subpops``, ``repeats``,
    ``Ne``, ``n_loci``, ``gens`` and ``S``. For each migration rate ``m`` a
    population is simulated ``repeats`` times and sampled. For every
    subpopulation with at least two segregating loci, the mean r2 is
    corrected for sample size and converted to ``1 / (3 * r2)``. The mean
    estimate per migration rate is shown as a scatter plot.

    Returns:
        None. The function's result is the displayed plot.
    """
    full_estimates = {}
    for m in ms:
        # Symmetric migration matrix: rate m split evenly over the other
        # subpopulations, zero on the diagonal.
        m_adj = m / (n_subpops - 1)
        M = np.full((n_subpops, n_subpops), m_adj)
        np.fill_diagonal(M, 0)
        M = M.tolist()

        estimates = []
        for r in range(repeats):
            print(r + 1)
            # set up population
            pop = sim.Population(size=[Ne] * n_subpops, ploidy=2,
                                 loci=[1] * n_loci, infoFields='migrate_to')
            # evolve population
            pop.evolve(
                initOps=[sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5])],
                preOps=sim.Migrator(rate=M),
                matingScheme=sim.RandomMating(),
                gen=gens
            )
            # take sample of size S from every subpopulation
            sample_pop = drawRandomSample(pop, sizes=[S] * n_subpops)
            # per-subpopulation allele frequencies and pairwise r2
            sim.stat(sample_pop, alleleFreq=range(0, n_loci),
                     vars=['alleleFreq_sp'])
            sim.stat(sample_pop,
                     LD=list(combinations(list(range(n_loci)), r=2)),
                     vars=['R2_sp'])

            rep_estimates = []
            estimates.append(rep_estimates)
            for sp in range(n_subpops):
                allele_freqs = sample_pop.dvars(sp).alleleFreq
                # segregating loci: both alleles above 4% frequency
                seg_alleles = [
                    k for k in allele_freqs.keys()
                    if (allele_freqs[k][0] > 0.04) and (allele_freqs[k][1] > 0.04)
                ]
                # r2 needs at least two segregating loci
                if len(seg_alleles) < 2:
                    continue
                # mean r2 over all pairs of segregating loci
                r2_total = 0
                count = 0
                for pairs in combinations(seg_alleles, r=2):
                    r2_total += sample_pop.dvars(sp).R2[pairs[0]][pairs[1]]
                    count += 1
                mean_r2 = r2_total / count
                # correct r2 for sample size
                r2_drift = (mean_r2 - 1/(2*S)) / (1 - 1/(2*S))
                # Ne estimate
                rep_estimates.append(1/(3*r2_drift))
        full_estimates[m] = estimates

    # Skipped subpopulations make the per-repeat lists ragged, which np.mean
    # cannot average as a 2-D array. Flatten first; for equal-length lists
    # this gives the same overall mean as before.
    means = [
        np.mean([est for rep_estimates in full_estimates[m]
                 for est in rep_estimates])
        for m in ms
    ]
    plt.scatter(ms, means, edgecolors='black', color='white')
    plt.plot([min(ms), max(ms)], [100, 100], 'k--')
    plt.xscale('log')
    plt.xticks(ticks=ms, labels=ms)
    plt.ylim(50, 150)
    plt.xlim(min(ms)*0.95, max(ms)*1.05)
    plt.show()
def runSimulation(scenario_id, sub_population_size, minMatingAge, maxMatingAge, gen):
    '''
    Simulate an age-structured population with breeding grounds, sample it,
    and write frequency, mixture and haplotype/isotope files.

    scenario_id
        Identifies the input files: "mtdna_<scenario_id>.txt" and
        "snp_<scenario_id>.txt".
    sub_population_size
        A vector giving the population sizes for each sub-population.
        The subpopulations determine which breeding ground an individual
        belongs to.
    minMatingAge
        Minimal mating age.
    maxMatingAge
        Maximal mating age. Individuals older than this are effectively dead.
    gen
        NOTE(review): not referenced in the body. The evolve call passes
        gen=years, and ``years`` is not defined in this function. Confirm
        which was intended.

    Writes 'freq.txt', 'mixfile.txt' and 'haploiso.txt' and returns the
    sampled population. Raises ValueError when the haplotype file and
    sub_population_size disagree on the number of populations.

    The names nb_loci, numberOfFeedingGrounds, sample_count,
    init_native_breeding_grounds, configure_new_population_size,
    postop_processing and add are read from the enclosing module.
    '''
    # scenario_id describes the batch of files to load
    # The mitochondrial DNA will be in mtdna_<scenario_id>
    # The SNP DNA will be in snp_<scenario_id>

    # Read the mitochondrial haplotype frequencies. There's a bit to unpack here
    # We read the lines into an array, and for each one, call split() on it to get one element per column.
    # However, we do not want this - we want the transpose, where haplotype_frequencies[0] is a vector of
    # all the frequencies for population 0, and haplotype_frequencies[1] is the corresponding vector for
    # population 1. list(map(list, zip(*t))) will achieve this transformation for us.
    # While we are at it, we also convert the strings into floats.
    # line[0:-1] drops the last character of each line before splitting.
    mitochondrial_file = "mtdna_" + scenario_id + ".txt"
    with open(mitochondrial_file, "r") as fd:
        haplotype_frequencies = list(
            map(list, zip(*[list(map(float, line[0:-1].split())) for line in fd])))

    if len(haplotype_frequencies) != len(sub_population_size):
        raise ValueError(
            'The number of populations in the population size vector and the number of populations deduced from the haplotype file are different'
        )

    # Now read the SNP data. This builds a 2D array indexed as snp[locus][population]
    snp_file = "snp_" + scenario_id + ".txt"
    with open(snp_file, "r") as fd:
        snp = [list(map(float, line[0:-1].split())) for line in fd]

    sub_population_count = len(sub_population_size)
    print()
    print(sub_population_count, "subpopulations detected")

    # Now we can create the population. We want to give each population a population name, starting from A
    sub_population_names = list(map(chr, range(65, 65 + sub_population_count)))

    # We have two chromosomes. The first is an autosome with nb_loci loci, and the second is the mitochondrial chromosome with 1 locus
    pop = simuPOP.Population(
        sub_population_size,
        ploidy=2,
        loci=[nb_loci, 1],
        ancGen=2,
        infoFields=[
            'age', 'ind_id', 'father_id', 'mother_id', 'nitrogen', 'carbon',
            'feeding_ground', 'native_breeding_ground', 'migrate_to'
        ],
        subPopNames=sub_population_names,
        chromTypes=[simuPOP.AUTOSOME, simuPOP.MITOCHONDRIAL])
    sub_population_names = tuple(sub_population_names)

    # Create an attribute on each individual called 'age'. Set it to a random number between 0 and maxMatingAge
    # Note that size is a vector - the size of each population. We have to sum these to get the total number of individuals
    individual_count = sum(sub_population_size)

    # Assign a random age to each individual
    pop.setIndInfo(
        [random.randint(0, maxMatingAge) for x in range(individual_count)],
        'age')
    # Assign a random feeding ground to each individual
    pop.setIndInfo([
        random.randint(0, numberOfFeedingGrounds - 1)
        for x in range(individual_count)
    ], 'feeding_ground')

    # Currently we have these virtual subpopulations:
    # age < minMatingAge (juvenile)
    # age >= minMatingAge and age < maxMatingAge + 0.1 (age <= maxMatingAge) (mature)
    # age >= maxMatingAge (dead)
    #
    # Ideally we would want something like this:
    # 1) Immature
    # 2) Receptive female (every 3 years)
    # 3) Non-receptive female
    # 4) Mature male
    # 5) Dead
    #
    # Note that we use a cutoff InfoSplitter here, it is also possible to
    # provide a list of values, each corresponding to a virtual subpopulation.
    #
    # vspMap below defines eight virtual subpopulations: indices 0-5 are the
    # sex-by-age products, index 6 ('Not dead yet') combines 0, 1, 3 and 4,
    # and index 7 ('Active') combines 1 and 4.
    pop.setVirtualSplitter(
        simuPOP.CombinedSplitter([
            simuPOP.ProductSplitter([
                simuPOP.SexSplitter(),
                simuPOP.InfoSplitter('age',
                                     cutoff=[minMatingAge, maxMatingAge + 0.1],
                                     names=['juvenile', 'mature', 'dead'])
            ])
        ],
                                 vspMap=[[0], [1], [2], [3], [4], [5],
                                         [0, 1, 3, 4], [1, 4]],
                                 names=[
                                     'Juvenile Male', 'Mature Male',
                                     'Dead Male', 'Juvenile Female',
                                     'Mature Female', 'Dead Female',
                                     'Not dead yet', 'Active'
                                 ]))

    pop.evolve(
        initOps=[
            simuPOP.InitSex(),
            simuPOP.IdTagger(),
            simuPOP.PyOperator(func=init_native_breeding_grounds)
        ] + [
            # Mitochondrial locus (index nb_loci): per-subpopulation haplotype frequencies
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=haplotype_frequencies[i],
                                 loci=[nb_loci])
            for i in range(0, sub_population_count)
        ] + [
            # SNP loci: two alleles with frequencies snp[n][i] and 1 - snp[n][i]
            # NOTE(review): n runs over range(0, nb_loci - 1), so locus
            # nb_loci - 1 gets no InitGenotype here - confirm this is intended.
            simuPOP.InitGenotype(subPops=sub_population_names[i],
                                 freq=[snp[n][i], 1 - snp[n][i]],
                                 loci=[n])
            for i in range(0, sub_population_count)
            for n in range(0, nb_loci - 1)
        ],
        # increase age by 1
        preOps=[simuPOP.InfoExec('age += 1')],
        matingScheme=simuPOP.HeteroMating(
            [
                # age <= maxAge, copy to the next generation (weight=-1)
                # subPops is a list of tuples that will participate in mating. The tuple is a pair (subPopulation, virtualSubPopulation)
                # First, we propagate (clone) all individuals in all subpopulations (and all VSPs except the ones who are now in the VSP of deceased individuals) to the next generation
                # (VSP index 6 is 'Not dead yet' in the splitter above.)
                simuPOP.CloneMating(
                    ops=[simuPOP.CloneGenoTransmitter(chroms=[0, 1])],
                    subPops=[
                        (sub_population, 6)
                        for sub_population in range(0, sub_population_count)
                    ],
                    weight=-1),
                # Then we simulate random mating only among reproductively mature individuals within subpopulation (breeding/winter grounds)
                # (VSP index 7 is 'Active' in the splitter above.)
                simuPOP.RandomMating(
                    ops=[
                        simuPOP.MitochondrialGenoTransmitter(),
                        simuPOP.MendelianGenoTransmitter(),
                        simuPOP.IdTagger(),
                        simuPOP.InheritTagger(mode=simuPOP.MATERNAL,
                                              infoFields=['feeding_ground']),
                        simuPOP.InheritTagger(
                            mode=simuPOP.MATERNAL,
                            infoFields=['native_breeding_ground']),
                        simuPOP.PedigreeTagger()
                    ],
                    subPops=[
                        (sub_population, 7)
                        for sub_population in range(0, sub_population_count)
                    ],
                    weight=1)
            ],
            subPopSize=configure_new_population_size),
        postOps=[
            # Determine the isotopic ratios in individuals
            simuPOP.PyOperator(func=postop_processing),
            simuPOP.Migrator(mode=simuPOP.BY_IND_INFO),
            # count the individuals in each virtual subpopulation
            #simuPOP.Stat(popSize=True, subPops=[(0,0), (0,1), (0,2), (1,0), (1, 1), (1, 2)]),
            # print virtual subpopulation sizes (there is no individual with age > maxAge after mating)
            #simuPOP.PyEval(r"'Size of age groups: %s\n' % (','.join(['%d' % x for x in subPopSize]))")
            # Alternatively, calculate the Fst
            # FIXME: How does this actually work? Does it work for > 2 populations? I don't really understand it yet
            # ELC: it is a calculation that partitions variance among and between populations, and can be calculated as a
            # global statistic or on a pairwise basis. We use it as an indication of genetic differentiation.
            simuPOP.Stat(structure=range(1),
                         subPops=sub_population_names,
                         suffix='_AB',
                         step=10),
            simuPOP.PyEval(r"'Fst=%.3f \n' % (F_st_AB)", step=10)
        ],
        # NOTE(review): ``years`` is not a parameter of this function; the
        # ``gen`` parameter is unused. Confirm which was intended.
        gen=years)

    #simuPOP.dump(pop, width=3, loci=[], subPops=[(simuPOP.ALL_AVAIL, simuPOP.ALL_AVAIL)], max=1000, structure=False);
    #return
    # NOTE(review): ped is assigned but not used again in this function.
    ped = simuPOP.Pedigree(pop)
    print("This is the pedigree stuff")
    simuPOP.dump(pop)

    # Now sample the individuals
    sample = drawRandomSample(pop, sizes=[sample_count] * sub_population_count)

    # Print out the allele frequency data
    simuPOP.stat(sample, alleleFreq=simuPOP.ALL_AVAIL)
    frequencies = sample.dvars().alleleFreq
    with open('freq.txt', 'w') as freqfile:
        index = 0
        for locus in frequencies:
            # Skip the mitochondrial locus
            if (locus == nb_loci):
                continue
            # Skip loci with fewer than two frequency entries
            if (len(frequencies[locus]) < 2):
                continue
            print(index, end=' ', file=freqfile)
            index = index + 1
            for allele in frequencies[locus]:
                print(frequencies[locus][allele], end=' ', file=freqfile)
            print(file=freqfile)

    # We want to remove monoallelic loci. This means a position in the genotype for which all individuals have the same value in both alleles
    # To implement this we will build up a list of loci that get ignored when we dump out the file. Generally speaking, if we add all the values up
    # then either they will sum to 0 (if all individuals have type 0) or to the number of individuals * 2 (if all individuals have type 1)
    geno_sum = [0] * (nb_loci + 1) * 2
    for individual in sample.individuals():
        geno_sum = list(map(add, geno_sum, individual.genotype()))
    # Add the two halves of the genotype vector together, locus by locus
    final_sum = list(
        map(add, geno_sum[:(nb_loci + 1)], geno_sum[(nb_loci + 1):]))

    monoallelic_loci = []
    for i in range(0, nb_loci):
        if final_sum[i] == 0 or final_sum[
                i] == sample_count * sub_population_count * 2:
            monoallelic_loci = [i] + monoallelic_loci
    monoallelic_loci = sorted(monoallelic_loci, reverse=True)

    nb_ignored_loci = len(monoallelic_loci)
    # Generate the two files
    with open('mixfile.txt', 'w') as mixfile:
        with open('haploiso.txt', 'w') as haplofile:
            # Header lines for both files
            print(sub_population_count,
                  nb_loci - nb_ignored_loci,
                  2,
                  1,
                  file=mixfile)
            print("sex, haplotype, iso1, iso2, native_ground", file=haplofile)
            for i in range(0, nb_loci - nb_ignored_loci):
                print('Loc', i + 1, sep='_', file=mixfile)
            for individual in sample.individuals():
                genotype = individual.genotype()
                # haploiso row: sex flag, mitochondrial allele, carbon, nitrogen
                print(
                    1 if individual.sex() == 1 else 0,
                    genotype[nb_loci],
                    individual.info('carbon'),
                    individual.info('nitrogen'),
                    # int(individual.info('native_breeding_ground')),
                    file=haplofile,
                    sep=' ')
                # mixfile row: 1-based native breeding ground, then both
                # alleles (shifted by +1) of every locus that was kept
                print(int(individual.info('native_breeding_ground') + 1),
                      end=' ',
                      file=mixfile)
                for i in range(0, nb_loci):
                    if i not in monoallelic_loci:
                        print(genotype[i] + 1,
                              genotype[i + nb_loci + 1] + 1,
                              ' ',
                              end='',
                              sep='',
                              file=mixfile)
                print(file=mixfile)
    return sample
# Copyright (C) 2004 - 2010 Bo Peng ([email protected]) # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # This script is an example in the simuPOP user's guide. Please refer to # the user's guide (http://simupop.sourceforge.net/manual) for a detailed # description of this example. # import simuPOP as sim from simuPOP.sampling import drawRandomSample pop = sim.Population([2000] * 5, loci=1) # sample from the whole population sample = drawRandomSample(pop, sizes=500) print(sample.subPopSizes()) # sample from each subpopulation sample = drawRandomSample(pop, sizes=[100] * 5) print(sample.subPopSizes())
#Recombination ops=[sim.Recombinator(rates=0.002)]), postOps=[ #Mutation rate 10e-6 sim.SNPMutator(u=0.000001, v=0.000001) ], #Evolve for a number 'numgen' of generations gen=numgen) #Getting population informations (number of subpopulations, population size) sim.stat(pop, popSize=True) subsize = pop.dvars().subPopSize numpop = len(subsize) #Setting environmental value for all individuals in each subpopulation for i in range(numpop): pop.setIndInfo(vec_env[i], 'env', subPop=i) #Sampling 20 individuals at random in each population sample = drawRandomSample(pop, sizes=[20] * numpop) #Adding population name to the field of individuals sample.addInfoFields('pop_name') vecname = [] for i in range(1, numpop + 1): vecname = vecname + [i] * 20 sample.setIndInfo(vecname, 'pop_name') #Saving the data into a .csv format saveCSV(sample, filename="sim" + str(k) + ".csv", infoFields=['pop_name', 'env'], sexFormatter=None, affectionFormatter=None, header=False)
sim.PyEval(r"', '.join(['%.3f' % alleleFreq[1][x] for x in range(4)]) + '\n'", step=1000), sim.PyEval(r"', '.join(['%.3f' % alleleFreq[2][x] for x in range(4)]) + '\n'", step=1000), sim.Stat(popSize=True, step=1000), sim.PyEval(r'"PreMig: %s\n" % subPopSize', step=1000), # sim.Migrator(rate=[ # [0, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7], # This is a chance # [1e-7, 0, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7], # based on a proportion # [1e-7, 1e-7, 0, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 0, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 0, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 0, 1e-7, 1e-7, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 0, 1e-7, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 0, 1e-7, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 0, 1e-7], # [1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 1e-7, 0] # ], begin=1000), sim.Stat(popSize=True, step=1000), sim.PyEval(r'"PreSex: %s\n" % subPopSize', step=1000) ], matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=[0.25e-6]), subPopSize=SbPSz), # add SbPSz(pop, gen) here postOps=[ sim.Stat(popSize=True, step=1000), sim.PyEval(r'"PstSex: %s\n" % subPopSize', step=1000), ], finalOps=[ ], gen=3001 ) sample = drawRandomSample(pop, sizes=[1]*10) sim.utils.saveCSV(sample, filename='replacetext'),
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import random

import simuPOP as sim
from simuPOP.sampling import drawRandomSample

# Create an age-structured population with a disease: each individual gets
# a random integer age between 0 and 70 (inclusive) in the 'age' info field.
pop = sim.Population(10000, loci=10, infoFields='age')
sim.initGenotype(pop, freq=[0.3, 0.7])
sim.initInfo(pop, lambda: random.randint(0, 70), infoFields='age')

# Define virtual subpopulations by age, using cutoffs at 20 and 40.
age_splitter = sim.InfoSplitter(cutoff=(20, 40), field='age')
pop.setVirtualSplitter(age_splitter)

# The two (subpopulation, virtual subpopulation) pairs studied below.
# NOTE(review): presumably these are the 20-40 and 40+ age groups -- confirm
# against the InfoSplitter documentation.
age_groups = [(0, 1), (0, 2)]

# Different age groups have different penetrance. The calls are made in the
# same order as the groups are listed so the random draws are unchanged.
group_penetrances = [(0.1, 0.2, 0.3), (0.2, 0.4, 0.6)]
for group, penetrance in zip(age_groups, group_penetrances):
    sim.maPenetrance(pop, loci=5, penetrance=penetrance, subPops=[group])

# Count the number of affected individuals in each group.
sim.stat(pop, numOfAffected=True, subPops=age_groups,
         vars='numOfAffected_sp')
affected_first = pop.dvars(age_groups[0]).numOfAffected
affected_second = pop.dvars(age_groups[1]).numOfAffected
print(affected_first, affected_second)

# Draw 500 individuals from each of the two virtual subpopulations.
sample = drawRandomSample(pop, sizes=[500, 500], subPops=age_groups)
# Virtual subpopulations are rearranged to different subpopulations.
print(sample.subPopSizes())
myCsvRow = "%s \n" % (neVal1) fd.write(myCsvRow) fd.close() return True if __name__ == "__main__": # get all parameters pars = simuOpt.Params(options) if not pars.getParam(): sys.exit(0) print (pars.asList()) runName = pars.runID pathName = homeDir + "/Samples/" + runName print (pathName) make_sure_path_exists(pathName) sims = simuGeneticDrift( pathName, pars.popSize, pars.sampSize, pars.loc, pars.allPerLoc, pars.generations, pars.replications, pars.dist ) for i in range(0, pars.replications): samp = drawRandomSample(sims.population(i), pars.sampSize) outfile = pathName + "/sample" + str(i) + ".dat" SaveFstat(samp, outfile) os.remove(pathName + "/temp.csv")
], matingScheme=sim.RandomMating( ops=[sim.Recombinator(intensity=args.recomb_rate)]), postOps=[ sim.Stat(numOfSegSites=sim.ALL_AVAIL, step=50), sim.PyEval(r"'Gen: %2d #seg sites: %d\n' % (gen, numOfSegSites)", step=50) ], gen=args.generations) logfile.write("Done simulating!\n") logfile.write(time.strftime('%X %x %Z') + "\n") logfile.write("----------\n") logfile.flush() sample = drawRandomSample(pop, sizes=args.nsamples) if args.outfile is None: print("NOT writing out genotype data.\n") else: print("Writing out genotype data to " + args.outfile + "\n") export(sample, format='PED', output=args.outfile) logfile.write("Done writing out!\n") logfile.write(time.strftime('%X %x %Z') + "\n") logfile.write("----------\n") logfile.flush() logfile.write("All done!\n") logfile.close()
def sampleAndExport(pop):
    """Draws a random sample from every subpopulation and exports it.

    The number of individuals drawn from each subpopulation is that
    subpopulation's size integer-divided by 200. The sample is written in
    'fstat' format to a file whose name ends with the current generation
    number (read from ``pop.dvars().gen``).

    Args:
        pop (Population): simuPOP population to sample from.

    Returns:
        Boolean true: always (presumably so the function can be used as a
        PyOperator callback -- confirm against the caller).
    """
    # One sample size per subpopulation, in subpopulation order.
    sample_sizes = []
    for subpop_size in pop.subPopSizes():
        sample_sizes.append(subpop_size // 200)

    sample = drawRandomSample(pop, sample_sizes)

    # The output file name is tagged with the generation being sampled.
    output_name = 'sim10n_mono_a_000000_sample_%d.dat' % pop.dvars().gen
    export(sample, format='fstat', output=output_name, gui=False)
    return True
popS = sim.Population(size=[S], ploidy=2, loci=[1]*n_loci) sim.initGenotype(popNe, freq = initial_frequencies) sim.initGenotype(popS , freq = initial_frequencies) sim.stat(popS, alleleFreq = range(0,n_loci), vars = ['alleleFreq']) initial_allele_freqs = popS.vars()['alleleFreq'] popNe.evolve( initOps = [sim.InitSex()], preOps = sim.Migrator(rate=M), matingScheme = sim.RandomMating(), gen = gens ) sample_pop = drawRandomSample(popNe, sizes = [S]+[0]*9) sim.stat(sample_pop, alleleFreq = range(0,n_loci), vars = ['alleleFreq']) meanFC_repeat = 0 countFC = 0 allele_freqs = sample_pop.vars()['alleleFreq'] for k in allele_freqs.keys() : inits = list(initial_allele_freqs[k].values()) ends = list(allele_freqs[k].values()) if len(inits) + len(ends) == 4: meanFC_repeat += fc_variant(list(initial_allele_freqs[k].values()), list(allele_freqs[k].values()))