def _make_pop(popsize, nloci, locus_position, id_tagger, init_geno,
              recomb_rate, generations, length, init_ts):
    """Create one simuPOP population, evolve it, and record recombinations.

    The Python ``random`` module is re-seeded with a fixed value (123) on
    every call.

    Args:
        popsize: Size of the single subpopulation.
        nloci: Number of loci on the single chromosome.
        locus_position: Locus positions, given both to the population
            (``lociPos``) and to the recombination collector.
        id_tagger: Operator applied once to tag the founders and then handed
            to ``mating_scheme_factory``.
        init_geno: List of extra initialisation operators, appended after
            ``sim.InitSex()``.
        recomb_rate: Intensity given to ``sim.Recombinator``.
        generations: Number of generations to evolve.
        length: Not used by this function.
        init_ts: Object whose ``samples()`` supply the node ids paired with
            the founders' haploid genomes; also passed to the collector.

    Returns:
        Tuple ``(pop, rc)``: the evolved population and the
        ``ftprime.RecombCollector`` that observed it.
    """
    random.seed(123)
    pop = sim.Population(
        size=[popsize],
        loci=[nloci],
        lociPos=locus_position,
        infoFields=['ind_id'],
    )

    # The founders must carry ids before the collector is built, because the
    # collector is keyed on (individual id, ploidy) pairs.
    id_tagger.apply(pop)
    founder_ids = pop.indInfo("ind_id")
    haploid_labels = []
    for founder in founder_ids:
        for ploid in (0, 1):
            haploid_labels.append((founder, ploid))
    node_ids = dict(zip(haploid_labels, init_ts.samples()))

    rc = ftprime.RecombCollector(
        ts=init_ts,
        node_ids=node_ids,
        locus_position=locus_position,
    )
    recombinator = sim.Recombinator(
        intensity=recomb_rate,
        output=rc.collect_recombs,
        infoFields="ind_id",
    )

    def advance_clock(pop):
        # A PyOperator callback must return true or false; a true value
        # presumably keeps the whole population (unconfirmed).
        return rc.increment_time() or True

    scheme = mating_scheme_factory(recombinator, popsize, id_tagger)
    progress = sim.PyEval(r"'Gen: %2d\n' % (gen, )", step=1)
    pop.evolve(
        initOps=[sim.InitSex()] + init_geno,
        preOps=[sim.PyOperator(advance_clock)],
        matingScheme=scheme,
        postOps=[progress],
        gen=generations,
    )
    return pop, rc
def prepare_sim(self, params):
    """Assemble the population, operators and simulator for an island model.

    When ``params['num_snps']`` is positive a SNP genome is created;
    otherwise a genome is created from ``params['num_msats']`` and
    ``params['num_msat_alleles']``.

    Args:
        params: Mapping read for ``num_snps``, ``pop_size``, ``num_pops``,
            ``mig`` and then either ``snp_freq`` or ``num_msats`` /
            ``num_msat_alleles``.

    Returns:
        dict with keys ``sim``, ``pop``, ``init_ops``, ``pre_ops``,
        ``post_ops`` and ``mating_scheme``.
    """
    # Register every info field the attached views ask for.
    for view in self._views:
        for field in view.info_fields:
            self._info_fields.add(field)

    use_snps = params['num_snps'] > 0
    pop, init_ops, pre_ops, post_ops = self._create_island(
        [params['pop_size']] * params['num_pops'],
        params['mig'],
        params['num_snps' if use_snps else 'num_msats'],
    )
    if use_snps:
        _loci, genome_init = self._create_snp_genome(
            params['num_snps'], freq=params['snp_freq'])
    else:
        # NOTE(review): the third value (named gpre_ops in the earlier
        # revision) is never added to pre_ops -- confirm that is intended.
        _loci, genome_init, _gpre_ops = self._create_genome(
            params['num_msats'], start_alleles=params['num_msat_alleles'])

    # Bind each view to the population and gather its operators; the
    # per-view hooks run after the island's own post operators.
    view_ops = []
    for view in self._views:
        view.pop = pop
        view_ops += view.view_ops
    post_ops.extend(
        sp.PyOperator(func=_hook_view, param=view) for view in self._views)
    post_ops = view_ops + post_ops

    sim = sp.Simulator(pop, 1, True)
    prepared = {
        'sim': sim,
        'pop': pop,
        'init_ops': init_ops + genome_init,
        'pre_ops': pre_ops,
        'post_ops': post_ops,
        'mating_scheme': sp.RandomMating(),
    }
    return prepared
def prepare_sim(self, params):
    """Assemble a single-population simulation with selection at locus 0.

    The genome has ``1 + params['neutral_loci']`` SNP loci. Genotype
    fitness at locus 0 depends on ``params['sel_type']``:

    * ``'hz_advantage'``: both homozygotes get ``1 - sel``, heterozygote 1.
    * ``'recessive'``: ``(0, 0)`` and ``(0, 1)`` get ``1 - sel``, ``(1, 1)`` 1.
    * anything else (dominant): only ``(0, 0)`` gets ``1 - sel``.

    Args:
        params: Mapping read for ``neutral_loci``, ``pop_size``,
            ``snp_freq``, ``sel_type`` and ``sel``.

    Returns:
        dict with keys ``sim``, ``pop``, ``init_ops``, ``pre_ops``,
        ``post_ops`` and ``mating_scheme``.
    """
    # Register every info field the attached views ask for.
    for view in self._views:
        for field in view.info_fields:
            self._info_fields.add(field)

    nloci = 1 + params['neutral_loci']
    pop, init_ops, pre_ops, post_ops = self._create_single_pop(
        params['pop_size'], nloci)

    # Bind each view to the population and gather its operators; the
    # per-view hooks run after the population's own post operators.
    view_ops = []
    for view in self._views:
        view.pop = pop
        view_ops += view.view_ops
    post_ops.extend(
        sp.PyOperator(func=_hook_view, param=view) for view in self._views)
    post_ops = view_ops + post_ops

    _loci, genome_init = self._create_snp_genome(
        nloci, freq=params['snp_freq'])
    sim = sp.Simulator(pop, 1, True)

    # Genotype (0, 0) is penalised under every selection type; only the
    # heterozygote and (1, 1) fitness vary.
    sel_type = params['sel_type']
    penalised = 1 - params['sel']
    if sel_type == 'hz_advantage':
        het_fitness, hom11_fitness = 1, penalised
    elif sel_type == 'recessive':
        het_fitness, hom11_fitness = penalised, 1
    else:  # dominant
        het_fitness, hom11_fitness = 1, 1
    ms = sp.MapSelector(loci=0, fitness={
        (0, 0): penalised,
        (0, 1): het_fitness,
        (1, 1): hom11_fitness,
    })

    mating = sp.RandomMating(ops=[sp.MendelianGenoTransmitter(), ms])
    return {
        'sim': sim,
        'pop': pop,
        'init_ops': init_ops + genome_init,
        'pre_ops': pre_ops,
        'post_ops': post_ops,
        'mating_scheme': mating,
    }
Fstsample = sample.dvars().F_st sample.addInfoFields('order') order = list(range(100)) fstsim = '' for rep in range(1000): merged = sample merged.mergeSubPops() np.random.shuffle(order) merged.setIndInfo(order, field='order') merged.sortIndividuals('order') merged.splitSubPop(0, [50] * 2) sim.stat(merged, structure=range(10), vars=['F_st']) fstsim += '%s\t' % merged.dvars().F_st sortie += '%3d\t%.6f\t%3d\t%.6f\t%s\n' % (pop.dvars().gen, Fstpop, a, Fstsample, fstsim) reccord(sortie, "dataout") return True pop = sim.Population([100000] * 2, loci=[10] * 10, infoFields='migrate_to') pop.evolve(initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1]) ], preOps=sim.Migrator(rate=[[0, 0.0001], [0.0001, 0]], mode=sim.BY_PROPORTION), matingScheme=sim.RandomMating(ops=sim.Recombinator(rates=0.01)), postOps=[sim.PyOperator(func=calcFst, step=50)], gen=5000)
def _str_to_bool(text):
    """Convert a command-line token to a bool.

    ``type=bool`` turns every non-empty string (including "False") into
    True. Here the usual negative spellings map to False and everything
    else stays True.
    """
    return str(text).strip().lower() not in (
        "", "0", "false", "f", "no", "n", "off")


def _build_argument_parser():
    """Return the argparse parser describing the command-line options."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--experiment",
                        help="provide name for experiment",
                        required=True, type=str, default="test")
    parser.add_argument("--debug", help="turn on debugging output")
    parser.add_argument("--reps",
                        help="Replicated populations per parameter set",
                        type=int, default=1)
    parser.add_argument(
        "--networkfile",
        help="Name of GML file representing the network model for this simulation",
        required=True, type=str)
    parser.add_argument("--numloci",
                        help="Number of loci per individual",
                        type=int, required=True)
    parser.add_argument(
        "--maxinittraits",
        help="Max initial number of traits per locus for initialization",
        type=int, required=True)
    parser.add_argument(
        "--innovrate",
        help="Rate at which innovations occur in population as a per-locus rate",
        type=float, default=0.001)
    # The help text used to claim a default of 3000 while the default was
    # 20; %(default)s keeps the two in sync.
    parser.add_argument(
        "--simlength",
        help="Time at which simulation and sampling end, defaults to "
             "%(default)s generations",
        type=int, default=20)
    parser.add_argument(
        "--popsize",
        help="Initial size of population for each community in the model",
        type=int, required=True)
    parser.add_argument(
        "--migrationfraction", nargs='+',
        help="Fraction of population that migrates each time step",
        type=float, required=True, default=[])
    parser.add_argument(
        "--seed", type=int,
        help="Seed for random generators to ensure replicability")
    parser.add_argument("--k_values", nargs='+', type=int,
                        help="list of k-values to explore [e.g., 2 4 20 24",
                        default=[])
    parser.add_argument("--sub_pops", nargs='+',
                        help="Number of sub populations",
                        required=True, default=[10])
    parser.add_argument("--maxalleles", type=int,
                        help="Maximum number of alleles", default=50)
    parser.add_argument("--save_figs", type=_str_to_bool,
                        help="Save figures or not?", default=True)
    parser.add_argument(
        "--burnintime", type=int,
        help="How long to wait before making measurements? ", default=2000)
    parser.add_argument("--rewiringprob", type=float,
                        help="Probability of random rewiring", default=0)
    return parser


def _plot_replicate_series(results, k_values, subpop_values, reps, attribute,
                           palette, ylabel, save_path):
    """Plot one line per replicate for every (k, sub-population) pair.

    All replicates of a pair share a colour; only the first is labelled so
    the legend has one entry per pair.
    """
    figure = plt.figure(figsize=(16, 9))
    ax = figure.add_subplot(111)
    iteration = -1
    for k in k_values:
        for subnum in subpop_values:
            iteration += 1
            color = palette[iteration % len(palette)]
            for n in range(reps):
                series = getattr(results[k][subnum][n], attribute)
                if n == 0:
                    ax.plot(series, color=color,
                            label='k = %s subpops = %s' % (k, subnum))
                else:
                    ax.plot(series, color=color)
    ax.legend(loc=2)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Generations')
    plt.show()
    figure.savefig(save_path, bbox_inches='tight')


def _plot_confidence_summary(results, k_values, subpop_values, reps,
                             attribute, palette, ylabel, save_path):
    """Plot the across-replicate mean and 95% interval for every pair."""
    figure = plt.figure(figsize=(16, 9))
    ax = figure.add_subplot(111)
    iteration = -1
    for k in k_values:
        for subnum in subpop_values:
            iteration += 1
            # Materialise each replicate once rather than once per point.
            replicates = [list(getattr(results[k][subnum][n], attribute))
                          for n in range(reps)]
            if not replicates:
                continue
            ci_average, ci_lower, ci_upper = [], [], []
            for t in range(len(replicates[0])):
                point_in_time = [series[t] for series in replicates]
                average, lower, upper = utils.mean_confidence_interval(
                    point_in_time, confidence=0.95)
                ci_average.append(average)
                ci_lower.append(lower)
                ci_upper.append(upper)
            ax.plot(ci_average, color=palette[iteration % len(palette)],
                    label='k = %s subpops = %s' % (k, subnum))
            ax.plot(ci_lower, "--", color="0.5")
            ax.plot(ci_upper, "--", color="0.5")
            # x must be the time index; the mean values were passed before.
            ax.fill_between(range(len(ci_average)), ci_upper, ci_lower,
                            color="None", linestyle="--")
    ax.legend(loc=2)
    ax.set_ylabel(ylabel)
    ax.set_xlabel('Generation')
    plt.show()
    figure.savefig(save_path, bbox_inches='tight')


def main():
    """Run the network simulations for every (k, sub-population) pair and plot.

    Steps: parse the command line, validate the k / migration / sub-population
    combinations, evolve ``--reps`` replicate populations per pair, store each
    replicate's variables in the module-level ``output`` mapping, then draw and
    save the Fst and richness figures (per replicate and with confidence
    intervals) under the experiment's output directory.

    Exits through ``sys.exit()`` when a parameter combination is rejected.

    NOTE(review): ``--seed``, ``--debug``, ``--networkfile`` and
    ``--rewiringprob`` are parsed but never used here.
    """
    config = _build_argument_parser().parse_args()

    # --migrationfraction is a list (nargs='+'), so each fraction has to be
    # checked on its own; float() of the whole list raised TypeError.
    for kvalue in config.k_values:
        for fraction in config.migrationfraction:
            if float(kvalue) * float(fraction) >= 1.0:
                print("k=%s * mig=%4f is greater than 1.0\n" %
                      (kvalue, fraction))
                print(
                    "Please adjust input values for k and/or migration rate and restart.\n "
                )
                sys.exit()

    # setup output directories for writing
    output_path = utils.setup_output(config.experiment)
    # save parameters
    utils.save_parameters(str(sys.argv), config, output_path)

    k_run_values = config.k_values
    subpop_run_values = config.sub_pops

    # make sure the k values are less than # of subpops and > 1
    for k in k_run_values:
        for subnum in subpop_run_values:
            if int(k) > int(subnum) or int(k) < 2:
                print(
                    "k values can not be greater than the number of sub populations. k = %s subpops = %s \n"
                    % (k, subnum))
                sys.exit()

    # initialize the output dictionary
    for k in k_run_values:
        for subnum in subpop_run_values:
            output[k][subnum] = {}

    # Allele frequencies for each locus, starting from a uniform distribution.
    distribution = utils.constructUniformAllelicDistribution(
        config.maxinittraits)

    iteration_number = -1
    for k in k_run_values:
        for subnum in subpop_run_values:
            iteration_number += 1
            # Operators simuPOP applies at the different stages.
            init_ops = OrderedDict()
            pre_ops = OrderedDict()
            post_ops = OrderedDict()

            # The network model describes the subpopulations and the
            # migration between them.
            networkmodel = network.NetworkModel(
                networkmodel="smallworld",
                simulation_id=config.experiment,
                sim_length=config.simlength,
                burn_in_time=config.burnintime,
                initial_subpop_size=config.popsize,
                migrationfraction=config.migrationfraction,
                sub_pops=subnum,
                connectedness=k,  # if 0, then distance decay
                save_figs=config.save_figs,
                network_iteration=iteration_number)
            num_pops = networkmodel.get_subpopulation_number()
            sub_pop_size = int(config.popsize / num_pops)

            # Construct the initial population.
            # NOTE(review): subPopNames receives the str() of a list rather
            # than the list itself -- confirm this is what simuPOP expects.
            pops = sp.Population(
                size=[sub_pop_size] * num_pops,
                subPopNames=str(list(
                    networkmodel.get_subpopulation_names())),
                infoFields='migrate_to',
                ploidy=1,
                loci=config.numloci)

            # now set up the activities
            init_ops['acumulators'] = sp.PyOperator(
                utils.init_acumulators,
                param=['fst', 'alleleFreq', 'haploFreq'])
            init_ops['Sex'] = sp.InitSex()
            init_ops['Freq'] = sp.InitGenotype(
                loci=list(range(config.numloci)), freq=distribution)
            post_ops['Innovate'] = sp.KAlleleMutator(
                k=config.maxalleles, rates=config.innovrate,
                loci=sp.ALL_AVAIL)
            post_ops['mig'] = sp.Migrator(
                rate=networkmodel.get_migration_matrix())
            post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL)
            post_ops['Stat-richness'] = sp.Stat(
                alleleFreq=[0], haploFreq=[0],
                vars=['alleleFreq', 'haploFreq', 'alleleNum', 'genoNum'])
            post_ops['fst_acumulation'] = sp.PyOperator(
                utils.update_acumulator, param=['fst', 'F_st'])
            post_ops['richness_acumulation'] = sp.PyOperator(
                utils.update_richness_acumulator,
                param=('alleleFreq', 'Freq of Alleles'))
            post_ops['class_richness'] = sp.PyOperator(
                utils.calculateAlleleAndGenotypeFrequencies,
                param=(config.popsize, config.numloci))
            mating_scheme = sp.RandomSelection()

            sim = sp.Simulator(pops, rep=config.reps)
            print("now evolving... k = %s with sub_pops = %s" % (k, subnum))
            sim.evolve(initOps=list(init_ops.values()),
                       preOps=list(pre_ops.values()),
                       postOps=list(post_ops.values()),
                       matingScheme=mating_scheme,
                       gen=config.simlength)

            # Fst per replicate for this parameter pair; the variables are
            # deep-copied so they outlive the simulator.
            fig = plt.figure(figsize=(16, 9))
            ax = fig.add_subplot(111)
            for count, pop in enumerate(sim.populations()):
                ax.plot(pop.dvars().fst, label='Replicate: %s' % count)
                output[k][subnum][count] = deepcopy(pop.dvars())
            ax.legend(loc=2)
            ax.set_ylabel('FST')
            ax.set_xlabel('Generation')
            plt.show()

    # One colour per (k, sub-population) pair, built once.
    palette = list(dict(mcolors.BASE_COLORS, **mcolors.CSS4_COLORS).keys())

    _plot_replicate_series(output, k_run_values, subpop_run_values,
                           config.reps, 'fst', palette, 'Fst',
                           output_path + "/sum_fig.png")
    # This figure used to index with a stale loop variable and so plotted
    # the last sub-population count for every pair.
    _plot_replicate_series(output, k_run_values, subpop_run_values,
                           config.reps, 'richness', palette, 'Richness',
                           output_path + "/richness.png")

    # Confidence intervals across replicates.
    _plot_confidence_summary(output, k_run_values, subpop_run_values,
                             config.reps, 'fst', palette, 'Fst',
                             output_path + "/summary-ci.png")
    _plot_confidence_summary(output, k_run_values, subpop_run_values,
                             config.reps, 'richness', palette, 'Richness',
                             output_path + "/richness-ci.png")
import simuPOP as sim
from simuPOP.utils import migrIslandRates
import random


def demo(pop):
    """Demographic callback: sometimes split one subpopulation in half.

    With probability 0.7 a randomly chosen subpopulation is split into two
    equal halves. The current subpopulation sizes are returned either way.
    """
    subpop_count = pop.numSubPop()
    if random.random() <= 0.3:
        return pop.subPopSizes()
    chosen = random.randint(0, subpop_count - 1)
    pop.splitSubPop(chosen, [0.5, 0.5])
    return pop.subPopSizes()


def migr(pop):
    """Apply island-model migration (rate 0.01) across the subpopulations."""
    island_rates = migrIslandRates(0.01, pop.numSubPop())
    sim.migrate(pop, island_rates)
    return True


pop = sim.Population(10000, infoFields='migrate_to')
# Each generation: migrate, refresh the size statistics, report them.
report_sizes = sim.PyEval(r'"Gen %d:\t%s\n" % (gen, subPopSize)')
pop.evolve(
    initOps=sim.InitSex(),
    preOps=[
        sim.PyOperator(func=migr),
        sim.Stat(popSize=True),
        report_sizes,
    ],
    matingScheme=sim.RandomMating(subPopSize=demo),
    gen=5,
)
id_tagger.apply(pop) # record recombinations rc = RecombCollector(first_gen=pop.indInfo("ind_id"), ancestor_age=args.ancestor_age, length=2 * args.length, locus_position=locus_position + [args.length, 2 * args.length]) migr_mat = [[0, args.m, 0], [args.m, 0, args.M], [0, args.M, 0]] pop.evolve(initOps=[ sim.InitSex(), ] + init_geno, preOps=[ sim.PyOperator(lambda pop: rc.increment_time() or True), sim.Migrator(rate=migr_mat, mode=sim.BY_PROBABILITY), sim.SNPMutator(u=args.sel_mut_rate, v=args.sel_mut_rate), sim.PyMlSelector(fitness.left, subPops=fitness.left_subpops, output=">>" + selloci_file), sim.PyMlSelector(fitness.right, subPops=fitness.right_subpops, output=">>" + selloci_file), ], matingScheme=sim.RandomMating(ops=[ id_tagger, sim.Recombinator(intensity=args.recomb_rate, output=rc.collect_recombs, infoFields="ind_id"), ]),
def _read_float_rows(path):
    """Read a whitespace-delimited text file as a list of float rows.

    ``str.split()`` already discards the line terminator, so the last line
    is parsed correctly even when the file has no trailing newline.
    """
    with open(path, "r") as fd:
        return [[float(token) for token in line.split()] for line in fd]


def runSimulation(scenario_id, sub_population_size, minMatingAge,
                  maxMatingAge, gen):
    '''Simulate an age-structured population and write a sample to disk.

    scenario_id
        Names the input files: mitochondrial haplotype frequencies are
        read from mtdna_<scenario_id>.txt and SNP frequencies from
        snp_<scenario_id>.txt.
    sub_population_size
        A vector giving the population size of each sub-population. The
        sub-populations determine which breeding ground an individual
        belongs to.
    minMatingAge
        Minimal mating age.
    maxMatingAge
        Maximal mating age. Individuals older than this are effectively
        dead.
    gen
        Number of generations (years) to simulate.

    Returns the random sample drawn from the final population. Also writes
    freq.txt, mixfile.txt and haploiso.txt in the current directory.

    Raises ValueError when the haplotype file describes a different number
    of populations than sub_population_size.

    Relies on module-level names that are not parameters: nb_loci,
    numberOfFeedingGrounds, sample_count, init_native_breeding_grounds,
    configure_new_population_size, postop_processing and drawRandomSample.
    '''
    # The haplotype file has one column per population. Transpose it so
    # haplotype_frequencies[0] is the frequency vector of population 0,
    # haplotype_frequencies[1] that of population 1, and so on.
    mitochondrial_file = "mtdna_" + scenario_id + ".txt"
    haplotype_frequencies = [
        list(column) for column in zip(*_read_float_rows(mitochondrial_file))
    ]
    if len(haplotype_frequencies) != len(sub_population_size):
        raise ValueError(
            'The number of populations in the population size vector and the number of populations deduced from the haplotype file are different'
        )

    # SNP data is indexed as snp[locus][population].
    snp_file = "snp_" + scenario_id + ".txt"
    snp = _read_float_rows(snp_file)

    sub_population_count = len(sub_population_size)
    print()
    print(sub_population_count, "subpopulations detected")

    # Name the populations 'A', 'B', 'C', ...
    sub_population_names = [
        chr(code) for code in range(65, 65 + sub_population_count)
    ]

    # Two chromosomes: an autosome with nb_loci loci and the mitochondrial
    # chromosome with a single locus.
    pop = simuPOP.Population(
        sub_population_size,
        ploidy=2,
        loci=[nb_loci, 1],
        ancGen=2,
        infoFields=[
            'age', 'ind_id', 'father_id', 'mother_id', 'nitrogen', 'carbon',
            'feeding_ground', 'native_breeding_ground', 'migrate_to'
        ],
        subPopNames=sub_population_names,
        chromTypes=[simuPOP.AUTOSOME, simuPOP.MITOCHONDRIAL])
    sub_population_names = tuple(sub_population_names)

    # sub_population_size is a vector, so the total is the sum of its entries.
    individual_count = sum(sub_population_size)

    # Random starting age in [0, maxMatingAge] for every individual.
    pop.setIndInfo(
        [random.randint(0, maxMatingAge) for _ in range(individual_count)],
        'age')
    # Random feeding ground for every individual.
    pop.setIndInfo([
        random.randint(0, numberOfFeedingGrounds - 1)
        for _ in range(individual_count)
    ], 'feeding_ground')

    # Virtual subpopulations: sex x age class, where the age classes are
    #   age < minMatingAge                       (juvenile)
    #   minMatingAge <= age < maxMatingAge + 0.1 (mature)
    #   otherwise                                (dead)
    # plus two combined groups, 'Not dead yet' (6) and 'Active' (7).
    #
    # Ideally we would want: immature, receptive female (every 3 years),
    # non-receptive female, mature male, dead.
    pop.setVirtualSplitter(
        simuPOP.CombinedSplitter(
            [
                simuPOP.ProductSplitter([
                    simuPOP.SexSplitter(),
                    simuPOP.InfoSplitter(
                        'age',
                        cutoff=[minMatingAge, maxMatingAge + 0.1],
                        names=['juvenile', 'mature', 'dead'])
                ])
            ],
            vspMap=[[0], [1], [2], [3], [4], [5], [0, 1, 3, 4], [1, 4]],
            names=[
                'Juvenile Male', 'Mature Male', 'Dead Male',
                'Juvenile Female', 'Mature Female', 'Dead Female',
                'Not dead yet', 'Active'
            ]))

    # Mitochondrial haplotypes: one initialiser per population, on the
    # mitochondrial locus (absolute index nb_loci).
    haplotype_init = [
        simuPOP.InitGenotype(subPops=sub_population_names[i],
                             freq=haplotype_frequencies[i],
                             loci=[nb_loci])
        for i in range(sub_population_count)
    ]
    # NOTE(review): this covers loci 0 .. nb_loci - 2 only; confirm that
    # leaving the last autosomal locus uninitialised is intended.
    snp_init = [
        simuPOP.InitGenotype(subPops=sub_population_names[i],
                             freq=[snp[n][i], 1 - snp[n][i]],
                             loci=[n])
        for i in range(sub_population_count)
        for n in range(nb_loci - 1)
    ]

    pop.evolve(
        initOps=[
            simuPOP.InitSex(),
            simuPOP.IdTagger(),
            simuPOP.PyOperator(func=init_native_breeding_grounds)
        ] + haplotype_init + snp_init,
        # increase age by 1
        preOps=[simuPOP.InfoExec('age += 1')],
        matingScheme=simuPOP.HeteroMating(
            [
                # Clone every individual of VSP 6 ('Not dead yet') in each
                # subpopulation into the next generation (weight=-1).
                simuPOP.CloneMating(
                    ops=[simuPOP.CloneGenoTransmitter(chroms=[0, 1])],
                    subPops=[(sub_population, 6)
                             for sub_population in range(sub_population_count)],
                    weight=-1),
                # Random mating among VSP 7 ('Active', the mature
                # individuals) within each subpopulation.
                simuPOP.RandomMating(
                    ops=[
                        simuPOP.MitochondrialGenoTransmitter(),
                        simuPOP.MendelianGenoTransmitter(),
                        simuPOP.IdTagger(),
                        simuPOP.InheritTagger(mode=simuPOP.MATERNAL,
                                              infoFields=['feeding_ground']),
                        simuPOP.InheritTagger(
                            mode=simuPOP.MATERNAL,
                            infoFields=['native_breeding_ground']),
                        simuPOP.PedigreeTagger()
                    ],
                    subPops=[(sub_population, 7)
                             for sub_population in range(sub_population_count)],
                    weight=1)
            ],
            subPopSize=configure_new_population_size),
        postOps=[
            # Determine the isotopic ratios in individuals
            simuPOP.PyOperator(func=postop_processing),
            simuPOP.Migrator(mode=simuPOP.BY_IND_INFO),
            # Fst partitions variance among and between populations; it is
            # used here as an indication of genetic differentiation.
            simuPOP.Stat(structure=range(1),
                         subPops=sub_population_names,
                         suffix='_AB',
                         step=10),
            simuPOP.PyEval(r"'Fst=%.3f \n' % (F_st_AB)", step=10)
        ],
        # Honour the caller's argument; this used to read the name `years`,
        # which is not defined in this function.
        gen=gen)

    # NOTE(review): `ped` is never used afterwards.
    ped = simuPOP.Pedigree(pop)
    print("This is the pedigree stuff")
    simuPOP.dump(pop)

    # Now sample the individuals
    sample = drawRandomSample(pop,
                              sizes=[sample_count] * sub_population_count)

    # Print out the allele frequency data, skipping the mitochondrial locus
    # and any locus with fewer than two alleles.
    simuPOP.stat(sample, alleleFreq=simuPOP.ALL_AVAIL)
    frequencies = sample.dvars().alleleFreq
    with open('freq.txt', 'w') as freqfile:
        index = 0
        for locus in frequencies:
            if locus == nb_loci:
                continue
            allele_freqs = frequencies[locus]
            if len(allele_freqs) < 2:
                continue
            print(index, end=' ', file=freqfile)
            index += 1
            for allele in allele_freqs:
                print(allele_freqs[allele], end=' ', file=freqfile)
            print(file=freqfile)

    # Find monoallelic loci: positions where every sampled individual has
    # the same value on both copies. Summing the allele values gives 0 when
    # everyone carries 0 and (individuals * 2) when everyone carries 1.
    geno_sum = [0] * (nb_loci + 1) * 2
    for individual in sample.individuals():
        geno_sum = [
            total + allele
            for total, allele in zip(geno_sum, individual.genotype())
        ]
    final_sum = [
        first + second for first, second in zip(geno_sum[:nb_loci + 1],
                                                geno_sum[nb_loci + 1:])
    ]
    all_ones_total = sample_count * sub_population_count * 2
    monoallelic_loci = {
        i for i in range(nb_loci) if final_sum[i] in (0, all_ones_total)
    }
    kept_loci = [i for i in range(nb_loci) if i not in monoallelic_loci]

    # Generate the two files
    with open('mixfile.txt', 'w') as mixfile, \
            open('haploiso.txt', 'w') as haplofile:
        print(sub_population_count, len(kept_loci), 2, 1, file=mixfile)
        print("sex, haplotype, iso1, iso2, native_ground", file=haplofile)
        for i in range(len(kept_loci)):
            print('Loc', i + 1, sep='_', file=mixfile)
        for individual in sample.individuals():
            genotype = individual.genotype()
            print(1 if individual.sex() == 1 else 0,
                  genotype[nb_loci],
                  individual.info('carbon'),
                  individual.info('nitrogen'),
                  file=haplofile,
                  sep=' ')
            print(int(individual.info('native_breeding_ground') + 1),
                  end=' ',
                  file=mixfile)
            for i in kept_loci:
                # Both copies of the locus, shifted to 1-based allele codes.
                print(genotype[i] + 1,
                      genotype[i + nb_loci + 1] + 1,
                      ' ',
                      end='',
                      sep='',
                      file=mixfile)
            print(file=mixfile)
    return sample
rc = RecombCollector(ts=init_ts, node_ids=node_ids, locus_position=locus_position) if min(args.gridheight,args.gridwidth)==1: migr_rates=migrSteppingStoneRates( args.migr, n=max(args.gridwidth,args.gridheight), circular=False) else: migr_rates=migr2DSteppingStoneRates( args.migr, m=args.gridwidth, n=args.gridheight, diagonal=False, circular=False) pop.evolve( initOps=[ sim.InitSex(), ]+init_geno, preOps=[ sim.PyOperator(lambda pop: rc.increment_time() or True), sim.Migrator( rate=migr_rates, mode=sim.BY_PROBABILITY), sim.SNPMutator(u=args.sel_mut_rate, v=args.sel_mut_rate), sim.PyMlSelector(GammaDistributedFitness(args.gamma_alpha, args.gamma_beta), output=">>"+selloci_file), ], matingScheme=sim.RandomMating( ops=[ id_tagger, sim.Recombinator(intensity=args.recomb_rate, output=rc.collect_recombs, infoFields="ind_id"), ] ), postOps=[
import simuPOP
from simuPOP import utils
from simuPOP import sampling
from simuPOP import demography


def calcFst(pop):
    """Print Fst and Gst for the whole population and for a random sample.

    The sample takes 500 individuals from every subpopulation. Always
    returns True.
    """
    stat_options = dict(structure=range(5), vars=['F_st', 'G_st'])
    simuPOP.stat(pop, **stat_options)
    sample_sizes = [500] * pop.numSubPop()
    sample = simuPOP.sampling.drawRandomSample(pop, sizes=sample_sizes)
    simuPOP.stat(sample, **stat_options)

    whole = pop.dvars()
    drawn = sample.dvars()
    report = 'Gen: %3d Gst: %.6f (all), %.6f (sample) Fst: %.6f (all) %.6f (sample)' \
        % (whole.gen, whole.G_st, drawn.G_st, whole.F_st, drawn.F_st)
    print(report)
    return True


# Five subpopulations of 10000 individuals, five single-locus chromosomes.
pop = simuPOP.Population([10000] * 5, loci=[1] * 5, infoFields='migrate_to')
pop.evolve(
    initOps=[
        simuPOP.InitSex(),
        simuPOP.InitGenotype(freq=[0.5, 0.5], loci=[0, 2]),
        simuPOP.InitGenotype(freq=[0.2, 0.4, 0.4], loci=[1, 3, 4]),
    ],
    matingScheme=simuPOP.RandomMating(),
    # No migration operator is active; the statistics are reported every
    # 20 generations.
    postOps=[simuPOP.PyOperator(func=calcFst, step=20)],
    gen=500,
)
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim


def rejectInd(off):
    """Accept an offspring only when its allele at index 0 is 0.

    Used as a during-mating PyOperator, so a false return rejects the
    offspring.
    """
    first_allele = off.allele(0)
    return first_allele == 0


pop = sim.Population(size=100, loci=1)
# The filter runs after genotype transmission for every offspring.
filtered_mating = sim.RandomMating(ops=[
    sim.MendelianGenoTransmitter(),
    sim.PyOperator(func=rejectInd),
])
pop.evolve(
    initOps=[sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5])],
    matingScheme=filtered_mating,
    gen=1,
)
# You should see no individual with allele 1 at locus 0, ploidy 0.
pop.genotype()[:20]
network_iteration=iteration_number) num_pops = networkmodel.get_subpopulation_number() sub_pop_size = int(pop_size / num_pops) # The regional network model defines both of these, in order to configure an initial population for evolution # Construct the initial population pops = sp.Population(size=networkmodel.get_initial_size(), subPopNames=str( list(networkmodel.get_subpopulation_names())), infoFields='migrate_to', ploidy=1, loci=100) ### now set up the activities init_ops['acumulators'] = sp.PyOperator( init_acumulators, param=['fst', 'alleleFreq', 'haploFreq']) init_ops['Sex'] = sp.InitSex() init_ops['Freq'] = sp.InitGenotype(loci=0, freq=distribution) post_ops['Innovate'] = sp.KAlleleMutator(k=MAXALLELES, rates=innovation_rate, loci=sp.ALL_AVAIL) #post_ops['mig'] = sp.Migrator(demography.migrIslandRates(migration_rate, num_pops)) #, reps=[i]) post_ops['mig'] = sp.Migrator(rate=networkmodel.get_migration_matrix()) #for i, mig in enumerate(migs): # post_ops['mig-%d' % i] = sp.Migrator(demography.migrIslandRates(mig, num_pops), reps=[i]) post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL) #post_ops['haploFreq']=sp.stat(pops, haploFreq=[0], vars=['haploFreq', 'haploNum']) #post_ops['alleleFreq']=sp.stat(pops, alleleFreq=sp.ALL_AVAIL) post_ops['Stat-richness'] = sp.Stat(
I = sum(geno[40:60]) + normalvariate(0, 2.5) D = B + I - A + normalvariate(0, sigma**2) return A, B, I, D pop = sim.Population(100000, loci=[1] * 40, infoFields=['A', 'B', 'I', 'D']) pop.evolve( initOps=[ sim.InitSex(maleProp=0.5), sim.InitGenotype(freq=[0.5, 0.5]), ], preOps=[ sim.PyQuanTrait(func=traits, loci=sim.ALL_AVAIL, infoFields=['A', 'B', 'I', 'D']), sim.PyOperator(func=lambda pop: pop.sortIndividuals('D') is None), ], matingScheme=sim.HomoMating(chooser=sim.SequentialParentsChooser(), generator=sim.OffspringGenerator( ops=sim.MendelianGenoTransmitter(), numOffspring=2, sexMode=(sim.NUM_OF_MALES, 1))), finalOps=sim.PyQuanTrait(func=traits, loci=sim.ALL_AVAIL, infoFields=['A', 'B', 'I', 'D']), gen=10) from rpy import r def genoTraitCorrelation(loc, trait):
# Evolve a clone of `pop`; the clone gets a 'fitness' info field because
# the pre-mating InfoExec below assigns to it.
pop1 = pop.clone()
pop1.addInfoFields('fitness')

# Virtual subpopulations, selected by age-range name in every subpopulation.
_JUVENILE_VSP = (sim.ALL_AVAIL, 'age < 3')
_ADULT_VSP = (sim.ALL_AVAIL, '3 <= age < 17')

pop1.evolve(
    preOps=[
        migr,
        sim.InfoExec('age += 1'),
        sim.InfoExec('fitness = (50 - ind.age)/50', exposeInd='ind'),
    ],
    matingScheme=sim.HeteroMating(
        [
            # Only adults (3 <= age < 17) mate and produce offspring.
            # The age of offspring will be zero.
            sim.RandomMating(
                ops=[sim.MendelianGenoTransmitter()],
                subPops=[_ADULT_VSP],
                weight=-0.1,
            ),
            # Individuals with age < 17 are kept, but might be removed
            # due to population size decline.
            sim.CloneMating(subPops=[_JUVENILE_VSP, _ADULT_VSP]),
        ],
        subPopSize=demoModel,
    ),
    postOps=[
        sim.Stat(popSize=True),
        sim.PyEval(r'f"{gen} {subPopSize}\n"'),
        sim.PyOperator(func=exportPop, step=10),
    ],
    gen=200,
)
def simuRareVariants(regions, N, G, mu, selDist, selCoef, selModel='exponential', recRate=0,
        splitTo=[1], splitAt=0, migrRate=0, steps=[100], mutationModel='finite_sites',
        initPop='', extMutantFile='', addMutantsAt=0, postHook=None,
        statFile='', popFile='', markerFile='', mutantFile='', genotypeFile='',
        verbose=1, logger=None):
    '''Simulate the evolution of rare variants in one or more regions.

    Please refer to simuRareVariants.py -h for a detailed description of all
    parameters. Note that a user-defined function can be passed to parameter
    selDist to specify arbitrary distribution of fitness. A script-only
    feature is that a Python function can be provided through parameter
    postHook to process the population at each generation.

    Parameters as used by this function:

    regions       list of strings of the form ``chrN:start..end``. A region
                  on ``chrX`` is only accepted when it is the only region;
                  ``chrY`` is rejected.
    N, G          per-stage population sizes and numbers of generations.
                  ``N[0]`` is the initial population size, the cumulative
                  sums of ``G`` are the stage boundaries.
    mu            mutation rate handed to ``MutSpaceMutator``.
    selDist       one of 'mixed_gamma', 'mixed_gamma1', 'gamma1', 'gamma2',
                  'gamma3', 'constant', or a callable.
    selCoef       parameters of ``selDist``; ``None`` selects the built-in
                  defaults for the named distributions.
    selModel      'multiplicative', 'additive' or 'exponential'.
    recRate       recombination rate handed to ``MutSpaceRecombinator``.
    splitTo, splitAt, migrRate
                  handed to ``multiStageDemoFunc``; migration is only active
                  when ``splitTo`` has more than one entry.
    steps         interval (in generations) between statistics lines, one
                  value per stage; a single value is reused for every stage
                  and an empty list means "once per stage".
    mutationModel 'finite_sites' or 'infinite_sites'.
    initPop       file name of an initial population; used only if it exists.
    extMutantFile, addMutantsAt
                  population file with additional mutants and the generation
                  at which they are added.
    postHook      optional function applied to the population after mating
                  in every generation.
    statFile      file that statistics lines are appended to.
    popFile       file name, or ``[name, generation]``, to save the
                  population to.
    markerFile, genotypeFile, mutantFile
                  optional output files written after evolution.
    verbose       statistics are printed when > 0, mutation events are logged
                  to ``mutations.lst`` when >= 2.
    logger        optional logging object; nothing is logged when ``None``.

    Returns the evolved population. Raises ``ValueError`` for unsupported
    regions, an unsupported ``selDist`` or a mismatching ``initPop``.
    '''
    # NOTE: the list defaults (splitTo, steps) are never mutated in place
    # below, so sharing them between calls is harmless.
    #
    # convert regions to start/end positions
    ranges = []
    chromTypes = []
    for region in regions:
        start, end = [int(x) for x in region.split(':')[1].split('..')]
        ranges.append((start, end + 1))
        chrom = region.split(':')[0]
        if chrom == 'chrX':
            # validate before recording anything for this region
            if len(regions) > 1:
                raise ValueError(
                    'The current implementation only allows one region if it is on chromosome X'
                )
            chromTypes.append(sim.CHROMOSOME_X)
            # logger is optional everywhere else, so guard it here as well
            if logger:
                logger.info('Chromosome {} is on chromosome X'.format(region))
        elif chrom == 'chrY':
            raise ValueError(
                'The current implementation does not support chromosome Y')
        else:
            chromTypes.append(sim.AUTOSOME)
    if logger:
        logger.info('%s regions with a total length of %d basepair.'
            % (len(ranges), sum([x[1] - x[0] for x in ranges])))
    #
    # set default parameter
    if selCoef is None:
        # set default parameters
        if selDist == 'mixed_gamma':
            selCoef = [0.0186, 0.0001, 0.184, 0.160 * 2, 0.5, 0.0001, 0.1]
        elif selDist == 'mixed_gamma1':
            selCoef = [0, -1, 0.562341, 0.01, 0.5, 0.00001, 0.1]
        elif selDist == 'gamma1':
            selCoef = [0.23, 0.185 * 2, 0.5]
        elif selDist == 'gamma2':
            selCoef = [0.184, 0.160 * 2, 0.5]
        elif selDist == 'gamma3':
            selCoef = [0.206, 0.146 * 2, 0.5]
        elif selDist == 'constant':
            selCoef = [0.01, 0.5]
        elif not callable(selDist):
            raise ValueError("Unsupported random distribution")
    else:
        # force to list type
        selCoef = list(selCoef)
    if len(steps) == 0:
        # at the end of each stage
        steps = G
    elif len(steps) == 1:
        # save step for each stage
        steps = steps * len(G)
    # use a right selection operator.
    collector = fitnessCollector()
    mode = {
        'multiplicative': sim.MULTIPLICATIVE,
        'additive': sim.ADDITIVE,
        'exponential': sim.EXPONENTIAL
    }[selModel]
    #
    if isinstance(popFile, str):
        # a plain file name means "save at the last generation"
        popFile = [popFile, -1]
    #
    if callable(selDist):
        mySelector = MutSpaceSelector(selDist=selDist, mode=mode,
            output=collector.getCoef)
    elif selDist == 'mixed_gamma':
        mySelector = MutSpaceSelector(selDist=mixedGamma(selCoef), mode=mode,
            output=collector.getCoef)
    elif selDist == 'mixed_gamma1':
        mySelector = MutSpaceSelector(selDist=mixedGamma1(selCoef), mode=mode,
            output=collector.getCoef)
    elif selDist.startswith('gamma'):
        mySelector = MutSpaceSelector(selDist=[sim.GAMMA_DISTRIBUTION] + selCoef,
            mode=mode, output=collector.getCoef)
    elif selDist == 'constant':
        # NOTE(review): selCoef is a list at this point, so this comparison
        # with 0 only matches if the caller's value compares equal to 0;
        # the neutral shortcut may have been meant to test selCoef[0].
        if selCoef == 0:
            mySelector = sim.NoneOp()
        else:
            mySelector = MutSpaceSelector(selDist=[sim.CONSTANT] + selCoef,
                mode=mode, output=collector.getCoef)
    else:
        # previously fell through and failed later with a NameError
        raise ValueError("Unsupported random distribution")
    #
    # Evolve
    if os.path.isfile(initPop):
        if logger:
            logger.info('Loading initial population %s...' % initPop)
        pop = sim.loadPopulation(initPop)
        if pop.numChrom() != len(regions):
            raise ValueError(
                'Initial population %s does not have specified regions.' % initPop)
        for ch, reg in enumerate(regions):
            if pop.chromName(ch) != reg:
                raise ValueError(
                    'Initial population %s does not have region %s' % (initPop, reg))
        pop.addInfoFields(['fitness', 'migrate_to'])
    else:
        pop = sim.Population(size=N[0], loci=[10] * len(regions), chromNames=regions,
            infoFields=['fitness', 'migrate_to'], chromTypes=chromTypes)
    if logger:
        # time.clock() no longer exists in Python >= 3.8
        startTime = time.perf_counter()
    #
    progGen = []
    # 0, G[0], G[0]+G[1], ..., sum(G)
    Gens = [sum(G[:i]) for i in range(len(G) + 1)]
    for i in range(len(Gens) - 1):
        progGen += list(range(Gens[i], Gens[i + 1], steps[i]))
    pop.evolve(
        initOps=sim.InitSex(),
        preOps=[
            # legend for the statistics lines, printed once at generation 0
            sim.PyOutput('''Statistics outputted are
1. Generation number,
2. population size (a list),
3. number of segregation sites,
4. average number of segregation sites per individual
5. average allele frequency * 100
6. average fitness value
7. minimal fitness value of the parental population
''', at=0)
        ] + [
            sim.PyOutput('Starting stage %d\n' % i, at=Gens[i])
            for i in range(0, len(Gens))
        ] + [
            # add alleles from an existing population
            sim.IfElse(extMutantFile != '',
                ifOps=[
                    sim.PyOutput('Loading and converting population %s' % extMutantFile),
                    sim.PyOperator(func=addMutantsFrom, param=(extMutantFile, regions, logger)),
                ], at=addMutantsAt),
            # revert alleles at fixed loci to wildtype
            MutSpaceRevertFixedSites(),
            # mutate in a region at rate mu, if verbose >= 2, save mutation events to a file
            MutSpaceMutator(mu, ranges, {'finite_sites':1, 'infinite_sites':2}[mutationModel],
                output='' if verbose < 2 else '>>mutations.lst'),
            # selection on all loci
            mySelector,
            # output statistics in verbose mode
            sim.IfElse(verbose > 0, ifOps=[
                sim.Stat(popSize=True, meanOfInfo='fitness', minOfInfo='fitness'),
                NumSegregationSites(),
                sim.PyEval(r'"%5d %s %5d %.6f %.6f %.6f %.6f\n" '
                    '% (gen, subPopSize, numSites, avgSites, avgFreq*100, meanOfInfo["fitness"], minOfInfo["fitness"])',
                    output='>>' + statFile),
                ], at=progGen
            ),
            # migration is only needed once the population has been split
            sim.IfElse(len(splitTo) > 1,
                sim.Migrator(rate=migrIslandRates(migrRate, len(splitTo)),
                    begin=splitAt + 1)
            ),
        ],
        matingScheme=sim.RandomMating(ops=MutSpaceRecombinator(recRate, ranges),
            subPopSize=multiStageDemoFunc(N, G, splitTo, splitAt)),
        postOps=[
            sim.NoneOp() if postHook is None else sim.PyOperator(func=postHook),
            sim.SavePopulation(popFile[0], at=popFile[1]),
        ],
        finalOps=[
            # revert fixed sites so that the final population does not have fixed sites
            MutSpaceRevertFixedSites(),
            sim.IfElse(verbose > 0, ifOps=[
                # statistics after evolution
                sim.Stat(popSize=True),
                NumSegregationSites(),
                sim.PyEval(r'"%5d %s %5d %.6f %.6f %.6f %.6f\n" '
                    '% (gen+1, subPopSize, numSites, avgSites, avgFreq*100, meanOfInfo["fitness"], minOfInfo["fitness"])',
                    output='>>' + statFile),
                sim.PyEval(r'"Simulated population has %d individuals, %d segregation sites.'
                    r'There are on average %.1f sites per individual. Mean allele frequency is %.4f%%.\n"'
                    r'% (popSize, numSites, avgSites, avgFreq*100)'),
            ]),
        ],
        gen=Gens[-1]
    )
    # record selection coefficients to population
    if len(collector.selCoef) == 0:
        # this must be the neutral case where a NonOp has been used.
        pop.dvars().selCoef = 0
    else:
        pop.dvars().selCoef = collector.selCoef
    # re-save the file with the added selCoef
    if popFile[-1] == -1:
        pop.save(popFile[0])
    #
    if logger:
        logger.info('Population simulation takes %.2f seconds' % (time.perf_counter() - startTime))
    if markerFile or genotypeFile:
        if logger:
            logger.info('Saving marker information to file %s' % markerFile)
        mutants = saveMarkerInfoToFile(pop, markerFile, logger)
        if genotypeFile:
            if logger:
                logger.info('Saving genotype in .ped format to file %s' % genotypeFile)
            saveGenotypeToFile(pop, genotypeFile, mutants, logger)
    if mutantFile:
        if logger:
            logger.info('Saving mutants to file %s' % mutantFile)
        saveMutantsToFile(pop, mutantFile, logger=logger)
    return pop
s = random.gammavariate(self.alpha, self.beta) self.coefMap[loc] = s # print(str(loc)+":"+str(alleles)+"\n") # needn't return fitness for alleles=(0,0) as simupop knows that's 1 if 0 in alleles: return 1. - s else: return 1. - 2. * s simu.evolve(initOps=[sim.InitSex(), meioser] + init_geno, preOps=[ sim.AcgtMutator(rate=[0.0001], model='JC69'), sim.PyMlSelector(GammaDistributedFitness(alpha, beta), output='>>sel_loci.txt'), sim.PyOperator(func=step_gen, param=(1, )), ], matingScheme=reproduction, postOps=[ sim.Stat(numOfSegSites=sim.ALL_AVAIL, step=50), sim.PyEval( r"'Gen: %2d #seg sites: %d\n' % (gen, numOfSegSites)", step=50) ], gen=generations) pop = simu.population(0) pop_ids = [ind.info('ind_id') for ind in pop.individuals()] samples = random.sample(pop_ids, nsamples)
def _str_to_bool(text):
    """Convert a command-line token such as 'False' or 'yes' to a bool."""
    lowered = str(text).strip().lower()
    if lowered in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if lowered in ('0', 'false', 'f', 'no', 'n', 'off', ''):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (text,))


def _k_values_for(subpop, requested):
    """Return the k-values to run for one sub-population count.

    ``requested == [0]`` asks for k-values derived from ``subpop``; any other
    list is used unchanged.
    """
    if requested != [0]:
        return requested
    size = float(subpop)
    return [2, int(size * .1), int(size * .2), int(size * .5),
            int(size * .8), int(size * .9), int(subpop) - 1]


def _value_at(series, index, label):
    """Return ``series[index]``, raising an explanatory IndexError if too short."""
    values = list(series)
    if index >= len(values):
        raise IndexError(
            "cannot sample '%s' at generation %d: only %d values were recorded "
            "(is --simlength larger than --burnintime?)" % (label, index, len(values)))
    return values[index]


def main():
    """Run a parameter sweep of the network simulation and write a CSV summary.

    Command-line arguments are parsed with argparse. For every combination of
    sub-population count, k-value, migration fraction and innovation rate a
    replicated simuPOP simulation is evolved; the per-replicate accumulators
    ('ones', 'twos', 'richness', 'fst') are sampled at generation
    ``--burnintime`` and their means with 95% confidence intervals are
    appended as one row to ``<output_path>/<experiment>-rare-trait-output.csv``.

    Exits with status 1 when the k / migration-rate combination check fails.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--experiment", help="provide name for experiment", required=True, type=str, default="test")
    parser.add_argument("--debug", help="turn on debugging output")
    parser.add_argument("--reps", help="Replicated populations per parameter set", type=int, default=3)
    parser.add_argument("--networkfile", help="Name of GML file representing the network model for this simulation",
                        required=True, type=str, default="smallworld")
    parser.add_argument("--numloci", help="Number of loci per individual (use with care)", type=int, required=True, default=1)
    parser.add_argument("--maxinittraits", help="Max initial number of traits per locus for initialization", type=int,
                        required=True, default=50)
    parser.add_argument("--innovrate", nargs='+', help="Rate(s) at which innovations occur in population as a per-locus rate", type=float, default=[])
    # NOTE(review): the help text mentions 3000 generations but the default is 20.
    parser.add_argument("--simlength", help="Time at which simulation and sampling end, defaults to 3000 generations",
                        type=int, default=20)
    parser.add_argument("--popsize", help="Initial size of population for each community in the model", type=int, required=True)
    parser.add_argument("--migrationfraction", nargs='+', help="Fraction of population that migrates each time step", type=float, required=True, default=[])
    parser.add_argument("--seed", type=int, help="Seed for random generators to ensure replicability")
    parser.add_argument("--k_values", nargs='+', type=int, help="list of k-values to explore [e.g., 2 4 20 24]", default=[])
    parser.add_argument("--sub_pops", nargs="+", help="Number of sub populations", required=True, default=[])
    parser.add_argument("--maxalleles", type=int, help="Maximum number of alleles", default=50)
    # type=bool would turn every non-empty string (including "False") into True
    parser.add_argument("--save_figs", type=_str_to_bool, help="Save figures or not?", default=False)
    parser.add_argument("--burnintime", type=int, help="How long to wait before making measurements? ", default=2000)
    parser.add_argument("--rewiringprob", type=float, help="Probability of random rewiring", default=0)

    config = parser.parse_args()

    # setup output directories for writing
    output_path = utils.setup_output(config.experiment)

    # check the k and migration rate combinations
    check = utils.check_k_and_migration_rates(config)
    if check is not True:
        print("\nProblem(s):\t %s\n" % check)
        print("Please adjust input values for k and/or migration rate and restart.\n ")
        # non-zero status so that calling scripts can detect the failure
        sys.exit(1)
    else:
        print("\nChecked on the migration and k values -- all looks good!\n")

    # save parameters
    utils.save_parameters(str(sys.argv), config, output_path)

    # set up the frequencies for the alleles in each loci. Here assuming a uniform distribution as a starting point
    distribution = utils.constructUniformAllelicDistribution(config.maxinittraits)

    # generation at which the accumulated statistics are sampled
    sample_at = config.burnintime
    series_names = ('ones', 'twos', 'richness', 'fst')

    # prepare file for output
    output_data_file_name = "%s/%s-rare-trait-output.csv" % (output_path, config.experiment)
    # newline='' is what the csv module expects for files it writes to
    with open(output_data_file_name, mode='w', newline='') as output_file:
        output_writer = csv.writer(output_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        output_writer.writerow(["Iteration", "k", "NumSubPops", "Migration", "InnovationRate", "Ones_Mean",
                                "Ones_95%_Lower", "Ones_95%_Upper", "Twos_Mean", "Twos_95%_Lower", "Twos_95%_Upper",
                                "Richness_Mean", "Richness_95%_Lower", "Richness_95%_Upper",
                                "Fst_Mean", "Fst_95%_Lower", "Fst_95%_Upper"])
        output_file.flush()
        subpop_run_values = config.sub_pops
        mig_run_values = config.migrationfraction
        innov_run_values = config.innovrate

        iteration = -1

        for subpop in subpop_run_values:
            # derive the k-values per sub-population count; do not overwrite
            # the requested list, otherwise later counts would reuse the
            # values derived for the first one
            k_run_values = _k_values_for(subpop, config.k_values)
            for k in k_run_values:
                for mig in mig_run_values:
                    for innov in innov_run_values:
                        ## let us know whats happening
                        iteration += 1
                        print("Now running with subpops: %s k-value: %s mig rate: %4f innov rate: %4f" % (subpop, k, mig, innov))
                        ## these are lists of things that simuPop will do at different stages
                        init_ops = OrderedDict()
                        pre_ops = OrderedDict()
                        post_ops = OrderedDict()

                        # Construct a demographic model
                        networkmodel = network.NetworkModel(networkmodel=config.networkfile,
                                                            simulation_id=config.experiment,
                                                            sim_length=config.simlength,
                                                            burn_in_time=config.burnintime,
                                                            initial_subpop_size=config.popsize,
                                                            migrationfraction=mig,
                                                            sub_pops=subpop,
                                                            connectedness=k,  # if 0, then distance decay
                                                            save_figs=config.save_figs,
                                                            network_iteration=iteration)

                        num_pops = networkmodel.get_subpopulation_number()
                        sub_pop_size = int(config.popsize / num_pops)

                        # The regional network model defines both of these, in order to configure an initial population for evolution
                        # Construct the initial population
                        pops = sp.Population(size=[sub_pop_size] * num_pops,
                                             subPopNames=str(list(networkmodel.get_subpopulation_names())),
                                             infoFields='migrate_to',
                                             ploidy=1,
                                             loci=config.numloci)

                        ### now set up the activities
                        init_ops['acumulators'] = sp.PyOperator(utils.init_acumulators, param=['fst', 'alleleFreq', 'haploFreq'])
                        init_ops['subpop_counts'] = sp.PyOperator(utils.init_count_traits_in_subpops)
                        init_ops['Sex'] = sp.InitSex()
                        init_ops['Freq'] = sp.InitGenotype(loci=list(range(config.numloci)), freq=distribution)

                        post_ops['Innovate'] = sp.KAlleleMutator(k=config.maxalleles, rates=innov, loci=sp.ALL_AVAIL)
                        post_ops['mig'] = sp.Migrator(rate=networkmodel.get_migration_matrix())  #, reps=[3])
                        post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL)
                        post_ops['Stat-richness'] = sp.Stat(alleleFreq=[0], haploFreq=[0], vars=['alleleFreq', 'haploFreq', 'alleleNum', 'genoNum'])
                        post_ops['fst_acumulation'] = sp.PyOperator(utils.update_acumulator, param=['fst', 'F_st'])
                        post_ops['richness_acumulation'] = sp.PyOperator(utils.update_richness_acumulator, param=('alleleFreq', 'Freq of Alleles'))
                        post_ops['class_richness'] = sp.PyOperator(utils.calculateAlleleAndGenotypeFrequencies, param=(config.popsize, config.numloci))
                        post_ops['count_traits_in_subpops'] = sp.PyOperator(utils.count_traits_in_subpops, param=(config.numloci, num_pops), subPops=sp.ALL_AVAIL)

                        mating_scheme = sp.RandomSelection()

                        ## go simuPop go! evolve your way to the future!
                        sim = sp.Simulator(pops, rep=config.reps)
                        sim.evolve(initOps=list(init_ops.values()), preOps=list(pre_ops.values()),
                                   postOps=list(post_ops.values()),
                                   matingScheme=mating_scheme, gen=config.simlength)

                        # NOTE(review): `output` is not defined in this function;
                        # it is presumably a module-level container -- confirm.
                        for count, pop in enumerate(sim.populations()):
                            output[count] = deepcopy(pop.dvars())

                        # one sampled value per replicate and statistic
                        samples = {name: [] for name in series_names}
                        for n in range(config.reps):
                            for name in series_names:
                                samples[name].append(
                                    _value_at(getattr(output[n], name), sample_at, name))

                        (ones_ave, ones_min, ones_max) = utils.mean_confidence_interval(samples['ones'], confidence=0.95)
                        (twos_ave, twos_min, twos_max) = utils.mean_confidence_interval(samples['twos'], confidence=0.95)
                        (richness_ave, richness_min, richness_max) = utils.mean_confidence_interval(samples['richness'], confidence=0.95)
                        (fst_ave, fst_min, fst_max) = utils.mean_confidence_interval(samples['fst'], confidence=0.95)

                        output_writer.writerow([iteration, k, subpop, mig, innov, ones_ave, ones_min, ones_max,
                                                twos_ave, twos_min, twos_max, richness_ave, richness_min, richness_max,
                                                fst_ave, fst_min, fst_max])
                        output_file.flush()
matingScheme=sim.HeteroMating([ # all individuals with age < 75 will be kept. Note that # CloneMating will keep individual sex, affection status and all # information fields (by default). sim.CloneMating(subPops=[(0,0), (0,1), (0,2)], weight=-1), # only individuals with age between 20 and 50 will mate and produce # offspring. The age of offspring will be zero. sim.RandomMating(ops=[ sim.IdTagger(), # give new born an ID sim.PedigreeTagger(), # track parents of each individual sim.MendelianGenoTransmitter(), # transmit genotype ], numOffspring=(sim.UNIFORM_DISTRIBUTION, 1, 3), subPops=[(0,1)]),], subPopSize=demoModel), # number of individuals? postOps=[ sim.PyPenetrance(func=pene, loci=0), sim.PyOperator(func=outputstat, step=20) ], gen = 200 ) # draw two Pedigrees from the last age-structured population from simuPOP import sampling sample = sampling.drawNuclearFamilySample(pop, families=2, numOffspring=(2,3), affectedParents=(1,2), affectedOffspring=(1,3)) sim.dump(sample)
# This script is an example in the simuPOP user's guide. Please refer to
# the user's guide (http://simupop.sourceforge.net/manual) for a detailed
# description of this example.
#
import simuPOP as sim


def alleleCount(pop):
    """Print how many individuals carry 0..5 non-zero alleles at even positions.

    For every individual the first homologous copy is inspected and the
    alleles at positions 0, 2, 4, 6 and 8 are added up; the sum is used as an
    index into a six-bin histogram. The current generation number and the
    histogram are printed. Always returns True so that evolution continues.
    """
    tally = [0 for _ in range(6)]
    for individual in pop.individuals():
        first_copy = individual.genotype(ploidy=0)
        carried = sum(first_copy[idx] for idx in range(0, 10, 2))
        tally[carried] += 1
    print('%d %s' % (pop.dvars().gen, tally))
    return True


# five customized chromosomes with two loci each
pop = sim.Population(1000, loci=[2] * 5, chromTypes=[sim.CUSTOMIZED] * 5)
pop.evolve(
    # every one has miDNAs 10, 00, 00, 00, 00
    initOps=[sim.InitGenotype(haplotypes=[[1] + [0] * 9])],
    # random select cells for cytoplasmic segregation
    matingScheme=sim.RandomSelection(
        ops=[sim.MitochondrialGenoTransmitter()],
    ),
    # report the histogram every 10th generation
    postOps=sim.PyOperator(func=alleleCount, step=10),
    gen=51,
)
else: pop.vars()[accumulator] = [] return True def update_accumulator(pop, param): accumulator, var = param if var.endswith('_sp'): for sp in range(pop.numSubPop()): pop.vars()[accumulator][sp].append( deepcopy(pop.vars(sp)[var[:-3]])) else: pop.vars()[accumulator].append(deepcopy(pop.vars()[var])) return True init_ops['accumulators'] = sp.PyOperator(init_accumulators, param=['fst']) init_ops['Sex'] = sp.InitSex() init_ops['Freq'] = sp.InitGenotype(freq=[0.5, 0.5]) for i, mig in enumerate(migs): post_ops['mig-%d' % i] = sp.Migrator(demography.migrIslandRates(mig, num_pops), reps=[i]) post_ops['Stat-fst'] = sp.Stat(structure=sp.ALL_AVAIL) post_ops['fst_accumulation'] = sp.PyOperator(update_accumulator, param=('fst', 'F_st')) mating_scheme = sp.RandomMating() sim = sp.Simulator(pops, rep=len(migs)) sim.evolve(initOps=list(init_ops.values()), preOps=list(pre_ops.values()), postOps=list(post_ops.values()), matingScheme=mating_scheme,
loci with high mutation rate, as an attempt to raise allele frequency of rare loci to an higher level.''' # unpack parameter (cutoff, mu1, mu2) = param sim.stat(pop, alleleFreq=range(pop.totNumLoci())) for i in range(pop.totNumLoci()): # Get the frequency of allele 1 (disease allele) if pop.dvars().alleleFreq[i][1] < cutoff: sim.kAlleleMutate(pop, k=2, rates=mu1, loci=[i]) else: sim.kAlleleMutate(pop, k=2, rates=mu2, loci=[i]) return True pop = sim.Population(size=10000, loci=[2, 3]) pop.evolve( initOps=[ sim.InitSex(), sim.InitGenotype(freq=[.99, .01], loci=[0, 2, 4]), sim.InitGenotype(freq=[.8, .2], loci=[1, 3]) ], preOps=sim.PyOperator(func=dynaMutator, param=(.2, 1e-2, 1e-5)), matingScheme=sim.RandomMating(), postOps=[ sim.Stat(alleleFreq=range(5), step=10), sim.PyEval( r"' '.join(['%.2f' % alleleFreq[x][1] for x in range(5)]) + '\n'", step=10), ], gen=31)
pop.dvars().avgAllele = avg return True pop = sim.Population(10000, loci=[1, 1]) pop.setVirtualSplitter(sim.AffectionSplitter()) pop.evolve( initOps=[ sim.InitSex(), sim.InitGenotype(genotype=[50, 50]) ], matingScheme=sim.RandomMating(), postOps=[ # determine affection sim.status for each offspring (duringMating) sim.PyPenetrance(func=fragileX, loci=0), # unaffected offspring, mutation rate is high to save some time sim.StepwiseMutator(rates=1e-3, loci=1), # unaffected offspring, mutation rate is high to save some time sim.StepwiseMutator(rates=1e-3, loci=0, subPops=[(0, 0)]), # affected offspring have high probability of mutating upward sim.StepwiseMutator(rates=1e-2, loci=0, subPops=[(0, 1)], incProb=0.7, mutStep=3), # number of affected sim.PyOperator(func=avgAllele, step=20), sim.PyEval(r"'Gen: %3d #Aff: %d AvgRepeat: %.2f (unaff), %.2f (aff), %.2f (unrelated)\n'" + " % (gen, numOfAffected, avgAllele[0], avgAllele[1], avgAllele[2])", step=20), ], gen = 101 )
for p in range(3): for l in [dmi1, dmi2, dmi3, dmi4, ad1, ad2, ad3]: if l == ad3: print '%.2f\n' % pop.dvars(p).alleleFreq[l][1], else: print '%.2f' % pop.dvars(p).alleleFreq[l][1], return True pop.evolve( initOps=[ sim.InitSex(), #sim.Stat(popSize=True), #sim.PyEval(r'"%d %s " % (gen, subPopSize)'), ], preOps=sim.PySelector( loci=[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19 ], func=sel_1), # !! change for different types of selection !! # matingScheme=sim.RandomMating(ops=sim.Recombinator(intensity=r), subPopSize=[popsize, popsize, popsize]), postOps=[ sim.Migrator(rate=[[0.0, m, 0.0], [0.0, 0.0, 0.0], [0.0, m, 0.0]]), #sim.Stat(popSize=True), #sim.PyEval(r'"%d %s " % (gen, subPopSize)'), sim.PyOperator(printAlleleFreq, step=10), ], gen=1001)
def main():
    """Parse the command line and evolve a replicated network-model population.

    The population is built from a ``demography.NetworkModel`` created from
    ``--networkfile`` and is evolved with random selection and k-allele
    mutation for ``--simlength`` generations in ``--reps`` replicates.
    Progress and timing information is written through ``log``.
    """
    start = time()
    MAXALLELES = 10000

    parser = argparse.ArgumentParser()
    parser.add_argument("--experiment", help="provide name for experiment", required=True, type=str)
    parser.add_argument("--cores", type=int,
                        help="Number of cores to use for simuPOP, overrides devel flag and auto calculation")
    parser.add_argument("--debug", help="turn on debugging output")
    parser.add_argument("--devel", help="Use only half of the available CPU cores", type=int, default=1)
    parser.add_argument("--dbhost", help="database hostname, defaults to localhost", default="localhost")
    parser.add_argument("--dbport", help="database port, defaults to 27017", default="27017")
    parser.add_argument("--stepsize", help="size of sample by proportion", type=float, default=1.0)
    parser.add_argument("--reps", help="Replicated populations per parameter set", type=int, default=4)
    parser.add_argument("--networkfile",
                        help="Name of GML file representing the network model for this simulation",
                        required=True, type=str)
    parser.add_argument("--numloci", help="Number of loci per individual", type=int, required=True)
    parser.add_argument("--maxinittraits",
                        help="Max initial number of traits per locus for initialization",
                        type=int, required=True)
    parser.add_argument("--samplefraction",
                        help="Size of samples taken to calculate all statistics, as a proportion",
                        type=float, required=True)
    parser.add_argument("--innovrate",
                        help="Rate at which innovations occur in population as a per-locus rate",
                        type=float, required=True)
    parser.add_argument("--simlength",
                        help="Time at which simulation and sampling end, defaults to 3000 generations",
                        type=int, default=3000)
    parser.add_argument("--popsize",
                        help="Initial size of population for each community in the model",
                        type=int, required=True)
    parser.add_argument("--migrationfraction",
                        help="Fraction of population that migrates each time step",
                        type=float, required=True, default=0.2)
    parser.add_argument("--seed", type=int,
                        help="Seed for random generators to ensure replicability")

    (config, sim_id, script, cores) = setup(parser)

    log.info("config: %s", config)

    beginCollectingData = cpm.expectedIAQuasiStationarityTimeHaploid(
        config.popsize, config.innovrate)
    log.info("Starting data collection at generation: %s", beginCollectingData)

    ### NOTE ###
    ###
    ### the simuPOP module is deliberately imported here because we need to process the
    ### command line arguments first, to understand which version of the simuPOP module (e.g.,
    ### long allele representation, etc, to import, and because we need to figure out how
    ### many cores the machine has, etc., to set it up for parallel processing. If we import
    ### at the top of the file as normal, the imports happen before any code is executed,
    ### and we can't set those options. DO NOT move these imports out of setup and main.
    import simuPOP as sim
    import demography as demo

    log.info("Starting simulation run %s", sim_id)
    log.debug("config: %s", config)
    if config.seed is None:
        log.info(
            "No random seed given, allowing RNGs to initialize with random seed"
        )
    else:
        log.debug("Seeding RNGs with seed: %s", config.seed)
        npr.seed(config.seed)
        random.seed(config.seed)

    full_command_line = " ".join(sys.argv)

    # Calculate the burn in time
    burn_time = rapanuisim.utils.simulation_burnin_time(
        config.popsize, config.innovrate)
    log.info("Minimum burn in time given popsize and theta: %s", burn_time)

    initial_distribution = rapanuisim.utils.constructUniformAllelicDistribution(
        config.maxinittraits)
    log.info("Initial allelic distribution (for each locus): %s",
             initial_distribution)

    #innovation_rate = pypopgen.wf_mutation_rate_from_theta(config.popsize, config.innovrate)
    innovation_rate = float(config.innovrate)
    log.info("Per-locus innov rate within populations: %s", innovation_rate)

    # Construct a demographic model from a collection of network slices which represent a temporal network
    # of changing subpopulations and interaction strengths. This object is Callable, and simply is handed
    # to the mating function which applies it during the copying process
    networkmodel = demo.NetworkModel(
        networkmodel=config.networkfile,
        simulation_id=sim_id,
        sim_length=config.simlength,
        burn_in_time=burn_time,
        initial_subpop_size=config.popsize,
        migrationfraction=config.migrationfraction)

    # The regional network model defines both of these, in order to configure an initial population for evolution
    # Construct the initial population
    pop = sim.Population(size=networkmodel.get_initial_size(),
                         subPopNames=networkmodel.get_subpopulation_names(),
                         infoFields=networkmodel.get_info_fields(),
                         ploidy=1,
                         loci=config.numloci)

    log.info("population sizes: %s names: %s", pop.subPopSizes(), pop.subPopNames())

    # NOTE(review): this replaces the distribution computed above from
    # --maxinittraits with one sized by --numloci, so --maxinittraits has no
    # effect on the run -- confirm which of the two is intended.
    initial_distribution = utils.constructUniformAllelicDistribution(
        config.numloci)
    log.info("Initial allelic distribution: %s", initial_distribution)

    # We are going to evolve the same population over several replicates, in order to measure how stochastic variation
    # affects the measured copying process.
    simu = sim.Simulator(pop, rep=config.reps)

    # Start the simulation and evolve the population, taking samples after the burn-in time has elapsed
    simu.evolve(
        initOps=sim.InitGenotype(freq=initial_distribution),
        preOps=[
            # progress message every 100 generations, first replicate only
            sim.PyOperator(func=sampling.logGenerationCount, param=(), step=100, reps=0)
        ],
        matingScheme=sim.RandomSelection(subPopSize=networkmodel),
        postOps=[
            sim.KAlleleMutator(k=MAXALLELES, rates=config.innovrate, loci=sim.ALL_AVAIL),
            # sim.PyOperator(func=rapanuisim.data.sampleNumAlleles,
            #                param=(config.samplefraction, config.innovrate, config.popsize, sim_id, config.numloci),
            #                step=1, begin=beginCollectingData),
            # sim.PyOperator(func=rapanuisim.data.sampleTraitCounts,
            #                param=(config.samplefraction, config.innovrate, config.popsize, sim_id, config.numloci),
            #                step=1, begin=beginCollectingData),
            # sim.PyOperator(func=rapanuisim.data.censusTraitCounts,
            #                param=(config.innovrate, config.popsize, sim_id, config.numloci), step=1,
            #                begin=beginCollectingData),
            # sim.PyOperator(func=rapanuisim.data.censusNumAlleles,
            #                param=(config.innovrate, config.popsize, sim_id, config.numloci), step=1,
            #                begin=beginCollectingData)
            # #sim.PyOperator(func=rapanuisim.data.sampleIndividuals,
            # ##               param=(config.samplefraction, config.innovrate, config.popsize, sim_id, config.numloci),
            # #               step=1, begin=beginCollectingData),
        ],
        # run for the configured length; this used to be a hard-coded 3000,
        # which silently ignored --simlength
        gen=config.simlength)

    endtime = time()
    elapsed = endtime - start
    #log.info("simulation complete in %s seconds with %s cores", elapsed, cores)
    log.info("simulation complete,%s,%s", cores, elapsed)
    log.info("Ending simulation run at generation %s",
             simu.population(0).dvars().gen)
    sampled_length = int(config.simlength) - burn_time
loci=vecloc, infoFields=['migrate_to', 'fitness', 'env'], lociNames=allele_naming(numchrom, numloc)) #--------------------------Main evolving process------------------------ pop.evolve( #Initializing sex and genotype initOps=[ sim.InitSex(), sim.InitGenotype(freq=[0.5, 0.5]), ], preOps=[ #Splitting each population into two at 'atgen' generations sim.SplitSubPops(proportions=[0.5, 0.5], at=atgen), #Calling function 'migration' for individuals migration #(Operator Migrator does not allow for varying number of subpopulations) sim.PyOperator(migration, begin=atgen[0]), #Selection process #Create new environmental values for new daughter populations #Only takes place when fission takes place (atgen) sim.PyOperator(env_update, at=atgen), #Set environmental value (env infoField) for each individual in the population #Takes place at each generation after the first fission sim.PyOperator(env_set, begin=atgen[1]), #Selection occures at selected loci according to env information field sim.PySelector(fit_env, loci=locisel, begin=atgen[1]), ], #Mating at random (pangamy) matingScheme=sim.RandomMating( #Fixed population size (fixed at 'popsize') subPopSize=demo, #Recombination
# Operators applied once before the first generation: assign sex and draw
# genotypes with equal frequencies for alleles 0 and 1.
initial_ops = [
    sim.InitSex(),
    sim.InitGenotype(freq=[0.5, 0.5]),
]

# Operators applied before mating in each generation.
pre_mating_ops = [
    # Fission of the population into 16 equally sized subpopulations at
    # generation 'atgen'
    sim.SplitSubPops(proportions=[0.0625] * 16, at=atgen),
    # Migration following a simple, non-circular stepping stone model
    sim.Migrator(rate=migrSteppingStoneRates(m, 16, circular=False),
                 begin=atgen),
    # Selection process:
    # set the environmental value (env infoField) of each individual,
    # from the fission generation onwards
    sim.PyOperator(env_set, begin=atgen),
    # selection at the selected loci according to the env information field
    sim.PySelector(fit_env, loci=locisel, begin=atgen),
]

# Random mating; the population size is controlled by 'demo' and
# recombination happens at rate 0.002.
random_mating = sim.RandomMating(
    subPopSize=demo,
    ops=[sim.Recombinator(rates=0.002)],
)

# Operators applied after mating: mutation at rate 1e-6 in both directions.
post_mating_ops = [
    sim.SNPMutator(u=1e-6, v=1e-6),
]

# Evolve for 'numgen' generations.
pop.evolve(
    initOps=initial_ops,
    preOps=pre_mating_ops,
    matingScheme=random_mating,
    postOps=post_mating_ops,
    gen=numgen,
)
data.storeSimulationData(args.popsize, args.mutationrate, sim_id, args.samplesize, args.replications, args.numloci, __file__, args.numloci, simconfig.MAXALLELES) initial_distribution = utils.constructUniformAllelicDistribution(args.numloci) log.info("Initial allelic distribution: %s", initial_distribution) pop = sim.Population(size=args.popsize, ploidy=1, loci=args.numloci) simu = sim.Simulator(pop, rep=args.replications) simu.evolve( initOps=sim.InitGenotype(freq=initial_distribution), preOps=[ sim.PyOperator(func=utils.logGenerationCount, param=(), step=1000, reps=0), ], matingScheme=sim.RandomSelection(), postOps=[ sim.KAlleleMutator(k=simconfig.MAXALLELES, rates=args.mutationrate, loci=sim.ALL_AVAIL), sim.PyOperator(func=data.sampleNumAlleles, param=(args.samplesize, args.mutationrate, args.popsize, sim_id, args.numloci), step=args.stepsize, begin=beginCollectingData), sim.PyOperator(func=data.sampleTraitCounts, param=(args.samplesize, args.mutationrate, args.popsize, sim_id, args.numloci),
# get source subpop for all individuals in subpopulation i origins = pop.indInfo('migrate_from', sp) spSize = pop.subPopSize(sp) B_sp = [ origins.count(j) * 1.0 / spSize for j in range(pop.numSubPop()) ] print(' ' + ', '.join(['{:.3f}'.format(x) for x in B_sp])) return True pop.evolve( initOps=sim.InitSex(), preOps= # mark the source subpopulation of each individual [sim.InitInfo(i, subPops=i, infoFields='migrate_from') for i in range(3)] + [ # perform migration sim.BackwardMigrator( rate=[[0, 0.04, 0.02], [0.05, 0, 0.02], [0.02, 0.01, 0]]), # calculate and print observed backward migration matrix sim.PyOperator(func=originOfInds), # calculate population size sim.Stat(popSize=True), # and print it sim.PyEval( r'"Pop size after migration: {}\n".format(", ".join([str(x) for x in subPopSize]))' ), ], matingScheme=sim.RandomMating(), gen=5)
def simulation(self):
    """Create the two-country population and evolve it.

    Builds ``self.pop`` with two subpopulations named "croatia" and
    "slovenia" (500 individuals each, ``loci=[1]*20``), fills the ``age``
    info field from a negative-binomial draw, installs sex-by-age virtual
    subpopulations (VSPs) and runs ``self.pop.evolve`` for
    ``self.generations`` generations.

    Reads ``self.cro_to_slo``, ``self.slo_to_cro``, ``self.bearFather``,
    ``self.bearMother``, ``self.me``, ``self.Ne``, ``self.x``,
    ``self.slo_cull``, ``self.cro_cull`` and ``self.generations``.
    Returns nothing; the value returned by ``evolve`` is discarded.
    """
    self.pop = sim.Population(
        size=[500, 500],
        loci=[1] * 20,
        infoFields=[
            "age", "ind_id", "father_idx", "mother_idx",
            "hc", "ywc", "migrate_to",
        ],
        subPopNames=["croatia", "slovenia"],
    )

    # Starting ages: 500 integer draws from a negative binomial (n=1, p=0.25).
    # NOTE(review): only 500 values are supplied for 1000 individuals;
    # presumably simuPOP recycles the list -- confirm.
    starting_ages = [
        int(draw)
        for draw in np.random.negative_binomial(n=1, p=0.25, size=500)
    ]
    sim.initInfo(pop=self.pop, values=starting_ages, infoFields="age")

    # Age groups: from 0 to 1 - cubs, from 1 to 3 - prereproductive,
    # from 3 to 6 - reproductive class, from 6 to 10 - dominant.
    # The sex x age product yields ten raw VSPs which vspMap regroups into
    # six. Assuming simuPOP's product ordering (all male age classes first,
    # then female -- confirm), the merged VSPs are:
    #   0: males < 3, 1: males 3-6, 2: males 6-10, 3: males >= 10,
    #   4: females < 10, 5: females >= 10.
    sex_by_age = sim.ProductSplitter([
        sim.SexSplitter(),
        sim.InfoSplitter(field="age", cutoff=[1, 3, 6, 10]),
    ])
    self.pop.setVirtualSplitter(
        sim.CombinedSplitter(
            [sex_by_age],
            vspMap=[[0, 1], [2], [3], [4], [5, 6, 7, 8], [9]],
        )
    )

    countries = (0, 1)
    # (subpop, vsp) pairs handed to the effective-size statistics:
    # [(0,1), (0,2), (0,4), (1,1), (1,2), (1,4)]
    ne_vsps = [(sp, vsp) for sp in countries for vsp in (1, 2, 4)]
    # (subpop, vsp) pairs copied unchanged into the next generation:
    # [(0,0), (0,1), (0,2), (0,4), (1,0), (1,1), (1,2), (1,4)]
    # VSPs 3 and 5 (the >= 10 classes in the map above) are not listed.
    cloned_vsps = [(sp, vsp) for sp in countries for vsp in (0, 1, 2, 4)]

    # The Ne-related post-mating operators only start after the first 20%
    # of the run.
    stats_start = int(0.2 * self.generations)

    founder_ops = [
        sim.InitSex(),
        # random genotype
        sim.InitGenotype(freq=[0.01] * 2 + [0.03] * 2 + [0.23] * 4),
        # give every individual a unique ID
        sim.IdTagger(),
    ]

    before_mating = [
        # everyone gets one year older before mating
        sim.InfoExec('age += 1'),
        # Original intent: a mother bear can't have cubs for two years after
        # pregnancy.
        # NOTE(review): this parses as ``hc += (1 if 0 < hc < 3 else 0)``,
        # so hc is never reset to 0 by this statement -- confirm intended.
        sim.InfoExec("hc +=1 if 0 < hc < 3 else 0"),
        # Migration Cro -> Slo, then Slo -> Cro, each by proportion from VSP 0
        # of the source country.
        # NOTE(review): the original comment said "reproductive males", but
        # VSP 0 under the vspMap above is the youngest male class -- confirm
        # which VSP was meant.
        sim.Migrator(
            rate=[[self.cro_to_slo]],
            mode=sim.BY_PROPORTION,
            subPops=[(0, 0)],
            toSubPops=[1],
        ),
        sim.Migrator(
            rate=[[self.slo_to_cro]],
            mode=sim.BY_PROPORTION,
            subPops=[(1, 0)],
            toSubPops=[0],
        ),
        sim.Stat(
            effectiveSize=sim.ALL_AVAIL,
            subPops=ne_vsps,
            vars='Ne_demo_base',
        ),
        sim.Stat(
            effectiveSize=sim.ALL_AVAIL,
            subPops=ne_vsps,
            vars='Ne_demo_base_sp',
        ),
    ]

    # New cubs: parents come from the bearFather / bearMother generators;
    # each mating event yields a uniformly drawn 1-3 offspring with age 0.
    cub_production = sim.HomoMating(
        subPops=sim.ALL_AVAIL,
        chooser=sim.CombinedParentsChooser(
            fatherChooser=sim.PyParentsChooser(generator=self.bearFather),
            motherChooser=sim.PyParentsChooser(generator=self.bearMother),
        ),
        generator=sim.OffspringGenerator(
            ops=[
                sim.InfoExec("age = 0"),
                sim.IdTagger(),
                sim.ParentsTagger(),
                sim.MendelianGenoTransmitter(),
            ],
            numOffspring=(sim.UNIFORM_DISTRIBUTION, 1, 3),
        ),
    )
    # CloneMating keeps individual sex and all information fields (by
    # default), carrying existing animals over to the next generation.
    carry_over = sim.CloneMating(subPops=cloned_vsps, weight=-1)
    # popmodel.demoModel supplies the next-generation subpopulation sizes
    # (original note: "number of individuals?").
    mating = sim.HeteroMating(
        [cub_production, carry_over],
        subPopSize=popmodel.demoModel,
    )

    after_mating = [
        sim.PyOperator(
            func=popmodel.CalcNe,
            param={"me": self.me, "Ne": self.Ne},
            begin=stats_start,
        ),
        sim.PyOperator(
            func=popmodel.CalcLDNe,
            param={"me": self.me, "x": self.x},
            begin=stats_start,
        ),
        sim.PyOperator(
            func=popmodel.cullCountry,
            param={"slo_cull": self.slo_cull, "cro_cull": self.cro_cull},
        ),
    ]

    self.pop.evolve(
        initOps=founder_ops,
        preOps=before_mating,
        matingScheme=mating,
        postOps=after_mating,
        gen=self.generations,
    )
#### evolve population #### # evolve population pop.evolve( # initial population initOps = [ # individuals are randomly assigned as male or female sim.InitSex(), # each locus has 19 alleles with equal frequencies sim.InitGenotype(freq = [1/19]*19) ], # before mating rank individuals by their trait #preOps = sim.PyOperator(func = pickParents, param = args.selection_type), preOps = sim.PyOperator(func = assignFitness), # Random mating with fixed number of offspring # https://bopeng.github.io/simuPOP/userGuide_ch6_sec1.html#determine-the-number-of-offspring-during-mating matingScheme = sim.RandomMating( ops = [ # average recombination across all chromosomes sim.Recombinator((1*len(pop.numLoci()))/sum(pop.numLoci())), # avg of 2 recombinations == poisson with rate 2 sim.PedigreeTagger(), sim.IdTagger() ], # each cross results in 10 offspring numOffspring = 10 ), # after mating assign quantitative trait based on genotype postOps = [ sim.PyQuanTrait(