def __init__(
    self,
    shadie=None,
    file=None,
):
    """
    Load a SLiM tree sequence, run it through ``Coal.slimcoal()`` and
    record the integer positions of the resulting mutations.

    Parameters:
    -----------
    shadie: (Shadie class object)
        When given, the tree sequence is loaded from ``shadie.outname``
        and ``shadie.genemap`` / ``shadie.genome`` are copied onto this
        object.
    file:
        Path passed to ``pyslim.load`` when ``shadie`` is None.

    Raises:
    -------
    TypeError
        If ``shadie`` is neither a Shadie object nor None.
    """
    if isinstance(shadie, Shadie):
        self.shadie = shadie
        self.tsraw = pyslim.load(self.shadie.outname)
        self.genemap = shadie.genemap
        self.genome = shadie.genome
    elif shadie is None:
        self.tsraw = pyslim.load(file)
    else:
        # Without this branch ``self.tsraw`` would be left unset and the
        # failure would surface later as an unrelated AttributeError.
        raise TypeError(
            "shadie must be a Shadie object or None, "
            f"got {type(shadie).__name__}"
        )

    tsout = Coal(self.tsraw)
    tsout.slimcoal()
    self.tscoal = tsout.tscoal

    # positions of every mutation, truncated to integers
    self.positions = [int(mut.position) for mut in self.tscoal.mutations()]
def _update_tables(self):
    """Reload every tree file, merge all nodes into population 0 and
    simplify down to the nodes referenced by the edge table.

    Each resulting tree sequence is appended to ``self._tree_sequences``.
    """
    for path in self.trees_files:
        # work on a mutable tskit.TableCollection copy of the loaded file
        tables = pyslim.load(path).dump_tables()

        # there is a null SLiM population (0) that doesn't really exist
        # and the actual population (1), so every node is assigned to 0.
        node_count = tables.nodes.time.size
        tables.nodes.population = np.zeros(node_count, dtype=np.int32)

        # nodes that are not connected to anything are dropped; this
        # includes the pseudo-nodes representing half of the haploid
        # populations.
        edges = tables.edges
        connected_nodes = list(set(edges.parent).union(edges.child))

        # simplify on the connected nodes to remove the empty
        # population nodes
        tables.simplify(
            samples=connected_nodes,
            keep_input_roots=True,
            filter_individuals=True,
            filter_populations=True,
            filter_sites=False,
        )

        # convert the edited tables back into a tree sequence
        rebuilt = pyslim.load_tables(tables)
        self._tree_sequences.append(rebuilt)
def test_load(self):
    """Check that the three ways of obtaining a SlimTreeSequence agree
    with the tables loaded directly through tskit."""
    for _, example in self.get_slim_examples(return_info=True):
        trees_path = example['basename'] + ".trees"

        # reference copy loaded through tskit
        msp_ts = tskit.load(trees_path)
        self.assertIs(type(msp_ts), msprime.TreeSequence)
        msp_tables = msp_ts.tables

        # route 1: build from the tables
        from_tables = pyslim.load_tables(msp_tables, legacy_metadata=True)
        self.assertIsInstance(from_tables, pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, from_tables)
        self.assertTableCollectionsEqual(msp_tables, from_tables.tables)

        # route 2: wrap the tree sequence directly
        new_ts = pyslim.SlimTreeSequence(msp_ts)
        self.assertIs(type(new_ts), pyslim.SlimTreeSequence)
        self.verify_times(msp_ts, new_ts)
        self.assertTableCollectionsEqual(msp_tables, new_ts.tables)

        # route 3: load from file with pyslim
        slim_ts = pyslim.load(trees_path, legacy_metadata=True)
        self.assertIs(type(slim_ts), pyslim.SlimTreeSequence)
        self.assertTableCollectionsEqual(msp_tables, slim_ts.tables)
        self.assertEqual(slim_ts.slim_generation, new_ts.slim_generation)
def end_sim(self):
    """
    Register the late() call that ends the simulation and saves the
    .trees file.

    The call is scheduled at ``int(self._sim_time + 1)``. When
    ``self._file_in`` is set, that time is shifted by the
    ``max_root_time`` of the tree sequence loaded from it.
    """
    save_time = int(self._sim_time + 1)
    if self._file_in:
        parent_ts = pyslim.load(self._file_in)
        save_time = int(save_time + parent_ts.max_root_time)

    self.model.late(
        time=save_time,
        scripts=[
            "sim.treeSeqRememberIndividuals(sim.subpopulations.individuals)\n",
            f"sim.treeSeqOutput('{self._file_out}')",
        ],
        comment="end of sim; save .trees file",
    )
def MutsFromTrees(Trees):
    """Load and simplify a .trees file and list its non-fixed variants.

    Returns a list of ``[position, frequency, selection_coeff]`` entries,
    where frequency is the sum of the variant's genotype values divided
    by their count and the coefficient comes from the first entry of the
    matching mutation metadata. Variants with frequency >= 1 are skipped.
    """
    print('read trees')
    ts = pyslim.load(Trees).simplify()

    # the three iterables are walked in lockstep, so iteration stops at
    # the shortest of them; the node metadata itself is not used.
    records = zip(
        ts.variants(),
        pyslim.extract_mutation_metadata(ts.tables),
        pyslim.extract_node_metadata(ts.tables),
    )

    variants = []
    for variant, stacked_muts, _node_md in records:
        freq = sum(variant.genotypes) / len(variant.genotypes)
        if freq >= 1:
            continue
        # only the first metadata entry is consulted, even when several
        # are present
        variants.append(
            [variant.site.position, freq, stacked_muts[0].selection_coeff]
        )
    return variants
def get_slim_examples(self):
    """
    Lazily yield one pyslim-loaded tree sequence per entry of
    ``_slim_example_files``, for use in tests.
    """
    yield from (pyslim.load(path) for path in _slim_example_files)
def postsim(self):
    """Post-SLiMulation analysis.

    Loads ``self.outname`` into ``self.ts``, tallies mutations by
    (parent nucleotide, derived nucleotide) and prints the table followed
    by a summary of the simulation settings.
    """
    self.ts = pyslim.load(self.outname)
    nucs = pyslim.NUCLEOTIDES

    # M[parent][derived] counts mutations from one nucleotide to another
    M = [[0 for _ in nucs] for _ in nucs]
    for mut in self.ts.mutations():
        stacked = mut.metadata["mutation_list"]
        # the derived state is taken from the entry with the largest slim_time
        newest = np.argmax([entry["slim_time"] for entry in stacked])
        derived_nuc = stacked[newest]["nucleotide"]

        if mut.parent == -1:
            # no parent mutation: the ancestral state is the reference base
            ref_base = self.ts.reference_sequence[int(mut.position)]
            parent_nuc = nucs.index(ref_base)
        else:
            parent_mut = self.ts.mutation(mut.parent)
            assert parent_mut.site == mut.site
            parent_nuc = parent_mut.metadata["mutation_list"][0]["nucleotide"]

        M[parent_nuc][derived_nuc] += 1

    counts = 0
    print("ancestr\tderived\tcount")
    for a, row in zip(nucs, M):
        for b, tally in zip(nucs, row):
            counts += tally
            print(f"{a}\t{b}\t{tally}")

    print(
        f"\nNumber of mutations: {counts}\n"
        " ---------------------\n"
        "Simulation settings\n\n"
        f"Carrying Capacity: {self.Ne}\n"
        f"Generations: {self.gens}\n"
        f"Tree: {self.tree}\n"
        f"Reproduction: {self.reproduction}\n"
    )
def getHapsPosLabelsLocs(direc):
    '''
    Walk a trees directory and collect, for each ``<i>.trees`` file, its
    genotype matrix, site positions and the first two location
    coordinates of every sampled individual.

    The number of files read equals the number of rows in
    ``labels.txt``. Returns ``(haps, positions, labels, locs)`` with the
    three collected lists converted to numpy arrays.
    '''
    labels = np.loadtxt(os.path.join(direc, "labels.txt"))
    ntrees = np.shape(labels)[0]

    haps, positions, locs = [], [], []
    for index in range(ntrees):
        ts = pyslim.load(os.path.join(direc, str(index) + ".trees"))

        haps.append(ts.genotype_matrix())
        positions.append(np.array([site.position for site in ts.sites()]))

        # unique individuals that own at least one sample node
        sample_inds = np.unique(
            [ts.node(node_id).individual for node_id in ts.samples()]
        )
        coords = []
        for ind_id in sample_inds:
            location = ts.individual(ind_id).location
            coords.append([location[0], location[1]])
        locs.append(coords)

    haps = np.array(haps)
    positions = np.array(positions)
    locs = np.array(locs)
    return haps, positions, labels, locs
def _write_trees_file(self):
    """Register the late() call that saves and writes the .trees file.

    The write happens at ``int(self.model.sim_time + 1)``. If the model
    metadata names a ``file_in``, that time is shifted by the
    ``max_root_time`` of the tree sequence loaded from it, so the end
    accounts for both this simulation and the loaded parent simulation.
    """
    # time AFTER the last generation of this simulation
    write_time = int(self.model.sim_time + 1)

    parent_file = self.model.metadata['file_in']
    if parent_file:
        parent_ts = pyslim.load(parent_file)
        write_time = int(write_time + parent_ts.max_root_time)

    self.model.late(
        time=write_time,
        scripts=[
            "sim.treeSeqRememberIndividuals(sim.subpopulations.individuals)",
            f"sim.treeSeqOutput('{self.model.metadata['file_out']}', METADATA)",
        ],
        comment="end of sim; save .trees file",
    )
def test_load(self, recipe):
    """Check that building from tables, wrapping directly and loading
    from file all give a SlimTreeSequence matching the tskit tables."""
    trees_path = recipe["path"]["ts"]

    # reference copy loaded through tskit
    msp_ts = tskit.load(trees_path)
    assert isinstance(msp_ts, tskit.TreeSequence)
    msp_tables = msp_ts.dump_tables()

    # route 1: build from the tables
    from_tables = pyslim.load_tables(msp_tables)
    assert isinstance(from_tables, pyslim.SlimTreeSequence)
    self.verify_times(msp_ts, from_tables)
    self.assertTableCollectionsEqual(msp_tables, from_tables.dump_tables())

    # route 2: wrap the tree sequence directly
    new_ts = pyslim.SlimTreeSequence(msp_ts)
    assert isinstance(new_ts, pyslim.SlimTreeSequence)
    self.verify_times(msp_ts, new_ts)
    self.assertTableCollectionsEqual(msp_tables, new_ts.dump_tables())

    # route 3: load from file with pyslim
    slim_ts = pyslim.load(trees_path)
    assert isinstance(slim_ts, pyslim.SlimTreeSequence)
    self.assertTableCollectionsEqual(msp_tables, slim_ts.dump_tables())

    loaded_generation = slim_ts.metadata['SLiM']['generation']
    wrapped_generation = new_ts.metadata['SLiM']['generation']
    assert loaded_generation == wrapped_generation
def __init__(
    self,
    tree_files: List[str],
    seed: Optional[int]=None,
    **kwargs,
):
    """Load the given tree files and initialise the settings used later.

    ``popsize``, ``recomb``, ``mut`` and ``chromosome`` are read from
    ``kwargs`` (None when absent). After the attributes are set,
    ``self._extract_metadata()`` and ``self._union()`` are called.
    """
    # hidden attributes
    self._tree_files: List[str] = tree_files
    self._tree_sequences = [pyslim.load(path) for path in self._tree_files]
    self._nts: int = len(self._tree_files)

    # attributes to be parsed from the slim metadata

    # The SLiM simulated length of time in diploid generations AFTER the
    # ancestral burn in (branch gens only)
    self.generations: int = 0
    # The SLiM simulated diploid carrying capacity
    self.popsize: int = kwargs.get("popsize")
    # The recombination rate to use for recapitation.
    self.recomb: float = kwargs.get("recomb")
    # The mutation rate to use for recapitated treesequence.
    self.mut: float = kwargs.get("mut")
    # The shadie.Chromosome class representing the SLiM genome.
    self.chromosome: ChromosomeBase = kwargs.get("chromosome")
    self.rng: np.random.Generator = np.random.default_rng(seed)

    # new attributes built as results

    # A SlimTreeSequence that has been recapitated and mutated.
    self.tree_sequence: pyslim.SlimTreeSequence = None

    # try to fill attributes by extracting metadata from the tree files.
    self._extract_metadata()
    self._union()
def run_slim_restart(self, in_ts, basename, **kwargs):
    """Dump ``in_ts`` for the SLiM script ``basename.slim`` to restart
    from, run the script, and return the tree sequence it writes.

    ``STAGE`` defaults to the stage recorded in the input's SLiM
    metadata. Both the ``.init.trees`` input and the ``.trees`` output
    are removed again before returning.
    """
    def _discard(path):
        # remove a file, ignoring the case where it does not exist
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    init_path = basename + ".init.trees"
    result_path = basename + ".trees"
    script_path = basename + ".slim"

    # start from a clean slate so stale files cannot be picked up
    _discard(init_path)
    _discard(result_path)

    in_ts.dump(init_path)
    if 'STAGE' not in kwargs:
        kwargs['STAGE'] = in_ts.metadata['SLiM']['stage']
    out = run_slim_script(script_path, **kwargs)
    _discard(init_path)

    self.assertEqual(out, 0)
    self.assertTrue(os.path.isfile(result_path))
    out_ts = pyslim.load(result_path)
    _discard(result_path)
    return out_ts
def main():
    """Load a .trees file, recapitate it and write the result back.

    Note that the recapitated tree sequence is dumped to the same path
    it was loaded from.
    """
    trees_path = '../Data/AW_recap/seed_1.trees'

    # Load the .trees file
    loaded = pyslim.load(trees_path)

    # Recapitate and write out
    recapitated = loaded.recapitate(
        recombination_rate=1e-8,
        Ne=12500,
        random_seed=1,
    )
    recapitated.dump(trees_path)
def test_legacy_error(self, recipe, tmp_path):
    """Requesting legacy metadata must raise a ValueError mentioning the
    legacy metadata tools, both when loading a file and when wrapping."""
    expected_message = "legacy metadata tools"
    source_ts = recipe["ts"]
    dumped_path = os.path.join(tmp_path, "test_legacy.trees")
    source_ts.dump(dumped_path)

    with pytest.raises(ValueError, match=expected_message):
        pyslim.load(dumped_path, legacy_metadata=True)

    with pytest.raises(ValueError, match=expected_message):
        pyslim.SlimTreeSequence(source_ts, legacy_metadata=True)
def verify_dump_equality(self, ts):
    """
    Dump ``ts`` to ``self.temp_file``, load it back with pyslim, and
    assert that sample count, sequence length and tables are unchanged.
    """
    ts.dump(self.temp_file)
    reloaded = pyslim.load(self.temp_file)

    self.assertEqual(ts.num_samples, reloaded.num_samples)
    self.assertEqual(ts.sequence_length, reloaded.sequence_length)
    self.assertEqual(ts.tables, reloaded.tables)
def simplifyTreeSequenceDirectory(indir,outdir,nSamples):
    """Simplify every ``<i>.trees`` file in ``indir`` onto a subsample
    and write the results, plus ``labels.txt``, into ``outdir``.

    The number of tree files is taken to be the count of non-hidden
    entries in ``indir`` minus one.
    NOTE(review): the minus one presumably accounts for ``labels.txt`` -
    confirm that no other non-tree files live in the directory.

    Paths are built with ``os.path.join`` so ``indir`` and ``outdir``
    work with or without a trailing separator.

    Returns None.
    """
    visible_entries = [f for f in os.listdir(indir) if not f.startswith(".")]
    ntrees = len(visible_entries) - 1

    for index in range(ntrees):
        filename = str(index) + ".trees"
        ts = pyslim.load(os.path.join(indir, filename))
        simplified = simplifyTreeSequenceOnSubSampleSet(ts=ts, nSamples=nSamples)
        simplified.dump(os.path.join(outdir, filename))

    shutil.copyfile(
        os.path.join(indir, "labels.txt"),
        os.path.join(outdir, "labels.txt"),
    )
    return None
def Fst_IBD(trees):
    """Overlay mutations on a tree sequence and compute Weir-Cockerham
    Fst for a random set of subpopulations and for a series of pairs.

    Returns a list of ``[rep, distance, fst]`` string triples. ``rep`` is
    the part of the file name before the first underscore; the first row
    uses distance ``-1`` and the Fst over 10 randomly chosen
    subpopulations, and later rows hold one pair each.
    """
    ts = pyslim.load(trees)
    mutated_tree = msprime.mutate(ts, 1e-8)

    # genotype matrix, ready for scikit-allel
    msprime_genotype_matrix = mutated_tree.genotype_matrix()

    # haplotypes are merged into diploid genotypes
    haplotype_array = allel.HaplotypeArray(msprime_genotype_matrix)
    genotype_array = haplotype_array.to_genotypes(ploidy=2)
    print(genotype_array.shape)

    # diversity in windows (computed but not part of the returned rows)
    window_edges = [
        0, 1e6, 2e6, 3e6, 4e6, 5e6, 6e6, 7e6, 8e6, 9e6, 10e6, 10e6 + 1
    ]
    pi = mutated_tree.diversity(windows=window_edges)

    # Tajima's D
    allele_counts = genotype_array.count_alleles()
    TD = allel.tajima_d(allele_counts)
    print(TD)

    # one random starting index paired with indices at multiples of 14
    row = np.random.choice(13)
    pairs = [[row, row + (14 * step)] for step in range(14)]

    # consecutive blocks of 100 sample indices form the subpopulations
    n_samples = genotype_array.shape[1]
    subpopulations = [
        list(range(start, start + 100)) for start in range(0, n_samples, 100)
    ]
    subpop_matrix = np.array(subpopulations)

    chosen = np.random.choice(len(subpopulations), 10, replace=False)
    mean_fst = allel.average_weir_cockerham_fst(
        genotype_array, blen=10000, subpops=subpop_matrix[chosen])

    rep = trees.split("/")[-1].split("_")[0]
    output = [[str(rep), str(-1), str(mean_fst[0])]]

    for pair in pairs:
        print(pair)
        dist = (pair[1] - pair[0]) / 14
        if dist == 0:
            # a subpopulation paired with itself is skipped
            continue
        pair_fst = allel.average_weir_cockerham_fst(
            genotype_array, blen=1000, subpops=subpop_matrix[pair])
        output.append([str(rep), str(int(dist)), str(pair_fst[0])])

    return (output)
def __init__(
    self,
    files: list = None,       # takes exactly two files (for now)
    simlength: int = None,    # length in generations
    popsize: int = None,      # initial size of each population
    recomb: float = None,     # recombination rate
    mutrate: float = None,    # mutation rate
    chromosome=None,          # 'shadie.chromosome.ChromosomeBase'
    altgen: bool = True,      # is model altgen or not?
):
    """
    Read in the SLiM .trees files, put all of their nodes into
    population 0 and simplify each one.

    The simplified tree sequences are stored in ``self.onepop_tslist``.
    The stored mutation rate is halved when ``altgen`` is True.
    """
    self.mutrate = mutrate / 2 if altgen is True else mutrate

    self.chromosome = chromosome
    self.simlength = simlength
    self.recomb = recomb
    self.popsize = popsize
    self.pops = None
    self.species = []

    # read in all the tree sequences
    loaded = [pyslim.load(path) for path in files]

    # merge the p0 and p1 populations by assigning every node to 0
    single_pop = []
    for ts in loaded:
        tables = ts.tables
        tables.nodes.population = np.zeros(
            tables.nodes.num_rows, dtype=np.int32)
        single_pop.append(pyslim.load_tables(tables))

    # remove the extra population
    self.onepop_tslist = [
        ts.simplify(keep_input_roots=True, keep_unary_in_individuals=True)
        for ts in single_pop
    ]
def recapitate(treesfile, sample_size, recombination_rate, mutation_rate, Ne, gcBurnin):
    """Recapitate a SLiM tree sequence, subsample it and overlay mutations.

    Steps: load ``treesfile``; recapitate with recombination rate
    ``gcBurnin + 1e-20`` and population size ``Ne``; keep the first node
    of ``sample_size`` randomly chosen individuals alive at time 0; add
    mutations at ``mutation_rate / 2`` while keeping existing ones.
    ``recombination_rate`` is not used in the body.

    Returns ``(snp_mat, pos, ts_mutated)`` where ``snp_mat`` is the
    transposed genotype matrix and ``pos`` the rounded integer site
    positions.
    """
    def _root_count_table(tree_seq):
        # how many trees have each number of roots
        roots_per_tree = pd.Series([t.num_roots for t in tree_seq.trees()])
        return roots_per_tree.value_counts().to_frame(
            name="num_tree_with_num_roots")

    # load trees with pyslim
    ts = pyslim.load(treesfile)
    num_individuals_0 = len(ts.individuals_alive_at(0))
    n_roots = _root_count_table(ts)
    logging.debug(
        f"The tree sequence has {ts.num_trees} trees on a genome of length {ts.sequence_length} "
        f"{num_individuals_0} alive individuals, {ts.num_samples} 'sample' genomes and {ts.num_mutations} mutations. "
        f"number of roots per tree: {n_roots.__str__()[:-12]}")

    ts_recap = ts.recapitate(
        recombination_rate=gcBurnin + 1e-20,  # can't put 0 here.
        Ne=Ne)

    # simplify to the first node of a random subset of individuals
    alive_now = ts_recap.individuals_alive_at(0)
    sample_inds = np.random.choice(alive_now, size=sample_size, replace=False)
    sample_nodes = [ts_recap.individual(ind).nodes[0] for ind in sample_inds]
    ts_samp = ts_recap.simplify(samples=sample_nodes)

    n_roots = _root_count_table(ts_samp)
    logging.debug(
        f"The tree sequence has {ts_samp.num_trees} trees on a genome of length {ts_samp.sequence_length} "
        f"{ts_samp.num_individuals} alive individuals, {ts_samp.num_samples} 'sample' genomes and {ts_samp.num_mutations} mutations. "
        f"number of roots per tree: {n_roots.__str__()[:-12]}")

    # mutate
    overlaid = msprime.mutate(
        ts_samp,
        rate=mutation_rate / 2,  # To have 2.Ne.mu and not 4.Ne.mu
        keep=True)  # keep existing mutations
    ts_mutated = pyslim.SlimTreeSequence(overlaid)

    snp_mat = ts_mutated.genotype_matrix().T
    site_positions = ts_mutated.tables.asdict()["sites"]["position"]
    pos = np.round(site_positions).astype(int)
    return snp_mat, pos, ts_mutated
def get_ts(self, include_text=False):
    """Yield the test tree sequences stored under ``self.dir``.

    When ``include_text`` is true, the first item is built from the text
    tables. ``NodeTable.txt`` and ``EdgeTable.txt`` are required; the
    site, mutation, individual and population tables are optional and
    are passed as None when they cannot be opened. After that, the
    binary ``test_output.trees`` and ``test_output.unsimplified.trees``
    are yielded in that order.
    """
    if include_text:
        opened = []

        def _open_table(filename, optional=True):
            # open a text table; return None for an unreadable optional one
            try:
                handle = open(os.path.join(self.dir, filename), "r")
            except OSError:
                if optional:
                    return None
                raise
            opened.append(handle)
            return handle

        # read in from text, closing every handle once the tables are parsed
        try:
            node_file = _open_table("NodeTable.txt", optional=False)
            edge_file = _open_table("EdgeTable.txt", optional=False)
            site_file = _open_table("SiteTable.txt")
            mutation_file = _open_table("MutationTable.txt")
            individual_file = _open_table("IndividualTable.txt")
            population_file = _open_table("PopulationTable.txt")
            text_ts = msprime.load_text(
                nodes=node_file,
                edges=edge_file,
                sites=site_file,
                mutations=mutation_file,
                individuals=individual_file,
                populations=population_file,
                base64_metadata=False)
        finally:
            for handle in opened:
                handle.close()
        print("******* Text input.")
        yield text_ts

    # and binary
    bin_ts = pyslim.load(os.path.join(self.dir, "test_output.trees"))
    print("******** Binary input.")
    yield bin_ts

    # and nonsimplified binary
    print("******** Unsimplified binary.")
    bin_nonsip_ts = pyslim.load(
        os.path.join(self.dir, "test_output.unsimplified.trees"))
    yield bin_nonsip_ts
def guess_gen_time(ts):
    """Estimate the divisor ``g`` for which a mutation rate of
    ``1e-8 / g`` gives about as many mutations as the loaded file has.

    Starting from ``g = 4``, random steps drawn from uniform(-1, 1) are
    proposed and kept only when they bring the simulated mutation count
    closer to the count in the file. The search stops once the two
    counts differ by at most 1%.

    Parameters
    ----------
    ts : path of the .trees file, passed to ``pyslim.load``.

    Returns
    -------
    float
        The accepted value of ``g`` (4.0 if the initial guess was
        already within tolerance).
    """
    t = pyslim.load(ts)
    slim_muts = t.num_mutations
    g = 4
    # slim mutation rate was set to 1e-8
    msp_muts = msprime.mutate(t, 1e-8 / g, keep=False).num_mutations

    while abs(msp_muts - slim_muts) > 0.01 * slim_muts:
        # scalar draw: keeps ``g`` a plain number so the return below
        # works whether or not any proposal was accepted
        new_g = g + np.random.uniform(-1, 1)
        if new_g <= 0:
            # a non-positive divisor gives an invalid mutation rate
            continue
        new_msp_muts = msprime.mutate(
            t, 1e-8 / new_g, keep=False).num_mutations
        if abs(new_msp_muts - slim_muts) < abs(msp_muts - slim_muts):
            msp_muts = new_msp_muts
            g = new_g

    return float(g)
def test_recap_and_rescale(self):
    """A direct simulation and a manually run script followed by
    ``recap_and_rescale`` must give equal node, edge and mutation tables."""
    engine = stdpopsim.get_engine("slim")
    species = stdpopsim.get_species("HomSap")
    contig = species.get_contig("chr22", length_multiplier=0.001)
    model = species.get_demographic_model("OutOfAfrica_3G09")
    samples = model.get_samples(10, 10, 10)
    seed = 12

    # arguments shared by every engine call below
    common = dict(
        demographic_model=model,
        contig=contig,
        samples=samples,
        slim_scaling_factor=10,
        seed=seed,
    )

    ts1 = engine.simulate(slim_burn_in=0, **common)

    # generate the script only, capturing it from the output
    out, _ = capture_output(
        engine.simulate, slim_script=True, slim_burn_in=0, **common)

    match = re.search(r'"trees_file",\s*"([^"]*)"', out)
    self.assertIsNotNone(match)
    tmp_trees_file = match.group(1)

    with tempfile.NamedTemporaryFile(mode="w") as slim_script, \
            tempfile.NamedTemporaryFile(mode="w") as trees_file:
        # point the script at our own temporary output file and run it
        out = out.replace(tmp_trees_file, trees_file.name)
        slim_script.write(out)
        slim_script.flush()
        engine._run_slim(slim_script.name, seed=seed)

        ts2_headless = pyslim.load(trees_file.name)
        ts2 = engine.recap_and_rescale(ts2_headless, **common)

    tables1 = ts1.dump_tables()
    tables2 = ts2.dump_tables()
    self.assertEqual(tables1.nodes, tables2.nodes)
    self.assertEqual(tables1.edges, tables2.edges)
    self.assertEqual(tables1.mutations, tables2.mutations)
def test_dump_equality(self, recipe, tmp_path):
    """
    Dump the recipe's tree sequence to a temporary file, load it back
    with pyslim, and check that the copy is identical.
    """
    original = recipe["ts"]
    dumped_path = os.path.join(tmp_path, "test_dump.trees")
    original.dump(dumped_path)
    reloaded = pyslim.load(dumped_path)

    assert original.num_samples == reloaded.num_samples
    assert original.sequence_length == reloaded.sequence_length
    assert original.tables == reloaded.dump_tables()

    has_reference = original.has_reference_sequence()
    assert has_reference == reloaded.has_reference_sequence()
    if original.has_reference_sequence():
        assert original.reference_sequence.data == reloaded.reference_sequence.data
def readFiles(recipeNum):
    '''
    Read the tree sequence and the mutation ids used to find origin.

    ``Adaptation.txt`` is read as a header line (ignored), then a line of
    whitespace-separated integer ids for the pre-existing set and a line
    for the introduced set.

    Parameters: recipeNum - string appended to "./Output1/MyRecipe" to
    form the recipe directory.

    Returns: (Introduced_FWAA, PreExisting_FWAA, ts)
    '''
    # the file is closed as soon as the id lines have been parsed
    with open("./Output1/MyRecipe" + recipeNum + "/Adaptation.txt", "r") as adaptationFile:
        adaptationFile.readline()  # skip the header line
        PreExisting_FWAA = [int(Id) for Id in adaptationFile.readline().split()]
        Introduced_FWAA = [int(Id) for Id in adaptationFile.readline().split()]

    ts = pyslim.load("./Output1/MyRecipe" + recipeNum + "/TreesAtAdaptation.trees", slim_format=True)
    return Introduced_FWAA, PreExisting_FWAA, ts
def get_slim_restarts(self, **kwargs):
    """Yield ``(tree_sequence, basename)`` for every entry of
    ``restart_files`` whose fields match all of the given keyword filters.

    Each entry provides a previously produced tree sequence (``input``)
    and the basename of the SLiM script suited to restarting from it.
    """
    for ex in restart_files.values():
        # skip entries that lack a requested key or differ in its value
        if any(key not in ex or ex[key] != kwargs[key] for key in kwargs):
            continue

        basename = ex['basename']
        treefile = ex['input']
        print(f"restarting {treefile} as {basename}")
        self.assertTrue(os.path.isfile(treefile))
        yield pyslim.load(treefile), basename
def merge(self):
    """Merge the first two tree sequences from ``self.files``.

    Every file is loaded, but only the first two are combined. The
    second is added with every node mapped to ``tskit.NULL`` and
    ``add_populations=True``. The result is stored in ``self.merged_ts``.
    """
    self.species = []

    # read in all the tree sequences
    loaded = [pyslim.load(path) for path in self.files]
    first, second = loaded[0], loaded[1]

    # no node of the second sequence is matched to a node of the first
    unmatched = [tskit.NULL for _ in range(second.num_nodes)]
    combined = first.union(
        second,
        node_mapping=unmatched,
        add_populations=True,
    )
    self.merged_ts = pyslim.SlimTreeSequence(combined)
def ancestry_local (treepath):
    """Compute and plot a per-tree ancestry value along the genome.

    For each tree, the population of every root is averaged, weighted by
    that root's sample count minus one. The result is drawn as a step
    plot and returned as ``(x, y)``: x holds each tree's start and end
    position, y holds the tree's value repeated for both.
    """
    ts = pyslim.load(treepath)

    starts, ends, subpops = [], [], []
    for tree in ts.trees(sample_counts=True):
        weighted_pop = 0
        total_weight = 0
        for root in tree.roots:
            weight = tree.num_samples(root) - 1
            weighted_pop += tree.population(root) * weight
            total_weight += weight
        left, right = tree.interval[0], tree.interval[1]
        starts.append(left)
        ends.append(right)
        subpops.append(weighted_pop / float(total_weight))

    # interleave start/end and duplicate each value to get a step line
    x = []
    y = []
    for left, right, value in zip(starts, ends, subpops):
        x.extend((left, right))
        y.extend((value, value))

    matplotlib.pyplot.plot(x, y)
    matplotlib.pyplot.show()
    return x,y  # x = genome positions; y = ancestry
def ancestry_position_writeout (treepath,n,admpop,popsize,t_sinceadm,region_name):
    """Write per-tree ancestry for a tree sequence to a CSV-style file.

    The file is named from the module-level ``DIR_anc``, ``dominance``,
    ``model``, ``growth``, ``m4s`` and ``hs`` plus ``region_name`` and
    ``n``, with the extension ``.ancestry``. It has a
    ``start,end,ancestry`` header and one row per tree, pairing each tree
    interval with the matching value from ``ancestry_p_varies``.
    """
    ts = pyslim.load(treepath)

    starts = []
    ends = []
    for tree in ts.trees():
        starts.append(tree.interval[0])
        ends.append(tree.interval[1])

    outfilename = DIR_anc+ region_name+str(dominance)+ "_"+str(model)+ "_"+str(growth)+ "_"+str(m4s)+ "_"+str(hs) + "_"+str(n) + '.ancestry'

    # the context manager closes the file even if the ancestry
    # computation or a write fails part-way through
    with open(outfilename, 'w') as outfile:
        outfile.write('start,end,ancestry\n')
        p1ancestry = ancestry_p_varies(ts,admpop,popsize,t_sinceadm)
        for start, end, anc in zip(starts, ends, p1ancestry):
            outfile.write('{0},{1},{2}\n'.format(start, end, anc))
def getBranchLengthSumStats(direc,n):
    """Read ``0.trees`` .. ``n-1.trees`` from ``direc`` and build one
    summary vector per file from branch-length statistics.

    Each vector concatenates the pairwise spatial distances between
    sample locations, the non-NaN divergence values and the first window
    of the site frequency spectrum. Returns the list of vectors.

    The original author marked this function "don't use this yet".
    """
    out = []
    for index in range(n):
        ts = pyslim.load(os.path.join(direc, str(index) + ".trees"))
        samples = list(ts.samples())

        # first two location coordinates of each sample's individual
        locs = []
        for sample in samples:
            location = ts.individual(ts.node(sample).individual).location
            locs.append([location[0], location[1]])

        bs = msp.BranchLengthStatCalculator(ts)

        # divergence with every sample as its own single-member set
        singleton_sets = [[node] for node in ts.samples()]
        gd = np.array(
            bs.divergence(singleton_sets, windows=[0.0, ts.sequence_length]))
        gd = gd[np.logical_not(np.isnan(gd))]

        sd = np.array(scipy.spatial.distance.pdist(locs))
        simSFS = np.array(bs.site_frequency_spectrum(samples)[0])

        out.append(np.concatenate((sd, gd, simSFS)))
    return out
def get_slim_examples(self, return_info=False, **args):
    """Yield the example tree sequences whose entry in ``example_files``
    matches every keyword filter in ``args``.

    With ``return_info`` true, each item is ``(ts, ex)`` and ``ex['info']``
    is filled from the ``.pedigree`` file next to the tree file, or set
    to None when that file is absent. Otherwise only the tree sequence is
    yielded.
    """
    for ex in example_files.values():
        basename = ex['basename']

        # skip entries that lack a requested key or differ in its value
        if any(key not in ex or ex[key] != args[key] for key in args):
            continue

        treefile = basename + ".trees"
        print("---->", treefile)
        self.assertTrue(os.path.isfile(treefile))
        ts = pyslim.load(treefile)

        if not return_info:
            yield ts
            continue

        infofile = treefile + ".pedigree"
        if os.path.isfile(infofile):
            ex['info'] = self.get_slim_info(infofile)
        else:
            ex['info'] = None
        yield (ts, ex)
# Keywords: Python, tree-sequence recording, tree sequence recording

import msprime, pyslim

# Load the recorded tree sequence and simplify it
ts = pyslim.load("recipe_17.7.trees").simplify()

# selection coefficients of all selected mutations: a mutation
# contributes every one of its stacked coefficients as soon as at least
# one of them is non-zero
coeffs = []
for mut in ts.mutations():
    md = pyslim.decode_mutation(mut.metadata)
    sel = [x.selection_coeff for x in md]
    if any(s != 0 for s in sel):
        coeffs += sel

b = [x for x in coeffs if x > 0]
d = [x for x in coeffs if x < 0]

# an empty class has no mean; report nan instead of dividing by zero
mean_b = sum(b) / len(b) if b else float("nan")
mean_d = sum(d) / len(d) if d else float("nan")

print("Beneficial: " + str(len(b)) + ", mean " + str(mean_b))
print("Deleterious: " + str(len(d)) + ", mean " + str(mean_d))
import msprime
import pyslim
import numpy as np

# Load the decapitated tree sequence produced elsewhere
decap = pyslim.load("temp/decapitated.trees", slim_format=True)

# Recapitate ten times and keep a simplified copy of each replicate
recap = []
recap_simplified = []
for _ in range(10):
    recap = decap.recapitate(recombination_rate=1e-6, Ne=1000)
    recap_mut = pyslim.mutate(recap, rate=1e-6, keep=True)
    # The sample list must come from an existing tree sequence; the
    # previous code read ``ru`` before it was ever assigned.
    # NOTE(review): ``recap_mut`` is computed but not used below -
    # confirm whether the mutated sequence was meant to be simplified.
    ru = recap.simplify(recap.samples())
    recap_simplified.append(ru)


def get_stats(ts):
    """Print and return a dict of basic counts for a tree sequence.

    ``divergence`` is a fixed placeholder of -1.
    """
    bs = msprime.BranchLengthStatCalculator(ts)
    stats = {'num_trees' : ts.num_trees,
             'sequence_length' : ts.sequence_length,
             'num_samples' : ts.num_samples,
             'num_mutations' : ts.num_mutations,
             'divergence' : -1 # bs.divergence(sample_sets=[list(ts.samples())], windows=[0.0, ts.sequence_length])
             }
    print(stats)
    return stats


def summarize_stats(tslist):
    """Collect ``get_stats`` for each tree sequence in ``tslist``.

    NOTE(review): in the code visible here ``out`` is never filled or
    returned and ``x`` is unused - the function looks unfinished.
    """
    statlist = list(map(get_stats, tslist))
    out = {}
    for a in statlist[0]:
        x = [u[a] for u in statlist]
# Keywords: Python, tree-sequence recording, tree sequence recording

# This is a Python recipe; note that it runs the SLiM model internally, below

import subprocess, msprime, pyslim
import matplotlib.pyplot as plt
import numpy as np

# Run the SLiM model, then load and simplify the .trees file it produces
subprocess.check_output(["slim", "-m", "-s", "0", "./recipe_17.5.slim"])
ts = pyslim.load("./recipe_17.5.trees").simplify()

# Assess true local ancestry: one breakpoint and one value per tree,
# plus a closing entry at the end of the sequence
n_entries = ts.num_trees + 1
breaks = np.zeros(n_entries)
ancestry = np.zeros(n_entries)
for tree in ts.trees(sample_counts=True):
    weighted_pop = 0
    total_weight = 0
    for root in tree.roots:
        # subtract one for the root, which is a sample
        weight = tree.num_samples(root) - 1
        weighted_pop += tree.population(root) * weight
        total_weight += weight
    slot = tree.index
    breaks[slot] = tree.interval[0]
    ancestry[slot] = weighted_pop / total_weight

# close the final interval by repeating the last tree's value
breaks[-1] = ts.sequence_length
ancestry[-1] = ancestry[-2]

# Make a simple plot
plt.plot(breaks, ancestry)
plt.show()