Example #1
0
    def __init__(
        self,
        shadie=None,
        file=None,
    ):
        """
        Load a SLiM tree sequence, coalesce it and record mutation positions.

        Parameters:
        -----------
        shadie: (Shadie class object)
            Reads in Shadie class object; the tree sequence is loaded from
            its `outname` and its `genemap` and `genome` are copied onto
            this instance.
        file: (str)
            Path of a .trees file to load. Only used when `shadie` is None.

        Raises:
        -------
        TypeError
            If `shadie` is neither a Shadie object nor None.
        """
        if isinstance(shadie, Shadie):
            self.shadie = shadie
            self.tsraw = pyslim.load(self.shadie.outname)
            self.genemap = shadie.genemap
            self.genome = shadie.genome
        elif shadie is None:
            self.tsraw = pyslim.load(file)
        else:
            # Fail here with a clear message instead of falling through and
            # hitting an AttributeError on `self.tsraw` a few lines later.
            raise TypeError(
                "shadie must be a Shadie object or None, "
                f"got {type(shadie).__name__}"
            )

        # NOTE(review): Coal/slimcoal are defined elsewhere; presumably they
        # recapitate the raw tree sequence - confirm against the Coal class.
        tsout = Coal(self.tsraw)
        tsout.slimcoal()
        self.tscoal = tsout.tscoal

        # integer position of every mutation in the coalesced tree sequence
        self.positions = [
            int(mut.position) for mut in self.tscoal.mutations()
        ]
Example #2
0
    def _update_tables(self):
        """Reload each .trees file with every node placed in population 0.

        For each path in `self.trees_files` the tables are copied, all
        nodes are assigned to population 0, the tables are simplified on
        the nodes that appear in the edge table, and the rebuilt tree
        sequence is appended to `self._tree_sequences`.
        """
        for path in self.trees_files:
            # mutable tskit.TableCollection copy of the loaded tree sequence
            tables = pyslim.load(path).dump_tables()

            # SLiM records a null population (0) next to the real one (1);
            # collapse both by assigning every node to population 0.
            node_count = tables.nodes.time.size
            tables.nodes.population = np.zeros(node_count, dtype=np.int32)

            # Only nodes referenced by at least one edge are kept; this
            # drops the unconnected pseudo-nodes of the haploid populations.
            connected = set(tables.edges.parent).union(tables.edges.child)

            # simplify() removes everything not reachable from `samples`
            tables.simplify(
                samples=list(connected),
                keep_input_roots=True,
                filter_individuals=True,
                filter_populations=True,
                filter_sites=False,
            )

            # back to a tree sequence
            rebuilt = pyslim.load_tables(tables)
            self._tree_sequences.append(rebuilt)
 def test_load(self):
     """Check that the three ways of building a SlimTreeSequence agree."""
     for _, info in self.get_slim_examples(return_info=True):
         trees_path = info['basename'] + ".trees"

         # reference: load without pyslim
         plain_ts = tskit.load(trees_path)
         self.assertIs(type(plain_ts), msprime.TreeSequence)
         plain_tables = plain_ts.tables

         # route 1: rebuild from the tables
         converted = pyslim.load_tables(plain_tables, legacy_metadata=True)
         self.assertIsInstance(converted, pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, converted)
         self.assertTableCollectionsEqual(plain_tables, converted.tables)

         # route 2: wrap the loaded tree sequence directly
         converted = pyslim.SlimTreeSequence(plain_ts)
         self.assertIs(type(converted), pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, converted)
         self.assertTableCollectionsEqual(plain_tables, converted.tables)

         # route 3: let pyslim read the file itself
         from_file = pyslim.load(trees_path, legacy_metadata=True)
         self.assertIs(type(from_file), pyslim.SlimTreeSequence)
         self.assertTableCollectionsEqual(plain_tables, from_file.tables)
         self.assertEqual(from_file.slim_generation,
                          converted.slim_generation)
Example #4
0
    def end_sim(self):
        """
        Schedule the final late() event that remembers all individuals
        and writes the .trees file.

        The event runs one generation after `self._sim_time`; when the
        simulation was started from `self._file_in`, the `max_root_time`
        of that tree sequence is added on top.
        """
        final_time = int(self._sim_time + 1)

        if self._file_in:
            # shift the end point by the age of the loaded tree sequence
            loaded = pyslim.load(self._file_in)
            final_time = int(final_time + loaded.max_root_time)

        save_scripts = [
            "sim.treeSeqRememberIndividuals(sim.subpopulations.individuals)\n",
            f"sim.treeSeqOutput('{self._file_out}')",
        ]
        self.model.late(
            time=final_time,
            scripts=save_scripts,
            comment="end of sim; save .trees file",
        )
Example #5
0
def MutsFromTrees(Trees):
    """Return position, frequency and selection coefficient of variants.

    The tree sequence at path `Trees` is loaded and simplified. Each
    variant is paired with one entry of the decoded mutation metadata, and
    variants whose derived frequency is 1 or more are skipped.

    Returns a list of `[position, frequency, selection_coeff]` lists.
    """
    print('read trees')

    ts = pyslim.load(Trees).simplify()

    variants = []
    # The node metadata used to be zipped in as a third operand but was
    # never read; its only effect was to truncate the results to the number
    # of nodes, so it is no longer part of the zip.
    # NOTE(review): variants are per site while the metadata is per
    # mutation; the pairing is only aligned if every site carries exactly
    # one mutation - confirm for the simulations this is used on.
    for variant, mut in zip(ts.variants(),
                            pyslim.extract_mutation_metadata(ts.tables)):
        freq = sum(variant.genotypes) / len(variant.genotypes)

        # skip variants carried by every sample
        if freq >= 1:
            continue
        # only the first stacked mutation's coefficient is reported
        variants.append([variant.site.position, freq, mut[0].selection_coeff])

    return variants
Example #6
0
 def get_slim_examples(self):
     """
     Yield the example SLiM tree sequences one by one,
     loading each file only when it is requested.
     """
     yield from (pyslim.load(path) for path in _slim_example_files)
Example #7
0
    def postsim(self):
        """Load the output tree sequence and print a mutation summary.

        Tallies, for every mutation, the transition from the parental to
        the derived nucleotide, prints the table of counts and then the
        simulation settings stored on this object.
        """
        self.ts = pyslim.load(self.outname)
        nucs = pyslim.NUCLEOTIDES

        # M[i][j]: number of mutations from nucleotide i to nucleotide j
        M = [[0] * len(nucs) for _ in nucs]
        for mut in self.ts.mutations():
            stacked = mut.metadata["mutation_list"]
            # the derived state is the entry with the largest slim_time
            newest = np.argmax([entry["slim_time"] for entry in stacked])
            derived_nuc = stacked[newest]["nucleotide"]
            if mut.parent != -1:
                # parental state comes from the mutation this one replaced
                parent_mut = self.ts.mutation(mut.parent)
                assert parent_mut.site == mut.site
                parent_nuc = parent_mut.metadata["mutation_list"][0]["nucleotide"]
            else:
                # no parent mutation: parental state is the reference base
                acgt = self.ts.reference_sequence[int(mut.position)]
                parent_nuc = nucs.index(acgt)
            M[parent_nuc][derived_nuc] += 1

        print('ancestr', 'derived', 'count', sep="\t")
        counts = 0
        for j, a in enumerate(nucs):
            for k, b in enumerate(nucs):
                counts += M[j][k]
                print(a, b, M[j][k], sep="\t")

        summary = (
            f"\nNumber of mutations: {counts}\n"
            " ---------------------\n"
            "Simulation settings\n\n"
            f"Carrying Capacity: {self.Ne}\n"
            f"Generations: {self.gens}\n"
            f"Tree: {self.tree}\n"
            f"Reproduction: {self.reproduction}\n"
        )
        print(summary)
Example #8
0
def getHapsPosLabelsLocs(direc):
    '''
    Read the numbered .trees files of a directory written by the data
    generator class and return genotype matrices, site positions, labels
    and sample locations.

    One tree file ("0.trees", "1.trees", ...) is read per row of
    "labels.txt". Returns the tuple (haps, positions, labels, locs).
    '''
    labels = np.loadtxt(os.path.join(direc, "labels.txt"))
    ntrees = np.shape(labels)[0]

    haps, positions, locs = [], [], []
    for idx in range(ntrees):
        ts = pyslim.load(os.path.join(direc, str(idx) + ".trees"))
        haps.append(ts.genotype_matrix())
        positions.append(np.array([site.position for site in ts.sites()]))

        # unique individuals that own at least one sample node
        sample_inds = np.unique(
            [ts.node(node_id).individual for node_id in ts.samples()])
        coords = []
        for ind_id in sample_inds:
            location = ts.individual(ind_id).location
            # keep the first two location coordinates
            coords.append([location[0], location[1]])
        locs.append(coords)

    return np.array(haps), np.array(positions), labels, np.array(locs)
Example #9
0
    def _write_trees_file(self):
        """Schedule the late() call that saves the .trees file.

        The call is placed one generation after `self.model.sim_time`.
        If the model metadata names a `file_in`, the `max_root_time` of
        that tree sequence is added so the event lands after the loaded
        history as well.
        """
        # one generation AFTER the last simulated generation
        write_time = int(self.model.sim_time + 1)

        parent_file = self.model.metadata['file_in']
        if parent_file:
            # continue counting from where the parent simulation ended
            parent_ts = pyslim.load(parent_file)
            write_time = int(write_time + parent_ts.max_root_time)

        self.model.late(
            time=write_time,
            scripts=[
                "sim.treeSeqRememberIndividuals(sim.subpopulations.individuals)",
                f"sim.treeSeqOutput('{self.model.metadata['file_out']}', METADATA)"
            ],
            comment="end of sim; save .trees file",
        )
Example #10
0
 def test_load(self, recipe):
     """Check that all routes to a SlimTreeSequence give equal tables."""
     trees_path = recipe["path"]["ts"]

     # reference: plain tskit load
     plain_ts = tskit.load(trees_path)
     assert isinstance(plain_ts, tskit.TreeSequence)
     plain_tables = plain_ts.dump_tables()

     # route 1: rebuild from the table collection
     converted = pyslim.load_tables(plain_tables)
     assert isinstance(converted, pyslim.SlimTreeSequence)
     self.verify_times(plain_ts, converted)
     self.assertTableCollectionsEqual(plain_tables, converted.dump_tables())

     # route 2: wrap the tskit tree sequence directly
     converted = pyslim.SlimTreeSequence(plain_ts)
     assert isinstance(converted, pyslim.SlimTreeSequence)
     self.verify_times(plain_ts, converted)
     self.assertTableCollectionsEqual(plain_tables, converted.dump_tables())

     # route 3: let pyslim read the file itself
     from_file = pyslim.load(trees_path)
     assert isinstance(from_file, pyslim.SlimTreeSequence)
     self.assertTableCollectionsEqual(plain_tables, from_file.dump_tables())
     file_generation = from_file.metadata['SLiM']['generation']
     assert file_generation == converted.metadata['SLiM']['generation']
Example #11
0
    def __init__(
        self,
        tree_files: List[str],
        seed: Optional[int]=None,
        **kwargs,
        ):
        """Load the given .trees files and set up result attributes.

        `popsize`, `recomb`, `mut` and `chromosome` are read from
        `kwargs` (None when absent). After the attributes are set,
        `_extract_metadata()` and `_union()` are called.
        """
        # hidden attributes
        self._tree_files: List[str] = tree_files
        self._tree_sequences = [pyslim.load(path) for path in tree_files]
        self._nts: int = len(tree_files)

        # attributes to be parsed from the slim metadata

        #: The SLiM simulated length of time in diploid generations AFTER
        #: the ancestral burn in (branch gens only)
        self.generations: int = 0
        #: The SLiM simulated diploid carrying capacity
        self.popsize: int = kwargs.get("popsize")
        #: The recombination rate to use for recapitation.
        self.recomb: float = kwargs.get("recomb")
        #: The mutation rate to use for recapitated treesequence.
        self.mut: float = kwargs.get("mut")
        #: The shadie.Chromosome class representing the SLiM genome.
        self.chromosome: ChromosomeBase = kwargs.get("chromosome")
        #: Random number generator seeded with `seed`.
        self.rng: np.random.Generator = np.random.default_rng(seed)

        # new attributes built as results

        #: A SlimTreeSequence that has been recapitated and mutated.
        self.tree_sequence: pyslim.SlimTreeSequence = None

        # try to fill attributes by extracting metadata from the tree files.
        self._extract_metadata()
        self._union()
Example #12
0
 def run_slim_restart(self, in_ts, basename, **kwargs):
     """Run `basename`.slim restarted from `in_ts`; return its output.

     `in_ts` is dumped to `basename`.init.trees, which the SLiM script
     loads. The script is expected to write `basename`.trees, which is
     loaded and returned. Both files are deleted afterwards.
     """
     def discard(path):
         # best-effort delete: a file that is already gone is fine
         try:
             os.remove(path)
         except FileNotFoundError:
             pass

     infile = basename + ".init.trees"
     outfile = basename + ".trees"
     slimfile = basename + ".slim"

     # start from a clean slate so stale output cannot be mistaken for new
     discard(infile)
     discard(outfile)

     in_ts.dump(infile)
     if 'STAGE' not in kwargs:
         kwargs['STAGE'] = in_ts.metadata['SLiM']['stage']
     exit_status = run_slim_script(slimfile, **kwargs)
     discard(infile)

     self.assertEqual(exit_status, 0)
     self.assertTrue(os.path.isfile(outfile))
     out_ts = pyslim.load(outfile)
     discard(outfile)
     return out_ts
Example #13
0
def main():
    """Recapitate the seed_1 tree sequence and write it back in place."""
    trees_path = '../Data/AW_recap/seed_1.trees'

    # Load the .trees file
    ts = pyslim.load(trees_path)

    # Recapitate, then overwrite the file that was just read
    recapitated = ts.recapitate(
        recombination_rate=1e-8,
        Ne=12500,
        random_seed=1,
    )
    recapitated.dump(trees_path)
Example #14
0
 def test_legacy_error(self, recipe, tmp_path):
     """Both legacy_metadata=True entry points must raise ValueError."""
     ts = recipe["ts"]
     tmp_file = os.path.join(tmp_path, "test_legacy.trees")
     ts.dump(tmp_file)

     attempts = (
         lambda: pyslim.load(tmp_file, legacy_metadata=True),
         lambda: pyslim.SlimTreeSequence(ts, legacy_metadata=True),
     )
     for attempt in attempts:
         with pytest.raises(ValueError, match="legacy metadata tools"):
             attempt()
Example #15
0
 def verify_dump_equality(self, ts):
     """
     Dump `ts` to `self.temp_file`, load it back, and assert that the
     sample count, sequence length and tables survived the round trip.
     """
     ts.dump(self.temp_file)
     reloaded = pyslim.load(self.temp_file)
     for attr in ("num_samples", "sequence_length", "tables"):
         self.assertEqual(getattr(ts, attr), getattr(reloaded, attr))
Example #16
0
def simplifyTreeSequenceDirectory(indir,outdir,nSamples):
    """Simplify every numbered .trees file of `indir` into `outdir`.

    The files "0.trees", "1.trees", ... are loaded, passed to
    `simplifyTreeSequenceOnSubSampleSet` with `nSamples`, and written under
    the same name in `outdir`. "labels.txt" is copied over unchanged.

    Paths are built with `os.path.join`, so `indir` and `outdir` work with
    or without a trailing separator.
    """
    # every visible entry except labels.txt is counted as a tree file
    visible = [f for f in os.listdir(indir) if not f.startswith(".")]
    ntrees = len(visible) - 1

    for i in range(ntrees):
        name = str(i) + ".trees"
        ts = pyslim.load(os.path.join(indir, name))
        simplified = simplifyTreeSequenceOnSubSampleSet(ts=ts, nSamples=nSamples)
        simplified.dump(os.path.join(outdir, name))

    shutil.copyfile(os.path.join(indir, "labels.txt"),
                    os.path.join(outdir, "labels.txt"))

    return None
Example #17
0
def Fst_IBD(trees):
    """Estimate Weir-Cockerham Fst overall and for pairs of subpopulations.

    The tree sequence at path `trees` is loaded, mutations are overlaid at
    rate 1e-8, and the genotypes are split into consecutive blocks of 100
    columns that are treated as subpopulations.

    Returns a list of `[rep, dist, fst]` rows (all strings). `rep` is the
    part of the file name before the first underscore. The first row has
    `dist` "-1" and holds the Fst of 10 randomly chosen subpopulations;
    the following rows hold the Fst for each pair of subpopulations built
    below.

    Uses the global numpy random state, so results vary between calls.
    """
    ts = pyslim.load(trees)
    mutated_tree = msprime.mutate(ts, 1e-8)
    #	muts = len( [ v for v  in mutated_tree.variants() ] )
    # Get the genotype matrix, ready for using sci-kit.allel
    msprime_genotype_matrix = mutated_tree.genotype_matrix()
    # Convert msprime's haplotype matrix into genotypes by randomly merging chromosomes
    haplotype_array = allel.HaplotypeArray(msprime_genotype_matrix)

    genotype_array = haplotype_array.to_genotypes(ploidy=2)
    print(genotype_array.shape)
    ## Calculate Diversity
    # NOTE(review): `pi` is not read again in this function; the window
    # edges hard-code a 10e6 + 1 genome length - confirm both are intended.
    pi = mutated_tree.diversity(windows=[
        0, 1e6, 2e6, 3e6, 4e6, 5e6, 6e6, 7e6, 8e6, 9e6, 10e6, 10e6 + 1
    ])
    ## Calculate Tajima's D
    ac = genotype_array.count_alleles()
    TD = allel.tajima_d(ac)
    print(TD)

    # Random starting index in 0..12, paired with every 14th index after
    # it. The first pair is (row, row); it is skipped below (dist == 0).
    # NOTE(review): the constants 13 and 14 presumably encode the deme
    # layout of the simulation - confirm against the SLiM script.
    row = np.random.choice(13)
    pairs = [[row, row + (14 * i)] for i in range(14)]

    # consecutive blocks of 100 indices along axis 1 of the genotype array
    subpopulations = [[y for y in range(x, x + 100)]
                      for x in range(0, genotype_array.shape[1], 100)]

    # 10 blocks drawn without replacement for the overall estimate
    subpops = np.array(subpopulations)[np.random.choice(len(subpopulations),
                                                        10,
                                                        replace=False)]

    mean_fst = allel.average_weir_cockerham_fst(genotype_array,
                                                blen=10000,
                                                subpops=subpops)

    # replicate label: file name up to the first underscore
    rep = trees.split("/")[-1].split("_")[0]

    output = []

    # overall estimate is stored with the sentinel distance -1
    output.append([str(rep), str(int(-1)), str(mean_fst[0])])

    for p in pairs:
        print(p)
        # distance between the two indices in units of 14
        dist = (p[1] - p[0]) / 14
        if dist == 0: continue
        subpops = np.array(subpopulations)[p]

        # note the smaller block length (1000) than the overall estimate
        mean_fst = allel.average_weir_cockerham_fst(genotype_array,
                                                    blen=1000,
                                                    subpops=subpops)
        output.append([str(rep), str(int(dist)), str(mean_fst[0])])


#		output.write( ",".join( str(rep), str(int(dist)), str(mean_fst[0]) ) + "\n")
    return (output)
Example #18
0
    def __init__(
            self,
            files: list = None,  #takes exactly two files (for now)
            simlength: int = None,  #length in generations
            popsize: int = None,  #initial size of each population
            recomb: float = None,  #recomcbination rate
            mutrate: float = None,  #mutations rate
            chromosome=None,  #'shadie.chromosome.ChromosomeBase'
            altgen: bool = True,  #is model altgen or not?
    ):
        """
        Reads in SLiM .trees files and collapses each one to a single
        population, storing the results in `self.onepop_tslist`.

        The stored mutation rate is halved when `altgen` is True. A
        `mutrate` of None is stored unchanged instead of raising.

        Raises TypeError if `files` is None.
        """
        # Halve the rate for altgen models; guard the None default so that
        # constructing with default arguments does not fail on `None / 2`.
        if altgen is True and mutrate is not None:
            self.mutrate = mutrate / 2
        else:
            self.mutrate = mutrate

        self.chromosome = chromosome
        self.simlength = simlength
        self.recomb = recomb
        self.popsize = popsize
        self.pops = None

        self.species = []

        #read in all the tree sequences
        species = [pyslim.load(path) for path in files]

        #merge the p0 and p1 populations: assign every node to population 0
        mod_tslist = []
        for ts in species:
            # dump_tables() returns a mutable copy; `ts.tables` is not
            # meant to be edited in place.
            tables = ts.dump_tables()
            tables.nodes.population = np.zeros(tables.nodes.num_rows,
                                               dtype=np.int32)
            mod_tslist.append(pyslim.load_tables(tables))

        #remove extra population
        self.onepop_tslist = [
            ts.simplify(keep_input_roots=True,
                        keep_unary_in_individuals=True)
            for ts in mod_tslist
        ]
def recapitate(treesfile, sample_size, recombination_rate, mutation_rate, Ne,
               gcBurnin):
    """Recapitate a SLiM tree sequence, subsample it and add mutations.

    Parameters
    ----------
    treesfile : path of the .trees file to load with pyslim.
    sample_size : number of individuals alive at time 0 to keep.
    recombination_rate : not used in this function body.
    mutation_rate : mutation rate; half of it is passed to msprime.mutate.
    Ne : effective population size used for recapitation.
    gcBurnin : used (plus 1e-20) as the recombination rate of the
        recapitation.

    Returns
    -------
    (snp_mat, pos, ts_mutated) : the transposed genotype matrix, the site
        positions rounded to int, and the mutated tree sequence.

    Sampling uses the global numpy random state.
    """

    # load trees with pyslim
    ts = pyslim.load(treesfile)
    num_individuals_0 = len(ts.individuals_alive_at(0))
    # how many trees have 1 root, 2 roots, ... before recapitation
    n_roots = pd.Series([
        t.num_roots for t in ts.trees()
    ]).value_counts().to_frame(name="num_tree_with_num_roots")
    # the [:-12] slice drops the last 12 characters of the frame's string
    # form. NOTE(review): presumably a trailing footer - confirm.
    logging.debug(
        f"""The tree sequence has {ts.num_trees} trees on a genome of length {ts.sequence_length}
                         {num_individuals_0} alive individuals, {ts.num_samples} 'sample' genomes
                         and {ts.num_mutations} mutations.
                         number of roots per tree: 
                         {n_roots.__str__()[:-12]}""")

    # discard second genomes (diploids) from the tree.
    #ts_haploid = ts.simplify(samples=[ind.nodes[0] for ind in ts.individuals()])
    ts_recap = ts.recapitate(
        recombination_rate=gcBurnin + 1e-20,  # can't put 0 here. 
        Ne=Ne)
    #population_configurations=[msprime.PopulationConiguration(initial_size=Ne)])

    # simplify to a subset of the haploids
    sample_inds = np.random.choice(ts_recap.individuals_alive_at(0),
                                   size=sample_size,
                                   replace=False)  # choose n random leaves
    # keep only the first node (genome) of each sampled individual
    sample_nodes = [ts_recap.individual(i).nodes[0] for i in sample_inds]
    ts_samp = ts_recap.simplify(samples=sample_nodes)

    # same root-count summary, now for the subsampled tree sequence
    n_roots = pd.Series([
        t.num_roots for t in ts_samp.trees()
    ]).value_counts().to_frame(name="num_tree_with_num_roots")
    logging.debug(
        f"""The tree sequence has {ts_samp.num_trees} trees on a genome of length {ts_samp.sequence_length}
                         {ts_samp.num_individuals} alive individuals, {ts_samp.num_samples} 'sample' genomes
                         and {ts_samp.num_mutations} mutations.
                         number of roots per tree: 
                         {n_roots.__str__()[:-12]}""")

    # mutate
    ts_mutated = pyslim.SlimTreeSequence(
        msprime.mutate(
            ts_samp,
            rate=mutation_rate / 2,  # To have 2.Ne.mu and not 4.Ne.mu
            keep=True)  # keep existing mutations
    )
    genotype_matrix = ts_mutated.genotype_matrix()
    # transpose the genotype matrix for the caller
    snp_mat = genotype_matrix.T
    # site positions rounded to the nearest integer
    pos = np.round(ts_mutated.tables.asdict()["sites"]["position"]).astype(int)

    return snp_mat, pos, ts_mutated
Example #20
0
 def get_ts(self, include_text=False):
     """Yield the test tree sequences found in `self.dir`.

     When `include_text` is true, first yields a tree sequence built from
     the text tables. NodeTable.txt and EdgeTable.txt are required; the
     site, mutation, individual and population tables are optional and
     passed as None when they cannot be opened. Then yields
     "test_output.trees" and "test_output.unsimplified.trees" loaded with
     pyslim.

     Raises OSError if a required text table cannot be opened.
     """
     if include_text:
         # read in from text: (keyword of load_text, file name, required)
         table_specs = (
             ("nodes", "NodeTable.txt", True),
             ("edges", "EdgeTable.txt", True),
             ("sites", "SiteTable.txt", False),
             ("mutations", "MutationTable.txt", False),
             ("individuals", "IndividualTable.txt", False),
             ("populations", "PopulationTable.txt", False),
         )
         handles = {}
         try:
             for key, filename, required in table_specs:
                 try:
                     handles[key] = open(os.path.join(self.dir, filename), "r")
                 except OSError:
                     # This used to catch the undefined name `IOerror`, so a
                     # missing optional table raised NameError instead.
                     if required:
                         raise
                     handles[key] = None
             text_ts = msprime.load_text(nodes=handles["nodes"],
                                         edges=handles["edges"],
                                         sites=handles["sites"],
                                         mutations=handles["mutations"],
                                         individuals=handles["individuals"],
                                         populations=handles["populations"],
                                         base64_metadata=False)
         finally:
             # the text is fully parsed by now; do not leak the handles
             for handle in handles.values():
                 if handle is not None:
                     handle.close()

         print("******* Text input.")
         yield text_ts
     # and binary
     bin_ts = pyslim.load(os.path.join(self.dir, "test_output.trees"))
     print("******** Binary input.")
     yield bin_ts
     # and nonsimplified binary
     print("******** Unsimplified binary.")
     bin_nonsip_ts = pyslim.load(
         os.path.join(self.dir, "test_output.unsimplified.trees"))
     yield bin_nonsip_ts
Example #21
0
def guess_gen_time(ts):
    """Estimate the generation time that reproduces SLiM's mutation count.

    Loads the tree sequence at path `ts`, then random-walks a generation
    time `g` (starting at 4) until mutations overlaid by msprime at rate
    1e-8 / g come within 1% of the number of mutations SLiM recorded.

    Returns the accepted generation time as a float. Uses the global numpy
    random state and msprime's own randomness, so results vary.
    """
    t = pyslim.load(ts)
    slim_muts = t.num_mutations
    g = 4.0
    # slim mutation rate was set to 1e-8
    msp_muts = msprime.mutate(t, 1e-8 / g, keep=False).num_mutations
    while abs(msp_muts - slim_muts) > 0.01 * slim_muts:
        # Draw a scalar step. The old size-1 array draw made `g` an array
        # only after the first accepted step, so `return g[0]` failed with
        # TypeError whenever the initial guess was already good enough.
        new_g = g + np.random.uniform(-1, 1)
        if new_g <= 0:
            # a non-positive generation time gives an invalid mutation rate
            continue
        new_msp_muts = msprime.mutate(t, 1e-8 / new_g, keep=False).num_mutations
        # keep the candidate only if it is closer to the SLiM count
        if abs(new_msp_muts - slim_muts) < abs(msp_muts - slim_muts):
            msp_muts = new_msp_muts
            g = new_g
    return float(g)
Example #22
0
    def test_recap_and_rescale(self):
        """A headless SLiM run plus recap_and_rescale must match simulate.

        Simulates once through the engine, then generates the SLiM script
        for the same settings, runs it directly and post-processes the
        resulting tree sequence. The node, edge and mutation tables of
        both results must be equal.
        """
        engine = stdpopsim.get_engine("slim")
        species = stdpopsim.get_species("HomSap")
        contig = species.get_contig("chr22", length_multiplier=0.001)
        model = species.get_demographic_model("OutOfAfrica_3G09")
        samples = model.get_samples(10, 10, 10)
        seed = 12

        # settings shared by every engine call below
        common = dict(
            demographic_model=model,
            contig=contig,
            samples=samples,
            slim_scaling_factor=10,
            seed=seed,
        )

        ts1 = engine.simulate(slim_burn_in=0, **common)

        # same settings, but only emit the SLiM script
        out, _ = capture_output(engine.simulate,
                                slim_script=True,
                                slim_burn_in=0,
                                **common)

        match = re.search(r'"trees_file",\s*"([^"]*)"', out)
        self.assertIsNotNone(match)
        tmp_trees_file = match.group(1)

        with tempfile.NamedTemporaryFile(mode="w") as slim_script, \
                tempfile.NamedTemporaryFile(mode="w") as trees_file:
            # point the script at a trees file we control
            slim_script.write(out.replace(tmp_trees_file, trees_file.name))
            slim_script.flush()
            engine._run_slim(slim_script.name, seed=seed)
            ts2_headless = pyslim.load(trees_file.name)

        ts2 = engine.recap_and_rescale(ts2_headless, **common)

        tables1 = ts1.dump_tables()
        tables2 = ts2.dump_tables()
        for table_name in ("nodes", "edges", "mutations"):
            self.assertEqual(getattr(tables1, table_name),
                             getattr(tables2, table_name))
Example #23
0
 def test_dump_equality(self, recipe, tmp_path):
     """
     Dump the recipe's tree sequence to a temporary file, load it back
     with pyslim and check that nothing changed in the round trip.
     """
     original = recipe["ts"]
     tmp_file = os.path.join(tmp_path, "test_dump.trees")
     original.dump(tmp_file)
     reloaded = pyslim.load(tmp_file)

     assert original.num_samples == reloaded.num_samples
     assert original.sequence_length == reloaded.sequence_length
     assert original.tables == reloaded.dump_tables()

     has_refseq = original.has_reference_sequence()
     assert has_refseq == reloaded.has_reference_sequence()
     if has_refseq:
         assert original.reference_sequence.data == reloaded.reference_sequence.data
Example #24
0
def readFiles(recipeNum):
    '''
    Read in the tree sequence and the mutation ids used to find origin.

    `recipeNum` is the string suffix of the "./Output1/MyRecipe<N>"
    directory. Adaptation.txt is expected to hold a header line, a line of
    pre-existing mutation ids and a line of introduced mutation ids, all
    whitespace separated.

    Returns (Introduced_FWAA, PreExisting_FWAA, ts).
    '''
    recipe_dir = "./Output1/MyRecipe" + recipeNum

    # the context manager closes the file, which used to be left open
    with open(recipe_dir + "/Adaptation.txt", "r") as adaptationFile:
        adaptationFile.readline()  # skip the header line
        PreExisting_FWAA = [int(Id) for Id in adaptationFile.readline().split()]
        Introduced_FWAA = [int(Id) for Id in adaptationFile.readline().split()]

    ts = pyslim.load(recipe_dir + "/TreesAtAdaptation.trees",
                     slim_format=True)

    return Introduced_FWAA, PreExisting_FWAA, ts
Example #25
0
 def get_slim_restarts(self, **kwargs):
     """Yield (tree sequence, basename) for matching restart examples.

     An entry of `restart_files` matches when it contains every keyword
     in `kwargs` with an equal value.
     """
     for ex in restart_files.values():
         # skip entries that miss or disagree with any requested setting
         if any(a not in ex or ex[a] != kwargs[a] for a in kwargs):
             continue
         basename = ex['basename']
         treefile = ex['input']
         print(f"restarting {treefile} as {basename}")
         self.assertTrue(os.path.isfile(treefile))
         yield pyslim.load(treefile), basename
Example #26
0
    def merge(self):
        """Union the first two tree sequences of `self.files`.

        Every file is loaded; the first two are combined with `union`,
        treating no nodes as shared and adding the second one's
        populations. The result is stored in `self.merged_ts`.
        """
        self.species = []

        #read in all the tree sequences
        loaded = [pyslim.load(path) for path in self.files]
        first, second = loaded[0], loaded[1]

        # no node of `second` is mapped onto a node of `first`
        unmapped = [tskit.NULL] * second.num_nodes
        combined = first.union(
            second,
            node_mapping=unmapped,
            add_populations=True,
        )
        self.merged_ts = pyslim.SlimTreeSequence(combined)
def ancestry_local(treepath):
    """Plot and return local ancestry along the genome.

    For each tree, ancestry is the mean of the root populations weighted
    by each root's sample count minus one. Returns (x, y) where x holds
    the start and end position of every tree and y the matching ancestry
    value repeated for both, so the plot is a step function.
    """
    ts = pyslim.load(treepath)
    x, y = [], []
    for tree in ts.trees(sample_counts=True):
        weighted_sum = 0
        total_weight = 0
        for root in tree.roots:
            n_leaves = tree.num_samples(root) - 1
            weighted_sum += tree.population(root) * n_leaves
            total_weight += n_leaves
        left, right = tree.interval[0], tree.interval[1]
        local_ancestry = weighted_sum / float(total_weight)
        # one point at each end of the tree's interval
        x.extend((left, right))
        y.extend((local_ancestry, local_ancestry))

    matplotlib.pyplot.plot(x, y)
    matplotlib.pyplot.show()
    return x,y #x=genome positions; y = ancestry
def ancestry_position_writeout (treepath,n,admpop,popsize,t_sinceadm,region_name):
    """Write the per-tree ancestry of a tree sequence to a CSV file.

    The output has the header "start,end,ancestry" and one row per tree
    interval, paired in order with the values returned by
    `ancestry_p_varies(ts, admpop, popsize, t_sinceadm)`.

    The file name is built from `region_name`, `n` and the names
    `DIR_anc`, `dominance`, `model`, `growth`, `m4s` and `hs`.
    NOTE(review): those names are not parameters; presumably module-level
    settings - confirm they are defined before this is called.
    """
    ts = pyslim.load(treepath)
    starts=[]
    ends=[]

    for tree in ts.trees():
        starts.append(tree.interval[0])
        ends.append(tree.interval[1])

    outfilename = DIR_anc+ region_name+str(dominance)+ "_"+str(model)+ "_"+str(growth)+ "_"+str(m4s)+ "_"+str(hs) + "_"+str(n) + '.ancestry'

    # the context manager closes the file even if computing or writing the
    # ancestry raises; the explicit close() used to be skipped in that case
    with open(outfilename, 'w') as outfile:
        outfile.write('start,end,ancestry\n')

        p1ancestry = ancestry_p_varies(ts,admpop,popsize,t_sinceadm)

        for start, end, anc in zip(starts, ends, p1ancestry):
            outfile.write('{0},{1},{2}\n'.format(start, end, anc))
Example #29
0
def getBranchLengthSumStats(direc,n): #don't use this yet
    """Return branch-length summary statistics for numbered .trees files.

    Reads "0.trees" .. "<n-1>.trees" from `direc`. For each file the
    result row concatenates the pairwise spatial distances between sample
    locations, the non-NaN branch-length divergences and the site
    frequency spectrum. Returns the list of rows.
    """
    out = []
    for idx in range(n):
        ts = pyslim.load(os.path.join(direc, str(idx) + ".trees"))
        samples = list(ts.samples())

        # first two location coordinates of the individual owning each sample
        locs = []
        for s in samples:
            location = ts.individual(ts.node(s).individual).location
            locs.append([location[0], location[1]])

        bs = msp.BranchLengthStatCalculator(ts)
        # every sample is its own sample set; one window spans the genome
        singletons = [[x] for x in ts.samples()]
        gd = np.array(bs.divergence(singletons,
                                    windows=[0.0, ts.sequence_length]))
        gd = gd[np.logical_not(np.isnan(gd))]
        sd = np.array(scipy.spatial.distance.pdist(locs))
        simSFS = np.array(bs.site_frequency_spectrum(samples)[0])
        out.append(np.concatenate((sd, gd, simSFS)))
    return out
Example #30
0
 def get_slim_examples(self, return_info=False, **args):
     """Yield example tree sequences whose settings match `args`.

     An entry of `example_files` matches when it contains every keyword
     of `args` with an equal value. With `return_info` true, yields
     `(ts, ex)` after storing the parsed "<treefile>.pedigree" in
     `ex['info']` (None if that file does not exist); otherwise yields
     only the tree sequence.
     """
     for ex in example_files.values():
         basename = ex['basename']
         # skip entries that miss or disagree with any requested setting
         if any(a not in ex or ex[a] != args[a] for a in args):
             continue

         treefile = basename + ".trees"
         print("---->", treefile)
         self.assertTrue(os.path.isfile(treefile))
         ts = pyslim.load(treefile)
         if not return_info:
             yield ts
             continue

         infofile = treefile + ".pedigree"
         has_info = os.path.isfile(infofile)
         ex['info'] = self.get_slim_info(infofile) if has_info else None
         yield (ts, ex)
# Keywords: Python, tree-sequence recording, tree sequence recording

import msprime, pyslim

ts = pyslim.load("recipe_17.7.trees").simplify()

# selection coefficients and locations of all selected mutations
coeffs = []
for mut in ts.mutations():
    md = pyslim.decode_mutation(mut.metadata)
    sel = [x.selection_coeff for x in md]
    # keep the whole stack as soon as one stacked mutation is non-neutral
    if any(s != 0 for s in sel):
        coeffs += sel

b = [x for x in coeffs if x > 0]
d = [x for x in coeffs if x < 0]

# Report count and mean per class. An empty class prints "nan" as its mean
# instead of stopping the script with a ZeroDivisionError.
for label, values in (("Beneficial", b), ("Deleterious", d)):
    mean = sum(values) / len(values) if values else float("nan")
    print(label + ": " + str(len(values)) + ", mean " + str(mean))
Example #32
0
import msprime
import pyslim
import numpy as np

decap = pyslim.load("temp/decapitated.trees", slim_format=True)

# Ten independent recapitations of the same decapitated tree sequence,
# each with mutations added and then simplified to its samples.
recap_simplified = []
for _ in range(10):
    recap = decap.recapitate(recombination_rate=1e-6, Ne=1000)
    recap_mut = pyslim.mutate(recap,
                rate=1e-6, keep=True)
    # The original line read `recap.simplify(ru.samples())`, which used
    # `ru` before it was assigned and raised NameError on the first pass.
    # NOTE(review): simplifying the mutated tree sequence is assumed to be
    # the intent, since `recap_mut` was otherwise unused - confirm.
    ru = recap_mut.simplify(recap_mut.samples())
    recap_simplified.append(ru)

def get_stats(ts):
    """Print and return a dict of basic size statistics of `ts`.

    The 'divergence' entry is a fixed placeholder of -1.
    """
    # constructed but not queried; the divergence computation is disabled
    bs = msprime.BranchLengthStatCalculator(ts)
    counted = ('num_trees', 'sequence_length', 'num_samples', 'num_mutations')
    stats = {name: getattr(ts, name) for name in counted}
    # placeholder for bs.divergence(sample_sets=[list(ts.samples())],
    #                               windows=[0.0, ts.sequence_length])
    stats['divergence'] = -1
    print(stats)
    return stats

def summarize_stats(tslist):
    """Collect the `get_stats` output of every tree sequence in `tslist`.

    NOTE(review): the body visible here stops after gathering the values
    of each statistic; nothing is stored in `out` or returned. The rest of
    the function appears to be missing - confirm against the full source.
    """
    statlist = list(map(get_stats, tslist))
    out = {}
    # iterate over the statistic names of the first result
    for a in statlist[0]:
        # values of statistic `a` across all tree sequences
        x = [u[a] for u in statlist]
# Keywords: Python, tree-sequence recording, tree sequence recording

# This is a Python recipe; note that it runs the SLiM model internally, below

import subprocess, msprime, pyslim
import matplotlib.pyplot as plt
import numpy as np

# Run the SLiM model and load the resulting .trees file
subprocess.check_output(["slim", "-m", "-s", "0", "./recipe_17.5.slim"])
ts = pyslim.load("./recipe_17.5.trees").simplify()

# Assess true local ancestry: one entry per tree, plus a closing entry at
# the end of the genome so the last interval is drawn too
n_points = ts.num_trees + 1
breaks = np.zeros(n_points)
ancestry = np.zeros(n_points)
for tree in ts.trees(sample_counts=True):
    # (population, weight) per root; the weight excludes the root itself,
    # which is a sample
    weighted = [(tree.population(root), tree.num_samples(root) - 1)
                for root in tree.roots]
    subpop_sum = sum(pop * weight for pop, weight in weighted)
    subpop_weights = sum(weight for _, weight in weighted)
    breaks[tree.index] = tree.interval[0]
    ancestry[tree.index] = subpop_sum / subpop_weights
breaks[-1] = ts.sequence_length
ancestry[-1] = ancestry[-2]

# Make a simple plot
plt.plot(breaks, ancestry)
plt.show()