Example #1
0
    def _merge_ts_pops(self):
        """Combine the loaded simulations into one SlimTreeSequence.

        With a single tree sequence, it is wrapped as a SlimTreeSequence
        and simplified (keeping input roots) to remove the empty
        population. With two, ts1 is added to ts0 through ``union``:
        every ts1 node is mapped to NULL, so nothing is treated as
        shared, and ``add_populations=True`` puts the new nodes into
        new populations. The result is stored in ``self.tree_sequence``.

        Raises
        ------
        ValueError
            If more than two tree sequences were supplied.
        """
        n_sims = self._nts

        # One simulation: nothing to merge, only simplify away the
        # empty population.
        if n_sims == 1:
            only_ts = pyslim.SlimTreeSequence(self._tree_sequences[0])
            self.tree_sequence = only_ts.simplify(keep_input_roots=True)
            return

        if n_sims > 2:
            raise ValueError("you cannot enter >2 tree sequences.")

        # Two simulations: union them with no shared nodes.
        base, extra = self._tree_sequences[0], self._tree_sequences[1]
        no_shared_nodes = [tskit.NULL] * extra.num_nodes
        unioned = base.union(
            extra,
            node_mapping=no_shared_nodes,
            add_populations=True,
        )
        self.tree_sequence = pyslim.SlimTreeSequence(unioned)
Example #2
0
    def _recap_and_rescale(self, ts, seed, recap_epoch, contig, mutation_rate,
                           slim_frac, slim_scaling_factor):
        """
        Post-process the SLiM tree sequence ``ts`` and return the result.

        In order: node and migration times (and ``slim_generation``) are
        multiplied by ``slim_scaling_factor``; the trees are recapitated with
        the populations and migration matrix of ``recap_epoch``; the result is
        passed through ``self._simplify_remembered``; and neutral mutations
        are overlaid with ``msprime.mutate``. All random seeds are derived
        from ``seed``.
        """
        # SLiM generation numbers may have been divided by a scaling factor
        # for computational tractability, so scale the times back up.
        scaled = ts.dump_tables()
        for tbl in (scaled.nodes, scaled.migrations):
            tbl.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(scaled)
        ts.slim_generation *= slim_scaling_factor

        # Seeds are drawn in a fixed order: s1, s2, and (later) s3.
        rng = random.Random(seed)
        s1 = rng.randrange(1, 2**32)
        s2 = rng.randrange(1, 2**32)

        pop_configs = []
        for pop in recap_epoch.populations:
            pop_configs.append(
                msprime.PopulationConfiguration(initial_size=pop.start_size,
                                                growth_rate=pop.growth_rate))
        mean_rate = contig.recombination_map.mean_recombination_rate
        ts = ts.recapitate(recombination_rate=mean_rate,
                           population_configurations=pop_configs,
                           migration_matrix=recap_epoch.migration_matrix,
                           random_seed=s1)

        ts = self._simplify_remembered(ts)

        if slim_frac < 1:
            # Mutations for the SLiM portion of the trees (up to
            # slim_generation), at the fraction of the rate not covered
            # by slim_frac.
            slim_part = msprime.mutate(ts,
                                       rate=(1 - slim_frac) * mutation_rate,
                                       keep=True,
                                       random_seed=s2,
                                       end_time=ts.slim_generation)
            ts = pyslim.SlimTreeSequence(slim_part)

        # Mutations for the recapitated portion (from slim_generation on).
        s3 = rng.randrange(1, 2**32)
        recap_part = msprime.mutate(ts,
                                    rate=mutation_rate,
                                    keep=True,
                                    random_seed=s3,
                                    start_time=ts.slim_generation)
        return pyslim.SlimTreeSequence(recap_part)
Example #3
0
 def test_load(self, recipe):
     """Check that the three routes to a SlimTreeSequence agree.

     The file is loaded with tskit as a reference, then compared with
     (1) ``pyslim.load_tables`` on its tables, (2) wrapping the tskit
     tree sequence directly, and (3) ``pyslim.load`` on the file.
     """
     path = recipe["path"]["ts"]
     plain_ts = tskit.load(path)
     assert isinstance(plain_ts, tskit.TreeSequence)
     plain_tables = plain_ts.dump_tables()

     # route 1: through the table collection
     from_tables = pyslim.load_tables(plain_tables)
     assert isinstance(from_tables, pyslim.SlimTreeSequence)
     self.verify_times(plain_ts, from_tables)
     self.assertTableCollectionsEqual(plain_tables,
                                      from_tables.dump_tables())

     # route 2: wrap the tskit tree sequence directly
     wrapped = pyslim.SlimTreeSequence(plain_ts)
     assert isinstance(wrapped, pyslim.SlimTreeSequence)
     self.verify_times(plain_ts, wrapped)
     self.assertTableCollectionsEqual(plain_tables, wrapped.dump_tables())

     # route 3: load straight from the file with pyslim
     from_file = pyslim.load(path)
     assert isinstance(from_file, pyslim.SlimTreeSequence)
     self.assertTableCollectionsEqual(plain_tables,
                                      from_file.dump_tables())
     file_generation = from_file.metadata['SLiM']['generation']
     assert file_generation == wrapped.metadata['SLiM']['generation']
 def test_load(self):
     """Check the loading routes agree when ``legacy_metadata=True`` is used.

     For every SLiM example, the ``.trees`` file is loaded with tskit as
     a reference and compared against loading from tables, wrapping
     directly, and loading from file with pyslim.
     """
     for _, info in self.get_slim_examples(return_info=True):
         trees_path = info['basename'] + ".trees"
         plain_ts = tskit.load(trees_path)
         self.assertIs(type(plain_ts), msprime.TreeSequence)
         plain_tables = plain_ts.tables

         # route 1: through the table collection
         from_tables = pyslim.load_tables(plain_tables, legacy_metadata=True)
         self.assertIsInstance(from_tables, pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, from_tables)
         self.assertTableCollectionsEqual(plain_tables, from_tables.tables)

         # route 2: wrap the tree sequence directly
         wrapped = pyslim.SlimTreeSequence(plain_ts)
         self.assertIs(type(wrapped), pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, wrapped)
         self.assertTableCollectionsEqual(plain_tables, wrapped.tables)

         # route 3: load straight from the file with pyslim
         from_file = pyslim.load(trees_path, legacy_metadata=True)
         self.assertIs(type(from_file), pyslim.SlimTreeSequence)
         self.assertTableCollectionsEqual(plain_tables, from_file.tables)
         self.assertEqual(from_file.slim_generation, wrapped.slim_generation)
Example #5
0
 def go(self):
     """Run the whole admixture simulation and return the tree sequence.

     Steps: run the SLiM simulation of recent history, load its output
     file, optionally subsample the present-day populations, recapitate
     the ancient history, overlay neutral mutations, and, when
     ``self.out_file`` is set, dump the result there.

     Raises ``StringError`` when the SLiM output file ``self.slim_out``
     does not exist after the SLiM run.
     """
     print(self.slim_out)
     print('Simulating recent history with SLiM...')
     self.simulate_recent_history()
     if not os.path.isfile(self.slim_out):
         # NOTE(review): StringError is not defined in the visible code;
         # confirm it is importable here, otherwise this raises NameError.
         # The message is built from adjacent literals so that it no longer
         # carries the indentation of a backslash-continued source line.
         raise StringError(
             "The supplied SLiM outfile does not match the one specified "
             "in the script.")
     ts = tskit.load(self.slim_out)
     if self.need_to_subsample:
         print('Taking samples from present day populations...')
         sampler = TreeSequenceToSample(
             ts,
             populations_to_sample_from=self.populations,
             sample_sizes=self.sample_sizes)
         ts = sampler.subsample()
     ts = pyslim.SlimTreeSequence.load_tables(ts.tables)
     print('Simulating ancient history with msprime...')
     ts = ts.recapitate(
         recombination_rate=self.ancient_recombination_rate,
         population_configurations=self.ancient_population_configurations,
         demographic_events=self.ancient_demographic_events,
         keep_first_generation=True,  # needed to get local ancestors
     )
     print('Adding variation...')
     ts = pyslim.SlimTreeSequence(
         msprime.mutate(ts, rate=self.neutral_mutation_rate, keep=True))
     if self.out_file is not None:
         ts.dump(self.out_file)
     return ts
Example #6
0
 def test_convert_0_4_files(self):
     """Check that wrapping a 0.4-format file preserves its content.

     Verifies provenance counts, that the top-level ``SLiM`` metadata
     matches the original provenance record, that samples and node
     flags are unchanged, and that parents and branch lengths agree in
     the first tree for 20 randomly chosen samples.
     """
     # Note that with version 0.5 and above, we *don't* get information from
     # provenance, we get it from top-level metadata
     for old_ts in self.get_0_4_slim_examples():
         new_ts = pyslim.SlimTreeSequence(old_ts)

         # the wrapped tree sequence has one more provenance record
         self.assertEqual(old_ts.num_provenances, 1)
         self.assertEqual(new_ts.num_provenances, 2)
         self.assertEqual(old_ts.provenance(0).record,
                          new_ts.provenance(0).record)

         old_record = json.loads(old_ts.provenance(0).record)
         self.assertIsInstance(new_ts.metadata, dict)
         self.assertIn('SLiM', new_ts.metadata)
         slim_meta = new_ts.metadata['SLiM']
         self.assertEqual(old_record['parameters']['model_type'],
                          slim_meta['model_type'])
         self.assertEqual(old_record['slim']['generation'],
                          slim_meta['generation'])

         sample_ids = list(old_ts.samples())
         self.assertListEqual(sample_ids, list(new_ts.samples()))
         self.assertArrayEqual(old_ts.tables.nodes.flags,
                               new_ts.tables.nodes.flags)

         # spot-check the first tree at random sample nodes
         old_tree, new_tree = old_ts.first(), new_ts.first()
         for _ in range(20):
             node = random.sample(sample_ids, 1)[0]
             old_parent = old_tree.parent(node)
             self.assertEqual(old_parent, new_tree.parent(node))
             if old_parent != msprime.NULL_NODE:
                 self.assertEqual(old_tree.branch_length(node),
                                  new_tree.branch_length(node))
Example #7
0
 def test_convert_mixd_files(self):
     """Check that wrapping a "mixed" example file preserves its content.

     Runs ``verify_upgrade`` on the wrapped tree sequence, then checks
     provenance counts, that the top-level ``SLiM`` metadata matches the
     original provenance record, that samples and node flags are
     unchanged, and that parents and branch lengths agree in the first
     tree for 20 randomly chosen samples.
     """
     for old_ts in self.get_mixed_slim_examples():
         new_ts = pyslim.SlimTreeSequence(old_ts)
         self.verify_upgrade(new_ts)

         # the wrapped tree sequence has one more provenance record
         assert old_ts.num_provenances == 1
         assert new_ts.num_provenances == 2
         assert old_ts.provenance(0).record == new_ts.provenance(0).record

         old_record = json.loads(old_ts.provenance(0).record)
         assert isinstance(new_ts.metadata, dict)
         assert 'SLiM' in new_ts.metadata
         slim_meta = new_ts.metadata['SLiM']
         assert old_record['parameters']['model_type'] == slim_meta['model_type']
         assert old_record['slim']['generation'] == slim_meta['generation']

         sample_ids = list(old_ts.samples())
         assert sample_ids == list(new_ts.samples())
         assert np.array_equal(old_ts.tables.nodes.flags,
                               new_ts.tables.nodes.flags)

         # spot-check the first tree at random sample nodes
         old_tree, new_tree = old_ts.first(), new_ts.first()
         for _ in range(20):
             node = random.sample(sample_ids, 1)[0]
             old_parent = old_tree.parent(node)
             assert old_parent == new_tree.parent(node)
             if old_parent != tskit.NULL:
                 assert old_tree.branch_length(node) == new_tree.branch_length(node)
Example #8
0
 def test_bad_metadata(self):
     """SlimTreeSequence must raise ValueError for empty top-level metadata.

     The clean example's tables are copied, given a bare JSON object
     schema and an empty metadata dict, and then wrapped.
     """
     tables = self.clean_example().copy()
     bare_schema = {"type": "object", "codec": "json"}
     tables.metadata_schema = tskit.MetadataSchema(bare_schema)
     tables.metadata = {}
     # build the tree sequence outside the raises-block so only the
     # SlimTreeSequence constructor is expected to fail
     bad_ts = tables.tree_sequence()
     with pytest.raises(ValueError):
         pyslim.SlimTreeSequence(bad_ts)
Example #9
0
    def slimcoal(self):
        """Overlay mutations on ``self.ts`` and print a short summary.

        Mutations are added with ``msprime.mutate`` at rate 1e-9, keeping
        any existing ones; the result is stored as a SlimTreeSequence in
        ``self.tscoal``. The mutation count and mean pairwise diversity
        are then printed.
        """
        mutated = msprime.mutate(self.ts, rate=1e-9, keep=True)
        self.tscoal = pyslim.SlimTreeSequence(mutated)

        summary = (
            f"The tree sequence now has {self.tscoal.num_mutations} mutations, "
            f"and mean pairwise nucleotide diversity is {self.tscoal.diversity()}."
        )
        print(summary)
Example #10
0
 def test_legacy_error(self, recipe, tmp_path):
     """Requesting legacy metadata must raise ValueError on both entry points.

     The recipe's tree sequence is dumped to a temporary file; both
     ``pyslim.load`` and the ``SlimTreeSequence`` constructor are then
     called with ``legacy_metadata=True``.
     """
     ts = recipe["ts"]
     tmp_file = os.path.join(tmp_path, "test_legacy.trees")
     ts.dump(tmp_file)

     expected_message = "legacy metadata tools"
     with pytest.raises(ValueError, match=expected_message):
         pyslim.load(tmp_file, legacy_metadata=True)
     with pytest.raises(ValueError, match=expected_message):
         pyslim.SlimTreeSequence(ts, legacy_metadata=True)
Example #11
0
 def test_inconsistent_times(self):
     """SlimTreeSequence must raise ValueError for rewritten node times.

     The node table is rebuilt with the same number of rows as the clean
     example, but every node is a sample of individual 0 in population 1
     and its time equals its row index.
     """
     reference = self.clean_example()
     tables = reference.copy()
     tables.nodes.clear()
     for row_index, _ in enumerate(reference.nodes):
         tables.nodes.add_row(
             time=row_index,
             flags=tskit.NODE_IS_SAMPLE,
             population=1,
             individual=0,
         )
     bad_ts = tables.tree_sequence()
     with pytest.raises(ValueError):
         pyslim.SlimTreeSequence(bad_ts)
Example #12
0
 def test_samples_only(self, recipe):
     """Check the ``samples_only`` option of ``individuals_alive_at``.

     First, ``samples_only=False`` must give the same individuals as the
     default call. Then up to four individuals alive at time 0 are picked
     at random, the sample flag is cleared on the nodes of all other
     individuals, and ``samples_only=True`` must return exactly the
     picked individuals.
     """
     ts = recipe["ts"]
     all_inds = ts.individuals_alive_at(0)
     assert set(all_inds) == set(ts.individuals_alive_at(0, samples_only=False))
     sub_inds = np.random.choice(all_inds, size=min(len(all_inds), 4), replace=False)

     # The membership test is parenthesised explicitly: without the
     # parentheses, `*` binds tighter than `in`, and the expression only
     # gave the intended mask because NODE_IS_SAMPLE happens to equal 1.
     kept = set(sub_inds)
     flags = np.array(
         [n.flags & (tskit.NODE_IS_SAMPLE * (n.individual in kept))
          for n in ts.nodes()],
         dtype=np.uint32)

     tables = ts.dump_tables()
     tables.nodes.flags = flags
     new_ts = pyslim.SlimTreeSequence(tables.tree_sequence())
     assert set(sub_inds) == set(new_ts.individuals_alive_at(0, samples_only=True))
Example #13
0
    def _match_nodes_and_merge(self, split_time=1000):
        """Match ancestral nodes between the two tree sequences and merge
        ts1 into ts0.

        Builds a numpy array of length ``ts1.num_nodes`` whose index is a
        node id in ts1 and whose entry is the equivalent node id in ts0,
        or ``tskit.NULL`` when there is no equivalent. Nodes are matched
        on the ``slim_id`` stored in their node metadata, and only for
        nodes of ts1 whose time (measured backward in time) is at least
        ``split_time``. The mapping is then handed to ``ts0.union`` and
        the merged result is stored in ``self.tree_sequence`` as a
        SlimTreeSequence.

        Parameters
        ----------
        split_time : int or float, optional
            Oldest-first cutoff: only ts1 nodes with
            ``time >= split_time`` are candidates for matching.
            Defaults to 1000, the value previously hard-coded here.

        Note
        ----
        This must be done before recapitation which would add nodes
        without slim_id metadata.
        """
        ts0 = self._tree_sequences[0]
        ts1 = self._tree_sequences[1]

        # start with "no match" for every ts1 node.
        node_mapping = np.full(ts1.num_nodes, tskit.NULL)

        # get the slim_ids for every node in each ts
        slim_ids0 = np.array([n.metadata["slim_id"] for n in ts0.nodes()])
        slim_ids1 = np.array([n.metadata["slim_id"] for n in ts1.nodes()])

        # nodes of ts1 at or beyond the split (tskit: think backwards time),
        # meaning that they are in the ancestor.
        alive_before_split_pop1 = ts1.tables.nodes.time >= split_time

        # ts1 nodes whose slim_id also occurs in ts0
        is_1in0 = np.isin(slim_ids1, slim_ids0)

        # candidates must satisfy both conditions
        both = np.logical_and(alive_before_split_pop1, is_1in0)

        # look up each candidate's slim_id in the sorted ts0 ids, then map
        # the sorted position back to the ts0 node id.
        sorted_ids0 = np.argsort(slim_ids0)
        matches = np.searchsorted(
            slim_ids0,
            slim_ids1[both],
            side='left',
            sorter=sorted_ids0,
        )
        node_mapping[both] = sorted_ids0[matches]

        # merge and store.
        tsu = ts0.union(ts1,
                        node_mapping=node_mapping,
                        check_shared_equality=True)
        self.tree_sequence = pyslim.SlimTreeSequence(tsu)
Example #14
0
 def test_load_without_provenance(self):
     """A no-op SLiM restart must work on input with no provenance.

     For every restart example, the provenance table is cleared, the
     tables are wrapped as a SlimTreeSequence (keeping the reference
     sequence), run through the SLiM restart, and the output tables
     (with provenance cleared) must equal the input tables.
     """
     # with 0.5, SLiM should read info from metadata, not provenances
     for source_ts, basename in self.get_slim_restarts(no_op=True):
         expected = source_ts.tables
         expected.provenances.clear()
         stripped_ts = pyslim.SlimTreeSequence(
             expected.tree_sequence(),
             reference_sequence=source_ts.reference_sequence)

         restarted = self.run_slim_restart(stripped_ts, basename)
         actual = restarted.tables
         actual.provenances.clear()
         self.assertEqual(expected, actual)
def recapitate(treesfile, sample_size, recombination_rate, mutation_rate, Ne,
               gcBurnin):
    """Load a SLiM tree file, recapitate, subsample and overlay mutations.

    The tree sequence in ``treesfile`` is recapitated with effective size
    ``Ne`` and recombination rate ``gcBurnin + 1e-20``. ``sample_size``
    individuals alive at time 0 are drawn at random and the trees are
    simplified to the first node of each. Mutations are then added at
    ``mutation_rate / 2``, keeping existing ones. A summary is logged at
    debug level before recapitation and after subsampling.

    Note: the ``recombination_rate`` argument is not used in this function.

    Returns a tuple ``(snp_mat, pos, ts_mutated)``: the transposed genotype
    matrix, the rounded integer site positions, and the mutated
    SlimTreeSequence.
    """

    def _root_counts(tree_seq):
        # Table of how many trees have each number of roots.
        roots_per_tree = pd.Series([t.num_roots for t in tree_seq.trees()])
        return roots_per_tree.value_counts().to_frame(
            name="num_tree_with_num_roots")

    # load trees with pyslim
    ts = pyslim.load(treesfile)
    num_individuals_0 = len(ts.individuals_alive_at(0))
    n_roots = _root_counts(ts)
    loaded_summary = f"""The tree sequence has {ts.num_trees} trees on a genome of length {ts.sequence_length}
                         {num_individuals_0} alive individuals, {ts.num_samples} 'sample' genomes
                         and {ts.num_mutations} mutations.
                         number of roots per tree: 
                         {n_roots.__str__()[:-12]}"""
    logging.debug(loaded_summary)

    # recapitate; the recombination rate cannot be exactly 0, hence the
    # tiny offset.
    recapitated = ts.recapitate(recombination_rate=gcBurnin + 1e-20, Ne=Ne)

    # simplify to the first genome of a random subset of individuals
    chosen_inds = np.random.choice(recapitated.individuals_alive_at(0),
                                   size=sample_size,
                                   replace=False)
    first_nodes = [recapitated.individual(i).nodes[0] for i in chosen_inds]
    ts_samp = recapitated.simplify(samples=first_nodes)

    n_roots = _root_counts(ts_samp)
    sampled_summary = f"""The tree sequence has {ts_samp.num_trees} trees on a genome of length {ts_samp.sequence_length}
                         {ts_samp.num_individuals} alive individuals, {ts_samp.num_samples} 'sample' genomes
                         and {ts_samp.num_mutations} mutations.
                         number of roots per tree: 
                         {n_roots.__str__()[:-12]}"""
    logging.debug(sampled_summary)

    # mutate, keeping existing mutations; the rate is halved to have
    # 2.Ne.mu and not 4.Ne.mu
    with_mutations = msprime.mutate(ts_samp, rate=mutation_rate / 2, keep=True)
    mutated_ts = pyslim.SlimTreeSequence(with_mutations)

    variants_by_sample = mutated_ts.genotype_matrix().T
    site_positions = mutated_ts.tables.asdict()["sites"]["position"]
    rounded_positions = np.round(site_positions).astype(int)

    return variants_by_sample, rounded_positions, mutated_ts
Example #16
0
 def test_load_without_provenance(self, restart_name, recipe,
                                  helper_functions, tmp_path):
     """A SLiM restart must round-trip input that has no provenance.

     The recipe's tables have their provenance cleared and are sorted,
     wrapped as a SlimTreeSequence and run through the SLiM restart;
     the output tables (provenance cleared, sorted) must equal the
     input tables.
     """
     expected = recipe["ts"].dump_tables()
     expected.provenances.clear()
     expected.sort()

     stripped_ts = pyslim.SlimTreeSequence(expected.tree_sequence())
     restarted = helper_functions.run_slim_restart(
         stripped_ts, restart_name, tmp_path)

     actual = restarted.dump_tables()
     actual.provenances.clear()
     actual.sort()
     expected.assert_equals(actual)
Example #17
0
 def test_inconsistent_nodes(self):
     """Rewritten node populations must be rejected with ValueError.

     The node table is rebuilt from the clean example with every field
     copied except ``population``, which is set to the row index. Both
     ``annotate_defaults_tables`` and the ``SlimTreeSequence``
     constructor are expected to raise.
     """
     reference = self.clean_example()
     tables = reference.copy()
     tables.nodes.clear()
     for row_index, node in enumerate(reference.nodes):
         tables.nodes.add_row(
             time=node.time,
             flags=node.flags,
             population=row_index,
             individual=node.individual,
             metadata=node.metadata,
         )

     with pytest.raises(ValueError):
         pyslim.annotate_defaults_tables(tables, model_type='nonWF', slim_generation=1)

     bad_ts = tables.tree_sequence()
     with pytest.raises(ValueError):
         pyslim.SlimTreeSequence(bad_ts)
Example #18
0
    def merge(self):
        """Merge the first two tree sequences listed in ``self.files``.

        Every file in ``self.files`` is loaded with pyslim. The second
        tree sequence is then added to the first through ``union`` with
        every node mapped to NULL (nothing shared) and
        ``add_populations=True``; the result is stored in
        ``self.merged_ts`` as a SlimTreeSequence.
        """
        # NOTE(review): self.species is reset to an empty list and is not
        # filled with the loaded tree sequences here; confirm that is
        # intended.
        self.species = []

        # read in all the tree sequences
        species = [pyslim.load(path) for path in self.files]

        first, second = species[0], species[1]
        self.merged_ts = pyslim.SlimTreeSequence(first.union(
            second,
            node_mapping=[tskit.NULL] * second.num_nodes,
            add_populations=True,
        ))
Example #19
0
    def mutate(self, ts):
        """Overlay mutations on ``ts`` and record where they fall.

        Mutations are added with ``msprime.mutate`` at ``self.mutrate``,
        keeping existing ones; the result is stored in ``self.mts``.
        Three lists are then stored: ``self.allpositions`` holds
        ``int(mut.site)`` for every mutation, ``self.popids`` the
        population of each mutation's node, and ``self.positions`` the
        ``int(mut.site)`` of mutations whose derived state is not '1'.
        """
        self.mts = pyslim.SlimTreeSequence(
            msprime.mutate(ts, rate=self.mutrate, keep=True))

        # save mutation positions and which population they occurred in
        all_muts = list(self.mts.mutations())
        every_site = [int(m.site) for m in all_muts]
        node_pops = [self.mts.node(m.node).population for m in all_muts]
        non_one_sites = [
            int(m.site) for m in all_muts if m.derived_state != '1'
        ]

        self.positions = non_one_sites
        self.allpositions = every_site
        self.popids = node_pops
Example #20
0
 def test_convert_0_1_files(self):
     """Check that wrapping a 0.1-format file preserves its content.

     Verifies provenance counts, that the new provenance record carries
     the model type and generation of the original record, that samples
     and node flags are unchanged, and that parents and branch lengths
     agree in the first tree for 20 randomly chosen samples.
     """
     for old_ts in self.get_0_1_slim_examples():
         new_ts = pyslim.SlimTreeSequence(old_ts)

         # the wrapped tree sequence has one more provenance record
         self.assertEqual(old_ts.num_provenances, 1)
         self.assertEqual(new_ts.num_provenances, 2)
         self.assertEqual(old_ts.provenance(0).record,
                          new_ts.provenance(0).record)

         old_record = json.loads(old_ts.provenance(0).record)
         added_record = json.loads(new_ts.provenance(1).record)
         self.assertEqual(old_record['model_type'],
                          added_record['parameters']['model_type'])
         self.assertEqual(old_record['generation'],
                          added_record['slim']["generation"])

         sample_ids = list(old_ts.samples())
         self.assertListEqual(sample_ids, list(new_ts.samples()))
         self.assertArrayEqual(old_ts.tables.nodes.flags,
                               new_ts.tables.nodes.flags)

         # spot-check the first tree at random sample nodes
         old_tree, new_tree = old_ts.first(), new_ts.first()
         for _ in range(20):
             node = random.sample(sample_ids, 1)[0]
             old_parent = old_tree.parent(node)
             self.assertEqual(old_parent, new_tree.parent(node))
             if old_parent != msprime.NULL_NODE:
                 self.assertEqual(old_tree.branch_length(node),
                                  new_tree.branch_length(node))
Example #21
0
    def _merge_ts_pops(self):
        """Merge two separate sims into a single SlimTreeSequence.

        A node mapping is obtained from ``self._match_nodes`` with the
        first tree sequence as ``other``, the second as ``ts``, and a
        split time of ``int(1 + 2 * self.generations)``. The first tree
        sequence is then added to the second through ``union`` using
        that mapping with ``check_shared_equality=True``, and the result
        is stored in ``self.tree_sequence``.

        Raises
        ------
        ValueError
            If more than two tree sequences were supplied.
        """
        # check the number of simulations for what to do.
        if self._nts > 2:
            raise ValueError("you cannot enter >2 tree sequences.")

        first = self._tree_sequences[0]
        second = self._tree_sequences[1]
        split = int(1 + (2 * self.generations))

        mapping = self._match_nodes(other=first, ts=second, split_time=split)
        unioned = second.union(first, mapping, check_shared_equality=True)

        self.tree_sequence = pyslim.SlimTreeSequence(unioned)
Example #22
0
 def test_load(self):
     """Check that the three routes to a SlimTreeSequence agree.

     Each example file is loaded with tskit as a reference and compared
     against loading from tables, wrapping directly, and loading from
     file with pyslim.
     """
     for path in self.get_slim_example_files():
         plain_ts = tskit.load(path)
         self.assertIs(type(plain_ts), msprime.TreeSequence)
         plain_tables = plain_ts.tables

         # route 1: through the table collection
         from_tables = pyslim.load_tables(plain_tables)
         self.assertIs(type(from_tables), pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, from_tables)
         self.assertEqual(plain_tables, from_tables.tables)

         # route 2: wrap the tree sequence directly
         wrapped = pyslim.SlimTreeSequence(plain_ts)
         self.assertIs(type(wrapped), pyslim.SlimTreeSequence)
         self.verify_times(plain_ts, wrapped)
         self.assertEqual(plain_tables, wrapped.tables)

         # route 3: load straight from the file with pyslim
         from_file = pyslim.load(path)
         self.assertIs(type(from_file), pyslim.SlimTreeSequence)
         self.assertEqual(plain_tables, from_file.tables)
         self.assertEqual(from_file.slim_generation, wrapped.slim_generation)
def sample_treeseq(ts, n):
    """Simplify a tree sequence down to ``n`` randomly chosen sample nodes.

    Parameters
    ----------
    ts : SlimTreeSequence
        Full TreeSequence from fwd simulation with tree recording
    n : int
        Number of entries of ``ts.samples()`` to draw, without
        replacement

    Returns
    -------
    SlimTreeSequence
        The tree sequence simplified to the drawn samples
    """
    # choose n random leaves
    chosen = np.random.choice(ts.samples(), size=n, replace=False)
    simplified = ts.simplify(samples=chosen.astype("int32"))
    return pyslim.SlimTreeSequence(simplified)
Example #24
0
    def _mutate(self):
        """Mutate ``self.tree_sequence`` in place of the stored attribute.

        Mutations are added with ``msprime.sim_mutations`` at rate
        ``self.mut`` under a ``SLiMMutationModel`` of type 0, keeping
        existing mutations and seeding from ``self.rng``. The result is
        re-wrapped as a SlimTreeSequence. ``self._report_mutations`` is
        called before and after.

        NOTE(review): the rate is passed as ``self.mut`` with no
        region-specific handling in this method; whether SLiM-era and
        recapitated edges should be treated differently is decided
        elsewhere, if at all.
        """
        # logger report before adding mutations
        self._report_mutations(allow_m0=False)

        # add mutations, keeping the existing ones
        seed = self.rng.integers(2**31)
        slim_model = msprime.SLiMMutationModel(type=0)
        mutated = msprime.sim_mutations(
            self.tree_sequence,
            rate=self.mut,
            random_seed=seed,
            keep=True,
            model=slim_model,
        )
        self.tree_sequence = mutated
        self.tree_sequence = pyslim.SlimTreeSequence(self.tree_sequence)

        # logger report after adding mutations
        self._report_mutations(allow_m0=True)
Example #25
0
    def recapitate(self,
                   decap_trees,
                   demographic_events,
                   demography_debugger=False):
        """Recapitate ``decap_trees`` under the given demographic events.

        Three populations are configured with the domestic, wild and
        captive initial sizes. After recapitation, mutations are overlaid
        with ``msprime.mutate`` and the result is simplified. The same
        ``self.random_seed`` is used for recapitation and for mutation.
        When ``demography_debugger`` is true, the demographic history is
        printed with ``msprime.DemographyDebugger``.

        NOTE(review): ``msprime.mutate`` is called here without ``keep``;
        confirm that discarding or keeping pre-existing mutations is as
        intended.

        Returns the simplified tree sequence.
        """
        # msprime uses diploid Ne
        initial_sizes = (
            self._pop_size_domestic_1,
            self._pop_size_wild_1,
            self._pop_size_captive,
        )
        population_configurations = [
            msprime.PopulationConfiguration(initial_size=size)
            for size in initial_sizes
        ]

        recapitated = decap_trees.recapitate(
            recombination_rate=self.seq_features.recombination_rate,
            population_configurations=population_configurations,
            demographic_events=demographic_events,
            random_seed=self.random_seed)

        # Overlay mutations
        with_mutations = msprime.mutate(
            recapitated,
            rate=self.seq_features.mutation_rate,
            random_seed=self.random_seed)
        result = pyslim.SlimTreeSequence(with_mutations).simplify()

        if demography_debugger:
            debugger = msprime.DemographyDebugger(
                population_configurations=population_configurations,
                demographic_events=demographic_events)
            debugger.print_history()

        return result
Example #26
0
    def merge(self, ts1, ts2):
        """Merge two tree sequences and record per-population membership.

        ``ts2`` is added to ``ts1`` through ``union`` with every node
        mapped to NULL and ``add_populations=True``; the result is stored
        in ``self.merged_ts``. The method then stores:

        - ``self.num_alive``: per population, the number of individuals
          alive at time 0;
        - ``self.edge_ids``: ids of populations with at least one such
          individual;
        - ``self.pops``: two lists of individual ids, for the first and
          second entries of ``self.edge_ids``.
        """
        merged = pyslim.SlimTreeSequence(
            ts1.union(
                ts2,
                node_mapping=[tskit.NULL] * ts2.num_nodes,
                add_populations=True,
            ))
        self.merged_ts = merged

        # count the individuals alive at time 0 in each population
        counts = [0] * merged.num_populations
        for ind_id in merged.individuals_alive_at(0):
            counts[merged.individual(ind_id).population] += 1
        self.num_alive = counts

        # populations that have at least one such individual
        self.edge_ids = [
            pop_id for pop_id in range(merged.num_populations)
            if counts[pop_id] != 0
        ]

        # split individuals between the first two non-empty populations
        # NOTE(review): the loop starts at index 1, so individual 0 is
        # never assigned to either list; confirm this is intended.
        first_members = []
        second_members = []
        inds = merged.individuals()
        for idx in range(1, inds.length):
            ind = inds[idx]
            if ind.population == self.edge_ids[0]:
                first_members.append(ind.id)
            elif ind.population == self.edge_ids[1]:
                second_members.append(ind.id)

        self.pops = [first_members, second_members]
Example #27
0
 def test_convert_0_3_files(self):
     """Check that wrapping a 0.3-format file preserves its content.

     Verifies provenance counts, that the top-level ``SLiM`` metadata
     matches the original provenance record, that samples and node
     flags are unchanged, and that parents and branch lengths agree in
     the first tree for 20 randomly chosen samples.
     """
     for old_ts in self.get_0_3_slim_examples():
         new_ts = pyslim.SlimTreeSequence(old_ts)

         # the wrapped tree sequence has one more provenance record
         self.assertEqual(old_ts.num_provenances, 1)
         self.assertEqual(new_ts.num_provenances, 2)
         self.assertEqual(old_ts.provenance(0).record,
                          new_ts.provenance(0).record)

         old_record = json.loads(old_ts.provenance(0).record)
         self.assertIsInstance(new_ts.metadata, dict)
         self.assertIn('SLiM', new_ts.metadata)
         slim_meta = new_ts.metadata['SLiM']
         self.assertEqual(old_record['parameters']['model_type'],
                          slim_meta['model_type'])
         self.assertEqual(old_record['slim']['generation'],
                          slim_meta["generation"])

         sample_ids = list(old_ts.samples())
         self.assertListEqual(sample_ids, list(new_ts.samples()))
         self.assertArrayEqual(old_ts.tables.nodes.flags,
                               new_ts.tables.nodes.flags)

         # spot-check the first tree at random sample nodes
         old_tree, new_tree = old_ts.first(), new_ts.first()
         for _ in range(20):
             node = random.sample(sample_ids, 1)[0]
             old_parent = old_tree.parent(node)
             self.assertEqual(old_parent, new_tree.parent(node))
             if old_parent != tskit.NULL:
                 self.assertEqual(old_tree.branch_length(node),
                                  new_tree.branch_length(node))
Example #28
0
 def test_mutate(self):
     """Top-level metadata must be unchanged by ``msprime.mutate``.

     Each example is mutated, re-wrapped as a SlimTreeSequence, and its
     metadata compared with the original's.
     """
     for original in self.get_slim_examples():
         mutated = msprime.mutate(original, rate=1e-8, random_seed=5)
         rewrapped = pyslim.SlimTreeSequence(mutated)
         self.assertEqual(original.metadata, rewrapped.metadata)
Example #29
0
    def simulate(
            self, demographic_model=None, contig=None, samples=None, seed=None,
            verbosity=0, slim_path=None, slim_script=False, slim_scaling_factor=10,
            slim_no_recapitation=False, slim_no_burnin=False, **kwargs):
        """
        Simulate the demographic model using SLiM.
        See :meth:`.Engine.simulate()` for definitions of the
        ``demographic_model``, ``contig``, and ``samples`` parameters.

        :param seed: The seed for the random number generator.
        :type seed: int
        :param slim_path: The full path to the slim executable, or the name of
            a command in the current PATH.
        :type slim_path: str
        :param slim_script: If true, the simulation will not be executed.
            Instead the generated SLiM script will be printed to stdout.
        :type slim_script: bool
        :param slim_scaling_factor: Rescale model parameters by the given value,
            to speed up simulation. Population sizes and generation times are
            divided by this factor, whereas the mutation rate, recombination
            rate, and growth rates are multiplied by the factor.
            See SLiM manual: `5.5 Rescaling population sizes to improve
            simulation performance.`
        :type slim_scaling_factor: float
        :param slim_no_recapitation: Do an explicit burn in, and add
            mutations, within the SLiM simulation. This may be much slower than
            the defaults (recapitation and neutral mutation overlay with
            msprime). The burn in behaviour is to wait until all individuals in
            the ancestral populations have a common ancestor within their
            respective population, and then wait another 10*N generations.
        :type slim_no_recapitation: bool
        :param slim_no_burnin: Do not perform a burn in at the start of the
            simulation.  This option is only relevant when
            ``slim_no_recapitation=True``.
        :type slim_no_burnin: bool
        :return: ``None`` when ``slim_script`` is true; otherwise the
            resulting tree sequence.
        """

        run_slim = not slim_script
        do_recap = not slim_no_recapitation
        check_coalescence = slim_no_recapitation and not slim_no_burnin

        if slim_path is None:
            slim_path = self.slim_path()

        if do_recap:
            # Remember the real rate for the msprime overlay below.
            mutation_rate = contig.mutation_rate
            # Ensure no mutations are introduced by SLiM.
            contig = stdpopsim.Contig(
                    recombination_map=contig.recombination_map,
                    mutation_rate=0,
                    genetic_map=contig.genetic_map)

        slim_cmd = [slim_path]
        if seed is not None:
            slim_cmd.extend(["-s", f"{seed}"])

        mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

        @contextlib.contextmanager
        def script_file_f():
            # The script goes to a temporary file, or to stdout when only
            # the script was requested.
            f = mktemp(suffix=".slim") if not slim_script else sys.stdout
            try:
                yield f
            finally:
                # Close the temporary file even if the body raised.
                # Don't close sys.stdout.
                if not slim_script:
                    f.close()

        with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

            recap_epoch = slim_makescript(
                    script_file, ts_file.name,
                    demographic_model, contig, samples,
                    slim_scaling_factor, check_coalescence, verbosity)

            script_file.flush()

            if not run_slim:
                return None

            slim_cmd.append(script_file.name)
            stdout = subprocess.DEVNULL if verbosity == 0 else None
            subprocess.check_call(slim_cmd, stdout=stdout)

            # Must be loaded before the temporary files are closed.
            ts = pyslim.load(ts_file.name)

        # Node times come from SLiM generation numbers, which may have been
        # divided by a scaling factor for computational tractability.
        tables = ts.dump_tables()
        for table in (tables.nodes, tables.migrations):
            table.time *= slim_scaling_factor
        ts = pyslim.SlimTreeSequence.load_tables(tables)
        ts.slim_generation *= slim_scaling_factor

        if do_recap:
            rng = random.Random(seed)
            s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

            population_configurations = [
                    msprime.PopulationConfiguration(
                        initial_size=pop.start_size,
                        growth_rate=pop.growth_rate)
                    for pop in recap_epoch.populations]
            ts = ts.recapitate(
                    recombination_rate=contig.recombination_map.mean_recombination_rate,
                    population_configurations=population_configurations,
                    migration_matrix=recap_epoch.migration_matrix,
                    random_seed=s1)

        ts = simplify_remembered(ts)

        if do_recap:
            # Add neutral mutations.
            ts = pyslim.SlimTreeSequence(msprime.mutate(
                ts, rate=mutation_rate, keep=True, random_seed=s2))

        return ts
Example #30
0
 def test_pickle(self):
     """A SlimTreeSequence must compare equal after a pickle round trip."""
     plain_ts = self.clean_example().tree_sequence()
     original = pyslim.SlimTreeSequence(plain_ts)
     restored = pickle.loads(pickle.dumps(original))
     assert restored == original