Esempio n. 1
0
 def test_generate_nucleotides_keep(self):
     """Check ``generate_nucleotides`` with and without ``keep``.

     Type-1 mutations are added and given nucleotides, then type-2
     mutations are layered on top.  With the default ``keep`` the
     nucleotides already assigned to type-1 mutations must be unchanged;
     with ``keep=False`` the result must still pass the generic checks.
     """
     # Seeded (as in test_generate_nucleotides_refseq) so that the
     # mutation-count assertions below are reproducible.
     ts = msprime.sim_ancestry(
             4,
             sequence_length=10,
             population_size=10,
             random_seed=10,
     )
     ts = pyslim.annotate_defaults(ts, model_type='nonWF', slim_generation=1)
     mts1 = msprime.sim_mutations(ts,
             model=msprime.SLiMMutationModel(type=1),
             rate=0.1,
             random_seed=23)
     # No tree file is written here: the test never reads one back, so
     # dumping to the working directory only left a stray artifact.
     nts1 = pyslim.generate_nucleotides(mts1, seed=10, keep=False)
     assert nts1.num_mutations > 0
     self.verify_generate_nucleotides(nts1, check_transitions=False)
     mts2 = msprime.sim_mutations(nts1,
             model=msprime.SLiMMutationModel(
                 type=2,
                 # continue numbering after the mutations already present
                 next_id=nts1.num_mutations,
             ),
             rate=0.1,
             random_seed=24,
     )
     # keep defaults to True
     nts2 = pyslim.generate_nucleotides(mts2, seed=12)
     assert nts2.num_mutations > nts1.num_mutations
     # Record the nucleotide given to each mutation ID in the first pass;
     # each comma-separated ID in derived_state is paired with the
     # corresponding entry of the metadata's mutation_list.
     muts1 = {}
     for mut in nts1.mutations():
         for i, md in zip(mut.derived_state.split(","), mut.metadata['mutation_list']):
             muts1[i] = md['nucleotide']
     for mut in nts2.mutations():
         for i, md in zip(mut.derived_state.split(","), mut.metadata['mutation_list']):
             if md['mutation_type'] == 1:
                 # first-pass mutations must retain their nucleotide
                 assert i in muts1
                 assert muts1[i] == md['nucleotide']
             else:
                 # second-pass mutations only need a valid nucleotide code
                 assert md['nucleotide'] in [0, 1, 2, 3]
     nts3 = pyslim.generate_nucleotides(mts2, keep=False, seed=15)
     self.verify_generate_nucleotides(nts3, check_transitions=False)
Esempio n. 2
0
def add_mutations(ts, mut_type, mu_rate, effect_sd, next_id=0):
    """Add SLiM-style mutations to ``ts`` and give each a random effect.

    Mutations are simulated with ``msprime.SLiMMutationModel``; every
    distinct SLiM mutation ID found in the result is then assigned a
    selection coefficient drawn from a normal distribution with mean 0 and
    standard deviation ``effect_sd``, stored in the ``selection_coeff``
    field of that mutation's metadata.

    Args:
        ts: Tree sequence to add mutations to.
        mut_type: Passed as ``type`` to ``msprime.SLiMMutationModel``.
        mu_rate: Mutation rate passed to ``msprime.sim_mutations``.
        effect_sd: Standard deviation of the selection coefficients.
        next_id: Passed as ``next_id`` to ``msprime.SLiMMutationModel``.

    Returns:
        A new tree sequence built from the rewritten mutation table.
    """
    mut_model = msprime.SLiMMutationModel(type=mut_type, next_id=next_id)
    mts = msprime.sim_mutations(
        ts,
        mu_rate,
        model=mut_model,
    )
    print(f"The tree sequence now has {mts.num_mutations} mutations, at "
          f"{mts.num_sites} distinct sites.")
    # dump_tables() hands back an editable copy of the tables, which is what
    # we need in order to rebuild the mutation table below.
    tables = mts.dump_tables()
    tables.mutations.clear()
    # SLiM mutation ID -> selection coefficient, so that an ID appearing in
    # several derived states always carries the same coefficient.
    mut_map = {}
    # NOTE(review): every mutation in `mts` is rewritten here, so if `ts`
    # already carried mutations that sim_mutations kept, their coefficients
    # are redrawn too -- confirm that this is intended.
    for m in mts.mutations():
        md_list = m.metadata["mutation_list"]
        slim_ids = m.derived_state.split(",")
        assert len(slim_ids) == len(md_list)
        for sid, md in zip(slim_ids, md_list):
            if sid not in mut_map:
                mut_map[sid] = np.random.normal(loc=0.0, scale=effect_sd)
            md["selection_coeff"] = mut_map[sid]
        tables.mutations.add_row(site=m.site,
                                 node=m.node,
                                 time=m.time,
                                 derived_state=m.derived_state,
                                 parent=m.parent,
                                 metadata={"mutation_list": md_list})
    assert tables.mutations.num_rows == mts.num_mutations
    if mut_map:
        print(
            f"The selection coefficients range from {min(mut_map.values()):0.2e}")
        print(f"to {max(mut_map.values()):0.2e}.")
    else:
        # min()/max() raise ValueError on an empty collection, which happens
        # whenever the simulation places no mutations at all.
        print("No mutations were added, so there are no selection "
              "coefficients to report.")
    return tables.tree_sequence()
Esempio n. 3
0
 def test_generate_nucleotides_refseq(self):
     """A supplied reference sequence is stored unchanged on the result."""
     ancestry = msprime.sim_ancestry(
         4, sequence_length=10, population_size=10, random_seed=10)
     annotated = pyslim.annotate_defaults(
         ancestry, model_type='nonWF', slim_generation=1)
     slim_model = msprime.SLiMMutationModel(type=1)
     mutated = msprime.sim_mutations(
         annotated, model=slim_model, rate=0.5, random_seed=23)
     # One "A" per position along the sequence.
     all_a = "A" * int(mutated.sequence_length)
     with_nucleotides = pyslim.generate_nucleotides(
         mutated, reference_sequence=all_a, seed=6)
     self.verify_generate_nucleotides(with_nucleotides, check_transitions=True)
     assert with_nucleotides.reference_sequence.data == all_a
Esempio n. 4
0
 def test_convert_alleles_errors(self):
     """``convert_alleles`` raises ValueError on unsuitable tree sequences.

     Covers three inputs: a plain msprime tree sequence, the same tree
     sequence after ``annotate_defaults`` (both lack a reference
     sequence), and one that has a reference sequence but whose mutations
     are not nucleotide mutations.
     """
     # Seeded (as in test_generate_nucleotides_refseq) so that the
     # `num_mutations > 0` precondition below is reproducible.
     ts = msprime.sim_ancestry(
             4,
             sequence_length=10,
             population_size=10,
             random_seed=10,
     )
     with pytest.raises(ValueError, match="must have a valid reference sequence"):
         _ = pyslim.convert_alleles(ts)
     ts = pyslim.annotate_defaults(ts, model_type="nonWF", slim_generation=1)
     with pytest.raises(ValueError, match="must have a valid reference sequence"):
         _ = pyslim.convert_alleles(ts)
     mts = msprime.sim_mutations(ts,
             model=msprime.SLiMMutationModel(type=1),
             rate=0.1,
             random_seed=23)
     # The last check is only meaningful if there is at least one mutation.
     assert mts.num_mutations > 0
     # Attach a reference sequence so the remaining failure is about the
     # mutations themselves rather than the missing reference.
     mtt = mts.dump_tables()
     mtt.reference_sequence.data = 'A' * int(mts.sequence_length)
     mts = mtt.tree_sequence()
     with pytest.raises(ValueError, match="must be nucleotide mutations"):
         _ = pyslim.convert_alleles(mts)
Esempio n. 5
0
    def _mutate(self):
        """Mutate the recapitated tree sequence.

        Runs ``msprime.sim_mutations`` on ``self.tree_sequence`` at rate
        ``self.mut`` with a ``SLiMMutationModel`` of type 0, keeping any
        mutations already present, then re-wraps the result as a
        ``pyslim.SlimTreeSequence``.  Mutation reports are logged before
        and after.

        NOTE(review): open question carried over from the original
        docstring -- does this know which regions to mutate?  All
        recapitated edges should be mutated, but so should the neutral
        genomic regions of the SLiM time frame.
        """
        # logger report before adding mutations
        self._report_mutations(allow_m0=False)

        # msprime only accepts seeds from 1 to 2**32 - 1, while
        # integers(2**31) can return 0.  Map that rare draw to 1 and leave
        # every other draw untouched, so seeded runs are unaffected.
        seed = int(self.rng.integers(2**31)) or 1

        # add mutations
        self.tree_sequence = msprime.sim_mutations(
            self.tree_sequence,
            rate=self.mut,
            random_seed=seed,
            keep=True,  # whether to keep existing mutations.
            model=msprime.SLiMMutationModel(type=0),
        )
        self.tree_sequence = pyslim.SlimTreeSequence(self.tree_sequence)

        # logger report after adding mutations
        self._report_mutations(allow_m0=True)
Esempio n. 6
0
    rate=[1e-8, 1e-8, 1e-8])  # why do we set the recombination rate this way?
# Demography with one population of initial size 10000.
demog_model = msprime.Demography()
demog_model.add_population(initial_size=10000)
print("Working on Snake Sim")
# Simulate ancestry under that demography; `recomb_map` is presumably the
# rate map built just before this point -- TODO confirm.
ots = msprime.sim_ancestry(
    samples=1000,  # number of individuals sampled? (TODO confirm in msprime docs)
    demography=demog_model,
    # random_seed=5,
    recombination_rate=recomb_map)

ots = pyslim.annotate_defaults(ots, model_type="nonWF", slim_generation=1)
# This adds annotation (metadata) to all of the individuals.
# Mutation-rate map over the same `breaks` positions, with one rate per
# interval (all three are equal here).
mut_map = msprime.RateMap(position=breaks,
                          rate=[1e-10, 1e-10,
                                1e-10])  # what rate(s) should go here?
mut_model = msprime.SLiMMutationModel(type=2)  # mutation "m2"
# Overlay mutations of that type; keep=True retains any mutations already
# present in `ots`.
ots = msprime.sim_mutations(
    ots,
    rate=mut_map,
    model=mut_model,
    keep=True,
    # random_seed=9
)
print(f"The tree sequence now has {ots.num_mutations} mutations, at "
      f"{ots.num_sites} distinct sites.")

tables = ots.tables
tables.mutations.clear()
mut_map = {}
for m in ots.mutations():
    md_list = m.metadata["mutation_list"]