def test_generate_nucleotides_keep(self):
    """Check that ``generate_nucleotides`` keeps existing nucleotides by default.

    Type-1 mutations are added and given nucleotides, then type-2 mutations
    are layered on top. Regenerating nucleotides with the default ``keep``
    must leave every type-1 nucleotide untouched and give each type-2
    mutation a nucleotide in ``0..3``. Finally, regenerating with
    ``keep=False`` must still produce a tree sequence that passes
    ``verify_generate_nucleotides``.
    """
    # Seed the ancestry as well as the mutations so that the mutation-count
    # assertions below do not depend on a randomly drawn genealogy.
    ts = msprime.sim_ancestry(
        4,
        sequence_length=10,
        population_size=10,
        random_seed=10,
    )
    ts = pyslim.annotate_defaults(ts, model_type='nonWF', slim_generation=1)
    mts1 = msprime.sim_mutations(
        ts,
        model=msprime.SLiMMutationModel(type=1),
        rate=0.1,
        random_seed=23,
    )
    # (A stray debugging dump of mts1 to "out.trees" used to live here; it
    # wrote into the working directory and nothing read it back.)
    nts1 = pyslim.generate_nucleotides(mts1, seed=10, keep=False)
    assert nts1.num_mutations > 0
    self.verify_generate_nucleotides(nts1, check_transitions=False)

    # Second layer of mutations; next_id starts past the existing mutation
    # count so the new SLiM IDs do not collide with the type-1 IDs.
    mts2 = msprime.sim_mutations(
        nts1,
        model=msprime.SLiMMutationModel(
            type=2,
            next_id=nts1.num_mutations,
        ),
        rate=0.1,
        random_seed=24,
    )
    # keep defaults to True
    nts2 = pyslim.generate_nucleotides(mts2, seed=12)
    assert nts2.num_mutations > nts1.num_mutations

    # Map each SLiM mutation ID (as it appears in the derived state) to the
    # nucleotide it was assigned in the first pass.
    muts1 = {}
    for mut in nts1.mutations():
        for i, md in zip(mut.derived_state.split(","), mut.metadata['mutation_list']):
            muts1[i] = md['nucleotide']

    for mut in nts2.mutations():
        for i, md in zip(mut.derived_state.split(","), mut.metadata['mutation_list']):
            if md['mutation_type'] == 1:
                # Pre-existing mutation: nucleotide must be unchanged.
                assert i in muts1
                assert muts1[i] == md['nucleotide']
            else:
                # Newly added mutation: must have received a valid nucleotide.
                assert md['nucleotide'] in [0, 1, 2, 3]

    nts3 = pyslim.generate_nucleotides(mts2, keep=False, seed=15)
    self.verify_generate_nucleotides(nts3, check_transitions=False)
def add_mutations(ts, mut_type, mu_rate, effect_sd, next_id=0):
    """Add SLiM-style mutations to ``ts`` with random selection coefficients.

    Mutations are simulated with ``msprime.sim_mutations`` under a
    ``msprime.SLiMMutationModel``. Each distinct SLiM mutation ID found in
    the derived states is then given one selection coefficient drawn from a
    normal distribution with mean 0 and standard deviation ``effect_sd``
    (via the global ``np.random`` state), and the mutation table is rebuilt
    with that coefficient written into the ``selection_coeff`` metadata field.

    Args:
        ts: Tree sequence to add mutations to. Its mutation metadata must
            decode to a dict with a ``"mutation_list"`` entry (presumably a
            SLiM-annotated tree sequence -- confirm against callers).
        mut_type: SLiM mutation type passed to ``SLiMMutationModel(type=...)``.
        mu_rate: Mutation rate passed to ``msprime.sim_mutations``.
        effect_sd: Standard deviation of the selection coefficients.
        next_id: First SLiM mutation ID the model should hand out.

    Returns:
        A new tree sequence containing the mutations with updated metadata.
    """
    # The SLiM mutation model attaches the metadata that the mutations need.
    mut_model = msprime.SLiMMutationModel(type=mut_type, next_id=next_id)
    mts = msprime.sim_mutations(
        ts,
        mu_rate,
        model=mut_model,
    )
    print(f"The tree sequence now has {mts.num_mutations} mutations, at "
          f"{mts.num_sites} distinct sites.")

    # dump_tables() is the documented way to get a modifiable copy.
    tables = mts.dump_tables()
    tables.mutations.clear()

    # SLiM mutation ID (string from the derived state) -> selection coefficient.
    mut_map = {}
    for m in mts.mutations():
        md_list = m.metadata["mutation_list"]
        slim_ids = m.derived_state.split(",")
        assert len(slim_ids) == len(md_list)
        for sid, md in zip(slim_ids, md_list):
            # The same SLiM mutation can appear in several stacked derived
            # states; draw its coefficient once and reuse it.
            if sid not in mut_map:
                mut_map[sid] = np.random.normal(loc=0.0, scale=effect_sd)
            md["selection_coeff"] = mut_map[sid]
        tables.mutations.add_row(
            site=m.site,
            node=m.node,
            time=m.time,
            derived_state=m.derived_state,
            parent=m.parent,
            metadata={"mutation_list": md_list},
        )
    assert tables.mutations.num_rows == mts.num_mutations

    # min()/max() raise ValueError on an empty sequence, so only report the
    # range when at least one mutation was actually simulated.
    if mut_map:
        print(
            f"The selection coefficients range from {min(mut_map.values()):0.2e}")
        print(f"to {max(mut_map.values()):0.2e}.")
    return tables.tree_sequence()
def test_generate_nucleotides_refseq(self):
    """Nucleotides generated against a supplied reference sequence.

    Simulates a small seeded ancestry, adds type-1 SLiM mutations, and
    generates nucleotides using an all-"A" reference sequence. The result
    must pass ``verify_generate_nucleotides`` (with transition checking) and
    must carry the supplied reference sequence unchanged.
    """
    ancestry = msprime.sim_ancestry(
        4,
        sequence_length=10,
        population_size=10,
        random_seed=10,
    )
    annotated = pyslim.annotate_defaults(
        ancestry, model_type='nonWF', slim_generation=1,
    )
    slim_model = msprime.SLiMMutationModel(type=1)
    mutated = msprime.sim_mutations(
        annotated, model=slim_model, rate=0.5, random_seed=23,
    )

    # One "A" per base of the simulated sequence.
    all_a = "A" * int(mutated.sequence_length)
    with_nucleotides = pyslim.generate_nucleotides(
        mutated, reference_sequence=all_a, seed=6,
    )

    self.verify_generate_nucleotides(with_nucleotides, check_transitions=True)
    assert with_nucleotides.reference_sequence.data == all_a
def test_convert_alleles_errors(self):
    """``convert_alleles`` rejects inputs that are not nucleotide-ready.

    Three failure cases are exercised in turn: a plain msprime tree
    sequence, the same tree sequence after ``annotate_defaults`` (still no
    reference sequence), and a mutated tree sequence that has a reference
    sequence but whose mutations are not nucleotide mutations.
    """
    missing_refseq = "must have a valid reference sequence"

    plain = msprime.sim_ancestry(4, sequence_length=10, population_size=10)
    with pytest.raises(ValueError, match=missing_refseq):
        pyslim.convert_alleles(plain)

    annotated = pyslim.annotate_defaults(
        plain, model_type="nonWF", slim_generation=1,
    )
    with pytest.raises(ValueError, match=missing_refseq):
        pyslim.convert_alleles(annotated)

    mutated = msprime.sim_mutations(
        annotated,
        model=msprime.SLiMMutationModel(type=1),
        rate=0.1,
        random_seed=23,
    )
    assert mutated.num_mutations > 0

    # Attach a reference sequence so the remaining failure is about the
    # mutations themselves rather than the missing reference.
    tables = mutated.dump_tables()
    tables.reference_sequence.data = 'A' * int(mutated.sequence_length)
    with_refseq = tables.tree_sequence()
    with pytest.raises(ValueError, match="must be nucleotide mutations"):
        pyslim.convert_alleles(with_refseq)
def _mutate(self):
    """Mutate the recapitated tree sequence.

    Adds SLiM-style mutations of type 0 to ``self.tree_sequence`` with
    ``msprime.sim_mutations`` at rate ``self.mut``, keeping any mutations
    that are already present, then re-wraps the result as a
    ``pyslim.SlimTreeSequence``. Mutation reports are logged before and
    after via ``self._report_mutations``.

    NOTE(review): open question carried over from the original author --
    does this know which regions to mutate? All recapitated edges should be
    mutated, and so should the neutral genomic regions of the SLiM time
    frame. Nothing in this method restricts the regions; that would depend
    on what ``self.mut`` holds -- confirm.
    """
    # logger report before adding mutations
    self._report_mutations(allow_m0=False)

    # msprime documents valid seeds as 1..2**32 - 1, but integers(2**31)
    # can return 0. Clamp that single value to 1 so every other draw is
    # unchanged.
    seed = max(int(self.rng.integers(2**31)), 1)

    # add mutations
    self.tree_sequence = msprime.sim_mutations(
        self.tree_sequence,
        rate=self.mut,
        random_seed=seed,
        keep=True,  # whether to keep existing mutations.
        model=msprime.SLiMMutationModel(type=0),
    )
    self.tree_sequence = pyslim.SlimTreeSequence(self.tree_sequence)

    # logger report after adding mutations
    self._report_mutations(allow_m0=True)
rate=[1e-8, 1e-8, 1e-8]) # why do we set the recombination rate this way? demog_model = msprime.Demography() demog_model.add_population(initial_size=10000) print("Working on Snake Sim") ots = msprime.sim_ancestry( samples=1000, # number of individividuals sampled? demography=demog_model, # random_seed=5, recombination_rate=recomb_map) ots = pyslim.annotate_defaults(ots, model_type="nonWF", slim_generation=1) # this is adding anotation or metadata to all of the individuals mut_map = msprime.RateMap(position=breaks, rate=[1e-10, 1e-10, 1e-10]) # what rate(s) would I put in here mut_model = msprime.SLiMMutationModel(type=2) # mutation "m2" ots = msprime.sim_mutations( ots, rate=mut_map, model=mut_model, keep=True, # random_seed=9 ) print(f"The tree sequence now has {ots.num_mutations} mutations, at " f"{ots.num_sites} distinct sites.") tables = ots.tables tables.mutations.clear() mut_map = {} for m in ots.mutations(): md_list = m.metadata["mutation_list"]