def _merge_ts_pops(self):
    """Combine the stored simulations into one SlimTreeSequence.

    With a single stored tree sequence, it is wrapped as a
    ``pyslim.SlimTreeSequence`` and simplified (keeping input roots) to
    remove the empty population. With two, the second is added to the
    first with ``union``: every node of the second is mapped to
    ``tskit.NULL`` (no shared portion) and ``add_populations=True`` places
    the new nodes in a new population. The result is stored on
    ``self.tree_sequence``.

    Raises
    ------
    ValueError
        If more than two tree sequences are stored.
    """
    n_sims = self._nts

    if n_sims == 1:
        # simplify to remove the empty population
        wrapped = pyslim.SlimTreeSequence(self._tree_sequences[0])
        self.tree_sequence = wrapped.simplify(keep_input_roots=True)
        return

    if n_sims > 2:
        raise ValueError("you cannot enter >2 tree sequences.")

    # Two simulations: nothing is shared, so no node of the second maps
    # onto a node of the first.
    first = self._tree_sequences[0]
    second = self._tree_sequences[1]
    unshared = [tskit.NULL] * second.num_nodes
    union_ts = first.union(
        second,
        node_mapping=unshared,
        add_populations=True,
    )
    self.tree_sequence = pyslim.SlimTreeSequence(union_ts)
def _recap_and_rescale(self, ts, seed, recap_epoch, contig, mutation_rate,
                       slim_frac, slim_scaling_factor):
    """
    Apply post-SLiM transformations to ``ts``.

    In order: node and migration times (and the SLiM generation) are
    multiplied by ``slim_scaling_factor``; the tree sequence is recapitated
    using the populations and migration matrix of ``recap_epoch``; it is
    passed through ``self._simplify_remembered``; and neutral mutations are
    added. Returns the resulting ``pyslim.SlimTreeSequence``.
    """
    # Node times come from SLiM generation numbers, which may have been
    # divided by a scaling factor for computational tractability.
    rescaled = ts.dump_tables()
    for time_table in (rescaled.nodes, rescaled.migrations):
        time_table.time *= slim_scaling_factor
    ts = pyslim.SlimTreeSequence.load_tables(rescaled)
    ts.slim_generation *= slim_scaling_factor

    # All seeds are derived from ``seed``; the draw order is s1, s2, s3.
    rng = random.Random(seed)
    s1 = rng.randrange(1, 2**32)
    s2 = rng.randrange(1, 2**32)

    pop_configs = []
    for pop in recap_epoch.populations:
        pop_configs.append(
            msprime.PopulationConfiguration(
                initial_size=pop.start_size,
                growth_rate=pop.growth_rate))

    ts = ts.recapitate(
        recombination_rate=contig.recombination_map.mean_recombination_rate,
        population_configurations=pop_configs,
        migration_matrix=recap_epoch.migration_matrix,
        random_seed=s1)

    ts = self._simplify_remembered(ts)

    if slim_frac < 1:
        # Add mutations to SLiM part of trees.
        slim_part_rate = (1 - slim_frac) * mutation_rate
        slim_part = msprime.mutate(
            ts, rate=slim_part_rate, keep=True, random_seed=s2,
            end_time=ts.slim_generation)
        ts = pyslim.SlimTreeSequence(slim_part)

    # Add mutations to recapitated part of trees.
    s3 = rng.randrange(1, 2**32)
    recap_part = msprime.mutate(
        ts, rate=mutation_rate, keep=True, random_seed=s3,
        start_time=ts.slim_generation)
    ts = pyslim.SlimTreeSequence(recap_part)

    return ts
def test_load(self, recipe):
    """Loading via tables, via direct conversion and from file must agree."""
    path = recipe["path"]["ts"]

    # Baseline: a plain tskit tree sequence and its tables.
    plain_ts = tskit.load(path)
    assert isinstance(plain_ts, tskit.TreeSequence)
    plain_tables = plain_ts.dump_tables()

    # Route 1: transfer the tables.
    from_tables = pyslim.load_tables(plain_tables)
    assert isinstance(from_tables, pyslim.SlimTreeSequence)
    self.verify_times(plain_ts, from_tables)
    self.assertTableCollectionsEqual(plain_tables, from_tables.dump_tables())

    # Route 2: convert the tree sequence directly.
    converted = pyslim.SlimTreeSequence(plain_ts)
    assert isinstance(converted, pyslim.SlimTreeSequence)
    self.verify_times(plain_ts, converted)
    self.assertTableCollectionsEqual(plain_tables, converted.dump_tables())

    # Route 3: load into pyslim straight from the file.
    from_file = pyslim.load(path)
    assert isinstance(from_file, pyslim.SlimTreeSequence)
    self.assertTableCollectionsEqual(plain_tables, from_file.dump_tables())
    file_generation = from_file.metadata['SLiM']['generation']
    assert file_generation == converted.metadata['SLiM']['generation']
def test_load(self):
    """Loading via tables, via direct conversion and from file must agree."""
    for _, ex in self.get_slim_examples(return_info=True):
        path = ex['basename'] + ".trees"

        # Baseline: load with tskit.
        plain_ts = tskit.load(path)
        self.assertIs(type(plain_ts), msprime.TreeSequence)
        plain_tables = plain_ts.tables

        # Route 1: transfer the tables.
        from_tables = pyslim.load_tables(plain_tables, legacy_metadata=True)
        self.assertIsInstance(from_tables, pyslim.SlimTreeSequence)
        self.verify_times(plain_ts, from_tables)
        self.assertTableCollectionsEqual(plain_tables, from_tables.tables)

        # Route 2: convert the tree sequence directly.
        converted = pyslim.SlimTreeSequence(plain_ts)
        self.assertIs(type(converted), pyslim.SlimTreeSequence)
        self.verify_times(plain_ts, converted)
        self.assertTableCollectionsEqual(plain_tables, converted.tables)

        # Route 3: load into pyslim straight from the file.
        from_file = pyslim.load(path, legacy_metadata=True)
        self.assertIs(type(from_file), pyslim.SlimTreeSequence)
        self.assertTableCollectionsEqual(plain_tables, from_file.tables)
        self.assertEqual(from_file.slim_generation,
                         converted.slim_generation)
def go(self):
    """Run the admixture simulation end to end and return the tree sequence.

    Steps, in order:

    1. run ``self.simulate_recent_history()`` and load ``self.slim_out``;
    2. if ``self.need_to_subsample`` is set, subsample via
       ``TreeSequenceToSample``;
    3. reload the tables as a ``pyslim.SlimTreeSequence`` and recapitate
       with the ancient recombination rate, population configurations and
       demographic events stored on ``self``;
    4. overlay neutral mutations at ``self.neutral_mutation_rate``, keeping
       existing ones;
    5. dump to ``self.out_file`` when it is not ``None``.

    Raises ``StringError`` if ``self.slim_out`` does not exist after the
    SLiM step.
    """
    print(self.slim_out)
    print('Simulating recent history with SLiM...')
    self.simulate_recent_history()

    if not os.path.isfile(self.slim_out):
        # NOTE(review): ``StringError`` is not a builtin; confirm it is
        # defined or imported elsewhere in this module, otherwise this line
        # raises NameError instead.
        raise StringError(
            "The supplied SLiM outfile does not match the one specified "
            "in the script.")

    ts = tskit.load(self.slim_out)

    if self.need_to_subsample:
        print('Taking samples from present day populations...')
        sampler = TreeSequenceToSample(
            ts,
            populations_to_sample_from=self.populations,
            sample_sizes=self.sample_sizes)
        ts = sampler.subsample()

    # Reload as a SlimTreeSequence so that ``recapitate`` is available.
    ts = pyslim.SlimTreeSequence.load_tables(ts.tables)

    print('Simulating ancient history with msprime...')
    ts = ts.recapitate(
        recombination_rate=self.ancient_recombination_rate,
        population_configurations=self.ancient_population_configurations,
        demographic_events=self.ancient_demographic_events,
        keep_first_generation=True,  # needed to get local ancestors
    )

    print('Adding variation...')
    ts = pyslim.SlimTreeSequence(
        msprime.mutate(ts, rate=self.neutral_mutation_rate, keep=True))

    if self.out_file is not None:
        ts.dump(self.out_file)
    return ts
def test_convert_0_4_files(self):
    """Converting 0.4-format files must expose SLiM info in metadata."""
    # Note that with version 0.5 and above, we *don't* get information from
    # provenance, we get it from top-level metadata
    for old_ts in self.get_0_4_slim_examples():
        upgraded = pyslim.SlimTreeSequence(old_ts)

        # Conversion appends one provenance entry and keeps the first.
        self.assertEqual(old_ts.num_provenances, 1)
        self.assertEqual(upgraded.num_provenances, 2)
        self.assertEqual(old_ts.provenance(0).record,
                         upgraded.provenance(0).record)

        # The old provenance record must agree with the new metadata.
        record = json.loads(old_ts.provenance(0).record)
        self.assertIsInstance(upgraded.metadata, dict)
        self.assertIn('SLiM', upgraded.metadata)
        slim_md = upgraded.metadata['SLiM']
        self.assertEqual(record['parameters']['model_type'],
                         slim_md['model_type'])
        self.assertEqual(record['slim']['generation'],
                         slim_md['generation'])

        # Samples and node flags are untouched.
        self.assertListEqual(list(old_ts.samples()),
                             list(upgraded.samples()))
        self.assertArrayEqual(old_ts.tables.nodes.flags,
                              upgraded.tables.nodes.flags)

        # Spot-check the first tree at 20 randomly drawn samples.
        samples = list(old_ts.samples())
        old_tree = old_ts.first()
        new_tree = upgraded.first()
        for _ in range(20):
            u = random.sample(samples, 1)[0]
            self.assertEqual(old_tree.parent(u), new_tree.parent(u))
            if old_tree.parent(u) != msprime.NULL_NODE:
                self.assertEqual(old_tree.branch_length(u),
                                 new_tree.branch_length(u))
def test_convert_mixd_files(self):
    """Converting the mixed example files must expose SLiM metadata."""
    for old_ts in self.get_mixed_slim_examples():
        upgraded = pyslim.SlimTreeSequence(old_ts)
        self.verify_upgrade(upgraded)

        # Conversion appends one provenance entry and keeps the first.
        assert old_ts.num_provenances == 1
        assert upgraded.num_provenances == 2
        assert old_ts.provenance(0).record == upgraded.provenance(0).record

        # The old provenance record must agree with the new metadata.
        record = json.loads(old_ts.provenance(0).record)
        assert isinstance(upgraded.metadata, dict)
        assert 'SLiM' in upgraded.metadata
        slim_md = upgraded.metadata['SLiM']
        assert record['parameters']['model_type'] == slim_md['model_type']
        assert record['slim']['generation'] == slim_md['generation']

        # Samples and node flags are untouched.
        assert list(old_ts.samples()) == list(upgraded.samples())
        assert np.array_equal(old_ts.tables.nodes.flags,
                              upgraded.tables.nodes.flags)

        # Spot-check the first tree at 20 randomly drawn samples.
        samples = list(old_ts.samples())
        old_tree = old_ts.first()
        new_tree = upgraded.first()
        for _ in range(20):
            u = random.sample(samples, 1)[0]
            assert old_tree.parent(u) == new_tree.parent(u)
            if old_tree.parent(u) != tskit.NULL:
                assert old_tree.branch_length(u) == new_tree.branch_length(u)
def test_bad_metadata(self):
    """Empty top-level JSON metadata must be rejected with ValueError."""
    tables = self.clean_example().copy()
    # Replace the schema with a bare JSON object schema and empty metadata.
    bare_schema = tskit.MetadataSchema({"type": "object", "codec": "json"})
    tables.metadata_schema = bare_schema
    tables.metadata = {}
    bad_ts = tables.tree_sequence()
    with pytest.raises(ValueError):
        pyslim.SlimTreeSequence(bad_ts)
def slimcoal(self):
    """Overlay mutations on ``self.ts`` and report the result.

    Mutations are added at rate 1e-9 while keeping existing ones; the
    result is stored on ``self.tscoal`` and a summary line is printed.
    """
    mutated = msprime.mutate(self.ts, rate=1e-9, keep=True)
    self.tscoal = pyslim.SlimTreeSequence(mutated)
    summary = (
        f"The tree sequence now has {self.tscoal.num_mutations} mutations, "
        f"and mean pairwise nucleotide diversity is {self.tscoal.diversity()}."
    )
    print(summary)
def test_legacy_error(self, recipe, tmp_path):
    """Requesting legacy metadata must raise ValueError on both entry points."""
    ts = recipe["ts"]
    tmp_file = os.path.join(tmp_path, "test_legacy.trees")
    ts.dump(tmp_file)

    expected_message = "legacy metadata tools"
    # Loading from file.
    with pytest.raises(ValueError, match=expected_message):
        pyslim.load(tmp_file, legacy_metadata=True)
    # Converting an in-memory tree sequence.
    with pytest.raises(ValueError, match=expected_message):
        pyslim.SlimTreeSequence(ts, legacy_metadata=True)
def test_inconsistent_times(self):
    """A node table rebuilt with made-up times must be rejected."""
    clean_tables = self.clean_example()
    tables = clean_tables.copy()
    tables.nodes.clear()
    # One replacement row per original node; each row's time is its index,
    # and every row is a sample in population 1 belonging to individual 0.
    for node_time in range(len(clean_tables.nodes)):
        tables.nodes.add_row(
            time=node_time,
            flags=tskit.NODE_IS_SAMPLE,
            population=1,
            individual=0)
    bad_ts = tables.tree_sequence()
    with pytest.raises(ValueError):
        pyslim.SlimTreeSequence(bad_ts)
def test_samples_only(self, recipe):
    """``individuals_alive_at(..., samples_only=True)`` must honour flags.

    Up to four individuals alive at time 0 are chosen at random, the sample
    flag is cleared on every node that does not belong to one of them, and
    the rebuilt tree sequence must report exactly the chosen individuals
    when ``samples_only=True``.
    """
    ts = recipe["ts"]
    all_inds = ts.individuals_alive_at(0)
    # The default must behave like samples_only=False.
    assert set(all_inds) == set(ts.individuals_alive_at(0, samples_only=False))

    sub_inds = np.random.choice(
        all_inds, size=min(len(all_inds), 4), replace=False)

    # Keep only the sample bit, and only for nodes of chosen individuals.
    # The previous expression `NODE_IS_SAMPLE * n.individual in sub_inds`
    # parsed as `(NODE_IS_SAMPLE * n.individual) in sub_inds` and gave the
    # right answer only because NODE_IS_SAMPLE happens to equal 1.
    flags = np.array(
        [(n.flags & tskit.NODE_IS_SAMPLE) if n.individual in sub_inds else 0
         for n in ts.nodes()],
        dtype=np.uint32)

    tables = ts.dump_tables()
    tables.nodes.flags = flags
    new_ts = pyslim.SlimTreeSequence(tables.tree_sequence())
    assert set(sub_inds) == set(new_ts.individuals_alive_at(0, samples_only=True))
def _match_nodes_and_merge(self, split_time=1000):
    """Match ancestral nodes of the two stored tree sequences and merge them.

    Builds an array of length ``ts1.num_nodes`` whose index is a node id in
    ``self._tree_sequences[1]`` and whose entry is the equivalent node id
    in ``self._tree_sequences[0]``, or ``tskit.NULL`` when there is no
    equivalent. Nodes are matched on the ``slim_id`` stored in node
    metadata, and only nodes of ts1 with time (backward in time) greater
    than or equal to ``split_time`` are matched. The mapping is then passed
    to ``ts0.union(ts1, ...)`` with ``check_shared_equality=True`` and the
    result is stored on ``self.tree_sequence`` as a
    ``pyslim.SlimTreeSequence``.

    Parameters
    ----------
    split_time : int or float, optional
        Oldest-first cutoff: nodes of ts1 younger than this are never
        matched. Defaults to 1000, the value previously hard-coded here.

    Note
    ----
    This must be done before recapitation, which would add nodes without
    slim_id metadata.
    """
    ts0 = self._tree_sequences[0]
    ts1 = self._tree_sequences[1]

    # Start with "no equivalent" for every node of ts1.
    node_mapping = np.full(ts1.num_nodes, tskit.NULL)

    # SLiM ids of every node in each tree sequence.
    slim_ids0 = np.array([n.metadata["slim_id"] for n in ts0.nodes()])
    slim_ids1 = np.array([n.metadata["slim_id"] for n in ts1.nodes()])

    # Nodes of ts1 at or before the split (time runs backward), i.e. nodes
    # that belong to the ancestor.
    alive_before_split = ts1.tables.nodes.time >= split_time
    # Nodes of ts1 whose SLiM id also occurs in ts0.
    is_1in0 = np.isin(slim_ids1, slim_ids0)
    both = np.logical_and(alive_before_split, is_1in0)

    # Look up each matched SLiM id in ts0 through a sorted view, then
    # translate the sorted positions back to ts0 node ids.
    sorted_ids0 = np.argsort(slim_ids0)
    matches = np.searchsorted(
        slim_ids0,
        slim_ids1[both],
        side='left',
        sorter=sorted_ids0,
    )
    node_mapping[both] = sorted_ids0[matches]

    tsu = ts0.union(ts1, node_mapping=node_mapping, check_shared_equality=True)
    self.tree_sequence = pyslim.SlimTreeSequence(tsu)
def test_load_without_provenance(self):
    """A no-op SLiM restart must round-trip without any provenance."""
    # with 0.5, SLiM should read info from metadata, not provenances
    for in_ts, basename in self.get_slim_restarts(no_op=True):
        in_tables = in_ts.tables
        in_tables.provenances.clear()
        stripped = in_tables.tree_sequence()
        cleared_ts = pyslim.SlimTreeSequence(
            stripped, reference_sequence=in_ts.reference_sequence)

        out_ts = self.run_slim_restart(cleared_ts, basename)
        out_tables = out_ts.tables
        # Ignore whatever provenance the restart itself added.
        out_tables.provenances.clear()
        self.assertEqual(in_tables, out_tables)
def recapitate(treesfile, sample_size, recombination_rate, mutation_rate, Ne,
               gcBurnin):
    """Recapitate a SLiM tree file, subsample it and overlay mutations.

    The file is loaded with pyslim and recapitated with population size
    ``Ne``; ``sample_size`` individuals alive at time 0 are drawn without
    replacement and the tree sequence is simplified to the first node of
    each; mutations are then added at ``mutation_rate / 2`` while keeping
    existing ones.

    NOTE(review): ``recombination_rate`` is not used in the body; the
    recapitation rate is ``gcBurnin + 1e-20``. Confirm this is intended.

    Returns a tuple ``(snp_mat, pos, ts_mutated)``: the transposed genotype
    matrix, the rounded integer site positions, and the mutated tree
    sequence.
    """
    def _root_counts(tree_seq):
        # Tabulate how many trees have each number of roots.
        per_tree = pd.Series([tree.num_roots for tree in tree_seq.trees()])
        return per_tree.value_counts().to_frame(name="num_tree_with_num_roots")

    # load trees with pyslim
    ts = pyslim.load(treesfile)
    num_individuals_0 = len(ts.individuals_alive_at(0))
    n_roots = _root_counts(ts)
    logging.debug(
        f"The tree sequence has {ts.num_trees} trees on a genome of length "
        f"{ts.sequence_length} {num_individuals_0} alive individuals, "
        f"{ts.num_samples} 'sample' genomes and {ts.num_mutations} mutations. "
        f"number of roots per tree: {str(n_roots)[:-12]}")

    recap_rate = gcBurnin + 1e-20  # can't put 0 here.
    ts_recap = ts.recapitate(recombination_rate=recap_rate, Ne=Ne)

    # simplify to a subset of the haploids: choose random individuals and
    # keep the first node of each
    alive_now = ts_recap.individuals_alive_at(0)
    sample_inds = np.random.choice(alive_now, size=sample_size, replace=False)
    sample_nodes = []
    for ind_id in sample_inds:
        sample_nodes.append(ts_recap.individual(ind_id).nodes[0])
    ts_samp = ts_recap.simplify(samples=sample_nodes)

    n_roots = _root_counts(ts_samp)
    logging.debug(
        f"The tree sequence has {ts_samp.num_trees} trees on a genome of length "
        f"{ts_samp.sequence_length} {ts_samp.num_individuals} alive individuals, "
        f"{ts_samp.num_samples} 'sample' genomes and {ts_samp.num_mutations} mutations. "
        f"number of roots per tree: {str(n_roots)[:-12]}")

    # mutate; rate is halved to have 2.Ne.mu and not 4.Ne.mu, and existing
    # mutations are kept
    overlaid = msprime.mutate(ts_samp, rate=mutation_rate / 2, keep=True)
    ts_mutated = pyslim.SlimTreeSequence(overlaid)

    snp_mat = ts_mutated.genotype_matrix().T
    site_positions = ts_mutated.tables.asdict()["sites"]["position"]
    pos = np.round(site_positions).astype(int)
    return snp_mat, pos, ts_mutated
def test_load_without_provenance(self, restart_name, recipe, helper_functions,
                                 tmp_path):
    """A SLiM restart must round-trip a tree sequence with no provenance."""
    in_tables = recipe["ts"].dump_tables()
    in_tables.provenances.clear()
    in_tables.sort()
    cleared_ts = pyslim.SlimTreeSequence(in_tables.tree_sequence())

    out_ts = helper_functions.run_slim_restart(
        cleared_ts, restart_name, tmp_path)

    # Compare with provenance removed and both collections sorted.
    out_tables = out_ts.dump_tables()
    out_tables.provenances.clear()
    out_tables.sort()
    in_tables.assert_equals(out_tables)
def test_inconsistent_nodes(self):
    """Nodes whose population is replaced by the row index must be rejected."""
    clean_tables = self.clean_example()
    tables = clean_tables.copy()
    tables.nodes.clear()
    # Copy every node but overwrite its population with its row index.
    for row_index, node in enumerate(clean_tables.nodes):
        tables.nodes.add_row(
            time=node.time,
            flags=node.flags,
            population=row_index,
            individual=node.individual,
            metadata=node.metadata)

    # Both annotating the tables and wrapping the tree sequence must fail.
    with pytest.raises(ValueError):
        pyslim.annotate_defaults_tables(
            tables, model_type='nonWF', slim_generation=1)
    bad_ts = tables.tree_sequence()
    with pytest.raises(ValueError):
        pyslim.SlimTreeSequence(bad_ts)
def merge(self):
    """Merge the first two tree sequences listed in ``self.files``.

    Every file in ``self.files`` is loaded with pyslim, but only the first
    two are combined: all nodes of the second are mapped to ``tskit.NULL``
    and ``add_populations=True`` is passed to ``union``. The result is
    stored on ``self.merged_ts``. ``self.species`` is reset to an empty
    list and is not filled in here.
    """
    self.species = []
    # read in all the tree sequences
    loaded = [pyslim.load(path) for path in self.files]
    first, second = loaded[0], loaded[1]
    unshared = [tskit.NULL] * second.num_nodes
    self.merged_ts = pyslim.SlimTreeSequence(
        first.union(second, node_mapping=unshared, add_populations=True))
def mutate(self, ts):
    """Overlay mutations on ``ts`` and record where they fell.

    Mutations are added at ``self.mutrate`` while keeping existing ones and
    the result is stored on ``self.mts``. Three lists are then stored:
    ``self.allpositions`` (site id of every mutation), ``self.popids``
    (population of each mutation's node) and ``self.positions`` (site ids
    of mutations whose derived state is not ``'1'``).
    """
    self.mts = pyslim.SlimTreeSequence(
        msprime.mutate(ts, rate=self.mutrate, keep=True))

    # save mutation positions and which population they occurred in
    mutations = list(self.mts.mutations())
    every_site = [int(m.site) for m in mutations]
    node_pops = [self.mts.node(m.node).population for m in mutations]
    non_one_sites = [int(m.site) for m in mutations if m.derived_state != '1']

    self.positions = non_one_sites
    self.allpositions = every_site
    self.popids = node_pops
def test_convert_0_1_files(self):
    """Converting 0.1-format files must upgrade the provenance record."""
    for old_ts in self.get_0_1_slim_examples():
        upgraded = pyslim.SlimTreeSequence(old_ts)

        # Conversion appends one provenance entry and keeps the first.
        self.assertEqual(old_ts.num_provenances, 1)
        self.assertEqual(upgraded.num_provenances, 2)
        self.assertEqual(old_ts.provenance(0).record,
                         upgraded.provenance(0).record)

        # The appended record carries the old fields in the new layout.
        old_record = json.loads(old_ts.provenance(0).record)
        new_record = json.loads(upgraded.provenance(1).record)
        self.assertEqual(old_record['model_type'],
                         new_record['parameters']['model_type'])
        self.assertEqual(old_record['generation'],
                         new_record['slim']["generation"])

        # Samples and node flags are untouched.
        self.assertListEqual(list(old_ts.samples()),
                             list(upgraded.samples()))
        self.assertArrayEqual(old_ts.tables.nodes.flags,
                              upgraded.tables.nodes.flags)

        # Spot-check the first tree at 20 randomly drawn samples.
        samples = list(old_ts.samples())
        old_tree = old_ts.first()
        new_tree = upgraded.first()
        for _ in range(20):
            u = random.sample(samples, 1)[0]
            self.assertEqual(old_tree.parent(u), new_tree.parent(u))
            if old_tree.parent(u) != msprime.NULL_NODE:
                self.assertEqual(old_tree.branch_length(u),
                                 new_tree.branch_length(u))
def _merge_ts_pops(self):
    """Merge the two stored simulations into a single SlimTreeSequence.

    A node map is obtained from ``self._match_nodes`` with
    ``other=self._tree_sequences[0]``, ``ts=self._tree_sequences[1]`` and a
    split time of ``int(1 + 2 * self.generations)``. The first tree
    sequence is then merged into the second with ``union`` using that map
    and ``check_shared_equality=True``, and the result is stored on
    ``self.tree_sequence``.

    Raises
    ------
    ValueError
        If more than two tree sequences are stored.
    """
    # check the number of simulations for what to do.
    if self._nts > 2:
        raise ValueError("you cannot enter >2 tree sequences.")

    incoming = self._tree_sequences[0]
    base = self._tree_sequences[1]
    split = int(1 + (2 * self.generations))

    node_map = self._match_nodes(other=incoming, ts=base, split_time=split)
    merged = base.union(incoming, node_map, check_shared_equality=True)
    self.tree_sequence = pyslim.SlimTreeSequence(merged)
def test_load(self):
    """Loading via tables, via direct conversion and from file must agree."""
    for path in self.get_slim_example_files():
        # Baseline: load with tskit.
        plain_ts = tskit.load(path)
        self.assertIs(type(plain_ts), msprime.TreeSequence)
        plain_tables = plain_ts.tables

        # Route 1: transfer the tables.
        from_tables = pyslim.load_tables(plain_tables)
        self.assertIs(type(from_tables), pyslim.SlimTreeSequence)
        self.verify_times(plain_ts, from_tables)
        self.assertEqual(plain_tables, from_tables.tables)

        # Route 2: convert the tree sequence directly.
        converted = pyslim.SlimTreeSequence(plain_ts)
        self.assertIs(type(converted), pyslim.SlimTreeSequence)
        self.verify_times(plain_ts, converted)
        self.assertEqual(plain_tables, converted.tables)

        # Route 3: load into pyslim straight from the file.
        from_file = pyslim.load(path)
        self.assertIs(type(from_file), pyslim.SlimTreeSequence)
        self.assertEqual(plain_tables, from_file.tables)
        self.assertEqual(from_file.slim_generation,
                         converted.slim_generation)
def sample_treeseq(ts, n):
    """Simplify a tree sequence down to ``n`` randomly chosen sample nodes.

    Parameters
    ----------
    ts : SlimTreeSequence
        Full TreeSequence from fwd simulation with tree recording
    n : int
        Number of entries of ``ts.samples()`` to draw, without replacement

    Returns
    -------
    SlimTreeSequence
        The subsampled tree sequence
    """
    # choose n random leaves
    chosen = np.random.choice(ts.samples(), size=n, replace=False)
    simplified = ts.simplify(samples=chosen.astype("int32"))
    return pyslim.SlimTreeSequence(simplified)
def _mutate(self):
    """Add mutations to ``self.tree_sequence`` in place.

    Mutations are simulated with ``msprime.sim_mutations`` at rate
    ``self.mut`` under ``msprime.SLiMMutationModel(type=0)``, keeping any
    existing mutations and seeding from ``self.rng``. The result is
    re-wrapped as a ``pyslim.SlimTreeSequence``. ``self._report_mutations``
    is called before (``allow_m0=False``) and after (``allow_m0=True``).
    """
    # logger report before adding mutations
    self._report_mutations(allow_m0=False)

    # add mutations
    mutation_kwargs = dict(
        rate=self.mut,
        random_seed=self.rng.integers(2**31),
        keep=True,  # whether to keep existing mutations.
        model=msprime.SLiMMutationModel(type=0),
    )
    self.tree_sequence = msprime.sim_mutations(
        self.tree_sequence, **mutation_kwargs)
    self.tree_sequence = pyslim.SlimTreeSequence(self.tree_sequence)

    # logger report after adding mutations
    self._report_mutations(allow_m0=True)
def recapitate(self, decap_trees, demographic_events,
               demography_debugger=False):
    """Recapitate ``decap_trees``, overlay mutations and simplify.

    Three populations are configured, with initial sizes
    ``self._pop_size_domestic_1``, ``self._pop_size_wild_1`` and
    ``self._pop_size_captive`` in that order. Recapitation and mutation
    both use ``self.random_seed``. When ``demography_debugger`` is true the
    demographic history is printed after the simulation steps.

    NOTE(review): ``msprime.mutate`` is called without ``keep=True`` here;
    confirm that dropping pre-existing mutations is intended.

    Returns the simplified tree sequence.
    """
    # msprime uses diploid Ne
    initial_sizes = (
        self._pop_size_domestic_1,
        self._pop_size_wild_1,
        self._pop_size_captive,
    )
    population_configurations = [
        msprime.PopulationConfiguration(initial_size=size)
        for size in initial_sizes
    ]

    recapitated = decap_trees.recapitate(
        recombination_rate=self.seq_features.recombination_rate,
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=self.random_seed)

    # Overlay mutations
    mutated = msprime.mutate(
        recapitated,
        rate=self.seq_features.mutation_rate,
        random_seed=self.random_seed)
    tree_seq = pyslim.SlimTreeSequence(mutated).simplify()

    if demography_debugger:
        debugger = msprime.DemographyDebugger(
            population_configurations=population_configurations,
            demographic_events=demographic_events)
        debugger.print_history()
    return tree_seq
def merge(self, ts1, ts2):
    """Merge two tree sequences and record their populations.

    ``ts2`` is added to ``ts1`` with ``union``: all of its nodes are mapped
    to ``tskit.NULL`` and ``add_populations=True`` is passed. The result is
    stored on ``self.merged_ts``. The following are then stored:

    - ``self.num_alive``: per population, the number of individuals alive
      at time 0;
    - ``self.edge_ids``: ids of populations with at least one such
      individual;
    - ``self.pops``: ``[pop1, pop2]``, the ids of all individuals whose
      population is ``edge_ids[0]`` and ``edge_ids[1]`` respectively.

    Individual 0 is now included in ``self.pops``; the previous loop
    started at index 1 and silently skipped it. An ``IndexError`` is still
    raised when fewer than two populations have individuals alive.
    """
    # merge the tree sequences
    self.merged_ts = pyslim.SlimTreeSequence(
        ts1.union(
            ts2,
            node_mapping=[tskit.NULL] * ts2.num_nodes,
            add_populations=True,
        ))
    merged = self.merged_ts

    # count individuals alive at time 0 in each population
    num_alive = [0] * merged.num_populations
    for ind_id in merged.individuals_alive_at(0):
        num_alive[merged.individual(ind_id).population] += 1
    self.num_alive = num_alive

    # populations that still have someone alive
    self.edge_ids = [pop for pop, count in enumerate(num_alive) if count != 0]

    # Iterate the individuals directly instead of indexing through the
    # container's ``length`` attribute, and start from the first one.
    pop1 = []
    pop2 = []
    for ind in merged.individuals():
        if ind.population == self.edge_ids[0]:
            pop1.append(ind.id)
        elif ind.population == self.edge_ids[1]:
            pop2.append(ind.id)
    self.pops = [pop1, pop2]
def test_convert_0_3_files(self):
    """Converting 0.3-format files must expose SLiM info in metadata."""
    for old_ts in self.get_0_3_slim_examples():
        upgraded = pyslim.SlimTreeSequence(old_ts)

        # Conversion appends one provenance entry and keeps the first.
        self.assertEqual(old_ts.num_provenances, 1)
        self.assertEqual(upgraded.num_provenances, 2)
        self.assertEqual(old_ts.provenance(0).record,
                         upgraded.provenance(0).record)

        # The old provenance record must agree with the new metadata.
        record = json.loads(old_ts.provenance(0).record)
        self.assertIsInstance(upgraded.metadata, dict)
        self.assertIn('SLiM', upgraded.metadata)
        slim_md = upgraded.metadata['SLiM']
        self.assertEqual(record['parameters']['model_type'],
                         slim_md['model_type'])
        self.assertEqual(record['slim']['generation'],
                         slim_md["generation"])

        # Samples and node flags are untouched.
        self.assertListEqual(list(old_ts.samples()),
                             list(upgraded.samples()))
        self.assertArrayEqual(old_ts.tables.nodes.flags,
                              upgraded.tables.nodes.flags)

        # Spot-check the first tree at 20 randomly drawn samples.
        samples = list(old_ts.samples())
        old_tree = old_ts.first()
        new_tree = upgraded.first()
        for _ in range(20):
            u = random.sample(samples, 1)[0]
            self.assertEqual(old_tree.parent(u), new_tree.parent(u))
            if old_tree.parent(u) != tskit.NULL:
                self.assertEqual(old_tree.branch_length(u),
                                 new_tree.branch_length(u))
def test_mutate(self):
    """Top-level metadata must survive ``msprime.mutate`` and re-wrapping."""
    for original in self.get_slim_examples():
        mutated = msprime.mutate(original, rate=1e-8, random_seed=5)
        rewrapped = pyslim.SlimTreeSequence(mutated)
        self.assertEqual(original.metadata, rewrapped.metadata)
def simulate(
        self, demographic_model=None, contig=None, samples=None,
        seed=None, verbosity=0,
        slim_path=None, slim_script=False, slim_scaling_factor=10,
        slim_no_recapitation=False, slim_no_burnin=False, **kwargs):
    """
    Simulate the demographic model using SLiM.
    See :meth:`.Engine.simulate()` for definitions of the
    ``demographic_model``, ``contig``, and ``samples`` parameters.

    :param seed: The seed for the random number generator.
    :type seed: int
    :param slim_path: The full path to the slim executable, or the name of
        a command in the current PATH.
    :type slim_path: str
    :param slim_script: If true, the simulation will not be executed.
        Instead the generated SLiM script will be printed to stdout.
    :type slim_script: bool
    :param slim_scaling_factor: Rescale model parameters by the given value,
        to speed up simulation. Population sizes and generation times are
        divided by this factor, whereas the mutation rate, recombination
        rate, and growth rates are multiplied by the factor.
        See SLiM manual: `5.5 Rescaling population sizes to improve
        simulation performance.`
    :type slim_scaling_factor: float
    :param slim_no_recapitation: Do an explicit burn in, and add
        mutations, within the SLiM simulation. This may be much slower than
        the defaults (recapitation and neutral mutation overlay with
        msprime). The burn in behaviour is to wait until all individuals in
        the ancestral populations have a common ancestor within their
        respective population, and then wait another 10*N generations.
    :type slim_no_recapitation: bool
    :param slim_no_burnin: Do not perform a burn in at the start of the
        simulation.  This option is only relevant when
        ``slim_no_recapitation=True``.
    :type slim_no_burnin: bool
    :return: ``None`` when ``slim_script`` is true, otherwise the resulting
        ``pyslim.SlimTreeSequence``. Extra keyword arguments are accepted
        and ignored.
    """
    run_slim = not slim_script
    do_recap = not slim_no_recapitation
    check_coalescence = slim_no_recapitation and not slim_no_burnin

    if slim_path is None:
        slim_path = self.slim_path()

    if do_recap:
        mutation_rate = contig.mutation_rate
        # Ensure no mutations are introduced by SLiM; they are overlaid
        # afterwards at the saved rate.
        contig = stdpopsim.Contig(
            recombination_map=contig.recombination_map,
            mutation_rate=0,
            genetic_map=contig.genetic_map)

    slim_cmd = [slim_path]
    if seed is not None:
        slim_cmd.extend(["-s", f"{seed}"])

    mktemp = functools.partial(tempfile.NamedTemporaryFile, mode="w")

    @contextlib.contextmanager
    def script_file_f():
        # Yield a temporary .slim file, or stdout in script-only mode.
        f = mktemp(suffix=".slim") if not slim_script else sys.stdout
        try:
            yield f
        finally:
            # Close the temp file even if the body raised.
            # Don't close sys.stdout.
            if not slim_script:
                f.close()

    with script_file_f() as script_file, mktemp(suffix=".ts") as ts_file:

        recap_epoch = slim_makescript(
            script_file, ts_file.name,
            demographic_model, contig, samples,
            slim_scaling_factor, check_coalescence, verbosity)

        script_file.flush()

        if not run_slim:
            return None

        slim_cmd.append(script_file.name)
        stdout = subprocess.DEVNULL if verbosity == 0 else None
        subprocess.check_call(slim_cmd, stdout=stdout)

        ts = pyslim.load(ts_file.name)

    # Node times come from SLiM generation numbers, which may have been
    # divided by a scaling factor for computational tractability.
    tables = ts.dump_tables()
    for table in (tables.nodes, tables.migrations):
        table.time *= slim_scaling_factor
    ts = pyslim.SlimTreeSequence.load_tables(tables)
    ts.slim_generation *= slim_scaling_factor

    if do_recap:
        # Separate seeds for recapitation (s1) and mutation overlay (s2),
        # both derived from ``seed``.
        rng = random.Random(seed)
        s1, s2 = rng.randrange(1, 2**32), rng.randrange(1, 2**32)

        population_configurations = [
            msprime.PopulationConfiguration(
                initial_size=pop.start_size,
                growth_rate=pop.growth_rate)
            for pop in recap_epoch.populations]
        ts = ts.recapitate(
            recombination_rate=contig.recombination_map.mean_recombination_rate,
            population_configurations=population_configurations,
            migration_matrix=recap_epoch.migration_matrix,
            random_seed=s1)

        ts = simplify_remembered(ts)

        # Add neutral mutations.
        ts = pyslim.SlimTreeSequence(msprime.mutate(
            ts, rate=mutation_rate, keep=True, random_seed=s2))

    return ts
def test_pickle(self):
    """A SlimTreeSequence must compare equal after a pickle round trip."""
    original = pyslim.SlimTreeSequence(self.clean_example().tree_sequence())
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    assert restored == original