def test_default_metadata(self):
    """Round-trip the default metadata of every SLiM schema through its codec.

    For each schema in ``pyslim.slim_metadata_schemas`` the default entry
    must contain every property the schema declares (when the schema has a
    dict form) and must survive ``validate_and_encode_row`` followed by
    ``decode_row`` unchanged.  The mutation schema is then re-checked after
    appending one, and then a second, default ``mutation_list_entry``.
    """

    def round_trip(schema, entry):
        # Encode with validation, then decode the resulting bytes again.
        return schema.decode_row(schema.validate_and_encode_row(entry))

    for name in pyslim.slim_metadata_schemas:
        schema = pyslim.slim_metadata_schemas[name]
        default = pyslim.default_slim_metadata(name)
        schema_dict = schema.asdict()
        if schema_dict is not None:
            for prop in schema_dict['properties']:
                assert prop in default
        round_tripped = round_trip(schema, default)
        if default is None:
            assert round_tripped is None
        else:
            assert default == round_tripped

    schema = pyslim.slim_metadata_schemas["mutation"]
    mutation_md = pyslim.default_slim_metadata("mutation")
    # Grow the mutation list one default entry at a time, checking each size.
    for _ in range(2):
        mutation_md['mutation_list'].append(
            pyslim.default_slim_metadata("mutation_list_entry"))
        assert mutation_md == round_trip(schema, mutation_md)
def test_many_populations(self, helper_functions, tmp_path):
    """Add five extra populations, each with one two-node individual.

    The resulting tables are annotated with WF defaults, every individual
    is checked to carry the ``INDIVIDUAL_ALIVE`` flag, and the tree
    sequence is handed to ``helper_functions.run_slim_restart`` with the
    ``restart_WF.slim`` recipe.
    """
    # test we can add more than one population
    base_ts = msprime.sim_ancestry(
        5,
        population_size=10,
        sequence_length=100,
        random_seed=455,
    )
    tables = base_ts.dump_tables()
    for k in range(5):
        pop_md = pyslim.default_slim_metadata('population')
        pop_md['name'] = f"new_pop_num_{k}"
        pop_md['description'] = f"the {k}-th added pop"
        tables.populations.add_row(metadata=pop_md)
        ind_id = tables.individuals.add_row()
        # two nodes per new individual
        for _ in range(2):
            tables.nodes.add_row(
                flags=1, time=0.0, individual=ind_id, population=k)

    annotated = pyslim.annotate_defaults(
        tables.tree_sequence(), model_type='WF', slim_generation=1)
    for ind in annotated.individuals():
        assert ind.flags == pyslim.INDIVIDUAL_ALIVE

    # The restarted tree sequence itself is not inspected here.
    helper_functions.run_slim_restart(
        annotated,
        "restart_WF.slim",
        tmp_path,
        WF=True,
    )
def validate_slim_metadata(self, t):
    """Assert that ``t`` carries a complete top-level ``'SLiM'`` metadata entry.

    Checks that ``'SLiM'`` is present both in the properties of ``t``'s
    metadata schema and in ``t.metadata``, and that every key of the
    default ``tree_sequence`` SLiM metadata appears in both places.

    :param t: An object exposing ``metadata_schema.schema`` and
        ``metadata``; could be tables or a tree sequence.
    """
    # t could be tables or a tree sequence
    schema = t.metadata_schema.schema
    # assertIn (rather than assertTrue(x in y)) names the missing key and
    # shows the container when the check fails.
    self.assertIn('SLiM', schema['properties'])
    self.assertIn('SLiM', t.metadata)
    slim_properties = schema['properties']['SLiM']['properties']
    slim_metadata = t.metadata['SLiM']
    for k in pyslim.default_slim_metadata('tree_sequence')['SLiM']:
        self.assertIn(k, slim_properties)
        self.assertIn(k, slim_metadata)
def test_default_metadata(self):
    """Check that each schema's default metadata survives an encode/decode.

    For every key in ``pyslim.slim_metadata_schemas`` the default entry is
    validated and encoded with that schema, decoded again, and compared
    with the original entry (``None`` must decode back to ``None``).
    """
    for k in pyslim.slim_metadata_schemas:
        schema = pyslim.slim_metadata_schemas[k]
        entry = pyslim.default_slim_metadata(k)
        encoded = schema.validate_and_encode_row(entry)
        decoded = schema.decode_row(encoded)
        if entry is None:
            # assertIsNone reports the offending value on failure, unlike
            # assertTrue(decoded is None).
            self.assertIsNone(decoded)
        else:
            self.assertDictEqual(entry, decoded)
def test_empty_populations(self, helper_functions, tmp_path):
    """Append five populations with no nodes and restart the result.

    The msprime simulation is annotated with WF defaults first; the extra
    populations are added afterwards with no individuals or nodes, and
    the tree sequence is passed to ``helper_functions.run_slim_restart``
    with the ``restart_WF.slim`` recipe.
    """
    # test SLiM doesn't error on having empty populations
    simulated = msprime.sim_ancestry(
        5,
        population_size=10,
        sequence_length=100,
        random_seed=455,
    )
    annotated = pyslim.annotate_defaults(
        simulated, model_type='WF', slim_generation=1)
    tables = annotated.dump_tables()
    for k in range(5):
        pop_md = pyslim.default_slim_metadata('population')
        pop_md['name'] = f"new_pop_num_{k}"
        pop_md['description'] = f"the {k}-th added pop"
        tables.populations.add_row(metadata=pop_md)

    # The restarted tree sequence itself is not inspected here.
    helper_functions.run_slim_restart(
        tables.tree_sequence(),
        "restart_WF.slim",
        tmp_path,
        WF=True,
    )
def test_default_metadata_errors(self):
    """Requesting default metadata for an unknown name raises ``ValueError``."""
    expect_unknown = pytest.raises(
        ValueError, match="Unknown metadata request")
    with expect_unknown:
        pyslim.default_slim_metadata("xxx")
"""
Takes an old tree sequence and updates the metadata *without* properly
updating the top-level metadata.
"""
import tskit
import pyslim

ts = tskit.load("recipe_WF.v3.5.trees")
tables = ts.dump_tables()

# Populations: install pyslim's population schema, then copy each row back.
tables.populations.clear()
tables.populations.metadata_schema = pyslim.slim_metadata_schemas['population']
for p in ts.populations():
    tables.populations.append(p)

# Individuals: install pyslim's individual schema and fill in any keys the
# loaded metadata lacks from the defaults.
tables.individuals.clear()
tables.individuals.metadata_schema = pyslim.slim_metadata_schemas['individual']
defaults = pyslim.default_slim_metadata("individual")
for i in ts.individuals():
    # Start from a fresh copy of the defaults for every row so that values
    # from one individual can never carry over into the next.
    md = dict(defaults)
    md.update(i.metadata)
    tables.individuals.append(i.replace(metadata=md))

# Mutations: install pyslim's mutation schema and copy each row back as-is.
tables.mutations.clear()
tables.mutations.metadata_schema = pyslim.slim_metadata_schemas['mutation']
for m in ts.mutations():
    tables.mutations.append(m)

ts = tables.tree_sequence()
ts.dump("recipe_WF.v3.5_and_v3.6.trees")
def test_known_answer(self):
    """Compare ``population_size`` with hand-computed counts on a tiny example.

    Eight individuals with fixed locations, birth times and ages are
    written into an empty table collection; ``individuals_alive_at`` is
    checked first, then ``population_size`` is evaluated on several
    choices of spatial and temporal bins.
    """
    # a simple example to make sure we've got the edge cases right
    tables = tskit.TableCollection(sequence_length=1)
    pyslim.set_tree_sequence_metadata(tables, model_type='nonWF', generation=0)
    pyslim.set_metadata_schemas(tables)

    # (location, birth time, age) for each individual
    records = [
        ([0, 0], 1, 1),    # alive at 0, 1
        ([0, 1], 2, 2),    # alive at 0, 1, 2
        ([2, 0], 2, 2),    # alive at 0, 1, 2
        ([1, 1], 2, 2),    # alive at 0, 1, 2
        ([0, 0], 1, 0),    # alive at 1
        ([0.5, 1], 1, 0),  # alive at 1
        ([2, 2], 1, 0),    # alive at 1
        ([3, 2], 3, 3),    # alive at 0, 1, 2, 3
    ]

    # All individuals are added before any node, one node per individual.
    for location, _, age in records:
        ind_md = pyslim.default_slim_metadata('individual')
        ind_md['age'] = age
        tables.individuals.add_row(
            location=[*location, np.nan],
            metadata=ind_md,
        )
    for ind_id, (_, birth, _) in enumerate(records):
        tables.nodes.add_row(time=birth, individual=ind_id)

    ts = pyslim.SlimTreeSequence(tables.tree_sequence())

    # check we've got this right
    expected_alive = [
        [0, 1, 2, 3, 7],
        [0, 1, 2, 3, 4, 5, 6, 7],
        [1, 2, 3, 7],
        [7],
    ]
    for when, alive in enumerate(expected_alive):
        np.testing.assert_array_equal(alive, ts.individuals_alive_at(when))

    # no-one
    nobody = pyslim.population_size(
        ts,
        x_bins=np.arange(10),
        y_bins=np.arange(10),
        time_bins=[100, 200, 300],
    )
    np.testing.assert_array_equal(nobody, np.zeros((9, 9, 2)))

    # everyone at the start
    everyone = pyslim.population_size(
        ts,
        x_bins=[0, 10],
        y_bins=[0, 10],
        time_bins=[0, 1],
    )
    np.testing.assert_array_equal(everyone, [[[5]]])

    # should omit the last one
    all_but_last = pyslim.population_size(
        ts,
        x_bins=[0, 3],
        y_bins=[0, 3],
        time_bins=[0, 1],
    )
    np.testing.assert_array_equal(all_but_last, [[[4]]])

    # now should omit the ones at the boundaries
    interior = pyslim.population_size(
        ts,
        x_bins=[0, 1, 2],
        y_bins=[0, 1, 2],
        time_bins=[0, 1, 2, 5],
    )
    expected_interior = [
        [[1, 2, 0], [1, 2, 1 / 3]],
        [[0, 0, 0], [1, 1, 1 / 3]],
    ]
    np.testing.assert_array_equal(interior, expected_interior)