def test_all_fields(self):
    """Verify a tree sequence carrying metadata in every supported table."""
    demography = msprime.Demography()
    for pop_name, pop_size in [("A", 10_000), ("B", 5_000), ("C", 1_000)]:
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")
    ts = msprime.sim_ancestry(
        samples={"A": 1, "B": 1},
        demography=demography,
        random_seed=42,
        record_migrations=True,
    )
    ts = msprime.sim_mutations(ts, rate=1, random_seed=42)
    tables = ts.dump_tables()
    for name, table in tables.table_name_map.items():
        # Provenances and edges are deliberately left without metadata here.
        if name in ("provenances", "edges"):
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        row_metadata = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
        packed, packed_offset = tskit.pack_strings(row_metadata)
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    self.verify(tables.tree_sequence())
def test_json(self):
    """Round-trip JSON-encoded node metadata through pack, dump and load."""
    ts = msprime.simulate(10, random_seed=1)
    tables = ts.dump_tables()
    nodes = tables.nodes
    # Build JSON-serialisable metadata for every node in the table.
    metadata = []
    for j in range(len(nodes)):
        metadata.append({"one": j, "two": 2 * j, "three": list(range(j))})
    encoded, offset = tskit.pack_strings(json.dumps(m) for m in metadata)
    nodes.set_columns(
        flags=nodes.flags,
        time=nodes.time,
        population=nodes.population,
        metadata_offset=offset,
        metadata=encoded,
    )
    self.assertTrue(np.array_equal(nodes.metadata_offset, offset))
    self.assertTrue(np.array_equal(nodes.metadata, encoded))
    ts1 = tables.tree_sequence()
    # Each node's raw metadata must decode back to the original dict.
    for j, node in enumerate(ts1.nodes()):
        self.assertEqual(json.loads(node.metadata.decode()), metadata[j])
    # The metadata must also survive a file round trip.
    ts1.dump(self.temp_file)
    ts2 = tskit.load(self.temp_file)
    self.assertEqual(ts1.tables.nodes, ts2.tables.nodes)
def full_ts():
    """
    Return a tree sequence that has data in all fields.

    Duplicated from tskit's conftest.py as other test suites using this
    file will not have that fixture defined.
    """
    # NOTE: the original had two back-to-back string literals here; the
    # second was a dead no-op expression. They are merged into one docstring
    # (and the "duplcated" typo fixed).
    n = 10
    t = 1
    population_configurations = [
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(0),
    ]
    demographic_events = [
        msprime.MassMigration(time=t, source=0, destination=2),
        msprime.MassMigration(time=t, source=1, destination=2),
    ]
    ts = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=1,
        mutation_rate=1,
        record_migrations=True,
    )
    tables = ts.dump_tables()
    # TODO replace this with properly linked up individuals using sim_ancestry
    # once 1.0 is released.
    for j in range(n):
        tables.individuals.add_row(flags=j, location=(j, j), parents=(j - 1, j - 1))
    # Attach JSON metadata to every table except provenances.
    for name, table in tables.name_map.items():
        if name != "provenances":
            table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
            metadatas = [f"n_{name}_{u}" for u in range(len(table))]
            metadata, metadata_offset = tskit.pack_strings(metadatas)
            table.set_columns(
                **{
                    **table.asdict(),
                    "metadata": metadata,
                    "metadata_offset": metadata_offset,
                }
            )
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    # Add some more provenance so we have enough rows for the offset deletion test.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)
    return tables.tree_sequence()
def full_ts():
    """
    A tree sequence with data in all fields - duplicated from tskit's
    conftest.py as other test suites using this file will not have that
    fixture defined.
    """
    demography = msprime.Demography()
    for label in ("A", "B", "C"):
        demography.add_population(initial_size=100, name=label)
    demography.add_population_split(time=10, ancestral="C", derived=["A", "B"])
    ts = msprime.sim_ancestry(
        {"A": 5, "B": 5},
        demography=demography,
        random_seed=1,
        sequence_length=10,
        record_migrations=True,
    )
    assert ts.num_migrations > 0
    assert ts.num_individuals > 0
    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)
    assert ts.num_mutations > 0
    tables = ts.dump_tables()
    # Rebuild individuals so the location/parents columns contain data.
    tables.individuals.clear()
    for ind in ts.individuals():
        tables.individuals.add_row(flags=0, location=[ind.id, ind.id], parents=[-1, -1])
    # Attach JSON metadata to every table except provenances.
    for name, table in tables.table_name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        packed, packed_offset = tskit.pack_strings(
            [f"n_{name}_{u}" for u in range(len(table))]
        )
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = {"A": "Test metadata"}
    # Populate every reference-sequence field as well.
    tables.reference_sequence.data = "A" * int(tables.sequence_length)
    tables.reference_sequence.url = "https://example.com/sequence"
    tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.reference_sequence.metadata = {"A": "Test metadata"}
    # Add some more provenance so we have enough rows for the offset deletion test.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)
    return tables.tree_sequence()
def node_metadata_example():
    """
    Return a simulated tree sequence whose nodes carry raw string metadata.
    """
    ts = msprime.simulate(
        sample_size=100, recombination_rate=0.1, length=10, random_seed=1
    )
    tables = ts.dump_tables()
    metadatas = [f"n_{u}" for u in range(ts.num_nodes)]
    packed, offset = tskit.pack_strings(metadatas)
    # Pass all existing node columns through: the previous version omitted
    # population/individual, silently resetting that data to the defaults.
    tables.nodes.set_columns(
        metadata=packed,
        metadata_offset=offset,
        flags=tables.nodes.flags,
        time=tables.nodes.time,
        population=tables.nodes.population,
        individual=tables.nodes.individual,
    )
    return tables.tree_sequence()
def ts_fixture():
    """
    A tree sequence with data in all fields
    """
    demography = msprime.Demography()
    for pop_name, pop_size in [
        ("A", 10_000),
        ("B", 5_000),
        ("C", 1_000),
        ("D", 500),
        ("E", 100),
    ]:
        demography.add_population(name=pop_name, initial_size=pop_size)
    demography.add_population_split(time=1000, derived=["A", "B"], ancestral="C")
    ts = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demography,
        sequence_length=5,
        random_seed=42,
        record_migrations=True,
        record_provenance=True,
    )
    ts = msprime.sim_mutations(ts, rate=0.001, random_seed=42)
    tables = ts.dump_tables()
    # Rewrite each individual so location/parents columns hold data.
    originals = tables.individuals.copy()
    tables.individuals.clear()
    for i, individual in enumerate(originals):
        tables.individuals.append(
            individual.replace(location=[i, i + 1], parents=[i - 1, i - 1])
        )
    # Attach JSON metadata to every table except provenances.
    for name, table in tables.name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        packed, packed_offset = tskit.pack_strings(
            [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
        )
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    # Add some more rows to provenance to have enough for testing.
    for _ in range(3):
        tables.provenances.add_row(record="A")
    return tables.tree_sequence()
def migration_metadata_example():
    """Return migration_example() with string metadata on every migration."""
    ts = migration_example()
    tables = ts.dump_tables()
    migrations = tables.migrations
    packed, offset = tskit.pack_strings(
        [f"n_{u}" for u in range(ts.num_migrations)]
    )
    migrations.set_columns(
        metadata=packed,
        metadata_offset=offset,
        left=migrations.left,
        right=migrations.right,
        source=migrations.source,
        dest=migrations.dest,
        node=migrations.node,
        time=migrations.time,
    )
    return tables.tree_sequence()
def edge_metadata_example():
    """Return a simulated tree sequence with string metadata on every edge."""
    ts = msprime.simulate(
        sample_size=100, recombination_rate=0.1, length=10, random_seed=1
    )
    tables = ts.dump_tables()
    edges = tables.edges
    packed, offset = tskit.pack_strings(
        [f"edge_{u}" for u in range(ts.num_edges)]
    )
    edges.set_columns(
        metadata=packed,
        metadata_offset=offset,
        left=edges.left,
        right=edges.right,
        child=edges.child,
        parent=edges.parent,
    )
    return tables.tree_sequence()
def ts_fixture():
    """
    A tree sequence with data in all fields
    """
    n = 10
    t = 1
    half = n // 2
    population_configurations = [
        msprime.PopulationConfiguration(half),
        msprime.PopulationConfiguration(half),
        msprime.PopulationConfiguration(0),
    ]
    # Merge both sample populations into population 2 at time t.
    demographic_events = [
        msprime.MassMigration(time=t, source=0, destination=2),
        msprime.MassMigration(time=t, source=1, destination=2),
    ]
    ts = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=1,
        mutation_rate=1,
        record_migrations=True,
    )
    tables = ts.dump_tables()
    # TODO replace this with properly linked up individuals using sim_ancestry
    # once 1.0 is released.
    for j in range(n):
        tables.individuals.add_row(flags=j, location=(j, j), parents=(j - 1, j - 1))
    # Attach JSON metadata to every table except provenances.
    for name, table in tables.name_map.items():
        if name == "provenances":
            continue
        table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        packed, packed_offset = tskit.pack_strings(
            [f"n_{name}_{u}" for u in range(len(table))]
        )
        columns = table.asdict()
        columns["metadata"] = packed
        columns["metadata_offset"] = packed_offset
        table.set_columns(**columns)
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    return tables.tree_sequence()