Exemple #1
0
 def test_all_fields(self):
     """
     Verify a tree sequence whose tables all carry metadata.

     Simulates two sampled populations that split from a common ancestral
     population, records migrations, adds mutations, and then attaches
     JSON metadata to every table except provenances and edges before
     handing the result to ``self.verify``.
     """
     demog = msprime.Demography()
     for pop_name, size in (("A", 10_000), ("B", 5_000), ("C", 1_000)):
         demog.add_population(name=pop_name, initial_size=size)
     demog.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

     ancestry = msprime.sim_ancestry(
         samples={"A": 1, "B": 1},
         demography=demog,
         random_seed=42,
         record_migrations=True,
     )
     mutated = msprime.sim_mutations(ancestry, rate=1, random_seed=42)
     tables = mutated.dump_tables()

     skipped = ["provenances", "edges"]
     for name, tbl in tables.table_name_map.items():
         if name in skipped:
             continue
         tbl.metadata_schema = tskit.MetadataSchema({"codec": "json"})
         # One small JSON document per row, tagged with table name and row index.
         metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(tbl))]
         packed, offsets = tskit.pack_strings(metadatas)
         # Re-set every existing column, swapping in the new metadata columns.
         columns = dict(tbl.asdict())
         columns["metadata"] = packed
         columns["metadata_offset"] = offsets
         tbl.set_columns(**columns)

     # Top-level (tree sequence) metadata uses the same JSON codec.
     tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
     tables.metadata = "Test metadata"
     self.verify(tables.tree_sequence())
 def test_json(self):
     """
     Round-trip JSON-encoded node metadata through the tables and a file.

     Each node gets a small JSON-encodable dict; the test checks that the
     packed columns are stored unchanged, that decoding each node's
     metadata gives back the original dict, and that the node table
     survives a dump/load cycle via ``self.temp_file``.
     """
     tables = msprime.simulate(10, random_seed=1).dump_tables()
     nodes = tables.nodes

     # Build one JSON-encodable Python object per node.
     metadata = []
     for j in range(len(nodes)):
         metadata.append({"one": j, "two": 2 * j, "three": list(range(j))})

     json_rows = [json.dumps(item) for item in metadata]
     encoded, offset = tskit.pack_strings(json_rows)
     nodes.set_columns(
         flags=nodes.flags,
         time=nodes.time,
         population=nodes.population,
         metadata=encoded,
         metadata_offset=offset,
     )
     # The packed columns must be stored exactly as supplied.
     self.assertTrue(np.array_equal(nodes.metadata_offset, offset))
     self.assertTrue(np.array_equal(nodes.metadata, encoded))

     ts1 = tables.tree_sequence()
     for node, expected in zip(ts1.nodes(), metadata):
         raw_text = node.metadata.decode()
         self.assertEqual(json.loads(raw_text), expected)

     # The metadata must also survive being written to and read from disk.
     ts1.dump(self.temp_file)
     ts2 = tskit.load(self.temp_file)
     self.assertEqual(ts1.tables.nodes, ts2.tables.nodes)
Exemple #3
0
def full_ts():
    """
    Return a tree sequence that has data in all fields.

    Duplicated from tskit's conftest.py, as other test suites using this file
    will not have that fixture defined.
    """
    n = 10
    t = 1
    # Two sampled populations of n // 2 samples each, plus an unsampled third
    # population that both are moved into at time t.
    population_configurations = [
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(0),
    ]
    demographic_events = [
        msprime.MassMigration(time=t, source=0, destination=2),
        msprime.MassMigration(time=t, source=1, destination=2),
    ]
    ts = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=1,
        mutation_rate=1,
        record_migrations=True,
    )
    tables = ts.dump_tables()
    # TODO replace this with properly linked up individuals using sim_ancestry
    # once 1.0 is released.
    for j in range(n):
        tables.individuals.add_row(
            flags=j, location=(j, j), parents=(j - 1, j - 1)
        )

    for name, table in tables.name_map.items():
        # Provenances are skipped here; every other table gets metadata.
        if name != "provenances":
            table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
            # Each row must be a valid JSON document: a bare token such as
            # n_nodes_0 cannot be decoded under the JSON codec set above.
            metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
            metadata, metadata_offset = tskit.pack_strings(metadatas)
            # Re-set all existing columns, replacing only the metadata ones.
            table.set_columns(
                **{
                    **table.asdict(),
                    "metadata": metadata,
                    "metadata_offset": metadata_offset,
                }
            )
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"

    # Add some more provenance so we have enough rows for the offset deletion test.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)
    return tables.tree_sequence()
Exemple #4
0
def full_ts():
    """
    A tree sequence with data in all fields - duplicated from tskit's conftest.py
    as other test suites using this file will not have that fixture defined.
    """
    # Populations A and B are sampled and split from ancestral population C.
    demography = msprime.Demography()
    demography.add_population(initial_size=100, name="A")
    demography.add_population(initial_size=100, name="B")
    demography.add_population(initial_size=100, name="C")
    demography.add_population_split(time=10, ancestral="C", derived=["A", "B"])

    ts = msprime.sim_ancestry(
        {"A": 5, "B": 5},
        demography=demography,
        random_seed=1,
        sequence_length=10,
        record_migrations=True,
    )
    # Guard against a simulation that leaves any of these tables empty.
    assert ts.num_migrations > 0
    assert ts.num_individuals > 0
    ts = msprime.sim_mutations(ts, rate=0.1, random_seed=2)
    assert ts.num_mutations > 0
    tables = ts.dump_tables()
    tables.individuals.clear()

    # Rebuild the individuals, one row per simulated individual, so that the
    # location and parents columns are populated.
    for ind in ts.individuals():
        tables.individuals.add_row(flags=0, location=[ind.id, ind.id], parents=[-1, -1])

    for name, table in tables.table_name_map.items():
        # Provenances are skipped here; every other table gets metadata.
        if name != "provenances":
            table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
            # Each row must be a valid JSON document: a bare token such as
            # n_nodes_0 cannot be decoded under the JSON codec set above.
            metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
            metadata, metadata_offset = tskit.pack_strings(metadatas)
            # Re-set all existing columns, replacing only the metadata ones.
            table.set_columns(
                **{
                    **table.asdict(),
                    "metadata": metadata,
                    "metadata_offset": metadata_offset,
                }
            )
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = {"A": "Test metadata"}

    # Fill in every reference sequence field as well.
    tables.reference_sequence.data = "A" * int(tables.sequence_length)
    tables.reference_sequence.url = "https://example.com/sequence"
    tables.reference_sequence.metadata_schema = tskit.MetadataSchema.permissive_json()
    tables.reference_sequence.metadata = {"A": "Test metadata"}

    # Add some more provenance so we have enough rows for the offset deletion test.
    for j in range(10):
        tables.provenances.add_row(timestamp="x" * j, record="y" * j)
    return tables.tree_sequence()
Exemple #5
0
def node_metadata_example():
    """
    Return a simulated tree sequence whose nodes carry string metadata.

    Node ``u`` is given the metadata ``n_{u}``. The remaining node columns
    (flags, time, population, individual) are passed through unchanged.
    """
    ts = msprime.simulate(
        sample_size=100, recombination_rate=0.1, length=10, random_seed=1
    )
    tables = ts.dump_tables()
    metadatas = [f"n_{u}" for u in range(ts.num_nodes)]
    packed, offset = tskit.pack_strings(metadatas)
    nodes = tables.nodes
    # Pass every existing column back in so that attaching metadata does not
    # drop the nodes' population and individual assignments.
    nodes.set_columns(
        flags=nodes.flags,
        time=nodes.time,
        population=nodes.population,
        individual=nodes.individual,
        metadata=packed,
        metadata_offset=offset,
    )
    return tables.tree_sequence()
Exemple #6
0
def ts_fixture():
    """
    Build a tree sequence that has data in every table and field.

    Five populations are defined, of which A and B are sampled and split
    from C. Migrations and provenance are recorded, mutations are added,
    individuals get locations and parents, and every table except
    provenances gets JSON metadata.
    """
    demog = msprime.Demography()
    population_sizes = [
        ("A", 10_000),
        ("B", 5_000),
        ("C", 1_000),
        ("D", 500),
        ("E", 100),
    ]
    for pop_name, size in population_sizes:
        demog.add_population(name=pop_name, initial_size=size)
    demog.add_population_split(time=1000, derived=["A", "B"], ancestral="C")

    ancestry = msprime.sim_ancestry(
        samples={"A": 10, "B": 10},
        demography=demog,
        sequence_length=5,
        random_seed=42,
        record_migrations=True,
        record_provenance=True,
    )
    mutated = msprime.sim_mutations(ancestry, rate=0.001, random_seed=42)
    tables = mutated.dump_tables()

    # Rewrite the individuals with a location and parents filled in.
    original_individuals = tables.individuals.copy()
    tables.individuals.clear()
    for i, row in enumerate(original_individuals):
        updated = row.replace(location=[i, i + 1], parents=[i - 1, i - 1])
        tables.individuals.append(updated)

    for name, tbl in tables.name_map.items():
        if name == "provenances":
            continue
        tbl.metadata_schema = tskit.MetadataSchema({"codec": "json"})
        # One small JSON document per row, tagged with table name and row index.
        metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(tbl))]
        packed, offsets = tskit.pack_strings(metadatas)
        # Re-set every existing column, swapping in the new metadata columns.
        columns = dict(tbl.asdict())
        columns["metadata"] = packed
        columns["metadata_offset"] = offsets
        tbl.set_columns(**columns)

    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"

    # Pad the provenance table so that tests have enough rows to work with.
    for _ in range(3):
        tables.provenances.add_row(record="A")

    return tables.tree_sequence()
Exemple #7
0
def migration_metadata_example():
    """
    Return the ``migration_example`` tree sequence with metadata on each
    migration: migration ``u`` is given the string ``n_{u}``.
    """
    base_ts = migration_example()
    tables = base_ts.dump_tables()
    migrations = tables.migrations

    labels = []
    for u in range(base_ts.num_migrations):
        labels.append(f"n_{u}")
    packed, offset = tskit.pack_strings(labels)

    # Every existing column is passed back unchanged next to the new metadata.
    migrations.set_columns(
        left=migrations.left,
        right=migrations.right,
        node=migrations.node,
        source=migrations.source,
        dest=migrations.dest,
        time=migrations.time,
        metadata=packed,
        metadata_offset=offset,
    )
    return tables.tree_sequence()
Exemple #8
0
def edge_metadata_example():
    """
    Return a simulated tree sequence with metadata on each edge: edge ``u``
    is given the string ``edge_{u}``.
    """
    sim_ts = msprime.simulate(
        sample_size=100,
        recombination_rate=0.1,
        length=10,
        random_seed=1,
    )
    tables = sim_ts.dump_tables()
    edges = tables.edges

    labels = []
    for u in range(sim_ts.num_edges):
        labels.append(f"edge_{u}")
    packed, offset = tskit.pack_strings(labels)

    # Every existing column is passed back unchanged next to the new metadata.
    edges.set_columns(
        left=edges.left,
        right=edges.right,
        parent=edges.parent,
        child=edges.child,
        metadata=packed,
        metadata_offset=offset,
    )
    return tables.tree_sequence()
Exemple #9
0
def ts_fixture():
    """
    A tree sequence with data in all fields
    """
    n = 10
    t = 1
    # Two sampled populations of n // 2 samples each, plus an unsampled third
    # population that both are moved into at time t.
    population_configurations = [
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(n // 2),
        msprime.PopulationConfiguration(0),
    ]
    demographic_events = [
        msprime.MassMigration(time=t, source=0, destination=2),
        msprime.MassMigration(time=t, source=1, destination=2),
    ]
    ts = msprime.simulate(
        population_configurations=population_configurations,
        demographic_events=demographic_events,
        random_seed=1,
        mutation_rate=1,
        record_migrations=True,
    )
    tables = ts.dump_tables()
    # TODO replace this with properly linked up individuals using sim_ancestry
    # once 1.0 is released.
    for j in range(n):
        tables.individuals.add_row(
            flags=j, location=(j, j), parents=(j - 1, j - 1)
        )

    for name, table in tables.name_map.items():
        # Provenances are skipped here; every other table gets metadata.
        if name != "provenances":
            table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
            # Each row must be a valid JSON document: a bare token such as
            # n_nodes_0 cannot be decoded under the JSON codec set above.
            metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
            metadata, metadata_offset = tskit.pack_strings(metadatas)
            # Re-set all existing columns, replacing only the metadata ones.
            table.set_columns(
                **{
                    **table.asdict(),
                    "metadata": metadata,
                    "metadata_offset": metadata_offset,
                }
            )
    tables.metadata_schema = tskit.MetadataSchema({"codec": "json"})
    tables.metadata = "Test metadata"
    return tables.tree_sequence()