Example #1
0
def extract_ancestors(samples, ts):
    """
    Given the specified sample data file and final (unsimplified) tree sequence output
    by tsinfer, strip out the sample nodes so that what remains can be used as an
    ancestors tree sequence.

    :param samples: Sample data object. Its ``sites_position`` and
        ``sites_inference`` arrays are read; the latter is used to index the
        former to pick the site positions that are retained.
    :param ts: The tree sequence from which the sample nodes are removed.
    :return: A tuple ``(tables, node_id_map)``. ``tables`` is the edited table
        collection (not a tree sequence) and ``node_id_map`` is the value
        returned by ``tables.simplify``.
    """
    position = samples.sites_position[:][samples.sites_inference[:]]
    ts = subset_sites(ts, position)
    tables = ts.dump_tables()

    def replace_node_flags(new_flags):
        # Rewrite only the flags column; every other node column is read back
        # from the table and passed through unchanged.
        nodes = tables.nodes
        nodes.set_columns(
            flags=new_flags,
            time=nodes.time,
            population=nodes.population,
            individual=nodes.individual,
            metadata=nodes.metadata,
            metadata_offset=nodes.metadata_offset)

    # The nodes that we want to keep are all those *except* what
    # has been marked as samples. A separate name is used so that the
    # ``samples`` argument is not rebound to an unrelated array.
    ancestor_nodes = np.where(
        tables.nodes.flags != tskit.NODE_IS_SAMPLE)[0].astype(np.int32)

    # Mark all nodes as samples
    replace_node_flags(np.bitwise_or(tables.nodes.flags, tskit.NODE_IS_SAMPLE))
    # Now simplify down the tables to get rid of all sample edges.
    node_id_map = tables.simplify(
        ancestor_nodes,
        filter_sites=False, filter_individuals=True, filter_populations=False)

    # We cannot have flags that are both samples and have other flags set,
    # so we need to unset all the sample flags for these.
    node_flags = tables.nodes.flags
    # Build the "everything except the sample bit" mask in the dtype of the
    # flags column. Passing the negative Python int ``~NODE_IS_SAMPLE``
    # straight to NumPy is rejected for unsigned arrays under NumPy 2's
    # promotion rules; the typed mask gives the same bits on every version.
    clear_sample_bit = np.bitwise_not(node_flags.dtype.type(tskit.NODE_IS_SAMPLE))
    flags = np.bitwise_and(node_flags, clear_sample_bit)
    # Nodes whose only flag is the sample flag stay marked as samples.
    flags[node_flags == tskit.NODE_IS_SAMPLE] = tskit.NODE_IS_SAMPLE
    replace_node_flags(flags)

    # Drop site metadata and set the ancestral_state to zeros
    num_sites = len(tables.sites)
    tables.sites.set_columns(
        position=tables.sites.position,
        ancestral_state=np.full(num_sites, ord('0'), dtype=np.int8),
        # One character per site, so the offsets are simply 0, 1, ..., num_sites.
        ancestral_state_offset=np.arange(num_sites + 1, dtype=np.uint32))

    # Drop mutation metadata and set the derived_state to ones
    num_mutations = len(tables.mutations)
    tables.mutations.set_columns(
        site=tables.mutations.site,
        node=tables.mutations.node,
        derived_state=np.full(num_mutations, ord('1'), dtype=np.int8),
        derived_state_offset=np.arange(num_mutations + 1, dtype=np.uint32))

    record = provenance.get_provenance_dict(command="extract_ancestors")
    tables.provenances.add_row(record=json.dumps(record))

    return tables, node_id_map
Example #2
0
def snip_centromere(ts, left, right):
    """
    Remove all tree topology from the specified tree sequence over the region
    from ``left`` to ``right``, so that the result is effectively in two halves.

    Edges lying entirely outside the region are kept as they are. Edges that
    overlap it are trimmed to the part left of ``left`` and/or the part right
    of ``right``; an edge contained entirely in the region is dropped.

    :param ts: The tree sequence to cut.
    :param left: Left coordinate of the removed region.
    :param right: Right coordinate of the removed region.
    :return: The tree sequence built from the edited tables.
    :raises ValueError: If ``0 < left < right < ts.sequence_length`` does not
        hold, or if any site lies within the removed region.
    """
    if not (0 < left < right < ts.sequence_length):
        raise ValueError("Invalid centromere coordinates")

    tables = ts.dump_tables()
    if len(tables.sites) > 0:
        site_positions = tables.sites.position
        # The two insertion points differ exactly when some site position p
        # satisfies left <= p < right.
        # NOTE(review): relies on site positions being in ascending order.
        first_at_or_after_left = np.searchsorted(site_positions, left)
        first_at_or_after_right = np.searchsorted(site_positions, right)
        if first_at_or_after_right != first_at_or_after_left:
            raise ValueError("Cannot have sites defined within the centromere")

    original = tables.edges.copy()
    starts = original.left
    ends = original.right
    parents = original.parent
    children = original.child

    # Edges that never touch the region are carried over untouched.
    outside = (starts >= right) | (ends <= left)
    tables.edges.set_columns(
        left=starts[outside],
        right=ends[outside],
        parent=parents[outside],
        child=children[outside],
    )

    # Every remaining edge overlaps the region and contributes up to two pieces.
    overlapping = ~outside
    o_left = starts[overlapping]
    o_right = ends[overlapping]
    o_parent = parents[overlapping]
    o_child = children[overlapping]

    # Piece to the left of the region: only exists when the edge starts before it.
    has_left_piece = o_left < left
    tables.edges.append_columns(
        left=o_left[has_left_piece],
        right=np.full(np.count_nonzero(has_left_piece), left, dtype=np.float64),
        parent=o_parent[has_left_piece],
        child=o_child[has_left_piece],
    )

    # Piece to the right of the region: only exists when the edge ends after it.
    has_right_piece = o_right > right
    tables.edges.append_columns(
        left=np.full(np.count_nonzero(has_right_piece), right, dtype=np.float64),
        right=o_right[has_right_piece],
        parent=o_parent[has_right_piece],
        child=o_child[has_right_piece],
    )

    tables.sort()
    record = provenance.get_provenance_dict(
        command="snip_centromere", left=left, right=right)
    tables.provenances.add_row(record=json.dumps(record))
    return tables.tree_sequence()
Example #3
0
 def validate_encoding(self, params):
     """
     Check that ``params`` passed as keyword arguments to
     ``provenance.get_provenance_dict`` come back unchanged under the
     "parameters" entry, alongside the "command" entry.
     """
     parameters = provenance.get_provenance_dict("test", **params)["parameters"]
     assert parameters["command"] == "test"
     # Remove the command entry so that only the caller-supplied parameters
     # remain for the comparison.
     del parameters["command"]
     assert parameters == params
Example #4
0
 def test_no_command(self):
     """Calling ``get_provenance_dict`` with no arguments must raise ValueError."""
     build_provenance = provenance.get_provenance_dict
     with pytest.raises(ValueError):
         build_provenance()
Example #5
0
 def validate_encoding(self, params):
     """
     Check that ``params`` passed as keyword arguments to
     ``provenance.get_provenance_dict`` come back unchanged under the
     "parameters" entry, alongside the "command" entry.
     """
     parameters = provenance.get_provenance_dict("test", **params)["parameters"]
     self.assertEqual(parameters["command"], "test")
     # Remove the command entry so that only the caller-supplied parameters
     # remain for the comparison.
     del parameters["command"]
     self.assertEqual(parameters, params)
Example #6
0
 def test_no_command(self):
     """Calling ``get_provenance_dict`` with no arguments must raise ValueError."""
     self.assertRaises(ValueError, provenance.get_provenance_dict)