Exemple #1
0
def _load_legacy_hdf5_v3(root, remove_duplicate_positions):
    """
    Convert the contents of a version 3 legacy HDF5 file into a tree sequence.

    :param root: Mapping-like root of the opened file. It must provide a
        ``"trees"`` group containing ``"nodes"``, ``"breakpoints"`` and
        ``"records"``; the ``"mutations"`` and ``"provenance"`` entries are
        optional.
    :param remove_duplicate_positions: Passed through unchanged to
        ``_convert_hdf5_mutations`` when the file has a mutations group.
    :return: The value returned by ``msprime.load_tables`` for the converted
        node, edge, site, mutation and provenance tables.
    """
    # get the trees group for the records and samples
    trees_group = root["trees"]
    nodes_group = trees_group["nodes"]
    time = np.array(nodes_group["time"])

    breakpoints = np.array(trees_group["breakpoints"])
    records_group = trees_group["records"]
    left_indexes = np.array(records_group["left"])
    right_indexes = np.array(records_group["right"])
    record_node = np.array(records_group["node"], dtype=np.int32)
    num_nodes = time.shape[0]
    # Every node whose ID is below the smallest record parent is flagged as
    # a sample (presumably legacy files number the samples first -- confirm).
    sample_size = np.min(record_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    flags[:sample_size] = msprime.NODE_IS_SAMPLE

    # Each record is expanded into one edge row per child. The record stores
    # its interval as indexes into ``breakpoints``; np.repeat duplicates the
    # resolved coordinates and the parent ID ``num_children`` times each,
    # replacing the former element-by-element Python loop.
    children_length = np.array(records_group["num_children"], dtype=np.uint32)
    # np.repeat needs counts that cast safely to the platform index type.
    repeats = children_length.astype(np.intp)
    left = np.repeat(breakpoints[left_indexes], repeats).astype(
        np.float64, copy=False)
    right = np.repeat(breakpoints[right_indexes], repeats).astype(
        np.float64, copy=False)
    parent = np.repeat(record_node, repeats)

    nodes = msprime.NodeTable()
    nodes.set_columns(flags=flags,
                      time=time,
                      population=nodes_group["population"])
    edges = msprime.EdgeTable()
    edges.set_columns(left=left,
                      right=right,
                      parent=parent,
                      child=records_group["children"])
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    if "mutations" in root:
        _convert_hdf5_mutations(root["mutations"], sites, mutations,
                                remove_duplicate_positions)
    # Provenance records copied from the legacy file all receive this
    # placeholder timestamp.
    old_timestamp = datetime.datetime.min.isoformat()
    provenances = msprime.ProvenanceTable()
    if "provenance" in root:
        for record in root["provenance"]:
            provenances.add_row(timestamp=old_timestamp, record=record)
    provenances.add_row(_get_upgrade_provenance(root))
    msprime.sort_tables(nodes=nodes,
                        edges=edges,
                        sites=sites,
                        mutations=mutations)
    return msprime.load_tables(nodes=nodes,
                               edges=edges,
                               sites=sites,
                               mutations=mutations,
                               provenances=provenances)
Exemple #2
0
def provenance_timestamp_only_example():
    """
    Return a tree sequence whose only provenance row has an empty record.

    A 10-sample simulation (``random_seed=1``) supplies the node and edge
    tables; they are reloaded together with a fresh provenance table that
    holds a single row with timestamp ``"12345"`` and record ``""``.
    """
    simulated_tables = msprime.simulate(10, random_seed=1).dump_tables()
    timestamp_only = msprime.ProvenanceTable()
    timestamp_only.add_row(timestamp="12345", record="")
    return msprime.load_tables(
        nodes=simulated_tables.nodes,
        edges=simulated_tables.edges,
        provenances=timestamp_only)
Exemple #3
0
    def run(self, ngens):
        """
        Simulate ``ngens`` generations and return the recorded tables.

        The population always holds ``self.N`` node IDs. In every generation
        (times ``ngens - 1`` down to ``0``) each individual is marked dead
        when ``random.random() > self.survival``; every dead individual is
        replaced by a new node whose genome comes from a left parent on
        ``[0, bp)`` and a right parent on ``[bp, 1)``, where ``bp`` is drawn
        by ``self.random_breakpoint()``. Nodes still in the population at the
        end are flagged as samples.

        :param ngens: Number of generations to simulate.
        :return: An ``msprime.TableCollection`` holding the node and edge
            tables built here plus empty migration, site, mutation and
            provenance tables.
        """
        nodes = msprime.NodeTable()
        edges = msprime.EdgeTable()
        migrations = msprime.MigrationTable()
        sites = msprime.SiteTable()
        mutations = msprime.MutationTable()
        provenances = msprime.ProvenanceTable()
        if self.deep_history:
            # initial population
            init_ts = msprime.simulate(self.N, recombination_rate=1.0)
            init_ts.dump_tables(nodes=nodes, edges=edges)
            # Push the simulated history back so it predates generation ngens.
            nodes.set_columns(time=nodes.time + ngens, flags=nodes.flags)
        else:
            for _ in range(self.N):
                nodes.add_row(time=ngens)

        pop = list(range(self.N))
        for t in range(ngens - 1, -1, -1):
            if self.debug:
                print("t:", t)
                print("pop:", pop)

            dead = [random.random() > self.survival for _ in pop]
            num_dead = sum(dead)
            # sample these first so that all parents are from the previous gen
            new_parents = [(random.choice(pop), random.choice(pop))
                           for _ in range(num_dead)]
            if self.debug:
                print("Replacing", num_dead, "individuals.")
            parent_pairs = iter(new_parents)
            for j, is_dead in enumerate(dead):
                if not is_dead:
                    continue
                # this is: offspring ID, lparent, rparent, breakpoint
                offspring = nodes.num_rows
                nodes.add_row(time=t)
                lparent, rparent = next(parent_pairs)
                bp = self.random_breakpoint()
                if self.debug:
                    print("--->", offspring, lparent, rparent, bp)
                pop[j] = offspring
                # A breakpoint at either end means the whole genome comes
                # from one parent, so the empty interval is skipped.
                if bp > 0.0:
                    edges.add_row(left=0.0,
                                  right=bp,
                                  parent=lparent,
                                  child=offspring)
                if bp < 1.0:
                    edges.add_row(left=bp,
                                  right=1.0,
                                  parent=rparent,
                                  child=offspring)

        if self.debug:
            print("Done! Final pop:")
            print(pop)
        # Use a set for membership: testing against the list made this
        # step quadratic in the number of nodes.
        survivors = set(pop)
        flags = [(msprime.NODE_IS_SAMPLE if u in survivors else 0)
                 for u in range(nodes.num_rows)]
        nodes.set_columns(time=nodes.time, flags=flags)
        if self.debug:
            print("Done.")
            print("Nodes:")
            print(nodes)
            print("Edges:")
            print(edges)
        return msprime.TableCollection(nodes, edges, migrations, sites,
                                       mutations, provenances)
Exemple #4
0
def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Convert the contents of a version 2 legacy HDF5 file into a tree sequence.

    :param root: Mapping-like root of the opened file. It must provide a
        ``"trees"`` group of coalescence records; the ``"samples"`` and
        ``"mutations"`` groups are optional.
    :param remove_duplicate_positions: Passed through unchanged to
        ``_convert_hdf5_mutations`` when the file has a mutations group.
    :return: The value returned by ``msprime.load_tables`` for the converted
        node, edge, site, mutation and provenance tables.
    """
    trees_group = root["trees"]
    # Provenance rows rebuilt from the legacy attributes all share this
    # placeholder timestamp.
    legacy_timestamp = datetime.datetime.min.isoformat()
    provenances = msprime.ProvenanceTable()
    trees_record = _get_v2_provenance("generate_trees", trees_group.attrs)
    provenances.add_row(timestamp=legacy_timestamp, record=trees_record)

    # Each coalescence record yields two consecutive edge rows that share the
    # record's parent and interval; the flattened children column supplies
    # the child of each row (assumes two children per record -- confirm).
    cr_node = np.array(trees_group["node"], dtype=np.int32)
    num_edges = 2 * cr_node.shape[0]
    parent = np.zeros(num_edges, dtype=np.int32)
    left = np.zeros(num_edges, dtype=np.float64)
    right = np.zeros(num_edges, dtype=np.float64)
    for offset in (0, 1):
        parent[offset::2] = cr_node
        left[offset::2] = trees_group["left"]
        right[offset::2] = trees_group["right"]
    child = np.array(trees_group["children"], dtype=np.int32).flatten()
    edges = msprime.EdgeTable()
    edges.set_columns(left=left, right=right, parent=parent, child=child)

    # The node table is sized by the largest ID seen as a parent or a child.
    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Every node whose ID is below the smallest record parent is a sample.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    flags[:sample_size] = msprime.NODE_IS_SAMPLE
    time = np.zeros(num_nodes, dtype=np.float64)
    time[cr_node] = np.array(trees_group["time"])
    population = np.zeros(num_nodes, dtype=np.int32)
    population[cr_node] = np.array(trees_group["population"], dtype=np.int32)
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    nodes = msprime.NodeTable()
    nodes.set_columns(flags=flags, population=population, time=time)

    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(mutations_group, sites, mutations,
                                remove_duplicate_positions)
        mutations_record = _get_v2_provenance("generate_mutations",
                                              mutations_group.attrs)
        provenances.add_row(timestamp=legacy_timestamp,
                            record=mutations_record)
    provenances.add_row(_get_upgrade_provenance(root))

    msprime.sort_tables(
        nodes=nodes, edges=edges, sites=sites, mutations=mutations)
    return msprime.load_tables(
        nodes=nodes,
        edges=edges,
        sites=sites,
        mutations=mutations,
        provenances=provenances)
Exemple #5
0
def _load_legacy_hdf5_v10(root, remove_duplicate_positions=False):
    """
    Convert the contents of a version 10 legacy HDF5 file into a tree sequence.

    Columns are copied straight from the ``"nodes"``, ``"edges"``,
    ``"migrations"``, ``"sites"``, ``"mutations"`` and ``"provenances"``
    groups. A table is left empty when its key column is absent from its
    group, and metadata columns are only passed on when the group has them.

    :param root: Mapping-like root of the opened file providing the groups
        listed above.
    :param remove_duplicate_positions: Ignored; duplicate positions cannot
        occur in v10 files.
    :return: The value returned by ``msprime.load_tables`` for the loaded
        tables, with an upgrade provenance row appended.
    """
    def optional_metadata(group):
        # Return the (metadata, metadata_offset) columns, or a pair of Nones
        # when the group does not store metadata.
        if "metadata" in group:
            return group["metadata"], group["metadata_offset"]
        return None, None

    nodes_group = root["nodes"]
    node_metadata, node_metadata_offset = optional_metadata(nodes_group)
    nodes = msprime.NodeTable()
    nodes.set_columns(
        flags=nodes_group["flags"],
        population=nodes_group["population"],
        time=nodes_group["time"],
        metadata=node_metadata,
        metadata_offset=node_metadata_offset)

    edges_group = root["edges"]
    edges = msprime.EdgeTable()
    edges.set_columns(
        left=edges_group["left"],
        right=edges_group["right"],
        parent=edges_group["parent"],
        child=edges_group["child"])

    migrations_group = root["migrations"]
    migrations = msprime.MigrationTable()
    if "left" in migrations_group:
        migrations.set_columns(
            left=migrations_group["left"],
            right=migrations_group["right"],
            node=migrations_group["node"],
            source=migrations_group["source"],
            dest=migrations_group["dest"],
            time=migrations_group["time"])

    sites_group = root["sites"]
    sites = msprime.SiteTable()
    if "position" in sites_group:
        site_metadata, site_metadata_offset = optional_metadata(sites_group)
        sites.set_columns(
            position=sites_group["position"],
            ancestral_state=sites_group["ancestral_state"],
            ancestral_state_offset=sites_group["ancestral_state_offset"],
            metadata=site_metadata,
            metadata_offset=site_metadata_offset)

    mutations_group = root["mutations"]
    mutations = msprime.MutationTable()
    if "site" in mutations_group:
        mutation_metadata, mutation_metadata_offset = optional_metadata(
            mutations_group)
        mutations.set_columns(
            site=mutations_group["site"],
            node=mutations_group["node"],
            parent=mutations_group["parent"],
            derived_state=mutations_group["derived_state"],
            derived_state_offset=mutations_group["derived_state_offset"],
            metadata=mutation_metadata,
            metadata_offset=mutation_metadata_offset)

    provenances_group = root["provenances"]
    provenances = msprime.ProvenanceTable()
    if "timestamp" in provenances_group:
        timestamp = provenances_group["timestamp"]
        timestamp_offset = provenances_group["timestamp_offset"]
        has_record = "record" in provenances_group
        # Without a stored record column, substitute a placeholder buffer
        # shaped like the timestamp column with all-zero offsets.
        record = (provenances_group["record"] if has_record
                  else np.empty_like(timestamp))
        record_offset = (provenances_group["record_offset"] if has_record
                         else np.zeros_like(timestamp_offset))
        provenances.set_columns(
            timestamp=timestamp,
            timestamp_offset=timestamp_offset,
            record=record,
            record_offset=record_offset)
    provenances.add_row(_get_upgrade_provenance(root))

    return msprime.load_tables(
        nodes=nodes,
        edges=edges,
        migrations=migrations,
        sites=sites,
        mutations=mutations,
        provenances=provenances)