def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Build a tree sequence from a legacy (version 2) HDF5 file.

    Every row of the ``trees`` group is a coalescence record: one parent
    (``node``) over an interval (``left``, ``right``) with its ``children``.
    Each record is expanded into two edges that share the parent and the
    interval, so the code assumes exactly two children per record.

    Nodes whose ID is below the smallest parent ID are flagged as samples.
    Their population (and time, when present) is taken from the optional
    ``samples`` group; otherwise both default to zero.

    The provenance stored in the legacy file is carried over into the
    resulting tables, followed by a record describing the upgrade itself.

    :param root: The root group of the open HDF5 file. Must contain a
        ``trees`` group; ``samples`` and ``mutations`` groups are optional.
    :param remove_duplicate_positions: Forwarded unchanged to
        ``_convert_hdf5_mutations``.
    :return: The tree sequence produced from the sorted tables.
    """
    trees_group = root["trees"]
    old_timestamp = datetime.datetime.min.isoformat()
    # Build the record up front (as before) so that a problem with the
    # attributes surfaces before any of the array work is done.
    trees_provenance = _get_v2_provenance("generate_trees", trees_group.attrs)

    # Get the coalescence records. Each one becomes two consecutive edges, so
    # parent/left/right are duplicated element-wise to line up with the
    # flattened children array.
    cr_node = np.array(trees_group["node"], dtype=np.int32)
    parent = np.repeat(cr_node, 2)
    left = np.repeat(np.array(trees_group["left"], dtype=np.float64), 2)
    right = np.repeat(np.array(trees_group["right"], dtype=np.float64), 2)
    child = np.array(trees_group["children"], dtype=np.int32).flatten()

    tables = tskit.TableCollection(np.max(right))
    tables.edges.set_columns(left=left, right=right, parent=parent, child=child)
    # Record the legacy provenance on the table collection itself; rows added
    # to a standalone ProvenanceTable never reach the returned tree sequence.
    tables.provenances.add_row(timestamp=old_timestamp, record=trees_provenance)

    num_nodes = max(np.max(child), np.max(cr_node)) + 1
    # Parents are internal nodes, so every ID below the smallest parent ID is
    # treated as a sample.
    sample_size = np.min(cr_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    population = np.zeros(num_nodes, dtype=np.int32)
    time = np.zeros(num_nodes, dtype=np.float64)
    flags[:sample_size] = tskit.NODE_IS_SAMPLE
    cr_population = np.array(trees_group["population"], dtype=np.int32)
    cr_time = np.array(trees_group["time"])
    time[cr_node] = cr_time
    population[cr_node] = cr_population
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]
    tables.nodes.set_columns(flags=flags, population=population, time=time)
    _set_populations(tables)

    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(
            mutations_group,
            tables.sites,
            tables.mutations,
            remove_duplicate_positions,
        )
        tables.provenances.add_row(
            timestamp=old_timestamp,
            record=_get_v2_provenance("generate_mutations", mutations_group.attrs),
        )
    # The upgrade record goes last so that provenance stays chronological.
    tables.provenances.add_row(_get_upgrade_provenance(root))
    tables.sort()
    return tables.tree_sequence()
def provenance_timestamp_only_example():
    """
    Return an example tree sequence with a provenance row that has a
    timestamp but an empty record.

    The row is appended to whatever provenance the simulated tree sequence
    already carries.
    """
    ts = msprime.simulate(10, random_seed=1)
    tables = ts.dump_tables()
    # Add the row to the collection's own provenance table: a standalone
    # ProvenanceTable is discarded when the tree sequence is built, which
    # would leave the example without the row it is named after.
    tables.provenances.add_row(timestamp="12345", record="")
    return tables.tree_sequence()