def _load_legacy_hdf5_v3(root, remove_duplicate_positions):
    """
    Convert the contents of a version 3 legacy file into a tree sequence.

    Each record under ``root["trees"]["records"]`` names one parent node, an
    interval expressed as indexes into ``root["trees"]["breakpoints"]`` and a
    count of children. Every record is expanded into one edge per child; the
    child IDs themselves are taken from the ``children`` column as stored.

    :param root: Group-like object indexed by name. Must contain ``trees``;
        ``mutations`` and ``provenance`` are read only when present.
    :param remove_duplicate_positions: Passed through unchanged to
        ``_convert_hdf5_mutations`` when the file has a ``mutations`` group.
    :return: The result of ``msprime.load_tables`` on the converted tables.
    """
    # get the trees group for the records and samples
    trees_group = root["trees"]
    nodes_group = trees_group["nodes"]
    records_group = trees_group["records"]

    time = np.array(nodes_group["time"])
    breakpoints = np.array(trees_group["breakpoints"])
    left_indexes = np.array(records_group["left"])
    right_indexes = np.array(records_group["right"])
    record_node = np.array(records_group["node"], dtype=np.int32)

    # Nodes with an ID below the smallest parent ID are flagged as samples.
    num_nodes = time.shape[0]
    sample_size = np.min(record_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    flags[:sample_size] = msprime.NODE_IS_SAMPLE

    # Expand each record into one row per child. np.repeat does in a single
    # native call what would otherwise be a nested Python loop over every
    # (record, child) pair. The counts are cast to intp because that is the
    # type np.repeat requires for its repeats argument.
    children_length = np.array(records_group["num_children"], dtype=np.uint32)
    repeats = children_length.astype(np.intp)
    left = np.repeat(breakpoints[left_indexes], repeats).astype(
        np.float64, copy=False)
    right = np.repeat(breakpoints[right_indexes], repeats).astype(
        np.float64, copy=False)
    parent = np.repeat(record_node, repeats)

    nodes = msprime.NodeTable()
    nodes.set_columns(
        flags=flags,
        time=nodes_group["time"],
        population=nodes_group["population"])
    edges = msprime.EdgeTable()
    edges.set_columns(
        left=left, right=right, parent=parent, child=records_group["children"])

    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    if "mutations" in root:
        _convert_hdf5_mutations(
            root["mutations"], sites, mutations, remove_duplicate_positions)

    # Records read from the file are all stamped with the minimum
    # representable datetime.
    old_timestamp = datetime.datetime.min.isoformat()
    provenances = msprime.ProvenanceTable()
    if "provenance" in root:
        for record in root["provenance"]:
            provenances.add_row(timestamp=old_timestamp, record=record)
    provenances.add_row(_get_upgrade_provenance(root))

    msprime.sort_tables(nodes=nodes, edges=edges, sites=sites, mutations=mutations)
    return msprime.load_tables(
        nodes=nodes, edges=edges, sites=sites, mutations=mutations,
        provenances=provenances)
def provenance_timestamp_only_example():
    """
    Build a tree sequence whose single provenance row has a timestamp but an
    empty record.

    The nodes and edges come from a 10-sample simulation with a fixed random
    seed; the provenance table is replaced by one containing just that row.
    """
    simulated = msprime.simulate(10, random_seed=1)
    base_tables = simulated.dump_tables()

    timestamp_only = msprime.ProvenanceTable()
    timestamp_only.add_row(timestamp="12345", record="")

    return msprime.load_tables(
        nodes=base_tables.nodes,
        edges=base_tables.edges,
        provenances=timestamp_only)
def run(self, ngens):
    """
    Run the forwards simulation for ``ngens`` generations and return the
    resulting tables.

    The starting population is nodes ``0 .. self.N - 1``. With
    ``self.deep_history`` set, those nodes and their ancestry come from an
    ``msprime.simulate`` call whose node times are shifted back by ``ngens``;
    otherwise ``self.N`` fresh nodes are created at time ``ngens``. In each
    generation every individual is replaced with probability
    ``1 - self.survival`` by an offspring of two parents drawn from the
    current population, recombining at ``self.random_breakpoint()``.

    :param ngens: Number of generations to simulate; also the time assigned
        to the starting population.
    :return: An ``msprime.TableCollection`` of the node, edge, migration,
        site, mutation and provenance tables. Only the node and edge tables
        are filled in here; nodes alive at the end are flagged as samples.
    """
    nodes = msprime.NodeTable()
    edges = msprime.EdgeTable()
    migrations = msprime.MigrationTable()
    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    provenances = msprime.ProvenanceTable()

    if self.deep_history:
        # initial population
        init_ts = msprime.simulate(self.N, recombination_rate=1.0)
        init_ts.dump_tables(nodes=nodes, edges=edges)
        nodes.set_columns(time=nodes.time + ngens, flags=nodes.flags)
    else:
        for _ in range(self.N):
            nodes.add_row(time=ngens)

    pop = list(range(self.N))
    for t in range(ngens - 1, -1, -1):
        if self.debug:
            print("t:", t)
            print("pop:", pop)

        dead = [random.random() > self.survival for _ in pop]
        num_dead = sum(dead)
        # sample these first so that all parents are from the previous gen
        new_parents = [
            (random.choice(pop), random.choice(pop)) for _ in range(num_dead)]
        k = 0
        if self.debug:
            print("Replacing", num_dead, "individuals.")
        for j in range(self.N):
            if dead[j]:
                # this is: offspring ID, lparent, rparent, breakpoint
                offspring = nodes.num_rows
                nodes.add_row(time=t)
                lparent, rparent = new_parents[k]
                k += 1
                bp = self.random_breakpoint()
                if self.debug:
                    print("--->", offspring, lparent, rparent, bp)
                pop[j] = offspring
                # Skip the zero-length edge when the breakpoint is at an end.
                if bp > 0.0:
                    edges.add_row(
                        left=0.0, right=bp, parent=lparent, child=offspring)
                if bp < 1.0:
                    edges.add_row(
                        left=bp, right=1.0, parent=rparent, child=offspring)

    if self.debug:
        print("Done! Final pop:")
        print(pop)

    # Build the set once: testing membership in the list for every node row
    # would cost O(len(pop)) per row.
    alive = set(pop)
    flags = [
        (msprime.NODE_IS_SAMPLE if u in alive else 0)
        for u in range(nodes.num_rows)]
    nodes.set_columns(time=nodes.time, flags=flags)

    if self.debug:
        print("Done.")
        print("Nodes:")
        print(nodes)
        print("Edges:")
        print(edges)
    return msprime.TableCollection(
        nodes, edges, migrations, sites, mutations, provenances)
def _load_legacy_hdf5_v2(root, remove_duplicate_positions):
    """
    Convert the contents of a version 2 legacy file into a tree sequence.

    The ``trees`` group stores one row per coalescence record (``node``,
    ``left``, ``right``, ``time``, ``population``) plus a ``children``
    column that is flattened into child IDs. Each record's parent and
    interval are emitted twice, in record order, to line up with that
    flattened child column.

    :param root: Group-like object indexed by name. Must contain ``trees``;
        ``samples`` and ``mutations`` are read only when present.
    :param remove_duplicate_positions: Passed through unchanged to
        ``_convert_hdf5_mutations`` when the file has a ``mutations`` group.
    :return: The result of ``msprime.load_tables`` on the converted tables.
    """
    # Get the coalescence records
    trees_group = root["trees"]
    legacy_timestamp = datetime.datetime.min.isoformat()
    provenances = msprime.ProvenanceTable()
    provenances.add_row(
        timestamp=legacy_timestamp,
        record=_get_v2_provenance("generate_trees", trees_group.attrs))

    # Edges: duplicate every per-record value so rows 2i and 2i + 1 both
    # carry record i's parent and interval.
    record_node = np.array(trees_group["node"], dtype=np.int32)
    parent = np.repeat(record_node, 2)
    left = np.repeat(np.array(trees_group["left"], dtype=np.float64), 2)
    right = np.repeat(np.array(trees_group["right"], dtype=np.float64), 2)
    child = np.array(trees_group["children"], dtype=np.int32).flatten()
    edges = msprime.EdgeTable()
    edges.set_columns(left=left, right=right, parent=parent, child=child)

    # Nodes: the table must cover the largest ID seen as parent or child.
    # IDs below the smallest parent ID are flagged as samples.
    num_nodes = max(np.max(child), np.max(record_node)) + 1
    sample_size = np.min(record_node)
    flags = np.zeros(num_nodes, dtype=np.uint32)
    flags[:sample_size] = msprime.NODE_IS_SAMPLE

    time = np.zeros(num_nodes, dtype=np.float64)
    time[record_node] = np.array(trees_group["time"])
    population = np.zeros(num_nodes, dtype=np.int32)
    population[record_node] = np.array(trees_group["population"], dtype=np.int32)

    # Sample populations (and times, when stored) live in a separate group.
    if "samples" in root:
        samples_group = root["samples"]
        population[:sample_size] = samples_group["population"]
        if "time" in samples_group:
            time[:sample_size] = samples_group["time"]

    nodes = msprime.NodeTable()
    nodes.set_columns(flags=flags, population=population, time=time)

    sites = msprime.SiteTable()
    mutations = msprime.MutationTable()
    if "mutations" in root:
        mutations_group = root["mutations"]
        _convert_hdf5_mutations(
            mutations_group, sites, mutations, remove_duplicate_positions)
        provenances.add_row(
            timestamp=legacy_timestamp,
            record=_get_v2_provenance(
                "generate_mutations", mutations_group.attrs))
    provenances.add_row(_get_upgrade_provenance(root))

    msprime.sort_tables(nodes=nodes, edges=edges, sites=sites, mutations=mutations)
    return msprime.load_tables(
        nodes=nodes, edges=edges, sites=sites, mutations=mutations,
        provenances=provenances)
def _load_legacy_hdf5_v10(root, remove_duplicate_positions=False):
    """
    Convert the contents of a version 10 legacy file into a tree sequence.

    The file already stores one group per table, so each group's columns
    are handed straight to the matching table's ``set_columns``. The
    migration, site, mutation and provenance tables are left empty when
    their group lacks its key column (``left``, ``position``, ``site`` and
    ``timestamp`` respectively).

    :param root: Group-like object indexed by name, containing ``nodes``,
        ``edges``, ``migrations``, ``sites``, ``mutations`` and
        ``provenances``.
    :param remove_duplicate_positions: Ignored; accepted so that this loader
        has the same signature as the other version loaders.
    :return: The result of ``msprime.load_tables`` on the converted tables.
    """
    # We cannot have duplicate positions in v10, so this parameter is ignored

    def optional_metadata(group):
        # Metadata columns are optional; report both as None when absent.
        if "metadata" in group:
            return group["metadata"], group["metadata_offset"]
        return None, None

    node_columns = root["nodes"]
    nodes = msprime.NodeTable()
    node_md, node_md_offset = optional_metadata(node_columns)
    nodes.set_columns(
        flags=node_columns["flags"],
        population=node_columns["population"],
        time=node_columns["time"],
        metadata=node_md,
        metadata_offset=node_md_offset)

    edge_columns = root["edges"]
    edges = msprime.EdgeTable()
    edges.set_columns(
        left=edge_columns["left"],
        right=edge_columns["right"],
        parent=edge_columns["parent"],
        child=edge_columns["child"])

    migration_columns = root["migrations"]
    migrations = msprime.MigrationTable()
    if "left" in migration_columns:
        migrations.set_columns(
            left=migration_columns["left"],
            right=migration_columns["right"],
            node=migration_columns["node"],
            source=migration_columns["source"],
            dest=migration_columns["dest"],
            time=migration_columns["time"])

    site_columns = root["sites"]
    sites = msprime.SiteTable()
    if "position" in site_columns:
        site_md, site_md_offset = optional_metadata(site_columns)
        sites.set_columns(
            position=site_columns["position"],
            ancestral_state=site_columns["ancestral_state"],
            ancestral_state_offset=site_columns["ancestral_state_offset"],
            metadata=site_md,
            metadata_offset=site_md_offset)

    mutation_columns = root["mutations"]
    mutations = msprime.MutationTable()
    if "site" in mutation_columns:
        mutation_md, mutation_md_offset = optional_metadata(mutation_columns)
        mutations.set_columns(
            site=mutation_columns["site"],
            node=mutation_columns["node"],
            parent=mutation_columns["parent"],
            derived_state=mutation_columns["derived_state"],
            derived_state_offset=mutation_columns["derived_state_offset"],
            metadata=mutation_md,
            metadata_offset=mutation_md_offset)

    provenance_columns = root["provenances"]
    provenances = msprime.ProvenanceTable()
    if "timestamp" in provenance_columns:
        timestamp = provenance_columns["timestamp"]
        timestamp_offset = provenance_columns["timestamp_offset"]
        if "record" not in provenance_columns:
            # No record column stored: pair all-zero offsets (every record
            # empty) with a placeholder data array.
            # NOTE(review): the placeholder is as long as the timestamp
            # data rather than zero-length -- confirm set_columns accepts
            # that alongside all-zero offsets.
            record = np.empty_like(timestamp)
            record_offset = np.zeros_like(timestamp_offset)
        else:
            record = provenance_columns["record"]
            record_offset = provenance_columns["record_offset"]
        provenances.set_columns(
            timestamp=timestamp,
            timestamp_offset=timestamp_offset,
            record=record,
            record_offset=record_offset)
    provenances.add_row(_get_upgrade_provenance(root))

    return msprime.load_tables(
        nodes=nodes, edges=edges, migrations=migrations, sites=sites,
        mutations=mutations, provenances=provenances)