def _get_provenance(command, attrs):
    """
    Converts the provenance attributes of a V2 HDF5 group into a V3
    provenance string.

    The JSON documents stored under the "environment" and "parameters"
    keys of ``attrs`` are decoded, combined with ``command`` through
    ``msprime.get_provenance_dict`` and returned re-encoded as JSON.
    """
    env = json.loads(attrs["environment"])
    record = msprime.get_provenance_dict(
        command, json.loads(attrs["parameters"]))
    # Report the version and environment recorded in the file rather than
    # whatever get_provenance_dict filled in.
    record["version"] = env["msprime_version"]
    record["environment"] = env
    return json.dumps(record)
def _load_legacy_hdf5(root):
    """
    Loads a version 2.x msprime HDF5 file into a low-level tree sequence.

    The coalescence records are read from the "trees" group, and the
    samples and mutations from the optional "samples" and "mutations"
    groups. The provenance attributes stored on the "trees" and
    "mutations" groups are carried over, followed by a final "upgrade"
    provenance entry.

    :param root: The root group of the open HDF5 file; must expose
        ``attrs`` and support ``in`` / item lookup by group name.
    :return: The populated ``_msprime.TreeSequence`` instance.
    :raises ValueError: If ``root`` has no ``format_version`` attribute
        or the major version is not 2.
    """
    if 'format_version' not in root.attrs:
        raise ValueError("HDF5 file not in msprime format")
    format_version = root.attrs['format_version']
    if format_version[0] != 2:
        raise ValueError("Only version 2.x are supported")

    # Get the coalescence records
    trees_group = root["trees"]
    left = np.array(trees_group["left"])
    right = np.array(trees_group["right"])
    node = np.array(trees_group["node"])
    children = np.array(trees_group["children"])
    population = np.array(trees_group["population"])
    time = np.array(trees_group["time"])
    # The number of records is defined by the length of the "left" column;
    # the other columns are indexed in step with it.
    records = [
        msprime.CoalescenceRecord(
            left=left[j], right=right[j], node=node[j],
            children=tuple(children[j]), time=time[j],
            population=population[j])
        for j in range(len(left))]

    # Get the samples (if present)
    samples = None
    if "samples" in root:
        samples_group = root["samples"]
        sample_population = np.array(samples_group["population"])
        sample_time = None
        if "time" in samples_group:
            sample_time = np.array(samples_group["time"])
        # Samples get time 0 when the file has no "time" dataset.
        samples = [
            msprime.Sample(
                population=sample_population[j],
                time=0 if sample_time is None else sample_time[j])
            for j in range(len(sample_population))]

    # Get the mutations (if present)
    mutations = None
    mutations_group = None
    if "mutations" in root:
        mutations_group = root["mutations"]
        position = np.array(mutations_group["position"])
        mutation_node = np.array(mutations_group["node"])
        mutations = [
            msprime.Mutation(
                position=position[j], node=mutation_node[j], index=j)
            for j in range(len(mutation_node))]

    ll_ts = _msprime.TreeSequence()
    if samples is None:
        ll_ts.load_records(records)
    else:
        ll_ts.load_records(records, samples)
    # Provenance entries are appended in the order the data was produced:
    # tree generation, then mutation generation, then this upgrade.
    ll_ts.add_provenance_string(
        _get_provenance("generate_trees", trees_group.attrs))
    if mutations is not None:
        ll_ts.set_mutations(mutations)
        ll_ts.add_provenance_string(
            _get_provenance("generate_mutations", mutations_group.attrs))
    ll_ts.add_provenance_string(
        json.dumps(msprime.get_provenance_dict("upgrade", {})))
    return ll_ts