Beispiel #1
0
def _get_provenance(command, attrs):
    """
    Convert the provenance attributes of a V2 tree file into a V3
    provenance string.

    The ``environment`` and ``parameters`` entries of ``attrs`` are JSON
    documents. They are decoded, combined with ``command`` through
    ``msprime.get_provenance_dict`` and the result is returned as a JSON
    encoded string.
    """
    env = json.loads(attrs["environment"])
    record = msprime.get_provenance_dict(
        command, json.loads(attrs["parameters"]))
    # Overwrite what get_provenance_dict filled in with the values that
    # were stored in the file's own environment attribute.
    record["version"] = env["msprime_version"]
    record["environment"] = env
    return json.dumps(record)
Beispiel #2
0
def _get_provenance(command, attrs):
    """
    Reformat V2 tree provenance attributes as a V3 provenance string.

    ``attrs`` must provide JSON encoded ``environment`` and ``parameters``
    entries. The decoded parameters and ``command`` are passed to
    ``msprime.get_provenance_dict``; the stored environment and its
    ``msprime_version`` are then written into the result, which is
    returned serialised as JSON.
    """
    stored_environment = json.loads(attrs["environment"])
    stored_parameters = json.loads(attrs["parameters"])
    result = msprime.get_provenance_dict(command, stored_parameters)
    # The version and environment come from the legacy file itself.
    result["version"] = stored_environment["msprime_version"]
    result["environment"] = stored_environment
    serialised = json.dumps(result)
    return serialised
Beispiel #3
0
def _load_legacy_hdf5(root):
    if 'format_version' not in root.attrs:
        raise ValueError("HDF5 file not in msprime format")
    format_version = root.attrs['format_version']
    if format_version[0] != 2:
        raise ValueError("Only version 2.x are supported")

    # Get the coalescence records
    trees_group = root["trees"]
    left = np.array(trees_group["left"])
    right = np.array(trees_group["right"])
    node = np.array(trees_group["node"])
    children = np.array(trees_group["children"])
    population = np.array(trees_group["population"])
    time = np.array(trees_group["time"])
    num_records = len(left)
    records = num_records * [None]
    for j in range(num_records):
        records[j] = msprime.CoalescenceRecord(left=left[j],
                                               right=right[j],
                                               node=node[j],
                                               children=tuple(children[j]),
                                               time=time[j],
                                               population=population[j])

    # Get the samples (if present)
    samples = None
    if "samples" in root:
        samples_group = root["samples"]
        population = np.array(samples_group["population"])
        time = None
        if "time" in samples_group:
            time = np.array(samples_group["time"])
        sample_size = len(population)
        samples = sample_size * [None]
        for j in range(sample_size):
            t = 0
            if time is not None:
                t = time[j]
            samples[j] = msprime.Sample(population=population[j], time=t)

    # Get the mutations (if present)
    mutations = None
    if "mutations" in root:
        mutations_group = root["mutations"]
        position = np.array(mutations_group["position"])
        node = np.array(mutations_group["node"])
        num_mutations = len(node)
        mutations = num_mutations * [None]
        for j in range(num_mutations):
            mutations[j] = msprime.Mutation(position=position[j],
                                            node=node[j],
                                            index=j)

    ll_ts = _msprime.TreeSequence()
    if samples is None:
        ll_ts.load_records(records)
    else:
        ll_ts.load_records(records, samples)
    ll_ts.add_provenance_string(
        _get_provenance("generate_trees", trees_group.attrs))
    if mutations is not None:
        ll_ts.set_mutations(mutations)
        ll_ts.add_provenance_string(
            _get_provenance("generate_mutations", mutations_group.attrs))
    ll_ts.add_provenance_string(
        json.dumps(msprime.get_provenance_dict("upgrade", {})))
    return ll_ts
Beispiel #4
0
def _load_legacy_hdf5(root):
    if 'format_version' not in root.attrs:
        raise ValueError("HDF5 file not in msprime format")
    format_version = root.attrs['format_version']
    if format_version[0] != 2:
        raise ValueError("Only version 2.x are supported")

    # Get the coalescence records
    trees_group = root["trees"]
    left = np.array(trees_group["left"])
    right = np.array(trees_group["right"])
    node = np.array(trees_group["node"])
    children = np.array(trees_group["children"])
    population = np.array(trees_group["population"])
    time = np.array(trees_group["time"])
    num_records = len(left)
    records = num_records * [None]
    for j in range(num_records):
        records[j] = msprime.CoalescenceRecord(
            left=left[j], right=right[j], node=node[j],
            children=tuple(children[j]), time=time[j],
            population=population[j])

    # Get the samples (if present)
    samples = None
    if "samples" in root:
        samples_group = root["samples"]
        population = np.array(samples_group["population"])
        time = None
        if "time" in samples_group:
            time = np.array(samples_group["time"])
        sample_size = len(population)
        samples = sample_size * [None]
        for j in range(sample_size):
            t = 0
            if time is not None:
                t = time[j]
            samples[j] = msprime.Sample(population=population[j], time=t)

    # Get the mutations (if present)
    mutations = None
    if "mutations" in root:
        mutations_group = root["mutations"]
        position = np.array(mutations_group["position"])
        node = np.array(mutations_group["node"])
        num_mutations = len(node)
        mutations = num_mutations * [None]
        for j in range(num_mutations):
            mutations[j] = msprime.Mutation(
                position=position[j], node=node[j], index=j)

    ll_ts = _msprime.TreeSequence()
    if samples is None:
        ll_ts.load_records(records)
    else:
        ll_ts.load_records(records, samples)
    ll_ts.add_provenance_string(
        _get_provenance("generate_trees", trees_group.attrs))
    if mutations is not None:
        ll_ts.set_mutations(mutations)
        ll_ts.add_provenance_string(
            _get_provenance("generate_mutations", mutations_group.attrs))
    ll_ts.add_provenance_string(
        json.dumps(msprime.get_provenance_dict("upgrade", {})))
    return ll_ts