Code example #1
def test_write_dataset_chunks_multidimension(h5file):
    chunks = ChunkSize(3 * (CHUNK_SIZE_3D, ))
    shape = (2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D)
    data = np.zeros(shape)
    slices1 = write_dataset(h5file, 'test_data', data, chunks=chunks)
    data_dict = {}
    # Reuse the existing raw slice for the first chunk; provide new data
    # (a distinct constant per chunk) for the rest.
    for n, c in enumerate(chunks.indices(shape)):
        if n == 0:
            data_dict[c] = slices1[c]
        else:
            data_dict[c] = n * np.ones(chunks)

    slices2 = write_dataset_chunks(h5file, 'test_data', data_dict)

    assert slices1 == {
        c: slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D)
        for c in chunks.indices(shape)
    }
    assert slices2 == {
        c: slice(i * CHUNK_SIZE_3D, (i + 1) * CHUNK_SIZE_3D)
        for i, c in enumerate(chunks.indices(shape))
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (8 * CHUNK_SIZE_3D, CHUNK_SIZE_3D, CHUNK_SIZE_3D)
    for n in range(8):
        assert_equal(ds[n * CHUNK_SIZE_3D:(n + 1) * CHUNK_SIZE_3D], n)
    assert ds.dtype == np.float64
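
Both assertions above depend on how ChunkSize.indices() enumerates chunks. As a minimal standalone sketch (plain ndindex with a toy shape, not the CHUNK_SIZE_3D fixtures), it yields one Tuple of Slices per chunk, in C order, clipped to the array's shape:

from ndindex import ChunkSize

chunks = ChunkSize((2, 2))
for c in chunks.indices((3, 4)):
    print(c)
# Tuple(Slice(0, 2, 1), Slice(0, 2, 1))
# Tuple(Slice(0, 2, 1), Slice(2, 4, 1))
# Tuple(Slice(2, 3, 1), Slice(0, 2, 1))
# Tuple(Slice(2, 3, 1), Slice(2, 4, 1))

This is why slices1 has eight identical values (every chunk of the all-zeros data deduplicates to the first raw slot) while slices2 maps the n-th chunk to the n-th raw slot.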
Code example #2
def test_create_virtual_dataset_offset_multidimension(h5file):
    chunks = ChunkSize(3 * (CHUNK_SIZE_3D, ))
    shape = (2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D)
    data = np.zeros(shape)
    write_dataset(h5file, 'test_data', data, chunks=chunks)
    shape2 = (2 * CHUNK_SIZE_3D - 2, 2 * CHUNK_SIZE_3D - 2,
              2 * CHUNK_SIZE_3D - 2)
    data2 = np.empty(shape2)
    for n, c in enumerate(chunks.indices(shape)):
        # fill each chunk-sized region (clipped to shape2) with its chunk number
        data2[c.raw] = n

    slices2 = write_dataset(h5file, 'test_data', data2, chunks=chunks)

    virtual_data = create_virtual_dataset(h5file, 'test_version', 'test_data',
                                          shape2, slices2)

    assert virtual_data.shape == shape2
    assert_equal(virtual_data[()], data2)
    assert virtual_data.dtype == np.float64
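
create_virtual_dataset assembles those per-chunk raw slices into one virtual dataset with the requested shape. For intuition, here is a minimal sketch of the underlying h5py virtual-dataset machinery on its own (plain h5py, not this codebase's wrapper; the file name demo.h5 is made up):

import h5py
import numpy as np

with h5py.File('demo.h5', 'w') as f:
    raw = f.create_dataset('raw', data=np.arange(10.0))
    layout = h5py.VirtualLayout(shape=(4,), dtype='f8')
    # map raw[3:7] onto virt[0:4]
    layout[0:4] = h5py.VirtualSource(raw)[3:7]
    f.create_virtual_dataset('virt', layout, fillvalue=0)

with h5py.File('demo.h5', 'r') as f:
    np.testing.assert_array_equal(f['virt'][:], np.arange(3.0, 7.0))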
Code example #3
    @classmethod
    def from_raw_data(cls,
                      f,
                      name,
                      chunk_size=None,
                      hash_table_name='hash_table'):
        if hash_table_name in f['_version_data'][name]:
            raise ValueError(
                f"a hash table {hash_table_name!r} for {name!r} already exists"
            )

        hashtable = cls(f,
                        name,
                        chunk_size=chunk_size,
                        hash_table_name=hash_table_name)

        raw_data = f['_version_data'][name]['raw_data']
        chunks = ChunkSize(raw_data.chunks)
        for c in chunks.indices(raw_data.shape):
            data_hash = hashtable.hash(raw_data[c.raw])
            hashtable.setdefault(data_hash, c.args[0])

        hashtable.write()
        return hashtable
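
A hedged usage sketch, assuming the enclosing class is called Hashtable and that f is an open h5py.File already containing /_version_data/test_data/raw_data:

# hypothetical invocation: rebuild the hash table from existing raw data
hashtable = Hashtable.from_raw_data(f, 'test_data')

Because the loop uses setdefault(), the first chunk seen for a given hash becomes canonical: duplicate chunks in raw_data all resolve to one slice along the first axis (c.args[0]).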
Code example #4
def test_write_dataset_offset_multidimension(h5file):
    chunks = ChunkSize(3 * (CHUNK_SIZE_3D, ))
    shape = (2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D)
    data = np.zeros(shape)
    slices1 = write_dataset(h5file, 'test_data', data, chunks=chunks)
    shape2 = (2 * CHUNK_SIZE_3D - 2, 2 * CHUNK_SIZE_3D - 2,
              2 * CHUNK_SIZE_3D - 2)
    data2 = np.empty(shape2)
    for n, c in enumerate(chunks.indices(shape)):
        data2[c.raw] = n

    slices2 = write_dataset(h5file, 'test_data', data2, chunks=chunks)

    assert slices1 == {
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
    }

    assert slices2 == {
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
        ):
        slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(2 * CHUNK_SIZE_3D, 3 * CHUNK_SIZE_3D),
        (
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
        ):
        slice(3 * CHUNK_SIZE_3D, 4 * CHUNK_SIZE_3D),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(4 * CHUNK_SIZE_3D, 5 * CHUNK_SIZE_3D - 2),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
        ):
        slice(5 * CHUNK_SIZE_3D, 6 * CHUNK_SIZE_3D - 2),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(0 * CHUNK_SIZE_3D, 1 * CHUNK_SIZE_3D, 1),
        ):
        slice(6 * CHUNK_SIZE_3D, 7 * CHUNK_SIZE_3D - 2),
        (
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
            Slice(1 * CHUNK_SIZE_3D, 2 * CHUNK_SIZE_3D - 2, 1),
        ):
        slice(7 * CHUNK_SIZE_3D, 8 * CHUNK_SIZE_3D - 2),
    }

    ds = h5file['/_version_data/test_data/raw_data']
    assert ds.shape == (8 * CHUNK_SIZE_3D, CHUNK_SIZE_3D, CHUNK_SIZE_3D)
    for n, c in enumerate(chunks.indices(shape2)):
        a = np.zeros(chunks)
        a[Tuple(*[slice(0, i) for i in shape2]).as_subindex(c).raw] = n
        assert_equal(ds[n * CHUNK_SIZE_3D:(n + 1) * CHUNK_SIZE_3D], a)
    assert ds.dtype == np.float64
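
The verification loop relies on ndindex's as_subindex() to re-express the dataset's extent in chunk-local coordinates. A minimal standalone illustration with toy numbers:

from ndindex import Slice, Tuple

# the part of 0:5 that falls inside the window 3:6, in coordinates
# local to that window
print(Slice(0, 5).as_subindex(Slice(3, 6)))   # Slice(0, 2, 1)

# Tuples apply it elementwise, which is what indexes `a` above
print(Tuple(Slice(0, 5), Slice(0, 5)).as_subindex(Tuple(Slice(3, 6), Slice(0, 4))))
# Tuple(Slice(0, 2, 1), Slice(0, 4, 1))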
Code example #5
def _recreate_raw_data(f, name, versions_to_delete, tmp=False):
    """
    Recreate the raw data for a dataset without the chunks from
    versions_to_delete, returning a map {old chunk: new chunk} for the
    chunks that were kept.

    If no chunks would be left, i.e., the dataset does not appear in any
    version not in versions_to_delete, an empty dict is returned.

    If tmp is True, the new raw dataset is called '_tmp_raw_data' and is
    placed alongside the existing raw dataset. Otherwise the existing raw
    dataset is replaced.
    """
    chunks_map = defaultdict(dict)

    for version_name in all_versions(f):
        if (version_name in versions_to_delete
                or name not in f['_version_data/versions'][version_name]):
            continue

        dataset = f['_version_data/versions'][version_name][name]

        virtual_sources = dataset.virtual_sources()
        slice_map = {
            spaceid_to_slice(i.vspace): spaceid_to_slice(i.src_space)
            for i in virtual_sources
        }
        chunks_map[version_name].update(slice_map)

    chunks_to_keep = set().union(
        *[slice_map.values() for slice_map in chunks_map.values()])

    chunks_to_keep = sorted(chunks_to_keep, key=lambda i: i.args[0].args[0])

    if not chunks_to_keep:
        return {}

    raw_data = f['_version_data'][name]['raw_data']
    chunks = ChunkSize(raw_data.chunks)
    new_shape = (len(chunks_to_keep) * chunks[0], *chunks[1:])

    new_raw_data = f['_version_data'][name].create_dataset(
        '_tmp_raw_data',
        shape=new_shape,
        maxshape=(None, ) + chunks[1:],
        chunks=raw_data.chunks,
        dtype=raw_data.dtype,
        compression=raw_data.compression,
        compression_opts=raw_data.compression_opts,
        fillvalue=raw_data.fillvalue)
    for key, val in raw_data.attrs.items():
        new_raw_data.attrs[key] = val

    r = raw_data[:]
    n = np.full(new_raw_data.shape,
                new_raw_data.fillvalue,
                dtype=new_raw_data.dtype)
    raw_data_chunks_map = {}
    for new_chunk, chunk in zip(chunks.indices(new_shape), chunks_to_keep):
        # Shrink new_chunk to the size of chunk, in case chunk isn't a full
        # chunk in one of the dimensions.
        # TODO: Implement something in ndindex to do this.
        new_chunk = Tuple(*[
            Slice(new_chunk.args[i].start, new_chunk.args[i].start +
                  len(chunk.args[i])) for i in range(len(new_chunk.args))
        ])
        raw_data_chunks_map[chunk] = new_chunk
        n[new_chunk.raw] = r[chunk.raw]

    new_raw_data[:] = n
    if not tmp:
        del f['_version_data'][name]['raw_data']
        f['_version_data'][name].move('_tmp_raw_data', 'raw_data')

    return raw_data_chunks_map
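
The chunk-shrinking step above (flagged with a TODO) is easier to see in isolation. A standalone sketch of the same arithmetic with toy sizes:

from ndindex import Slice, Tuple

# a partial source chunk (3 x 4 elements of a nominal 4 x 4 chunk)...
chunk = Tuple(Slice(4, 7), Slice(4, 8))
# ...and the full destination chunk it is copied into
new_chunk = Tuple(Slice(0, 4), Slice(0, 4))

shrunk = Tuple(*[
    Slice(new_chunk.args[i].start,
          new_chunk.args[i].start + len(chunk.args[i]))
    for i in range(len(new_chunk.args))
])
print(shrunk)   # covers 0:3 along axis 0 and 0:4 along axis 1

Shrinking the destination index this way keeps n[new_chunk.raw] = r[chunk.raw] shape-consistent when the last chunk along an axis is not full.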