Example 1
import os

import h5py

# SOURCE_FILES, file_in_list and inspect_h5py_file are provided
# elsewhere in the module.


def get_dataset(file_path, dataset_key):
    """ Get a dataset from an HDF5 file.
    """
    def check_extension(file_path, ext):
        # Compare the file extension against the expected one.
        return file_path.split('.')[-1] == ext

    if not os.path.isfile(file_path):
        raise FileNotFoundError(file_path)

    if not check_extension(file_path, 'hdf5'):
        raise ValueError("This is not an HDF5 file.")

    f = h5py.File(file_path, 'r')

    if not f.keys():
        raise ValueError('No dataset found in the input file. Aborting.')

    # Keep a reference to the open file so it can be closed later.
    if not file_in_list(f, SOURCE_FILES):
        SOURCE_FILES.append(f)

    print("Loading file...")
    inspect_h5py_file(f)

    return f[dataset_key]
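
For context, a minimal usage sketch; 'sample.hdf5' and the '/data' key are placeholder names, not files shipped with the project, and the module-level helpers above are assumed to be importable:

# Hypothetical usage: 'sample.hdf5' and '/data' are placeholders.
dset = get_dataset('sample.hdf5', '/data')
print(dset.shape, dset.dtype)  # h5py.Dataset exposes shape and dtype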
Example 2
import dask.array as da
import h5py

# inspect_h5py_file is provided elsewhere in the module.


def save_to_hdf5(arr,
                 file_path,
                 physik_cs=None,
                 key='/data',
                 compression=None):
    """ Save a dask array to an HDF5 dataset.

    Arguments:
    ----------
        arr: dask array to save.
        file_path: path of the output HDF5 file.
        physik_cs: physical chunk shape for the stored dataset.
        key: key of the dataset inside the output file.
        compression: compression algorithm. If None, compression is disabled.
    """

    print(f'Saving a dask array at {file_path}:')
    print(f'- physik_cs: {physik_cs}')
    print(f'- key: {key}')
    print(f'- compression: {compression}')

    da.to_hdf5(file_path, key, arr, chunks=physik_cs, compression=compression)
    print('Array successfully saved.\n')

    print('Inspecting created file...')
    with h5py.File(file_path, 'r') as f:
        inspect_h5py_file(f)
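
A hedged usage sketch: build a small random dask array and save it with gzip compression. 'out.hdf5' and the chunk shapes are illustrative values, not project defaults:

import dask.array as da

# Hypothetical call: a 100^3 random array, stored with 50^3 physical
# chunks and gzip compression ('out.hdf5' is a placeholder path).
arr = da.random.random((100, 100, 100), chunks=(50, 50, 50))
save_to_hdf5(arr, 'out.hdf5', physik_cs=(50, 50, 50), key='/data',
             compression='gzip')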
Example 3
import glob
import os

import h5py
import pytest

# Split, Merge, inspect_h5py_file and logger are provided elsewhere in
# the module; pytest.test_array_path is set by the test setup.


def test_split_and_merge_multiple(shape_to_test, nb_chunks):
    """ TODO: add asserts -> retrieve the chunks and compare them to what has been stored.
    """
    # Remove split files left over from previous tests (see the helper
    # sketch after this example).
    fileslist = glob.glob("[0-9]*_[0-9]*_[0-9]*.hdf5")
    fileslist.append('./reconstructed.hdf5')
    for fn in fileslist:
        if os.path.isfile(fn):
            os.remove(fn)

    out_dirpath = './'
    case = Split(pytest.test_array_path, shape_to_test)
    case.split_hdf5_multiple(out_dirpath, nb_blocks=None)
    arr = case.get()
    arr.compute()
    case.clean()

    in_dirpath = out_dirpath
    case = Merge('./reconstructed.hdf5')
    case.merge_hdf5_multiple(in_dirpath)
    arr = case.get()
    arr.compute()
    case.clean()

    logger.info("Inspecting filepath: './reconstructed.hdf5'")
    with h5py.File('./reconstructed.hdf5', 'r') as f:
        inspect_h5py_file(f)
        assert f['/data'].shape == (100, 100, 100)
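
The cleanup loop at the top of this test is a pattern that recurs across the test suite; one way it could be factored out, sketched with illustrative names that are not part of the project:

import glob
import os

def remove_matching_files(patterns):
    """Delete every existing file matching any of the glob patterns."""
    for pattern in patterns:
        for filepath in glob.glob(pattern):
            if os.path.isfile(filepath):
                os.remove(filepath)

# e.g. remove_matching_files(["[0-9]*_[0-9]*_[0-9]*.hdf5",
#                             "./reconstructed.hdf5"])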
Example 4
import glob
import os

import h5py

# inspect_h5py_file is provided elsewhere in the module.


def inspect_dir(dirpath):
    print(f'Inspecting {dirpath}...')
    workdir = os.getcwd()
    os.chdir(dirpath)
    nb_outfiles = 0
    for filename in glob.glob("[0-9]*_[0-9]*_[0-9]*.hdf5"):
        # The glob already runs inside dirpath, so open the bare filename;
        # joining it with dirpath again would break for relative paths.
        with h5py.File(filename, 'r') as f:
            inspect_h5py_file(f)
        nb_outfiles += 1
    os.chdir(workdir)
    print(f'Found {nb_outfiles} files.')
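
Because os.chdir mutates process-global state, a variant that globs on the joined path can be safer when tests run concurrently. A sketch under the same assumption that inspect_h5py_file is available; the function name is illustrative:

import glob
import os

import h5py

def inspect_dir_no_chdir(dirpath):
    """Illustrative variant of inspect_dir that never changes directory."""
    pattern = os.path.join(dirpath, "[0-9]*_[0-9]*_[0-9]*.hdf5")
    filepaths = sorted(glob.glob(pattern))
    for filepath in filepaths:
        with h5py.File(filepath, 'r') as f:
            inspect_h5py_file(f)
    print(f'Found {len(filepaths)} files.')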
Example 5
import glob

import h5py
import pytest

# Split, inspect_h5py_file and logger are provided elsewhere in the
# module; pytest.test_array_path is set by the test setup.


def test_split_multiple(shape_to_test, nb_chunks):
    """ TODO: add asserts -> retrieve the chunks and compare them to what has been stored.
    """
    out_dirpath = './'
    case = Split(pytest.test_array_path, shape_to_test)
    case.split_hdf5_multiple(out_dirpath, nb_blocks=None)
    arr = case.get()
    # arr.visualize(filename='/tmp/dask_io_visualize_split_multiple.svg')
    arr.compute()
    case.clean()

    for filepath in glob.glob("*.hdf5"):
        logger.info("Inspecting filepath: %s", filepath)
        with h5py.File(filepath, 'r') as f:
            inspect_h5py_file(f)
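
For reference, the commented-out visualize call renders the dask task graph before execution; enabling it requires the optional graphviz dependency:

# Render the task graph to SVG (needs graphviz and python-graphviz
# installed); useful for checking how the split is scheduled.
arr.visualize(filename='/tmp/dask_io_visualize_split_multiple.svg')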
Example 6
import glob

import dask.array as da
import h5py

# get_blocks_shape, get_named_volumes, convert_Volume_to_slices,
# get_dask_array_from_hdf5, inspect_h5py_file, O and reconstructed_array
# are provided elsewhere in the module.


def check_outputs():
    # sanity check: inspect every output file produced by the split
    for fpath in glob.glob("[0-9].hdf5"):
        print(f'Filename: {fpath}')
        with h5py.File(fpath, 'r') as f:
            inspect_h5py_file(f)

    # prepare the ground truth for verification
    arrays_expected = dict()
    outfiles_partition = get_blocks_shape((1, 120, 120), O)
    outfiles_volumes = get_named_volumes(outfiles_partition, O)
    for outfilekey, volume in outfiles_volumes.items():
        slices = convert_Volume_to_slices(volume)
        arrays_expected[outfilekey] = reconstructed_array[slices[0], slices[1],
                                                          slices[2]]

    # verify each stored block against the expected slice of the array
    for fpath in glob.glob("[0-9].hdf5"):
        outputfile_index = int(fpath.split('.')[0])
        print('Output file index:', outputfile_index)

        array_stored = get_dask_array_from_hdf5(fpath,
                                                '/data',
                                                logic_cs="dataset_shape")
        arr_expected = arrays_expected[outputfile_index]
        print("equal:", da.allclose(array_stored, arr_expected).compute())
        print("stored:", array_stored[0:1, 0:1, 0:10].compute())
        print("expected:", arr_expected[0:1, 0:1, 0:10].compute())
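
The core of the verification is the lazy da.allclose(...).compute() comparison. A self-contained sketch of that pattern on plain dask arrays, independent of the project helpers used above:

import numpy as np
import dask.array as da

# Compare a 'stored' array against its expected counterpart lazily;
# nothing is evaluated until .compute() is called.
expected = da.from_array(np.arange(27).reshape(3, 3, 3), chunks=(3, 3, 3))
stored = expected + 0  # stand-in for an array re-read from disk
assert bool(da.allclose(stored, expected).compute())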