Code Example #1
# This snippet assumes module-level imports of os, dask.array as da, and the
# dask_io helpers get_dask_array_from_hdf5, disable_clustering and clean_files.
def verify_results_split(R, I, input_array_path, datadir):
    from dask_io.optimizer.cases.resplit_utils import get_blocks_shape
    splitfiles_partition = get_blocks_shape(R, I)
    print("split files partition:", splitfiles_partition)

    all_true = True
    orig_arr = get_dask_array_from_hdf5(input_array_path,
                                        "/data",
                                        logic_cs=tuple(I))

    for i in range(splitfiles_partition[0]):
        for j in range(splitfiles_partition[1]):
            for k in range(splitfiles_partition[2]):
                splitfilename = f"{i}_{j}_{k}.hdf5"
                split_filepath = os.path.join(datadir, splitfilename)
                print("opening", split_filepath)
                splitarray = get_dask_array_from_hdf5(split_filepath, "/data")
                print(
                    f"Slices from ground truth {i*I[0]}:{(i+1)*I[0]}, {j*I[1]}:{(j+1)*I[1]}, {k*I[2]}:{(k+1)*I[2]}"
                )
                ground_truth_arr = orig_arr[i * I[0]:(i + 1) * I[0],
                                            j * I[1]:(j + 1) * I[1],
                                            k * I[2]:(k + 1) * I[2]]

                verify_task = da.allclose(ground_truth_arr, splitarray)
                print("VERIFY TASK: ", verify_task)
                disable_clustering()
                _res = verify_task.compute()
                print("RESULT: ", _res)
                if not _res:
                    print(f"[Error] Split failed for {splitfilename}")
                    all_true = False

    clean_files()
    return all_true
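
The helpers get_dask_array_from_hdf5, disable_clustering and clean_files used above come from the dask_io project and are not part of this excerpt. As a rough, hypothetical stand-in for the first one (assuming it only wraps an HDF5 dataset in a dask array with an optional logical chunk shape), something like the following would be enough to exercise the verification snippets:

# Hypothetical stand-in for dask_io's get_dask_array_from_hdf5 helper; it assumes
# the helper simply wraps an HDF5 dataset in a dask array with an optional chunk shape.
import h5py
import dask.array as da

def get_dask_array_from_hdf5(file_path, dataset_key, logic_cs="auto"):
    f = h5py.File(file_path, "r")  # kept open: the dask array reads from it lazily
    dset = f[dataset_key]
    return da.from_array(dset, chunks=logic_cs)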
Code Example #2
File: case_config.py  Project: GTimothee/dask_io
def clean(self):
    # Close the merge output file (if any) and remove temporary files.
    name = self.case['name']
    if name:
        if name == 'merge_hdf5_multiple':
            try:
                f = self.case['params']['out_file']
                if f:
                    f.close()
                clean_files()
            except Exception:
                pass
Code Example #3
def verify_results_merge(input_array_path, merged_array_path):
    original_array = get_dask_array_from_hdf5(input_array_path, "/data")
    merged_array = get_dask_array_from_hdf5(merged_array_path, "/data")
    verify_task = da.allclose(original_array, merged_array)
    print("VERIFY TASK: ", verify_task)
    disable_clustering()
    _res = verify_task.compute()
    print("RESULT: ", _res)
    if not _res:
        print("[Error] Rechunk failed")
    clean_files()
    return _res
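
A hypothetical call to the function above, with placeholder file paths, would look like:

# Placeholder paths; the files would have been produced by an earlier merge run.
ok = verify_results_merge("original_array.hdf5", "merged_array.hdf5")
print("merge verified:", ok)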
Code Example #4
def experiment1():
    """ Split multidimensional arrays using vanilla dask and the clustered-strategy implementation from dask_io.
    """
    tests = create_tests() * args.nb_repetitions
    shuffle(tests)

    results = list()
    for i, test in enumerate(tests):
        print(f'\n\nProcessing test {i + 1}/{len(tests)} ~')
        print('Creating test array if needed...')
        create_test_array(test, create_random_dask_array, save_to_hdf5)
        clean_files()
        print('Done. Running test...')
        result = run_test(test, paths)
        results.append(result)

    write_csv(results, paths["outdir"], create_csv_file)
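
experiment1() also depends on module-level args and paths objects that are not shown in this excerpt. A minimal sketch of how they might be defined, with placeholder option names and directories rather than the project's actual CLI, is:

import argparse

# Placeholder CLI: only the option experiment1() actually reads is defined here.
parser = argparse.ArgumentParser(description="dask_io split experiment (illustrative)")
parser.add_argument("--nb_repetitions", type=int, default=3,
                    help="how many times each test configuration is repeated")
args = parser.parse_args()

# Placeholder directory layout consumed by run_test() and write_csv().
paths = {
    "outdir": "outputs",
}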
Code Example #5
File: case_config.py  Project: GTimothee/dask_io
def clean(self):
    # Close the split output file(s) (if any) and remove temporary files.
    name = self.case['name']
    if name:
        if name in ('split_hdf5', 'split_npy'):
            try:
                f = self.case['params']['out_file']
                if f:
                    f.close()
                clean_files()
            except Exception:
                pass
        elif name == 'split_hdf5_multiple':
            for f in self.case['params']['out_files']:
                try:
                    if f:
                        f.close()
                    clean_files()
                except Exception:
                    pass
Code Example #6
# This snippet assumes module-level imports of os, time, h5py, numpy as np, dask,
# dask.array as da, the dask.diagnostics profilers (Profiler, ResourceProfiler,
# CacheProfiler, visualize), and the dask_io helpers load_input_files, Merge,
# get_blocks_shape and clean_files.
def rechunk_vanilla_dask(indir_path, outdir_path, nthreads, R, O, model):
    """ Rechunk using vanilla dask.
    """
    in_arrays = load_input_files(indir_path)

    case = Merge('samplename')
    case.merge_hdf5_multiple(indir_path, store=False)
    reconstructed_array = case.get()

    out_files = list()  # to keep outfiles open during processing
    sources = list()
    targets = list()
    outfiles_partition = get_blocks_shape(R, O)
    for i in range(outfiles_partition[0]):
        for j in range(outfiles_partition[1]):
            for k in range(outfiles_partition[2]):
                out_filename = f'{i}_{j}_{k}.hdf5'
                out_file = h5py.File(os.path.join(outdir_path, out_filename),
                                     'w')
                dset = out_file.create_dataset('/data',
                                               shape=O,
                                               dtype=np.float16)

                tmp_array = reconstructed_array[i * O[0]:(i + 1) * O[0],
                                                j * O[1]:(j + 1) * O[1],
                                                k * O[2]:(k + 1) * O[2]]
                print(
                    f'{i*O[0]}: {(i+1)*O[0]}, {j*O[1]}: {(j+1)*O[1]}, {k*O[2]}: {(k+1)*O[2]}'
                )

                out_files.append(out_file)
                sources.append(tmp_array)
                targets.append(dset)

    rechunk_task = da.store(sources, targets, compute=False)
    # rechunk_task.visualize(filename="tmp_dir/test_graph_vanilla.png")
    # sys.exit()

    with Profiler() as prof, ResourceProfiler(
            dt=0.25) as rprof, CacheProfiler() as cprof:
        scheduler = 'single-threaded' if nthreads == 1 else 'threads'

        with dask.config.set(scheduler=scheduler):
            try:
                t = time.time()
                rechunk_task.compute()
                t = time.time() - t
                # visualize([prof, rprof, cprof])
            except Exception as e:
                print(e, "\nSomething went wrong during graph execution.")
                t = None

        diagnostics = os.path.join(outdir_path, 'exp5_' + str(model) + '.html')
        visualize([prof, rprof, cprof], diagnostics, show=False)

    clean_files()

    for f in out_files:
        f.close()

    return t
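
A hypothetical invocation of rechunk_vanilla_dask, with placeholder paths and shapes chosen so that O tiles R exactly along every axis (which the loop over get_blocks_shape(R, O) assumes), could look like:

# Placeholder directories and shapes; only the tiling relationship between R and O matters.
elapsed = rechunk_vanilla_dask(
    indir_path="input_blocks/",
    outdir_path="output_blocks/",
    nthreads=4,
    R=(100, 100, 100),  # shape of the full reconstructed array
    O=(50, 50, 50),     # shape of each output block / HDF5 file
    model="vanilla",
)
print("rechunk time (s):", elapsed)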