# Imports for the tests and fixtures below; paths follow the libertem package layout.
import itertools
import json
import threading
from unittest import mock

import cloudpickle
import h5py
import numpy as np
import pytest

from libertem.io.dataset.hdf5 import H5DataSet
from libertem.io.dataset.base import TilingScheme
from libertem.common import Shape
from libertem.executor.inline import InlineJobExecutor
from libertem.analysis.sum import SumAnalysis
from libertem.udf.sumsigudf import SumSigUDF
from libertem.job.masks import ApplyMasksJob  # legacy Job API, used by do_com() below


def test_timeout_1(hdf5, lt_ctx):
    with mock.patch('h5py.File.visititems', side_effect=TimeoutError("too slow")):
        params = H5DataSet.detect_params(hdf5.filename, executor=lt_ctx.executor)["parameters"]
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(
            path=hdf5.filename,
            ds_path="data",
        )
        ds = ds.initialize(lt_ctx.executor)
        diags = ds.diagnostics
        print(diags)

def test_timeout_2(hdf5):
    with mock.patch('time.time', side_effect=[1, 30, 30, 60]):
        params = H5DataSet.detect_params(hdf5.filename)
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 4, 16, 16))
        ds = ds.initialize()
        diags = ds.diagnostics
        print(diags)

def test_timeout_1(hdf5):
    with mock.patch('h5py.File.visititems', side_effect=TimeoutError("too slow")):
        params = H5DataSet.detect_params(hdf5.filename)
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 4, 16, 16))
        ds = ds.initialize()
        diags = ds.diagnostics
        print(diags)

def test_timeout_2(hdf5, lt_ctx):
    print(threading.enumerate())
    with mock.patch('libertem.io.dataset.hdf5.current_time', side_effect=[1, 30]):
        params = H5DataSet.detect_params(hdf5.filename, executor=lt_ctx.executor)["parameters"]
        assert list(params.keys()) == ["path"]

    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    print(threading.enumerate())

    with mock.patch('libertem.io.dataset.hdf5.current_time', side_effect=[30, 60]):
        diags = ds.diagnostics
        print(diags)

def do_com(fn, tileshape):
    ds = H5DataSet(
        path=fn,
        ds_path="data",
        tileshape=tileshape,
        target_size=512*1024*1024,
    )

    masks = [
        # summation of all pixels:
        lambda: np.ones(shape=ds.shape[2:]),

        # gradient from left to right
        lambda: gradient_x(*ds.shape[2:]),

        # gradient from top to bottom
        lambda: gradient_y(*ds.shape[2:]),
    ]
    job = ApplyMasksJob(dataset=ds, mask_factories=masks)
    print(job.masks.computed_masks)
    print("\n\n")
    executor = InlineJobExecutor()
    full_result = np.zeros(shape=(3,) + ds.shape[:2])
    color = np.zeros(shape=(3,) + ds.shape[:2])
    for result in executor.run_job(job):
        for tile in result:
            print(tile)
            print(tile.data[0])
            color[tile.tile_slice.get()[:2]] += 1
            tile.copy_to_result(full_result)
    x_centers = np.divide(full_result[1], full_result[0])
    y_centers = np.divide(full_result[2], full_result[0])
    print(color)
    return full_result, x_centers, y_centers

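# do_com() above relies on gradient_x() and gradient_y() mask helpers that are
# not defined in this file. A minimal sketch of what they could look like,
# assuming they return a linear ramp over the signal dimensions (the names
# match the calls above; everything else is an assumption):

def gradient_x(h, w):
    # ramp from left (0) to right (w - 1), identical for every row
    return np.tile(np.arange(w, dtype=np.float32), (h, 1))


def gradient_y(h, w):
    # ramp from top (0) to bottom (h - 1), identical for every column
    return np.tile(np.arange(h, dtype=np.float32)[:, np.newaxis], (1, w))
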
def test_diags(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    print(ds.diagnostics)

@pytest.fixture
def hdf5_ds_1(hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds

def test_check_valid(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    assert ds.check_valid()

@pytest.fixture
def hdf5_ds_1(hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        tileshape=(1, 5, 16, 16),
        target_size=512 * 1024 * 1024,
    )
    ds.initialize()
    return ds

@pytest.fixture
def hdf5_ds_large_sig(random_hdf5):
    ds = H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds

def test_roi_3(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        target_size=12800*2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.zeros(ds.shape.flatten_nav().nav, dtype=bool)
    roi[24] = 1
    tileshape = Shape(
        (16,) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    tiles = []
    for p in ds.get_partitions():
        for tile in p.get_tiles(tiling_scheme=tiling_scheme, dest_dtype="float32", roi=roi):
            print("tile:", tile)
            tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)
    assert np.allclose(tiles[0].data, hdf5['data'][4, 4])

def test_cloudpickle(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        target_size=512*1024*1024,
    )

    pickled = cloudpickle.dumps(ds)
    loaded = cloudpickle.loads(pickled)

    assert loaded._dtype is None
    assert loaded._shape is None
    repr(loaded)

    ds = ds.initialize(lt_ctx.executor)

    pickled = cloudpickle.dumps(ds)
    loaded = cloudpickle.loads(pickled)

    assert loaded._dtype is not None
    assert loaded._shape is not None
    loaded.shape
    loaded.dtype
    repr(loaded)

    # let's keep the pickled dataset size small-ish:
    assert len(pickled) < 1 * 1024

def test_cache_key_json_serializable(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    json.dumps(ds.get_cache_key())

def test_read_2(lt_ctx, hdf5):
    ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 3, 16, 16))
    ds = ds.initialize(lt_ctx.executor)
    for p in ds.get_partitions():
        for t in p.get_tiles():
            print(t.tile_slice)

def create_random_hdf5(path):
    with h5py.File(path, 'w') as f:
        sample_data = np.random.randn(16, 16, 16, 16).astype("float32")
        f.create_dataset("data", (16, 16, 16, 16), data=sample_data)
    # read and provide the ds
    ds = H5DataSet(path=path, ds_path='data')
    ds = ds.initialize(InlineJobExecutor())
    return ds

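# Most tests in this file take an `hdf5` fixture that is defined elsewhere
# (typically in conftest.py). Judging from the assertions used here
# (reshape(25, 16, 16), reshape(5, 5), hdf5['data'][4, 4]), it provides an
# open HDF5 file with a 5x5x16x16 "data" dataset. A minimal stand-in sketch;
# the tmpdir handling, dtype and random content are assumptions:

@pytest.fixture
def hdf5(tmpdir_factory):
    filename = str(tmpdir_factory.mktemp("data").join("hdf5-test.h5"))
    # 5x5 scan positions with 16x16 detector frames
    data = np.random.randn(5, 5, 16, 16).astype("float32")
    with h5py.File(filename, "w") as f:
        f.create_dataset("data", data=data)
    with h5py.File(filename, "r") as f:
        yield f
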
def test_pick(hdf5, lt_ctx):
    ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 3, 16, 16))
    ds.initialize()
    assert len(ds.shape) == 4
    print(ds.shape)
    pick = lt_ctx.create_pick_analysis(dataset=ds, x=2, y=3)
    lt_ctx.run(pick)

def test_pick(hdf5, lt_ctx, TYPE):
    # TYPE is expected to be supplied via pytest.mark.parametrize
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    assert len(ds.shape) == 4
    print(ds.shape)
    pick = lt_ctx.create_pick_analysis(dataset=ds, x=2, y=3)
    pick.TYPE = TYPE
    lt_ctx.run(pick)

def test_read_3(lt_ctx, random_hdf5): # try with smaller partitions: ds = H5DataSet(path=random_hdf5.filename, ds_path="data", tileshape=(1, 2, 16, 16), target_size=4096) ds = ds.initialize(lt_ctx.executor) for p in ds.get_partitions(): for t in p.get_tiles(): print(t.tile_slice)
def test_auto_tileshape(chunked_hdf5, lt_ctx):
    ds = H5DataSet(
        path=chunked_hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    t = next(p.get_tiles(dest_dtype="float32", target_size=4 * 1024))
    assert tuple(p._get_tileshape("float32", 4 * 1024)) == (1, 4, 16, 16)
    assert tuple(t.tile_slice.shape) == (4, 16, 16)

def test_read_3(lt_ctx, random_hdf5): # try with smaller partitions: ds = H5DataSet(path=random_hdf5.filename, ds_path="data", target_size=4096) ds = ds.initialize(lt_ctx.executor) tileshape = Shape((16, ) + tuple(ds.shape.sig), sig_dims=ds.shape.sig.dims) tiling_scheme = TilingScheme.make_for_shape( tileshape=tileshape, dataset_shape=ds.shape, ) for p in ds.get_partitions(): for t in p.get_tiles(tiling_scheme=tiling_scheme): print(t.tile_slice)
def test_read_2(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    tileshape = Shape(
        (16,) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    for p in ds.get_partitions():
        for t in p.get_tiles(tiling_scheme=tiling_scheme):
            print(t.tile_slice)

def test_roi_4(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        target_size=12800 * 2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.random.choice(size=ds.shape.flatten_nav().nav, a=[True, False])
    sum_udf = lt_ctx.create_sum_analysis(dataset=ds)
    sumres = lt_ctx.run(sum_udf, roi=roi)['intensity']
    assert np.allclose(
        sumres,
        np.sum(hdf5['data'][:].reshape(25, 16, 16)[roi, ...], axis=0)
    )

def test_roi_2(random_hdf5, lt_ctx, mnp):
    # mnp (min_num_partitions) is expected to be supplied via pytest.mark.parametrize
    ds = H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
        tileshape=(1, 4, 16, 16),
        min_num_partitions=mnp,
    )
    ds = ds.initialize(lt_ctx.executor)

    roi = {
        "shape": "disk",
        "cx": 2,
        "cy": 2,
        "r": 1,
    }
    analysis = SumAnalysis(dataset=ds, parameters={
        "roi": roi,
    })
    print(analysis.get_roi())
    results = lt_ctx.run(analysis)

    # let's draw a circle!
    mask = np.full((5, 5), False)
    mask[1, 2] = True
    mask[2, 1:4] = True
    mask[3, 2] = True
    print(mask)

    assert mask.shape == (5, 5)
    assert mask.dtype == bool

    reader = ds.get_reader()
    with reader.get_h5ds() as h5ds:
        data = np.array(h5ds)

        # applying the mask flattens the first two dimensions, so we
        # only sum over axis 0 here:
        expected = data[mask, ...].sum(axis=(0,))

        assert expected.shape == (16, 16)
        assert results.intensity.raw_data.shape == (16, 16)

        # is not equal to results without mask:
        assert not np.allclose(results.intensity.raw_data, data.sum(axis=(0, 1)))
        # ... but rather like `expected`:
        assert np.allclose(results.intensity.raw_data, expected)

def test_roi_5(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        target_size=12800*2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.random.choice(size=ds.shape.flatten_nav().nav, a=[True, False])
    udf = SumSigUDF()
    sumres = lt_ctx.run_udf(dataset=ds, udf=udf, roi=roi)['intensity']
    assert np.allclose(
        sumres.raw_data,
        np.sum(hdf5['data'][:][roi.reshape(5, 5), ...], axis=(1, 2))
    )

def test_roi_1(hdf5, lt_ctx):
    ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 4, 16, 16))
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    roi = np.zeros(p.meta.shape.flatten_nav().nav, dtype=bool)
    roi[0] = 1
    tiles = []
    for tile in p.get_tiles(dest_dtype="float32", roi=roi):
        print("tile:", tile)
        tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)

def test_scheme_idx(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    sig_shape = tuple(ds.shape.sig)
    tileshape = Shape(
        (16,) + sig_shape[:-1] + (sig_shape[-1] // 2,),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    tiles = p.get_tiles(tiling_scheme=tiling_scheme)

    for tile, expected_idx in zip(tiles, itertools.cycle([0, 1])):
        print(tile.scheme_idx, tile.tile_slice)
        assert tile.scheme_idx == expected_idx

def test_roi_1(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    roi = np.zeros(p.meta.shape.flatten_nav().nav, dtype=bool)
    roi[0] = 1
    tiles = []
    tileshape = Shape(
        (16,) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    for tile in p.get_tiles(tiling_scheme=tiling_scheme, dest_dtype="float32", roi=roi):
        print("tile:", tile)
        tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)

def hdf5_ds_1(hdf5): ds = H5DataSet(path=hdf5.filename, ds_path="data", tileshape=(1, 5, 16, 16)) ds = ds.initialize() return ds
@pytest.fixture
def hdf5_ds_2(random_hdf5):
    ds = H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
        tileshape=(1, 5, 16, 16),
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds

@pytest.fixture
def hdf5_ds_5d(hdf5_5d):
    ds = H5DataSet(path=hdf5_5d.filename, ds_path="data", tileshape=(1, 1, 1, 16, 16))
    ds = ds.initialize(InlineJobExecutor())
    return ds
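
# hdf5_ds_5d above consumes an `hdf5_5d` fixture that is also defined elsewhere.
# The tileshape (1, 1, 1, 16, 16) only tells us it is a 5D dataset with 16x16
# signal frames; the navigation shape chosen below is purely illustrative:

@pytest.fixture
def hdf5_5d(tmpdir_factory):
    filename = str(tmpdir_factory.mktemp("data").join("hdf5-5d-test.h5"))
    # hypothetical 3x5x5 navigation grid over 16x16 frames
    data = np.random.randn(3, 5, 5, 16, 16).astype("float32")
    with h5py.File(filename, "w") as f:
        f.create_dataset("data", data=data)
    with h5py.File(filename, "r") as f:
        yield f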