Example no. 1
def test_timeout_1(hdf5, lt_ctx):
    with mock.patch('h5py.File.visititems', side_effect=TimeoutError("too slow")):
        params = H5DataSet.detect_params(hdf5.filename, executor=lt_ctx.executor)["parameters"]
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(
            path=hdf5.filename, ds_path="data",
        )
        ds = ds.initialize(lt_ctx.executor)
        diags = ds.diagnostics
        print(diags)
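These excerpts omit their imports, and `lt_ctx` is never defined; it is presumably a pytest fixture providing a LiberteM Context on the inline executor. The following preamble is a sketch that would make most of the examples runnable; the module paths are assumptions based on the identifiers used and may differ between LiberteM versions (`ApplyMasksJob` in particular belongs to the older Job-based API):

import itertools
import json
import threading
from unittest import mock

import cloudpickle
import h5py
import numpy as np
import pytest

from libertem.api import Context
from libertem.common import Shape
from libertem.executor.inline import InlineJobExecutor
from libertem.io.dataset.base import TilingScheme
from libertem.io.dataset.hdf5 import H5DataSet
from libertem.analysis.sum import SumAnalysis
from libertem.udf.sumsigudf import SumSigUDF
from libertem.job.masks import ApplyMasksJob  # older Job-based API


@pytest.fixture
def lt_ctx():
    # assumed: a Context running single-threaded on the inline executor,
    # so the tests stay deterministic and easy to debug
    return Context(executor=InlineJobExecutor())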
Example no. 2
def test_timeout_2(hdf5):
    with mock.patch('time.time', side_effect=[1, 30, 30, 60]):
        params = H5DataSet.detect_params(hdf5.filename)
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(path=hdf5.filename,
                       ds_path="data",
                       tileshape=(1, 4, 16, 16))
        ds = ds.initialize()
        diags = ds.diagnostics
        print(diags)
Example no. 3
def test_timeout_1(hdf5):
    with mock.patch('h5py.File.visititems',
                    side_effect=TimeoutError("too slow")):
        params = H5DataSet.detect_params(hdf5.filename)
        assert list(params.keys()) == ["path"]

        ds = H5DataSet(path=hdf5.filename,
                       ds_path="data",
                       tileshape=(1, 4, 16, 16))
        ds = ds.initialize()
        diags = ds.diagnostics
        print(diags)
Example no. 4
def test_timeout_2(hdf5, lt_ctx):
    print(threading.enumerate())
    with mock.patch('libertem.io.dataset.hdf5.current_time', side_effect=[1, 30]):
        params = H5DataSet.detect_params(hdf5.filename, executor=lt_ctx.executor)["parameters"]
        assert list(params.keys()) == ["path"]

    ds = H5DataSet(
        path=hdf5.filename, ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)

    print(threading.enumerate())
    with mock.patch('libertem.io.dataset.hdf5.current_time', side_effect=[30, 60]):
        diags = ds.diagnostics
        print(diags)
Example no. 5
def do_com(fn, tileshape):
    ds = H5DataSet(
        path=fn,
        ds_path="data",
        tileshape=tileshape,
        target_size=512*1024*1024,
    )

    masks = [
        # summation of all pixels:
        lambda: np.ones(shape=ds.shape[2:]),

        # gradient from left to right
        lambda: gradient_x(*ds.shape[2:]),

        # gradient from top to bottom
        lambda: gradient_y(*ds.shape[2:]),
    ]
    job = ApplyMasksJob(dataset=ds, mask_factories=masks)
    print(job.masks.computed_masks)
    print("\n\n")
    executor = InlineJobExecutor()
    full_result = np.zeros(shape=(3,) + ds.shape[:2])
    color = np.zeros(shape=(3,) + ds.shape[:2])
    for result in executor.run_job(job):
        for tile in result:
            print(tile)
            print(tile.data[0])
            color[tile.tile_slice.get()[:2]] += 1
            tile.copy_to_result(full_result)
    x_centers = np.divide(full_result[1], full_result[0])
    y_centers = np.divide(full_result[2], full_result[0])
    print(color)

    return full_result, x_centers, y_centers
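`gradient_x` and `gradient_y` are not defined in these excerpts. Since `do_com` divides the gradient-weighted sums by the plain sum to obtain centers of mass, they are presumably per-pixel coordinate ramps over the signal frame; a plausible sketch:

def gradient_x(h, w):
    # hypothetical helper: the x coordinate of every pixel in an (h, w) frame
    return np.tile(np.arange(w, dtype=np.float32), (h, 1))


def gradient_y(h, w):
    # hypothetical helper: the y coordinate of every pixel in an (h, w) frame
    return np.tile(np.arange(h, dtype=np.float32)[:, np.newaxis], (1, w))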
Example no. 6
def test_diags(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    print(ds.diagnostics)
Example no. 7
def hdf5_ds_1(hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds
Example no. 8
def test_check_valid(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    assert ds.check_valid()
Example no. 9
def hdf5_ds_1(hdf5):
    ds = H5DataSet(path=hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 5, 16, 16),
                   target_size=512 * 1024 * 1024)
    ds = ds.initialize()
    return ds
Example no. 10
def hdf5_ds_large_sig(random_hdf5):
    ds = H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds
Example no. 11
def test_roi_3(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename, ds_path="data",
        target_size=12800*2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.zeros(ds.shape.flatten_nav().nav, dtype=bool)
    roi[24] = 1

    tileshape = Shape(
        (16,) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )

    tiles = []
    for p in ds.get_partitions():
        for tile in p.get_tiles(tiling_scheme=tiling_scheme, dest_dtype="float32", roi=roi):
            print("tile:", tile)
            tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)
    assert np.allclose(tiles[0].data, hdf5['data'][4, 4])
Example no. 12
def test_cloudpickle(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename, ds_path="data", target_size=512*1024*1024
    )

    pickled = cloudpickle.dumps(ds)
    loaded = cloudpickle.loads(pickled)

    assert loaded._dtype is None
    assert loaded._shape is None
    repr(loaded)

    ds = ds.initialize(lt_ctx.executor)

    pickled = cloudpickle.dumps(ds)
    loaded = cloudpickle.loads(pickled)

    assert loaded._dtype is not None
    assert loaded._shape is not None
    loaded.shape
    loaded.dtype
    repr(loaded)

    # let's keep the pickled dataset size small-ish:
    assert len(pickled) < 1 * 1024
Example no. 13
def test_cache_key_json_serializable(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    json.dumps(ds.get_cache_key())
Example no. 14
def test_read_2(lt_ctx, hdf5):
    ds = H5DataSet(path=hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 3, 16, 16))
    ds = ds.initialize(lt_ctx.executor)
    for p in ds.get_partitions():
        for t in p.get_tiles():
            print(t.tile_slice)
Example no. 15
def create_random_hdf5(path):
    with h5py.File(path, 'w') as f:
        sample_data = np.random.randn(16, 16, 16, 16).astype("float32")
        f.create_dataset("data", (16, 16, 16, 16), data=sample_data)
        # read and provide the ds
    ds = H5DataSet(path=path, ds_path='data')
    ds = ds.initialize(InlineJobExecutor())
    return ds
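The `hdf5` fixture used throughout is never shown; `random_hdf5` evidently follows the `create_random_hdf5` pattern above. From the assertions elsewhere in this section (a navigation size of 25, `roi.reshape(5, 5)`, 16x16 frames, access to both `.filename` and `['data']`), `hdf5` plausibly yields an open file holding a (5, 5, 16, 16) dataset. A minimal pytest sketch under that assumption:

@pytest.fixture
def hdf5(tmp_path):
    # hypothetical fixture: a 5x5 scan of 16x16 frames, matching the
    # shapes asserted in the roi tests in this section
    path = str(tmp_path / "hdf5-test.h5")
    with h5py.File(path, "w") as f:
        f.create_dataset("data", data=np.random.randn(5, 5, 16, 16).astype("float32"))
    with h5py.File(path, "r") as f:
        yield f  # tests read both f.filename and f['data']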
Example no. 16
def test_pick(hdf5, lt_ctx):
    ds = H5DataSet(path=hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 3, 16, 16))
    ds = ds.initialize()
    assert len(ds.shape) == 4
    print(ds.shape)
    pick = lt_ctx.create_pick_analysis(dataset=ds, x=2, y=3)
    lt_ctx.run(pick)
Example no. 17
def test_pick(hdf5, lt_ctx, TYPE):
    ds = H5DataSet(
        path=hdf5.filename, ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    assert len(ds.shape) == 4
    print(ds.shape)
    pick = lt_ctx.create_pick_analysis(dataset=ds, x=2, y=3)
    pick.TYPE = TYPE
    lt_ctx.run(pick)
Example no. 18
def test_read_3(lt_ctx, random_hdf5):
    # try with smaller partitions:
    ds = H5DataSet(path=random_hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 2, 16, 16),
                   target_size=4096)
    ds = ds.initialize(lt_ctx.executor)
    for p in ds.get_partitions():
        for t in p.get_tiles():
            print(t.tile_slice)
Example no. 19
def test_auto_tileshape(chunked_hdf5, lt_ctx):
    ds = H5DataSet(
        path=chunked_hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    t = next(p.get_tiles(dest_dtype="float32", target_size=4 * 1024))
    assert tuple(p._get_tileshape("float32", 4 * 1024)) == (1, 4, 16, 16)
    assert tuple(t.tile_slice.shape) == (4, 16, 16)
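`chunked_hdf5` is also not shown. The asserted auto-detected tileshape of (1, 4, 16, 16) suggests a dataset written with matching HDF5 chunks; a hypothetical fixture:

@pytest.fixture
def chunked_hdf5(tmp_path):
    # hypothetical fixture: same 4D layout, stored with explicit chunks so
    # that H5DataSet can derive a tileshape from the chunking
    path = str(tmp_path / "chunked.h5")
    with h5py.File(path, "w") as f:
        f.create_dataset(
            "data",
            data=np.random.randn(16, 16, 16, 16).astype("float32"),
            chunks=(1, 4, 16, 16),
        )
    with h5py.File(path, "r") as f:
        yield f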
Example no. 20
def test_read_3(lt_ctx, random_hdf5):
    # try with smaller partitions:
    ds = H5DataSet(path=random_hdf5.filename, ds_path="data", target_size=4096)
    ds = ds.initialize(lt_ctx.executor)
    tileshape = Shape((16, ) + tuple(ds.shape.sig), sig_dims=ds.shape.sig.dims)
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    for p in ds.get_partitions():
        for t in p.get_tiles(tiling_scheme=tiling_scheme):
            print(t.tile_slice)
Example no. 21
def test_read_2(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    tileshape = Shape((16, ) + tuple(ds.shape.sig), sig_dims=ds.shape.sig.dims)
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    for p in ds.get_partitions():
        for t in p.get_tiles(tiling_scheme=tiling_scheme):
            print(t.tile_slice)
Example no. 22
def test_roi_4(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
        target_size=12800 * 2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.random.choice(size=ds.shape.flatten_nav().nav, a=[True, False])

    sum_analysis = lt_ctx.create_sum_analysis(dataset=ds)
    sumres = lt_ctx.run(sum_analysis, roi=roi)['intensity']

    assert np.allclose(
        sumres, np.sum(hdf5['data'][:].reshape(25, 16, 16)[roi, ...], axis=0))
Example no. 23
def test_roi_2(random_hdf5, lt_ctx, mnp):
    ds = H5DataSet(
        path=random_hdf5.filename,
        ds_path="data",
        tileshape=(1, 4, 16, 16),
        min_num_partitions=mnp,
    )
    ds = ds.initialize(lt_ctx.executor)

    roi = {
        "shape": "disk",
        "cx": 2,
        "cy": 2,
        "r": 1,
    }
    analysis = SumAnalysis(dataset=ds, parameters={
        "roi": roi,
    })

    print(analysis.get_roi())

    results = lt_ctx.run(analysis)

    # let's draw a circle!
    mask = np.full((5, 5), False)
    mask[1, 2] = True
    mask[2, 1:4] = True
    mask[3, 2] = True

    print(mask)

    assert mask.shape == (5, 5)
    assert mask.dtype == bool

    reader = ds.get_reader()
    with reader.get_h5ds() as h5ds:
        data = np.array(h5ds)

        # applying the mask flattens the first two dimensions, so we
        # only sum over axis 0 here:
        expected = data[mask, ...].sum(axis=(0, ))

        assert expected.shape == (16, 16)
        assert results.intensity.raw_data.shape == (16, 16)

        # is not equal to results without mask:
        assert not np.allclose(results.intensity.raw_data,
                               data.sum(axis=(0, 1)))
        # ... but rather like `expected`:
        assert np.allclose(results.intensity.raw_data, expected)
Example no. 24
def test_roi_5(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename, ds_path="data",
        target_size=12800*2,
    )
    ds = ds.initialize(lt_ctx.executor)
    roi = np.random.choice(size=ds.shape.flatten_nav().nav, a=[True, False])

    udf = SumSigUDF()
    sumres = lt_ctx.run_udf(dataset=ds, udf=udf, roi=roi)['intensity']

    assert np.allclose(
        sumres.raw_data,
        np.sum(hdf5['data'][:][roi.reshape(5, 5), ...], axis=(1, 2))
    )
Example no. 25
def test_roi_1(hdf5, lt_ctx):
    ds = H5DataSet(path=hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 4, 16, 16))
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    roi = np.zeros(p.meta.shape.flatten_nav().nav, dtype=bool)
    roi[0] = 1
    tiles = []
    for tile in p.get_tiles(dest_dtype="float32", roi=roi):
        print("tile:", tile)
        tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)
Example no. 26
def test_scheme_idx(lt_ctx, hdf5):
    ds = H5DataSet(
        path=hdf5.filename,
        ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())

    sig_shape = tuple(ds.shape.sig)
    tileshape = Shape((16, ) + sig_shape[:-1] + (sig_shape[-1] // 2, ),
                      sig_dims=ds.shape.sig.dims)
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    tiles = p.get_tiles(tiling_scheme=tiling_scheme)

    for tile, expected_idx in zip(tiles, itertools.cycle([0, 1])):
        print(tile.scheme_idx, tile.tile_slice)
        assert tile.scheme_idx == expected_idx
Example no. 27
def test_roi_1(hdf5, lt_ctx):
    ds = H5DataSet(
        path=hdf5.filename, ds_path="data",
    )
    ds = ds.initialize(lt_ctx.executor)
    p = next(ds.get_partitions())
    roi = np.zeros(p.meta.shape.flatten_nav().nav, dtype=bool)
    roi[0] = 1
    tiles = []
    tileshape = Shape(
        (16,) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )
    for tile in p.get_tiles(tiling_scheme=tiling_scheme, dest_dtype="float32", roi=roi):
        print("tile:", tile)
        tiles.append(tile)
    assert len(tiles) == 1
    assert tiles[0].tile_slice.shape.nav.size == 1
    assert tuple(tiles[0].tile_slice.shape.sig) == (16, 16)
    assert tiles[0].tile_slice.origin == (0, 0, 0)
Example no. 28
def hdf5_ds_1(hdf5):
    ds = H5DataSet(path=hdf5.filename,
                   ds_path="data",
                   tileshape=(1, 5, 16, 16))
    ds = ds.initialize()
    return ds
Example no. 29
def hdf5_ds_2(random_hdf5):
    ds = H5DataSet(
        path=random_hdf5.filename, ds_path="data", tileshape=(1, 5, 16, 16)
    )
    ds = ds.initialize(InlineJobExecutor())
    return ds
Example no. 30
def hdf5_ds_5d(hdf5_5d):
    ds = H5DataSet(path=hdf5_5d.filename,
                   ds_path="data",
                   tileshape=(1, 1, 1, 16, 16))
    ds = ds.initialize(InlineJobExecutor())
    return ds
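`hdf5_5d` is not defined either; the (1, 1, 1, 16, 16) tileshape implies a 5D dataset with three navigation dimensions. A hypothetical fixture with an arbitrary (2, 5, 5) scan; the exact navigation shape is an assumption:

@pytest.fixture
def hdf5_5d(tmp_path):
    # hypothetical fixture: three nav dims, e.g. a small series of 5x5 scans
    path = str(tmp_path / "hdf5-5d.h5")
    with h5py.File(path, "w") as f:
        f.create_dataset("data", data=np.random.randn(2, 5, 5, 16, 16).astype("float32"))
    with h5py.File(path, "r") as f:
        yield f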