def test_multi_partition_wins():
    # FIXME: the constellation partition+tile (or partition+frame, etc.)
    # could be optimized by using a tiling scheme that fits tile/frame
    # processing, accumulating the whole partition into a buffer, and
    # running process_partition after the tile loop.
    udfs = [
        TestUDFPartition(),
        UDFUnlimitedDepth(),
        TestUDFBestFit(),
        UDFWithLargeDepth(),
    ]

    dataset = MemoryDataSet(
        data=_mk_random(size=(32, 1860, 2048)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=udfs, partition=partition, read_dtype=np.float32, roi=None,
    )
    print(scheme._debug)
    # the partition-wise UDF forces the full partition shape:
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (32, 1860, 2048)
def test_multi_no_by_frame_small_size_wins():
    udfs = [
        UDFUnlimitedDepth(),
        TestUDFBestFit(),
        UDFWithLargeDepth(),
    ]

    dataset = MemoryDataSet(
        data=_mk_random(size=(32, 1860, 2048)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=udfs, partition=partition, read_dtype=np.float32, roi=None,
    )
    print(scheme._debug)
    # without a partition-wise UDF in the mix, the small size wins:
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (17, 930, 16)
# --- Example #3 ---
def test_hdf5_tileshape_negotation(lt_ctx, tmpdir_factory):
    # aim to hit the third case in _get_subslices:
    datadir = tmpdir_factory.mktemp('data')
    path = os.path.join(datadir, 'tileshape-neg-test.h5')
    data = _mk_random((4, 100, 256, 256), dtype=np.uint16)

    with h5py.File(path, "w") as f:
        f.create_dataset("data", data=data, chunks=(2, 32, 32, 32))

    ds = lt_ctx.load("hdf5", path=path)

    partition = next(ds.get_partitions())
    tiling_scheme = Negotiator().get_scheme(
        udfs=[UDFWithLargeDepth()],
        partition=partition,
        read_dtype=np.float32,
        roi=None,
        corrections=None,
    )
    assert len(tiling_scheme) > 1
    # make sure a tile can actually be read with this scheme:
    next(partition.get_tiles(
        tiling_scheme=tiling_scheme,
        roi=None,
        dest_dtype=np.float32,
    ))
# --- Example #4 ---
def test_hdf5_result_dtype(lt_ctx, tmpdir_factory, in_dtype, read_dtype,
                           use_roi):
    datadir = tmpdir_factory.mktemp('data')
    path = os.path.join(datadir, 'result-dtype-checks.h5')
    data = _mk_random((2, 2, 4, 4), dtype=in_dtype)

    with h5py.File(path, "w") as f:
        f.create_dataset("data", data=data)

    ds = lt_ctx.load("hdf5", path=path)

    roi = None
    if use_roi:
        roi = np.zeros(ds.shape.nav, dtype=bool).reshape((-1, ))
        roi[0] = 1
    # the negotiator needs at least one UDF:
    udfs = [SumSigUDF()]
    partition = next(ds.get_partitions())
    tiling_scheme = Negotiator().get_scheme(
        udfs=udfs,
        partition=partition,
        read_dtype=read_dtype,
        roi=roi,
        corrections=None,
    )
    tile = next(partition.get_tiles(
        tiling_scheme=tiling_scheme,
        roi=roi,
        dest_dtype=read_dtype,
    ))
    # the tile arrives already converted to the requested read dtype:
    assert tile.dtype == np.dtype(read_dtype)
def test_get_scheme_partition(default_raw):
    partition = next(default_raw.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[TestUDFPartition()],
        partition=partition,
        read_dtype=np.float32,
        roi=None,
    )
    # a partition-wise UDF negotiates the full partition as one slice:
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (128, 128, 128)
def test_correction_size_overflow():
    dataset = MemoryDataSet(
        data=_mk_random(size=(32, 1860, 2048)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )

    # a single excluded pixel at sig position (930, 16):
    excluded_pixels = sparse.COO(
        coords=np.array([
            (930, ),
            (16, ),
        ]),
        shape=dataset.shape.sig,
        data=True,
    )
    corr = CorrectionSet(excluded_pixels=excluded_pixels)

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[UDFWithLargeDepth()],
        partition=partition,
        read_dtype=np.float32,
        roi=None,
        corrections=corr,
    )
    print(scheme._debug)
    assert scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (4, 1860, 32)
# --- Example #7 ---
def prime_numba_cache(ds):
    # imports hoisted out of the loop; modules are cached after first import
    from libertem.udf.sum import SumUDF
    from libertem.corrections.corrset import CorrectionSet
    from libertem.io.dataset.base import Negotiator

    for dtype in (np.float32, None):
        roi = np.zeros(ds.shape.nav, dtype=bool).reshape((-1,))
        roi[0] = 1

        # the negotiator needs at least one UDF:
        udfs = [SumUDF()]
        p = next(ds.get_partitions())
        neg = Negotiator()
        for corr_dtype in (np.float32, None):
            corrections = None
            if corr_dtype is not None:
                corrections = CorrectionSet(dark=np.zeros(ds.shape.sig, dtype=corr_dtype))
            p.set_corrections(corrections)
            tiling_scheme = neg.get_scheme(
                udfs=udfs,
                partition=p,
                read_dtype=dtype,
                roi=roi,
                corrections=corrections,
            )
            # pull one tile so the numba-compiled read path gets exercised:
            next(p.get_tiles(tiling_scheme=tiling_scheme, roi=roi))
# --- Example #8 ---
    def _init_udfs(self, numpy_udfs, cupy_udfs, partition, roi, corrections,
                   device_class):
        """
        Prepare all UDFs for processing `partition`: assign compute backends,
        allocate result buffers, run per-UDF preprocessing, and negotiate
        the tiling scheme.

        Returns a tuple ``(meta, tiling_scheme, dtype)``; the returned `meta`
        (including the negotiated tiling scheme) has already been set on
        every UDF.
        """
        dtype = self._get_dtype(partition.dtype, corrections)
        # preliminary meta without a tiling scheme -- the scheme is only
        # negotiated further down, after the UDFs have been initialized
        meta = UDFMeta(
            partition_shape=partition.slice.adjust_for_roi(roi).shape,
            dataset_shape=partition.meta.shape,
            roi=roi,
            dataset_dtype=partition.dtype,
            input_dtype=dtype,
            tiling_scheme=None,
            corrections=corrections,
            device_class=device_class,
        )
        # numpy-flavored UDFs get the 'cuda' backend on cuda workers,
        # plain 'numpy' otherwise
        for udf in numpy_udfs:
            if device_class == 'cuda':
                udf.set_backend('cuda')
            else:
                udf.set_backend('numpy')
        # cupy UDFs cannot run on a cpu-only worker
        if device_class == 'cpu':
            assert not cupy_udfs
        for udf in cupy_udfs:
            udf.set_backend('cupy')
        udfs = numpy_udfs + cupy_udfs
        for udf in udfs:
            udf.set_meta(meta)
            udf.init_result_buffers()
            udf.allocate_for_part(partition, roi)
            udf.init_task_data()
            if hasattr(udf, 'preprocess'):
                udf.clear_views()
                udf.preprocess()
        neg = Negotiator()
        # FIXME take compute backend into consideration as well
        # Other boundary conditions when moving input data to device
        tiling_scheme = neg.get_scheme(
            udfs=udfs,
            partition=partition,
            read_dtype=dtype,
            roi=roi,
            corrections=corrections,
        )

        # print(tiling_scheme)

        # FIXME: don't fully re-create?
        # second meta, now carrying the negotiated tiling scheme:
        meta = UDFMeta(
            partition_shape=partition.slice.adjust_for_roi(roi).shape,
            dataset_shape=partition.meta.shape,
            roi=roi,
            dataset_dtype=partition.dtype,
            input_dtype=dtype,
            tiling_scheme=tiling_scheme,
            corrections=corrections,
            device_class=device_class,
        )
        for udf in udfs:
            udf.set_meta(meta)
        return (meta, tiling_scheme, dtype)
def test_get_scheme_tile(default_raw):
    neg = Negotiator()
    partition = next(default_raw.get_partitions())
    udf = TestUDFBestFit()
    scheme = neg.get_scheme(
        udfs=[udf], partition=partition, read_dtype=np.float32, roi=None,
    )
    assert scheme.shape.sig.dims == 2
    print(neg._get_udf_size_pref(udf))
    print(scheme._debug)
    print(partition.shape)
    assert tuple(scheme.shape) == (64, 32, 128)
def test_get_scheme_frame(default_raw):
    partition = next(default_raw.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[TilingUDFFrame()],
        dataset=default_raw,
        approx_partition_shape=partition.shape,
        read_dtype=np.float32,
        roi=None,
    )
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (16, 128, 128)
# --- Example #11 ---
def test_get_scheme_upper_size_1():
    """
    Test that will hit the 2**20 default size
    """
    data = _mk_random(size=(1024, 144, 144))
    dataset = MemoryDataSet(data=data, num_partitions=1, sig_dims=2)

    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = TestUDFBestFit()
    # FIX: get_scheme() takes a list of UDFs via the `udfs=` keyword, as in
    # every other call site -- the previous singular `udf=` keyword did not
    # match the interface used elsewhere in this file.
    scheme = neg.get_scheme(udfs=[udf],
                            partition=p,
                            read_dtype=np.float32,
                            roi=None)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (65, 28, 144)
def test_scale_factors():
    neg = Negotiator()
    # scenario: k2is.
    base = (930, 16)
    containing = (1860, 2048)

    # size too small to grow beyond the base shape:
    assert neg._get_scale_factors(
        shape=base,
        containing_shape=containing,
        size=1024,
    ) == [1, 1]

    assert neg._get_scale_factors(
        shape=base,
        containing_shape=containing,
        size=930 * 16 * 16,
    ) == [2, 8]

    assert neg._get_scale_factors(
        shape=base,
        containing_shape=containing,
        size=930 * 16 * 128,
    ) == [2, 64]

    # slightly above, but not enough to fit another block: we err on the small side
    assert neg._get_scale_factors(
        shape=base,
        containing_shape=containing,
        size=930 * 16 * 129,
    ) == [2, 64]

    # larger size than we can accommodate with our containing shape:
    assert neg._get_scale_factors(
        shape=base,
        containing_shape=containing,
        size=1860 * 2048 * 2,
    ) == [2, 128]
def test_depth_max_size_max():
    dataset = MemoryDataSet(
        data=_mk_random(size=(32, 1860, 2048)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[UDFUnlimitedDepth()],
        partition=partition,
        read_dtype=np.float32,
        roi=None,
    )
    print(scheme._debug)
    # no decoding needed, so the whole partition fits one tile:
    assert not scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (32, 1860, 2048)
# --- Example #14 ---
def test_get_scheme_upper_size_2():
    """
    Test that will hit the 2**20 default size
    """
    dataset = MemoryDataSet(
        data=_mk_random(size=(2048, 264, 264)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 8, 264),
    )

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[TilingUDFBestFit()],
        partition=partition,
        read_dtype=np.float32,
        roi=None,
    )
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (124, 8, 264)
def test_limited_depth():
    dataset = MemoryDataSet(
        data=_mk_random(size=(32, 1860, 2048)),
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[UDFWithLargeDepth()],
        approx_partition_shape=partition.shape,
        dataset=dataset,
        read_dtype=np.float32,
        roi=None,
    )
    print(scheme._debug)
    # forced decoding limits the negotiated depth:
    assert scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (17, 930, 16)
# --- Example #16 ---
def prime_numba_cache(ds):
    # imports hoisted out of the loop; modules are cached after first import
    from libertem.udf.sum import SumUDF
    from libertem.udf.raw import PickUDF
    from libertem.io.corrections.corrset import CorrectionSet
    from libertem.io.dataset.base import Negotiator

    for dtype in (np.float32, None):
        roi = np.zeros(ds.shape.nav, dtype=bool).reshape((-1, ))
        roi[max(-ds._meta.sync_offset, 0)] = True

        # need to have at least one UDF; here we run for both sum and pick
        # to reduce the initial latency when switching to pick mode
        udfs = [SumUDF(), PickUDF()]
        neg = Negotiator()
        for udf in udfs:
            for corr_dtype in (np.float32, None):
                corrections = None
                if corr_dtype is not None:
                    corrections = CorrectionSet(
                        dark=np.zeros(ds.shape.sig, dtype=corr_dtype))
                # stop as soon as one tile has been read from any partition
                found_first_tile = False
                for p in ds.get_partitions():
                    if found_first_tile:
                        break
                    p.set_corrections(corrections)
                    tiling_scheme = neg.get_scheme(
                        udfs=[udf],
                        dataset=ds,
                        approx_partition_shape=p.shape,
                        read_dtype=dtype,
                        roi=roi,
                        corrections=corrections,
                    )
                    for _ in p.get_tiles(tiling_scheme=tiling_scheme, roi=roi):
                        found_first_tile = True
                        break
def test_get_scheme_upper_size_roi():
    """
    Confirm that a small ROI will not be split up unnecessarily.
    """
    dataset = MemoryDataSet(
        data=_mk_random(size=(1024, 144, 144)),
        num_partitions=1,
        sig_dims=2,
    )

    # select three frames; all live in the single partition
    roi = np.zeros(dataset.shape.nav, dtype=bool)
    for idx in (0, 512, -1):
        roi[idx] = True

    partition = next(dataset.get_partitions())
    scheme = Negotiator().get_scheme(
        udfs=[TilingUDFBestFit()],
        partition=partition,
        read_dtype=np.float32,
        roi=roi,
    )
    assert scheme.shape.sig.dims == 2
    # all three selected frames end up in one tile
    assert tuple(scheme.shape) == (3, 144, 144)
# --- Example #18 ---
    def run_for_partition(self, partition: Partition, roi):
        """
        Run all UDFs in ``self._udfs`` over a single partition.

        Initializes the UDFs, negotiates a tiling scheme, reads tiles and
        dispatches each tile to every UDF via its preferred processing
        method ('tile', 'frame' or 'partition'), then runs postprocessing
        and cleanup. Returns a tuple of per-UDF results.

        NOTE(review): for method == 'partition', process_partition() is
        called once per tile with the tile data -- presumably the negotiated
        scheme delivers the whole partition as a single tile in that case;
        confirm against the Negotiator.
        """
        with set_num_threads(1):
            dtype = self._get_dtype(partition.dtype)
            # preliminary meta without a tiling scheme; the scheme is
            # negotiated below once the UDFs are initialized
            meta = UDFMeta(
                partition_shape=partition.slice.adjust_for_roi(roi).shape,
                dataset_shape=partition.meta.shape,
                roi=roi,
                dataset_dtype=partition.dtype,
                input_dtype=dtype,
                tiling_scheme=None,
            )
            udfs = self._udfs
            for udf in udfs:
                udf.set_meta(meta)
                udf.init_result_buffers()
                udf.allocate_for_part(partition, roi)
                udf.init_task_data()
                if hasattr(udf, 'preprocess'):
                    udf.clear_views()
                    udf.preprocess()
            neg = Negotiator()
            tiling_scheme = neg.get_scheme(
                udfs=udfs,
                partition=partition,
                read_dtype=dtype,
                roi=roi,
            )

            # FIXME: don't fully re-create?
            # second meta, now carrying the negotiated tiling scheme:
            meta = UDFMeta(
                partition_shape=partition.slice.adjust_for_roi(roi).shape,
                dataset_shape=partition.meta.shape,
                roi=roi,
                dataset_dtype=partition.dtype,
                input_dtype=dtype,
                tiling_scheme=tiling_scheme,
            )
            for udf in udfs:
                udf.set_meta(meta)
            # print("UDF TilingScheme: %r" % tiling_scheme.shape)

            tiles = partition.get_tiles(tiling_scheme=tiling_scheme,
                                        roi=roi,
                                        dest_dtype=dtype)

            # dispatch each tile to every UDF via its preferred interface:
            for tile in tiles:
                for udf in udfs:
                    method = udf.get_method()
                    if method == 'tile':
                        udf.set_contiguous_views_for_tile(partition, tile)
                        udf.set_slice(tile.tile_slice)
                        udf.process_tile(tile)
                    elif method == 'frame':
                        # split the tile into individual frames; each frame
                        # slice has depth 1 at the frame's nav position
                        tile_slice = tile.tile_slice
                        for frame_idx, frame in enumerate(tile):
                            frame_slice = Slice(
                                origin=(tile_slice.origin[0] + frame_idx, ) +
                                tile_slice.origin[1:],
                                shape=Shape(
                                    (1, ) + tuple(tile_slice.shape)[1:],
                                    sig_dims=tile_slice.shape.sig.dims),
                            )
                            udf.set_slice(frame_slice)
                            udf.set_views_for_frame(partition, tile, frame_idx)
                            udf.process_frame(frame)
                    elif method == 'partition':
                        udf.set_views_for_tile(partition, tile)
                        udf.set_slice(partition.slice)
                        udf.process_partition(tile)
            for udf in udfs:
                udf.flush()
                if hasattr(udf, 'postprocess'):
                    udf.clear_views()
                    udf.postprocess()

                udf.cleanup()
                udf.clear_views()

            # in debug mode, verify that partition and results survive a
            # pickle round-trip (they must, to cross worker boundaries)
            if self._debug:
                try:
                    cloudpickle.loads(cloudpickle.dumps(partition))
                except TypeError:
                    raise TypeError("could not pickle partition")
                try:
                    cloudpickle.loads(
                        cloudpickle.dumps([u.results for u in udfs]))
                except TypeError:
                    raise TypeError("could not pickle results")

            return tuple(udf.results for udf in udfs)