def test_multi_masks(lt_ctx):
    """Apply a dense, a scipy.sparse and a sparse.COO mask in one analysis."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask0 = _mk_random(size=(16, 16))
    mask1 = sp.csr_matrix(_mk_random(size=(16, 16)))
    mask2 = sparse.COO.from_numpy(_mk_random(size=(16, 16)))
    reference = _naive_mask_apply([mask0, mask1, mask2], data)

    dataset = MemoryDataSet(
        data=data,
        tileshape=(4 * 4, 4, 4),
        num_partitions=2,
    )
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=[lambda: mask0, lambda: mask1, lambda: mask2],
    )
    results = lt_ctx.run(analysis)

    # One result channel per mask, in factory order.
    channels = (results.mask_0, results.mask_1, results.mask_2)
    for channel, expected in zip(channels, reference):
        assert np.allclose(channel.raw_data, expected)
def test_start_local_cupyonly(hdf5_ds_1):
    """Exercise a local cluster that only has CUDA/CuPy workers.

    With no CPU compute resources, NumPy-only UDFs must raise a
    RuntimeError, CuPy-enabled UDFs must report the 'cupy' backend, and
    every detected CUDA device should receive work.
    """
    cudas = detect()['cudas']
    # Make sure we have enough partitions
    hdf5_ds_1.set_num_cores(len(cudas))
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    # Cluster spec with no CPU workers and CuPy enabled on the GPUs.
    spec = cluster_spec(cpus=(), cudas=cudas, has_cupy=True)
    with DaskJobExecutor.make_local(spec=spec) as executor:
        ctx = api.Context(executor=executor)
        # Uses ApplyMasksUDF, which supports CuPy
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask]
        )
        results = ctx.run(analysis)
        udf_res = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        # No CPU compute resources
        with pytest.raises(RuntimeError):
            _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy',)), dataset=hdf5_ds_1)
        cuda_res = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',)), dataset=hdf5_ds_1)

    assert np.allclose(
        results.mask_0.raw_data,
        expected
    )

    # Maps CUDA device id -> True for every device that received work.
    found = {}

    for val in udf_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in cuda_res['device_id'].data[0].values():
        print(val)
        # no CPU
        assert val["cpu"] is None
        # Register which GPUs got work
        found[val["cuda"]] = True

    for val in udf_res['backend'].data[0].values():
        # use CuPy
        print(val)
        assert 'cupy' in val

    for val in cuda_res['backend'].data[0].values():
        # no CuPy, i.e. NumPy
        print(val)
        assert 'numpy' in val

    # Test if each GPU got work. We have to see if this
    # actually works always since this depends on the scheduler behavior
    assert set(found.keys()) == set(cudas)

    assert np.all(udf_res['device_class'].data == 'cuda')
    assert np.allclose(udf_res['on_device'].data, data.sum(axis=(0, 1)))
# Example 3
def test_multi_mask_stack_force_dense(lt_ctx, backend):
    """Force dense computation (use_sparse=False) for a sparse.COO mask stack.

    When ``backend == 'cupy'``, the test runs on the first detected CUDA
    device and is skipped if no CUDA device or no CuPy is available.
    """
    if backend == 'cupy':
        d = detect()
        # Reuse the detection result instead of calling detect() a second time.
        cudas = d['cudas']
        if not d['cudas'] or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
        masks = sparse.COO.from_numpy(_mk_random(size=(2, 16, 16)))
        expected = _naive_mask_apply(masks, data)

        dataset = MemoryDataSet(data=data,
                                tileshape=(4 * 4, 4, 4),
                                num_partitions=2)
        analysis = lt_ctx.create_mask_analysis(dataset=dataset,
                                               factories=lambda: masks,
                                               use_sparse=False,
                                               mask_count=2)
        results = lt_ctx.run(analysis)

        assert np.allclose(
            results.mask_0.raw_data,
            expected[0],
        )
        assert np.allclose(
            results.mask_1.raw_data,
            expected[1],
        )
    finally:
        # Always restore CPU processing, even if the test body raised.
        set_use_cpu(0)
def test_numerics(lt_ctx):
    """Check float32 mask summation against a naive reference at scale.

    A single cell carries an extra VAL * RANGE so the total is only
    reproduced when float32 precision suffices; with VAL = 1.0 it does
    (the test would fail for VAL = 1.1).
    """
    dtype = 'float32'
    RESOLUTION = 4096  # highest expected detector resolution
    RANGE = 1e6        # highest expected detector dynamic range
    VAL = 1.0          # default cell value; 1.1 breaks float32

    data = np.full((2, 2, RESOLUTION, RESOLUTION), VAL, dtype=dtype)
    data[0, 0, 0, 0] += VAL * RANGE
    dataset = MemoryDataSet(
        data=data,
        tileshape=(2, RESOLUTION, RESOLUTION),
        num_partitions=2,
        sig_dims=2,
    )
    mask0 = np.ones((RESOLUTION, RESOLUTION), dtype=dtype)
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=[lambda: mask0],
    )

    results = lt_ctx.run(analysis)
    frame_sum = VAL * RESOLUTION**2
    expected = np.array([[
        [frame_sum + VAL * RANGE, frame_sum],
        [frame_sum, frame_sum],
    ]])
    naive = _naive_mask_apply([mask0], data)

    assert np.allclose(expected, naive)
    assert np.allclose(expected[0], results.mask_0.raw_data)
# Example 5
def test_override_mask_dtype(lt_ctx):
    """Masks supplied as float64 are cast to the explicitly requested dtype."""
    mask_dtype = np.float32
    data = _mk_random(size=(16, 16, 16, 16), dtype=mask_dtype)
    masks = _mk_random(size=(2, 16, 16), dtype=np.float64)
    reference = _naive_mask_apply(masks.astype(mask_dtype), data)

    dataset = MemoryDataSet(
        data=data,
        tileshape=(4 * 4, 4, 4),
        num_partitions=2,
    )
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=lambda: masks,
        mask_dtype=mask_dtype,
        mask_count=len(masks),
    )
    results = lt_ctx.run(analysis)

    # The result dtype follows the overridden mask dtype.
    assert results.mask_0.raw_data.dtype == mask_dtype

    for channel, expected in zip((results.mask_0, results.mask_1), reference):
        assert np.allclose(channel.raw_data, expected)
# Example 6
def test_multi_mask_force_dtype(lt_ctx):
    """Forcing the computation dtype yields results in exactly that dtype."""
    force_dtype = np.dtype(np.int32)
    data = _mk_random(size=(16, 16, 16, 16), dtype="int16")
    masks = _mk_random(size=(2, 16, 16), dtype="bool")
    reference = _naive_mask_apply(
        masks.astype(force_dtype),
        data.astype(force_dtype),
    )

    dataset = MemoryDataSet(
        data=data,
        tileshape=(4 * 4, 4, 4),
        num_partitions=2,
    )
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=lambda: masks,
        dtype=force_dtype,
    )
    results = lt_ctx.run(analysis)

    # Both kind and exact dtype must match the forced dtype.
    assert results.mask_0.raw_data.dtype.kind == force_dtype.kind
    assert results.mask_0.raw_data.dtype == force_dtype

    for channel, expected in zip((results.mask_0, results.mask_1), reference):
        assert np.allclose(channel.raw_data, expected)
# Example 7
def test_ring_3d_ds(lt_ctx):
    """Ring analysis on a 3D (flat navigation) dataset matches naive masking."""
    data = _mk_random(size=(16 * 16, 16, 16))
    dataset = MemoryDataSet(
        data=data.astype("<u2"),
        tileshape=(1, 16, 16),
        num_partitions=2,
        sig_dims=2,
    )
    analysis = lt_ctx.create_ring_analysis(
        dataset=dataset, cx=8, cy=8, ri=5, ro=8,
    )
    results = lt_ctx.run(analysis)
    ring_mask = analysis.get_mask_factories()[0]()
    # Naive reference is computed on the data reshaped to 4D.
    reference = _naive_mask_apply(
        [ring_mask], dataset.data.reshape((16, 16, 16, 16)),
    )
    assert results.intensity.raw_data.shape == (16 * 16, )
    for buf in (results.intensity, results.intensity_log):
        assert np.allclose(buf.raw_data.reshape((16, 16)), reference)
# Example 8
def test_numerics_succeed(lt_ctx):
    """Overriding the mask dtype to float64 gives sufficient precision.

    Counterpart to test_numerics_fail: with float64 masks the accumulated
    sum matches the analytical expectation even for VAL = 1.1.
    """
    dtype = 'float64'
    RESOLUTION = 4096  # highest expected detector resolution
    RANGE = 1e6        # highest expected detector dynamic range
    VAL = 1.1          # default cell value

    data = np.full((2, 1, RESOLUTION, RESOLUTION), VAL, dtype=np.float32)
    data[0, 0, 0, 0] += VAL * RANGE
    dataset = MemoryDataSet(
        data=data,
        tileshape=(2, RESOLUTION, RESOLUTION),
        num_partitions=1,
        sig_dims=2,
    )
    mask0 = np.ones((RESOLUTION, RESOLUTION), dtype=np.float32)
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=[lambda: mask0],
        mask_count=1,
        mask_dtype=dtype,
    )

    results = lt_ctx.run(analysis)
    frame_sum = VAL * RESOLUTION**2
    expected = np.array([[[frame_sum + VAL * RANGE],
                          [frame_sum]]])
    naive = _naive_mask_apply([mask0.astype(dtype)], data.astype(dtype))

    assert np.allclose(expected, naive)
    assert np.allclose(expected[0], results.mask_0.raw_data)
def test_endian(lt_ctx, TYPE):
    """Mask application works on big-endian (>u2) data."""
    data = np.random.choice(a=0xFFFF, size=(16, 16, 16, 16)).astype(">u2")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data, tileshape=(4 * 4, 4, 4), num_partitions=2,
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference, TYPE)
def test_mask_uint(lt_ctx, TYPE):
    """Mask application works with an unsigned-integer (uint16) mask."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16)).astype("uint16")
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data, tileshape=(4 * 4, 4, 4), num_partitions=2,
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference, TYPE)
def test_subframe_tiles_fast(lt_ctx):
    """Tiles smaller than a single frame in the signal dimensions."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data,
        tileshape=(1, 8, 4, 4),
        partition_shape=(16, 16, 16, 16),
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference)
def test_signed(lt_ctx):
    """Mask application on little-endian signed int32 data."""
    # NOTE(review): a second `test_signed` is defined later in this file and
    # shadows this one at import time, so pytest only collects the later
    # definition; consider renaming one of them.
    data = np.random.choice(a=0xFFFF, size=(16, 16, 16, 16)).astype("<i4")
    mask = _mk_random(size=(16, 16))
    expected = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(data=data, tileshape=(4, 4, 4, 4), partition_shape=(16, 16, 16, 16))

    _run_mask_test_program(lt_ctx, dataset, mask, expected)
# Example 13
def test_single_frame_tiles(lt_ctx):
    """Mask application with one full frame per tile."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data, tileshape=(1, 16, 16), num_partitions=2,
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference)
# Example 14
def test_ring_defaults(lt_ctx, ds_random):
    """Ring analysis with default parameters matches the naive reference."""
    analysis = lt_ctx.create_ring_analysis(dataset=ds_random)
    results = lt_ctx.run(analysis)
    ring_mask = analysis.get_mask_factories()[0]()
    reference = _naive_mask_apply([ring_mask], ds_random.data)
    assert np.allclose(results.intensity.raw_data, reference)
# Example 15
def test_point_complex(lt_ctx, ds_complex):
    """Point analysis on complex data fills the complex intensity channel."""
    analysis = lt_ctx.create_point_analysis(dataset=ds_complex)
    results = lt_ctx.run(analysis)
    point_mask = analysis.get_mask_factories()[0]()
    reference = _naive_mask_apply([point_mask], ds_complex.data)
    assert np.allclose(results.intensity_complex.raw_data, reference)
# Example 16
def test_point_1(lt_ctx, ds_random):
    """Point analysis at (8, 8) equals a single-pixel mask application."""
    analysis = lt_ctx.create_point_analysis(dataset=ds_random, x=8, y=8)
    results = lt_ctx.run(analysis)
    # Equivalent mask: all zeros except the selected pixel.
    point_mask = np.zeros((16, 16))
    point_mask[8, 8] = 1
    reference = _naive_mask_apply([point_mask], ds_random.data)
    assert np.allclose(results.intensity.raw_data, reference)
# Example 17
def test_hdf5_3d_apply_masks(lt_ctx, hdf5_ds_3d):
    """Mask analysis on a 3D HDF5 dataset matches the naive reference."""
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_3d.get_reader().get_h5ds() as h5ds:
        raw = h5ds[:]
        # Naive reference expects 4D data, so reshape to (1, 17, 16, 16).
        reference = _naive_mask_apply([mask], raw.reshape((1, 17, 16, 16)))
    analysis = lt_ctx.create_mask_analysis(
        dataset=hdf5_ds_3d, factories=[lambda: mask],
    )
    results = lt_ctx.run(analysis)

    assert np.allclose(results.mask_0.raw_data, reference)
# Example 18
def test_start_local_default(hdf5_ds_1, local_cluster_ctx):
    """Run hybrid CPU/GPU UDFs on the default local cluster.

    Checks core detection on a freshly loaded memory dataset, runs the
    CuPy-enabled mask analysis, and exercises DebugDeviceUDF with various
    backend combinations depending on CUDA/CuPy availability.

    NOTE(review): a second `test_start_local_default` is defined later in
    this file and shadows this one at import time — pytest only collects
    the later definition; consider renaming one of them.
    """
    mask = _mk_random(size=(16, 16))
    d = detect()
    cudas = d['cudas']
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    ctx = local_cluster_ctx
    analysis = ctx.create_mask_analysis(dataset=hdf5_ds_1,
                                        factories=[lambda: mask])

    num_cores_ds = ctx.load('memory', data=np.zeros((2, 3, 4, 5)))
    workers = ctx.executor.get_available_workers()
    cpu_count = len(workers.has_cpu())
    gpu_count = len(workers.has_cuda())

    # The dataset should see as many cores as the larger worker pool.
    assert num_cores_ds._cores == max(cpu_count, gpu_count)

    # Based on ApplyMasksUDF, which is CuPy-enabled
    hybrid = ctx.run(analysis)
    _ = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                    dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
                    dataset=hdf5_ds_1)
    _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'cuda', 'numpy')),
                    dataset=hdf5_ds_1)
    if cudas:
        cuda_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
                                dataset=hdf5_ds_1,
                                backends=('cuda', ))
        if d['has_cupy']:
            cupy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',
                                                                 'numpy')),
                                    dataset=hdf5_ds_1,
                                    backends=('cupy', ))
        else:
            # Restricting to CuPy without CuPy installed must fail.
            with pytest.raises(RuntimeError):
                cupy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',
                                                                     'numpy')),
                                        dataset=hdf5_ds_1,
                                        backends=('cupy', ))
            cupy_only = None

    numpy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )),
                             dataset=hdf5_ds_1)

    assert np.allclose(hybrid.mask_0.raw_data, expected)
    if cudas:
        assert np.all(cuda_only['device_class'].data == 'cuda')
        if cupy_only is not None:
            assert np.all(cupy_only['device_class'].data == 'cuda')
    assert np.all(numpy_only['device_class'].data == 'cpu')
# Example 19
def test_hdf5_apply_masks_1(lt_ctx, hdf5_ds_1, TYPE):
    """Mask analysis on an HDF5 dataset, parametrized over analysis TYPE."""
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        raw = h5ds[:]
        reference = _naive_mask_apply([mask], raw)
    analysis = lt_ctx.create_mask_analysis(
        dataset=hdf5_ds_1, factories=[lambda: mask],
    )
    analysis.TYPE = TYPE
    results = lt_ctx.run(analysis)

    assert np.allclose(results.mask_0.raw_data, reference)
# Example 20
def test_disk_defaults(lt_ctx, ds_random):
    """Disk analysis with default parameters matches the naive reference."""
    analysis = lt_ctx.create_disk_analysis(dataset=ds_random)
    results = lt_ctx.run(analysis)
    disk_mask = analysis.get_mask_factories()[0]()
    reference = _naive_mask_apply([disk_mask], ds_random.data)
    # TODO: test the actual mask shape, too
    # at least mask application should match:
    assert np.allclose(results.intensity.raw_data, reference)
def test_masks_complex_mask(lt_ctx, ds_complex):
    """A complex-valued mask produces a complex result channel."""
    mask0 = _mk_random(size=(16, 16), dtype="complex64")
    analysis = lt_ctx.create_mask_analysis(
        dataset=ds_complex, factories=[lambda: mask0],
    )
    reference = _naive_mask_apply([mask0], ds_complex.data)
    results = lt_ctx.run(analysis)
    assert results.mask_0_complex.raw_data.shape == (16, 16)
    assert np.allclose(results.mask_0_complex.raw_data, reference)

    # also execute _run_mask_test_program to check sparse implementation.
    # _run_mask_test_program checks mask_0 result, which is np.abs(mask_0_complex)
    _run_mask_test_program(lt_ctx, ds_complex, mask0, np.abs(reference))
def test_weird_partition_shapes_1_slow(lt_ctx):
    """Partitions split in the signal dimensions still sum correctly."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data, tileshape=(1, 16, 16), partition_shape=(16, 16, 2, 2),
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference)

    # The first tile of the first partition reflects the odd partitioning.
    first_partition = next(dataset.get_partitions())
    first_tile = next(first_partition.get_tiles())
    assert tuple(first_tile.tile_slice.shape) == (1, 1, 2, 2)
# Example 23
def test_start_local(hdf5_ds_1):
    """A default api.Context reproduces the naive mask result."""
    mask = _mk_random(size=(16, 16))
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        raw = h5ds[:]
        reference = _naive_mask_apply([mask], raw)

    with api.Context() as ctx:
        analysis = ctx.create_mask_analysis(
            dataset=hdf5_ds_1, factories=[lambda: mask],
        )
        results = ctx.run(analysis)

    assert np.allclose(results.mask_0.raw_data, reference)
def test_numerics_fail(lt_ctx, TYPE):
    """Demonstrate that forcing float32 masks loses precision at scale.

    A single cell carries an extra VAL * RANGE; float64 masks reproduce the
    analytical sum, while the float32 computation forced via mask_dtype is
    expected to deviate measurably.
    """
    dtype = 'float32'
    # Highest expected detector resolution
    RESOLUTION = 4096
    # Highest expected detector dynamic range
    RANGE = 1e6
    # default value for all cells
    VAL = 1.1

    data = np.full((2, 1, RESOLUTION, RESOLUTION), VAL, dtype=np.float32)
    data[0, 0, 0, 0] += VAL * RANGE
    dataset = MemoryDataSet(
        data=data,
        tileshape=(2, RESOLUTION, RESOLUTION),
        num_partitions=1,
        sig_dims=2,
    )
    mask0 = np.ones((RESOLUTION, RESOLUTION), dtype=np.float64)
    analysis = lt_ctx.create_mask_analysis(
        dataset=dataset,
        factories=[lambda: mask0],
        mask_count=1,
        mask_dtype=dtype,
    )
    analysis.TYPE = TYPE

    results = lt_ctx.run(analysis)
    expected = np.array([[[VAL * RESOLUTION**2 + VAL * RANGE],
                          [VAL * RESOLUTION**2]]])
    naive = _naive_mask_apply([mask0], data)
    naive_32 = _naive_mask_apply([mask0.astype(dtype)], data)
    # The masks are float64, that means the calculation is performed with high resolution
    # and the naive result should be correct
    assert np.allclose(expected, naive)
    # We make sure LiberTEM calculated this with the lower-precision dtype we set
    assert np.allclose(results.mask_0.raw_data,
                       expected[0]) == np.allclose(naive_32, expected)
    # Confirm that the numerical precision is actually insufficient.
    # If this succeeds, we have to rethink the premise of this test.
    assert not np.allclose(results.mask_0.raw_data, expected[0])
def test_signed(lt_ctx, TYPE):
    """Mask application on little-endian signed int32 data.

    NOTE(review): this definition shadows an earlier `test_signed` in this
    file; only this parametrized variant is collected by pytest.
    """
    data = np.random.choice(a=0xFFFF, size=(16, 16, 16, 16)).astype("<i4")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    # NOTE: we allow casting from int32 to float32 here, and may lose some
    # precision in case of data with large dynamic range
    dataset = MemoryDataSet(
        data=data,
        tileshape=(4 * 4, 4, 4),
        num_partitions=2,
        check_cast=False,
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference, TYPE)
# Example 26
def test_ring_1(lt_ctx, ds_random):
    """Ring analysis with explicit geometry matches the naive reference."""
    analysis = lt_ctx.create_ring_analysis(
        dataset=ds_random, cx=8, cy=8, ri=5, ro=8,
    )
    results = lt_ctx.run(analysis)
    ring_mask = analysis.get_mask_factories()[0]()
    reference = _naive_mask_apply([ring_mask], ds_random.data)
    assert results.intensity.raw_data.shape == (16, 16)
    assert np.allclose(results.intensity.raw_data, reference)
def test_weird_partition_shapes_1_fast(lt_ctx):
    """Signal-dimension partitioning with frame-sized tiles."""
    # XXX MemoryDataSet is now using Partition3D and so on, so we can't create
    # partitions with weird shapes so easily anymore (in this case, partitioned in
    # the signal dimensions). maybe fix this with a custom DataSet impl that simulates this?
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    reference = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(
        data=data, tileshape=(8, 16, 16), partition_shape=(16, 16, 8, 8),
    )
    _run_mask_test_program(lt_ctx, dataset, mask, reference)

    first_partition = next(dataset.get_partitions())
    first_tile = next(first_partition.get_tiles())
    assert tuple(first_tile.tile_slice.shape) == (1, 8, 8, 8)
def test_mask_job(lt_ctx):
    """Mask job API with a dense and a scipy.sparse mask."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask0 = _mk_random(size=(16, 16))
    mask1 = sp.csr_matrix(_mk_random(size=(16, 16)))
    reference = _naive_mask_apply([mask0, mask1], data)

    dataset = MemoryDataSet(
        data=data,
        tileshape=(4, 4, 4, 4),
        partition_shape=(16, 16, 16, 16),
    )
    job = lt_ctx.create_mask_job(
        dataset=dataset, factories=[lambda: mask0, lambda: mask1],
    )
    results = lt_ctx.run(job)

    assert np.allclose(results, reference)
def test_mask_udf(lt_ctx):
    """ApplyMasksUDF with dense, scipy.sparse and sparse.COO masks."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask0 = _mk_random(size=(16, 16))
    mask1 = sp.csr_matrix(_mk_random(size=(16, 16)))
    mask2 = sparse.COO.from_numpy(_mk_random(size=(16, 16)))
    # The ApplyMasksUDF returns data with shape ds.shape.nav + (mask_count, ),
    # different from ApplyMasksJob
    reference = np.moveaxis(
        _naive_mask_apply([mask0, mask1, mask2], data), (0, 1), (2, 0),
    )

    dataset = MemoryDataSet(
        data=data, tileshape=(4 * 4, 4, 4), num_partitions=2,
    )
    udf = ApplyMasksUDF(
        mask_factories=[lambda: mask0, lambda: mask1, lambda: mask2],
    )
    results = lt_ctx.run_udf(udf=udf, dataset=dataset)

    assert np.allclose(results['intensity'].data, reference)
# Example 30
def test_start_local_default(hdf5_ds_1):
    """Run hybrid CPU/GPU UDFs on a default local api.Context.

    Exercises DebugDeviceUDF with various backend combinations depending
    on CUDA/CuPy availability, and checks the mask analysis result.

    NOTE(review): this definition shadows an earlier
    `test_start_local_default` in this file; only this one is collected
    by pytest — consider renaming one of them.
    """
    mask = _mk_random(size=(16, 16))
    d = detect()
    cudas = d['cudas']
    with hdf5_ds_1.get_reader().get_h5ds() as h5ds:
        data = h5ds[:]
        expected = _naive_mask_apply([mask], data)

    with api.Context() as ctx:
        analysis = ctx.create_mask_analysis(dataset=hdf5_ds_1,
                                            factories=[lambda: mask])
        # Based on ApplyMasksUDF, which is CuPy-enabled
        hybrid = ctx.run(analysis)
        _ = ctx.run_udf(udf=DebugDeviceUDF(), dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                        dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda', 'numpy')),
                        dataset=hdf5_ds_1)
        _ = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy', 'cuda', 'numpy')),
                        dataset=hdf5_ds_1)
        if cudas:
            cuda_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('cuda',
                                                                 'numpy')),
                                    dataset=hdf5_ds_1,
                                    backends=('cuda', ))
            if d['has_cupy']:
                cupy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('cupy',
                                                                     'numpy')),
                                        dataset=hdf5_ds_1,
                                        backends=('cupy', ))
            else:
                # Restricting to CuPy without CuPy installed must fail.
                with pytest.raises(RuntimeError):
                    cupy_only = ctx.run_udf(
                        udf=DebugDeviceUDF(backends=('cupy', 'numpy')),
                        dataset=hdf5_ds_1,
                        backends=('cupy', ))
                cupy_only = None

        numpy_only = ctx.run_udf(udf=DebugDeviceUDF(backends=('numpy', )),
                                 dataset=hdf5_ds_1)

    assert np.allclose(hybrid.mask_0.raw_data, expected)
    if cudas:
        assert np.all(cuda_only['device_class'].data == 'cuda')
        if cupy_only is not None:
            assert np.all(cupy_only['device_class'].data == 'cuda')
    assert np.all(numpy_only['device_class'].data == 'cpu')