def test_multi_no_by_frame_small_size_wins():
    other_unlimited = UDFUnlimitedDepth()
    other_best_fit = TestUDFBestFit()
    other_deep = UDFWithLargeDepth()
    udfs = [
        other_unlimited,
        other_best_fit,
        other_deep,
    ]
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    scheme = neg.get_scheme(udfs=udfs, partition=p, read_dtype=np.float32, roi=None)
    print(scheme._debug)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (17, 930, 16)

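# Cross-check of the expected depth above (an illustrative sketch, assuming the
# 2**20-byte default tile size that the upper-size tests below refer to): a
# float32 tile with sig shape (930, 16) fits that budget up to a depth of 17.
def _max_depth(sig_size, budget=2**20, itemsize=4):
    """Hypothetical helper: largest stack depth whose tile fits `budget` bytes."""
    return budget // (sig_size * itemsize)


assert _max_depth(930 * 16) == 17
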
def test_noncontiguous_tiles(lt_ctx, backend):
    if backend == 'cupy':
        d = detect()
        cudas = d['cudas']
        if not d['cudas'] or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")

    data = _mk_random(size=(30, 3, 7), dtype="float32")
    dataset = MemoryDataSet(
        data=data, tileshape=(3, 2, 2),
        num_partitions=2, sig_dims=2,
    )
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        udf = ReshapedViewUDF()
        res = lt_ctx.run_udf(udf=udf, dataset=dataset)
        partition = next(dataset.get_partitions())
        p_udf = udf.copy_for_partition(partition=partition, roi=None)
        # Enabling debug=True checks for disjoint cache keys
        UDFRunner([p_udf], debug=True).run_for_partition(
            partition=partition,
            roi=None,
            corrections=None,
            env=Environment(threads_per_worker=1),
        )
    finally:
        set_use_cpu(0)

    assert np.all(res["sigbuf"].data == 1)

def test_multi_partition_wins():
    # FIXME: the constellation partition+tile or partition+frame or similar
    # can be optimized by using a fitting tiling scheme for tile/frame processing
    # and accumulating the whole partition into a buffer, then running
    # process_partition after the tile loop.
    other_unlimited = UDFUnlimitedDepth()
    other_best_fit = TestUDFBestFit()
    other_deep = UDFWithLargeDepth()
    udf_partition = TestUDFPartition()
    udfs = [
        udf_partition,
        other_unlimited,
        other_best_fit,
        other_deep,
    ]
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    scheme = neg.get_scheme(udfs=udfs, partition=p, read_dtype=np.float32, roi=None)
    print(scheme._debug)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (32, 1860, 2048)

def test_partition3d_correct_slices():
    data = _mk_random(size=(16, 16, 16, 16), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(3, 16, 16), num_partitions=2, sig_dims=2)
    tileshape = Shape(
        (3, ) + tuple(dataset.shape.sig),
        sig_dims=dataset.shape.sig.dims,
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=dataset.shape,
    )
    mask = np.zeros(data.shape[:2], dtype=bool)
    mask[0, 0] = True
    mask[15, 0] = True
    partitions = dataset.get_partitions()
    p1 = next(partitions)
    p2 = next(partitions)
    assert len(list(p1.get_tiles(tiling_scheme=tiling_scheme, roi=mask))) == 1
    assert len(list(p2.get_tiles(tiling_scheme=tiling_scheme, roi=mask))) == 1
    t1 = next(p1.get_tiles(tiling_scheme=tiling_scheme, roi=mask))
    t2 = next(p2.get_tiles(tiling_scheme=tiling_scheme, roi=mask))
    print("t1", t1.tile_slice)
    print("t2", t2.tile_slice)
    assert t1.tile_slice.origin[0] == 0
    assert t2.tile_slice.origin[0] == 1

def test_partition3d_correct_slices_no_scheme():
    # Variant of test_partition3d_correct_slices that relies on the default
    # tiling behaviour instead of passing an explicit tiling scheme.
    data = _mk_random(size=(16, 16, 16, 16), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(3, 16, 16), num_partitions=2, sig_dims=2)
    mask = np.zeros(data.shape[:2], dtype=bool)
    mask[0, 0] = True
    mask[15, 0] = True
    partitions = dataset.get_partitions()
    p1 = next(partitions)
    p2 = next(partitions)
    assert len(list(p1.get_tiles(roi=mask))) == 1
    assert len(list(p2.get_tiles(roi=mask))) == 1
    t1 = next(p1.get_tiles(roi=mask))
    t2 = next(p2.get_tiles(roi=mask))
    print("t1", t1.tile_slice)
    print("t2", t2.tile_slice)
    assert t1.tile_slice.origin[0] == 0
    assert t2.tile_slice.origin[0] == 1

def test_correction_size_overflow():
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = UDFWithLargeDepth()
    excluded_coords = np.array([
        (930, ),
        (16, ),
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=dataset.shape.sig, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    scheme = neg.get_scheme(
        udfs=[udf],
        partition=p,
        read_dtype=np.float32,
        roi=None,
        corrections=corr,
    )
    print(scheme._debug)
    assert scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (4, 1860, 32)

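# Illustrative sanity check of the expected scheme above: the excluded pixel at
# sig position (930, 16) lies exactly on a base-shape boundary (base_shape is
# (1, 930, 16)), so the negotiated sig tile grows to (1860, 32); assuming a
# 2**20-byte tile budget, the remaining depth for float32 data is then 4.
assert (2**20) // (1860 * 32 * 4) == 4
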
def test_new_for_partition():
    auxdata = _mk_random(size=(16, 16), dtype="float32")
    buf = AuxBufferWrapper(kind="nav", dtype="float32")
    buf.set_buffer(auxdata)

    dataset = MemoryDataSet(
        data=_mk_random(size=(16, 16, 16, 16), dtype="float32"),
        tileshape=(7, 16, 16),
        num_partitions=2, sig_dims=2,
    )

    assert auxdata.shape == tuple(dataset.shape.nav)

    roi = _mk_random(size=dataset.shape.nav, dtype="bool")
    for idx, partition in enumerate(dataset.get_partitions()):
        print("partition number", idx)
        new_buf = buf.new_for_partition(partition, roi=roi)
        ps = partition.slice.get(nav_only=True)
        roi_part = roi.reshape(-1)[ps]

        assert np.prod(new_buf._data.shape) == roi_part.sum()

        # old buffer stays the same:
        assert np.allclose(buf._data, auxdata.reshape(-1))
        assert buf._data_coords_global
        assert not new_buf._data_coords_global

        # new buffer is sliced to partition and has ROI applied:
        assert new_buf._data.shape[0] <= buf._data.shape[0]
        assert new_buf._data.shape[0] <= partition.shape[0]

        # let's try and manually apply the ROI to `auxdata`:
        assert np.allclose(new_buf._data, auxdata.reshape(-1)[ps][roi_part])

def test_negative_sync_offset(lt_ctx):
    udf = SumSigUDF()
    data = _mk_random(size=(8, 8, 8, 8))
    sync_offset = -2

    ds_with_offset = MemoryDataSet(
        data=data,
        tileshape=(2, 8, 8),
        num_partitions=4,
        sync_offset=sync_offset,
    )

    p0 = next(ds_with_offset.get_partitions())
    assert p0._start_frame == -2
    assert p0.slice.origin == (0, 0, 0)

    tileshape = Shape(
        (2, ) + tuple(ds_with_offset.shape.sig),
        sig_dims=ds_with_offset.shape.sig.dims,
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds_with_offset.shape,
    )

    for p in ds_with_offset.get_partitions():
        for t in p.get_tiles(tiling_scheme=tiling_scheme):
            pass

    assert p.slice.origin == (48, 0, 0)
    assert p.slice.shape[0] == 16

    ds_with_no_offset = MemoryDataSet(
        data=data,
        tileshape=(2, 8, 8),
        num_partitions=4,
        sync_offset=0,
    )
    result = lt_ctx.run_udf(dataset=ds_with_no_offset, udf=udf)
    result = result['intensity'].raw_data[
        :ds_with_no_offset._meta.image_count - abs(sync_offset)
    ]

    result_with_offset = lt_ctx.run_udf(dataset=ds_with_offset, udf=udf)
    result_with_offset = result_with_offset['intensity'].raw_data[abs(sync_offset):]

    assert np.allclose(result, result_with_offset)

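# Offset bookkeeping spelled out (illustrative): the (8, 8) nav grid holds
# 64 frames, so a sync_offset of -2 leaves 62 comparable frames, and the last
# of four 16-frame partitions starts at frame 48, matching the asserts above.
assert 8 * 8 - abs(-2) == 62
assert 3 * (8 * 8 // 4) == 48
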
def test_run_each_partition_2(dask_executor):
    data = _mk_random(size=(16, 16, 16), dtype='<u2')
    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), num_partitions=16)
    partitions = dataset.get_partitions()
    i = 0
    for result in dask_executor.run_each_partition(partitions, lambda p: False, all_nodes=True):
        i += 1
    # memory dataset doesn't have a defined location, so fn is never run:
    assert i == 0

def test_get_macrotile():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data,
        tileshape=(16, 16, 16),
        num_partitions=2,
    )
    p = next(dataset.get_partitions())
    mt = p.get_macrotile()
    assert tuple(mt.shape) == (128, 16, 16)

def test_sweep_stackheight_no_scheme():
    # Variant of test_sweep_stackheight below that sets the tileshape on the
    # dataset itself instead of passing an explicit tiling scheme.
    data = _mk_random(size=(16, 16, 16, 16))
    for stackheight in range(1, 256):
        print("testing with stackheight", stackheight)
        dataset = MemoryDataSet(
            data=data.astype("<u2"),
            tileshape=(stackheight, 16, 16),
            num_partitions=2,
        )
        for p in dataset.get_partitions():
            for tile in p.get_tiles():
                pass

def test_weird_partition_shapes_1_slow(lt_ctx):
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    expected = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), partition_shape=(16, 16, 2, 2))

    _run_mask_test_program(lt_ctx, dataset, mask, expected)

    p = next(dataset.get_partitions())
    t = next(p.get_tiles())
    assert tuple(t.tile_slice.shape) == (1, 1, 2, 2)

def test_run_each_partition(dask_executor):
    data = _mk_random(size=(16, 16, 16), dtype='<u2')
    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), num_partitions=16)
    partitions = dataset.get_partitions()

    def fn1(partition):
        return 42

    i = 0
    for result in dask_executor.run_each_partition(partitions, fn1, all_nodes=False):
        i += 1
        assert result == 42
    assert i == 16

def test_weird_partition_shapes_1_fast(lt_ctx):
    # XXX MemoryDataSet is now using Partition3D and so on, so we can't create
    # partitions with weird shapes so easily anymore (in this case, partitioned
    # in the signal dimensions). Maybe fix this with a custom DataSet impl that
    # simulates this?
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    mask = _mk_random(size=(16, 16))
    expected = _naive_mask_apply([mask], data)

    dataset = MemoryDataSet(data=data, tileshape=(8, 16, 16), partition_shape=(16, 16, 8, 8))

    _run_mask_test_program(lt_ctx, dataset, mask, expected)

    p = next(dataset.get_partitions())
    t = next(p.get_tiles())
    assert tuple(t.tile_slice.shape) == (1, 8, 8, 8)

def test_get_scheme_upper_size_1():
    """
    Test that will hit the 2**20 default size
    """
    data = _mk_random(size=(1024, 144, 144))
    dataset = MemoryDataSet(data=data, num_partitions=1, sig_dims=2)
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = TestUDFBestFit()
    scheme = neg.get_scheme(udfs=[udf], partition=p, read_dtype=np.float32, roi=None)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (65, 28, 144)

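# Worked check of the docstring above (illustrative): the expected
# (65, 28, 144) float32 tile occupies 1048320 bytes, just under the 2**20-byte
# default, while one more frame of depth would exceed it.
assert 65 * 28 * 144 * 4 <= 2**20 < 66 * 28 * 144 * 4
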
def test_udf_pickle(lt_ctx):
    data = _mk_random(size=(16, 16, 16, 16), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(3, 16, 16), num_partitions=16, sig_dims=2)

    partition = next(dataset.get_partitions())
    pixelsum = PixelsumUDF()
    meta = UDFMeta(
        partition_shape=partition.slice.shape,
        dataset_shape=dataset.shape,
        roi=None,
        dataset_dtype="float32",
    )
    pixelsum.set_meta(meta)
    pixelsum.init_result_buffers()
    pixelsum.allocate_for_part(partition, None)
    pickle.loads(pickle.dumps(pixelsum))

def test_depth_max_size_max():
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=False,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = UDFUnlimitedDepth()
    scheme = neg.get_scheme(udfs=[udf], partition=p, read_dtype=np.float32, roi=None)
    print(scheme._debug)
    assert not scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (32, 1860, 2048)

def test_sweep_stackheight():
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(
        data=data.astype("<u2"),
        num_partitions=2,
    )
    for stackheight in range(1, 256):
        tileshape = Shape(
            (stackheight, ) + tuple(dataset.shape.sig),
            sig_dims=dataset.shape.sig.dims,
        )
        tiling_scheme = TilingScheme.make_for_shape(
            tileshape=tileshape,
            dataset_shape=dataset.shape,
        )
        print("testing with stackheight", stackheight)
        for p in dataset.get_partitions():
            for tile in p.get_tiles(tiling_scheme=tiling_scheme, dest_dtype="float32"):
                pass

def test_mem_cropped(lt_ctx):
    """
    Make sure the memory dataset works fine with cropping.
    """
    data = _mk_random(size=(16, 16, 24, 24), dtype="float32")
    dataset = MemoryDataSet(data=data, tileshape=(7, 7, 7), num_partitions=2, sig_dims=2)

    buf = np.zeros((256, 24, 24), dtype="float32")
    for p in dataset.get_partitions():
        for tile in p.get_tiles():
            assert tuple(tile.tile_slice.shape)[0] in (7, 2)
            assert tuple(tile.tile_slice.shape)[1:] in [
                (7, 7), (7, 3), (3, 7), (3, 3),
            ]
            buf[tile.tile_slice.get()] = tile.data

    assert np.allclose(buf.reshape(data.shape), data)

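# The asserted tile shapes follow from remainder arithmetic (illustrative):
# each of the two partitions holds 128 frames, and 128 = 18 * 7 + 2, so tile
# depths are 7 with a trailing 2; each 24-pixel sig axis splits as 24 = 3 * 7 + 3.
assert divmod(128, 7) == (18, 2)
assert divmod(24, 7) == (3, 3)
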
def test_get_scheme_upper_size_2():
    """
    Test that will hit the 2**20 default size
    """
    data = _mk_random(size=(2048, 264, 264))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 8, 264),
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = TilingUDFBestFit()
    scheme = neg.get_scheme(udfs=[udf], partition=p, read_dtype=np.float32, roi=None)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (124, 8, 264)

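# As above, the expected depth follows from the 2**20-byte default budget
# (illustrative): a float32 (8, 264) sig tile takes 8448 bytes, and
# 2**20 // 8448 == 124.
assert (2**20) // (8 * 264 * 4) == 124
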
def test_limited_depth():
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = UDFWithLargeDepth()
    scheme = neg.get_scheme(
        udfs=[udf],
        approx_partition_shape=p.shape,
        dataset=dataset,
        read_dtype=np.float32,
        roi=None,
    )
    print(scheme._debug)
    assert scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (17, 930, 16)

def test_get_scheme_upper_size_roi():
    """
    Confirm that a small ROI will not be split up unnecessarily.
    """
    data = _mk_random(size=(1024, 144, 144))
    dataset = MemoryDataSet(data=data, num_partitions=1, sig_dims=2)
    # All in a single partition here:
    roi = np.zeros(dataset.shape.nav, dtype=bool)
    roi[0] = True
    roi[512] = True
    roi[-1] = True
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = TilingUDFBestFit()
    scheme = neg.get_scheme(udfs=[udf], partition=p, read_dtype=np.float32, roi=roi)
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (3, 144, 144)

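# Illustrative check: the three ROI frames together fit a single float32 tile
# within the 2**20-byte default budget, so the negotiator keeps them together.
assert 3 * 144 * 144 * 4 <= 2**20
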
def test_scheme_too_large():
    data = _mk_random(size=(16, 16, 16, 16))
    ds = MemoryDataSet(
        data=data,
        # tileshape=(16, 16, 16),
        num_partitions=2,
    )
    partitions = ds.get_partitions()
    p = next(partitions)
    depth = p.shape[0]

    # we make a tileshape that is too large for the partition here:
    tileshape = Shape(
        (depth + 1, ) + tuple(ds.shape.sig),
        sig_dims=ds.shape.sig.dims,
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=ds.shape,
    )

    # tile shape is clamped to partition shape:
    tiles = p.get_tiles(tiling_scheme=tiling_scheme)
    t = next(tiles)
    assert tuple(t.tile_slice.shape) == tuple((depth, ) + ds.shape.sig)