def test_patch_corr_empty(lt_ctx):
    data = np.ones((13, 17, 19))
    excluded_coords = np.array([
        (1, 2, 3),
    ]).astype(np.int64)
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=(19, ), data=True)
    ds = lt_ctx.load("memory", data=data, sig_dims=1)
    udf = SumUDF()
    # Pixels 1, 2 and 3 are adjacent, so pixel 2 has no un-excluded direct
    # neighbours to patch from; constructing the CorrectionSet without
    # allow_empty=True should therefore raise.
    with pytest.raises(RepairValueError):
        corr = CorrectionSet(
            excluded_pixels=excluded_pixels,
            gain=np.ones((19, )),
            dark=np.ones((19, ))
        )
    corr = CorrectionSet(
        excluded_pixels=excluded_pixels,
        gain=np.ones((19, )),
        dark=np.ones((19, )),
        allow_empty=True
    )
    res = lt_ctx.run_udf(
        dataset=ds,
        udf=udf,
        corrections=corr
    )
    # The value will remain unpatched and stay 0 after gain and dark
    # correction are applied
    assert np.allclose(res['intensity'], 0)
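# A minimal sketch of the coords convention assumed throughout these tests:
# sparse.COO expects `coords` with shape (ndim, nnz), one row per sig
# dimension. For the 1D sig shape (19, ) above, coords [[1, 2, 3]] marks the
# pixels at indices 1, 2 and 3 as excluded; the scalar `data=True` broadcasts
# to all three entries.
_demo = sparse.COO(
    coords=np.array([[1, 2, 3]], dtype=np.int64),
    shape=(19, ),
    data=True,
)
assert _demo.nnz == 3 and bool(_demo.todense()[2])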
def dataset_correction_masks(ds, roi, lt_ctx, exclude=None):
    """
    Compare correction via sparse mask multiplication with the correct() function
    """
    for i in range(1):
        shape = (-1, *tuple(ds.shape.sig))
        uncorr = CorrectionSet()
        data = lt_ctx.run_udf(udf=PickUDF(), dataset=ds, roi=roi, corrections=uncorr)

        gain = np.random.random(ds.shape.sig) + 1
        dark = np.random.random(ds.shape.sig) - 0.5

        if exclude is None:
            exclude = [
                (np.random.randint(0, s), np.random.randint(0, s))
                for s in tuple(ds.shape.sig)
            ]

        exclude_coo = sparse.COO(coords=exclude, data=True, shape=ds.shape.sig)
        corrset = CorrectionSet(dark=dark, gain=gain, excluded_pixels=exclude_coo)

        def mask_factory():
            s = tuple(ds.shape.sig)
            return sparse.eye(np.prod(s)).reshape((-1, *s))

        # This one casts to float
        mask_res = lt_ctx.run_udf(
            udf=ApplyMasksUDF(mask_factory),
            dataset=ds,
            corrections=corrset,
            roi=roi,
        )
        # This one uses native input data
        corrected = correct(
            buffer=data['intensity'].raw_data.reshape(shape),
            dark_image=dark,
            gain_map=gain,
            excluded_pixels=exclude,
            inplace=False
        )

        print("Exclude: ", exclude)
        print(mask_res['intensity'].raw_data.dtype)
        print(corrected.dtype)

        assert np.allclose(
            mask_res['intensity'].raw_data.reshape(shape),
            corrected
        )
def test_tileshape_adjustment_7():
    sig_shape = (123, 456)
    tile_shape = (14, 42)
    base_shape = (7, 1)
    excluded_coords = np.array([
        (14, ),
        (42, )
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = corr.adjust_tileshape(
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    assert adjusted == (21, 41)
    _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
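# `_validate` is defined elsewhere in this test module. The following is a
# hypothetical stand-in (hence the different name) for the property it
# presumably checks, based on the comment in Negotiator.get_scheme below:
# after adjustment, no excluded pixel should sit on an interior tile
# boundary, so each pixel's repair environment stays within one tile.
def _validate_sketch(excluded_coords, adjusted, sig_shape):
    for coords, tile_dim, sig_dim in zip(excluded_coords, adjusted, sig_shape):
        for c in np.atleast_1d(coords):
            on_tile_edge = c % tile_dim in (0, tile_dim - 1)
            interior = 0 < c < sig_dim - 1
            assert not (on_tile_edge and interior), (c, tile_dim, sig_dim)


# The adjusted shape from test_tileshape_adjustment_7 passes this check:
_validate_sketch(
    excluded_coords=np.array([(14, ), (42, )]),
    adjusted=(21, 41),
    sig_shape=(123, 456),
)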
def test_tileshape_adjustment_6_3():
    sig_shape = (123, 456)
    tile_shape = (1, 1)
    base_shape = (1, 1)
    excluded_coords = np.array([
        range(123),
        range(0, 246, 2)
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = corr.adjust_tileshape(
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    assert adjusted == (123, 256)
    _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
def test_tileshape_adjustment_10():
    sig_shape = (122, 455)
    tile_shape = (8, 1)
    base_shape = (2, 1)
    excluded_coords = np.array([
        (121, ),
        (454, )
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = corr.adjust_tileshape(
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    assert adjusted == (8, 3)
    _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
def test_tileshape_adjustment_6_1():
    sig_shape = (123, 456)
    tile_shape = (122, 1)
    base_shape = (1, 1)
    excluded_coords = np.array([
        range(123),
        np.zeros(123, dtype=int)
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = corr.adjust_tileshape(
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    print(adjusted)
    assert adjusted == (123, 2)
    _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
def __init__(self, partition_shape: Shape, dataset_shape: Shape, roi: np.ndarray,
             dataset_dtype: np.dtype, input_dtype: np.dtype,
             tiling_scheme: TilingScheme = None, tiling_index: int = 0,
             corrections=None, device_class: str = None):
    self._partition_shape = partition_shape
    self._dataset_shape = dataset_shape
    self._dataset_dtype = dataset_dtype
    self._input_dtype = input_dtype
    self._tiling_scheme = tiling_scheme
    self._tiling_index = tiling_index
    if device_class is None:
        device_class = 'cpu'
    self._device_class = device_class
    if roi is not None:
        roi = roi.reshape(dataset_shape.nav)
    self._roi = roi
    self._slice = None
    if corrections is None:
        corrections = CorrectionSet()
    self._corrections = corrections
def __init__(self, meta: DataSetMeta, partition_slice: Slice,
             fileset: FileSet, start_frame: int, num_frames: int):
    """
    Parameters
    ----------
    meta
        The `DataSet`'s `DataSetMeta` instance

    partition_slice
        The partition slice in non-flattened form

    fileset
        The files that are part of this partition (the FileSet may also
        contain files from the dataset which are not part of this partition,
        but that may harm performance)

    start_frame
        The index of the first frame of this partition (global coords)

    num_frames
        How many frames this partition should contain
    """
    super().__init__(meta=meta, partition_slice=partition_slice)
    self._fileset = fileset.get_for_range(start_frame, start_frame + num_frames - 1)
    self._start_frame = start_frame
    self._num_frames = num_frames
    self._corrections = CorrectionSet()
    if num_frames <= 0:
        raise ValueError("invalid number of frames: %d" % num_frames)
def test_correction_size_overflow():
    data = _mk_random(size=(32, 1860, 2048))
    dataset = MemoryDataSet(
        data=data,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )
    neg = Negotiator()
    p = next(dataset.get_partitions())
    udf = UDFWithLargeDepth()

    excluded_coords = np.array([
        (930, ),
        (16, )
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=dataset.shape.sig, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)

    scheme = neg.get_scheme(
        udfs=[udf],
        partition=p,
        read_dtype=np.float32,
        roi=None,
        corrections=corr,
    )
    print(scheme._debug)
    assert scheme._debug["need_decode"]
    assert scheme.shape.sig.dims == 2
    assert tuple(scheme.shape) == (4, 1860, 32)
def test_patch_pixels_only_excluded_pixels(lt_ctx, default_raw, default_raw_data):
    udf = SumUDF()
    # An all-zero dense array converts to a COO matrix without entries,
    # i.e. no pixels are excluded.
    excluded_pixels = sparse.COO(np.zeros((128, 128)))
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    res = lt_ctx.run_udf(dataset=default_raw, udf=udf, corrections=corr)
    assert np.allclose(res['intensity'], np.sum(default_raw_data, axis=(0, 1)))
def test_correction_set_dark_one(lt_ctx, default_raw, default_raw_data, gain, dark):
    udf = SumUDF()
    corr = CorrectionSet(dark=dark, gain=gain)
    res = lt_ctx.run_udf(dataset=default_raw, udf=udf, corrections=corr)
    assert np.allclose(res['intensity'], np.sum(default_raw_data - 1, axis=(0, 1)))
def test_patch_pixels(lt_ctx, default_raw, default_raw_data):
    udf = SumUDF()
    # Test with an empty excluded_pixels array
    corr = CorrectionSet(
        excluded_pixels=np.array([(), ()]).astype(np.int64),
        gain=np.ones((128, 128))
    )
    res = lt_ctx.run_udf(dataset=default_raw, udf=udf, corrections=corr)
    assert np.allclose(res['intensity'], np.sum(default_raw_data, axis=(0, 1)))
def test_tileshape_adjustment_8():
    sig_shape = (1014, 1024)
    tile_shape = (1, 1)
    base_shape = (1, 1)
    # These magic numbers are "worst case" to produce collisions
    # 2*3*4*5*6*7
    excluded_coords = np.array([
        (720, 210, 306),
        (120, 210, 210)
    ])
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = corr.adjust_tileshape(
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    print(adjusted)
    assert adjusted != (1014, 1024)
    _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
def test_real_correction(self, shared_dist_ctx, large_raw_file, benchmark,
                         gain, dark, num_excluded):
    filename, shape, dtype = large_raw_file
    nav_dims = shape[:2]
    sig_dims = shape[2:]

    if gain == 'use gain':
        gain_map = (np.random.random(sig_dims) + 1).astype(np.float64)
    elif gain == 'no gain':
        gain_map = None
    else:
        raise ValueError

    if dark == 'use dark':
        dark_image = np.random.random(sig_dims).astype(np.float64)
    elif dark == 'no dark':
        dark_image = None
    else:
        raise ValueError

    if num_excluded > 0:
        excluded_coords = exclude_pixels(sig_dims=sig_dims, num_excluded=num_excluded)
        assert excluded_coords.shape[1] == num_excluded
        exclude = sparse.COO(coords=excluded_coords, shape=sig_dims, data=True)
    else:
        exclude = None

    print("Nav dims: ", nav_dims)
    print("Sig dims:", sig_dims)

    corrset = CorrectionSet(
        dark=dark_image,
        gain=gain_map,
        excluded_pixels=exclude,
    )

    udf = NoOpUDF()

    ds = shared_dist_ctx.load(
        'RAW',
        path=str(filename),
        scan_size=shape[:2],
        dtype=dtype,
        detector_size=shape[2:],
    )

    benchmark.pedantic(
        shared_dist_ctx.run_udf,
        kwargs=dict(
            dataset=ds,
            udf=udf,
            corrections=corrset,
        ),
        warmup_rounds=0,
        rounds=5,
        iterations=1,
    )
def test_tileshape_adjustment_many(large_raw, lt_ctx):
    udf = EarlyExitUDF()
    exclude = sparse.COO(
        coords=exclude_pixels(sig_dims=tuple(large_raw.shape.sig), num_excluded=1000),
        shape=tuple(large_raw.shape.sig),
        data=True
    )
    corr = CorrectionSet(excluded_pixels=exclude)
    with pytest.raises(EarlyExit):
        lt_ctx.run_udf(dataset=large_raw, udf=udf, corrections=corr)
def test_tileshape_adjustment_fuzz():
    for n in range(10):
        sig_shape = (np.random.randint(1, 2**12), np.random.randint(1, 2**12))
        print("Sig shape", sig_shape)
        tile_shape = (1, 1)
        base_shape = (1, 1)
        size = max(1, max(sig_shape) // 10)
        excluded_coords = np.vstack([
            np.random.randint(0, sig_shape[0], size=size),
            np.random.randint(0, sig_shape[1], size=size),
        ])
        print("excluded_coords", excluded_coords.shape, excluded_coords)
        excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
        corr = CorrectionSet(excluded_pixels=excluded_pixels)
        adjusted = corr.adjust_tileshape(
            tile_shape=tile_shape,
            sig_shape=sig_shape,
            base_shape=base_shape
        )
        print(adjusted)
        _validate(excluded_coords=excluded_coords, adjusted=adjusted, sig_shape=sig_shape)
def test_correction_set_zero_gain(lt_ctx, default_raw, gain, dark):
    udf = SumUDF()
    corr = CorrectionSet(dark=dark, gain=gain)
    res = lt_ctx.run_udf(
        dataset=default_raw,
        udf=udf,
        corrections=corr
    )
    assert np.allclose(res['intensity'], 0)
def dataset_correction_verification(ds, roi, lt_ctx, exclude=None):
    """
    Compare the correct() function with a corrected pick
    """
    for i in range(1):
        shape = (-1, *tuple(ds.shape.sig))
        uncorr = CorrectionSet()
        data = lt_ctx.run_udf(udf=PickUDF(), dataset=ds, roi=roi, corrections=uncorr)

        gain = np.random.random(ds.shape.sig) + 1
        dark = np.random.random(ds.shape.sig) - 0.5

        if exclude is None:
            exclude = [
                (np.random.randint(0, s), np.random.randint(0, s))
                for s in tuple(ds.shape.sig)
            ]

        exclude_coo = sparse.COO(coords=exclude, data=True, shape=ds.shape.sig)
        corrset = CorrectionSet(dark=dark, gain=gain, excluded_pixels=exclude_coo)

        # This one uses native input data
        pick_res = lt_ctx.run_udf(udf=PickUDF(), dataset=ds, corrections=corrset, roi=roi)
        corrected = correct(
            buffer=data['intensity'].raw_data.reshape(shape),
            dark_image=dark,
            gain_map=gain,
            excluded_pixels=exclude,
            inplace=False
        )

        print("Exclude: ", exclude)
        print(pick_res['intensity'].raw_data.dtype)
        print(corrected.dtype)

        assert np.allclose(
            pick_res['intensity'].raw_data.reshape(shape),
            corrected
        )
def test_job_no_corrections_possible(lt_ctx):
    data = _mk_random(size=(16, 16, 16, 16))
    dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16),
                            num_partitions=2, sig_dims=2)
    corr = CorrectionSet(dark=np.zeros((16, 16)))
    job = lt_ctx.create_pick_job(dataset=dataset, origin=(7, 8))
    with pytest.raises(TypeError):
        lt_ctx.run(job, corrections=corr)
def test_tileshape_adjustment_bench(benchmark, base_shape, excluded_coords):
    sig_shape = (1024, 1024)
    tile_shape = base_shape
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=sig_shape, data=True)
    corr = CorrectionSet(excluded_pixels=excluded_pixels)
    adjusted = benchmark(
        corr.adjust_tileshape,
        tile_shape=tile_shape,
        sig_shape=sig_shape,
        base_shape=base_shape
    )
    print("Base shape", base_shape)
    print("Excluded coords", excluded_coords)
    print("Adjusted", adjusted)
def __init__(
    self, meta: DataSetMeta, partition_slice: Slice, fileset: FileSet,
    start_frame: int, num_frames: int, io_backend: IOBackend,
):
    super().__init__(
        meta=meta,
        partition_slice=partition_slice,
        io_backend=io_backend,
    )
    if start_frame < self.meta.image_count:
        self._fileset = fileset.get_for_range(
            max(0, start_frame),
            max(0, start_frame + num_frames - 1)
        )
    self._start_frame = start_frame
    self._num_frames = num_frames
    self._corrections = CorrectionSet()
    if num_frames <= 0:
        raise ValueError("invalid number of frames: %d" % num_frames)
def test_patch_corr_odd(lt_ctx_fast):
    data = np.ones((13, 17, 19, 23, 29, 31))
    excluded_coords = np.array([
        (2, 5),
        (2, 5),
        (2, 5),
    ]).astype(np.int64)
    excluded_pixels = sparse.COO(coords=excluded_coords, shape=(23, 29, 31), data=True)
    ds = lt_ctx_fast.load("memory", data=data, sig_dims=3)
    udf = SumUDF()
    corr = CorrectionSet(
        excluded_pixels=excluded_pixels,
        gain=np.ones((23, 29, 31)),
        dark=np.ones((23, 29, 31))
    )
    res = lt_ctx_fast.run_udf(dataset=ds, udf=udf, corrections=corr)
    assert np.allclose(res['intensity'], 0)
def get_correction_data(self):
    return CorrectionSet(
        dark=self._get_dark_frame(),
        gain=self._get_gain_map(),
    )
def get_scheme(
        self, udfs, partition, read_dtype: np.dtype, roi: np.ndarray,
        corrections: CorrectionSet = None):
    """
    Generate a :class:`TilingScheme` instance that is compatible with both
    the given `udfs` and the :class:`~libertem.io.dataset.base.DataSet`.

    Parameters
    ----------
    udfs : List[UDF]
        The concrete UDFs to optimize the tiling scheme for. The scheme is
        adjusted to each UDF's processing method (tile, frame, partition)
        and its preferred total input size and depth.

    partition : Partition
        The `TilingScheme` is created specifically for the given `Partition`,
        so it can adjust even in the face of different partition sizes/shapes.

    read_dtype
        The dtype in which the data will be fed into the UDF

    roi : np.ndarray
        Region of interest

    corrections : CorrectionSet
        Correction set to consider in negotiation
    """
    itemsize = np.dtype(read_dtype).itemsize

    # FIXME: itemsize != native_dtype.itemsize! use partition.meta.raw_dtype.itemsize?
    # try not to waste page faults:
    # FIXME: let the UDF define upper bound for signal size (lower bound, too?)
    # (signal buffers should fit into the L2 cache)
    min_sig_size = 4 * 4096 // itemsize

    # This already takes corrections into account through a different pathway
    need_decode = partition.need_decode(roi=roi, read_dtype=read_dtype)

    if need_decode:
        io_max_size = 1 * 2**20
    else:
        io_max_size = itemsize * np.prod(partition.shape, dtype=np.int64)

    depths = [
        self._get_min_depth(udf, partition)
        for udf in udfs
    ]
    depth = max(depths)  # take the largest min-depth
    base_shape = self._get_base_shape(udfs, partition)

    sizes = [
        self._get_size(
            io_max_size, udf, itemsize, partition, base_shape,
        )
        for udf in udfs
    ]
    if any(udf.get_method() == "partition" for udf in udfs):
        size = max(sizes)  # by partition wants to be big, ...
    else:
        size = min(sizes)
    size_px = size // itemsize

    if corrections is not None and corrections.have_corrections():
        # The correction has to make sure that there are no excluded pixels
        # at tile boundaries
        base_shape = corrections.adjust_tileshape(
            tile_shape=base_shape,
            sig_shape=tuple(partition.shape.sig),
            base_shape=base_shape,
        )

    # first, scale `base_shape` up to contain at least `min_sig_size` items:
    min_factors = self._get_scale_factors(
        base_shape,
        containing_shape=partition.shape.sig,
        size=min_sig_size,
    )
    min_base_shape = self._scale_base_shape(base_shape, min_factors)

    # considering the min size, calculate the max depth:
    max_depth = size_px // np.prod(min_base_shape, dtype=np.int64)
    if depth > max_depth:
        depth = max_depth

    full_base_shape = (1, ) + tuple(base_shape)
    min_factors = (depth, ) + tuple(min_factors)

    factors = self._get_scale_factors(
        full_base_shape,
        containing_shape=partition.shape,
        size=size_px,
        min_factors=min_factors,
    )
    tileshape = self._scale_base_shape(full_base_shape, factors)

    # the partition has a "veto" on the tileshape:
    tileshape = partition.adjust_tileshape(tileshape)

    self.validate(tileshape, partition, size, itemsize, full_base_shape)
    return TilingScheme.make_for_shape(
        tileshape=Shape(tileshape, sig_dims=partition.shape.sig.dims),
        dataset_shape=partition.meta.shape,
        debug={
            "min_factors": min_factors,
            "factors": factors,
            "tileshape": tileshape,
            "size": size,
            "size_px": size_px,
            "full_base_shape": full_base_shape,
            "need_decode": need_decode,
            "depth": depth,
        }
    )
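# Worked example of the sizing heuristics in get_scheme (illustrative
# arithmetic only, following the formulas above rather than running the
# negotiator): for read_dtype=np.float32 the itemsize is 4 bytes, so the
# minimum signal slice is 4 * 4096 // 4 = 4096 items, and with
# need_decode=True the 1 MiB I/O budget allows up to
# size_px = 2**20 // 4 = 262144 pixels, shared between signal size and
# stacking depth.
_itemsize = np.dtype(np.float32).itemsize
assert 4 * 4096 // _itemsize == 4096
assert (1 * 2**20) // _itemsize == 262144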
def get_correction_data(self):
    return CorrectionSet(
        dark=self._dark,
        gain=self._gain,
    )