def test_sparse_roi(self, benchmark, prefix, drop, io_backend, shared_dist_ctx, sparsity):
    """Benchmark a mask-apply UDF restricted to a sparse ROI on a MIB dataset."""
    backend = backends_by_name[io_backend]
    mib_hdr = os.path.join(prefix, MIB_FILE)
    files = filelist(mib_hdr)
    ctx = shared_dist_ctx
    ds = ctx.load(filetype="mib", path=mib_hdr, io_backend=backend)

    # Select every `sparsity`-th navigation position.
    sparse_roi = np.zeros(ds.shape.nav.size, dtype=bool)
    sparse_roi[::sparsity] = True

    def make_mask():
        return np.ones(ds.shape.sig, dtype=bool)

    udf = ApplyMasksUDF(mask_factories=[make_mask], backends=('numpy', ))
    # Warm up the executor so scheduling/startup overhead is excluded.
    ctx.run_udf(udf=udf, dataset=ds)

    if drop == "cold_cache":
        drop_cache(files)
    elif drop == "warm_cache":
        warmup_cache(files)
    else:
        raise ValueError("bad param")

    benchmark.pedantic(
        ctx.run_udf,
        kwargs=dict(udf=udf, dataset=ds, roi=sparse_roi),
        warmup_rounds=0,
        rounds=1,
        iterations=1,
    )
def test_mask_repeated(self, benchmark, prefix, lt_ctx):
    """Benchmark repeated mask application on a K2IS dataset with a warm cache."""
    hdr = os.path.join(prefix, K2IS_FILE)
    files = filelist(hdr)
    ctx = lt_ctx
    ds = ctx.load(filetype="k2is", path=hdr)
    sig_shape = ds.shape.sig

    def make_mask():
        return np.ones(sig_shape, dtype=bool)

    udf = ApplyMasksUDF(mask_factories=[make_mask], backends=('numpy', ))
    # Warm up both the executor and the OS page cache before timing.
    ctx.run_udf(udf=udf, dataset=ds)
    warmup_cache(files)

    benchmark.pedantic(
        ctx.run_udf,
        kwargs=dict(udf=udf, dataset=ds),
        warmup_rounds=0,
        rounds=3,
        iterations=1,
    )
def test_concurrent_executor(lt_ctx, concurrent_ctx, default_raw, use_roi):
    """The concurrent executor must reproduce the inline reference results."""
    roi = np.random.choice([True, False], default_raw.shape.nav) if use_roi else None
    mask = np.random.random(default_raw.shape.sig)

    def mask_factory():
        return mask

    load_params = dict(
        filetype='raw',
        path=default_raw._path,
        nav_shape=default_raw.shape.nav,
        sig_shape=default_raw.shape.sig,
        dtype=default_raw.dtype,
    )
    udfs = [StdDevUDF(), ApplyMasksUDF(mask_factories=[mask_factory])]
    ref_res = lt_ctx.run_udf(dataset=default_raw, udf=udfs, roi=roi)
    ds = concurrent_ctx.load(**load_params)
    res = concurrent_ctx.run_udf(dataset=ds, udf=udfs, roi=roi)

    assert len(ref_res) == len(res)
    for ref_buffers, res_buffers in zip(ref_res, res):
        for key, ref in ref_buffers.items():
            assert np.allclose(ref.data, res_buffers[key].data, equal_nan=True)
        # Both result sets must expose exactly the same buffer names.
        for key in res_buffers.keys():
            assert key in ref_buffers
def main():
    # plot_class selects live plotting inside Digital Micrograph (GMS)
    with api.Context(executor=InlineJobExecutor(), plot_class=GMSLive2DPlot) as ctx:
        ds = ctx.load(
            "RAW",
            path=r"C:\Users\Dieter\testfile-32-32-32-32-float32.raw",
            nav_shape=(32, 32),
            sig_shape=(32, 32),
            dtype=np.float32,
        )
        # Annular (ring) mask centered on the 32x32 detector
        ring_factory = functools.partial(
            ring,
            centerX=16,
            centerY=16,
            imageSizeX=32,
            imageSizeY=32,
            radius=15,
            radius_inner=11,
        )
        udfs = [SumUDF(), ApplyMasksUDF(mask_factories=[ring_factory])]
        ctx.run_udf(dataset=ds, udf=udfs, plots=True)
def test_mask_firstrun(benchmark, prefix, first, io_backend):
    """Measure the cost of the very first UDF run, cold vs. warm executor."""
    backend = backends_by_name[io_backend]
    hdr = os.path.join(prefix, K2IS_FILE)
    files = filelist(hdr)
    with api.Context() as ctx:
        ds = ctx.load(filetype="k2is", path=hdr, io_backend=backend)

        def make_mask():
            return np.ones(ds.shape.sig, dtype=bool)

        udf = ApplyMasksUDF(mask_factories=[make_mask], backends=('numpy', ))

        if first == "warm_executor":
            # Pre-run once so worker startup is excluded from the measurement.
            ctx.run_udf(udf=udf, dataset=ds)
        elif first == "cold_executor":
            pass
        else:
            raise ValueError("bad param")

        # File cache is always warm here; only the executor state varies.
        warmup_cache(files)
        benchmark.pedantic(
            ctx.run_udf,
            kwargs=dict(udf=udf, dataset=ds),
            warmup_rounds=0,
            rounds=1,
            iterations=1,
        )
def test_mask(self, benchmark, prefix, drop, shared_dist_ctx, io_backend):
    """Benchmark a mask-apply UDF on K2IS with cold or warm file cache."""
    backend = backends_by_name[io_backend]
    hdr = os.path.join(prefix, K2IS_FILE)
    files = filelist(hdr)
    ctx = shared_dist_ctx
    ds = ctx.load(filetype="k2is", path=hdr, io_backend=backend)

    def make_mask():
        return np.ones(ds.shape.sig, dtype=bool)

    udf = ApplyMasksUDF(mask_factories=[make_mask], backends=('numpy', ))
    # Warm up the executor so scheduling/startup overhead is excluded.
    ctx.run_udf(udf=udf, dataset=ds)

    if drop == "cold_cache":
        drop_cache(files)
    elif drop == "warm_cache":
        warmup_cache(files)
    else:
        raise ValueError("bad param")

    benchmark.pedantic(
        ctx.run_udf,
        kwargs=dict(udf=udf, dataset=ds),
        warmup_rounds=0,
        rounds=1,
        iterations=1,
    )
def mask(udf_params, ds_dict):
    """Build an ApplyMasksUDF plus a naively-computed reference result.

    Returns a dict with the UDF under 'udf' and the expected intensity
    buffer (one entry per mask, stacked on the last axis) under
    'naive_result'.
    """
    data = ds_dict['data']
    dataset = ds_dict['dataset']
    roi = ds_dict.get('roi', None)
    ds_shape, nav_dims, sig_dims = ds_dims(dataset)
    flat_nav_data, flat_sig_dims = flatten_with_roi(data, roi, ds_shape)
    slices = udf_params.pop('slices', [np.s_[-1, -1]])
    values = udf_params.pop('values', [1.3])
    factories = [
        partial(_mask_fac, sl, val, ds_shape)
        for sl, val in zip(slices, values)
    ]
    udf = ApplyMasksUDF(mask_factories=factories)
    # Reference computation: broadcast each mask over the flattened nav
    # axis and sum out the signal dimensions.
    per_mask = []
    for factory in factories:
        mask_arr = factory()
        summed = (flat_nav_data * mask_arr[np.newaxis, ...]).sum(axis=flat_sig_dims)
        per_mask.append(fill_nav_with_roi(ds_shape, summed, roi))
    naive_result = np.stack(per_mask, axis=-1)
    return {'udf': udf, 'naive_result': {'intensity': naive_result}}
def test_mask(self, benchmark, drop, shared_dist_ctx, lt_ctx, context, chunked_emd):
    """Benchmark mask application on a chunked EMD file, inline vs. distributed."""
    if context == 'dist':
        ctx = shared_dist_ctx
    elif context == 'inline':
        ctx = lt_ctx
    else:
        raise ValueError
    ds = chunked_emd

    def make_mask():
        return np.ones(ds.shape.sig, dtype=bool)

    udf = ApplyMasksUDF(mask_factories=[make_mask], backends=('numpy', ))
    # Warm up the executor so scheduling/startup overhead is excluded.
    ctx.run_udf(udf=udf, dataset=ds)

    if drop == "cold_cache":
        drop_cache([ds.path])
    elif drop == "warm_cache":
        warmup_cache([ds.path])
    else:
        raise ValueError("bad param")

    benchmark.pedantic(
        ctx.run_udf,
        kwargs=dict(udf=udf, dataset=ds),
        warmup_rounds=0,
        rounds=1,
        iterations=1,
    )
def get_cluster_udf(self, sd_udf_results):
    """Build an ApplyMasksUDF that integrates single-pixel masks at peak
    positions found in the std-deviation map of ``sd_udf_results``.

    Peaks are searched within an annulus (cy/cx/ri/ro parameters) when one
    is fully specified, otherwise over the whole std map.
    """
    from skimage.feature import peak_local_max
    center = (self.parameters["cy"], self.parameters["cx"])
    rad_in = self.parameters["ri"]
    rad_out = self.parameters["ro"]
    n_peaks = self.parameters["n_peaks"]
    min_dist = self.parameters["min_dist"]
    sstd = sd_udf_results['std']
    sshape = sstd.shape
    if not (center is None or rad_in is None or rad_out is None):
        # Annular weighting: outer disk minus inner disk (1* casts bool to int)
        mask_out = 1*_make_circular_mask(center[1], center[0], sshape[1], sshape[0], rad_out)
        mask_in = 1*_make_circular_mask(center[1], center[0], sshape[1], sshape[0], rad_in)
        mask = mask_out - mask_in
        masked_sstd = sstd*mask
    else:
        masked_sstd = sstd
    coordinates = peak_local_max(masked_sstd, num_peaks=n_peaks, min_distance=min_dist)
    y = coordinates[..., 0]
    x = coordinates[..., 1]
    z = range(len(y))
    # One single-pixel mask per peak: mask i has a 1.0 at (y_i, x_i)
    mask = sparse.COO(
        shape=(len(y), ) + tuple(self.dataset.shape.sig),
        coords=(z, y, x), data=1.
    )
    udf = ApplyMasksUDF(
        # float32 for cupy support
        mask_factories=lambda: mask, mask_count=len(y), mask_dtype=np.float32,
        use_sparse=True
    )
    return udf
def _make_udfs(ds):
    """Create the UDF pair used by the test: StdDev plus a single-mask apply."""
    def factory():
        # Mask that is zero everywhere except the bottom-right pixel
        mask_arr = np.zeros(ds.shape.sig)
        mask_arr[-1, -1] = 1.3
        return mask_arr

    return [StdDevUDF(), ApplyMasksUDF(mask_factories=[factory])]
def get_udf(self):
    """Instantiate the ApplyMasksUDF from this analysis' mask presets."""
    params = dict(
        mask_factories=self.get_mask_factories(),
        use_sparse=self.get_use_sparse(),
        mask_count=self.get_preset_mask_count(),
        mask_dtype=self.get_preset_mask_dtype(),
        preferred_dtype=self.get_preset_dtype(),
    )
    return ApplyMasksUDF(**params)
def test_featurevector(lt_ctx):
    """Feature vectors from a background-subtraction template stack should
    integrate to the per-peak sum on a synthetic CBED frame where all
    peaks are equal."""
    shape = np.array([128, 128])
    zero = shape // 2
    a = np.array([24, 0.])
    b = np.array([0., 30])
    # 5x5 grid of lattice indices around the zero-order peak
    indices = np.mgrid[-2:3, -2:3]
    indices = np.concatenate(indices.T)
    radius = 5
    radius_outer = 10
    template = m.background_subtraction(
        centerX=radius_outer + 1,
        centerY=radius_outer + 1,
        imageSizeX=radius_outer * 2 + 2,
        imageSizeY=radius_outer * 2 + 2,
        radius=radius_outer,
        radius_inner=radius + 1,
        antialiased=False)
    data, indices, peaks = cbed_frame(*shape, zero, a, b, indices, radius, all_equal=True)
    dataset = MemoryDataSet(data=data, tileshape=(1, *shape), num_partitions=1, sig_dims=2)
    match_pattern = blobfinder.UserTemplate(template=template)
    # One mask per peak position, generated by the feature_vector factory
    stack = functools.partial(
        blobfinder.feature_vector,
        imageSizeX=shape[1],
        imageSizeY=shape[0],
        peaks=peaks,
        match_pattern=match_pattern,
    )
    m_udf = ApplyMasksUDF(mask_factories=stack, mask_count=len(peaks), mask_dtype=np.float32)
    res = lt_ctx.run_udf(dataset=dataset, udf=m_udf)
    # Reference: a frame containing only the zero-order peak
    peak_data, _, _ = cbed_frame(*shape, zero, a, b, np.array([(0, 0)]), radius, all_equal=True)
    peak_sum = peak_data.sum()
    assert np.allclose(res['intensity'].data.sum(), data.sum())
    assert np.allclose(res['intensity'].data, peak_sum)
def dataset_correction_masks(ds, roi, lt_ctx, exclude=None):
    """
    Compare corrections applied via sparse identity-mask multiplication
    (ApplyMasksUDF with a CorrectionSet) against the reference `correct`
    function operating on raw picked frames.
    """
    # NOTE(review): single-iteration loop — presumably a leftover from
    # running several randomized repetitions; confirm before removing.
    for i in range(1):
        shape = (-1, *tuple(ds.shape.sig))
        uncorr = CorrectionSet()
        data = lt_ctx.run_udf(udf=PickUDF(), dataset=ds, roi=roi, corrections=uncorr)

        # Random gain map in [1, 2) and dark frame in [-0.5, 0.5)
        gain = np.random.random(ds.shape.sig) + 1
        dark = np.random.random(ds.shape.sig) - 0.5

        if exclude is None:
            exclude = [
                (np.random.randint(0, s), np.random.randint(0, s))
                for s in tuple(ds.shape.sig)
            ]

        exclude_coo = sparse.COO(coords=exclude, data=True, shape=ds.shape.sig)
        corrset = CorrectionSet(dark=dark, gain=gain, excluded_pixels=exclude_coo)

        def mask_factory():
            # One mask per signal pixel: identity matrix reshaped to masks
            s = tuple(ds.shape.sig)
            return sparse.eye(np.prod(s)).reshape((-1, *s))

        # This one casts to float
        mask_res = lt_ctx.run_udf(
            udf=ApplyMasksUDF(mask_factory),
            dataset=ds,
            corrections=corrset,
            roi=roi,
        )
        # This one uses native input data
        corrected = correct(
            buffer=data['intensity'].raw_data.reshape(shape),
            dark_image=dark,
            gain_map=gain,
            excluded_pixels=exclude,
            inplace=False
        )
        print("Exclude: ", exclude)
        print(mask_res['intensity'].raw_data.dtype)
        print(corrected.dtype)
        assert np.allclose(
            mask_res['intensity'].raw_data.reshape(shape),
            corrected
        )
def open_button_clicked(self, widget: Declarative.UIWidget):
    """Load the HDF5 file named in the path field and launch a mask-apply UDF."""
    file_path = self.file_path_field.text
    # Guard clause: only proceed for existing HDF5 files
    if not (file_path.endswith(('h5', 'hdf5')) and os.path.isfile(file_path)):
        return
    ds = self.load_data(
        "hdf5",
        path=file_path,
        ds_path="4DSTEM_experiment/data/datacubes/polyAu_4DSTEM/data",
        min_num_partitions=8,
    )
    # Show the file dialog with parameters
    self.show_file_param_dialog('hdf5')
    sig_shape = ds.shape.sig
    udf = ApplyMasksUDF(mask_factories=[lambda: np.ones(sig_shape)])
    self.__event_loop.create_task(self.run_udf(udf, dataset=ds))
def test_comparison_mask(default_k2is, default_k2is_raw, local_cluster_ctx, lt_ctx):
    """The same circular-mask UDF must agree between the K2IS dataset and
    its raw-format copy."""
    raw_ds = local_cluster_ctx.load(
        "raw",
        K2IS_TESTDATA_RAW,
        dtype="u2",
        nav_shape=(34, 35),
        sig_shape=(1860, 2048),
    )

    def circular_mask():
        return masks.circular(
            centerX=1024, centerY=930, radius=465,
            imageSizeX=2048, imageSizeY=1860,
        )

    udf = ApplyMasksUDF(mask_factories=[circular_mask])
    res_k2is = local_cluster_ctx.run_udf(udf=udf, dataset=default_k2is)
    res_raw = local_cluster_ctx.run_udf(udf=udf, dataset=raw_ds)
    assert np.allclose(res_k2is['intensity'], res_raw['intensity'])
def test_mask_udf(lt_ctx):
    """ApplyMasksUDF must accept dense, scipy.sparse and pydata-sparse masks
    and match the naive reference application."""
    data = _mk_random(size=(16, 16, 16, 16), dtype="<u2")
    dense_mask = _mk_random(size=(16, 16))
    scipy_mask = sp.csr_matrix(_mk_random(size=(16, 16)))
    pydata_mask = sparse.COO.from_numpy(_mk_random(size=(16, 16)))
    # The ApplyMasksUDF returns data with shape ds.shape.nav + (mask_count, ),
    # different from ApplyMasksJob
    expected = np.moveaxis(
        _naive_mask_apply([dense_mask, scipy_mask, pydata_mask], data),
        (0, 1), (2, 0),
    )
    dataset = MemoryDataSet(data=data, tileshape=(4 * 4, 4, 4), num_partitions=2)
    udf = ApplyMasksUDF(
        mask_factories=[lambda: dense_mask, lambda: scipy_mask, lambda: pydata_mask]
    )
    results = lt_ctx.run_udf(udf=udf, dataset=dataset)
    assert np.allclose(results['intensity'].data, expected)
def execute(self, src, map_regions):
    """Run an ApplyMasksUDF over the dataset referenced by ``src``'s
    'libertem-io' metadata, masking with the union of ``map_regions``
    (or the whole frame when no regions are given).

    Skips re-execution when neither the source item nor the region set
    changed since the last run; cancels any still-running previous task.
    """
    try:
        # Skip if we already ran for this exact source and region set.
        if hasattr(self.computation._computation, 'last_src_uuid') and hasattr(
                self.computation._computation, 'last_map_regions'):
            map_regions_ = [region.persistent_dict for region in map_regions]
            if str(src.uuid) == self.computation._computation.last_src_uuid \
                    and map_regions_ == self.computation._computation.last_map_regions:
                return
        metadata = copy.deepcopy(src.xdata.metadata)
        libertem_metadata = metadata.get('libertem-io')
        if libertem_metadata is None:
            return
        executor = Registry.get_component('libertem_executor')
        if executor is None:
            logging.error(
                'No libertem executor could be retrieved from the Registry.'
            )
            return
        file_parameters = libertem_metadata['file_parameters']
        file_type = file_parameters.pop('type')
        shape = src.xdata.data_shape
        if map_regions:
            # Union of all region masks.
            # Fix: `np.bool` (deprecated alias, removed in NumPy 1.24)
            # replaced with the builtin `bool` — behavior is identical.
            mask_data = np.zeros(shape, dtype=bool)
            for region in map_regions:
                np.logical_or(mask_data, region.get_mask(shape), out=mask_data)
        else:
            # No regions given: integrate over the whole frame.
            mask_data = np.ones(shape, dtype=bool)
        ds = dataset.load(file_type, executor.ensure_sync(), **file_parameters)
        udf = ApplyMasksUDF(mask_factories=[lambda: mask_data])
        dc = self.__api.application.document_controllers[0]._document_controller
        if hasattr(self.computation._computation, 'cancel_id'):
            # A previous run may still be in flight — cancel it first.
            print(
                f'Cancelling task: {self.computation._computation.cancel_id}'
            )
            to_cancel = self.computation._computation.cancel_id
            self.__api.queue_task(lambda: self.__event_loop.create_task(
                executor.cancel(to_cancel)))
            #self.computation._computation.cancel_id = None
        self.computation._computation.cancel_id = str(time.time())
        print(f'Creating task: {self.computation._computation.cancel_id}')
        dc.add_task(
            'libertem-map4d',
            lambda: self.__event_loop.create_task(
                self.run_udf(udf, self.computation._computation.cancel_id,
                             executor, dataset=ds)))
        # Remember what we ran so an identical request can be skipped.
        self.computation._computation.last_src_uuid = str(src.uuid)
        self.computation._computation.last_map_regions = copy.deepcopy(
            [region.persistent_dict for region in map_regions])
    except Exception as e:
        # GUI boundary: report and continue rather than crash the host app.
        print(str(e))
        import traceback
        traceback.print_exc()
def clustering(interactive: Interactive, api: API):
    """Cluster navigation positions of a 4D-STEM dataset by per-peak
    intensity feature vectors, writing the label map to a new data item."""
    window = api.application.document_windows[0]
    target_data_item = window.target_data_item
    ctx = iface.get_context()
    ds = iface.dataset_from_data_item(ctx, target_data_item)
    fy, fx = tuple(ds.shape.sig)
    y, x = tuple(ds.shape.nav)
    # roi = np.random.choice([True, False], tuple(ds.shape.nav), p=[0.01, 0.99])
    # We only sample 5 % of the frame for the std deviation map
    # since the UDF still needs optimization
    std_roi = np.random.choice([True, False], tuple(ds.shape.nav), p=[0.05, 0.95])
    roi = np.ones((y, x), dtype=bool)
    # roi = np.zeros((y, x), dtype=bool)
    # roi[:, :50] = True
    stddev_res = run_stddev(ctx=ctx, dataset=ds, roi=std_roi * roi)
    ref_frame = stddev_res['std']
    # sum_res = ctx.run_udf(udf=SumUDF(), dataset=ds)
    # ref_frame = sum_res['intensity'].data
    update_data(target_data_item, ref_frame)
    peaks = peak_local_max(ref_frame, min_distance=3, num_peaks=500)
    # One single-pixel mask per detected peak
    masks = sparse.COO(
        shape=(len(peaks), fy, fx),
        coords=(range(len(peaks)), peaks[..., 0], peaks[..., 1]), data=1
    )
    feature_udf = ApplyMasksUDF(
        mask_factories=lambda: masks, mask_dtype=np.uint8, mask_count=len(peaks),
        use_sparse=True
    )
    feature_res = ctx.run_udf(udf=feature_udf, dataset=ds, roi=roi)
    # Log-scale and normalize the per-peak intensities per feature
    f = feature_res['intensity'].raw_data.astype(np.float32)
    f = np.log(f - np.min(f) + 1)
    feature_vector = f / np.abs(f).mean(axis=0)
    # too slow
    # nion_peaks = peaks / tuple(ds.shape.sig)
    # with api.library.data_ref_for_data_item(target_data_item):
    #     for p in nion_peaks:
    #         target_data_item.add_ellipse_region(*p, 0.01, 0.01)
    connectivity = scipy.sparse.csc_matrix(
        grid_to_graph(
            # Transposed!
            n_x=y,
            n_y=x,
        ))
    # Restrict the connectivity graph to positions inside the ROI
    roi_connectivity = connectivity[roi.flatten()][:, roi.flatten()]
    threshold = interactive.get_float("Cluster distance threshold: ", 10)
    # NOTE(review): `affinity` was renamed to `metric` in scikit-learn 1.2
    # and removed in 1.4 — confirm the pinned sklearn version.
    clusterer = AgglomerativeClustering(
        affinity='euclidean',
        distance_threshold=threshold,
        n_clusters=None,
        linkage='ward',
        connectivity=roi_connectivity,
    )
    clusterer.fit(feature_vector)
    # Label 0 marks positions outside the ROI
    labels = np.zeros((y, x), dtype=np.int32)
    labels[roi] = clusterer.labels_ + 1
    new_data = api.library.create_data_item_from_data(labels)
    window.display_data_item(new_data)
def test_executor_memleak(local_cluster_ctx, lt_ctx_fast, default_raw, ctx_select):
    """Ensure repeated UDF runs don't retain the (deliberately large) mask
    array on workers or in the executor between rounds."""
    if ctx_select == 'dask':
        ctx = local_cluster_ctx
        rounds = 5

        def get_worker_mem(ctx):
            return worker_memory(ctx.executor.client)
    elif ctx_select == 'inline':
        ctx = lt_ctx_fast
        rounds = 1

        def get_worker_mem(ctx):
            # Inline executor has no separate worker processes to measure
            return 0

    # NOTE(review): `1014` looks like a typo for `1024` (8 MiB target) —
    # confirm; it only affects the mask size, not correctness.
    mask_count = 8 * 1014 * 1024 // np.prod(default_raw.shape.sig)
    mask_shape = (mask_count, *default_raw.shape.sig)
    masks = np.zeros(mask_shape)

    # Intentionally "bad" factory function: make it large by closing over masks
    def mask_factory():
        return masks

    udf = ApplyMasksUDF(
        mask_factories=mask_factory, mask_count=mask_count, mask_dtype=masks.dtype,
        use_torch=False
    )
    # warm-up
    for _ in range(2):
        for _ in ctx.run_udf_iter(dataset=default_raw, udf=udf):
            pass
    cumulative_worker_delta = 0
    cumulative_executor_delta = 0
    for round in range(rounds):
        gc.collect()
        # Allow to settle
        time.sleep(1)
        ctx.executor.run_each_worker(gc.collect)
        executor_size_before = total_size(ctx)
        worker_mem_before = get_worker_mem(ctx)
        executor_size_during = None
        for res in ctx.run_udf_iter(dataset=default_raw, udf=udf):
            # Sample memory once, mid-run, on the first yielded result
            if executor_size_during is None:
                executor_size_during = total_size(ctx)
                worker_mem_during = get_worker_mem(ctx)
        gc.collect()
        # Allow to settle
        time.sleep(1)
        ctx.executor.run_each_worker(gc.collect)
        executor_size_after = total_size(ctx)
        worker_mem_after = get_worker_mem(ctx)
        active_use = worker_mem_during - worker_mem_before
        # Memory use does increase slowly. Just make sure it is not caused by
        # keeping a big array around
        worker_delta = worker_mem_after - worker_mem_before
        executor_delta = executor_size_after - executor_size_before
        print(f"Round {round}")
        print(f"Memory use during UDF run: {active_use}.")
        print(f"Memory increase worker: {worker_delta}.")
        print(f"Memory increase executor: {executor_delta}.")
        cumulative_worker_delta += worker_delta
        cumulative_executor_delta += executor_delta
    worker_count = len(ctx.executor.get_available_workers())
    # Average retained growth must stay well below one mask array's size
    assert cumulative_worker_delta / rounds / worker_count < sys.getsizeof(
        masks) * 0.1
    assert cumulative_executor_delta / rounds < sys.getsizeof(masks) * 0.1
async def controller(self, cancel_id, executor, job_is_cancelled, send_results):
    """Two-pass analysis: compute a std-deviation map over the dataset,
    pick peaks in it (optionally restricted to an annulus), then run an
    ApplyMasksUDF with one single-pixel mask per peak and send the results.
    """
    stddev_udf = StdDevUDF()
    roi = self.get_sd_roi()
    result_iter = UDFRunner(stddev_udf).run_for_dataset_async(
        self.dataset, executor, roi=roi, cancel_id=cancel_id)
    # Drain the async iterator; the last yielded value holds the final state
    async for sd_udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    sd_udf_results = consolidate_result(sd_udf_results)
    center = (self.parameters["cy"], self.parameters["cx"])
    rad_in = self.parameters["ri"]
    rad_out = self.parameters["ro"]
    n_peaks = self.parameters["n_peaks"]
    min_dist = self.parameters["min_dist"]
    sstd = sd_udf_results['std']
    sshape = sstd.shape
    if not (center is None or rad_in is None or rad_out is None):
        # Annular weighting: outer disk minus inner disk (1* casts bool to int)
        mask_out = 1 * _make_circular_mask(center[1], center[0], sshape[1],
                                           sshape[0], rad_out)
        mask_in = 1 * _make_circular_mask(center[1], center[0], sshape[1],
                                          sshape[0], rad_in)
        mask = mask_out - mask_in
        masked_sstd = sstd * mask
    else:
        masked_sstd = sstd
    coordinates = peak_local_max(masked_sstd, num_peaks=n_peaks,
                                 min_distance=min_dist)
    y = coordinates[..., 0]
    x = coordinates[..., 1]
    z = range(len(y))
    # One single-pixel mask per detected peak
    mask = sparse.COO(shape=(len(y), ) + tuple(self.dataset.shape.sig),
                      coords=(z, y, x), data=1)
    udf = ApplyMasksUDF(mask_factories=lambda: mask, mask_count=len(y),
                        mask_dtype=np.uint8, use_sparse=True)
    result_iter = UDFRunner(udf).run_for_dataset_async(self.dataset, executor,
                                                       cancel_id=cancel_id)
    async for udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    results = await run_blocking(
        self.get_udf_results,
        udf_results=udf_results,
        roi=roi,
    )
    await send_results(results, True)