def test_apply_mask_job(default_k2is, lt_ctx):
    mask = np.ones((1860, 2048))
    tileshape = Shape(
        (16, 930, 16),
        sig_dims=2,
    )
    tiling_scheme = TilingScheme.make_for_shape(
        tileshape=tileshape,
        dataset_shape=default_k2is.shape,
    )
    job = ApplyMasksJob(
        dataset=default_k2is,
        mask_factories=[lambda: mask],
        tiling_scheme=tiling_scheme,
    )
    out = job.get_result_buffer()

    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)

    results = lt_ctx.run(job)
    assert results[0].shape == (34 * 35,)
    # there should be _something_ in each result pixel
    for px in results[0].reshape((-1,)):
        assert not np.isclose(px, 0)

async def async_main(ds_kwargs, address):
    executor = await AsyncDaskJobExecutor.connect(address)
    ds = load(**ds_kwargs)

    sum_job = SumFramesJob(dataset=ds)
    (y, x) = sum_job.get_result_shape()
    sum_image = get_result_image(sum_job)
    sum_buffer = sum_image.GetNumArray()

    doc = DM.NewImageDocument("test document")
    d = doc.AddImageDisplay(sum_image, 1)
    c = d.AddNewComponent(5, int(y * 0.4), int(x * 0.4), int(y * 0.6), int(x * 0.6))
    c.SetForegroundColor(1, 0, 0)
    doc.Show()

    async for _ in run(executor, sum_job, sum_buffer):
        sum_image.UpdateImage()

    rect = c.GetRect()
    mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))

    rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
    result_buffer = np.zeros(rect_job.get_result_shape())
    result_image = DM.CreateImage(result_buffer[0])
    result_image.ShowImage()
    result_image_buffer = result_image.GetNumArray()

    # For now we do a limited number of runs
    # FIXME implement a proper way to exit the loop
    counter = 0
    while counter < 20:
        counter += 1
        result_buffer[:] = 0
        async for _ in run(executor, rect_job, result_buffer):
            np.copyto(
                result_image_buffer,
                # The reshape is a workaround for a bug in the current alpha version of DM
                # This will not be required in the final DM release
                result_buffer[0].reshape(result_image_buffer.shape),
                casting='unsafe',
            )
            result_image.UpdateImage()
        while True:
            newrect = c.GetRect()
            if newrect != rect:
                rect = newrect
                mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
                rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
                break
            await asyncio.sleep(1)

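# mask_factory_from_rect is used above but not defined in these snippets.
# A minimal sketch of what it could look like, assuming `rect` is a
# (top, left, bottom, right) tuple in pixel coordinates as returned by the
# DM component's GetRect(), and `sig_shape` is the detector shape:

import numpy as np

def mask_factory_from_rect(rect, sig_shape):
    top, left, bottom, right = map(int, rect)

    def _factory():
        # binary mask selecting the rectangle drawn in DM
        mask = np.zeros(sig_shape, dtype=np.float32)
        mask[top:bottom, left:right] = 1
        return mask

    # return the factory itself; ApplyMasksJob calls it on the workers
    return _factory
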
def test_apply_mask_on_raw_job(default_blo, lt_ctx):
    mask = np.ones((144, 144))
    job = ApplyMasksJob(dataset=default_blo, mask_factories=[lambda: mask])
    out = job.get_result_buffer()

    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)

    results = lt_ctx.run(job)
    assert results[0].shape == (90 * 121,)

def test_apply_mask_on_empad_job(default_empad, lt_ctx):
    mask = np.ones((128, 128))
    job = ApplyMasksJob(dataset=default_empad, mask_factories=[lambda: mask])
    out = job.get_result_buffer()

    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)

    results = lt_ctx.run(job)
    assert results[0].shape == (4 * 4,)
    assert np.count_nonzero(results[0]) > 0

async def start_mask_job(self, uuid, params, analysis, ds):
    # keep floating-point datasets as they are, otherwise calculate in float32
    dtype = ds.dtype if np.dtype(ds.dtype).kind == 'f' else "float32"
    mask_factories = self.make_mask_factories(analysis, dtype, frame_size=ds.shape[2:])
    job = ApplyMasksJob(dataset=ds, mask_factories=mask_factories)
    full_result = np.zeros(shape=(len(mask_factories),) + tuple(ds.shape[:2]))
    job_runner = self.run_job(
        full_result=full_result,
        uuid=uuid,
        ds=ds,
        job=job,
    )
    try:
        await job_runner.asend(None)
        while True:
            images = await self.visualize(
                full_result,
                analysis,
                save_kwargs={'format': 'png'},
            )
            await job_runner.asend(images)
    except StopAsyncIteration:
        pass

def test_subframe_tiles():
    data = np.random.choice(a=[0, 1], size=(16, 16, 16, 16))
    mask = np.random.choice(a=[0, 1], size=(16, 16))
    expected = _naive_mask_apply([mask], data)

    mask_factories = [
        lambda: mask,
    ]
    dataset = MemoryDataSet(data=data, tileshape=(1, 1, 4, 4), partition_shape=(16, 16, 16, 16))
    job = ApplyMasksJob(dataset=dataset, mask_factories=mask_factories)
    part = next(dataset.get_partitions())

    executor = InlineJobExecutor()

    result = np.zeros((1, 16, 16))
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.copy_to_result(result)
    print(part.shape)
    print(expected)
    print(result)
    assert np.allclose(
        result,
        expected,
    )

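# _naive_mask_apply is the reference implementation this test compares
# against. A minimal sketch, assuming `data` has shape (ny, nx, sy, sx) and
# each mask matches the frame shape (sy, sx):

import numpy as np

def _naive_mask_apply(masks, data):
    expected = np.zeros((len(masks),) + data.shape[:2])
    for i, mask in enumerate(masks):
        # weighted sum of every frame with the mask,
        # broadcast over the navigation axes
        expected[i] = (data * mask).sum(axis=(2, 3))
    return expected
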
def do_com(fn, tileshape):
    ds = H5DataSet(
        path=fn,
        ds_path="data",
        tileshape=tileshape,
        target_size=512*1024*1024,
    )

    masks = [
        # summation of all pixels:
        lambda: np.ones(shape=ds.shape[2:]),

        # gradient from left to right
        lambda: gradient_x(*ds.shape[2:]),

        # gradient from top to bottom
        lambda: gradient_y(*ds.shape[2:]),
    ]
    job = ApplyMasksJob(dataset=ds, mask_factories=masks)
    print(job.masks.computed_masks)
    print("\n\n")
    executor = InlineJobExecutor()
    full_result = np.zeros(shape=(3,) + ds.shape[:2])
    color = np.zeros(shape=(3,) + ds.shape[:2])
    for result in executor.run_job(job):
        for tile in result:
            print(tile)
            print(tile.data[0])
            color[tile.tile_slice.get()[:2]] += 1
            tile.copy_to_result(full_result)
    x_centers = np.divide(full_result[1], full_result[0])
    y_centers = np.divide(full_result[2], full_result[0])
    print(color)

    return full_result, x_centers, y_centers

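# gradient_x and gradient_y are not defined in this snippet. For a
# center-of-mass calculation they are typically linear ramps that weight each
# pixel by its x or y coordinate, so that dividing the two gradient results by
# the plain sum (as do_com does above) yields the intensity-weighted mean
# coordinate per scan position. A sketch under that assumption:

import numpy as np

def gradient_x(sy, sx):
    # each pixel weighted by its x coordinate, constant along y
    return np.tile(np.arange(sx, dtype=np.float32), (sy, 1))

def gradient_y(sy, sx):
    # each pixel weighted by its y coordinate, constant along x
    return np.tile(np.arange(sy, dtype=np.float32), (sx, 1)).T
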
def get_job(self):
    mask_factories = self.get_mask_factories()
    use_sparse = self.get_use_sparse()
    job = ApplyMasksJob(
        dataset=self.dataset,
        mask_factories=mask_factories,
        use_sparse=use_sparse,
    )
    return job

def test_apply_mask_on_raw_job(default_raw, lt_ctx):
    mask = np.ones((128, 128))
    job = ApplyMasksJob(dataset=default_raw, mask_factories=[lambda: mask])
    out = job.get_result_buffer()

    executor = InlineJobExecutor()
    for tiles in executor.run_job(job):
        for tile in tiles:
            tile.reduce_into_result(out)

    results = lt_ctx.run(job)
    # FIXME: should the result here be 1D or 2D?
    # currently, for inherently 4D datasets it is 2D, and for 3D datasets
    # it is 1D. make this consistent?
    assert results[0].shape == (16, 16)

def get_job(self):
    mask_factories = self.get_mask_factories()
    use_sparse = self.get_use_sparse()
    length = self.get_preset_length()
    job = ApplyMasksJob(
        dataset=self.dataset,
        mask_factories=mask_factories,
        use_sparse=use_sparse,
        length=length,
    )
    return job

def get_job(self):
    mask_factories = self.get_mask_factories()
    use_sparse = self.get_use_sparse()
    mask_count = self.get_preset_mask_count()
    mask_dtype = self.get_preset_mask_dtype()
    dtype = self.get_preset_dtype()
    job = ApplyMasksJob(
        dataset=self.dataset,
        mask_factories=mask_factories,
        use_sparse=use_sparse,
        mask_count=mask_count,
        mask_dtype=mask_dtype,
        dtype=dtype,
    )
    return job

def create_mask_job(self, factories, dataset, use_sparse=None, length=None):
    """
    Create a low-level mask application job. Each factory function should,
    when called, return a numpy array with the same shape as frames in the
    dataset (so dataset.shape.sig).

    Parameters
    ----------
    factories
        Function or list of functions that take no arguments and create masks.
        The returned masks can be numpy arrays, scipy.sparse or sparse
        https://sparse.pydata.org/ matrices. The mask factories should not
        reference large objects because they can create significant overheads
        when they are pickled and unpickled.
    dataset
        dataset to work on
    use_sparse
        * None (default): Use sparse matrix multiplication if all factory functions return a \
        sparse mask, otherwise convert all masks to dense matrices and use dense matrix \
        multiplication
        * True: Convert all masks to sparse matrices.
        * False: Convert all masks to dense matrices.
    length
        Specify the number of masks if a single function is used so that the
        number of masks can be determined without calling the function.

    Examples
    --------
    >>> from libertem.api import Context
    >>> ctx = Context()
    >>> dataset = ctx.load("...")
    >>> # Use intermediate variables instead of referencing
    >>> # large complex objects like a dataset within the
    >>> # factory function
    >>> shape = dataset.shape.sig
    >>> job = ctx.create_mask_job(
    ...     factories=[lambda: np.ones(shape)],
    ...     dataset=dataset)
    >>> result = ctx.run(job)
    """
    return ApplyMasksJob(
        dataset=dataset,
        mask_factories=factories,
        use_sparse=use_sparse,
        length=length,
    )

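# Hedged usage sketch for the use_sparse parameter described above, with
# hypothetical `ctx` and `dataset` objects as in the docstring example:
# forcing sparse matrix multiplication with a scipy.sparse mask.

import numpy as np
import scipy.sparse as sp

shape = tuple(dataset.shape.sig)

def single_pixel_factory():
    # mask selecting only the central detector pixel; almost all entries
    # are zero, so a sparse representation pays off
    mask = sp.lil_matrix(shape, dtype=np.float32)
    mask[shape[0] // 2, shape[1] // 2] = 1
    return mask.tocsr()

job = ctx.create_mask_job(
    factories=[single_pixel_factory],
    dataset=dataset,
    use_sparse=True,  # keep the multiplication sparse
)
result = ctx.run(job)
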
def create_mask_job(self, factories, dataset):
    """
    Create a mask application job. Each factory function should, when called,
    return a numpy array with the same shape as frames in the dataset
    (so dataset.shape[2:]).

    Parameters
    ----------
    factories : list of mask factory functions
        functions that take no arguments and create masks
    dataset : DataSet
        dataset to work on

    Examples
    --------
    >>> job = ctx.create_mask_job(
    ...     factories=[lambda: np.ones(dataset.shape[2:])],
    ...     dataset=dataset)
    >>> result = ctx.run(job)
    """
    return ApplyMasksJob(
        dataset=dataset,
        mask_factories=factories,
    )

def create_mask_job(self, factories: MaskFactoriesType, dataset: DataSet,
        use_sparse: bool = None, mask_count: int = None,
        mask_dtype: np.dtype = None, dtype: np.dtype = None) -> ApplyMasksJob:
    """
    Create a low-level mask application job. Each factory function should,
    when called, return a numpy array with the same shape as frames in the
    dataset (so dataset.shape.sig).

    .. deprecated:: 0.4.0
        Use :meth:`create_mask_analysis` or
        :class:`~libertem.udf.masks.ApplyMasksUDF`.
        See also :ref:`job deprecation`.

    Parameters
    ----------
    factories : Union[Callable[[], array_like], Iterable[Callable[[], array_like]]]
        Function or list of functions that take no arguments and create masks.
        The returned masks can be numpy arrays, scipy.sparse or sparse
        https://sparse.pydata.org/ matrices. The mask factories should not
        reference large objects because they can create significant overheads
        when they are pickled and unpickled.
    dataset : libertem.io.dataset.base.DataSet
        dataset to work on
    use_sparse : bool or None
        * None (default): Use sparse matrix multiplication if all factory functions return a \
        sparse mask, otherwise convert all masks to dense matrices and use dense matrix \
        multiplication
        * True: Convert all masks to sparse matrices.
        * False: Convert all masks to dense matrices.
    mask_count : int, optional
        Specify the number of masks if a single factory function is used so
        that the number of masks can be determined without calling the factory
        function.
    mask_dtype : numpy.dtype, optional
        Specify the dtype of the masks so that mask dtype can be determined
        without calling the mask factory functions. This can be used to
        override the mask dtype in the result dtype determination. As an
        example, setting this to np.float32 means that masks of type float64
        will not switch the calculation and result dtype to float64 or
        complex128.
    dtype : numpy.dtype, optional
        Specify the dtype to do the calculation in. Integer dtypes are
        possible if the numpy casting rules allow this for source and mask
        data.

    Returns
    -------
    ApplyMasksJob : libertem.job.base.Job
        When run by the Context, this Job creates a :class:`numpy.ndarray` of
        shape (n_masks, prod(ds.shape.nav))

    Examples
    --------
    >>> # Use intermediate variables instead of referencing
    >>> # large complex objects like a dataset within the
    >>> # factory function
    >>> shape = dataset.shape.sig
    >>> job = ctx.create_mask_job(
    ...     factories=[lambda: np.ones(shape)],
    ...     dataset=dataset
    ... )
    >>> result = ctx.run(job)
    """
    warnings.warn(
        "The Job API is deprecated and will be removed after version 0.6.0. "
        "Use Context.create_mask_analysis() or libertem.udf.masks.ApplyMasksUDF instead. "
        "See "
        "https://libertem.github.io/LiberTEM/changelog.html#job-deprecation "
        "for details and a migration guide.",
        FutureWarning,
    )
    return ApplyMasksJob(
        dataset=dataset,
        mask_factories=factories,
        use_sparse=use_sparse,
        mask_count=mask_count,
        mask_dtype=mask_dtype,
        dtype=dtype,
    )

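# The mask_count / mask_dtype parameters above enable lazy mask evaluation.
# A hedged sketch, assuming (as the docstring suggests) that a single factory
# function may produce a whole stack of masks, with hypothetical
# `ctx`/`dataset` objects as in the docstring example:

import numpy as np

sy, sx = tuple(dataset.shape.sig)

def make_mask_stack():
    # two masks in one 3D array: sum over all pixels, and over the left half
    stack = np.zeros((2, sy, sx), dtype=np.float32)
    stack[0] = 1
    stack[1, :, :sx // 2] = 1
    return stack

job = ctx.create_mask_job(
    factories=make_mask_stack,
    dataset=dataset,
    mask_count=2,           # known without calling the factory
    mask_dtype=np.float32,  # result dtype can be determined up front
)
result = ctx.run(job)
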
def get_job(self):
    mask_factories = self.get_mask_factories()
    job = ApplyMasksJob(dataset=self.dataset, mask_factories=mask_factories)
    return job

async def put(self, uuid):
    request_data = tornado.escape.json_decode(self.request.body)
    params = request_data['job']
    ds = self.data.get_dataset(params['dataset'])
    mask_factories = self.make_mask_factories(params['masks'], frame_size=ds.shape[2:])
    job = ApplyMasksJob(dataset=ds, mask_factories=mask_factories)
    self.data.register_job(uuid=uuid, job=job)

    dask_client = await AioClient("tcp://localhost:8786")
    executor = DaskJobExecutor(client=dask_client, is_local=True)

    futures = []
    for task in job.get_tasks():
        submit_kwargs = {}
        futures.append(executor.client.submit(task, **submit_kwargs))

    self.write(Message(self.data).start_job(job_id=uuid))
    self.finish()
    self.event_registry.broadcast_event(Message(self.data).start_job(
        job_id=uuid,
    ))

    full_result = np.zeros(shape=(len(mask_factories),) + tuple(ds.shape[:2]))
    async for future, result in dd.as_completed(futures, with_results=True):
        # TODO:
        # + only send PNG of area that has changed (bounding box of all result tiles!)
        # + normalize each channel (per channel: keep running min/max, map data to [0, 1])
        # + if min/max changes, send whole channel (all results up to this point re-normalized)
        # + maybe saturate up to some point (20% over current max => keep current max)
        #   and send whole result image once finished
        # + maybe use visualization framework in-browser (example: GR)
        # TODO: update task_result message:
        # + send bbox for blitting
        for tile in result:
            tile.copy_to_result(full_result)
        images = await self.result_images(
            full_result,
            save_kwargs={'format': 'jpeg', 'quality': 65},
        )

        # NOTE: make sure the following broadcast_event messages are sent atomically!
        # (that is: keep the code below synchronous, and only send the messages
        # once the images have finished encoding, and then send all at once)
        self.event_registry.broadcast_event(Message(self.data).task_result(
            job_id=uuid,
            num_images=len(images),
        ))
        for image in images:
            raw_bytes = image.read()
            self.event_registry.broadcast_event(raw_bytes, binary=True)

    images = await self.result_images(
        full_result,
        save_kwargs={'format': 'png'},
    )
    self.event_registry.broadcast_event(Message(self.data).finish_job(
        job_id=uuid,
        num_images=len(images),
    ))
    for image in images:
        raw_bytes = image.read()
        self.event_registry.broadcast_event(raw_bytes, binary=True)

def create_mask_job(self, factories, dataset, use_sparse=None,
        mask_count=None, mask_dtype=None, dtype=None):
    """
    Create a low-level mask application job. Each factory function should,
    when called, return a numpy array with the same shape as frames in the
    dataset (so dataset.shape.sig).

    Parameters
    ----------
    factories
        Function or list of functions that take no arguments and create masks.
        The returned masks can be numpy arrays, scipy.sparse or sparse
        https://sparse.pydata.org/ matrices. The mask factories should not
        reference large objects because they can create significant overheads
        when they are pickled and unpickled.
    dataset
        dataset to work on
    use_sparse
        * None (default): Use sparse matrix multiplication if all factory functions return a \
        sparse mask, otherwise convert all masks to dense matrices and use dense matrix \
        multiplication
        * True: Convert all masks to sparse matrices.
        * False: Convert all masks to dense matrices.
    mask_count (optional)
        Specify the number of masks if a single factory function is used so
        that the number of masks can be determined without calling the factory
        function.
    mask_dtype (optional)
        Specify the dtype of the masks so that mask dtype can be determined
        without calling the mask factory functions. This can be used to
        override the mask dtype in the result dtype determination. As an
        example, setting this to np.float32 means that masks of type float64
        will not switch the calculation and result dtype to float64 or
        complex128.
    dtype (optional)
        Specify the dtype to do the calculation in. Integer dtypes are
        possible if the numpy casting rules allow this for source and mask
        data.

    Examples
    --------
    >>> from libertem.api import Context
    >>> ctx = Context()
    >>> dataset = ctx.load("...")
    >>> # Use intermediate variables instead of referencing
    >>> # large complex objects like a dataset within the
    >>> # factory function
    >>> shape = dataset.shape.sig
    >>> job = ctx.create_mask_job(
    ...     factories=[lambda: np.ones(shape)],
    ...     dataset=dataset)
    >>> result = ctx.run(job)
    """
    return ApplyMasksJob(
        dataset=dataset,
        mask_factories=factories,
        use_sparse=use_sparse,
        mask_count=mask_count,
        mask_dtype=mask_dtype,
        dtype=dtype,
    )

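# Hedged sketch for the dtype parameter described above: forcing the whole
# calculation into an integer dtype, assuming integer-typed source data and
# hypothetical `ctx`/`dataset` objects as in the docstring example:

import numpy as np

shape = tuple(dataset.shape.sig)
job = ctx.create_mask_job(
    factories=[lambda: np.ones(shape, dtype=np.uint8)],
    dataset=dataset,
    dtype=np.uint64,  # accumulate in a wide integer type to avoid overflow
)
result = ctx.run(job)
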
async def async_main(address):
    GUI_events = asyncio.ensure_future(background_task())

    executor = await AsyncDaskJobExecutor.connect(address)

    # Just an alternative dataset that works better on a slower machine
    # ds = load(
    #     "blo",
    #     path=("C:/Users/weber/Nextcloud/Projects/Open Pixelated STEM framework/"
    #           "Data/3rd-Party Datasets/Glasgow/10 um 110.blo"),
    #     tileshape=(1, 8, 144, 144),
    # )

    # For a remote cluster this has to be the path on the worker nodes, not the client
    ds = load(
        "raw",
        path='/data/users/weber/scan_11_x256_y256.raw',
        dtype="float32",
        scan_size=(256, 256),
        detector_size_raw=(130, 128),
        crop_detector_to=(128, 128),
    )

    sum_job = SumFramesJob(dataset=ds)
    (y, x) = sum_job.get_result_shape()
    sum_image = get_result_image(sum_job)
    sum_buffer = sum_image.GetNumArray()

    doc = DM.NewImageDocument("test document")
    d = doc.AddImageDisplay(sum_image, 1)
    c = d.AddNewComponent(5, int(y * 0.4), int(x * 0.4), int(y * 0.6), int(x * 0.6))
    c.SetForegroundColor(1, 0, 0)
    doc.Show()

    async for _ in run(executor, sum_job, sum_buffer):
        sum_image.UpdateImage()

    rect = c.GetRect()
    mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))

    rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
    result_buffer = np.zeros(rect_job.get_result_shape())
    result_image = DM.CreateImage(result_buffer[0])
    result_image.ShowImage()
    result_image_buffer = result_image.GetNumArray()

    # For now we do a limited number of runs
    # FIXME implement a proper way to exit the loop
    counter = 0
    while counter < 20:
        counter += 1
        result_buffer[:] = 0
        async for _ in run(executor, rect_job, result_buffer):
            np.copyto(
                result_image_buffer,
                # The reshape is a workaround for a bug in the current alpha version of DM
                # This will not be required in the final DM release
                result_buffer[0].reshape(result_image_buffer.shape),
                casting='unsafe',
            )
            result_image.UpdateImage()
        while True:
            newrect = c.GetRect()
            if newrect != rect:
                rect = newrect
                mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
                rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
                break
            await asyncio.sleep(1)

    GUI_events.cancel()