def test_noncontiguous_tiles(lt_ctx, backend):
    """Run ReshapedViewUDF over tiles that are non-contiguous in memory.

    Runs once through the full context, then once for a single partition
    with ``debug=True``, which checks for disjoint cache keys.

    Parameters
    ----------
    lt_ctx
        LiberTEM context fixture used to run the UDF.
    backend : str
        Either ``'numpy'`` or ``'cupy'``; the CuPy variant is skipped
        when no CUDA device or CuPy installation is available.
    """
    if backend == 'cupy':
        # Call detect() once and reuse the result, instead of detecting
        # devices twice as the previous version did.
        d = detect()
        cudas = d['cudas']
        if not d['cudas'] or not d['has_cupy']:
            pytest.skip("No CUDA device or no CuPy, skipping CuPy test")
    data = _mk_random(size=(30, 3, 7), dtype="float32")
    dataset = MemoryDataSet(
        data=data, tileshape=(3, 2, 2), num_partitions=2, sig_dims=2,
    )
    try:
        if backend == 'cupy':
            set_use_cuda(cudas[0])
        udf = ReshapedViewUDF()
        res = lt_ctx.run_udf(udf=udf, dataset=dataset)
        partition = next(dataset.get_partitions())
        p_udf = udf.copy_for_partition(partition=partition, roi=None)
        # Enabling debug=True checks for disjoint cache keys
        UDFRunner([p_udf], debug=True).run_for_partition(
            partition=partition,
            roi=None,
            corrections=None,
            env=Environment(threads_per_worker=1),
        )
    finally:
        # Always restore CPU processing, even when the CuPy run fails.
        set_use_cpu(0)
    assert np.all(res["sigbuf"].data == 1)
def run_udf(self, dataset: DataSet, udf: UDF, roi: np.ndarray = None,
            corrections: CorrectionSet = None, progress: bool = False,
            backends=None) -> Dict[str, BufferWrapper]:
    """
    Run the UDF :code:`udf` on :code:`dataset`, optionally restricted to
    the region of interest :code:`roi`.

    .. versionchanged:: 0.5.0
        Added the :code:`progress` parameter

    .. versionchanged:: 0.6.0
        Added the :code:`corrections` and :code:`backends` parameter

    Parameters
    ----------
    dataset
        The dataset to work on
    udf
        UDF instance you want to run
    roi : numpy.ndarray
        Region of interest as bool mask over the navigation axes of the
        dataset
    progress : bool
        Show progress bar
    corrections
        Corrections to apply while running the UDF. If none are given,
        the corrections that are part of the :code:`DataSet` are used,
        if there are any.
    backends : None or iterable containing 'numpy', 'cupy' and/or 'cuda'
        Restrict the back-end to a subset of the capabilities of the UDF.
        This can be useful for testing hybrid UDFs.

    Returns
    -------
    dict
        Return value of the UDF containing the result buffers of type
        :class:`libertem.common.buffers.BufferWrapper`. Note that a
        :class:`~libertem.common.buffers.BufferWrapper` can be used like
        a :class:`numpy.ndarray` in many cases because it implements
        :meth:`__array__`. You can access the underlying numpy array
        using the :attr:`~libertem.common.buffers.BufferWrapper.data`
        property.
    """
    # Fall back to the corrections bundled with the dataset, if any.
    if corrections is None:
        corrections = dataset.get_correction_data()
    runner = UDFRunner([udf])
    all_results = runner.run_for_dataset(
        dataset=dataset,
        executor=self.executor,
        roi=roi,
        progress=progress,
        corrections=corrections,
        backends=backends,
    )
    # run_for_dataset returns one result set per UDF; we ran exactly one.
    return all_results[0]
def run_udf(self, dataset: DataSet, udf: UDF, roi=None, progress=False):
    """
    Run `udf` on `dataset`.

    Parameters
    ----------
    dataset
        The dataset to work on
    udf
        UDF instance you want to run
    roi : numpy.ndarray
        Region of interest as bool mask over the navigation axes of the
        dataset
    progress : bool
        Show progress bar

    Returns
    -------
    dict
        Return value of the UDF containing the result buffers of type
        :class:`libertem.common.buffers.BufferWrapper`. Note that a
        :class:`~libertem.common.buffers.BufferWrapper` can be used like
        a :class:`numpy.ndarray` in many cases because it implements
        :meth:`__array__`. You can access the underlying numpy array
        using the :attr:`~libertem.common.buffers.BufferWrapper.data`
        property.

    .. versionchanged:: 0.5.0.dev0
        Added the progress parameter
    """
    runner = UDFRunner(udf)
    return runner.run_for_dataset(
        dataset, self.executor, roi, progress=progress,
    )
def _update_collection_index(axis, value):
    # Move the currently displayed pick position along one navigation
    # axis: re-run a PickUDF for the newly selected frame and swap the
    # displayed data item, updating the stored display_slice metadata.
    # `axis` is 0 or 1 (navigation row/column); `value` is the new index
    # on that axis. `src`, `target` and `self` come from the enclosing
    # scope (closure) — presumably a Nion Swift plug-in; verify against
    # the enclosing class.
    libertem_metadata = copy.deepcopy(src.metadata.get('libertem-io'))
    if not libertem_metadata:
        # Not a LiberTEM-backed data item — nothing to do.
        return
    file_parameters = libertem_metadata['file_parameters']
    file_type = file_parameters.pop('type')
    # display_slice['start'] stores a flat navigation index; convert it
    # to a multi-dimensional index over the target's shape.
    current_index = libertem_metadata['display_slice']['start']
    current_index = np.unravel_index(current_index, target.data.shape)
    if value == current_index[axis]:
        # Position unchanged — avoid re-loading and re-running the UDF.
        return
    executor = Registry.get_component('libertem_executor')
    if not executor:
        return
    executor = executor.ensure_sync()
    ds = dataset.load(file_type, executor, **file_parameters)
    # Build a single-frame ROI at the updated 2D navigation position.
    # NOTE(review): assumes a 2D navigation shape — TODO confirm.
    roi = np.zeros(ds.shape.nav, dtype=bool)
    if axis == 0:
        roi[value, current_index[1]] = True
        current_index = (value, current_index[1])
    else:
        roi[current_index[0], value] = True
        current_index = (current_index[0], value)
    result = UDFRunner(PickUDF()).run_for_dataset(ds, executor, roi=roi)
    result_array = np.squeeze(np.array(result['intensity']))
    # Write the new flat index back into a copy of the metadata and
    # replace the displayed data.
    new_metadata = copy.deepcopy(src.metadata)
    new_display_slice = np.ravel_multi_index(current_index, target.data.shape)
    new_metadata['libertem-io']['display_slice'][
        'start'] = new_display_slice
    new_xdata = self.__api.create_data_and_metadata(
        result_array, metadata=new_metadata)
    src.set_data_and_metadata(new_xdata)
async def test_fd_limit(async_executor):
    """Guard against file-descriptor leaks in async UDF runs.

    Lowers the soft RLIMIT_NOFILE to slightly above the current fd
    count, then runs 32 async PickUDF passes; a leak of fds per run
    would exhaust the limit and make the test fail.
    """
    import resource
    import psutil
    # set soft limit, throws errors but allows to raise it
    # again afterwards:
    proc = psutil.Process()
    oldlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
    # Headroom of 24 fds above what the process currently holds.
    resource.setrlimit(resource.RLIMIT_NOFILE, (proc.num_fds() + 24, oldlimit[1]))
    print("fds", proc.num_fds())
    try:
        data = _mk_random(size=(1, 16, 16), dtype='<u2')
        dataset = MemoryDataSet(data=data, tileshape=(1, 16, 16), num_partitions=1)
        roi = np.ones((1, ), dtype=bool)
        udf = PickUDF()
        for i in range(32):
            print(i)
            print(proc.num_fds())
            # Drain the async result iterator; we only care that it
            # completes without running out of file descriptors.
            async for part in UDFRunner([udf]).run_for_dataset_async(
                dataset=dataset,
                executor=async_executor,
                cancel_id="42",
                roi=roi,
            ):
                pass
    finally:
        # Restore the original limit even if the test body raised.
        resource.setrlimit(resource.RLIMIT_NOFILE, oldlimit)
async def run_sd_udf(self, roi, stddev_udf, executor, cancel_id, job_is_cancelled):
    """Run the given StdDev UDF asynchronously over ``self.dataset``.

    Drains the async result iterator, checks for cancellation, and
    returns the ROI together with the consolidated final result.
    """
    results_async = UDFRunner([stddev_udf]).run_for_dataset_async(
        self.dataset, executor, roi=roi, cancel_id=cancel_id
    )
    # Only the final iteration's value is needed; drain the iterator.
    async for (sd_udf_results,) in results_async:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    consolidated = consolidate_result(sd_udf_results)
    return roi, consolidated
async def run_udf(self, job_id, dataset, dataset_id, analysis, analysis_id, details):
    # HTTP handler: start a UDF-backed analysis job, stream partial
    # results to clients with adaptive throttling, and send the final
    # result when the run completes.
    udf = analysis.get_udf()
    roi = analysis.get_roi()
    executor = self.state.executor_state.get_executor()
    # Announce job start to the requesting client and all listeners.
    msg = Message(self.state).start_job(
        job_id=job_id, analysis_id=analysis_id,
    )
    log_message(msg)
    self.write(msg)
    self.finish()
    self.event_registry.broadcast_event(msg)
    if hasattr(analysis, 'controller'):
        # Analyses with a custom controller manage their own run loop;
        # delegate entirely and return its result.
        return await analysis.controller(
            cancel_id=job_id, executor=executor,
            job_is_cancelled=lambda: self.state.job_state.is_cancelled(job_id),
            send_results=lambda results, finished: self.send_results(
                results, job_id, finished=finished, details=details,
                analysis_id=analysis_id,
            )
        )
    # Throttling state: `t` is the time of the last broadcast, `post_t`
    # the time just after result post-processing; `window` adapts to
    # how long processing+broadcast took (between 0.3s and 5s).
    t = time.time()
    post_t = time.time()
    window = 0.3
    # FIXME: allow to set correction data for a dataset via upload and local loading
    corrections = dataset.get_correction_data()
    result_iter = UDFRunner([udf]).run_for_dataset_async(
        dataset, executor, roi=roi, cancel_id=job_id, corrections=corrections,
    )
    async for udf_results in result_iter:
        window = min(max(window, 2*(t - post_t)), 5)
        if time.time() - t < window:
            # Too soon since the last broadcast — skip this update.
            continue
        results = await sync_to_async(
            analysis.get_udf_results,
            udf_results=udf_results.buffers[0],
            roi=roi,
            damage=udf_results.damage
        )
        post_t = time.time()
        await self.send_results(results, job_id, analysis_id, details)
        # The broadcast might have taken quite some time due to
        # backpressure from the network
        t = time.time()
    if self.state.job_state.is_cancelled(job_id):
        raise JobCancelledError()
    # `udf_results` holds the final iteration's value after the loop;
    # always send the finished result even if intermediate updates
    # were throttled away.
    results = await sync_to_async(
        analysis.get_udf_results,
        udf_results=udf_results.buffers[0],
        roi=roi,
        damage=udf_results.damage
    )
    await self.send_results(results, job_id, analysis_id, details, finished=True)
def test_common_backends_cpu(default_raw, lt_ctx):
    """Tasks for two CPU-capable UDFs request the common CPU resources."""
    udf_runner = UDFRunner([UDF1(), UDF3()])
    task_list = list(udf_runner._make_udf_tasks(
        dataset=default_raw,
        roi=None,
        backends=None,
    ))
    expected = {'CPU': 1, 'compute': 1, 'ndarray': 1}
    for task in task_list:
        assert task.get_resources() == expected
def test_common_backends_string(default_raw, lt_ctx):
    """Tasks for two CUDA-capable UDFs request the common CUDA resources."""
    udf_runner = UDFRunner([UDF4(), UDF5()])
    task_list = list(udf_runner._make_udf_tasks(
        dataset=default_raw,
        roi=None,
        backends=None,
    ))
    expected = {'CUDA': 1, 'compute': 1}
    for task in task_list:
        assert task.get_resources() == expected
async def run_udf(self, uuid, ds, analysis):
    # HTTP handler (older API variant): start a UDF-backed analysis,
    # stream throttled partial results, then send the final result.
    udf = analysis.get_udf()
    roi = analysis.get_roi()
    # FIXME: register_job for UDFs?
    self.data.register_job(uuid=uuid, job=udf, dataset=ds)
    # FIXME: code duplication
    executor = self.data.get_executor()
    # Announce job start to the requesting client and all listeners.
    msg = Message(self.data).start_job(
        job_id=uuid,
    )
    log_message(msg)
    self.write(msg)
    self.finish()
    self.event_registry.broadcast_event(msg)
    if hasattr(analysis, 'controller'):
        # Analyses with a custom controller manage their own run loop.
        return await analysis.controller(
            cancel_id=uuid, executor=executor,
            job_is_cancelled=lambda: self.data.job_is_cancelled(uuid),
            send_results=lambda results, finished: self.send_results(
                results, uuid, finished=finished)
        )
    # Throttling state: `t` is the time of the last broadcast, `post_t`
    # the time after post-processing; `window` adapts between 0.3s and
    # 5s depending on how long processing+broadcast took.
    t = time.time()
    post_t = time.time()
    window = 0.3
    result_iter = UDFRunner(udf).run_for_dataset_async(
        ds, executor, roi=roi, cancel_id=uuid
    )
    async for udf_results in result_iter:
        window = min(max(window, 2*(t - post_t)), 5)
        if time.time() - t < window:
            # Too soon since the last broadcast — skip this update.
            continue
        results = await run_blocking(
            analysis.get_udf_results,
            udf_results=udf_results,
            roi=roi,
        )
        post_t = time.time()
        await self.send_results(results, uuid)
        # The broadcast might have taken quite some time due to
        # backpressure from the network
        t = time.time()
    if self.data.job_is_cancelled(uuid):
        raise JobCancelledError()
    # `udf_results` holds the final iteration's value after the loop.
    results = await run_blocking(
        analysis.get_udf_results,
        udf_results=udf_results,
        roi=roi,
    )
    await self.send_results(results, uuid, finished=True)
def test_no_common_backends(default_raw, lt_ctx):
    """UDFs with disjoint backend support cannot yield task resources."""
    udf_runner = UDFRunner([UDF1(), UDF2()])
    task_list = list(udf_runner._make_udf_tasks(
        dataset=default_raw,
        roi=None,
        backends=None,
    ))
    for task in task_list:
        with pytest.raises(ValueError) as excinfo:
            task.get_resources()
        assert excinfo.match("^There is no common supported UDF backend")
async def run_udf(self, udf: UDF, cancel_id, executor, dataset, roi=None):
    """Stream partial UDF results and commit each update for display."""
    async for partial in UDFRunner(udf).run_for_dataset_async(
        dataset, executor, roi=roi, cancel_id=cancel_id,
    ):
        # Move the mask/channel axis to the front and drop size-1 axes
        # before handing the data to the display layer.
        intensity = np.array(partial['intensity'])
        result_array = np.squeeze(np.swapaxes(intensity, -1, 0))
        self.__new_xdata = self.get_xdata_for_results(result_array)
        self.commit()
async def test_async_run_for_dset(async_executor):
    """The async runner must yield at least one result, even for an
    all-False ROI."""
    data = _mk_random(size=(16 * 16, 16, 16), dtype="float32")
    dataset = MemoryDataSet(
        data=data, tileshape=(1, 16, 16), num_partitions=2, sig_dims=2,
    )
    pixelsum = PixelsumUDF()
    roi = np.zeros((256,), dtype=bool)
    runner = UDFRunner([pixelsum])
    num_yielded = 0
    async for (udf_results,) in runner.run_for_dataset_async(
        dataset, async_executor, roi=roi, cancel_id="42"
    ):
        num_yielded += 1
    assert num_yielded > 0, "must yield at least one result"
async def run_udf(self, udf: UDF, dataset, roi=None):
    """Stream partial UDF results into a display item, creating the
    item on the first update and refreshing it on subsequent ones."""
    data_item = None
    async for partial in UDFRunner(udf).run_for_dataset_async(
        dataset, self.executor, roi=roi, cancel_id="42",
    ):
        # Move the mask/channel axis to the front for display.
        result_array = np.swapaxes(np.array(partial['intensity']), -1, 0)
        if data_item is None:
            data_item = self.show_results(result_array=result_array)
        else:
            xdata = self.get_xdata_for_results(result_array)
            data_item.set_data_and_metadata(xdata)
def read_data_and_metadata_from_stream(self, stream):
    # Load a LiberTEM-supported file (`stream` is presumably a file
    # path — TODO confirm against callers), let the user fill in file
    # parameters via a dialog, pick the first frame with PickUDF, and
    # return it as a data-and-metadata object carrying the parameters
    # needed to re-load the dataset later.
    executor = Registry.get_component('libertem_executor')
    if executor is None:
        logging.error(
            'No libertem executor could be retrieved from the Registry.')
        return
    executor = executor.ensure_sync()
    # Auto-detect the file type; fall back to raw with just the path.
    file_parameters = dataset.detect(stream, executor=executor)
    file_type = file_parameters.pop('type', None)
    if file_type is None:
        file_type = 'raw'
        file_parameters = {'path': stream}
    file_params = dict()

    def params_callback(file_params_):
        # Collects the user's choices from the parameter dialog.
        file_params.update(file_params_)
    # Show the parameter dialog on the UI thread and block this thread
    # until it has been shown and then closed (two separate events).
    self.__api.queue_task(
        lambda: self.show_file_param_dialog(file_type, params_callback))
    self.__show_file_param_dialog_finished_event.wait()
    self.__show_file_param_dialog_finished_event.clear()
    self.__file_param_dialog_closed_event.wait()
    # 'name' is dialog-only metadata, not a dataset.load() parameter.
    file_params.pop('name', None)
    file_parameters.update(file_params)
    ds = dataset.load(file_type, executor, **file_parameters)
    # ROI selecting only the very first navigation position.
    roi = np.zeros(ds.shape.nav, dtype=bool)
    roi_flat = roi.ravel()
    roi_flat[0] = True
    result = UDFRunner(PickUDF()).run_for_dataset(ds, executor, roi=roi)
    result_array = np.squeeze(np.array(result['intensity']))
    # Re-attach the type so the metadata is sufficient to re-load.
    file_parameters['type'] = file_type
    metadata = {
        'libertem-io': {
            'file_parameters': file_parameters,
            'display_slice': {
                'start': 0,
                'stop': 0
            }
        }
    }
    return self.__api.create_data_and_metadata(result_array, metadata=metadata)
async def controller(self, cancel_id, executor, job_is_cancelled, send_results):
    """Run the clustering analysis: obtain std-deviation results, build
    the cluster UDF from them, run it, and send the final result."""
    roi, sd_udf_results = await self.get_sd_results(
        executor, cancel_id, job_is_cancelled)
    cluster_udf = self.get_cluster_udf(sd_udf_results)
    # Drain the async iterator; only the final value matters here.
    async for (udf_results,) in UDFRunner([cluster_udf]).run_for_dataset_async(
        self.dataset, executor, cancel_id=cancel_id
    ):
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    results = await run_blocking(
        self.get_udf_results,
        udf_results=udf_results,
        roi=roi,
    )
    await send_results(results, True)
def run_udf(self, dataset: DataSet, udf: UDF, roi=None):
    """
    Run `udf` on `dataset`.

    Parameters
    ----------
    dataset
        The dataset to work on
    udf
        UDF instance you want to run
    roi : numpy.ndarray
        region of interest as bool mask over the navigation axes of the
        dataset

    Returns
    -------
    dict:
        Return value of the UDF containing the result buffers
    """
    runner = UDFRunner(udf)
    return runner.run_for_dataset(dataset, self.executor, roi)
def test_simple_multi_udf_run():
    """Running two UDFs in one pass gives the same sums as numpy."""
    raw = _mk_random(size=(32, 1860, 2048))
    ds = MemoryDataSet(
        data=raw,
        num_partitions=1,
        sig_dims=2,
        base_shape=(1, 930, 16),
        force_need_decode=True,
    )
    executor = InlineJobExecutor()
    both_udfs = [SumSigUDF(), SumUDF()]
    sumsigres, sumres = UDFRunner(udfs=both_udfs).run_for_dataset(
        dataset=ds,
        executor=executor,
    )
    print(sumsigres, sumres)
    # SumUDF sums over navigation; SumSigUDF sums over signal axes.
    assert np.allclose(sumres['intensity'], np.sum(raw, axis=0))
    assert np.allclose(sumsigres['intensity'], np.sum(raw, axis=(1, 2)))
def _run_sync_wrap():
    # Generator wrapping the synchronous UDF run; closes over `udfs`,
    # `dataset`, `roi`, `progress`, `corrections`, `backends`, `plots`
    # and `enable_plotting` from the enclosing scope.
    result_iter = UDFRunner(udfs).run_for_dataset_sync(
        dataset=dataset,
        executor=self.executor,
        roi=roi,
        progress=progress,
        corrections=corrections,
        backends=backends,
    )
    for udf_results in result_iter:
        # Yield first so the consumer sees results before plots update.
        yield udf_results
        if enable_plotting:
            self._update_plots(plots, udfs, udf_results.buffers,
                               udf_results.damage, force=False)
    if enable_plotting:
        # Relies on `udf_results` keeping the last loop value (Python
        # leaks the loop variable); forces a final plot refresh.
        self._update_plots(plots, udfs, udf_results.buffers,
                           udf_results.damage, force=True)
async def controller(self, cancel_id, executor, job_is_cancelled, send_results):
    # Two-pass analysis: (1) compute per-pixel std deviation over the
    # dataset, (2) locate peaks in the (optionally ring-masked) std-dev
    # map and build a feature vector UDF from them.
    stddev_udf = StdDevUDF()
    roi = self.get_sd_roi()
    result_iter = UDFRunner(stddev_udf).run_for_dataset_async(
        self.dataset, executor, roi=roi, cancel_id=cancel_id)
    # Drain the iterator; only the final result is used.
    async for sd_udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    # NOTE(review): the next two expression statements discard their
    # values — they look like leftovers unless `.data` access has a
    # needed side effect (e.g. forcing buffer materialization); confirm
    # before removing.
    sd_udf_results['var'].data
    sd_udf_results['num_frame'].data
    # Convert buffer results into plain arrays / derived quantities.
    sd_udf_results = dict(sd_udf_results.items())
    sd_udf_results['var'] = sd_udf_results['var'].data / sd_udf_results[
        'num_frame'].data
    sd_udf_results['std'] = np.sqrt(sd_udf_results['var'].data)
    sd_udf_results['mean'] = sd_udf_results[
        'sum_frame'].data / sd_udf_results['num_frame'].data
    sd_udf_results['num_frame'] = sd_udf_results['num_frame'].data
    sd_udf_results['sum_frame'] = sd_udf_results['sum_frame'].data
    # Analysis parameters: ring mask geometry and peak-finding knobs.
    center = (self.parameters["cy"], self.parameters["cx"])
    rad_in = self.parameters["ri"]
    rad_out = self.parameters["ro"]
    delta = self.parameters["delta"]
    n_peaks = self.parameters["n_peaks"]
    min_dist = self.parameters["min_dist"]
    savg = sd_udf_results['mean']
    sstd = sd_udf_results['std']
    sshape = sstd.shape
    if not (center is None or rad_in is None or rad_out is None):
        # Restrict peak search to a ring between rad_in and rad_out.
        mask_out = 1 * _make_circular_mask(center[1], center[0],
                                           sshape[1], sshape[0], rad_out)
        mask_in = 1 * _make_circular_mask(center[1], center[0],
                                          sshape[1], sshape[0], rad_in)
        mask = mask_out - mask_in
        masked_sstd = sstd * mask
    else:
        masked_sstd = sstd
    coordinates = peak_local_max(masked_sstd, num_peaks=n_peaks,
                                 min_distance=min_dist)
    udf = feature.FeatureVecMakerUDF(delta=delta, savg=savg,
                                     coordinates=coordinates)
    result_iter = UDFRunner(udf).run_for_dataset_async(self.dataset, executor,
                                                       cancel_id=cancel_id)
    async for udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    results = await run_blocking(
        self.get_udf_results,
        udf_results=udf_results,
        roi=roi,
    )
    await send_results(results, True)
async def controller(self, cancel_id, executor, job_is_cancelled, send_results):
    # Two-pass analysis: (1) compute per-pixel std deviation, (2) find
    # peaks in the (optionally ring-masked) std-dev map and apply a
    # sparse single-pixel mask stack at the peak positions.
    stddev_udf = StdDevUDF()
    roi = self.get_sd_roi()
    result_iter = UDFRunner(stddev_udf).run_for_dataset_async(
        self.dataset, executor, roi=roi, cancel_id=cancel_id)
    # Drain the iterator; only the final result is used.
    async for sd_udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    sd_udf_results = consolidate_result(sd_udf_results)
    # Analysis parameters: ring mask geometry and peak-finding knobs.
    center = (self.parameters["cy"], self.parameters["cx"])
    rad_in = self.parameters["ri"]
    rad_out = self.parameters["ro"]
    n_peaks = self.parameters["n_peaks"]
    min_dist = self.parameters["min_dist"]
    sstd = sd_udf_results['std']
    sshape = sstd.shape
    if not (center is None or rad_in is None or rad_out is None):
        # Restrict peak search to a ring between rad_in and rad_out.
        mask_out = 1 * _make_circular_mask(center[1], center[0],
                                           sshape[1], sshape[0], rad_out)
        mask_in = 1 * _make_circular_mask(center[1], center[0],
                                          sshape[1], sshape[0], rad_in)
        mask = mask_out - mask_in
        masked_sstd = sstd * mask
    else:
        masked_sstd = sstd
    coordinates = peak_local_max(masked_sstd, num_peaks=n_peaks,
                                 min_distance=min_dist)
    # Build a sparse stack of single-pixel masks, one per peak:
    # mask i is 1 at (y_i, x_i) and 0 elsewhere.
    y = coordinates[..., 0]
    x = coordinates[..., 1]
    z = range(len(y))
    mask = sparse.COO(shape=(len(y), ) + tuple(self.dataset.shape.sig),
                      coords=(z, y, x), data=1)
    udf = ApplyMasksUDF(mask_factories=lambda: mask, mask_count=len(y),
                        mask_dtype=np.uint8, use_sparse=True)
    result_iter = UDFRunner(udf).run_for_dataset_async(self.dataset, executor,
                                                       cancel_id=cancel_id)
    async for udf_results in result_iter:
        pass
    if job_is_cancelled():
        raise JobCancelledError()
    results = await run_blocking(
        self.get_udf_results,
        udf_results=udf_results,
        roi=roi,
    )
    await send_results(results, True)