def _update_collection_index(axis, value):
    """Re-pick a single frame after the collection index changed on *axis*.

    Reads the 'libertem-io' metadata stored on ``src``, reloads the dataset
    it describes, picks the frame at the new navigation position via
    ``PickUDF`` and replaces ``src``'s data and metadata with the result.

    NOTE(review): ``src``, ``target`` and ``self`` are free variables — this
    function is only meaningful as a closure inside its enclosing method.
    """
    libertem_metadata = copy.deepcopy(src.metadata.get('libertem-io'))
    if not libertem_metadata:
        # Data item was not produced by this plugin; nothing to do.
        return
    file_parameters = libertem_metadata['file_parameters']
    # 'type' is stored alongside the load kwargs; split it off because
    # dataset.load() takes the file type as a positional argument.
    file_type = file_parameters.pop('type')
    current_index = libertem_metadata['display_slice']['start']
    # Flat index -> n-d navigation index (assumes target.data.shape is the
    # navigation shape — TODO confirm against the code writing this metadata).
    current_index = np.unravel_index(current_index, target.data.shape)
    if value == current_index[axis]:
        # Position unchanged — avoid a useless reload and re-pick.
        return
    executor = Registry.get_component('libertem_executor')
    if not executor:
        return
    executor = executor.ensure_sync()
    ds = dataset.load(file_type, executor, **file_parameters)
    # ROI selecting exactly the one frame to pick; only two navigation
    # axes (0 and 1) are handled here.
    roi = np.zeros(ds.shape.nav, dtype=bool)
    if axis == 0:
        roi[value, current_index[1]] = True
        current_index = (value, current_index[1])
    else:
        roi[current_index[0], value] = True
        current_index = (current_index[0], value)
    result = UDFRunner(PickUDF()).run_for_dataset(ds, executor, roi=roi)
    result_array = np.squeeze(np.array(result['intensity']))
    # Persist the new flat display index back into a metadata copy and
    # swap the data item's contents in one step.
    new_metadata = copy.deepcopy(src.metadata)
    new_display_slice = np.ravel_multi_index(current_index, target.data.shape)
    new_metadata['libertem-io']['display_slice'][
        'start'] = new_display_slice
    new_xdata = self.__api.create_data_and_metadata(
        result_array, metadata=new_metadata)
    src.set_data_and_metadata(new_xdata)
def load(self, filetype: str, *args, **kwargs) -> DataSet: """ Load a `DataSet`. As it doesn't load the whole data into RAM at once, you can load and process datasets that are bigger than your available RAM. Using fast storage (i.e. SSD) is advisable. .. versionchanged:: 0.5.0.dev0 Added support for filetype="auto" Parameters ---------- filetype : str one of: %(types)s or auto to automatically determine filetype and parameters args passed on to the DataSet implementation kwargs passed on to the DataSet implementation Returns ------- DataSet : libertem.io.dataset.base.DataSet The loaded dataset Note ---- Additional parameters are passed to the concrete :class:`~libertem.io.dataset.base.DataSet` implementation. Note ---- See :ref:`dataset api` for format-specific documentation. """ # delegate to libertem.io.dataset.load: return load(filetype, executor=self.executor, *args, **kwargs)
async def async_main(address):
    """Connect to a cluster at *address*, sum all frames of a .blo dataset
    and print partial and final sums while keeping GUI events processed."""
    # Keep processing background events while we wait on the executor.
    # (asyncio.create_task(coro) does the same from Python 3.7 on.)
    bg_task = asyncio.ensure_future(background_task())

    executor = await AsyncDaskJobExecutor.connect(address)

    # Dataset path: first CLI argument if given, hard-coded default otherwise.
    default_path = (
        "C:/Users/weber/Nextcloud/Projects/Open Pixelated STEM framework/"
        "Data/3rd-Party Datasets/Glasgow/10 um 110.blo"
    )
    path = sys.argv[1] if len(sys.argv) > 1 else default_path

    ds = load("blo", path=path, tileshape=(1, 8, 144, 144))
    ds.initialize()

    sum_job = SumFramesJob(dataset=ds)
    result_buf = get_result_buffer(sum_job)
    # Print the running sum as partial results stream in.
    async for _ in run(executor, sum_job, result_buf):
        print("Partial result sum: ", result_buf.sum())
    print("Final result sum: ", result_buf.sum())

    # stop the background task:
    bg_task.cancel()
def load(self, filetype: str, *args, io_backend=None, **kwargs) -> DataSet: """ Load a :class:`~libertem.io.dataset.base.DataSet`. As it doesn't load the whole data into RAM at once, you can load and process datasets that are bigger than your available RAM. Using fast storage (i.e. SSD) is advisable. .. versionchanged:: 0.5.0 Added support for filetype="auto" .. versionchanged:: 0.6.0 Added support for specifying the I/O backend Parameters ---------- filetype : str one of: %(types)s; or use "auto" to automatically determine filetype and parameters io_backend : IOBackend or None Use a different I/O backend for this data set args passed on to the DataSet implementation kwargs passed on to the DataSet implementation Returns ------- DataSet : libertem.io.dataset.base.DataSet The loaded dataset Note ---- Additional parameters are passed to the concrete :class:`~libertem.io.dataset.base.DataSet` implementation. Note ---- See :ref:`dataset api` for format-specific documentation. Examples -------- Load a data set from a given path, automatically determinig the type: >>> ds = ctx.load("auto", path="...") # doctest: +SKIP To configure an alternative I/O backend, in this case configuring the mmap backend to enable readahead hints: >>> from libertem.io.dataset.base import MMapBackend >>> io_backend = MMapBackend(enable_readahead_hints=True) >>> ds = ctx.load("auto", path="...", io_backend=io_backend) # doctest: +SKIP """ # delegate to libertem.io.dataset.load: return load(filetype, *args, io_backend=io_backend, executor=self.executor, **kwargs)
async def async_main(ds_kwargs, address):
    """Interactive DM demo: show a live sum image, then repeatedly apply a
    mask derived from a user-movable rectangle and display the result.

    NOTE(review): ``DM``, ``run``, ``mask_factory_from_rect`` etc. are
    free names from the surrounding script; behavior depends on the
    Digital Micrograph (GMS) Python environment.
    """
    executor = await AsyncDaskJobExecutor.connect(address)
    ds = load(**ds_kwargs)
    # First pass: sum of all frames, displayed live in a new DM document.
    sum_job = SumFramesJob(dataset=ds)
    (y, x) = sum_job.get_result_shape()
    sum_image = get_result_image(sum_job)
    sum_buffer = sum_image.GetNumArray()
    doc = DM.NewImageDocument("test document")
    d = doc.AddImageDisplay(sum_image, 1)
    # Rectangle component (type 5) covering the central 20% of the image;
    # the user can drag it to choose the mask region.
    c = d.AddNewComponent(5, int(y * 0.4), int(x * 0.4), int(y * 0.6),
                          int(x * 0.6))
    c.SetForegroundColor(1, 0, 0)
    doc.Show()
    # Refresh the displayed sum as partial results arrive.
    async for _ in run(executor, sum_job, sum_buffer):
        sum_image.UpdateImage()
    # Build the first mask job from the rectangle's initial position.
    rect = c.GetRect()
    mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
    rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
    result_buffer = np.zeros(rect_job.get_result_shape())
    result_image = DM.CreateImage(result_buffer[0])
    result_image.ShowImage()
    result_image_buffer = result_image.GetNumArray()
    # For now we do a limited number of runs
    # FIXME implement a proper way to exit the loop
    counter = 0
    while counter < 20:
        counter += 1
        result_buffer[:] = 0
        # Run the mask job, streaming partial results into the DM image.
        async for _ in run(executor, rect_job, result_buffer):
            np.copyto(
                result_image_buffer,
                # The reshape is a workaround for a bug in the current alpha version of DM
                # This will not be required in the final DM release
                result_buffer[0].reshape(result_image_buffer.shape),
                casting='unsafe')
            result_image.UpdateImage()
        # Poll once per second until the user moves the rectangle, then
        # rebuild the mask job for the next iteration.
        while True:
            newrect = c.GetRect()
            if newrect != rect:
                rect = newrect
                mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
                rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
                break
            await asyncio.sleep(1)
async def put(self, uuid):
    """Create a dataset from the request body and register it under *uuid*.

    On a DataSetException a dataset-error message is written back instead;
    on success the dataset is registered and the event is broadcast.
    """
    request_data = tornado.escape.json_decode(self.request.body)
    params = request_data['dataset']['params']
    # TODO: validate request_data
    # let's start simple:
    ds_type = params['type'].lower()
    assert ds_type in ["hdfs", "hdf5", "raw", "mib", "blo"]
    # Lazy builders: only the selected entry touches its params keys, so
    # missing keys of other formats never raise.
    param_builders = {
        "hdfs": lambda: {
            "index_path": params["path"],
            "tileshape": params["tileshape"],
            "host": "localhost",  # FIXME: config param
            "port": 8020,  # FIXME: config param
        },
        "hdf5": lambda: {
            "path": params["path"],
            "ds_path": params["dsPath"],
            "tileshape": params["tileshape"],
        },
        "raw": lambda: {
            "path": params["path"],
            "dtype": params["dtype"],
            "detector_size_raw": params["detectorSizeRaw"],
            "crop_detector_to": params["cropDetectorTo"],
            "tileshape": params["tileshape"],
            "scan_size": params["scanSize"],
        },
        "mib": lambda: {
            "files_pattern": params["filesPattern"],
            "tileshape": params["tileshape"],
            "scan_size": params["scanSize"],
        },
        "blo": lambda: {
            "path": params["path"],
            "tileshape": params["tileshape"],
        },
    }
    dataset_params = param_builders[ds_type]()
    try:
        ds = dataset.load(filetype=params["type"], **dataset_params)
        ds.check_valid()
    except DataSetException as e:
        # Report the failure to the client instead of raising.
        msg = Message(self.data).create_dataset_error(uuid, str(e))
        log_message(msg)
        self.write(msg)
        return
    self.data.register_dataset(
        uuid=uuid,
        dataset=ds,
        params=request_data['dataset'],
    )
    msg = Message(self.data).create_dataset(dataset=uuid)
    log_message(msg)
    self.write(msg)
    self.event_registry.broadcast_event(msg)
def niondata_to_libertemdata(self, niondata):
    """Wrap the HDF5 file backing *niondata* as a LiberTEM dataset."""
    # Avoid shadowing the parameter: work on the underlying data item.
    data_item = niondata._data_item
    storage = data_item.persistent_storage
    # Resolve the on-disk path of the data item's backing store and load
    # it through the synchronous executor.
    return dataset.load(
        "hdf5",
        self.__executor.ensure_sync(),
        path=storage.get_storage_property(data_item, "file_path"),
        ds_path="data",
        sig_dims=niondata.xdata.datum_dimension_count,
    )
def niondata_to_libertemdata(self, niondata: API_1_0.DataItem) -> dataset.base.DataSet:
    """Expose *niondata*'s backing HDF5 file as a LiberTEM dataset and
    record the load parameters in the data item's metadata."""
    data_item = niondata._data_item
    backing_path = data_item.persistent_storage.get_storage_property(data_item, 'file_path')
    load_params = {
        'path': backing_path,
        'ds_path': 'data',
        'sig_dims': niondata.xdata.datum_dimension_count,
    }
    # The metadata copy additionally carries the file type; 'load_params'
    # itself stays free of the 'type' key because dataset.load() receives
    # the type as a positional argument.
    updated_metadata = copy.deepcopy(niondata.metadata)
    updated_metadata['libertem-io'] = {
        'display_slice': {'start': 0, 'stop': 0},
        'file_parameters': dict(load_params, type='hdf5'),
    }
    niondata.set_metadata(updated_metadata)
    return dataset.load('hdf5', self.__executor.ensure_sync(), **load_params)
def read_data_and_metadata_from_stream(self, stream):
    """Detect the file type of *stream*, ask the user for parameters via a
    dialog, pick the first frame and return it as data-and-metadata.

    Returns None when no 'libertem_executor' component is registered.

    NOTE(review): blocks on two threading events set by the dialog code —
    this must not run on the UI thread; confirm the caller's thread.
    """
    executor = Registry.get_component('libertem_executor')
    if executor is None:
        logging.error(
            'No libertem executor could be retrieved from the Registry.')
        return
    executor = executor.ensure_sync()
    # Try to auto-detect the file type and its load parameters.
    file_parameters = dataset.detect(stream, executor=executor)
    file_type = file_parameters.pop('type', None)
    if file_type is None:
        # Detection failed: fall back to raw with just the path.
        file_type = 'raw'
        file_parameters = {'path': stream}
    file_params = dict()

    def params_callback(file_params_):
        # Collect the parameters the user confirmed in the dialog.
        file_params.update(file_params_)

    # Show the parameter dialog on the UI thread and wait for it to be
    # created and then closed before continuing.
    self.__api.queue_task(
        lambda: self.show_file_param_dialog(file_type, params_callback))
    self.__show_file_param_dialog_finished_event.wait()
    self.__show_file_param_dialog_finished_event.clear()
    self.__file_param_dialog_closed_event.wait()
    # 'name' is dialog-only metadata, not a dataset.load() parameter.
    file_params.pop('name', None)
    file_parameters.update(file_params)
    ds = dataset.load(file_type, executor, **file_parameters)
    # Pick exactly the first navigation position as the initial display.
    roi = np.zeros(ds.shape.nav, dtype=bool)
    roi_flat = roi.ravel()
    roi_flat[0] = True
    result = UDFRunner(PickUDF()).run_for_dataset(ds, executor, roi=roi)
    result_array = np.squeeze(np.array(result['intensity']))
    # Store the load parameters (incl. type) so later code can reload the
    # dataset from the item's metadata alone.
    file_parameters['type'] = file_type
    metadata = {
        'libertem-io': {
            'file_parameters': file_parameters,
            'display_slice': {
                'start': 0,
                'stop': 0
            }
        }
    }
    return self.__api.create_data_and_metadata(result_array, metadata=metadata)
def load(self, filetype, *args, **kwargs):
    """
    Load a `DataSet`, delegating to :func:`libertem.io.dataset.load`.

    Data is accessed lazily rather than read into RAM at once, so datasets
    larger than the available RAM can be loaded and processed. Using fast
    storage (i.e. SSD) is advisable.

    Parameters
    ----------
    filetype : str
        see libertem.io.dataset.filetypes for supported types,
        example: 'hdf5'

    Note
    ----
    Additional positional and keyword arguments are forwarded to the
    concrete DataSet implementation.

    Returns
    -------
    DataSet
        a subclass of DataSet
    """
    return load(filetype, *args, **kwargs)
def execute(self, src, map_regions):
    """Recompute the 4D-map UDF for *src* when its map regions changed.

    Skips the work entirely when both the source uuid and the list of map
    regions are unchanged since the last run. Otherwise builds a boolean
    mask as the union of *map_regions* (whole-frame mask when none are
    given), reloads the dataset described by the 'libertem-io' metadata,
    cancels a still-running previous task and queues a new ApplyMasksUDF
    run on the document controller.

    Exceptions are printed with a traceback and never propagate to the
    computation framework.
    """
    try:
        computation = self.computation._computation
        # Skip when neither the source nor the regions changed since the
        # last successful scheduling.
        if hasattr(computation, 'last_src_uuid') and hasattr(
                computation, 'last_map_regions'):
            map_regions_ = [
                region.persistent_dict for region in map_regions
            ]
            if (str(src.uuid) == computation.last_src_uuid
                    and map_regions_ == computation.last_map_regions):
                return
        metadata = copy.deepcopy(src.xdata.metadata)
        libertem_metadata = metadata.get('libertem-io')
        if libertem_metadata is None:
            # Not a LiberTEM-backed data item; nothing to do.
            return
        executor = Registry.get_component('libertem_executor')
        if executor is None:
            logging.error(
                'No libertem executor could be retrieved from the Registry.'
            )
            return
        file_parameters = libertem_metadata['file_parameters']
        # dataset.load() takes the type positionally, so split it off.
        file_type = file_parameters.pop('type')
        shape = src.xdata.data_shape
        # Build the mask as the union of all regions, or select everything
        # when there are no regions. Use the builtin `bool` dtype: the
        # `np.bool` alias was deprecated in NumPy 1.20 and removed in 1.24,
        # where it raises AttributeError.
        if map_regions:
            mask_data = np.zeros(shape, dtype=bool)
            for region in map_regions:
                np.logical_or(mask_data, region.get_mask(shape),
                              out=mask_data)
        else:
            mask_data = np.ones(shape, dtype=bool)
        ds = dataset.load(file_type, executor.ensure_sync(),
                          **file_parameters)
        udf = ApplyMasksUDF(mask_factories=[lambda: mask_data])
        dc = self.__api.application.document_controllers[
            0]._document_controller
        # Cancel a previous, possibly still-running task before starting a
        # new one.
        if hasattr(computation, 'cancel_id'):
            print(f'Cancelling task: {computation.cancel_id}')
            to_cancel = computation.cancel_id
            self.__api.queue_task(lambda: self.__event_loop.create_task(
                executor.cancel(to_cancel)))
            #self.computation._computation.cancel_id = None
        computation.cancel_id = str(time.time())
        print(f'Creating task: {computation.cancel_id}')
        dc.add_task(
            'libertem-map4d',
            lambda: self.__event_loop.create_task(
                self.run_udf(udf, computation.cancel_id, executor,
                             dataset=ds)))
        # Remember what was just scheduled for the skip check above.
        computation.last_src_uuid = str(src.uuid)
        computation.last_map_regions = copy.deepcopy(
            [region.persistent_dict for region in map_regions])
    except Exception as e:
        print(str(e))
        import traceback
        traceback.print_exc()
async def async_main(address):
    """Interactive DM demo on a raw dataset: show a live sum image, then
    repeatedly apply a mask from a user-movable rectangle.

    NOTE(review): ``DM``, ``run``, ``background_task`` etc. are free names
    from the surrounding script; requires the Digital Micrograph (GMS)
    Python environment.
    """
    # Keep GUI events flowing while we await cluster operations.
    GUI_events = asyncio.ensure_future(background_task())
    executor = await AsyncDaskJobExecutor.connect(address)
    # Just an alternative dataset that works better on a slower machine
    # ds = load(
    #     "blo",
    #     path=("C:/Users/weber/Nextcloud/Projects/Open Pixelated STEM framework/"
    #           "Data/3rd-Party Datasets/Glasgow/10 um 110.blo"),
    #     tileshape=(1,8,144,144)
    # )

    # For a remote cluster this has to be the path on the worker nodes, not the client
    ds = load(
        "raw",
        path='/data/users/weber/scan_11_x256_y256.raw',
        dtype="float32",
        scan_size=(256, 256),
        detector_size_raw=(130, 128),
        crop_detector_to=(128, 128),
    )
    # First pass: sum of all frames, displayed live in a new DM document.
    sum_job = SumFramesJob(dataset=ds)
    (y, x) = sum_job.get_result_shape()
    sum_image = get_result_image(sum_job)
    sum_buffer = sum_image.GetNumArray()
    doc = DM.NewImageDocument("test document")
    d = doc.AddImageDisplay(sum_image, 1)
    # Rectangle component (type 5) covering the central 20% of the image;
    # the user can drag it to choose the mask region.
    c = d.AddNewComponent(5, int(y * 0.4), int(x * 0.4), int(y * 0.6),
                          int(x * 0.6))
    c.SetForegroundColor(1, 0, 0)
    doc.Show()
    # Refresh the displayed sum as partial results arrive.
    async for _ in run(executor, sum_job, sum_buffer):
        sum_image.UpdateImage()
    # Build the first mask job from the rectangle's initial position.
    rect = c.GetRect()
    mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
    rect_job = ApplyMasksJob(dataset=ds, mask_factories=[mask])
    result_buffer = np.zeros(rect_job.get_result_shape())
    result_image = DM.CreateImage(result_buffer[0])
    result_image.ShowImage()
    result_image_buffer = result_image.GetNumArray()
    # For now we do a limited number of runs
    # FIXME implement a proper way to exit the loop
    counter = 0
    while counter < 20:
        counter += 1
        result_buffer[:] = 0
        # Run the mask job, streaming partial results into the DM image.
        async for _ in run(executor, rect_job, result_buffer):
            np.copyto(
                result_image_buffer,
                # The reshape is a workaround for a bug in the current alpha version of DM
                # This will not be required in the final DM release
                result_buffer[0].reshape(result_image_buffer.shape),
                casting='unsafe')
            result_image.UpdateImage()
        # Poll once per second until the user moves the rectangle, then
        # rebuild the mask job for the next iteration.
        while True:
            newrect = c.GetRect()
            if newrect != rect:
                rect = newrect
                mask = mask_factory_from_rect(rect, tuple(ds.shape.sig))
                rect_job = ApplyMasksJob(dataset=ds,
                                         mask_factories=[mask])
                break
            await asyncio.sleep(1)
    GUI_events.cancel()