def _do_initialize(self):
    header = self._header = _read_header(self._path, HEADER_FIELDS)
    self._image_offset = _get_image_offset(header)
    if header['version'] >= 5:  # StreamPix version 6
        # Timestamp = 4-byte unsigned long + 2-byte unsigned short (ms)
        # + 2-byte unsigned short (us)
        self._timestamp_micro = True
    else:  # Older versions
        self._timestamp_micro = False
    try:
        dtype = np.dtype('uint%i' % header['bit_depth'])
    except TypeError:
        raise DataSetException("unsupported bit depth: %s" % header['bit_depth'])
    frame_size_bytes = header['width'] * header['height'] * dtype.itemsize
    self._footer_size = header['true_image_size'] - frame_size_bytes
    self._filesize = os.stat(self._path).st_size
    self._image_count = int(
        (self._filesize - self._image_offset) / header['true_image_size'])
    if self._image_count != np.prod(self._scan_size):
        raise DataSetException("scan_size doesn't match number of frames")
    shape = Shape(
        self._scan_size + (header['height'], header['width']),
        sig_dims=2,
    )
    self._meta = DataSetMeta(
        shape=shape,
        raw_dtype=dtype,
        dtype=dtype,
        metadata=header,
    )
    self._maybe_load_dark_gain()
    return self
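# A worked example (standalone sketch, values illustrative) of the .seq layout
# arithmetic in _do_initialize() above: each frame occupies
# width * height * itemsize bytes, true_image_size additionally includes the
# per-frame footer (e.g. the timestamp), and the frame count follows from the
# file size once the header offset is subtracted.
import numpy as np

width, height, dtype = 512, 512, np.dtype('uint16')
frame_size_bytes = width * height * dtype.itemsize   # 524288
true_image_size = frame_size_bytes + 8               # assume an 8-byte footer
footer_size = true_image_size - frame_size_bytes     # 8
image_offset = 8192                                  # assumed header size
filesize = image_offset + 100 * true_image_size      # e.g. from os.stat()
image_count = (filesize - image_offset) // true_image_size
assert (footer_size, image_count) == (8, 100)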
def check_valid(self):
    if self._header['magic'] != 0xFEED:
        raise DataSetException('The format of this .seq file is unrecognized')
    if self._header['compression_format'] != 0:
        raise DataSetException('Only uncompressed images are supported in .seq files')
    if self._header['image_format'] != 100:
        raise DataSetException('Non-monochrome images are not supported')
def _auto_load(path, *args, executor, **kwargs):
    if path is None:
        raise DataSetException(
            "please specify the `path` argument to allow auto detection")
    detected_params = detect(path, executor=executor)
    filetype_detected = detected_params.get('type', None)
    if filetype_detected is None:
        raise DataSetException(
            "could not determine DataSet type for file '%s'" % path,
        )
    return load(filetype_detected, path, *args, executor=executor, **kwargs)
def _auto_load(path, executor):
    if path is None:
        raise DataSetException(
            "please specify the `path` kwarg to allow auto detection")
    params = detect(path, executor=executor)
    filetype_detected = params.pop('type', None)
    if filetype_detected is None:
        raise DataSetException(
            "could not determine DataSet type for file '%s'" % path,
        )
    return load(filetype_detected, executor=executor, **params)
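# A minimal sketch of the detect-then-dispatch pattern used by the two
# _auto_load() variants above: the detector returns a parameter dict that
# carries the filetype under 'type', which is popped before the remaining
# entries are forwarded as keyword arguments. detect_stub/load_stub are
# hypothetical stand-ins, not LiberTEM API.
def detect_stub(path):
    return {'type': 'raw', 'path': path, 'dtype': 'uint16'}

def load_stub(filetype, **params):
    return (filetype, params)

params = detect_stub('/data/scan.raw')
filetype = params.pop('type')
assert load_stub(filetype, **params) == (
    'raw', {'path': '/data/scan.raw', 'dtype': 'uint16'})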
def _get_dataset_cls(filetype):
    try:
        ft = filetypes[filetype.lower()]
    except KeyError:
        raise DataSetException("unknown filetype: %s" % filetype)
    parts = ft.split(".")
    module_name = ".".join(parts[:-1])
    cls_name = parts[-1]
    try:
        module = importlib.import_module(module_name)
    except ImportError as e:
        raise DataSetException("could not load dataset: %s" % str(e))
    cls = getattr(module, cls_name)
    return cls
def check_valid(self):
    try:
        # FIXME: maybe validate existence of all files?
        self.open_file(self._files[0])
        # TODO: check file size match
        # TODO: try to read from file(s)?
        return True
    except (IOError, OSError, ValueError) as e:
        raise DataSetException("invalid dataset: %s" % e)
def get_dataset_cls(filetype: str) -> typing.Type[DataSet]:
    if not isinstance(filetype, str):
        return filetype
    try:
        ft = filetypes[filetype.lower()]
    except KeyError:
        raise DataSetException("unknown filetype: %s" % filetype)
    if not isinstance(ft, str):
        return ft
    parts = ft.split(".")
    module_name = ".".join(parts[:-1])
    cls_name = parts[-1]
    try:
        module = importlib.import_module(module_name)
    except ImportError as e:
        raise DataSetException("could not load dataset: %s" % str(e))
    cls: typing.Type[DataSet] = getattr(module, cls_name)
    return cls
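# A self-contained sketch of the lazy-import registry pattern that both
# _get_dataset_cls() and get_dataset_cls() implement: the registry stores
# dotted "module.Class" paths so that importing the registry module does not
# eagerly import every dataset backend. The registry contents here are
# illustrative, not the real `filetypes` mapping.
import importlib

_registry = {'csv': 'csv.DictReader'}

def resolve(key):
    module_name, _, cls_name = _registry[key].rpartition('.')
    return getattr(importlib.import_module(module_name), cls_name)

assert resolve('csv').__name__ == 'DictReader'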
def _do_initialize(self):
    header = self._header = _read_header(self._path, HEADER_FIELDS)
    self._image_offset = _get_image_offset(header)
    if header['version'] >= 5:  # StreamPix version 6
        # Timestamp = 4-byte unsigned long + 2-byte unsigned short (ms)
        # + 2-byte unsigned short (us)
        self._timestamp_micro = True
    else:  # Older versions
        self._timestamp_micro = False
    try:
        dtype = np.dtype('uint%i' % header['bit_depth'])
    except TypeError:
        raise DataSetException("unsupported bit depth: %s" % header['bit_depth'])
    frame_size_bytes = header['width'] * header['height'] * dtype.itemsize
    self._footer_size = header['true_image_size'] - frame_size_bytes
    self._filesize = os.stat(self._path).st_size
    self._image_count = int(
        (self._filesize - self._image_offset) / header['true_image_size'])
    if self._sig_shape is None:
        self._sig_shape = (header['height'], header['width'])
    elif int(np.prod(self._sig_shape)) != (header['height'] * header['width']):
        raise DataSetException(
            "sig_shape must be of size: %s" % (header['height'] * header['width']))
    self._nav_shape_product = int(np.prod(self._nav_shape))
    self._sync_offset_info = self.get_sync_offset_info()
    shape = Shape(self._nav_shape + self._sig_shape, sig_dims=len(self._sig_shape))
    self._meta = DataSetMeta(
        shape=shape,
        raw_dtype=dtype,
        dtype=dtype,
        metadata=header,
        sync_offset=self._sync_offset,
        image_count=self._image_count,
    )
    self._maybe_load_dark_gain()
    return self
def _check_array(self, array, sig_dims):
    if not isinstance(array, da.Array):
        raise DataSetException('Expected a Dask array as input, received '
                               f'{type(array)}.')
    if not isinstance(sig_dims, int) or sig_dims < 0:
        raise DataSetException('Expected non-negative integer sig_dims, '
                               f'received {sig_dims}.')
    if any(np.isnan(c).any() for c in array.shape) \
            or any(np.isnan(c).any() for c in array.chunks):
        raise DataSetException('Dask array has an unknown shape or chunk sizes '
                               'so it cannot be interpreted as LiberTEM partitions. '
                               'Run array.compute_chunk_sizes() '
                               'before passing to DaskDataSet, though this '
                               'may be performance-intensive. Chunking: '
                               f'{array.chunks}, Shape {array.shape}')
    if sig_dims >= array.ndim:
        raise DataSetException(f'Number of sig_dims {sig_dims} not compatible '
                               f'with number of array dims {array.ndim}; '
                               'must be able to create partitions along nav '
                               'dimensions.')
    return True
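# A small sketch of the unknown-shape situation _check_array() rejects:
# indexing a Dask array with a Dask boolean mask produces NaN chunk sizes,
# and compute_chunk_sizes() materializes them. Shapes here are illustrative.
import dask.array as da
import numpy as np

arr = da.ones((4, 16, 16), chunks=(1, 16, 16))
mask = da.from_array(np.array([True, False, True, True]), chunks=1)
masked = arr[mask]
assert any(np.isnan(c) for c in masked.shape)   # shape is now (nan, 16, 16)
masked = masked.compute_chunk_sizes()           # triggers computation
assert masked.shape == (3, 16, 16)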
def __init__(self, dask_array, *, sig_dims, preserve_dimensions=True,
             min_size=None, io_backend=None):
    super().__init__(io_backend=io_backend)
    if io_backend is not None:
        raise DataSetException(
            "DaskDataSet currently doesn't support alternative I/O backends")
    self._check_array(dask_array, sig_dims)
    self._array = dask_array
    self._sig_dims = sig_dims
    self._sig_shape = self._array.shape[-self._sig_dims:]
    self._dtype = self._array.dtype
    self._preserve_dimension = preserve_dimensions
    self._min_size = min_size
    if self._min_size is None:
        # TODO: add a method to determine a sensible partition byte-size
        self._min_size = self._default_min_size
def get_sync_offset_info(self):
    """
    Check that the specified sync_offset is valid and return the number of
    frames skipped, ignored and inserted at either end of the dataset.
    """
    if not -1 * self._image_count < self._sync_offset < self._image_count:
        raise DataSetException(
            "sync_offset should be in (%s, %s), which is (-image_count, image_count)"
            % (-1 * self._image_count, self._image_count)
        )
    return {
        "frames_skipped_start": max(0, self._sync_offset),
        "frames_ignored_end": max(
            0, self._image_count - self._nav_shape_product - self._sync_offset
        ),
        "frames_inserted_start": abs(min(0, self._sync_offset)),
        "frames_inserted_end": max(
            0, self._nav_shape_product - self._image_count + self._sync_offset
        ),
    }
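# A worked example of the sync_offset bookkeeping returned above (standalone
# function, numbers illustrative): with 100 frames on disk, a nav shape
# covering 90 frames and sync_offset=5, the first 5 frames are skipped and
# the last 5 are ignored; a negative offset would instead insert blank
# frames at the start.
def sync_offset_info(image_count, nav_shape_product, sync_offset):
    return {
        "frames_skipped_start": max(0, sync_offset),
        "frames_ignored_end": max(
            0, image_count - nav_shape_product - sync_offset),
        "frames_inserted_start": abs(min(0, sync_offset)),
        "frames_inserted_end": max(
            0, nav_shape_product - image_count + sync_offset),
    }

assert sync_offset_info(100, 90, 5) == {
    "frames_skipped_start": 5, "frames_ignored_end": 5,
    "frames_inserted_start": 0, "frames_inserted_end": 0,
}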
@classmethod
def detect_params(cls, path, executor):
    try:
        return executor.run_function(cls._do_detect_params, path)
    except Exception as e:
        raise DataSetException(repr(e)) from e