def test_dim_dict_invalid_obj_type(self):
    """link_as_main must reject a ``dim_dict`` that is not a dictionary.

    A list of Dimension objects is passed in place of a dictionary and the
    call is expected to raise ``TypeError`` with an explanatory message.
    """
    dims = (2, 3)
    h5file = make_simple_nsid_dataset(dsetshapes=[dims])
    dset = h5file['MyGroup']['data']
    # Deliberately a list rather than a dict
    dim_dict = [Dimension(np.arange(2), 'X'),
                Dimension(np.arange(3), 'Y')]
    err_msg = 'dim_dict must be a dictionary'
    with self.assertRaises(TypeError) as context:
        _ = link_as_main(dset, dim_dict)
    # assertIn shows both strings when the check fails
    self.assertIn(err_msg, str(context.exception))
def test_items_in_dim_dict_invalid_obj_type(self):
    """link_as_main must reject a ``dim_dict`` whose values are not datasets.

    The dictionary maps axis indices to Dimension objects instead of
    h5py.Dataset objects; the call is expected to raise ``TypeError`` with
    an explanatory message.
    """
    dims = (2, 3)
    h5file = make_simple_nsid_dataset(dsetshapes=[dims])
    dset = h5file['MyGroup']['data']
    # Correct container type, wrong value type
    dim_dict = {
        0: Dimension(np.arange(2), 'X'),
        1: Dimension(np.arange(3), 'Y'),
    }
    err_msg = 'Items in dictionary must all be h5py.Datasets !'
    with self.assertRaises(TypeError) as context:
        _ = link_as_main(dset, dim_dict)
    # assertIn shows both strings when the check fails
    self.assertIn(err_msg, str(context.exception))
def __new__(cls, name, units, values, quantity='generic',
            dimension_type='unknown', mode=DimType.DEFAULT):
    """
    Build a dimension descriptor (name, units, values) that also carries
    the mode in which the dimension was varied.

    Parameters
    ----------
    name : str or unicode
        Name of the dimension. For example 'Bias'
    units : str or unicode
        Units for this dimension. For example: 'V'
    values : array-like or int
        Values over which this dimension was varied. An integer smaller
        than 1 is rejected here; any other value is forwarded unchanged to
        the parent constructor.
    quantity : str or unicode, Optional. Default = 'generic'
        Physical quantity such as Length
    dimension_type : str or sidpy.DimensionTypes, Optional. Default = 'unknown'
        Type of dimension, such as spectral, spatial, etc.
    mode : Enum, Optional. Default = DimType.DEFAULT
        How the parameter associated with the dimension was varied.
        DimType.DEFAULT - data was recorded for all combinations of values
        in this dimension against **all** other dimensions.
        DimType.INCOMPLETE - data not present for all combinations of values
        in this dimension and all other dimensions (spiral scans, sparse
        sampling, aborted measurements).
        DimType.DEPENDENT - values in this dimension were varied as a
        function of another (independent) dimension.

    Raises
    ------
    ValueError
        If ``values`` is an integer smaller than 1.
    """
    # Only integers are range-checked in this constructor
    if isinstance(values, int) and values < 1:
        raise ValueError(
            'values must be a whole number. {} provided'.format(values))

    new_dim = SIDimension.__new__(cls, values,
                                  name=name,
                                  quantity=quantity,
                                  units=units,
                                  dimension_type=dimension_type)
    new_dim.mode = mode
    return new_dim
def write_dummy_dset(hf_group: h5py.Group, dims: Tuple[int, ...],
                     main_name: str, **kwargs) -> None:
    """
    Write a dataset filled with random numbers into an HDF5 group.

    Parameters
    ----------
    hf_group : h5py.Group
        Group handed to ``write_nsid_dataset`` as the destination.
    dims : tuple of int
        Shape of the random array; one dimension is attached per entry.
    main_name : str
        Passed to ``write_nsid_dataset`` as ``main_data_name``.
    **kwargs
        dnames : sequence, optional
            Labels for the dimensions, converted with ``str``. Must provide
            at least ``len(dims)`` entries. Defaults to ``0 .. len(dims)-1``.
    """
    dset = Dataset.from_array(np.random.random(list(dims)), name="new")
    dnames = kwargs.get("dnames", np.arange(len(dims)))
    for axis, length in enumerate(dims):
        dset.set_dimension(
            axis, Dimension(np.arange(length), str(dnames[axis])))
    write_nsid_dataset(dset, hf_group, main_data_name=main_name)
def read_h5py_dataset(dset):
    """
    Read a single NSID-formatted h5py.Dataset into a Dataset object.

    Parameters
    ----------
    dset : h5py.Dataset
        Dataset to read. It must pass ``check_if_main``.

    Returns
    -------
    dataset
        Object created with ``Dataset.from_array`` holding the data, the
        descriptive attributes (``title``, ``units``, ``quantity``,
        ``data_type``, ``modality``, ``source``), the dimensions that could
        be read, one attribute per sibling group whose name does not start
        with an underscore, and back-references to the HDF5 dataset
        (``h5_dataset``, ``h5_filename``, ``h5_dataset_name``).

    Raises
    ------
    TypeError
        If ``dset`` is not an h5py.Dataset or does not pass
        ``check_if_main``.
    """
    if not isinstance(dset, h5py.Dataset):
        raise TypeError(
            'can only read single Dataset, use read_all_in_group or '
            'read_all function instead')
    if not check_if_main(dset):
        raise TypeError(
            'can only read NSID datasets, not general one, try to import '
            'with from_array')

    # Read the data from file exactly once; the array is reused below for
    # the number of dimensions instead of reading the dataset again.
    data = np.array(dset)
    dataset = Dataset.from_array(data)

    # The title falls back to the HDF5 path. dset.name is only evaluated
    # when needed because accessing it may raise (see the end of function).
    if 'title' in dset.attrs:
        dataset.title = dset.attrs['title']
    else:
        dataset.title = dset.name

    # All remaining descriptive attributes fall back to 'generic'
    for attr_name in ('units', 'quantity', 'data_type', 'modality',
                      'source'):
        if attr_name in dset.attrs:
            value = dset.attrs[attr_name]
        else:
            value = 'generic'
        setattr(dataset, attr_name, value)

    dataset.axes = {}

    for dim in range(data.ndim):
        try:
            # The last scale attached to this axis is used
            label = dset.dims[dim].keys()[-1]
            name = dset.dims[dim][label].name
            dim_dict = {
                'quantity': 'generic',
                'units': 'generic',
                'dimension_type': 'generic',
            }
            # Attributes stored with the scale override the defaults
            dim_dict.update(dict(dset.parent[name].attrs))

            dataset.set_dimension(
                dim,
                Dimension(np.array(dset.parent[name][()]),
                          dset.dims[dim].label,
                          dim_dict['quantity'],
                          dim_dict['units'],
                          dim_dict['dimension_type']))
        except ValueError:
            print('dimension {} not NSID type using generic'.format(dim))

    # Attributes of sibling groups become attributes of the result; groups
    # whose name starts with an underscore are skipped.
    for key in dset.parent:
        sibling = dset.parent[key]
        if isinstance(sibling, h5py.Group) and key[0] != '_':
            setattr(dataset, key, nest_dict(sibling.attrs))

    dataset.h5_dataset = dset
    dataset.h5_filename = dset.file.filename
    try:
        dataset.h5_dataset_name = dset.name
    except ValueError:
        dataset.h5_dataset_name = ''
    return dataset
def load_ringdown(data_files, parm_dict, h5_path, verbose=False,
                  loadverbose=True, average=True, mirror=False):
    """
    Generates the HDF5 file given path to files_list and parameters dictionary

    Creates a group RD_Group with a single main dataset named 'Ringdown'.
    Note that ``parm_dict`` is modified in place (point counts, total time
    and, if missing, a default 'AMPINVOLS' are written into it).

    :param data_files: List of the *.ibw files to be individually scanned,
        one file per line of the image
    :type data_files: list

    :param parm_dict: Scan parameters to be saved as attributes. Must contain
        'num_rows', 'num_cols', 'FastScanSize' and 'SlowScanSize'
    :type parm_dict: dict

    :param h5_path: Path to H5 file on disk
    :type h5_path: string

    :param verbose: Print the tree of the HDF5 file when done
    :type verbose: bool, optional

    :param loadverbose: Whether to print the name of each file as it is
        loaded, for feedback
    :type loadverbose: bool, optional

    :param average: Whether to average all signals recorded at a pixel into
        a single signal per pixel
    :type average: bool, optional

    :param mirror: Flips the order of the signals within each line
        (np.flipud), for data acquired during a retrace, so data match the
        topography pixel-to-pixel
    :type mirror: bool, optional

    :returns: The main dataset returned by usid.hdf_utils.write_main_dataset
    :rtype: presumably a USIDataset; confirm against the pyUSID version in use
    """
    # e.g. if a 16000 point signal with 2000 averages and 10 pixels
    # (10MHz sampling of a 1.6 ms long signal=16000, 200 averages per pixel)
    # parm_dict['pnts_per_pixel'] = 200 (# signals at each pixel)
    #           ['pnts_per_avg'] = 16000 (# pnts per signal, called an "average")
    #           ['pnts_per_line'] = 2000 (# signals in each line)

    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']

    # The signals are hard-coded in the AFM software as 800 points long
    # Therefore, we can calculate pnts_per_pixel etc from the first file
    pnts_per_avg = 800
    signal = loadibw(data_files[0])['wave']['wData']
    parm_dict['pnts_per_pixel'] = int(
        signal.shape[0] / (pnts_per_avg * num_cols))
    parm_dict['pnts_per_avg'] = pnts_per_avg  # hard-coded in our AFM software
    parm_dict['total_time'] = 16e-3  # hard-coded in our AFM software

    if 'AMPINVOLS' not in parm_dict:
        parm_dict['AMPINVOLS'] = 100e-9

    # Number of signals per pixel as stored in the files (before averaging)
    orig_pnts_per_pixel = parm_dict['pnts_per_pixel']
    if average:
        parm_dict['pnts_per_pixel'] = 1
        parm_dict['pnts_per_line'] = num_cols
    pnts_per_pixel = parm_dict['pnts_per_pixel']

    # Explicit append mode: h5py >= 3 opens read-only when no mode is given,
    # which would make every write below fail.
    hdf = h5py.File(h5_path, 'a')

    try:
        rd_group = hdf.file.create_group('RD_Group')
    except ValueError:
        # The group already exists: create an indexed sibling instead
        rd_group = usid.hdf_utils.create_indexed_group(hdf.file['/'],
                                                       'RD_Group')

    pos_desc = [
        Dimension('X', 'm',
                  np.linspace(0, parm_dict['FastScanSize'],
                              num_cols * pnts_per_pixel)),
        Dimension('Y', 'm',
                  np.linspace(0, parm_dict['SlowScanSize'], num_rows)),
    ]
    spec_desc = [
        Dimension('Time', 's',
                  np.linspace(0, parm_dict['total_time'], pnts_per_avg)),
    ]

    for p in parm_dict:
        rd_group.attrs[p] = parm_dict[p]
    rd_group.attrs['pnts_per_line'] = num_cols  # to change number of pnts in a line

    h5_rd = usid.hdf_utils.write_main_dataset(
        rd_group,  # parent HDF5 group
        (num_rows * num_cols * pnts_per_pixel, pnts_per_avg),  # shape of Main dataset
        'Ringdown',  # Name of main dataset
        'Amplitude',  # Physical quantity contained in Main dataset
        'nm',  # Units for the physical quantity
        pos_desc,  # Position dimensions
        spec_desc,  # Spectroscopic dimensions
        dtype=np.float32,  # data type / precision
        compression='gzip',
        main_dset_attrs=parm_dict)

    # Cycles through all the files. This takes a while (~few minutes)
    rows_per_file = num_cols * pnts_per_pixel  # rows of h5_rd filled per file
    for num, k in enumerate(data_files):

        if loadverbose:
            fname = k.replace('/', '\\')
            print('####', fname.split('\\')[-1], '####')

        signal = loadibw(k)['wave']['wData']
        signal = np.reshape(signal.T,
                            [num_cols * orig_pnts_per_pixel, pnts_per_avg])

        if average:
            # One averaged signal per pixel
            pixels = np.split(signal, num_cols, axis=0)
            signal = np.vstack([np.mean(p, axis=0) for p in pixels])

        signal *= parm_dict['AMPINVOLS']

        start = rows_per_file * num
        stop = rows_per_file * (num + 1)
        if mirror:
            h5_rd[start:stop, :] = np.flipud(signal[:, :])
        else:
            h5_rd[start:stop, :] = signal[:, :]

    if verbose:
        usid.hdf_utils.print_tree(hdf.file, rel_paths=True)

    return h5_rd
def read(self, bin_factor=None, interp_func=Image.BICUBIC, normalize=False,
         **image_args):
    """
    Reads the image in the provided file into a Dataset object

    Parameters
    ----------------
    bin_factor : uint or array-like of uint, optional
        Down-sampling factor for each dimension.  Default is None.
        If specifying different binning for each dimension, please specify
        as (height binning, width binning)
    interp_func : int, optional. Default = :attr:`PIL.Image.BICUBIC`
        How the image will be interpolated to provide the down-sampled or
        binned image. For more information see instructions for the
        `resample` argument for :meth:`PIL.Image.resize`.
        Only validated and used when ``bin_factor`` is given.
    normalize : boolean, optional. Default = False
        Should the raw image be normalized between the values of 0 and 1
    image_args : dict
        Arguments to be passed to read_image.  Arguments depend on the type
        of image.

    Returns
    ----------
    data_set
        Object created with ``Dataset.from_array`` holding the (optionally
        binned and normalized) image, with a 'y' dimension on axis 0 and an
        'x' dimension on axis 1.

    Raises
    ------
    TypeError
        If ``bin_factor`` is neither an integer nor a list / tuple of
        positive integers.
    ValueError
        If ``bin_factor`` is not of length 2, contains a value smaller than
        1, or if ``interp_func`` is not one of the supported PIL modes.
    """
    image_path = self._parse_file_path(self._input_file_path)

    image = read_image(image_path, **image_args)
    # NOTE(review): image_parms is collected below but never attached to the
    # returned object within this method.
    image_parms = dict()
    usize, vsize = image.shape[:2]

    # Check if a bin_factor is given.  Set up binning objects if it is.
    if bin_factor is not None:
        if isinstance(bin_factor, (list, tuple)):
            if not contains_integers(bin_factor, min_val=1):
                raise TypeError(
                    'bin_factor should contain positive whole integers')
            if len(bin_factor) == 2:
                bin_factor = tuple(bin_factor)
            else:
                raise ValueError(
                    'Input parameter `bin_factor` must be a length 2 array-like or an integer.\n' +
                    '{} was given.'.format(bin_factor))
        elif isinstance(bin_factor, int):
            bin_factor = (bin_factor, bin_factor)
        else:
            raise TypeError(
                'bin_factor should either be an integer or an iterable of '
                'positive integers')

        # A factor of 0 must be rejected too: it would otherwise cause a
        # division by zero when the binned size is computed below.
        if np.min(bin_factor) < 1:
            raise ValueError('bin_factor must consist of positive factors')

        if interp_func not in [Image.NEAREST, Image.BILINEAR,
                               Image.BICUBIC, Image.LANCZOS]:
            raise ValueError(
                "'interp_func' argument for ImageTranslator.translate must be one of "
                "PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC, PIL.Image.LANCZOS")

        image_parms.update({'image_binning_size': bin_factor,
                            'image_PIL_resample_mode': interp_func})
        usize = int(usize / bin_factor[0])
        vsize = int(vsize / bin_factor[1])

        # Unfortunately, we need to make a round-trip through PIL for the
        # interpolation. Not possible with numpy.
        # PIL expects (width, height), hence (vsize, usize).
        img_obj = Image.fromarray(image)
        img_obj = img_obj.resize((vsize, usize), resample=interp_func)
        image = np.asarray(img_obj)

    # Working around occasional "cannot modify read-only array" error
    image = image.copy()

    # Normalize Raw Image
    if normalize:
        image -= np.min(image)
        image = image / np.float32(np.max(image))

    image_parms.update({'normalized': normalize,
                        'image_min': np.min(image),
                        'image_max': np.max(image)})

    data_set = Dataset.from_array(image, name='random')

    data_set.data_type = 'image'
    data_set.units = 'a. u.'
    data_set.quantity = 'Intensity'

    # name / values are passed by keyword so the call does not depend on
    # the positional order of the Dimension constructor.
    data_set.set_dimension(
        0, Dimension(name='y', values=np.arange(usize), units='a. u.',
                     quantity='Length', dimension_type='spatial'))
    data_set.set_dimension(
        1, Dimension(name='x', values=np.arange(vsize), units='a. u.',
                     quantity='Length', dimension_type='spatial'))

    return data_set
def read(self):
    """
    Reads a single Raman spectra from a Princeton Instruments CCD camera.

    Returns
    -------
    data_set: sidpy.Dataset object
        wraps all the raw data and metadata from the input file into a
        Dataset object

    Raises
    ------
    ValueError
        If one of the expected metadata fields cannot be found in the file.
    """
    # open the .SPE file
    with open(self._input_file_path, 'rb') as f:
        lines = f.readlines()
        # All metadata is searched for in the second line of the file
        meta_line = lines[1]

        def find_field(pattern):
            """Return the first captured group of ``pattern`` in the metadata."""
            match = re.search(pattern, meta_line)
            if match is None:
                raise ValueError(
                    'Could not find {!r} in the metadata of {}'.format(
                        pattern, self._input_file_path))
            return match[1]

        # Create an empty dictionary for the metadata
        metadata_dictionary = {}

        # Search through the file for the needed metadata.
        # 'utf-8' is used for decoding: the 'ANSI' codec alias only exists
        # on Windows and raises LookupError on other platforms.
        metadata_dictionary['date_acquired'] = find_field(
            b'date="(.*?)"').decode('utf-8')
        metadata_dictionary['width'] = int(find_field(b'width="(.*?)"'))
        metadata_dictionary['height'] = int(find_field(b'height="(.*?)"'))
        metadata_dictionary['size'] = (metadata_dictionary['width']
                                       * metadata_dictionary['height'])
        # The file declares this field as a Double, so parse it as a float
        metadata_dictionary['exposure_time'] = float(find_field(
            b'<ExposureTime type="Double">(.*?)</ExposureTime>'))
        metadata_dictionary['excitation_wavelength'] = float(
            find_field(b'laserLine="(.*?)"'))
        metadata_dictionary['center_wavelength'] = float(find_field(
            b'<CenterWavelength type="Double">(.*?)</CenterWavelength>'))
        metadata_dictionary['orientation'] = find_field(
            b'orientation="(.*?)"').decode('utf-8')

        # Get the wavelength and intensity
        wavelength_string = find_field(
            b'<Wavelength xml:space="preserve">(.*?)</Wavelength>'
        ).decode('utf-8')
        wavelength = np.array(wavelength_string.split(','),
                              dtype=np.float64)

        # The intensities are read as float32 starting at byte 4100
        f.seek(4100)
        intensity = np.fromfile(f, dtype=np.float32,
                                count=metadata_dictionary['size'])

    # Convert wavelengths to a Raman shift relative to the excitation line.
    # The factor 1e7 presumably converts 1/nm to 1/cm - confirm the units
    # stored in the file.
    raman_shift_wavenumbers = 1e7 * (
        1 / metadata_dictionary['excitation_wavelength'] - 1 / wavelength)

    # create the sidpy dataset
    data_set = Dataset.from_array(intensity, name='Raman Spectra')

    data_set.data_type = 'spectrum'
    data_set.units = 'counts'
    data_set.quantity = 'Intensity'

    # set dimensions
    data_set.set_dimension(
        0, Dimension(raman_shift_wavenumbers, name='Raman Shift',
                     units='cm-1', quantity='Raman shift',
                     dimension_type='spectral'))
    # NOTE(review): `intensity` is one-dimensional, so the dataset only has
    # axis 0, and the values attached here are the data themselves. Confirm
    # whether this second dimension is intended.
    data_set.set_dimension(
        1, Dimension(intensity, name='Intensity', units='counts',
                     quantity='intensity', dimension_type='spectral'))

    data_set.metadata = metadata_dictionary

    return data_set