Example #1
    def test_dim_dict_invalid_obj_type(self):
        dims = (2, 3)
        h5file = make_simple_nsid_dataset(dsetshapes=[dims])
        dset = h5file['MyGroup']['data']
        dim_dict = [Dimension(np.arange(2), 'X'), Dimension(np.arange(3), 'Y')]
        err_msg = 'dim_dict must be a dictionary'
        with self.assertRaises(TypeError) as context:
            _ = link_as_main(dset, dim_dict)
        self.assertTrue(err_msg in str(context.exception))
Example #2
    def test_items_in_dim_dict_invalid_obj_type(self):
        dims = (2, 3)
        h5file = make_simple_nsid_dataset(dsetshapes=[dims])
        dset = h5file['MyGroup']['data']
        dim_dict = {
            0: Dimension(np.arange(2), 'X'),
            1: Dimension(np.arange(3), 'Y')
        }
        err_msg = 'Items in dictionary must all  be h5py.Datasets !'
        with self.assertRaises(TypeError) as context:
            _ = link_as_main(dset, dim_dict)
        self.assertTrue(err_msg in str(context.exception))
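
Both tests above exercise the input validation in pyNSID's link_as_main: the second argument must be a dictionary keyed by dimension index, and every value must be an h5py.Dataset. A minimal sketch of a valid call is given below; the file layout, dataset names, and the import path are assumptions for illustration, and the exact attributes required on the dimension datasets may vary with the pyNSID version.

import h5py
import numpy as np
from pyNSID.io.hdf_utils import link_as_main  # assumed import path

with h5py.File('example.h5', 'w') as h5_f:
    grp = h5_f.create_group('MyGroup')
    # a 2 x 3 main dataset plus one 1D dataset per dimension
    h5_main = grp.create_dataset('data', data=np.random.rand(2, 3))
    h5_x = grp.create_dataset('X', data=np.arange(2))
    h5_y = grp.create_dataset('Y', data=np.arange(3))

    # keys are dimension indices, values are h5py.Dataset objects
    link_as_main(h5_main, {0: h5_x, 1: h5_y})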
Example #3
    def __new__(cls, name, units, values, quantity='generic',
                dimension_type='unknown', mode=DimType.DEFAULT):
        """
        Simple object that describes a dimension in a dataset by its name, units, and values

        Parameters
        ----------
        name : str or unicode
            Name of the dimension. For example 'Bias'
        units : str or unicode
            Units for this dimension. For example: 'V'
        values : array-like or int
            Values over which this dimension was varied. A linearly increasing set of values will be generated if an
            integer is provided instead of an array.
        quantity : str or unicode
            Physical quantity such as Length
        dimension_type : str or sidpy.DimensionType
            Type of dimension, such as spectral, spatial, etc.
        mode : Enum, Optional. Default = DimType.DEFAULT
            How the parameter associated with the dimension was varied.
            DimType.DEFAULT - data were recorded for all combinations of values in this dimension against **all**
                other dimensions. This is typically the case.
            DimType.INCOMPLETE - data are not present for all combinations of values in this dimension and all
                other dimensions. Examples include spiral scans, sparse sampling, and aborted measurements.
            DimType.DEPENDENT - values in this dimension were varied as a function of another (independent)
                dimension.
        """
        if isinstance(values, int):
            if values < 1:
                raise ValueError('values must be a whole number. {} provided'
                                 ''.format(values))
        self = SIDimension.__new__(cls, values, name=name, quantity=quantity,
                                   units=units, dimension_type=dimension_type)
        self.mode = mode
        return self
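
A brief usage sketch of the constructor above, assuming Dimension and DimType are imported from the module that defines them; the names and numeric values are made up for illustration.

import numpy as np

# a bias sweep recorded for every combination with all other dimensions
# (the default mode)
bias_dim = Dimension('Bias', 'V', np.linspace(-2, 2, 128),
                     quantity='Voltage', dimension_type='spectral')

# passing a positive integer generates that many linearly increasing values;
# DimType.DEPENDENT marks a dimension varied as a function of another one
time_dim = Dimension('Time', 's', 256, quantity='Time',
                     dimension_type='spectral', mode=DimType.DEPENDENT)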
Example #4
def write_dummy_dset(hf_group: h5py.Group, dims: Tuple[int, ...],
                     main_name: str, **kwargs) -> None:
    dset = Dataset.from_array(np.random.random([*dims]), name="new")
    dnames = kwargs.get("dnames", np.arange(len(dims)))
    for i, d in enumerate(dims):
        dset.set_dimension(i, Dimension(np.arange(d), str(dnames[i])))
    write_nsid_dataset(dset, hf_group, main_data_name=main_name)
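
A short usage sketch for the helper above, assuming its dependencies (sidpy's Dataset and Dimension, pyNSID's write_nsid_dataset, numpy, h5py and typing.Tuple) are already imported; the file and group names are made up.

import h5py

with h5py.File('dummy.h5', 'w') as h5_f:            # hypothetical file name
    grp = h5_f.create_group('Measurement_000')      # hypothetical group name
    # writes a 5 x 7 random main dataset named 'data' with dimensions 'X' and 'Y'
    write_dummy_dset(grp, (5, 7), 'data', dnames=['X', 'Y'])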
Example #5
def read_h5py_dataset(dset):
    if not isinstance(dset, h5py.Dataset):
        raise TypeError(
            'can only read a single h5py.Dataset; use read_all_in_group or read_all instead'
        )

    if not check_if_main(dset):
        raise TypeError(
            'can only read NSID datasets, not general ones; try to import with from_array'
        )

    # create vanilla dask array
    dataset = Dataset.from_array(np.array(dset))

    if 'title' in dset.attrs:
        dataset.title = dset.attrs['title']
    else:
        dataset.title = dset.name

    # Copy standard descriptive attributes, defaulting to 'generic' when absent
    for attr in ['units', 'quantity', 'data_type', 'modality', 'source']:
        setattr(dataset, attr, dset.attrs.get(attr, 'generic'))

    dataset.axes = {}

    for dim in range(dset.ndim):
        try:
            label = dset.dims[dim].keys()[-1]
            name = dset.dims[dim][label].name
            dim_dict = {
                'quantity': 'generic',
                'units': 'generic',
                'dimension_type': 'generic'
            }
            dim_dict.update(dict(dset.parent[name].attrs))

            dataset.set_dimension(
                dim,
                Dimension(np.array(dset.parent[name][()]),
                          dset.dims[dim].label, dim_dict['quantity'],
                          dim_dict['units'], dim_dict['dimension_type']))
        except ValueError:
            print('dimension {} is not NSID type, using generic'.format(dim))

    for key in dset.parent:
        if isinstance(dset.parent[key], h5py.Group):
            if key[0] != '_':
                setattr(dataset, key, nest_dict(dset.parent[key].attrs))

    dataset.h5_dataset = dset
    dataset.h5_filename = dset.file.filename
    try:
        dataset.h5_dataset_name = dset.name
    except ValueError:
        dataset.h5_dataset_name = ''
    return dataset
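
A hedged sketch of calling this reader on an existing NSID-formatted file; the file name and internal HDF5 paths are assumptions, and the dataset must pass check_if_main.

import h5py

with h5py.File('measurement.h5', 'r') as h5_f:      # hypothetical file name
    h5_main = h5_f['MyGroup']['data']               # an NSID main dataset
    sid_dset = read_h5py_dataset(h5_main)
    print(sid_dset.title, sid_dset.units, sid_dset.quantity)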
Example #6
def load_ringdown(data_files,
                  parm_dict,
                  h5_path,
                  verbose=False,
                  loadverbose=True,
                  average=True,
                  mirror=False):
    """
    Generates the HDF5 file given path to files_list and parameters dictionary

    Creates a data group RD_Group with a single dataset written in chunks

    :param data_files: List of the \*.ibw files to be individually scanned
    :type data_files: list

    :param parm_dict: Scan parameters to be saved as attributes
    :type parm_dict: dict

    :param h5_path: Path to H5 file on disk
    :type h5_path: string

    :param verbose: Display outputs of each function or not
    :type verbose: bool, optional

    :param loadverbose: Whether to print any simple "loading Line X" statements for feedback
    :type loadverbose: bool, optional

    :param average: Whether to average the signals acquired at each pixel into a single signal
    :type average: bool, optional

    :param mirror: Flips the ibw signal if acquired during a retrace, so data match the topography pixel-to-pixel
    :type mirror: bool, optional

    :returns: The main ringdown USID dataset written to the H5 file
    :rtype: USIDataset

    """
    # e.g. if a 16000 point signal with 2000 averages and 10 pixels
    #   (10MHz sampling of a 1.6 ms long signal=16000, 200 averages per pixel)
    # parm_dict['pnts_per_pixel'] = 200 (# signals at each pixel)
    #           ['pnts_per_avg'] = 16000 (# pnts per signal, called an "average")
    #           ['pnts_per_line'] = 2000 (# signals in each line)

    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']

    # The signals are hard-coded in the AFM software as 800 points long
    # Therefore, we can calculate pnts_per_pixel etc from the first file
    signal = loadibw(data_files[0])['wave']['wData']  # Load data.
    parm_dict['pnts_per_pixel'] = int(signal.shape[0] / (800 * num_cols))
    parm_dict['pnts_per_avg'] = 800  # hard-coded in our AFM software
    parm_dict['total_time'] = 16e-3  # hard-coded in our AFM software

    if 'AMPINVOLS' not in parm_dict:
        parm_dict.update({'AMPINVOLS': 100e-9})

    pnts_per_avg = parm_dict['pnts_per_avg']
    orig_pnts_per_pixel = parm_dict['pnts_per_pixel']
    if average:
        parm_dict['pnts_per_pixel'] = 1
        parm_dict['pnts_per_line'] = num_cols
    pnts_per_pixel = parm_dict['pnts_per_pixel']
    pnts_per_line = parm_dict['pnts_per_line']

    hdf = h5py.File(h5_path, 'a')  # read/write, create the file if it does not exist

    try:
        rd_group = hdf.file.create_group('RD_Group')
    except ValueError:  # group already exists; create an indexed group instead
        rd_group = usid.hdf_utils.create_indexed_group(hdf.file['/'],
                                                       'RD_Group')

    pos_desc = [
        Dimension(
            'X', 'm',
            np.linspace(0, parm_dict['FastScanSize'],
                        num_cols * pnts_per_pixel)),
        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'],
                                        num_rows))
    ]

    spec_desc = [
        Dimension('Time', 's',
                  np.linspace(0, parm_dict['total_time'], pnts_per_avg))
    ]

    for p in parm_dict:
        rd_group.attrs[p] = parm_dict[p]
    rd_group.attrs[
        'pnts_per_line'] = num_cols  # to change number of pnts in a line

    h5_rd = usid.hdf_utils.write_main_dataset(
        rd_group,  # parent HDF5 group
        (num_rows * num_cols * pnts_per_pixel, pnts_per_avg),
        # shape of Main dataset
        'Ringdown',  # Name of main dataset
        'Amplitude',  # Physical quantity contained in Main dataset
        'nm',  # Units for the physical quantity
        pos_desc,  # Position dimensions
        spec_desc,  # Spectroscopic dimensions
        dtype=np.float32,  # data type / precision
        compression='gzip',
        main_dset_attrs=parm_dict)

    # Cycles through the remaining files. This takes a while (~few minutes)
    for num, k in enumerate(data_files):

        if loadverbose:
            fname = k.replace('/', '\\')
            print('####', fname.split('\\')[-1], '####')
            fname = str(num).rjust(4, '0')

        signal = loadibw(k)['wave']['wData']
        signal = np.reshape(signal.T,
                            [num_cols * orig_pnts_per_pixel, pnts_per_avg])

        if average:
            pixels = np.split(signal, num_cols, axis=0)
            signal = np.vstack([np.mean(p, axis=0) for p in pixels])

        signal *= parm_dict['AMPINVOLS']

        if mirror:
            h5_rd[num_cols * pnts_per_pixel * num:num_cols * pnts_per_pixel *
                  (num + 1), :] = np.flipud(signal[:, :])
        else:
            h5_rd[num_cols * pnts_per_pixel * num:num_cols * pnts_per_pixel *
                  (num + 1), :] = signal[:, :]

    if verbose:
        usid.hdf_utils.print_tree(hdf.file, rel_paths=True)

    return h5_rd
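
A usage sketch for load_ringdown; only the parm_dict keys that the function actually reads are shown (num_rows, num_cols, FastScanSize, SlowScanSize, and pnts_per_line, which is only needed when average=False), and the numeric values and paths are made up.

import glob

parm_dict = {'num_rows': 64,
             'num_cols': 128,
             'FastScanSize': 10e-6,   # m
             'SlowScanSize': 10e-6,   # m
             'pnts_per_line': 128}    # only read when average=False

data_files = sorted(glob.glob('ringdown_data/*.ibw'))   # hypothetical folder
h5_rd = load_ringdown(data_files, parm_dict, 'ringdown_data/ringdown.h5',
                      average=True, mirror=True)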
Example #7
    def read(self,
             bin_factor=None,
             interp_func=Image.BICUBIC,
             normalize=False,
             **image_args):
        """
        Reads the image in the provided file into a sidpy.Dataset object

        Parameters
        ----------------
        bin_factor : uint or array-like of uint, optional
            Down-sampling factor for each dimension.  Default is None.
            If specifying different binning for each dimension, please specify as (height binning, width binning)
        interp_func : int, optional. Default = :attr:`PIL.Image.BICUBIC`
            How the image will be interpolated to provide the down-sampled or binned image.
            For more information see instructions for the `resample` argument for :meth:`PIL.Image.resize`
        normalize : boolean, optional. Default = False
            Should the raw image be normalized between the values of 0 and 1
        image_args : dict
            Arguments to be passed to read_image.  Arguments depend on the type of image.

        Returns
        -------
        data_set : sidpy.Dataset
            Dataset wrapping the (optionally binned and normalized) image
        """
        image_path = self._parse_file_path(self._input_file_path)

        image = read_image(image_path, **image_args)
        image_parms = dict()
        usize, vsize = image.shape[:2]
        '''
        Check if a bin_factor is given.  Set up binning objects if it is.
        '''
        if bin_factor is not None:
            if isinstance(bin_factor, (list, tuple)):
                if not contains_integers(bin_factor, min_val=1):
                    raise TypeError(
                        'bin_factor should contain positive whole integers')
                if len(bin_factor) == 2:
                    bin_factor = tuple(bin_factor)
                else:
                    raise ValueError(
                        'Input parameter `bin_factor` must be a length 2 array-like or an integer.\n'
                        + '{} was given.'.format(bin_factor))

            elif isinstance(bin_factor, int):
                bin_factor = (bin_factor, bin_factor)
            else:
                raise TypeError(
                    'bin_factor should either be an integer or an iterable of positive integers'
                )

            if np.min(bin_factor) <= 0:
                raise ValueError('bin_factor must consist of positive factors')

            if interp_func not in [
                    Image.NEAREST, Image.BILINEAR, Image.BICUBIC, Image.LANCZOS
            ]:
                raise ValueError(
                    "'interp_func' argument for ImageTranslator.translate must be one of "
                    "PIL.Image.NEAREST, PIL.Image.BILINEAR, PIL.Image.BICUBIC, PIL.Image.LANCZOS"
                )

            image_parms.update({
                'image_binning_size': bin_factor,
                'image_PIL_resample_mode': interp_func
            })
            usize = int(usize / bin_factor[0])
            vsize = int(vsize / bin_factor[1])

            # Unfortunately, we need to make a round-trip through PIL for the interpolation. Not possible with numpy
            img_obj = Image.fromarray(image)
            img_obj = img_obj.resize((vsize, usize), resample=interp_func)
            image = np.asarray(img_obj)

        # Working around occasional "cannot modify read-only array" error
        image = image.copy()
        '''
        Normalize Raw Image
        '''
        if normalize:
            image -= np.min(image)
            image = image / np.float32(np.max(image))

        image_parms.update({
            'normalized': normalize,
            'image_min': np.min(image),
            'image_max': np.max(image)
        })

        data_set = Dataset.from_array(image, name='random')

        data_set.data_type = 'image'
        data_set.units = 'a. u.'
        data_set.quantity = 'Intensity'

        data_set.set_dimension(
            0,
            Dimension('y',
                      np.arange(usize),
                      units='a. u.',
                      quantity='Length',
                      dimension_type='spatial'))
        data_set.set_dimension(
            1,
            Dimension('x',
                      np.arange(vsize),
                      units='a. u.',
                      quantity='Length',
                      dimension_type='spatial'))

        return data_set
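
The class that owns this read method is not part of the excerpt; assuming it follows the usual reader pattern of taking the file path in its constructor, usage might look like the sketch below (the class name ImageReader and the file name are hypothetical).

from PIL import Image

reader = ImageReader('afm_topography.png')      # hypothetical class and file
data_set = reader.read(bin_factor=2,            # 2 x 2 down-sampling
                       interp_func=Image.BILINEAR,
                       normalize=True)
print(data_set.shape, data_set.quantity, data_set.units)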
Example #8
    def read(self):
        """
        Reads a single Raman spectra from a Princeton Instruments CCD camera.

        Returns
        -------
        data_set: sidpy.Dataset object
            wraps all the raw data and metadata from the input file into a Dataset object
        
        """
        # open the .SPE file
        with open(self._input_file_path, 'rb') as f:
            lines = f.readlines()
            # Create an empty dictionary for the metadata
            metadata_dictionary = {}

            # Search through the file for the needed metadata
            metadata_dictionary['date_acquired'] = re.search(
                b'date="(.*?)"', lines[1])[1].decode('ANSI')
            metadata_dictionary['width'] = int(
                re.search(b'width="(.*?)"', lines[1])[1])
            metadata_dictionary['height'] = int(
                re.search(b'height="(.*?)"', lines[1])[1])
            metadata_dictionary['size'] = metadata_dictionary[
                'width'] * metadata_dictionary['height']
            metadata_dictionary['exposure_time'] = int(
                re.search(b'<ExposureTime type="Double">(.*?)</ExposureTime>',
                          lines[1])[1])
            metadata_dictionary['excitation_wavelength'] = float(
                re.search(b'laserLine="(.*?)"', lines[1])[1])
            metadata_dictionary['center_wavelength'] = float(
                re.search(
                    b'<CenterWavelength type="Double">(.*?)</CenterWavelength>',
                    lines[1])[1])
            metadata_dictionary['orientation'] = re.search(
                b'orientation="(.*?)"', lines[1])[1].decode('ANSI')

            # Get the wavelength and intensity
            wavelength_string = re.search(
                b'<Wavelength xml:space="preserve">(.*?)</Wavelength>',
                lines[1])[1].decode('utf-8')
            wavelength = np.array(wavelength_string.split(','),
                                  dtype=np.float64)

            f.seek(4100)
            intensity = np.fromfile(f,
                                    dtype=np.float32,
                                    count=metadata_dictionary['size'])

            raman_shift_wavenumbers = 1e7 * (
                1 / metadata_dictionary['excitation_wavelength'] -
                1 / wavelength)

        # create the sidpy dataset
        data_set = Dataset.from_array(intensity, name='Raman Spectra')

        data_set.data_type = 'spectrum'
        data_set.units = 'counts'
        data_set.quantity = 'Intensity'

        # set dimensions
        data_set.set_dimension(
            0,
            Dimension(raman_shift_wavenumbers,
                      name='Raman Shift',
                      units='cm-1',
                      quantity='Raman shift',
                      dimension_type='spectral'))
        data_set.set_dimension(
            1,
            Dimension(intensity,
                      name='Intensity',
                      units='counts',
                      quantity='intensity',
                      dimension_type='spectral'))

        data_set.metadata = metadata_dictionary

        return data_set
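
As with the previous example, the enclosing reader class is not shown; the sketch below assumes a constructor that stores the path to the .SPE file on self._input_file_path (the class name RamanSpeReader and the file name are hypothetical).

reader = RamanSpeReader('raman_scan_001.spe')   # hypothetical class and file
spectrum = reader.read()

print(spectrum.metadata['excitation_wavelength'])   # laser line in nm
print(spectrum.quantity, spectrum.units)            # 'Intensity', 'counts'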