Example #1
    def translate(self, file_path, *args, **kwargs):
        # Two kinds of files:
        # 1. Simple GSF files -> use metadata, data = gsf_read(file_path)
        # 2. Native .gwy files -> use the gwyfile package
        # I have a notebook that shows how such data can be read.
        # Create the .h5 file from the input file
        if not isinstance(file_path, str):
            raise TypeError('file_path should be a string!')
        if not (file_path.endswith('.gsf') or file_path.endswith('.gwy')):
            # TODO: Gwyddion sometimes does not append the file extension.
            # In theory, the kind of file could be identified from the header (line 38 in gsf_read()).
            # Ideally that header check should replace this extension check.
            raise ValueError('file_path must have a .gsf or .gwy extension!')
        
        file_path = path.abspath(file_path)
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        self.h5_file = h5py.File(h5_path, 'w')

        """
        Setup the global parameters
        ---------------------------
        translator: Gwyddion
        data_type: depends on file type
                    GwyddionGSF_<gsf_meta['title']>
                    or
                    GwyddionGWY_<gwy_meta['title']>
        """
        self.global_parms = generate_dummy_main_parms()
        self.global_parms['translator'] = 'Gwyddion'

        # Create the measurement group
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')

        if file_path.endswith('.gsf'):
            self._translate_gsf(file_path, meas_grp)

        elif file_path.endswith('.gwy'):
            self._translate_gwy(file_path, meas_grp)
        
        write_simple_attrs(self.h5_file, self.global_parms)

        return h5_path
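A minimal usage sketch for this translator. The class name and import path are assumptions for illustration; only the translate() call mirrors the method above.

# Hypothetical usage -- class name and import path are assumptions.
from pycroscopy.io.translators import GwyddionTranslator

translator = GwyddionTranslator()
h5_path = translator.translate('/data/scan_0001.gsf')  # .gwy files work the same way
print('Wrote', h5_path)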
Example #2
    def _setupH5(self, image_parms):
        """
        Setup the HDF5 file in which to store the data
        Due to the structure of the ndata format, we can only create the Measurement and Channel groups here

        Parameters
        ----------
        image_parms : list of dict
            One dictionary of measurement parameters per image set

        Returns
        -------
        h5_channels : list of h5py.Group
            Channel groups, one per entry in image_parms, that the data
            will later be written into
        """
        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        # Write the root-level parameters as attributes of the file
        write_simple_attrs(self.h5_f, root_parms)

        h5_channels = list()
        for meas_parms in image_parms:
            # Create new measurement group for each set of parameters
            meas_grp = create_indexed_group(self.h5_f, 'Measurement')
            # Write the parameters as attributes of the group
            write_simple_attrs(meas_grp, meas_parms)

            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_channels.append(chan_grp)

        self.h5_f.flush()

        return h5_channels
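Since _setupH5 iterates over image_parms, each entry yields its own indexed Measurement group with one Channel group inside. A sketch of the expected call and result (parameter names are invented; self stands for a translator instance):

# Sketch -- parameter names are invented for illustration.
image_parms = [{'exposure_s': 0.1, 'magnification': 50000},
               {'exposure_s': 0.2, 'magnification': 50000}]
h5_channels = self._setupH5(image_parms)
# -> [/Measurement_000/Channel_000, /Measurement_001/Channel_000]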
Example #4
    def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y, image_parms):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        scan_size_x : int
            Number of images in the x dimension
        scan_size_y : int
            Number of images in the y dimension
        image_parms : dict
            Dictionary of parameters

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize
        num_files = scan_size_x * scan_size_y

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        main_parms = {'num_images': num_files,
                      'image_size_u': usize,
                      'image_size_v': vsize,
                      'num_pixels': num_pixels,
                      'translator': 'Ptychography',
                      'scan_size_x': scan_size_x,
                      'scan_size_y': scan_size_y}
        main_parms.update(image_parms)

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_f, root_parms)
        meas_grp = create_indexed_group(self.h5_f, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                     Dimension('V', 'pixel', np.arange(vsize))]
        pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                    Dimension('Y', 'pixel', np.arange(scan_size_y))]

        ds_chunking = calc_chunks([num_files, num_pixels],
                                  data_type(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_files, num_pixels), 'Raw_Data',
                                     'Intensity', 'a.u.',
                                     pos_desc, spec_desc,
                                     chunks=ds_chunking, dtype=data_type)

        h5_ronch = chan_grp.create_dataset('Mean_Ronchigram', shape=[num_pixels], dtype=np.float32)
        h5_mean_spec = chan_grp.create_dataset('Spectroscopic_Mean', shape=[num_files], dtype=np.float32)

        self.h5_f.flush()

        return h5_main, h5_mean_spec, h5_ronch
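Because Raw_Data is stored as (num_files, num_pixels) with X/Y as position dimensions and U/V as spectroscopic dimensions, one acquired image can be recovered by reshaping a single row. A sketch, assuming the same variable names and row-major ordering of scan positions:

# Sketch: pull the image recorded at scan position (ix, iy).
file_idx = iy * scan_size_x + ix                 # assumed row-major position ordering
image = h5_main[file_idx].reshape(vsize, usize)  # V rows by U columns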
Example #5
File: sporc.py  Project: yig319/pycroscopy
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        (folder_path, file_name) = path.split(parm_path)
        (unused, base_name) = path.split(folder_path)
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        print('reading parameter files')
        parm_dict, excit_wfm, spec_ind_mat = self.__readparms(parm_path)
        parm_dict['data_type'] = 'SPORC'

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        num_pix = num_rows * num_cols

        # new data format
        spec_ind_mat = np.transpose(VALUES_DTYPE(spec_ind_mat))

        # Now start creating datasets and populating:
        pos_desc = [
            Dimension('Y', 'm', np.arange(num_rows)),
            Dimension('X', 'm', np.arange(num_cols))
        ]
        ds_pos_ind, ds_pos_val = build_ind_val_dsets(pos_desc,
                                                     is_spectral=False)

        spec_ind_labels = [
            'x index', 'y index', 'loop index', 'repetition index',
            'slope index'
        ]
        spec_ind_dict = dict()
        for col_ind, col_name in enumerate(spec_ind_labels):
            spec_ind_dict[col_name] = (slice(col_ind,
                                             col_ind + 1), slice(None))
        ds_spec_inds = VirtualDataset('Spectroscopic_Indices',
                                      INDICES_DTYPE(spec_ind_mat))
        ds_spec_inds.attrs['labels'] = spec_ind_dict
        ds_spec_vals = VirtualDataset('Spectroscopic_Values', spec_ind_mat)
        ds_spec_vals.attrs['labels'] = spec_ind_dict
        ds_spec_vals.attrs['units'] = ['V', 'V', '', '', '']

        ds_excit_wfm = VirtualDataset('Excitation_Waveform',
                                      np.float32(excit_wfm))

        ds_raw_data = VirtualDataset('Raw_Data',
                                     data=[],
                                     maxshape=(num_pix, len(excit_wfm)),
                                     dtype=np.float16,
                                     chunking=(1, len(excit_wfm)),
                                     compression='gzip')

        # technically should change the date, etc.

        chan_grp = VirtualGroup('Channel_000')
        chan_grp.attrs = parm_dict
        chan_grp.add_children([
            ds_pos_ind, ds_pos_val, ds_spec_inds, ds_spec_vals, ds_excit_wfm,
            ds_raw_data
        ])

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']
        global_parms['translator'] = 'SPORC'

        meas_grp = VirtualGroup('Measurement_000')
        meas_grp.add_children([chan_grp])
        spm_data = VirtualGroup('')
        spm_data.attrs = global_parms
        spm_data.add_children([meas_grp])

        if path.exists(h5_path):
            remove(h5_path)

        # Write everything except for the main data.
        hdf = HDFwriter(h5_path)

        h5_refs = hdf.write(spm_data)

        h5_main = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]

        # Now doing link_h5_objects_as_attrs:
        aux_ds_names = [
            'Excitation_Waveform', 'Position_Indices', 'Position_Values',
            'Spectroscopic_Indices', 'Spectroscopic_Values'
        ]
        link_h5_objects_as_attrs(h5_main,
                                 get_h5_obj_refs(aux_ds_names, h5_refs))

        print('reading raw data now...')

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(
                    folder_path,
                    'result_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                # print('Working on row {} col {}'.format(row_ind,col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    # Take the inverse FFT on 1st dimension
                    pix_vec = np.fft.ifft(np.fft.ifftshift(pix_data['data']))
                    # Verified with Matlab - no conjugate required here.
                    h5_main[pos_ind, :] = np.float16(np.real(pix_vec))
                    hdf.flush()  # flush from memory!
                else:
                    print('File for row {} col {} not found'.format(
                        row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('Finished reading {} % of data'.format(
                        int(100 * pos_ind / num_pix)))

        hdf.close()

        return h5_path
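The read loop above maps the 1-based row/column in each result_r<row>_c<col>.mat file name onto a 0-based flat position index in Raw_Data. The mapping, pulled out as a sketch (not an actual pycroscopy helper):

# Sketch of the position indexing used in the read loop.
def flat_position(row_ind, col_ind, num_cols):
    return (row_ind - 1) * num_cols + (col_ind - 1)

assert flat_position(1, 1, num_cols=5) == 0   # first file -> first row of Raw_Data
assert flat_position(2, 3, num_cols=5) == 7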
Example #6
    def _setup_h5(self, data_gen_parms):
        """
        Sets up the HDF5 file structure before doing the actual generation

        Parameters
        ----------
        data_gen_parms : dict
            Dictionary containing the parameters to write to the Measurement Group as attributes

        Returns
        -------
        None
        """

        '''
        Build the group structure down to the channel group
        '''
        # Set up the basic group structure
        root_grp = VirtualGroup('')
        root_parms = generate_dummy_main_parms()
        root_parms['translator'] = 'FAKEBEPS'
        root_parms['data_type'] = data_gen_parms['data_type']
        root_grp.attrs = root_parms

        meas_grp = VirtualGroup('Measurement_')
        chan_grp = VirtualGroup('Channel_')

        meas_grp.attrs.update(data_gen_parms)

        # Create the Position and Spectroscopic datasets for the Raw Data
        ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals = self._build_ancillary_datasets()

        raw_chunking = calc_chunks([self.n_pixels,
                                    self.n_spec_bins],
                                   np.complex64(0).itemsize,
                                   unit_chunks=[1, self.n_bins])

        ds_raw_data = VirtualDataset('Raw_Data', data=None,
                                     maxshape=[self.n_pixels, self.n_spec_bins],
                                     dtype=np.complex64,
                                     compression='gzip',
                                     chunking=raw_chunking,
                                     parent=meas_grp)

        chan_grp.add_children([ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals,
                               ds_raw_data])
        meas_grp.add_children([chan_grp])
        root_grp.add_children([meas_grp])

        hdf = HDFwriter(self.h5_path)
        hdf.delete()
        h5_refs = hdf.write(root_grp)

        # Delete the VirtualDatasets to save memory
        del ds_raw_data, ds_spec_inds, ds_spec_vals, ds_pos_inds, ds_pos_vals

        # Get the file and Raw_Data objects
        h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
        h5_chan_grp = h5_raw.parent

        # Get the Position and Spectroscopic dataset objects
        h5_pos_inds = get_h5_obj_refs(['Position_Indices'], h5_refs)[0]
        h5_pos_vals = get_h5_obj_refs(['Position_Values'], h5_refs)[0]
        h5_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_refs)[0]
        h5_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of Raw_Data
        link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds, h5_spec_vals)

        '''
        Build the SHO Group
        '''
        sho_grp = VirtualGroup('Raw_Data-SHO_Fit_', parent=h5_chan_grp.name)

        # Build the Spectroscopic datasets for the SHO Guess and Fit
        sho_spec_starts = np.where(h5_spec_inds[h5_spec_inds.attrs['Frequency']].squeeze() == 0)[0]
        sho_spec_labs = get_attr(h5_spec_inds, 'labels')
        ds_sho_spec_inds, ds_sho_spec_vals = build_reduced_spec_dsets(h5_spec_inds,
                                                                      h5_spec_vals,
                                                                      keep_dim=sho_spec_labs != 'Frequency',
                                                                      step_starts=sho_spec_starts)

        sho_chunking = calc_chunks([self.n_pixels,
                                    self.n_sho_bins],
                                   sho32.itemsize,
                                   unit_chunks=[1, 1])
        ds_sho_fit = VirtualDataset('Fit', data=None,
                                    maxshape=[self.n_pixels, self.n_sho_bins],
                                    dtype=sho32,
                                    compression='gzip',
                                    chunking=sho_chunking,
                                    parent=sho_grp)
        ds_sho_guess = VirtualDataset('Guess', data=None,
                                      maxshape=[self.n_pixels, self.n_sho_bins],
                                      dtype=sho32,
                                      compression='gzip',
                                      chunking=sho_chunking,
                                      parent=sho_grp)

        sho_grp.add_children([ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals])

        # Write the SHO group and datasets to the file and delete the VirtualDataset objects
        h5_sho_refs = hdf.write(sho_grp)
        del ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals

        # Get the dataset handles for the fit and guess
        h5_sho_fit = get_h5_obj_refs(['Fit'], h5_sho_refs)[0]
        h5_sho_guess = get_h5_obj_refs(['Guess'], h5_sho_refs)[0]

        # Get the dataset handles for the SHO Spectroscopic datasets
        h5_sho_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_sho_refs)[0]
        h5_sho_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_sho_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of the SHO Fit and Guess
        link_as_main(h5_sho_fit, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)
        link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds, h5_sho_spec_vals)

        '''
        Build the loop group
        '''
        loop_grp = VirtualGroup('Fit-Loop_Fit_', parent=h5_sho_fit.parent.name)

        # Build the Spectroscopic datasets for the loops
        loop_spec_starts = np.where(h5_sho_spec_inds[h5_sho_spec_inds.attrs['DC_Offset']].squeeze() == 0)[0]
        loop_spec_labs = get_attr(h5_sho_spec_inds, 'labels')
        ds_loop_spec_inds, ds_loop_spec_vals = build_reduced_spec_dsets(h5_sho_spec_inds,
                                                                        h5_sho_spec_vals,
                                                                        keep_dim=loop_spec_labs != 'DC_Offset',
                                                                        step_starts=loop_spec_starts)

        # Create the loop fit and guess VirtualDatasets
        loop_chunking = calc_chunks([self.n_pixels, self.n_loops],
                                    loop_fit32.itemsize,
                                    unit_chunks=[1, 1])
        ds_loop_fit = VirtualDataset('Fit', data=None,
                                     maxshape=[self.n_pixels, self.n_loops],
                                     dtype=loop_fit32,
                                     compression='gzip',
                                     chunking=loop_chunking,
                                     parent=loop_grp)

        ds_loop_guess = VirtualDataset('Guess', data=None,
                                       maxshape=[self.n_pixels, self.n_loops],
                                       dtype=loop_fit32,
                                       compression='gzip',
                                       chunking=loop_chunking,
                                       parent=loop_grp)

        # Add the datasets to the loop group then write it to the file
        loop_grp.add_children([ds_loop_fit, ds_loop_guess, ds_loop_spec_inds, ds_loop_spec_vals])
        h5_loop_refs = hdf.write(loop_grp)

        # Delete the VirtualDatasets
        del ds_loop_spec_vals, ds_loop_spec_inds, ds_loop_guess, ds_loop_fit

        # Get the handles to the datasets
        h5_loop_fit = get_h5_obj_refs(['Fit'], h5_loop_refs)[0]
        h5_loop_guess = get_h5_obj_refs(['Guess'], h5_loop_refs)[0]
        h5_loop_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_loop_refs)[0]
        h5_loop_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_loop_refs)[0]

        # Link the Position and Spectroscopic datasets to the Loop Guess and Fit
        link_as_main(h5_loop_fit, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)
        link_as_main(h5_loop_guess, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds, h5_loop_spec_vals)

        self.h5_raw = USIDataset(h5_raw)
        self.h5_sho_guess = USIDataset(h5_sho_guess)
        self.h5_sho_fit = USIDataset(h5_sho_fit)
        self.h5_loop_guess = USIDataset(h5_loop_guess)
        self.h5_loop_fit = USIDataset(h5_loop_fit)
        self.h5_spec_vals = h5_spec_vals
        self.h5_spec_inds = h5_spec_inds
        self.h5_sho_spec_inds = h5_sho_spec_inds
        self.h5_sho_spec_vals = h5_sho_spec_vals
        self.h5_loop_spec_inds = h5_loop_spec_inds
        self.h5_loop_spec_vals = h5_loop_spec_vals
        self.h5_file = h5_raw.file

        return
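After _setup_h5 completes, the file holds three linked levels: Raw_Data, its SHO Guess/Fit group, and the loop Guess/Fit group nested under it. A quick sketch for inspecting the tree; the file path is a placeholder:

import h5py

# Sketch: list everything the setup created.
with h5py.File('fake_beps.h5', 'r') as h5_f:   # path is a placeholder
    h5_f.visit(print)
# Expected names include (abridged):
#   Measurement_000/Channel_000/Raw_Data
#   Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Guess
#   Measurement_000/Channel_000/Raw_Data-SHO_Fit_000/Fit-Loop_Fit_000/Fit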
Example #7
    def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        scan_size_x : int
            Number of images in the x dimension
        scan_size_y : int
            Number of images in the y dimension

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize
        num_files = scan_size_x * scan_size_y

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'ImageStackData'

        main_parms = {
            'num_images': num_files,
            'image_size_u': usize,
            'image_size_v': vsize,
            'num_pixels': num_pixels,
            'translator': 'ImageStack',
            'scan_size_x': scan_size_x,
            'scan_size_y': scan_size_y
        }

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_file, root_parms)
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_desc = [
            Dimension('U', 'pixel', np.arange(usize)),
            Dimension('V', 'pixel', np.arange(vsize))
        ]
        pos_desc = [
            Dimension('X', 'pixel', np.arange(scan_size_x)),
            Dimension('Y', 'pixel', np.arange(scan_size_y))
        ]

        ds_chunking = calc_chunks([num_files, num_pixels],
                                  data_type(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_files, num_pixels),
                                     'Raw_Data',
                                     'Intensity',
                                     'a.u.',
                                     pos_desc,
                                     spec_desc,
                                     chunks=ds_chunking,
                                     dtype=data_type)
        h5_ronch = meas_grp.create_dataset('Stack_Mean',
                                           data=np.zeros(num_pixels,
                                                         dtype=np.float32),
                                           dtype=np.float32)
        h5_mean_spec = meas_grp.create_dataset('Image_Means',
                                               data=np.zeros(num_files,
                                                             dtype=np.float32),
                                               dtype=np.float32)

        self.h5_file.flush()

        return h5_main, h5_mean_spec, h5_ronch
Example #8
    def translate(self, file_path, verbose=False, parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : bool, optional
            Whether or not to show print statements for debugging
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.ndim == 3 and images.shape[2] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = np.atleast_2d(VALUES_DTYPE(images[chan_ind]))
                except ValueError:
                    # We don't expect to get here. If we do, the spectroscopic values stay as plain indices
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path
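A short usage sketch; IgorIBWTranslator is the natural home for this method in pycroscopy, but treat the import path as an assumption:

# Hypothetical usage -- import path is an assumption.
from pycroscopy.io.translators import IgorIBWTranslator

h5_path = IgorIBWTranslator().translate('/data/force_curve.ibw', verbose=True)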
Example #9
    def translate(self, file_path, *args, **kwargs):
        """
        Translates a given Bruker / Veeco / Nanoscope AFM derived file to HDF5. Currently handles scans, force curves,
        and force-distance maps

        Note that this translator was written with a single example file for each modality and may be buggy.

        Parameters
        ----------
        file_path : str / unicode
            path to data file

        Returns
        -------
        h5_path : str / unicode
            path to translated HDF5 file
        """
        self.file_path = path.abspath(file_path)
        self.meta_data, other_parms = self._extract_metadata()

        # These files are weirdly named with extensions such as .001
        h5_path = self.file_path.replace('.', '_') + '.h5'

        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        type_suffixes = ['Image', 'Force_Curve', 'Force_Map']
        # 0 - stack of scan images
        # 1 - single force curve
        # 2 - force map
        force_count = 0
        image_count = 0
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                force_count += 1
            elif 'Ciao image list' in class_name:
                image_count += 1
        data_type = 0
        if force_count > 0:
            if image_count > 0:
                data_type = 2
            else:
                data_type = 1

        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'Bruker_AFM_' + type_suffixes[data_type]
        global_parms['translator'] = 'Bruker_AFM'
        write_simple_attrs(h5_file, global_parms)

        # too many parameters. Making a dummy group just for the parameters.
        h5_parms_grp = h5_file.create_group('Parameters')
        # We currently have a dictionary of dictionaries. This needs to be flattened
        flat_dict = dict()
        for class_name, sub_dict in other_parms.items():
            for key, val in sub_dict.items():
                flat_dict[class_name + '_' + key] = val
        write_simple_attrs(h5_parms_grp, flat_dict)

        # Create measurement group
        h5_meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Call the data specific translation function
        trans_funcs = [
            self._translate_image_stack, self._translate_force_curve,
            self._translate_force_map
        ]
        trans_funcs[data_type](h5_meas_grp)

        # wrap up and return path
        h5_file.close()
        return h5_path
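The modality detection above reduces to counting the two kinds of 'Ciao ... list' headers in the metadata. Factored out as a standalone sketch (not an actual translator helper):

def detect_modality(meta_data):
    """Sketch of the rule above: 0 = image stack, 1 = force curve, 2 = force map."""
    force_count = sum('Ciao force image list' in name for name in meta_data)
    image_count = sum('Ciao image list' in name for name in meta_data)
    if force_count > 0:
        return 2 if image_count > 0 else 1
    return 0

assert detect_modality(['Ciao image list', 'Ciao image list']) == 0
assert detect_modality(['Ciao force image list']) == 1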
Example #10
    def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        num_images : int
            Number of images in the movie
        main_parms : dict
            Dictionary of parameters to write to the Measurement group

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        main_parms['num_images'] = num_images
        main_parms['image_size_u'] = usize
        main_parms['image_size_v'] = vsize
        main_parms['num_pixels'] = num_pixels
        main_parms['translator'] = 'Movie'

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_file, root_parms)
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_dim = Dimension('Time', 's', np.arange(num_images))
        pos_dims = [
            Dimension('X', 'a.u.', np.arange(usize)),
            Dimension('Y', 'a.u.', np.arange(vsize))
        ]

        ds_chunking = calc_chunks([num_pixels, num_images],
                                  data_type(0).itemsize,
                                  unit_chunks=(num_pixels, 1))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_pixels, num_images),
                                     'Raw_Data',
                                     'Intensity',
                                     'a.u.',
                                     pos_dims,
                                     spec_dim,
                                     chunks=ds_chunking,
                                     dtype=data_type)
        h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                           data=np.zeros(num_pixels,
                                                         dtype=np.float32),
                                           dtype=np.float32)
        h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                               data=np.zeros(num_images,
                                                             dtype=np.float32),
                                               dtype=np.float32)

        self.h5_file.flush()

        return h5_main, h5_mean_spec, h5_ronch
Example #11
    def translate(self, parm_path):
        """
        The main function that translates the provided file into a .h5 file

        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file.

        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)

        self._parse_file_path(parm_path)

        num_dat_files = len(self.file_list)

        with open(self.file_list[0], 'rb') as f:
            spectrogram_size, count_vals = self._parse_spectrogram_size(f)
        print("spectrogram size:", spectrogram_size)
        num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
        print('Number of pixels: ', num_pixels)
        print('Count Values: ', count_vals)
        if (num_pixels + 1) != count_vals:
            raise ValueError('Data size does not match the expected number of pixels. Cannot continue')

        # Now start creating datasets and populating:

        ds_spec_inds, ds_spec_vals = build_ind_val_dsets(Dimension(
            'Bias', 'V', excit_wfm),
                                                         is_spectral=True,
                                                         verbose=False)

        # The data generated above varies linearly. Override.
        ds_spec_vals.data = np.atleast_2d(excit_wfm)

        pos_desc = [
            Dimension('X', 'a.u.', np.arange(parm_dict['grid_num_cols'])),
            Dimension('Y', 'a.u.', np.arange(parm_dict['grid_num_rows']))
        ]

        ds_pos_ind, ds_pos_val = build_ind_val_dsets(pos_desc,
                                                     is_spectral=False,
                                                     verbose=False)

        ds_raw_data = VirtualDataset('Raw_Data',
                                     data=[],
                                     maxshape=(ds_pos_ind.shape[0],
                                               spectrogram_size - 5),
                                     dtype=np.complex64,
                                     chunking=(1, spectrogram_size - 5),
                                     compression='gzip')
        ds_raw_data.attrs['quantity'] = ['Complex']

        aux_ds_names = [
            'Position_Indices', 'Position_Values', 'Spectroscopic_Indices',
            'Spectroscopic_Values'
        ]

        num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

        # technically should change the date, etc.
        spm_data = VirtualGroup('')
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'trKPFM'
        global_parms['translator'] = 'trKPFM'
        spm_data.attrs = global_parms
        meas_grp = VirtualGroup('Measurement_000')
        meas_grp.attrs = parm_dict
        spm_data.add_children([meas_grp])

        hdf = HDFwriter(self.h5_path)
        # spm_data.showTree()
        hdf.write(spm_data, print_log=False)

        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = VirtualGroup(
                '{:s}{:03d}'.format('Channel_', chan_index),
                '/Measurement_000/')

            if chan_index == 0:
                chan_grp.attrs = {'Harmonic': 1}
            else:
                chan_grp.attrs = {'Harmonic': 2}

            chan_grp.add_children([
                ds_pos_ind, ds_pos_val, ds_spec_inds, ds_spec_vals, ds_raw_data
            ])
            h5_refs = hdf.write(chan_grp, print_log=False)
            h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
            link_h5_objects_as_attrs(h5_raw,
                                     get_h5_obj_refs(aux_ds_names, h5_refs))
            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)

        hdf.close()
        return self.h5_path
Example #12
    def translate(self, parm_path):
        """
        The main function that translates the provided file into a .h5 file

        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file.

        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)

        self._parse_file_path(parm_path)

        num_dat_files = len(self.file_list)

        with open(self.file_list[0], 'rb') as f:
            spectrogram_size, count_vals = self._parse_spectrogram_size(f)
        print("Excitation waveform shape: ", excit_wfm.shape)
        print("spectrogram size:", spectrogram_size)
        num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
        print('Number of pixels: ', num_pixels)
        print('Count Values: ', count_vals)
        if (num_pixels + 1) != count_vals:
            raise ValueError("Data size does not match number of pixels expected. Cannot continue")

        # Find how many channels we have to make
        num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

        # Now start creating datasets and populating:
        # Start by getting an h5 file
        h5_file = h5py.File(self.h5_path, 'w')

        # First create a measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Set up some parameters that will be written as attributes to this Measurement group
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'trKPFM'
        global_parms['translator'] = 'trKPFM'
        write_simple_attrs(h5_meas_group, global_parms)
        write_simple_attrs(h5_meas_group, parm_dict)

        # Now start building the position and spectroscopic dimension containers.
        # There's only one spectroscopic dimension and two position dimensions.

        # The excit_wfm only has the DC values without any information on cycles, time, etc.
        # What we really need is to add the time component: for every DC step there are some time steps.

        num_time_steps = (spectrogram_size - 5) // excit_wfm.size

        # Let's repeat the excitation so that we get a full vector the same size as the spectrogram
        # TODO: Check if this is the norm for this type of dataset

        full_spect_val = np.copy(excit_wfm).repeat(num_time_steps)

        spec_dims = Dimension('Bias', 'V', full_spect_val)
        pos_dims = [Dimension('Cols', 'nm', parm_dict['grid_num_cols']),
                    Dimension('Rows', 'um', parm_dict['grid_num_rows'])]


        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = create_indexed_group(h5_meas_group, 'Channel')

            if chan_index == 0:
                write_simple_attrs(chan_grp, {'Harmonic': 1})
            else:
                write_simple_attrs(chan_grp, {'Harmonic': 2})

            h5_raw = write_main_dataset(chan_grp,  # parent HDF5 group
                                        (num_pixels, spectrogram_size - 5),
                                        # shape of Main dataset
                                        'Raw_Data',  # Name of main dataset
                                        'Deflection',  # Physical quantity contained in Main dataset
                                        'V',  # Units for the physical quantity
                                        pos_dims,  # Position dimensions
                                        spec_dims,  # Spectroscopic dimensions
                                        dtype=np.complex64,  # data type / precision
                                        compression='gzip',
                                        chunks=(1, spectrogram_size - 5),
                                        main_dset_attrs={'quantity': 'Complex'})

            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)

        h5_file.close()

        return self.h5_path
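The Bias spectroscopic vector above is built by repeating each DC value for the number of time steps per step. With toy numbers:

import numpy as np

# Toy illustration of the repeat above: 3 DC steps, 4 time steps each.
excit_wfm = np.array([-1.0, 0.0, 1.0])
num_time_steps = 4
full_spect_val = np.copy(excit_wfm).repeat(num_time_steps)
# array([-1., -1., -1., -1.,  0.,  0.,  0.,  0.,  1.,  1.,  1.,  1.])
# len(full_spect_val) == 12, matching spectrogram_size - 5 in the translator.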
Example #13
    def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
        """
        Setup the HDF5 file in which to store the data including creating
        the Position and Spectroscopic datasets

        Parameters
        ----------
        usize : int
            Number of pixel columns in the images
        vsize : int
            Number of pixel rows in the images
        data_type : type
            Data type to save image as
        num_images : int
            Number of images in the movie
        main_parms : dict
            Dictionary of parameters to write to the Measurement group

        Returns
        -------
        h5_main : h5py.Dataset
            HDF5 Dataset that the images will be written into
        h5_mean_spec : h5py.Dataset
            HDF5 Dataset that the mean over all positions will be written
            into
        h5_ronch : h5py.Dataset
            HDF5 Dataset that the mean over all Spectroscopic steps will be
            written into
        """
        num_pixels = usize * vsize

        root_parms = generate_dummy_main_parms()
        root_parms['data_type'] = 'PtychographyData'

        main_parms['num_images'] = num_images
        main_parms['image_size_u'] = usize
        main_parms['image_size_v'] = vsize
        main_parms['num_pixels'] = num_pixels
        main_parms['translator'] = 'Movie'

        # Create the hdf5 data Group
        write_simple_attrs(self.h5_file, root_parms)
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, main_parms)
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        # Build the Position and Spectroscopic Datasets
        spec_dim = Dimension('Time', 's', np.arange(num_images))
        pos_dims = [Dimension('X', 'a.u.', np.arange(usize)), Dimension('Y', 'a.u.', np.arange(vsize))]

        ds_chunking = calc_chunks([num_pixels, num_images],
                                  data_type(0).itemsize,
                                  unit_chunks=(num_pixels, 1))

        # Allocate space for Main_Data and Pixel averaged Data
        h5_main = write_main_dataset(chan_grp, (num_pixels, num_images), 'Raw_Data',
                                     'Intensity', 'a.u.',
                                     pos_dims, spec_dim,
                                     chunks=ds_chunking, dtype=data_type)
        h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                           data=np.zeros(num_pixels, dtype=np.float32),
                                           dtype=np.float32)
        h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                               data=np.zeros(num_images, dtype=np.float32),
                                               dtype=np.float32)

        self.h5_file.flush()

        return h5_main, h5_mean_spec, h5_ronch
Example #14
    def translate(self, file_path, show_plots=True, save_plots=True, do_histogram=False):
        """
        Basic method that translates .dat data file(s) to a single .h5 file
        
        Inputs:
            file_path -- Absolute file path for one of the data files. 
            It is assumed that this file is of the OLD data format. 
            
        Outputs:
            Nothing
        """
        file_path = path.abspath(file_path)
        (folder_path, basename) = path.split(file_path)
        (basename, path_dict) = self._parse_file_path(file_path)

        h5_path = path.join(folder_path, basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        self.h5_file = h5py.File(h5_path, 'w')

        isBEPS = True
        parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

        ignored_plt_grps = ['in-field']  # Here we assume that there is no in-field.
        # If in-field data is captured then the translator would have to be modified.

        # Technically, we could do away with this if statement, as isBEPS is always true for this translation
        if isBEPS:
            parm_dict['data_type'] = 'BEPSData'

            std_expt = parm_dict['VS_mode'] != 'load user defined VS Wave from file'

            if not std_expt:
                warn('This translator does not handle user defined voltage spectroscopy')
                return

            spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

            # Check file sizes:
        if 'read_real' in path_dict.keys():
            real_size = path.getsize(path_dict['read_real'])
            imag_size = path.getsize(path_dict['read_imag'])
        else:
            real_size = path.getsize(path_dict['write_real'])
            imag_size = path.getsize(path_dict['write_imag'])

        if real_size != imag_size:
            raise ValueError("Real and imaginary file sizes DON'T match!. Ending")

        num_rows = int(parm_dict['grid_num_rows'])
        num_cols = int(parm_dict['grid_num_cols'])
        num_pix = num_rows * num_cols
        tot_bins = real_size / (num_pix * 4)  # Finding bins by simple division of the entire data size

        # Check for case where only a single pixel is missing.
        check_bins = real_size / ((num_pix - 1) * 4)

        if tot_bins % 1 and check_bins % 1:
            warn('Aborting! Some parameter appears to have changed in-between')
            return
        elif not tot_bins % 1:
            # Everything's OK
            pass
        elif not check_bins % 1:
            tot_bins = check_bins
            warn('Warning: A pixel seems to be missing from the data. File will be padded with zeros.')

        tot_bins = int(tot_bins)
        (bin_inds, bin_freqs, bin_FFT, ex_wfm, dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])
        """
        Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less 
        than the actual number), we need to re-calculate it based on the available data. This is done below.
        """

        band_width = parm_dict['BE_band_width_[Hz]'] * (0.5 - parm_dict['BE_band_edge_trim'])
        st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
        en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
        bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)

        # Forcing standardized datatypes:
        bin_inds = np.int32(bin_inds)
        bin_freqs = np.float32(bin_freqs)
        bin_FFT = np.complex64(bin_FFT)
        ex_wfm = np.float32(ex_wfm)

        self.FFT_BE_wave = bin_FFT

        (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

        # Remove the unused plot group columns before proceeding:
        (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs, UDVS_units, ignored_plt_grps)

        spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

        # Will assume that all excitation waveforms have the same number of bins
        # Here, the denominator is 2 because there are only out-of-field measurements. For IF + OF, it should be 1
        num_actual_udvs_steps = UDVS_mat.shape[0] / 2
        bins_per_step = tot_bins / num_actual_udvs_steps

        # Some more checks
        if bins_per_step % 1:
            warn('Non integer number of bins per step!')
            return
        else:
            bins_per_step = int(bins_per_step)

        num_actual_udvs_steps = int(num_actual_udvs_steps)

        stind = 0
        for step_index in range(UDVS_mat.shape[0]):
            if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
                continue  # skip
            spec_inds[0, stind:stind + bins_per_step] = np.arange(bins_per_step, dtype=INDICES_DTYPE)  # Bin step
            spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(bins_per_step,
                                                                             dtype=INDICES_DTYPE)  # UDVS step
            stind += bins_per_step
        del stind, step_index

        # Some very basic information that can help the processing / analysis crew
        parm_dict['num_bins'] = tot_bins
        parm_dict['num_pix'] = num_pix
        parm_dict['num_udvs_steps'] = num_actual_udvs_steps

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        global_parms['experiment_date'] = parm_dict['File_date_and_time']

        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'ODF'
        write_simple_attrs(self.h5_file, global_parms)

        # Create Measurement and Channel groups
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

        # Create Auxiliary Datasets
        h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

        udvs_slices = dict()
        for col_ind, col_name in enumerate(UDVS_labs):
            udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
        h5_UDVS = chan_grp.create_dataset('UDVS',
                                          data=UDVS_mat,
                                          dtype=np.float32)
        write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

        h5_bin_steps = chan_grp.create_dataset('Bin_Steps',
                                               data=np.arange(bins_per_step, dtype=np.uint32),
                                               dtype=np.uint32)

        # Need to add the Bin Waveform type - infer from UDVS
        exec_bin_vec = self.signal_type * np.ones(len(bin_inds), dtype=np.int32)
        h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type',
                                             data=exec_bin_vec,
                                             dtype=np.int32)

        h5_bin_inds = chan_grp.create_dataset('Bin_Indices',
                                              data=bin_inds,
                                              dtype=np.uint32)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies',
                                              data=bin_freqs,
                                              dtype=np.float32)
        h5_bin_FFT = chan_grp.create_dataset('Bin_FFT',
                                             data=bin_FFT,
                                             dtype=np.complex64)
        # Noise floor: 3-field nf32 compound values, one per (position, UDVS step)
        h5_noise_floor = chan_grp.create_dataset('Noise_Floor',
                                                 shape=(num_pix, num_actual_udvs_steps),
                                                 dtype=nf32,
                                                 chunks=(1, num_actual_udvs_steps))

        """ 
        ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
        Chunk by each UDVS step - this makes it easy / quick to:
            1. read data for a single UDVS step from all pixels
            2. read an entire / multiple pixels at a time
        The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes.
        This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes
        meaning that the metadata would be very substantial.
        This assumption is fine since we almost never handle user-defined cases
        """

        """
        New method for chunking the Main_Data dataset. Chunking is now done in N-by-N squares of UDVS steps by pixels.
        N is determined dynamically based on the dimensions of the dataset. Currently it is set such that individual
        chunks are less than 10 kB in size.
        
        Chris Smith -- [email protected]
        """
        pos_dims = [Dimension('X', 'nm', num_cols), Dimension('Y', 'nm', num_rows)]

        # Create Spectroscopic Values and Spectroscopic Values Labels datasets
        spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(UDVS_mat, spec_inds,
                                                                                                bin_freqs,
                                                                                                exec_bin_vec,
                                                                                                parm_dict, UDVS_labs,
                                                                                                UDVS_units)

        spec_dims = list()
        for row_ind, row_name in enumerate(spec_vals_labs):
            spec_dims.append(Dimension(row_name,
                                       spec_vals_units[row_ind],
                                       spec_vals[row_ind]))

        pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps,
                                       bins_per_step, np.dtype('complex64').itemsize)
        chunking = int(np.floor(np.sqrt(pixel_chunking)))
        chunking = max(1, chunking)
        chunking = min(num_actual_udvs_steps, num_pix, chunking)
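        # Worked example of the rule above (hypothetical numbers): if
        # maxReadPixels returns pixel_chunking = 900, then floor(sqrt(900)) = 30,
        # so each chunk spans 30 pixels by 30 UDVS steps (30 * bins_per_step
        # bins), clipped to the dataset dimensions by the max/min calls above.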
        self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins), 'Raw_Data',
                                          'Piezoresponse', 'V',
                                          pos_dims, spec_dims,
                                          dtype=np.complex64,
                                          chunks=(chunking, chunking * bins_per_step),
                                          compression='gzip')

        self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]), dtype=np.complex64)
        self.max_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32)
        self.min_resp = np.zeros(shape=(self.h5_main.shape[0]), dtype=np.float32)

        # Now read the raw data files:
        self._read_data(path_dict['read_real'], path_dict['read_imag'], parm_dict)
        self.h5_file.flush()

        generatePlotGroups(self.h5_main, self.mean_resp, folder_path, basename, self.max_resp,
                           self.min_resp, max_mem_mb=self.max_ram, spec_label=spec_label, show_plots=show_plots,
                           save_plots=save_plots, do_histogram=do_histogram)

        self.h5_file.close()

        return h5_path
Example #15
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file

        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename + '.h5')

        if path.exists(h5_path):
            remove(h5_path)

        # Load parameters from .mat file
        matread = loadmat(parm_paths['parm_mat'],
                          variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                          'BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))
        be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])

        self.points_per_pixel = len(be_wave)
        self.points_per_line = len(be_wave_train)

        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])

        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])

        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))
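        # Worked example (hypothetical values): with samp_rate = 1 MHz and
        # points_per_pixel = 10000, pixel_duration = 0.01 s. A nominal frequency
        # of 510 Hz gives num_periods = 5.1, floored to 5 whole periods, so
        # ex_freq_correct = 5 / 0.01 s = 500 Hz - the closest frequency that
        # completes an integer number of cycles within one pixel.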

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['points_per_line'] = self.points_per_line
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'


        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size / self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_file = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()

        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement and Channel groups and write the appropriate parameters to them
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # Now that the file has been created, go over each raw data file:
        """ 
        We only allocate the space for the main data here.
        This does NOT change with each file. The data written to it does.
        The auxiliary datasets will not change with each raw data file since
        only one excitation waveform is used
        """
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)


        for f_index in data_paths.keys():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_paths[f_index], h5_main)

        h5_file.close()
        print('G-Tune translation complete!')

        return h5_path
Example #16
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        self.parm_path = path.abspath(parm_path)
        (folder_path, file_name) = path.split(parm_path)
        (file_name, base_name) = path.split(folder_path)
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        parm_dict = readGmodeParms(parm_path)

        # Add the w^2 specific parameters to this list
        parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
        # freq_sweep_parms = parm_data['freqSweepParms']
        # parm_dict['freq_sweep_delay'] = np.float(freq_sweep_parms['delay'].item())
        gen_sig = parm_data['genSig']
        # parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
        freq_array = np.float32(parm_data['freqArray'])  # needed below for the spectroscopic values

        # prepare and write spectroscopic values
        samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
        num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] *
                       samp_rate)

        w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate,
                          np.float32(samp_rate / num_bins))

        # There is most likely a more elegant solution to this but I don't have the time... Maybe np.meshgrid
        spec_val_mat = np.zeros((len(freq_array) * num_bins, 2),
                                dtype=VALUES_DTYPE)
        spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
        spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)
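        # A sketch of the np.meshgrid alternative alluded to above (an untested
        # equivalence, not part of the original code):
        #   f_grid, w_grid = np.meshgrid(freq_array, w_vec)
        #   spec_val_mat[:, 0] = w_grid.ravel(order='F')  # == np.tile(w_vec, len(freq_array))
        #   spec_val_mat[:, 1] = f_grid.ravel(order='F')  # == np.repeat(freq_array, num_bins)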

        spec_ind_mat = np.zeros((2, len(freq_array) * num_bins),
                                dtype=np.int32)
        spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
        spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        parm_dict['data_type'] = 'GVS'

        num_pix = num_rows * num_cols

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'GVS'

        # Now start creating datasets and populating:
        if path.exists(h5_path):
            remove(h5_path)

        h5_f = h5py.File(h5_path, 'w')
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, parm_dict)

        pos_dims = [
            Dimension('X', 'nm', num_rows),
            Dimension('Y', 'nm', num_cols)
        ]
        spec_dims = [
            Dimension('Response Bin', 'a.u.', num_bins),
            Dimension('Excitation Frequency', 'Hz', len(freq_array))
        ]

        # Minimize file size to the extent possible.
        # DAQs are rated at 16 bit, so float16 would be the most compact choice,
        # though the dataset below is written as float32.
        # For some reason, compression is more effective on time series data.

        h5_main = write_main_dataset(chan_grp, (num_pix, num_bins),
                                     'Raw_Data',
                                     'Deflection',
                                     'V',
                                     pos_dims,
                                     spec_dims,
                                     chunks=(1, num_bins),
                                     dtype=np.float32)

        h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies',
                                              data=freq_array)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

        # Now doing link_h5_objects_as_attrs:
        link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(
                    folder_path,
                    'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                print('Working on row {} col {}'.format(row_ind, col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    pix_mat = pix_data['AI_mat']
                    # Take the inverse FFT on 2nd dimension
                    pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1),
                                          axis=1)
                    # Verified with Matlab - no conjugate required here.
                    pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                    h5_main[pos_ind, :] = np.float32(pix_vec)
                    h5_f.flush()  # flush from memory!
                else:
                    print('File not found for: row {} col {}'.format(
                        row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('completed translating {} %'.format(
                        int(100 * pos_ind / num_pix)))

        h5_f.close()

        return h5_path
Example #17
    def translate(self, file_path):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------
        file_path : String / unicode
            Absolute path of any file in the directory

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the h5 file
        """
        file_path = path.abspath(file_path)
        # Figure out the basename of the data:
        (basename, parm_paths, data_paths) = self._parse_file_path(file_path)
        
        (folder_path, unused) = path.split(file_path)
        h5_path = path.join(folder_path, basename+'.h5')
        
        if path.exists(h5_path):
            remove(h5_path)
        
        # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
        matread = loadmat(parm_paths['parm_mat'], variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
        be_wave = np.float32(np.squeeze(matread['BE_wave']))

        # Need to take the complex conjugate if reading from a .mat file
        # FFT_BE_wave = np.conjugate(np.complex64(np.squeeze(matread['FFT_BE_wave'])))
        
        num_cols = int(matread['total_cols'][0][0])
        expected_rows = int(matread['total_rows'][0][0])
        self.points_per_pixel = len(be_wave)
        
        # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
        is_beps, parm_dict = parmsToDict(parm_paths['parm_txt'])
        
        # Get file byte size:
        # For now, assume that bigtime_00 always exists and is the main file
        file_size = path.getsize(data_paths[0])
        
        # Calculate actual number of lines since the first few lines may not be saved
        self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
        if self.num_rows % 1:
            warn('Error - File has incomplete rows')
            return None
        else:
            self.num_rows = int(self.num_rows)

        samp_rate = parm_dict['IO_rate_[Hz]']
        ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

        # method 1 for calculating the correct excitation frequency:
        pixel_duration = 1.0 * self.points_per_pixel / samp_rate
        num_periods = pixel_duration * ex_freq_nominal
        ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

        # method 2 for calculating the exact excitation frequency:
        """
        fft_ex_wfm = np.abs(np.fft.fftshift(np.fft.fft(be_wave)))
        w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate - 1.0*samp_rate / self.points_per_pixel,
                            self.points_per_pixel)
        hot_bins = np.squeeze(np.argwhere(fft_ex_wfm > 1E+3))
        ex_freq_correct = w_vec[hot_bins[-1]]
        """

        # correcting the excitation frequency - will be VERY useful during analysis and filtering
        parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

        # Some very basic information that can help the processing crew
        parm_dict['num_bins'] = self.points_per_pixel
        parm_dict['grid_num_rows'] = self.num_rows
        parm_dict['data_type'] = 'G_mode_line'
            
        if self.num_rows != expected_rows:
            print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))
        
        # Calculate number of points to read per line:
        self.__bytes_per_row__ = int(file_size/self.num_rows)

        # First finish writing all global parameters, create the file too:
        h5_f = h5py.File(h5_path, 'w')
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        first_dat = True
        for key in data_paths.keys():
            # Now that the file has been created, go over each raw data file:
            # 1. write all ancillary data. Link data. 2. Write main data sequentially

            """ We only allocate the space for the main data here.
            This does NOT change with each file. The data written to it does.
            The auxiliary datasets will not change with each raw data file since
            only one excitation waveform is used"""
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if first_dat:
                if len(data_paths) > 1:
                    # All positions and spectra are shared between channels
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)
                elif len(data_paths) == 1:
                    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(chan_grp, pos_desc, is_spectral=False)
                    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(chan_grp, spec_desc, is_spectral=True)

                first_dat = False

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols), 'Raw_Data',
                                         'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_paths[key], h5_main)
            
        h5_f.close()
        print('G-Line translation complete!')

        return h5_path
Example #18
    def translate(self, data_filepath, show_plots=True, save_plots=True, do_histogram=False, debug=False):
        """
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ----------------
        data_filepath : String / unicode
            Absolute path of the data file (.dat)
        show_plots : Boolean (Optional. Default is True)
            Whether or not to show plots
        save_plots : Boolean (Optional. Default is True)
            Whether or not to save the generated plots
        do_histogram : Boolean (Optional. Default is False)
            Whether or not to generate and save 2D histograms of the raw data
        debug : Boolean (Optional. Default is False)
            Whether or not to print log statements
            
        Returns
        --------------
        h5_path : String / unicode
            Absolute path of the generated .h5 file

        """
        data_filepath = path.abspath(data_filepath)
        # Read the parameter files
        self.debug = debug
        if debug:
            print('BEndfTranslator: Getting file paths')

        parm_filepath, udvs_filepath, parms_mat_path = self._parse_file_path(data_filepath)
        if debug:
            print('BEndfTranslator: Reading Parms text file')

        isBEPS, self.parm_dict = parmsToDict(parm_filepath)
        self.parm_dict['data_type'] = 'BEPSData'
        if not isBEPS:
            warn('This is NOT a BEPS new-data-format dataset!')
            return None

        """ Find out if this is a custom experiment and whether in and out of field were acquired
        For a standard experiment where only in / out field is acquired, zeros are stored
        even for those UDVS steps without band excitation"""
        self.field_mode = self.parm_dict['VS_measure_in_field_loops']
        expt_type = self.parm_dict['VS_mode']
        self.spec_label = getSpectroscopicParmLabel(expt_type)
        std_expt = expt_type in ['DC modulation mode', 'current mode']
        self.halve_udvs_steps = False
        ignored_plt_grps = []
        if std_expt and self.field_mode != 'in and out-of-field':
            self.halve_udvs_steps = True
            if self.field_mode == 'out-of-field':
                ignored_plt_grps = ['in-field']
            else:
                ignored_plt_grps = ['out-of-field']

        h5_path = path.join(self.folder_path, self.basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)

        if debug:
            print('BEndfTranslator: Preparing to read parms.mat file')
        self.BE_wave, self.BE_wave_rev, self.BE_bin_inds = self.__get_excit_wfm(parms_mat_path)

        if debug:
            print('BEndfTranslator: About to read UDVS file')

        self.udvs_labs, self.udvs_units, self.udvs_mat = self.__read_udvs_table(udvs_filepath)
        # Remove the unused plot group columns before proceeding:
        self.udvs_mat, self.udvs_labs, self.udvs_units = trimUDVS(self.udvs_mat, self.udvs_labs, self.udvs_units,
                                                                  ignored_plt_grps)
        if debug:
            print('BEndfTranslator: Read UDVS file')

        self.num_udvs_steps = self.udvs_mat.shape[0]
        # This is absolutely crucial for reconstructing the data chronologically
        self.excit_type_vec = (self.udvs_mat[:, 4]).astype(int)

        # First figure out how many waveforms are present in the data from the UDVS
        unique_waves = self.__get_unique_wave_types(self.excit_type_vec)
        self.__unique_waves__ = unique_waves
        self.__num_wave_types__ = len(unique_waves)
        # print self.__num_wave_types__, 'different excitation waveforms in this experiment'

        if debug:
            print('BEndfTranslator: Preparing to set up parsers')

        # Preparing objects to parse the file(s)
        parsers = self.__assemble_parsers()

        # Gathering some basic details before parsing the files:
        self.max_pixels = parsers[0].get_num_pixels()
        s_pixels = np.array(parsers[0].get_spatial_pixels())
        self.pos_labels = ['Laser Spot', 'Z', 'Y', 'X']
        self.pos_labels = [self.pos_labels[i] for i in np.where(s_pixels > 1)[0]]
        self.pos_mat = make_indices_matrix(s_pixels[np.argwhere(s_pixels > 1)].squeeze())
        self.pos_units = ['um' for _ in range(len(self.pos_labels))]
        #         self.pos_mat = np.int32(self.pos_mat)
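        # Sketch of the position bookkeeping above (hypothetical numbers):
        # s_pixels = [1, 1, 10, 10] keeps only 'Y' and 'X' in pos_labels, and
        # make_indices_matrix is fed [10, 10], yielding a 100-by-2 matrix that
        # enumerates every (Y, X) index pair on the grid.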

        # Helping Eric out a bit. Remove this section at a later time:
        main_parms = generate_dummy_main_parms()
        # main_parms['grid_size_x'] = self.parm_dict['grid_num_cols']
        # main_parms['grid_size_y'] = self.parm_dict['grid_num_rows']
        main_parms['grid_size_x'] = self.parm_dict['grid_num_rows']
        main_parms['grid_size_y'] = self.parm_dict['grid_num_cols']
        main_parms['experiment_date'] = self.parm_dict['File_date_and_time']
        # assuming that the experiment was completed:        
        main_parms['current_position_x'] = self.parm_dict['grid_num_rows'] - 1
        main_parms['current_position_y'] = self.parm_dict['grid_num_cols'] - 1
        main_parms['data_type'] = 'BEPSData'
        main_parms['translator'] = 'NDF'

        # Writing only the root now:
        spm_data = VirtualGroup('')
        spm_data.attrs = main_parms
        self.hdf = HDFwriter(h5_path)
        # self.hdf.clear()

        # cacheSettings = self.hdf.file.id.get_access_plist().get_cache()

        self.hdf.write(spm_data)

        ########################################################
        # Reading and parsing the .dat file(s) 

        self._read_data(parsers, unique_waves, show_plots, save_plots, do_histogram)

        self.hdf.close()

        return h5_path
Example #19
    def translate(self, parm_path):
        """      
        The main function that translates the provided file into a .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        parm_dict, excit_wfm = self._read_parms(parm_path)
        folder_path, base_name = path.split(parm_path)
        waste, base_name = path.split(folder_path)

        # Until a better method is provided....
        with h5py.File(path.join(folder_path, 'line_1.mat'), 'r') as h5_mat_line_1:
            num_ai_chans = h5_mat_line_1['data'].shape[1]
        
        h5_path = path.join(folder_path, base_name+'.h5')
        if path.exists(h5_path):
            remove(h5_path)

        with h5py.File(h5_path, 'w') as h5_f:

            h5_meas_grp = create_indexed_group(h5_f, 'Measurement')
            global_parms = generate_dummy_main_parms()
            global_parms.update({'data_type': 'gIV', 'translator': 'gIV'})
            write_simple_attrs(h5_meas_grp, global_parms)

            # Only prepare the instructions for the dimensions here
            spec_dims = Dimension('Bias', 'V', excit_wfm)
            pos_dims = Dimension('Y', 'm', np.linspace(0, parm_dict['grid_scan_height_[m]'],
                                                       parm_dict['grid_num_rows']))

            self.raw_datasets = list()

            for chan_index in range(num_ai_chans):

                h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
                write_simple_attrs(h5_chan_grp, parm_dict)
                """
                Minimize file size to the extent possible.
                DAQs are rated at 16 bit so float16 should be most appropriate.
                For some reason, compression is effective only on time series data
                """
                h5_raw = write_main_dataset(h5_chan_grp, (parm_dict['grid_num_rows'], excit_wfm.size), 'Raw_Data',
                                            'Current',
                                            '1E-{} A'.format(parm_dict['IO_amplifier_gain']), pos_dims, spec_dims,
                                            dtype=np.float16, chunks=(1, excit_wfm.size), compression='gzip')
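                # For example (hypothetical): IO_amplifier_gain = 9 makes the
                # quantity units '1E-9 A', i.e. the float16 raw values are
                # stored in nanoamperes.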

                self.raw_datasets.append(h5_raw)

            # Now that the N channels have been made, populate them with the actual data....
            self._read_data(parm_dict, folder_path)

        return h5_path
Example #20
    def _setup_h5(self, data_gen_parms):
        """
        Sets up the hdf5 file structure before doing the actual generation

        Parameters
        ----------
        data_gen_parms : dict
            Dictionary containing the parameters to write to the Measurement Group as attributes

        Returns
        -------

        """
        '''
        Build the group structure down to the channel group
        '''
        # Set up the basic group structure
        root_grp = VirtualGroup('')
        root_parms = generate_dummy_main_parms()
        root_parms['translator'] = 'FAKEBEPS'
        root_parms['data_type'] = data_gen_parms['data_type']
        root_grp.attrs = root_parms

        meas_grp = VirtualGroup('Measurement_')
        chan_grp = VirtualGroup('Channel_')

        meas_grp.attrs.update(data_gen_parms)

        # Create the Position and Spectroscopic datasets for the Raw Data
        ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals = self._build_ancillary_datasets()

        raw_chunking = calc_chunks([self.n_pixels, self.n_spec_bins],
                                   np.complex64(0).itemsize,
                                   unit_chunks=[1, self.n_bins])

        ds_raw_data = VirtualDataset(
            'Raw_Data',
            data=None,
            maxshape=[self.n_pixels, self.n_spec_bins],
            dtype=np.complex64,
            compression='gzip',
            chunking=raw_chunking,
            parent=meas_grp)

        chan_grp.add_children([
            ds_pos_inds, ds_pos_vals, ds_spec_inds, ds_spec_vals, ds_raw_data
        ])
        meas_grp.add_children([chan_grp])
        root_grp.add_children([meas_grp])
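        # The virtual tree assembled above should map to the following file
        # layout (a sketch, assuming HDFwriter expands the trailing-underscore
        # names into indexed groups):
        #   /                       <- root_parms as attributes
        #   /Measurement_000        <- data_gen_parms as attributes
        #   /Measurement_000/Channel_000/
        #       Position_Indices, Position_Values,
        #       Spectroscopic_Indices, Spectroscopic_Values, Raw_Data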

        hdf = HDFwriter(self.h5_path)
        hdf.delete()
        h5_refs = hdf.write(root_grp)

        # Delete the VirtualDatasets to save memory
        del ds_raw_data, ds_spec_inds, ds_spec_vals, ds_pos_inds, ds_pos_vals

        # Get the file and Raw_Data objects
        h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
        h5_chan_grp = h5_raw.parent

        # Get the Position and Spectroscopic dataset objects
        h5_pos_inds = get_h5_obj_refs(['Position_Indices'], h5_refs)[0]
        h5_pos_vals = get_h5_obj_refs(['Position_Values'], h5_refs)[0]
        h5_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'], h5_refs)[0]
        h5_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'], h5_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of Raw_Data
        link_as_main(h5_raw, h5_pos_inds, h5_pos_vals, h5_spec_inds,
                     h5_spec_vals)
        '''
        Build the SHO Group
        '''
        sho_grp = VirtualGroup('Raw_Data-SHO_Fit_', parent=h5_chan_grp.name)

        # Build the Spectroscopic datasets for the SHO Guess and Fit
        sho_spec_starts = np.where(
            h5_spec_inds[h5_spec_inds.attrs['Frequency']].squeeze() == 0)[0]
        sho_spec_labs = get_attr(h5_spec_inds, 'labels')
        ds_sho_spec_inds, ds_sho_spec_vals = build_reduced_spec_dsets(
            h5_spec_inds,
            h5_spec_vals,
            keep_dim=sho_spec_labs != 'Frequency',
            step_starts=sho_spec_starts)
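        # Reading of the call above (not from the original comments):
        # build_reduced_spec_dsets collapses the Frequency dimension, keeping
        # one spectroscopic entry per SHO step (the rows where the Frequency
        # index resets to 0) while preserving every other dimension.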

        sho_chunking = calc_chunks([self.n_pixels, self.n_sho_bins],
                                   sho32.itemsize,
                                   unit_chunks=[1, 1])
        ds_sho_fit = VirtualDataset('Fit',
                                    data=None,
                                    maxshape=[self.n_pixels, self.n_sho_bins],
                                    dtype=sho32,
                                    compression='gzip',
                                    chunking=sho_chunking,
                                    parent=sho_grp)
        ds_sho_guess = VirtualDataset(
            'Guess',
            data=None,
            maxshape=[self.n_pixels, self.n_sho_bins],
            dtype=sho32,
            compression='gzip',
            chunking=sho_chunking,
            parent=sho_grp)

        sho_grp.add_children(
            [ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals])

        # Write the SHO group and datasets to the file and delete the VirtualDataset objects
        h5_sho_refs = hdf.write(sho_grp)
        del ds_sho_fit, ds_sho_guess, ds_sho_spec_inds, ds_sho_spec_vals

        # Get the dataset handles for the fit and guess
        h5_sho_fit = get_h5_obj_refs(['Fit'], h5_sho_refs)[0]
        h5_sho_guess = get_h5_obj_refs(['Guess'], h5_sho_refs)[0]

        # Get the dataset handles for the SHO Spectroscopic datasets
        h5_sho_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'],
                                           h5_sho_refs)[0]
        h5_sho_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'],
                                           h5_sho_refs)[0]

        # Link the Position and Spectroscopic datasets as attributes of the SHO Fit and Guess
        link_as_main(h5_sho_fit, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds,
                     h5_sho_spec_vals)
        link_as_main(h5_sho_guess, h5_pos_inds, h5_pos_vals, h5_sho_spec_inds,
                     h5_sho_spec_vals)
        '''
        Build the loop group
        '''
        loop_grp = VirtualGroup('Fit-Loop_Fit_', parent=h5_sho_fit.parent.name)

        # Build the Spectroscopic datasets for the loops
        loop_spec_starts = np.where(h5_sho_spec_inds[
            h5_sho_spec_inds.attrs['DC_Offset']].squeeze() == 0)[0]
        loop_spec_labs = get_attr(h5_sho_spec_inds, 'labels')
        ds_loop_spec_inds, ds_loop_spec_vals = build_reduced_spec_dsets(
            h5_sho_spec_inds,
            h5_sho_spec_vals,
            keep_dim=loop_spec_labs != 'DC_Offset',
            step_starts=loop_spec_starts)

        # Create the loop fit and guess VirtualDatasets
        loop_chunking = calc_chunks([self.n_pixels, self.n_loops],
                                    loop_fit32.itemsize,
                                    unit_chunks=[1, 1])
        ds_loop_fit = VirtualDataset('Fit',
                                     data=None,
                                     maxshape=[self.n_pixels, self.n_loops],
                                     dtype=loop_fit32,
                                     compression='gzip',
                                     chunking=loop_chunking,
                                     parent=loop_grp)

        ds_loop_guess = VirtualDataset('Guess',
                                       data=None,
                                       maxshape=[self.n_pixels, self.n_loops],
                                       dtype=loop_fit32,
                                       compression='gzip',
                                       chunking=loop_chunking,
                                       parent=loop_grp)

        # Add the datasets to the loop group then write it to the file
        loop_grp.add_children(
            [ds_loop_fit, ds_loop_guess, ds_loop_spec_inds, ds_loop_spec_vals])
        h5_loop_refs = hdf.write(loop_grp)

        # Delete the VirtualDatasets
        del ds_loop_spec_vals, ds_loop_spec_inds, ds_loop_guess, ds_loop_fit

        # Get the handles to the datasets
        h5_loop_fit = get_h5_obj_refs(['Fit'], h5_loop_refs)[0]
        h5_loop_guess = get_h5_obj_refs(['Guess'], h5_loop_refs)[0]
        h5_loop_spec_inds = get_h5_obj_refs(['Spectroscopic_Indices'],
                                            h5_loop_refs)[0]
        h5_loop_spec_vals = get_h5_obj_refs(['Spectroscopic_Values'],
                                            h5_loop_refs)[0]

        # Link the Position and Spectroscopic datasets to the Loop Guess and Fit
        link_as_main(h5_loop_fit, h5_pos_inds, h5_pos_vals, h5_loop_spec_inds,
                     h5_loop_spec_vals)
        link_as_main(h5_loop_guess, h5_pos_inds, h5_pos_vals,
                     h5_loop_spec_inds, h5_loop_spec_vals)

        self.h5_raw = USIDataset(h5_raw)
        self.h5_sho_guess = USIDataset(h5_sho_guess)
        self.h5_sho_fit = USIDataset(h5_sho_fit)
        self.h5_loop_guess = USIDataset(h5_loop_guess)
        self.h5_loop_fit = USIDataset(h5_loop_fit)
        self.h5_spec_vals = h5_spec_vals
        self.h5_spec_inds = h5_spec_inds
        self.h5_sho_spec_inds = h5_sho_spec_inds
        self.h5_sho_spec_vals = h5_sho_spec_vals
        self.h5_loop_spec_inds = h5_loop_spec_inds
        self.h5_loop_spec_vals = h5_loop_spec_vals
        self.h5_file = h5_raw.file

        return
Example #21
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        parm_path = path.abspath(parm_path)
        (folder_path, file_name) = path.split(parm_path)
        (file_name, base_name) = path.split(folder_path)
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        print('reading parameter files')
        parm_dict, excit_wfm, spec_ind_mat = self.__readparms(parm_path)
        parm_dict['data_type'] = 'SPORC'

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        num_pix = num_rows * num_cols

        # new data format
        spec_ind_mat = np.transpose(VALUES_DTYPE(spec_ind_mat))

        # Now start creating datasets and populating:
        pos_desc = [Dimension('Y', 'm', np.arange(num_rows)), Dimension('X', 'm', np.arange(num_cols))]
        ds_pos_ind, ds_pos_val = build_ind_val_dsets(pos_desc, is_spectral=False)

        spec_ind_labels = ['x index', 'y index', 'loop index', 'repetition index', 'slope index']
        spec_ind_dict = dict()
        for col_ind, col_name in enumerate(spec_ind_labels):
            spec_ind_dict[col_name] = (slice(col_ind, col_ind + 1), slice(None))
        ds_spec_inds = VirtualDataset('Spectroscopic_Indices', INDICES_DTYPE(spec_ind_mat))
        ds_spec_inds.attrs['labels'] = spec_ind_dict
        ds_spec_vals = VirtualDataset('Spectroscopic_Values', spec_ind_mat)
        ds_spec_vals.attrs['labels'] = spec_ind_dict
        ds_spec_vals.attrs['units'] = ['V', 'V', '', '', '']

        ds_excit_wfm = VirtualDataset('Excitation_Waveform', np.float32(excit_wfm))

        ds_raw_data = VirtualDataset('Raw_Data', data=[],
                                     maxshape=(num_pix, len(excit_wfm)),
                                     dtype=np.float16, chunking=(1, len(excit_wfm)),
                                     compression='gzip')

        # technically should change the date, etc.

        chan_grp = VirtualGroup('Channel_000')
        chan_grp.attrs = parm_dict
        chan_grp.add_children([ds_pos_ind, ds_pos_val, ds_spec_inds, ds_spec_vals,
                               ds_excit_wfm, ds_raw_data])

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:        
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']
        global_parms['translator'] = 'SPORC'

        meas_grp = VirtualGroup('Measurement_000')
        meas_grp.add_children([chan_grp])
        spm_data = VirtualGroup('')
        spm_data.attrs = global_parms
        spm_data.add_children([meas_grp])

        if path.exists(h5_path):
            remove(h5_path)

        # Write everything except for the main data.
        hdf = HDFwriter(h5_path)

        h5_refs = hdf.write(spm_data)

        h5_main = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]

        # Now doing link_h5_objects_as_attrs:
        aux_ds_names = ['Excitation_Waveform', 'Position_Indices', 'Position_Values',
                        'Spectroscopic_Indices', 'Spectroscopic_Values']
        link_h5_objects_as_attrs(h5_main, get_h5_obj_refs(aux_ds_names, h5_refs))

        print('reading raw data now...')

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(folder_path, 'result_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                # print('Working on row {} col {}'.format(row_ind,col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    # Take the inverse FFT on 1st dimension
                    pix_vec = np.fft.ifft(np.fft.ifftshift(pix_data['data']))
                    # Verified with Matlab - no conjugate required here.
                    h5_main[pos_ind, :] = np.float16(np.real(pix_vec))
                    hdf.flush()  # flush from memory!
                else:
                    print('File for row {} col {} not found'.format(row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('Finished reading {} % of data'.format(int(100 * pos_ind / num_pix)))

        hdf.close()

        return h5_path
Example #22
    def translate(self, file_path, verbose=False, append_path='', 
                  grp_name='Measurement', parm_encoding='utf-8'):
        """
        Translates the provided file to .h5

        Parameters
        ----------
        file_path : String / unicode
            Absolute path of the .ibw file
        verbose : Boolean (Optional)
            Whether or not to show print statements for debugging
        append_path : string (Optional)
            h5_file to add these data to, must be a path to the h5_file on disk
        grp_name : string (Optional)
            Change from default "Measurement" name to something specific
        parm_encoding : str, optional
            Codec to be used to decode the bytestrings into Python strings if needed.
            Default 'utf-8'

        Returns
        -------
        h5_path : String / unicode
            Absolute path of the .h5 file
        """
        file_path = path.abspath(file_path)
        # Prepare the .h5 file:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        
        if not append_path:
            h5_path = path.join(folder_path, base_name + '.h5')
            if path.exists(h5_path):
                remove(h5_path)
            h5_file = h5py.File(h5_path, 'w')
        else:
            h5_path = append_path
            if not path.exists(append_path):
                raise Exception('File does not exist. Check pathname.')
            h5_file = h5py.File(h5_path, 'r+')
        

        # Load the ibw file first
        ibw_obj = bw.load(file_path)
        ibw_wave = ibw_obj.get('wave')
        parm_dict = self._read_parms(ibw_wave, parm_encoding)
        chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

        if verbose:
            print('Channels and units found:')
            print(chan_labels)
            print(chan_units)

        # Get the data to figure out if this is an image or a force curve
        images = ibw_wave.get('wData')

        if images.shape[-1] != len(chan_labels):
            chan_labels = chan_labels[1:]  # for layer 0 null set errors in older AR software

        if images.ndim == 3:  # Image stack
            if verbose:
                print('Found image stack of size {}'.format(images.shape))
            type_suffix = 'Image'

            num_rows = parm_dict['ScanLines']
            num_cols = parm_dict['ScanPoints']

            images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
            images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]
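            # Shape walk-through (hypothetical 256 x 256 scan with 4 channels):
            # wData arrives as (X, Y, chan) = (256, 256, 4); the transpose
            # yields (4, 256, 256) ordered [chan, Y, X]; the reshape gives
            # (4, 65536, 1) - one flat pixel list per channel with a single
            # spectroscopic point.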

            pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                        Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

            spec_desc = Dimension('arb', 'a.u.', [1])

        else:  # single force curve
            if verbose:
                print('Found force curve of size {}'.format(images.shape))

            type_suffix = 'ForceCurve'
            images = np.atleast_3d(images)  # now [Z, chan, 1]
            images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

            # The data generated above varies linearly. Override.
            # For now, we'll shove the Z sensor data into the spectroscopic values.

            # Find the channel that corresponds to either Z sensor or Raw:
            try:
                chan_ind = chan_labels.index('ZSnsr')
                spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
            except ValueError:
                try:
                    chan_ind = chan_labels.index('Raw')
                    spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
                except ValueError:
                    # We don't expect to come here. If we do, spectroscopic values remains as is
                    spec_data = np.arange(images.shape[2])

            pos_desc = Dimension('X', 'm', [1])
            spec_desc = Dimension('Z', 'm', spec_data)

        # Create measurement group
        meas_grp = create_indexed_group(h5_file, grp_name)

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Prepare the list of raw_data datasets
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            if verbose:
                print('channel', chan_name)
                print('unit', chan_unit)
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

        h5_file.close()
        return h5_path
Example #24
    def translate(self, file_path, *args, **kwargs):
        """
        Translates a given Bruker / Veeco / Nanoscope AFM derived file to HDF5. Currently handles scans, force curves,
        and force-distance maps

        Note that this translator was written with a single example file for each modality and may be buggy.

        Parameters
        ----------
        file_path : str / unicode
            path to data file

        Returns
        -------
        h5_path : str / unicode
            path to translated HDF5 file
        """
        self.file_path = path.abspath(file_path)
        self.meta_data, other_parms = self._extract_metadata()

        # These files are weirdly named with extensions such as .001
        h5_path = file_path.replace('.', '_') + '.h5'

        if path.exists(h5_path):
            remove(h5_path)

        h5_file = h5py.File(h5_path, 'w')

        type_suffixes = ['Image', 'Force_Curve', 'Force_Map']
        # 0 - stack of scan images
        # 1 - single force curve
        # 2 - force map
        force_count = 0
        image_count = 0
        for class_name in self.meta_data.keys():
            if 'Ciao force image list' in class_name:
                force_count += 1
            elif 'Ciao image list' in class_name:
                image_count += 1
        data_type = 0
        if force_count > 0:
            if image_count > 0:
                data_type = 2
            else:
                data_type = 1
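        # Restating the counts above: force curves and images -> Force_Map,
        # force curves only -> Force_Curve, images only -> Image stack.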

        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'Bruker_AFM_' + type_suffixes[data_type]
        global_parms['translator'] = 'Bruker_AFM'
        write_simple_attrs(h5_file, global_parms)

        # too many parameters. Making a dummy group just for the parameters.
        h5_parms_grp = h5_file.create_group('Parameters')
        # We currently have a dictionary of dictionaries. This needs to be flattened
        flat_dict = dict()
        for class_name, sub_dict in other_parms.items():
            for key, val in sub_dict.items():
                flat_dict[class_name + '_' + key] = val
        write_simple_attrs(h5_parms_grp, flat_dict)
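        # For example (hypothetical): {'Ciao scan list': {'ScanSize': '1 um'}}
        # flattens to {'Ciao scan list_ScanSize': '1 um'} before being written.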

        # Create measurement group
        h5_meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Call the data specific translation function
        trans_funcs = [self._translate_image_stack, self._translate_force_curve, self._translate_force_map]
        trans_funcs[data_type](h5_meas_grp)

        # wrap up and return path
        h5_file.close()
        return h5_path
Example #25
    def translate(self,
                  file_path,
                  show_plots=True,
                  save_plots=True,
                  do_histogram=False):
        """
        Basic method that translates .dat data file(s) to a single .h5 file
        
        Inputs:
            file_path -- Absolute file path for one of the data files. 
            It is assumed that this file is of the OLD data format. 
            
        Outputs:
            h5_path -- Absolute path of the generated .h5 file
        """
        file_path = path.abspath(file_path)
        (folder_path, basename) = path.split(file_path)
        (basename, path_dict) = self._parse_file_path(file_path)

        h5_path = path.join(folder_path, basename + '.h5')
        if path.exists(h5_path):
            remove(h5_path)
        self.h5_file = h5py.File(h5_path, 'w')

        isBEPS = True
        parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

        ignored_plt_grps = ['in-field']  # Here we assume that there is no in-field.
        # If in-field data is captured then the translator would have to be modified.

        # Technically, we could do away with this if statement, as isBEPS is always true for this translation
        if isBEPS:
            parm_dict['data_type'] = 'BEPSData'

            std_expt = parm_dict[
                'VS_mode'] != 'load user defined VS Wave from file'

            if not std_expt:
                warn(
                    'This translator does not handle user defined voltage spectroscopy'
                )
                return

            spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

        # Check file sizes:
        if 'read_real' in path_dict.keys():
            real_size = path.getsize(path_dict['read_real'])
            imag_size = path.getsize(path_dict['read_imag'])
        else:
            real_size = path.getsize(path_dict['write_real'])
            imag_size = path.getsize(path_dict['write_imag'])

        if real_size != imag_size:
            raise ValueError("Real and imaginary file sizes DON'T match! Aborting.")

        num_rows = int(parm_dict['grid_num_rows'])
        num_cols = int(parm_dict['grid_num_cols'])
        num_pix = num_rows * num_cols
        tot_bins = real_size / (num_pix * 4)  # find the bin count by simple division of the entire data size

        # Check for case where only a single pixel is missing.
        check_bins = real_size / ((num_pix - 1) * 4)

        if tot_bins % 1 and check_bins % 1:
            warn('Aborting! Some parameter appears to have changed in between acquisitions.')
            return
        elif not tot_bins % 1:
            # Everything is OK
            pass
        elif not check_bins % 1:
            tot_bins = check_bins
            warn('Warning: A pixel seems to be missing from the data. The file will be padded with zeros.')
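        # Worked example of the check above (illustrative numbers only):
        # with num_pix = 100 pixels, 50 bins per pixel and 4-byte samples,
        # a complete file is 100 * 50 * 4 = 20000 bytes, so
        # tot_bins = 20000 / (100 * 4) = 50.0 (integer -> everything is OK).
        # If one pixel is missing, the file is 99 * 50 * 4 = 19800 bytes:
        # tot_bins = 19800 / 400 = 49.5 (fractional), but
        # check_bins = 19800 / (99 * 4) = 50.0, so the missing pixel is caught.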

        tot_bins = int(tot_bins)
        (bin_inds, bin_freqs, bin_FFT, ex_wfm,
         dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])
        """
        Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less 
        than the actual number), we need to re-calculate it based on the available data. This is done below.
        """

        band_width = parm_dict['BE_band_width_[Hz]'] * (
            0.5 - parm_dict['BE_band_edge_trim'])
        st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
        en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
        bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)
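        # Worked example of the recalculation above (illustrative numbers only):
        # with BE_center_frequency = 300 kHz, BE_band_width = 100 kHz and
        # BE_band_edge_trim = 0.05, band_width = 100e3 * (0.5 - 0.05) = 45 kHz,
        # so the bins span 255 kHz to 345 kHz over len(bin_inds) points.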

        # Forcing standardized datatypes:
        bin_inds = np.int32(bin_inds)
        bin_freqs = np.float32(bin_freqs)
        bin_FFT = np.complex64(bin_FFT)
        ex_wfm = np.float32(ex_wfm)

        self.FFT_BE_wave = bin_FFT

        (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

        # Remove the unused plot group columns before proceeding:
        (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs,
                                                     UDVS_units,
                                                     ignored_plt_grps)

        spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

        # Will assume that all excitation waveforms have the same number of bins
        # The denominator is 2 because there are only out-of-field measurements. For in-field + out-of-field it should be 1.
        num_actual_udvs_steps = UDVS_mat.shape[0] / 2
        bins_per_step = tot_bins / num_actual_udvs_steps

        # Some more checks
        if bins_per_step % 1:
            warn('Non-integer number of bins per step!')
            return
        else:
            bins_per_step = int(bins_per_step)

        num_actual_udvs_steps = int(num_actual_udvs_steps)

        stind = 0
        for step_index in range(UDVS_mat.shape[0]):
            if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
                continue  # skip
            spec_inds[0, stind:stind + bins_per_step] = np.arange(
                bins_per_step, dtype=INDICES_DTYPE)  # Bin step
            spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(
                bins_per_step, dtype=INDICES_DTYPE)  # UDVS step
            stind += bins_per_step
        del stind, step_index
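        # Illustration of the layout just built (hypothetical numbers): with
        # bins_per_step = 3 and valid UDVS steps 0 and 2 (step 1 skipped for
        # having a negligible AC amplitude), spec_inds becomes
        #   row 0 (bin step):  [0, 1, 2, 0, 1, 2]
        #   row 1 (UDVS step): [0, 0, 0, 2, 2, 2]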

        # Some very basic information that can help the processing / analysis crew
        parm_dict['num_bins'] = tot_bins
        parm_dict['num_pix'] = num_pix
        parm_dict['num_udvs_steps'] = num_actual_udvs_steps

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        global_parms['experiment_date'] = parm_dict['File_date_and_time']

        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict[
            'data_type']  # self.__class__.__name__
        global_parms['translator'] = 'ODF'
        write_simple_attrs(self.h5_file, global_parms)

        # Create Measurement and Channel groups
        meas_grp = create_indexed_group(self.h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        chan_grp = create_indexed_group(meas_grp, 'Channel')
        chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

        # Create Auxiliary Datasets
        h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

        udvs_slices = dict()
        for col_ind, col_name in enumerate(UDVS_labs):
            udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
        h5_UDVS = chan_grp.create_dataset('UDVS',
                                          data=UDVS_mat,
                                          dtype=np.float32)
        write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

        h5_bin_steps = chan_grp.create_dataset('Bin_Steps',
                                               data=np.arange(bins_per_step,
                                                              dtype=np.uint32),
                                               dtype=np.uint32)

        # Need to add the Bin Waveform type - infer from UDVS
        exec_bin_vec = self.signal_type * np.ones(len(bin_inds),
                                                  dtype=np.int32)
        h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type',
                                             data=exec_bin_vec,
                                             dtype=np.int32)

        h5_bin_inds = chan_grp.create_dataset('Bin_Indices',
                                              data=bin_inds,
                                              dtype=np.uint32)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies',
                                              data=bin_freqs,
                                              dtype=np.float32)
        h5_bin_FFT = chan_grp.create_dataset('Bin_FFT',
                                             data=bin_FFT,
                                             dtype=np.complex64)
        # Noise floor: shape (positions x udvs_steps), with the 3 components per step packed into the compound nf32 dtype
        h5_noise_floor = chan_grp.create_dataset(
            'Noise_Floor',
            shape=(num_pix, num_actual_udvs_steps),
            dtype=nf32,
            chunks=(1, num_actual_udvs_steps))
        """ 
        ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
        Chunk by each UDVS step - this makes it easy / quick to:
            1. read data for a single UDVS step from all pixels
            2. read an entire / multiple pixels at a time
        The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes.
        This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes
        meaning that the metadata would be very substantial.
        This assumption is fine since we almost do not handle any user defined cases
        """
        """
        New Method for chunking the Main_Data dataset.  Chunking is now done in N-by-N squares of UDVS steps by pixels.
        N is determined dinamically based on the dimensions of the dataset.  Currently it is set such that individual
        chunks are less than 10kB in size.
        
        Chris Smith -- [email protected]
        """
        pos_dims = [
            Dimension('X', 'nm', num_cols),
            Dimension('Y', 'nm', num_rows)
        ]

        # Create Spectroscopic Values and Spectroscopic Values Labels datasets
        spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(
            UDVS_mat, spec_inds, bin_freqs, exec_bin_vec, parm_dict, UDVS_labs,
            UDVS_units)

        spec_dims = list()
        for row_ind, row_name in enumerate(spec_vals_labs):
            spec_dims.append(
                Dimension(row_name, spec_vals_units[row_ind],
                          spec_vals[row_ind]))

        pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps,
                                       bins_per_step,
                                       np.dtype('complex64').itemsize)
        chunking = int(np.floor(np.sqrt(pixel_chunking)))
        chunking = max(1, chunking)
        chunking = min(num_actual_udvs_steps, num_pix, chunking)
        self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins),
                                          'Raw_Data',
                                          'Piezoresponse',
                                          'V',
                                          pos_dims,
                                          spec_dims,
                                          dtype=np.complex64,
                                          chunks=(chunking,
                                                  chunking * bins_per_step),
                                          compression='gzip')

        self.mean_resp = np.zeros(shape=(self.h5_main.shape[1]),
                                  dtype=np.complex64)
        self.max_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)
        self.min_resp = np.zeros(shape=(self.h5_main.shape[0]),
                                 dtype=np.float32)

        # Now read the raw data files:
        self._read_data(path_dict['read_real'], path_dict['read_imag'],
                        parm_dict)
        self.h5_file.flush()

        generatePlotGroups(self.h5_main,
                           self.mean_resp,
                           folder_path,
                           basename,
                           self.max_resp,
                           self.min_resp,
                           max_mem_mb=self.max_ram,
                           spec_label=spec_label,
                           show_plots=show_plots,
                           save_plots=save_plots,
                           do_histogram=do_histogram)

        self.h5_file.close()

        return h5_path
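Both this translator and the next lean on the same pyUSID scaffolding: indexed Measurement/Channel groups, position and spectroscopic Dimension lists, and write_main_dataset for the 2D (positions x spectral values) main dataset. A minimal end-to-end sketch of that scaffolding follows; it assumes pyUSID's hdf_utils API as used throughout these examples (import paths may vary between pyUSID versions), and the file name and sizes are placeholders.

import h5py
import numpy as np
from pyUSID import Dimension
from pyUSID.io.hdf_utils import create_indexed_group, write_main_dataset

num_rows, num_cols, num_bins = 4, 5, 8

with h5py.File('sketch.h5', 'w') as h5_f:
    meas_grp = create_indexed_group(h5_f, 'Measurement')  # -> Measurement_000
    chan_grp = create_indexed_group(meas_grp, 'Channel')  # -> Channel_000
    # Position dimensions: the fastest-varying one (X) is listed first
    pos_dims = [Dimension('X', 'nm', num_cols),
                Dimension('Y', 'nm', num_rows)]
    spec_dims = [Dimension('Frequency', 'Hz',
                           np.linspace(250e3, 350e3, num_bins))]
    # Allocate the empty main dataset, then fill it
    h5_main = write_main_dataset(chan_grp, (num_rows * num_cols, num_bins),
                                 'Raw_Data', 'Deflection', 'V',
                                 pos_dims, spec_dims, dtype=np.float32)
    h5_main[...] = np.random.rand(num_rows * num_cols, num_bins)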
Example #26
    def translate(self, parm_path):
        """
        Basic method that translates .mat data files to a single .h5 file
        
        Parameters
        ------------
        parm_path : string / unicode
            Absolute file path of the parameters .mat file. 
            
        Returns
        ----------
        h5_path : string / unicode
            Absolute path of the translated h5 file
        """
        self.parm_path = path.abspath(parm_path)
        (folder_path, file_name) = path.split(parm_path)
        (_, base_name) = path.split(folder_path)  # the data folder's name doubles as the dataset name
        h5_path = path.join(folder_path, base_name + '.h5')

        # Read parameters
        parm_dict = readGmodeParms(parm_path)

        # Add the w^2 specific parameters to this list
        parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
        # freq_sweep_parms = parm_data['freqSweepParms']
        # parm_dict['freq_sweep_delay'] = np.float(freq_sweep_parms['delay'].item())
        gen_sig = parm_data['genSig']
        # parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
        freq_array = np.float32(parm_data['freqArray'])  # needed below to build the spectroscopic dimensions

        # prepare and write spectroscopic values
        samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
        num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)

        w_vec = np.arange(-0.5 * samp_rate, 0.5 * samp_rate, np.float32(samp_rate / num_bins))

        # There is most likely a more elegant way to build this (np.meshgrid, perhaps), but tile/repeat works:
        spec_val_mat = np.zeros((len(freq_array) * num_bins, 2), dtype=VALUES_DTYPE)
        spec_val_mat[:, 0] = np.tile(w_vec, len(freq_array))
        spec_val_mat[:, 1] = np.repeat(freq_array, num_bins)

        spec_ind_mat = np.zeros((2, len(freq_array) * num_bins), dtype=np.int32)
        spec_ind_mat[0, :] = np.tile(np.arange(num_bins), len(freq_array))
        spec_ind_mat[1, :] = np.repeat(np.arange(len(freq_array)), num_bins)
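        # Illustration (hypothetical sizes): with num_bins = 3 and two excitation
        # frequencies, the two index rows come out as
        #   np.tile(np.arange(3), 2)   -> [0, 1, 2, 0, 1, 2]  (bin index, fast)
        #   np.repeat(np.arange(2), 3) -> [0, 0, 0, 1, 1, 1]  (frequency index, slow)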

        num_rows = parm_dict['grid_num_rows']
        num_cols = parm_dict['grid_num_cols']
        parm_dict['data_type'] = 'GVS'

        num_pix = num_rows * num_cols

        global_parms = generate_dummy_main_parms()
        global_parms['grid_size_x'] = parm_dict['grid_num_cols']
        global_parms['grid_size_y'] = parm_dict['grid_num_rows']
        # assuming that the experiment was completed:
        global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
        global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
        global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
        global_parms['translator'] = 'GVS'

        # Now start creating datasets and populating:
        if path.exists(h5_path):
            remove(h5_path)

        h5_f = h5py.File(h5_path, 'w')
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, parm_dict)


        pos_dims = [Dimension('X', 'nm', num_cols),
                    Dimension('Y', 'nm', num_rows)]
        spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                     Dimension('Excitation Frequency', 'Hz', len(freq_array))]

        # Minimize file size to the extent possible.
        # DAQs are rated at 16 bit, so a 16-bit type would suffice, but float32 is used here.
        # For some reason, compression is more effective on time series data.

        h5_main = write_main_dataset(chan_grp, (num_pix, num_bins), 'Raw_Data',
                                     'Deflection', 'V',
                                     pos_dims, spec_dims,
                                     chunks=(1, num_bins), dtype=np.float32)

        h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

        # Now doing link_h5_objects_as_attrs:
        link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(folder_path, 'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                print('Working on row {} col {}'.format(row_ind, col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    pix_mat = pix_data['AI_mat']
                    # Take the inverse FFT on 2nd dimension
                    pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                    # Verified with Matlab - no conjugate required here.
                    pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                    h5_main[pos_ind, :] = np.float32(pix_vec)
                    h5_f.flush()  # flush from memory!
                else:
                    print('File not found for: row {} col {}'.format(row_ind, col_ind))
                pos_ind += 1
                if (100 * pos_ind) % (10 * num_pix) == 0:  # integer check avoids float-modulo surprises
                    print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

        h5_f.close()

        return h5_path
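The per-pixel reconstruction step above (undo the fftshift, inverse-FFT along the second axis, then flatten) is easy to sanity-check on synthetic data. A small self-contained sketch, with arbitrary array sizes standing in for the contents of the fSweep .mat files:

import numpy as np

num_freqs, num_bins = 4, 16

# Build a synthetic time-domain signal and its shifted spectrum,
# mimicking what 'AI_mat' stores for one pixel
time_signals = np.random.rand(num_freqs, num_bins)
ai_mat = np.fft.fftshift(np.fft.fft(time_signals, axis=1), axes=1)

# The translator's inverse path: undo the shift, then inverse FFT on axis 1
recovered = np.fft.ifft(np.fft.ifftshift(ai_mat, axes=1), axis=1)
assert np.allclose(recovered.real, time_signals)

# Flatten the same way the translator does before writing one pixel's vector
pix_vec = recovered.transpose().reshape(recovered.size)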