def _translate_image_stack(self, h5_meas_grp):
    """
    Writes every scan image listed in the metadata to its own channel group

    One set of position and spectroscopic ancillary datasets is created
    directly under the measurement group and shared by all the channels.

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """
    image_tag = 'Ciao image list'

    # Images have no real spectroscopic axis - a single dummy dimension is shared by all channels
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(
        h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=True)

    # The first image entry found in the metadata sets the size of the position grid
    first_key = next((key for key in self.meta_data.keys() if image_tag in key), None)
    layer_info = None if first_key is None else self.meta_data[first_key]

    x_dim = Dimension('X', 'nm', layer_info['Samps/line'])
    y_dim = Dimension('Y', 'nm', layer_info['Number of lines'])
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, [x_dim, y_dim], is_spectral=False)

    for class_name in self.meta_data.keys():
        if image_tag not in class_name:
            continue

        layer_info = self.meta_data[class_name]
        # The popped entry names the quantity and is therefore not written again as an attribute
        quantity = layer_info.pop('Image Data_2')
        flattened = np.reshape(self._read_image_layer(layer_info), (-1, 1))

        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        # Quantity and Units needs to be fixed by someone who understands these files better
        write_main_dataset(h5_chan_grp, flattened, 'Raw_Data', quantity, 'a. u.', None, None,
                           dtype=np.float32, compression='gzip',
                           h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                           h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
        # Think about standardizing attributes for rows and columns
        write_simple_attrs(h5_chan_grp, layer_info)
def _translate_force_curve(self, h5_meas_grp):
    """
    Writes every force curve listed in the metadata to its own channel group

    One set of position and spectroscopic ancillary datasets is created
    directly under the measurement group and shared by all the channels.

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """
    curve_tag = 'Ciao force image list'

    # A force curve is written as a single position shared by all channels
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(
        h5_meas_grp, Dimension('single', 'a. u.', 1), is_spectral=False)

    # The first force-curve entry found in the metadata sets the length of the spectroscopic axis
    first_key = next((key for key in self.meta_data.keys() if curve_tag in key), None)
    layer_info = None if first_key is None else self.meta_data[first_key]

    # 'Samps/line' is a space-separated list of counts; the Z axis spans their sum
    segment_lengths = [int(token) for token in layer_info['Samps/line'].split(' ')]
    z_dim = Dimension('Z', 'nm', int(np.sum(segment_lengths)))
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, z_dim, is_spectral=True)

    for class_name in self.meta_data.keys():
        if curve_tag not in class_name:
            continue

        layer_info = self.meta_data[class_name]
        # The popped entry names the quantity and is therefore not written again as an attribute
        quantity = layer_info.pop('Image Data_4')
        single_row = np.expand_dims(self._read_data_vector(layer_info), axis=0)

        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        # Quantity and Units needs to be fixed by someone who understands these files better
        write_main_dataset(h5_chan_grp, single_row, 'Raw_Data', quantity, 'a. u.', None, None,
                           dtype=np.float32, compression='gzip',
                           h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                           h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
        # Think about standardizing attributes
        write_simple_attrs(h5_chan_grp, layer_info)
def _write_results_chunk(self):
    """
    Stores the cluster labels and the mean response of every cluster in the h5 file

    Returns
    ---------
    h5_group : HDF5 Group reference
        Reference to the group that contains the clustering results
    """
    print('Writing clustering results to file.')

    cluster_count = self.__mean_resp.shape[0]

    h5_cluster_group = create_results_group(self.h5_main, self.process_name)
    write_simple_attrs(h5_cluster_group, self.parms_dict)
    h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

    # Labels: one unsigned integer per position, reusing the position datasets of the source
    label_column = np.uint32(self.__labels.reshape([-1, 1]))
    write_main_dataset(h5_cluster_group, label_column, 'Labels', 'Cluster ID', 'a. u.', None,
                       Dimension('Cluster', 'ID', 1),
                       h5_pos_inds=self.h5_main.h5_pos_inds,
                       h5_pos_vals=self.h5_main.h5_pos_vals,
                       aux_spec_prefix='Cluster_', dtype=np.uint32)

    if self.num_comps == self.h5_main.shape[1]:
        # Every spectral component was used - link to the source spectroscopic datasets
        h5_inds = self.h5_main.h5_spec_inds
        h5_vals = self.h5_main.h5_spec_vals
    else:
        # Only a subset of the components was used. The slice may not be contiguous, so the
        # selected spectroscopic values are stored as one flat 'Original_Spectral_Index' dimension
        component_slice = self.data_slice[1]
        if isinstance(component_slice, np.ndarray):
            component_slice = component_slice.tolist()
        selected_vals = self.h5_main.h5_spec_vals[:, component_slice].squeeze()
        h5_inds, h5_vals = write_ind_val_dsets(
            h5_cluster_group,
            Dimension('Original_Spectral_Index', 'a.u.', selected_vals),
            is_spectral=True)

    # Mean response: one row per cluster, positions are simply the cluster indices
    quantity = get_attr(self.h5_main, 'quantity')[0]
    units = get_attr(self.h5_main, 'units')[0]
    write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response', quantity, units,
                       Dimension('Cluster', 'a. u.', np.arange(cluster_count)), None,
                       h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                       h5_spec_vals=h5_vals)

    return h5_cluster_group
def _translate_image_stack(self, h5_meas_grp):
    """
    Reads each scan image named in the metadata and writes it as a main dataset

    The ancillary position and spectroscopic datasets are written once under
    the measurement group; every image channel links to the same ones.

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """

    def is_image_entry(name):
        return 'Ciao image list' in name

    # Shared spectroscopic datasets: images only need a single dummy dimension
    spec_dim = Dimension('single', 'a. u.', 1)
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, spec_dim, is_spectral=True)

    # Find out the size of the images from the first matching metadata entry
    layer_info = None
    for entry_name in filter(is_image_entry, self.meta_data.keys()):
        layer_info = self.meta_data[entry_name]
        break

    pos_dims = [Dimension('X', 'nm', layer_info['Samps/line']),
                Dimension('Y', 'nm', layer_info['Number of lines'])]
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, pos_dims, is_spectral=False)

    shared_ancillaries = dict(h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                              h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)

    for entry_name in filter(is_image_entry, self.meta_data.keys()):
        layer_info = self.meta_data[entry_name]
        # Removing this entry keeps it out of the attributes written at the end of the loop
        quantity = layer_info.pop('Image Data_2')
        image = self._read_image_layer(layer_info)

        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        # Quantity and Units needs to be fixed by someone who understands these files better
        write_main_dataset(h5_chan_grp, np.reshape(image, (-1, 1)), 'Raw_Data',
                           quantity, 'a. u.', None, None,
                           dtype=np.float32, compression='gzip', **shared_ancillaries)
        # Think about standardizing attributes for rows and columns
        write_simple_attrs(h5_chan_grp, layer_info)
def _translate_force_curve(self, h5_meas_grp):
    """
    Reads each force curve named in the metadata and writes it as a main dataset

    The ancillary position and spectroscopic datasets are written once under
    the measurement group; every force-curve channel links to the same ones.

    Parameters
    ----------
    h5_meas_grp : h5py.Group object
        Reference to the measurement group
    """

    def is_force_entry(name):
        return 'Ciao force image list' in name

    # Shared position datasets: a force curve occupies a single position
    pos_dim = Dimension('single', 'a. u.', 1)
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_meas_grp, pos_dim, is_spectral=False)

    # Find out the size of the force curves from the first matching metadata entry
    layer_info = None
    for entry_name in filter(is_force_entry, self.meta_data.keys()):
        layer_info = self.meta_data[entry_name]
        break

    # 'Samps/line' holds space-separated counts whose sum is the length of the Z axis
    tr_rt = [int(count) for count in layer_info['Samps/line'].split(' ')]
    spec_dim = Dimension('Z', 'nm', int(np.sum(tr_rt)))
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_meas_grp, spec_dim, is_spectral=True)

    shared_ancillaries = dict(h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                              h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)

    for entry_name in filter(is_force_entry, self.meta_data.keys()):
        layer_info = self.meta_data[entry_name]
        # Removing this entry keeps it out of the attributes written at the end of the loop
        quantity = layer_info.pop('Image Data_4')
        curve = self._read_data_vector(layer_info)

        h5_chan_grp = create_indexed_group(h5_meas_grp, 'Channel')
        # Quantity and Units needs to be fixed by someone who understands these files better
        write_main_dataset(h5_chan_grp, np.expand_dims(curve, axis=0), 'Raw_Data',
                           quantity, 'a. u.', None, None,
                           dtype=np.float32, compression='gzip', **shared_ancillaries)
        # Think about standardizing attributes
        write_simple_attrs(h5_chan_grp, layer_info)
def _create_results_datasets(self):
    """
    Creates all the datasets necessary for holding all parameters + data.

    The following are created inside a fresh results group:

    * 'Composite_Filter' - only when self.composite_filter is a numpy array
    * new position datasets - only when self.num_effective_pix differs from the
      number of rows in the source dataset, otherwise the source ones are reused
    * 'Noise_Floors' - only when self.noise_threshold is not None
    * 'Filtered_Data' - only when self.write_filtered is set
    * 'Condensed_Data' - only when self.write_condensed is set

    self.hot_inds is reset to None unless the condensed dataset is requested.
    """
    self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

    self.parms_dict.update({'last_pixel': 0, 'algorithm': 'pycroscopy_SignalFilter'})
    write_simple_attrs(self.h5_results_grp, self.parms_dict)

    assert isinstance(self.h5_results_grp, h5py.Group)

    # Only the first rank reports progress
    chatty = self.verbose and self.mpi_rank == 0

    if isinstance(self.composite_filter, np.ndarray):
        self.h5_results_grp.create_dataset('Composite_Filter',
                                           data=np.float32(self.composite_filter))
        if chatty:
            print('Rank {} - Finished creating the Composite_Filter dataset'.format(self.mpi_rank))

    # First create the position datasets if the new indices are smaller...
    if self.num_effective_pix != self.h5_main.shape[0]:
        # TODO: Do this part correctly - the new position datasets should be derived from
        # every n'th index / value of the source position datasets instead of a generic
        # 'pixel' dimension.
        h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(
            self.h5_results_grp, Dimension('pixel', 'a.u.', self.num_effective_pix),
            is_spectral=False, verbose=chatty)
        if chatty:
            print('Rank {} - Created the new position ancillary dataset'.format(self.mpi_rank))
    else:
        h5_pos_inds_new = self.h5_main.h5_pos_inds
        h5_pos_vals_new = self.h5_main.h5_pos_vals
        if chatty:
            print('Rank {} - Reusing source datasets position datasets'.format(self.mpi_rank))

    if self.noise_threshold is not None:
        self.h5_noise_floors = write_main_dataset(
            self.h5_results_grp, (self.num_effective_pix, 1), 'Noise_Floors', 'Noise', 'a.u.',
            None, Dimension('arb', '', [1]), dtype=np.float32, aux_spec_prefix='Noise_Spec_',
            h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new, verbose=chatty)
        if chatty:
            print('Rank {} - Finished creating the Noise_Floors dataset'.format(self.mpi_rank))

    if self.write_filtered:
        # Filtered data is identical to Main_Data in every way - just a duplicate
        self.h5_filtered = create_empty_dataset(self.h5_main, self.h5_main.dtype, 'Filtered_Data',
                                                h5_group=self.h5_results_grp)
        if chatty:
            print('Rank {} - Finished creating the Filtered dataset'.format(self.mpi_rank))

    self.hot_inds = None

    if self.write_condensed:
        self.hot_inds = np.where(self.composite_filter > 0)[0]
        # only need to keep half the data
        self.hot_inds = np.uint(self.hot_inds[int(0.5 * len(self.hot_inds)):])
        # hot_inds has already been halved above, so the spectroscopic dimension must span
        # all of it to agree with the number of columns in the dataset created below
        condensed_spec = Dimension('hot_frequencies', '', len(self.hot_inds))
        # np.complex was only an alias of the builtin complex (complex128) and no longer
        # exists in recent NumPy releases
        self.h5_condensed = write_main_dataset(
            self.h5_results_grp, (self.num_effective_pix, len(self.hot_inds)), 'Condensed_Data',
            'Complex', 'a. u.', None, condensed_spec,
            h5_pos_inds=h5_pos_inds_new, h5_pos_vals=h5_pos_vals_new,
            dtype=np.complex128, verbose=chatty)
        if chatty:
            print('Rank {} - Finished creating the Condensed dataset'.format(self.mpi_rank))

    if self.mpi_size > 1:
        self.mpi_comm.Barrier()
    self.h5_main.file.flush()
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file. None is returned, after a warning, when the
        size of the main data file does not correspond to a whole number of rows.
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    basename, parm_paths, data_paths = self._parse_file_path(file_path)

    folder_path = path.split(file_path)[0]
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))
    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])

    self.points_per_pixel = len(be_wave)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # NOTE(review): the factor of 4 is presumably the number of bytes per sample - confirm
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    self.num_rows = int(self.num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # The excitation frequency is corrected such that a whole number of periods fits in a pixel
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = file_size // self.num_rows

    # The context manager guarantees that the file is closed even if writing fails midway
    with h5py.File(h5_path, 'w') as h5_f:
        # First finish writing all global parameters
        write_simple_attrs(h5_f, {'data_type': 'G_mode_line', 'translator': 'G_mode_line'})

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        # The ancillary datasets are written only once, alongside the first channel, since only
        # one excitation waveform is used. Only the space for the main data is allocated per
        # raw data file before its contents are transferred.
        h5_pos_inds = h5_pos_vals = h5_spec_inds = h5_spec_vals = None

        for raw_data_path in data_paths.values():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if h5_pos_inds is None:
                # With several channels the ancillary datasets are shared and live in the
                # measurement group; with a single channel they live in the channel group
                anc_parent = meas_grp if len(data_paths) > 1 else chan_grp
                h5_pos_inds, h5_pos_vals = write_ind_val_dsets(anc_parent, pos_desc,
                                                               is_spectral=False)
                h5_spec_inds, h5_spec_vals = write_ind_val_dsets(anc_parent, spec_desc,
                                                                 is_spectral=True)

            h5_main = write_main_dataset(chan_grp,
                                         (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V', None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(raw_data_path, h5_main)

    print('G-Line translation complete!')

    return h5_path
def translate(self, file_path, verbose=False, parm_encoding='utf-8', ftype='FF',
              subfolder='Measurement_000', h5_path='', channel_label_name=True):
    """
    Translates the provided file to .h5
    Adapted heavily from pycroscopy IBW file, modified to work with Ginger format

    :param file_path: Absolute path of the .ibw file
    :type file_path: String / unicode

    :param verbose: Whether or not to show print statements for debugging
    :type verbose: boolean, optional

    :param parm_encoding: Codec to be used to decode the bytestrings into Python strings if needed.
        Default 'utf-8'
    :type parm_encoding: str, optional

    :param ftype: Delineates Ginger Lab imaging file type to be imported (not case-sensitive)
        'FF' : FF-trEFM
        'SKPM' : FM-SKPM
        'ringdown' : Ringdown
        'trEFM' : normal trEFM
    :type ftype: str, optional

    :param subfolder: Specifies folder under root (/) to save data in. Default is standard
        pycroscopy format. Currently not used by this method: everything is written to the root.
    :type subfolder: str, optional

    :param h5_path: Path of the H5 file to write to. When empty, the path is derived from
        file_path. NOTE(review): the file is opened in 'w' mode, which truncates an existing
        file, although this parameter was originally described as "Existing H5 file to
        append to" - confirm the intended behavior.
    :type h5_path: str, optional

    :param channel_label_name: If True, uses the Channel as the subfolder name (e.g. Height, Phase,
        Amplitude, Charging). Currently not used by this method.
    :type channel_label_name: bool, optional

    :returns: Absolute path of the .h5 file
    :rtype: String / unicode
    """
    # Prepare the path of the .h5 file:
    if not h5_path:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        # hard-coded exception, rarely occurs but can be useful
        if path.exists(h5_path):
            h5_path = path.join(folder_path, base_name + '_00.h5')

    # Load and parse the ibw file BEFORE touching the .h5 file so that a bad input file
    # neither truncates an existing .h5 file nor leaves an empty one behind
    ibw_wave = bw.load(file_path).get('wave')

    parm_dict = self._read_parms(ibw_wave, parm_encoding)
    chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

    if verbose:
        print('Channels and units found:')
        print(chan_labels)
        print(chan_units)

    images = ibw_wave.get('wData')

    if images.shape[2] != len(chan_labels):
        chan_labels = chan_labels[1:]  # for weird null set errors in older AR software

    # Check if a Ginger Lab format ibw (has 'UserIn' in channel labels)
    if any('UserIn0' in str(label) for label in chan_labels):
        chan_labels = self._get_image_type(chan_labels, ftype)

    if verbose:
        print('Processing image type', ftype, 'with channels', chan_labels)

    header = ibw_wave['wave_header']
    num_rows = header['nDim'][1]  # lines
    num_cols = header['nDim'][0]  # points

    # The second-to-last character of the string form of each unit entry is handed to
    # self._get_unit_factor to obtain the scale factor
    unit_scale = self._get_unit_factor(''.join([str(s)[-2] for s in header['dimUnits'][0][0:2]]))
    # Result is not used below; the call is retained because self._get_unit_factor is not
    # visible from here and may reject unexpected units
    self._get_unit_factor(str(header['dataUnits'][0])[-2])

    parm_dict['FastScanSize'] = unit_scale * num_cols * header['sfA'][0]
    parm_dict['SlowScanSize'] = unit_scale * num_rows * header['sfA'][1]

    images = images.transpose(2, 0, 1)  # now ordered as [chan, Y, X] image
    images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

    pos_desc = [Dimension(name='X', units='m',
                          values=np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension(name='Y', units='m',
                          values=np.linspace(0, parm_dict['SlowScanSize'], num_rows))]
    spec_desc = [Dimension(name='arb', units='a.u.', values=[1])]

    # The context manager guarantees that the file is closed even if writing fails midway
    with h5py.File(h5_path, 'w') as h5_file:
        h5_root = h5_file['/']

        # Create Position and spectroscopic datasets at the root of the file
        write_ind_val_dsets(h5_root, pos_desc, is_spectral=False)
        write_ind_val_dsets(h5_root, spec_desc, is_spectral=True)

        # One indexed group, named after the channel, per channel
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(h5_root, chan_name)
            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit, pos_desc, spec_desc, dtype=np.float32)

        if verbose:
            print('Finished writing all channels')

    return h5_path
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file. None is returned, after a warning, when the
        size of the main data file does not correspond to a whole number of rows.
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    basename, parm_paths, data_paths = super(GTuneTranslator, self)._parse_file_path(file_path)

    folder_path = path.split(file_path)[0]
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                      'BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))
    be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])

    self.points_per_pixel = len(be_wave)
    self.points_per_line = len(be_wave_train)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # NOTE(review): the factor of 4 is presumably the number of bytes per sample - confirm
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    self.num_rows = int(self.num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # The excitation frequency is corrected such that a whole number of periods fits in a pixel
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['points_per_line'] = self.points_per_line
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = file_size // self.num_rows

    # The context manager guarantees that the file is closed even if writing fails midway
    with h5py.File(h5_path, 'w') as h5_file:
        # First finish writing all global parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement group and write the appropriate parameters to it
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # The ancillary datasets are shared by all channels since only one excitation waveform
        # is used. Only the space for the main data is allocated per raw data file before its
        # contents are transferred.
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        for raw_data_path in data_paths.values():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp,
                                         (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V', None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(raw_data_path, h5_main)

    print('G-Tune translation complete!')

    return h5_path
def translate(self, data_channels=None, verbose=False):
    """
    Translate the data into a Pycroscopy compatible HDF5 file.

    Any existing file at self.h5_path is deleted first.

    Parameters
    ----------
    data_channels : (optional) list of str
        Names of channels that will be read and stored in the file.
        If not given, all channels in the file will be used.
    verbose : (optional) Boolean
        Whether or not to print statements

    Returns
    -------
    h5_path : str
        Filepath to the output HDF5 file.
    """
    # Read the source file only if that has not been done already
    if self.parm_dict is None or self.data_dict is None:
        self._read_data(self.data_path)

    if data_channels is None:
        print('No channels specified. All channels in file will be used.')
        data_channels = self.parm_dict['channel_parms'].keys()

    if verbose:
        print('Using the following channels')
        for channel in data_channels:
            print(channel)

    if os.path.exists(self.h5_path):
        os.remove(self.h5_path)

    # The context manager guarantees that the file is flushed and closed even if writing
    # fails midway
    with h5py.File(self.h5_path, 'w') as h5_file:
        # Create measurement group and assign attributes
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, self.parm_dict['meas_parms'])

        # Create datasets for positional and spectroscopic indices and values
        spec_dim = self.data_dict['Spectroscopic Dimensions']
        pos_dims = self.data_dict['Position Dimensions']
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim, is_spectral=True)

        # Create the datasets for all the channels; they all share the ancillary datasets
        num_points = h5_pos_inds.shape[0]
        for data_channel in data_channels:
            channel_parms = self.parm_dict['channel_parms'][data_channel]
            # One row per position, whatever remains becomes the spectroscopic axis
            raw_data = self.data_dict[data_channel].reshape([num_points, -1])

            chan_grp = create_indexed_group(meas_grp, 'Channel')
            write_simple_attrs(chan_grp, channel_parms)
            write_main_dataset(chan_grp, raw_data, 'Raw_Data',
                               data_channel, channel_parms['Unit'], None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
            h5_file.flush()

    print('Nanonis translation complete.')

    return self.h5_path
def _write_results_chunk(self):
    """
    Stores the cluster labels and the mean response of every cluster in the h5 file
    and marks all positions as completed

    Returns
    ---------
    h5_group : HDF5 Group reference
        Reference to the group that contains the clustering results
    """
    print('Writing clustering results to file.')

    cluster_count = self.__mean_resp.shape[0]

    h5_cluster_group = create_results_group(self.h5_main, self.process_name)
    write_simple_attrs(h5_cluster_group, self.parms_dict)

    # Labels: one unsigned integer per position, reusing the position datasets of the source
    label_column = np.uint32(self.__labels.reshape([-1, 1]))
    write_main_dataset(h5_cluster_group, label_column, 'Labels', 'Cluster ID', 'a. u.', None,
                       Dimension('Cluster', 'ID', 1),
                       h5_pos_inds=self.h5_main.h5_pos_inds,
                       h5_pos_vals=self.h5_main.h5_pos_vals,
                       aux_spec_prefix='Cluster_', dtype=np.uint32)

    if self.num_comps == self.h5_main.shape[1]:
        # Every spectral component was used - link to the source spectroscopic datasets
        h5_inds = self.h5_main.h5_spec_inds
        h5_vals = self.h5_main.h5_spec_vals
    else:
        # Only a subset of the components was used. The slice may not be contiguous, so the
        # selected spectroscopic values are stored as one flat 'Original_Spectral_Index' dimension
        component_slice = self.data_slice[1]
        if isinstance(component_slice, np.ndarray):
            component_slice = component_slice.tolist()
        selected_vals = self.h5_main.h5_spec_vals[:, component_slice].squeeze()
        h5_inds, h5_vals = write_ind_val_dsets(
            h5_cluster_group,
            Dimension('Original_Spectral_Index', 'a.u.', selected_vals),
            is_spectral=True)

    # Mean response: one row per cluster, positions are simply the cluster indices
    quantity = get_attr(self.h5_main, 'quantity')[0]
    units = get_attr(self.h5_main, 'units')[0]
    write_main_dataset(h5_cluster_group, self.__mean_resp, 'Mean_Response', quantity, units,
                       Dimension('Cluster', 'a. u.', np.arange(cluster_count)), None,
                       h5_spec_inds=h5_inds, aux_pos_prefix='Mean_Resp_Pos_',
                       h5_spec_vals=h5_vals)

    # Marking completion: one flag per row of the source dataset, all set
    self._status_dset_name = 'completed_positions'
    completed_flags = np.ones(self.h5_main.shape[0], dtype=np.uint8)
    self._h5_status_dset = h5_cluster_group.create_dataset(self._status_dset_name,
                                                           data=completed_flags)
    # keeping legacy option:
    h5_cluster_group.attrs['last_pixel'] = self.h5_main.shape[0]

    return h5_cluster_group
def translate(self, data_channels=None, verbose=False):
    """
    Translates the data in the Nanonis file into a Pycroscopy compatible HDF5 file.

    Any existing file at self.h5_path is deleted first.

    Parameters
    ----------
    data_channels : (optional) list of str
        Names of channels that will be read and stored in the file.
        If not given, all channels in the file will be used.
        Each name is expected to end with the unit as its last whitespace-separated
        token, optionally wrapped in parentheses.
    verbose : (optional) Boolean
        Whether or not to print statements

    Returns
    -------
    h5_path : str
        Filepath to the output HDF5 file.
    """
    # Read the source file only if that has not been done already
    if self.parm_dict is None or self.data_dict is None:
        self._read_data(self.data_path)

    if data_channels is None:
        print('No channels specified. All channels in file will be used.')
        data_channels = self.parm_dict['channels']

    if verbose:
        print('Using the following channels')
        for channel in data_channels:
            print(channel)

    if os.path.exists(self.h5_path):
        os.remove(self.h5_path)

    # The context manager guarantees that the file is flushed and closed even if writing
    # fails midway
    with h5py.File(self.h5_path, 'w') as h5_file:
        meas_grp = create_indexed_group(h5_file, 'Measurement')

        # The sweep signal becomes the spectroscopic dimension. Its name is split from the
        # right, exactly like the channel names below, so that labels made of several words
        # are accepted too
        dc_offset = self.data_dict['sweep_signal']
        spec_label, spec_units = self.parm_dict['sweep_signal'].rsplit(maxsplit=1)
        spec_units = spec_units.strip('()')
        spec_dim = Dimension(spec_label, spec_units, dc_offset)
        pos_dims = self.data_dict['Position Dimensions']

        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_dims, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_dim, is_spectral=True)

        # All channels share the ancillary datasets created above
        num_points = h5_pos_inds.shape[0]
        for data_channel in data_channels:
            # One row per position, whatever remains becomes the spectroscopic axis
            # NOTE(review): the data are scaled by 1E9 (to nA) while the unit written below is
            # taken verbatim from the channel name - confirm that the two agree
            raw_data = self.data_dict[data_channel].reshape([num_points, -1]) * 1E9  # Convert to nA

            chan_grp = create_indexed_group(meas_grp, 'Channel')

            data_label, data_unit = data_channel.rsplit(maxsplit=1)
            data_unit = data_unit.strip('()')

            write_main_dataset(chan_grp, raw_data, 'Raw_Data',
                               data_label, data_unit, None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals)
            h5_file.flush()

    print('Nanonis translation complete.')

    return self.h5_path
def translate(self, file_path, verbose=False, append_path='', grp_name='Measurement', parm_encoding='utf-8'):
    """
    Translates the provided file to .h5

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of the .ibw file
    verbose : Boolean (Optional)
        Whether or not to show print statements for debugging
    append_path : string (Optional)
        h5_file to add these data to, must be a path to the h5_file on disk
    grp_name : string (Optional)
        Change from default "Measurement" name to something specific
    parm_encoding : str, optional
        Codec to be used to decode the bytestrings into Python strings if needed.
        Default 'utf-8'

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the .h5 file

    Raises
    ------
    Exception
        If `append_path` is provided but no file exists at that path
    """
    file_path = path.abspath(file_path)

    # Work out where the output goes. The file itself is only opened once the
    # .ibw has been parsed, so a bad input never destroys an existing .h5
    folder_path, base_name = path.split(file_path)
    base_name = path.splitext(base_name)[0]

    if append_path:
        h5_path = append_path
        if not path.exists(append_path):
            raise Exception('File does not exist. Check pathname.')
    else:
        h5_path = path.join(folder_path, base_name + '.h5')

    # Load the ibw file first
    ibw_obj = bw.load(file_path)
    ibw_wave = ibw_obj.get('wave')
    parm_dict = self._read_parms(ibw_wave, parm_encoding)
    chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

    if verbose:
        print('Channels and units found:')
        print(chan_labels)
        print(chan_units)

    # Get the data to figure out if this is an image or a force curve
    images = ibw_wave.get('wData')

    if images.shape[-1] != len(chan_labels):
        # for layer 0 null set errors in older AR software.
        # Labels and units are zipped together further down, so both lists
        # must be trimmed to keep every channel paired with its own unit
        chan_labels = chan_labels[1:]
        chan_units = chan_units[1:]

    if images.ndim == 3:  # Image stack
        if verbose:
            print('Found image stack of size {}'.format(images.shape))
        type_suffix = 'Image'

        num_rows = parm_dict['ScanLines']
        num_cols = parm_dict['ScanPoints']

        images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
        images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

        pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                    Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

        spec_desc = Dimension('arb', 'a.u.', [1])

    else:  # single force curve
        if verbose:
            print('Found force curve of size {}'.format(images.shape))

        type_suffix = 'ForceCurve'
        images = np.atleast_3d(images)  # now [Z, chan, 1]
        images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

        # For now, the Z sensor data is used as the spectroscopic values.
        # Prefer the 'ZSnsr' channel and fall back to 'Raw'
        for z_label in ('ZSnsr', 'Raw'):
            if z_label in chan_labels:
                chan_ind = chan_labels.index(z_label)
                spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
                break
        else:
            # We don't expect to come here. If we do, plain indices are used
            spec_data = np.arange(images.shape[2])

        pos_desc = Dimension('X', 'm', [1])
        spec_desc = Dimension('Z', 'm', spec_data)

    if not append_path and path.exists(h5_path):
        remove(h5_path)

    # The context manager closes the file even if one of the writes fails
    with h5py.File(h5_path, 'r+' if append_path else 'w') as h5_file:
        # Create measurement group
        meas_grp = create_indexed_group(h5_file, grp_name)

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets shared by all channels
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Write one Raw_Data dataset per channel
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            if verbose:
                print('channel', chan_name)
                print('unit', chan_unit)
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

    return h5_path
with h5py.File(h5_path, mode='r+') as h5_f: h5_grp = h5_f['Measurement_000/Channel_000'] f = usid.hdf_utils.get_attr(h5_grp, 'excitation_frequency_[Hz]') V0 = usid.hdf_utils.get_attr(h5_grp, 'excitation_amplitude_[V]') #original dataset h5_resh = usid.USIDataset(h5_grp[ 'Raw_Data-FFT_Filtering_000/Filtered_Data-Reshape_000/Reshaped_Data']) pixelInds = np.random.randint(0, h5_resh[()].shape[0], numPixels) print("PixelInds is {} with shape {}".format(pixelInds, pixelInds.shape)) #copy subset to new h5 file f = h5py.File('subsetFile{}.h5'.format(time.time()), 'a') subsetGroup = f.create_group("subsetBoi") h5_spec_inds, h5_spec_vals = write_ind_val_dsets( subsetGroup, Dimension("Bias", "V", int(h5_resh.h5_spec_inds.size)), is_spectral=True) h5_spec_vals[()] = h5_resh.h5_spec_vals[()] h5_pos_inds, h5_pos_vals = write_ind_val_dsets(subsetGroup, Dimension( "Position", "m", numPixels), is_spectral=False) #h5_pos_vals[()] = h5_resh.h5_pos_vals[()][pixelInds, :] h5_subset = write_main_dataset(subsetGroup, (numPixels, h5_resh.shape[1]), "Measured Current", "Current", "nA", None, None, dtype=np.float64,
def _create_results_datasets(self):
    """
    Sets up the results group and every dataset that will hold the parameters
    and the outputs of this process: resistance (and its uncertainty),
    capacitance, reconstructed current and corrected current.
    """
    self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

    # Record the parameters (including full_V and num_pixels) on the new group
    self.params_dict.update(last_pixel=0,
                            algorithm='pycroscopy_AdaptiveBayesianInference')
    write_simple_attrs(self.h5_results_grp, self.params_dict)

    assert isinstance(self.h5_results_grp, h5py.Group)

    num_pixels = self.h5_main.shape[0]
    num_bias_pts = self.full_V.size
    num_resist_pts = 2 * self.M

    # Spectroscopic datasets for the current: reuse those of the main dataset
    # unless the data was parsed down, in which case smaller ones are written.
    # By convention, full_V is converted back to a sine wave before storing.
    if self.parse_mod == 1:
        h5_spec_inds_new = self.h5_main.h5_spec_inds
        h5_spec_vals_new = self.h5_main.h5_spec_vals
    else:
        h5_spec_inds_new, h5_spec_vals_new = write_ind_val_dsets(
            self.h5_results_grp,
            Dimension("Bias", "V", num_bias_pts),
            is_spectral=True)
        h5_spec_vals_new[()] = get_unshifted_response(self.full_V, self.shift_index)

    # Separate spectroscopic datasets for R and R_sig
    h5_spec_inds_R, h5_spec_vals_R = write_ind_val_dsets(
        self.h5_results_grp,
        Dimension("Bias", "V", num_resist_pts),
        is_spectral=True,
        base_name="Spectroscopic_R")
    h5_spec_vals_R[()] = np.concatenate((self.x, self.x)).T

    # Main datasets.
    # Since the spectroscopic values are stored as a sine wave, i_recon and
    # i_corrected are shifted accordingly at the end of
    # bayesian_utils.process_pixel.
    self.h5_R = write_main_dataset(
        self.h5_results_grp, (num_pixels, num_resist_pts),
        "Resistance", "Resistance", "GOhms",
        None, None,
        dtype=np.float64,
        h5_pos_inds=self.h5_main.h5_pos_inds,
        h5_pos_vals=self.h5_main.h5_pos_vals,
        h5_spec_inds=h5_spec_inds_R,
        h5_spec_vals=h5_spec_vals_R)

    assert isinstance(self.h5_R, usid.USIDataset)  # Quick sanity check
    self.h5_R_sig = create_empty_dataset(self.h5_R, np.float64, "R_sig")

    # Capacitance gets its own two-element "Direction" spectroscopic dimension
    self.h5_capacitance = write_main_dataset(
        self.h5_results_grp, (num_pixels, 2),
        "Capacitance", "Capacitance", "pF",
        None, Dimension("Direction", "", 2),
        h5_pos_inds=self.h5_main.h5_pos_inds,
        h5_pos_vals=self.h5_main.h5_pos_vals,
        dtype=np.float64,
        aux_spec_prefix="Cap_Spec_")

    # NOTE(review): the units of the current were marked as tentative by the
    # original author; the value actually written is "nA" - confirm
    self.h5_i_recon = write_main_dataset(
        self.h5_results_grp, (num_pixels, num_bias_pts),
        "Reconstructed_Current", "Current", "nA",
        None, None,
        dtype=np.float64,
        h5_pos_inds=self.h5_main.h5_pos_inds,
        h5_pos_vals=self.h5_main.h5_pos_vals,
        h5_spec_inds=h5_spec_inds_new,
        h5_spec_vals=h5_spec_vals_new)
    self.h5_i_corrected = create_empty_dataset(self.h5_i_recon, np.float64,
                                               "Corrected_Current")

    if self.verbose:
        print("results datasets set up")
    self.h5_main.file.flush()
def translate(self, file_path, verbose=False, parm_encoding='utf-8'):
    """
    Translates the provided file to .h5

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of the .ibw file
    verbose : Boolean (Optional)
        Whether or not to show print statements for debugging
    parm_encoding : str, optional
        Codec to be used to decode the bytestrings into Python strings if needed.
        Default 'utf-8'

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the .h5 file
    """
    file_path = path.abspath(file_path)

    # Work out the output path. The file is only (re)created after the .ibw
    # has been parsed, so a bad input never destroys an existing .h5
    folder_path, base_name = path.split(file_path)
    base_name = path.splitext(base_name)[0]
    h5_path = path.join(folder_path, base_name + '.h5')

    # Load the ibw file first
    ibw_obj = bw.load(file_path)
    ibw_wave = ibw_obj.get('wave')
    parm_dict = self._read_parms(ibw_wave, parm_encoding)
    chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

    if verbose:
        print('Channels and units found:')
        print(chan_labels)
        print(chan_units)

    # Get the data to figure out if this is an image or a force curve
    images = ibw_wave.get('wData')

    # The channel axis is the last one for both 3D image stacks and 2D force
    # curves; indexing axis 2 directly would fail for the 2D case
    if images.shape[-1] != len(chan_labels):
        # for layer 0 null set errors in older AR software.
        # Labels and units are zipped together further down, so both lists
        # must be trimmed to keep every channel paired with its own unit
        chan_labels = chan_labels[1:]
        chan_units = chan_units[1:]

    if images.ndim == 3:  # Image stack
        if verbose:
            print('Found image stack of size {}'.format(images.shape))
        type_suffix = 'Image'

        num_rows = parm_dict['ScanLines']
        num_cols = parm_dict['ScanPoints']

        images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
        images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

        pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                    Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

        spec_desc = Dimension('arb', 'a.u.', [1])

    else:  # single force curve
        if verbose:
            print('Found force curve of size {}'.format(images.shape))

        type_suffix = 'ForceCurve'
        images = np.atleast_3d(images)  # now [Z, chan, 1]
        images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

        # For now, the Z sensor data is used as the spectroscopic values.
        # Prefer the 'ZSnsr' channel and fall back to 'Raw'. The values are
        # flattened to 1D, matching the shape of the fallback below
        for z_label in ('ZSnsr', 'Raw'):
            if z_label in chan_labels:
                chan_ind = chan_labels.index(z_label)
                spec_data = VALUES_DTYPE(images[chan_ind]).squeeze()
                break
        else:
            # We don't expect to come here. If we do, plain indices are used
            spec_data = np.arange(images.shape[2])

        pos_desc = Dimension('X', 'm', [1])
        spec_desc = Dimension('Z', 'm', spec_data)

    if path.exists(h5_path):
        remove(h5_path)

    # The context manager closes the file even if one of the writes fails
    with h5py.File(h5_path, 'w') as h5_file:
        # Create measurement group
        meas_grp = create_indexed_group(h5_file, 'Measurement')

        # Write file and measurement level parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'IgorIBW_' + type_suffix
        global_parms['translator'] = 'IgorIBW'
        write_simple_attrs(h5_file, global_parms)

        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets shared by all channels
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Write one Raw_Data dataset per channel
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

    return h5_path
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file.
        None is returned (after a warning) if the data file is empty or does
        not hold a whole number of rows.
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    (basename, parm_paths, data_paths) = self._parse_file_path(file_path)

    (folder_path, unused) = path.split(file_path)
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])
    self.points_per_pixel = len(be_wave)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # (4 bytes per sample)
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    self.num_rows = int(self.num_rows)

    if self.num_rows == 0:
        # An empty data file would otherwise fail with a division by zero
        # when computing the bytes per row below
        warn('Error - Data file is empty')
        return None

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # Correct the excitation frequency such that a whole number of periods
    # fits into the duration of a pixel.
    # (An alternative, not used here, is to pick the highest hot bin of the
    # FFT of the excitation waveform.)
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    # The context manager closes the file even if a write or read below fails
    with h5py.File(h5_path, 'w') as h5_f:
        # First finish writing all global parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        # The ancillary datasets are written once (for the first data file) and
        # reused for every other channel since only one excitation waveform is
        # used. Only the space for the main data is allocated per channel.
        h5_pos_inds = None
        for data_path in data_paths.values():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if h5_pos_inds is None:
                # Shared between channels -> measurement group;
                # a lone channel keeps them in its own group
                aux_parent = meas_grp if len(data_paths) > 1 else chan_grp
                h5_pos_inds, h5_pos_vals = write_ind_val_dsets(aux_parent, pos_desc,
                                                               is_spectral=False)
                h5_spec_inds, h5_spec_vals = write_ind_val_dsets(aux_parent, spec_desc,
                                                                 is_spectral=True)

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_path, h5_main)

    print('G-Line translation complete!')

    return h5_path
def test_existing_both_aux(self):
    """
    Writes a main dataset that is linked to pre-existing position and
    spectroscopic ancillary datasets and validates both ancillary pairs.
    """
    file_path = 'test.h5'
    data_utils.delete_existing_file(file_path)
    main_data = np.random.rand(15, 14)
    main_data_name = 'Test_Main'
    quantity = 'Current'
    dset_units = 'nA'

    pos_sizes = [5, 3]
    pos_names = ['X', 'Y']
    pos_units = ['nm', 'um']
    pos_dims = [write_utils.Dimension(name, units, np.arange(length))
                for length, name, units in zip(pos_sizes, pos_names, pos_units)]
    # Expected position values: the first dimension varies fastest
    pos_data = np.vstack((np.tile(np.arange(5), 3),
                          np.repeat(np.arange(3), 5))).T

    spec_sizes = [7, 2]
    spec_names = ['Bias', 'Cycle']
    spec_units = ['V', '']
    spec_dims = [write_utils.Dimension(name, units, np.arange(length))
                 for length, name, units in zip(spec_sizes, spec_names, spec_units)]
    # Expected spectroscopic values: the first dimension varies fastest
    spec_data = np.vstack((np.tile(np.arange(7), 2),
                           np.repeat(np.arange(2), 7)))

    # The mode is given explicitly: the file was just deleted and recent h5py
    # versions open files read-only when no mode is specified
    with h5py.File(file_path, mode='w') as h5_f:
        h5_spec_inds, h5_spec_vals = hdf_utils.write_ind_val_dsets(h5_f, spec_dims, is_spectral=True)
        h5_pos_inds, h5_pos_vals = hdf_utils.write_ind_val_dsets(h5_f, pos_dims, is_spectral=False)

        usid_main = hdf_utils.write_main_dataset(h5_f, main_data, main_data_name, quantity, dset_units,
                                                 None, None,
                                                 h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                                 h5_pos_vals=h5_pos_vals, h5_pos_inds=h5_pos_inds,
                                                 main_dset_attrs=None)

        data_utils.validate_aux_dset_pair(self, h5_f, h5_pos_inds, h5_pos_vals, pos_names, pos_units,
                                          pos_data, h5_main=usid_main, is_spectral=False)

        data_utils.validate_aux_dset_pair(self, h5_f, h5_spec_inds, h5_spec_vals, spec_names, spec_units,
                                          spec_data, h5_main=usid_main, is_spectral=True)
    os.remove(file_path)
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file.
        None is returned (after a warning) if the data file is empty or does
        not hold a whole number of rows.
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

    (folder_path, unused) = path.split(file_path)
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                      'BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))
    be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])

    self.points_per_pixel = len(be_wave)
    self.points_per_line = len(be_wave_train)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # (4 bytes per sample)
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    self.num_rows = int(self.num_rows)

    if self.num_rows == 0:
        # An empty data file would otherwise fail with a division by zero
        # when computing the bytes per row below
        warn('Error - Data file is empty')
        return None

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # Correct the excitation frequency such that a whole number of periods
    # fits into the duration of a pixel
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['points_per_line'] = self.points_per_line
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    # The context manager closes the file even if a write or read below fails
    with h5py.File(h5_path, 'w') as h5_file:
        # First finish writing all global parameters
        global_parms = generate_dummy_main_parms()
        global_parms['data_type'] = 'G_mode_line'
        global_parms['translator'] = 'G_mode_line'
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement group and write the parameters to it
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        # The ancillary datasets are shared by every channel since only one
        # excitation waveform is used. Only the space for the main data is
        # allocated per raw data file.
        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Now go over each raw data file:
        for data_path in data_paths.values():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_path, h5_main)

    print('G-Tune translation complete!')

    return h5_path