def _setupH5(self, usize, vsize, data_type, scan_size_x, scan_size_y):
    """
    Lay out the HDF5 file that will receive the ptychography image stack.

    File-level and measurement-level attributes are written, an indexed
    'Measurement' group and an indexed 'Channel' group are created, and space
    is allocated for the raw data as well as for the two averaged datasets.

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type
        Data type to save the images as. It is called as ``data_type(0)`` to
        read ``itemsize``, so it must be constructible from a scalar.
    scan_size_x : int
        Number of images in the x dimension
    scan_size_y : int
        Number of images in the y dimension

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        Zero-initialised float32 dataset with one entry per image (position)
    h5_ronch : h5py.Dataset
        Zero-initialised float32 dataset with one entry per image pixel
    """
    pixels_per_image = usize * vsize
    image_count = scan_size_x * scan_size_y

    # File-level attributes
    file_attrs = generate_dummy_main_parms()
    file_attrs['data_type'] = 'PtychographyData'
    write_simple_attrs(self.h5_file, file_attrs)

    # Measurement-level attributes
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, {
        'num_images': image_count,
        'image_size_u': usize,
        'image_size_v': vsize,
        'num_pixels': pixels_per_image,
        'translator': 'Ptychography',
        'scan_size_x': scan_size_x,
        'scan_size_y': scan_size_y,
    })
    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Each image is one position; each pixel of an image is one spectroscopic step
    spectroscopic_dims = [
        Dimension('U', 'pixel', np.arange(usize)),
        Dimension('V', 'pixel', np.arange(vsize)),
    ]
    position_dims = [
        Dimension('X', 'pixel', np.arange(scan_size_x)),
        Dimension('Y', 'pixel', np.arange(scan_size_y)),
    ]

    # The smallest chunk handed to calc_chunks is one complete image
    bytes_per_value = data_type(0).itemsize
    chunk_shape = calc_chunks([image_count, pixels_per_image],
                              bytes_per_value,
                              unit_chunks=(1, pixels_per_image))

    # Allocate space for the raw data and for the two averaged datasets
    h5_main = write_main_dataset(chan_grp,
                                 (image_count, pixels_per_image),
                                 'Raw_Data', 'Intensity', 'a.u.',
                                 position_dims, spectroscopic_dims,
                                 chunks=chunk_shape,
                                 dtype=data_type)

    empty_ronchigram = np.zeros(pixels_per_image, dtype=np.float32)
    h5_ronch = meas_grp.create_dataset('Mean_Ronchigram',
                                       data=empty_ronchigram,
                                       dtype=np.float32)

    empty_mean_spectrum = np.zeros(image_count, dtype=np.float32)
    h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                           data=empty_mean_spectrum,
                                           dtype=np.float32)

    self.h5_file.flush()

    return h5_main, h5_mean_spec, h5_ronch
def reshape_from_lines_to_pixels(h5_main, pts_per_cycle, scan_step_x_m=None):
    """
    Breaks up the provided raw G-mode dataset into lines and pixels (from just lines)

    The result is written to a new 'Reshape' results group next to h5_main.

    Parameters
    ----------
    h5_main : h5py.Dataset object
        Reference to the main dataset that contains the raw data that is only broken up by lines
    pts_per_cycle : unsigned int
        Number of points in a single pixel. Integral floats (e.g. 128.0) are accepted and
        converted to int.
    scan_step_x_m : float, optional
        Step in meters for pixels. Defaults to 1 when not provided.

    Returns
    -------
    h5_resh : USIDataset object
        Reference to the main dataset that contains the reshaped data

    Raises
    ------
    TypeError
        If h5_main is not a Main dataset, if pts_per_cycle is not a positive integer, or if
        scan_step_x_m is provided but is not a number
    ValueError
        If the number of points in a line is not a whole multiple of pts_per_cycle
    """
    if not check_if_main(h5_main):
        raise TypeError('h5_main is not a Main dataset')
    h5_main = USIDataset(h5_main)

    if pts_per_cycle % 1 != 0 or pts_per_cycle < 1:
        raise TypeError('pts_per_cycle should be a positive integer')
    # The check above lets integral floats through, but slicing and dataset shapes below need
    # a genuine int.
    pts_per_cycle = int(pts_per_cycle)

    if scan_step_x_m is None:
        scan_step_x_m = 1
    elif not isinstance(scan_step_x_m, Number):
        raise TypeError('scan_step_x_m should be a real number')

    points_per_line = h5_main.shape[1]
    if points_per_line % pts_per_cycle != 0:
        # Carry the explanation in the exception itself rather than in a separate warning
        raise ValueError('Error in reshaping the provided dataset to pixels. Check points per pixel')

    num_lines = h5_main.shape[0]
    num_cols = points_per_line // pts_per_cycle

    # TODO: DO NOT assume simple 1 spectral dimension!
    single_ao = np.squeeze(h5_main.h5_spec_vals[:, :pts_per_cycle])

    spec_dims = Dimension(get_attr(h5_main.h5_spec_vals, 'labels')[0],
                          get_attr(h5_main.h5_spec_vals, 'units')[0],
                          single_ao)

    # TODO: DO NOT assume simple 1D in positions!
    # NOTE(review): the Y axis ends at the position value of the SECOND line
    # (h5_pos_vals[1, 0]), which needs at least two lines in h5_main - confirm this is the
    # intended extent.
    pos_dims = [Dimension('X', 'm', np.linspace(0, scan_step_x_m, num_cols)),
                Dimension('Y', 'm', np.linspace(0, h5_main.h5_pos_vals[1, 0], num_lines))]

    h5_group = create_results_group(h5_main, 'Reshape')

    # TODO: Create empty datasets and then write for very large datasets
    h5_resh = write_main_dataset(h5_group, (num_cols * num_lines, pts_per_cycle), 'Reshaped_Data',
                                 get_attr(h5_main, 'quantity')[0], get_attr(h5_main, 'units')[0],
                                 pos_dims, spec_dims,
                                 chunks=(10, pts_per_cycle), dtype=h5_main.dtype,
                                 compression=h5_main.compression)

    # TODO: DON'T write in one shot assuming small datasets fit in memory!
    print('Starting to reshape G-mode line data. Please be patient')
    h5_resh[()] = np.reshape(h5_main[()], (-1, pts_per_cycle))

    print('Finished reshaping G-mode line data to rows and columns')

    return USIDataset(h5_resh)
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode or None
        Absolute path of the h5 file. None is returned (after a warning) when the main data
        file does not contain a whole number of rows.
    """
    file_path = path.abspath(file_path)

    # Figure out the basename of the data:
    (basename, parm_paths, data_paths) = super(GTuneTranslator, self)._parse_file_path(file_path)

    folder_path = path.dirname(file_path)
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['AI_wave', 'BE_wave_AO_0', 'BE_wave_AO_1', 'BE_wave_train',
                                      'BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))
    be_wave_train = np.float32(np.squeeze(matread['BE_wave_train']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])

    self.points_per_pixel = len(be_wave)
    self.points_per_line = len(be_wave_train)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # NOTE(review): 4 is presumably the byte size of one stored sample - confirm against _read_data
    self.num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)

    if self.num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    self.num_rows = int(self.num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # method 1 for calculating the correct excitation frequency:
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['points_per_line'] = self.points_per_line
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    global_parms = {'data_type': 'G_mode_line', 'translator': 'G_mode_line'}

    # The ancillary datasets do not change with each raw data file since only one
    # excitation waveform is used
    pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
    spec_desc = Dimension('Excitation', 'V', np.tile(VALUES_DTYPE(be_wave), num_cols))

    # The context manager guarantees that the HDF5 file is closed even if writing any of the
    # datasets or reading any of the raw data files raises.
    with h5py.File(h5_path, 'w') as h5_file:
        # First finish writing all global parameters
        write_simple_attrs(h5_file, global_parms)

        # Next create the Measurement group and write the appropriate parameters to it
        meas_grp = create_indexed_group(h5_file, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        h5_pos_ind, h5_pos_val = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Now that the file has been created, go over each raw data file.
        # One Channel group (sharing the ancillary datasets above) is made per raw data file.
        for data_file_path in data_paths.values():
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            h5_main = write_main_dataset(chan_grp, (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_ind, h5_pos_vals=h5_pos_val,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            super(GTuneTranslator, self)._read_data(data_file_path, h5_main)

    print('G-Tune translation complete!')

    return h5_path
def translate(self, parm_path):
    """
    Basic method that translates .mat data files to a single .h5 file

    The .h5 file is created inside the folder that holds the parameters file and is named
    after that folder. One 'fSweep_r<row>_c<col>.mat' file is looked up in the same folder
    for every pixel; missing files are reported and their position is left unwritten.

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    # Normalise first so that the folder and base name are correct for relative paths too
    parm_path = path.abspath(parm_path)
    self.parm_path = parm_path
    folder_path = path.dirname(parm_path)
    base_name = path.basename(folder_path)
    h5_path = path.join(folder_path, base_name + '.h5')

    # Read parameters
    parm_dict = readGmodeParms(parm_path)

    # Add the w^2 specific parameters to this list
    parm_data = loadmat(parm_path, squeeze_me=True, struct_as_record=True)
    freq_sweep_parms = parm_data['freqSweepParms']
    # np.float was only an alias of the builtin float and has been removed from NumPy
    parm_dict['freq_sweep_delay'] = float(freq_sweep_parms['delay'].item())
    gen_sig = parm_data['genSig']
    parm_dict['wfm_fix_d_fast'] = np.int32(gen_sig['restrictT'].item())
    # squeeze_me=True collapses a single-frequency sweep to a scalar; keep it 1D so len() works
    freq_array = np.atleast_1d(np.float32(parm_data['freqArray']))
    num_freqs = len(freq_array)

    # prepare the spectroscopic values
    samp_rate = parm_dict['IO_down_samp_rate_[Hz]']
    num_bins = int(parm_dict['wfm_n_cycles'] * parm_dict['wfm_p_slow'] * samp_rate)
    # linspace guarantees exactly num_bins points; arange with a floating point step does not
    w_vec = np.linspace(-0.5 * samp_rate, 0.5 * samp_rate, num_bins, endpoint=False)

    num_rows = parm_dict['grid_num_rows']
    num_cols = parm_dict['grid_num_cols']
    parm_dict['data_type'] = 'GmodeW2'

    num_pix = num_rows * num_cols

    global_parms = dict()
    global_parms['grid_size_x'] = parm_dict['grid_num_cols']
    global_parms['grid_size_y'] = parm_dict['grid_num_rows']
    # assuming that the experiment was completed:
    global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
    global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
    global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
    global_parms['translator'] = 'W2'

    # Pixels are written column-fastest below, so X (the fastest position dimension) spans
    # the columns - consistent with grid_size_x above.
    pos_dims = [Dimension('X', 'nm', num_cols),
                Dimension('Y', 'nm', num_rows)]
    spec_dims = [Dimension('Response Bin', 'a.u.', num_bins),
                 Dimension('Excitation Frequency ', 'Hz', num_freqs)]

    # Now start creating datasets and populating:
    if path.exists(h5_path):
        remove(h5_path)

    # The context manager closes the file even if reading one of the pixel files fails
    with h5py.File(h5_path, 'w') as h5_f:
        write_simple_attrs(h5_f, global_parms)

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')
        write_simple_attrs(chan_grp, parm_dict)

        # Every pixel holds num_bins points for each excitation frequency, so the main
        # dataset must be as wide as the product of both spectroscopic dimensions.
        # NOTE(review): an earlier comment suggested float16 since the DAQs are rated at
        # 16 bit; the data is nevertheless stored as float32.
        h5_main = write_main_dataset(chan_grp, (num_pix, num_bins * num_freqs), 'Raw_Data',
                                     'Deflection', 'V',
                                     pos_dims, spec_dims,
                                     chunks=(1, num_bins), dtype=np.float32)

        # The arrays must be passed as data=; the second positional argument of
        # create_dataset is the shape.
        h5_ex_freqs = chan_grp.create_dataset('Excitation_Frequencies', data=freq_array)
        h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=w_vec)

        # Now doing link_h5_objects_as_attrs:
        link_h5_objects_as_attrs(h5_main, [h5_ex_freqs, h5_bin_freq])

        # Now read the raw data files:
        pos_ind = 0
        for row_ind in range(1, num_rows + 1):
            for col_ind in range(1, num_cols + 1):
                file_path = path.join(folder_path,
                                      'fSweep_r' + str(row_ind) + '_c' + str(col_ind) + '.mat')
                print('Working on row {} col {}'.format(row_ind, col_ind))
                if path.exists(file_path):
                    # Load data file
                    pix_data = loadmat(file_path, squeeze_me=True)
                    pix_mat = pix_data['AI_mat']
                    # Take the inverse FFT on 2nd dimension
                    pix_mat = np.fft.ifft(np.fft.ifftshift(pix_mat, axes=1), axis=1)
                    # Verified with Matlab - no conjugate required here.
                    pix_vec = pix_mat.transpose().reshape(pix_mat.size)
                    h5_main[pos_ind, :] = np.float32(pix_vec)
                    h5_f.flush()  # flush from memory!
                else:
                    print('File not found for: row {} col {}'.format(row_ind, col_ind))
                pos_ind += 1
                if (100.0 * pos_ind / num_pix) % 10 == 0:
                    print('completed translating {} %'.format(int(100 * pos_ind / num_pix)))

    return h5_path
def translate(self, file_path):
    """
    Translates the provided .asc file to .h5

    The .h5 file is written next to the source file, using the same base name.

    Parameters
    ----------
    file_path : str
        Absolute path of the source .ASC STS file from Omicron STMs

    Returns
    -------
    h5_path : str
        Absolute path of the translated file
    """
    file_path = path.abspath(file_path)
    folder_path, file_name = path.split(file_path)
    # Strip the extension whatever its length instead of assuming four characters
    file_name = path.splitext(file_name)[0]

    # Extracting the raw data into memory; the handle is released even if reading fails
    with open(file_path, 'r') as file_handle:
        string_lines = file_handle.readlines()

    # Extract parameters from the first few header lines
    parm_dict, num_headers = self._read_parms(string_lines)

    num_rows = int(parm_dict['Main-y_pixels'])
    num_cols = int(parm_dict['Main-x_pixels'])
    num_pos = num_rows * num_cols
    spectra_length = int(parm_dict['Main-z_points'])

    # Extract the STS data from subsequent lines
    raw_data_2d = self._read_data(string_lines, num_pos, spectra_length, num_headers)

    # Generate the x / voltage / spectroscopic axis:
    volt_vec = np.linspace(parm_dict['Spectroscopy-Device_1_Start [Volt]'],
                           parm_dict['Spectroscopy-Device_1_End [Volt]'],
                           spectra_length)

    h5_path = path.join(folder_path, file_name + '.h5')

    # pass on the necessary pieces of information onto the numpy translate that will handle
    # the creation and writing to the h5 file.
    # The pixel counts are passed as the ints computed above since np.linspace rejects a
    # non-integer number of samples.
    # NOTE(review): the axes run from the offset to the length (not offset + length) -
    # confirm that this is the intended extent.
    pos_dims = [Dimension('X', 'nm', np.linspace(parm_dict['Main-x_offset'],
                                                 parm_dict['Main-x_length'],
                                                 num_cols)),
                Dimension('Y', 'nm', np.linspace(parm_dict['Main-y_offset'],
                                                 parm_dict['Main-y_length'],
                                                 num_rows))]
    spec_dims = Dimension('Bias', 'V', volt_vec)

    h5_path = super(AscTranslator, self).translate(h5_path, 'STS', raw_data_2d,
                                                   'Tunnelling current',
                                                   parm_dict['Main-value_unit'],
                                                   pos_dims, spec_dims,
                                                   translator_name='ASC', parm_dict=parm_dict)

    return h5_path
def _create_results_datasets(self):
    """
    Builds the results group and every dataset that will hold the outputs.

    The process parameters (plus 'last_pixel' and 'algorithm') are written as attributes
    of the results group. The position datasets of the source dataset are reused for all
    results. The Resistance, R_sig, Capacitance, Reconstructed_Current and
    Corrected_Current datasets are created as float64.
    """
    source = self.h5_main
    num_pos = source.shape[0]
    # All results share the position datasets of the source
    shared_positions = dict(h5_pos_inds=source.h5_pos_inds,
                            h5_pos_vals=source.h5_pos_vals)

    results_grp = create_results_group(source, self.process_name)
    self.h5_results_grp = results_grp

    # Record the parameters (including full_V and num_pixels) on the new group
    self.params_dict.update(last_pixel=0,
                            algorithm='pycroscopy_AdaptiveBayesianInference')
    write_simple_attrs(results_grp, self.params_dict)

    assert isinstance(results_grp, h5py.Group)

    # Spectroscopic datasets for the current results.
    # When the data was not parsed down, those of the source can be reused directly.
    # Otherwise smaller ones are created and, by convention, filled with full_V converted
    # back to a sine wave.
    if self.parse_mod == 1:
        cur_spec_inds = source.h5_spec_inds
        cur_spec_vals = source.h5_spec_vals
    else:
        bias_dim = Dimension("Bias", "V", self.full_V.size)
        cur_spec_inds, cur_spec_vals = write_ind_val_dsets(results_grp, bias_dim,
                                                           is_spectral=True)
        cur_spec_vals[()] = get_unshifted_response(self.full_V, self.shift_index)

    # Separate spectroscopic datasets for R and R_sig, holding x twice back to back
    res_width = 2 * self.M
    res_spec_inds, res_spec_vals = write_ind_val_dsets(results_grp,
                                                       Dimension("Bias", "V", res_width),
                                                       is_spectral=True,
                                                       base_name="Spectroscopic_R")
    res_spec_vals[()] = np.concatenate((self.x, self.x)).T

    # Note by convention, the spectroscopic values are stored as a sine wave
    # so i_recon and i_corrected are shifted at the end of bayesian_utils.process_pixel
    # accordingly.
    self.h5_R = write_main_dataset(results_grp, (num_pos, res_width),
                                   "Resistance", "Resistance", "GOhms",
                                   None, None,
                                   dtype=np.float64,
                                   h5_spec_inds=res_spec_inds,
                                   h5_spec_vals=res_spec_vals,
                                   **shared_positions)

    assert isinstance(self.h5_R, usid.USIDataset)  # Quick sanity check
    self.h5_R_sig = create_empty_dataset(self.h5_R, np.float64, "R_sig")

    # Two capacitance values per position, one per entry of the 'Direction' dimension
    self.h5_capacitance = write_main_dataset(results_grp, (num_pos, 2),
                                             "Capacitance", "Capacitance", "pF",
                                             None, Dimension("Direction", "", 2),
                                             dtype=np.float64,
                                             aux_spec_prefix="Cap_Spec_",
                                             **shared_positions)

    # NOTE(review): the units of the current are recorded as nA; the original author was
    # not sure what they should be.
    self.h5_i_recon = write_main_dataset(results_grp, (num_pos, self.full_V.size),
                                         "Reconstructed_Current", "Current", "nA",
                                         None, None,
                                         dtype=np.float64,
                                         h5_spec_inds=cur_spec_inds,
                                         h5_spec_vals=cur_spec_vals,
                                         **shared_positions)
    self.h5_i_corrected = create_empty_dataset(self.h5_i_recon, np.float64,
                                               "Corrected_Current")

    if self.verbose:
        print("results datasets set up")

    source.file.flush()
def _create_results_datasets(self):
    """
    Creates the HDF5 group and datasets that will hold the results.

    Corrected_Current mirrors the source dataset. Resistance and R_variance share a new
    'Bias' spectroscopic dimension, and Capacitance gets its own 'Direction' dimension.
    All of them reuse the position datasets of the source dataset. Progress messages are
    only printed in verbose mode and only by MPI rank 0.
    """
    source = self.h5_main
    num_pos = source.shape[0]
    talkative = self.verbose and self.mpi_rank == 0

    def report(message):
        # Progress messages are limited to rank 0 and verbose mode
        if talkative:
            print(message)

    report('Now creating the datasets')

    self.h5_results_grp = create_results_group(source, self.process_name,
                                               h5_parent_group=self._h5_target_group)
    results_grp = self.h5_results_grp

    write_simple_attrs(results_grp, {'algorithm_author': 'Kody J. Law', 'last_pixel': 0})
    write_simple_attrs(results_grp, self.parms_dict)

    if talkative:
        print('created group: {} with attributes:'.format(results_grp.name))
        print(get_attributes(results_grp))

    # One of those rare instances when the result is exactly the same as the source
    self.h5_i_corrected = create_empty_dataset(source, np.float32, 'Corrected_Current',
                                               h5_group=results_grp)
    report('Created I Corrected')

    # The resistance dataset requires the creation of a new spectroscopic dimension.
    # For some reason, we cannot specify chunks or compression here!
    bias_dim = Dimension('Bias', 'V', self.num_x_steps)
    self.h5_resistance = write_main_dataset(results_grp, (num_pos, self.num_x_steps),
                                            'Resistance', 'Resistance', 'GOhms',
                                            None, bias_dim,
                                            dtype=np.float32,
                                            h5_pos_inds=source.h5_pos_inds,
                                            h5_pos_vals=source.h5_pos_vals)
    report('Created Resistance')

    assert isinstance(self.h5_resistance, USIDataset)  # only here for PyCharm
    self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals

    # The variance is identical to the resistance dataset
    self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32, 'R_variance')
    report('Created Variance')

    # The capacitance dataset requires new spectroscopic dimensions as well
    direction_dim = Dimension('Direction', '', [1])
    self.h5_cap = write_main_dataset(results_grp, (num_pos, 1),
                                     'Capacitance', 'Capacitance', 'pF',
                                     None, direction_dim,
                                     h5_pos_inds=source.h5_pos_inds,
                                     h5_pos_vals=source.h5_pos_vals,
                                     dtype=cap_dtype,
                                     aux_spec_prefix='Cap_Spec_')
    report('Created Capacitance')
    report('Done creating all results datasets!')

    # Let every rank finish before the file is flushed
    if self.mpi_size > 1:
        self.mpi_comm.Barrier()
    source.file.flush()
def _create_results_datasets(self):
    '''
    Creates the groups and datasets necessary to store the results.

    Two results groups are created for the source dataset: one named after the process,
    holding the 'force' dataset, and one with the '_CPD' suffix, holding the 'CPD' and
    'capacitance' datasets. The attributes of the parent of the source dataset are copied
    onto both groups.

    The following attributes are set on the object:

    h5_results_grp, h5_cpd_grp : the two results groups
    h5_force : float32 main dataset of shape (num_rows * num_cols, pnts_per_avg)
    h5_cpd : float32 main dataset of shape (num_rows * num_cols, num_CPD)
    h5_cap : float32 main dataset that shares the ancillary datasets of h5_cpd
    '''
    print('Creating CPD results datasets')

    hdf = usid.hdf_utils
    parms = self.parm_dict
    source = self.h5_main

    # Get relevant parameters
    num_rows = parms['num_rows']
    num_cols = parms['num_cols']
    pnts_per_avg = parms['pnts_per_avg']
    num_pos = num_rows * num_cols

    force_shape = [num_pos, pnts_per_avg]
    cpd_shape = [num_pos, self.cpd_dict['num_CPD']]

    self.h5_results_grp = hdf.create_results_group(source, self.process_name)
    self.h5_cpd_grp = hdf.create_results_group(source, self.process_name + '_CPD')

    for results_grp in (self.h5_results_grp, self.h5_cpd_grp):
        hdf.copy_attributes(source.parent, results_grp)

    # Position dimensions of the force dataset
    x_axis = np.linspace(0, parms['FastScanSize'], num_cols)
    y_axis = np.linspace(0, parms['SlowScanSize'], num_rows)
    force_pos_dims = [Dimension('X', 'm', x_axis),
                      Dimension('Y', 'm', y_axis)]

    # Both time axes cover the same total time at different sampling
    force_time = np.linspace(0, parms['total_time'], pnts_per_avg)
    force_spec_dims = [Dimension('Time', 's', force_time)]
    cpd_time = np.linspace(0, parms['total_time'], self.cpd_dict['num_CPD'])
    cpd_spec_dims = [Dimension('Time', 's', cpd_time)]

    # Force: new position and spectroscopic datasets
    self.h5_force = hdf.write_main_dataset(self.h5_results_grp, force_shape,
                                           'force', 'Force', 'N',
                                           force_pos_dims, force_spec_dims,
                                           dtype=np.float32,
                                           main_dset_attrs=parms)

    # CPD: positions copied from the source, new spectroscopic datasets
    self.h5_cpd = hdf.write_main_dataset(self.h5_cpd_grp, cpd_shape,
                                         'CPD', 'Potential', 'V',
                                         None, cpd_spec_dims,
                                         h5_pos_inds=source.h5_pos_inds,
                                         h5_pos_vals=source.h5_pos_vals,
                                         dtype=np.float32,
                                         main_dset_attrs=parms)

    # Capacitance: positions copied from the source, spectroscopic datasets from CPD
    self.h5_cap = hdf.write_main_dataset(self.h5_cpd_grp, cpd_shape,
                                         'capacitance', 'Capacitance', 'F',
                                         None, None,
                                         h5_pos_inds=source.h5_pos_inds,
                                         h5_pos_vals=source.h5_pos_vals,
                                         h5_spec_inds=self.h5_cpd.h5_spec_inds,
                                         h5_spec_vals=self.h5_cpd.h5_spec_vals,
                                         dtype=np.float32,
                                         main_dset_attrs=parms)

    self.h5_cpd.file.flush()
def translate(self, file_path, verbose=False, append_path='', grp_name='Measurement',
              parm_encoding='utf-8'):
    """
    Translates the provided file to .h5

    The .ibw file is read and interpreted completely before the .h5 file is created or
    opened, so a file that cannot be read neither removes an existing .h5 file nor leaves
    an open or half-written one behind.

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of the .ibw file
    verbose : Boolean (Optional)
        Whether or not to show print statements for debugging
    append_path : string (Optional)
        h5_file to add these data to, must be a path to the h5_file on disk
    grp_name : string (Optional)
        Change from default "Measurement" name to something specific
    parm_encoding : str, optional
        Codec to be used to decode the bytestrings into Python strings if needed.
        Default 'utf-8'

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the .h5 file

    Raises
    ------
    Exception
        If append_path is provided but no such file exists
    """
    file_path = path.abspath(file_path)

    # Decide where the data will go, without touching the disk yet
    folder_path, base_name = path.split(file_path)
    base_name = path.splitext(base_name)[0]

    if not append_path:
        h5_path = path.join(folder_path, base_name + '.h5')
        h5_mode = 'w'
    else:
        h5_path = append_path
        if not path.exists(append_path):
            raise Exception('File does not exist. Check pathname.')
        h5_mode = 'r+'

    # Load the ibw file first
    ibw_obj = bw.load(file_path)
    ibw_wave = ibw_obj.get('wave')
    parm_dict = self._read_parms(ibw_wave, parm_encoding)
    chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

    if verbose:
        print('Channels and units found:')
        print(chan_labels)
        print(chan_units)

    # Get the data to figure out if this is an image or a force curve
    images = ibw_wave.get('wData')

    if images.shape[-1] != len(chan_labels):
        # for layer 0 null set errors in older AR software
        # NOTE(review): chan_units is not trimmed alongside - confirm labels and units
        # still line up in this case
        chan_labels = chan_labels[1:]

    if images.ndim == 3:  # Image stack
        if verbose:
            print('Found image stack of size {}'.format(images.shape))
        type_suffix = 'Image'

        num_rows = parm_dict['ScanLines']
        num_cols = parm_dict['ScanPoints']

        images = images.transpose(2, 1, 0)  # now ordered as [chan, Y, X] image
        images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points,1]

        pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                    Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]

        spec_desc = Dimension('arb', 'a.u.', [1])

    else:  # single force curve
        if verbose:
            print('Found force curve of size {}'.format(images.shape))

        type_suffix = 'ForceCurve'
        images = np.atleast_3d(images)  # now [Z, chan, 1]
        images = images.transpose((1, 2, 0))  # [chan ,1, Z] force curve

        # For now, we'll shove the Z sensor data into the spectroscopic values.
        # Use the first channel that corresponds to either Z sensor or Raw:
        for z_label in ('ZSnsr', 'Raw'):
            if z_label in chan_labels:
                spec_data = VALUES_DTYPE(images[chan_labels.index(z_label)]).squeeze()
                break
        else:
            # We don't expect to come here. If we do, the spectroscopic values are simply
            # the indices of the points
            spec_data = np.arange(images.shape[2])

        pos_desc = Dimension('X', 'm', [1])
        spec_desc = Dimension('Z', 'm', spec_data)

    global_parms = {'data_type': 'IgorIBW_' + type_suffix,
                    'translator': 'IgorIBW'}

    # Only now that the source was read successfully, replace any stale translation
    if not append_path and path.exists(h5_path):
        remove(h5_path)

    # The context manager closes the file even if writing any of the datasets fails
    with h5py.File(h5_path, h5_mode) as h5_file:
        # Create measurement group
        meas_grp = create_indexed_group(h5_file, grp_name)

        # Write file and measurement level parameters
        write_simple_attrs(h5_file, global_parms)
        write_simple_attrs(meas_grp, parm_dict)

        # Create Position and spectroscopic datasets shared by all channels
        h5_pos_inds, h5_pos_vals = write_ind_val_dsets(meas_grp, pos_desc, is_spectral=False)
        h5_spec_inds, h5_spec_vals = write_ind_val_dsets(meas_grp, spec_desc, is_spectral=True)

        # Write one Channel group with its raw data per channel
        for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
            if verbose:
                print('channel', chan_name)
                print('unit', chan_unit)
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                               chan_name, chan_unit,
                               None, None,
                               h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                               h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                               dtype=np.float32)

        if verbose:
            print('Finished preparing raw datasets')

    return h5_path
def _translate_gsf(self, file_path, meas_grp):
    """
    Writes the contents of a single .gsf file into a new Channel group.

    For more information on the .gsf file format visit the link below -
    http://gwyddion.net/documentation/user-guide-en/gsf.html

    Parameters
    ----------
    file_path
        Path of the .gsf file, handed to gsf_read
    meas_grp
        Group under which the indexed 'Channel' group is created. The file-level
        parameters are written to its parent.
    """
    # Read the data in from the specified file
    metadata, image = gsf_read(file_path)

    # Overwrite some parameters at the file level, taking the comment and the date from
    # the metadata when they are available
    self.global_parms.update({
        'data_type': 'Gwyddion_GSF',
        'comments': metadata.get('comment', ''),
        'experiment_date': metadata.get('date', ''),
    })
    write_simple_attrs(meas_grp.parent, self.global_parms)

    # Build the reference values for the ancillary position datasets:
    # TODO: Remove information from parameters once it is used meaningfully where it needs to be.
    def axis_values(prefix):
        # Evenly spaced values spanning the real size of the axis, shifted by its offset
        offset = metadata.get(prefix + 'Offset', 0)
        extent = metadata.get(prefix + 'Real', 1.0)
        return np.linspace(0, extent, metadata.get(prefix + 'Res')) + offset

    x_vals = axis_values('X')
    y_vals = axis_values('Y')

    # Just define the ancillary position and spectral dimensions here.
    # write_main_dataset takes care of creating the ancillary datasets; write_ind_val_dsets
    # is only needed when the datasets have to be reused.
    lateral_units = metadata.get('XYUnits', 'arb. units')
    pos_desc = [Dimension('X', lateral_units, x_vals),
                Dimension('Y', lateral_units, y_vals)]
    spec_desc = Dimension('Intensity', metadata.get('ZUnits', 'arb. units'), [1])

    # Create the channel-level group
    chan_grp = create_indexed_group(meas_grp, 'Channel')
    write_simple_attrs(chan_grp, metadata)

    # Flatten the image to one value per position and arrange it as a single column
    num_positions = len(pos_desc[0].values) * len(pos_desc[1].values)
    flat_image = np.reshape(image, num_positions)
    column_image = np.atleast_2d(flat_image).transpose()

    # Create the main dataset (and the ancillary datasets along with it)
    write_main_dataset(chan_grp, column_image, 'Raw_Data',
                       metadata.get('Title', 'Unknown'),
                       metadata.get('ZUnits', 'arb. units'),
                       pos_desc, spec_desc)
def translate(self, file_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    file_path : String / unicode
        Absolute path of any file in the directory

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the h5 file. ``None`` is returned (after a warning)
        when the size of the main data file does not correspond to a whole,
        non-zero number of rows.
    """
    file_path = path.abspath(file_path)
    # Figure out the basename of the data:
    basename, parm_paths, data_paths = self._parse_file_path(file_path)

    folder_path = path.split(file_path)[0]
    h5_path = path.join(folder_path, basename + '.h5')

    if path.exists(h5_path):
        remove(h5_path)

    # Load parameters from .mat file - 'BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'
    matread = loadmat(parm_paths['parm_mat'],
                      variable_names=['BE_wave', 'FFT_BE_wave', 'total_cols', 'total_rows'])
    be_wave = np.float32(np.squeeze(matread['BE_wave']))

    num_cols = int(matread['total_cols'][0][0])
    expected_rows = int(matread['total_rows'][0][0])
    self.points_per_pixel = len(be_wave)

    # Load parameters from .txt file - 'BE_center_frequency_[Hz]', 'IO rate'
    _, parm_dict = parmsToDict(parm_paths['parm_txt'])

    # Get file byte size:
    # For now, assume that bigtime_00 always exists and is the main file
    file_size = path.getsize(data_paths[0])

    # Calculate actual number of lines since the first few lines may not be saved
    # (4 bytes per point is hard-coded here)
    num_rows = 1.0 * file_size / (4 * self.points_per_pixel * num_cols)
    self.num_rows = num_rows
    if num_rows % 1:
        warn('Error - File has incomplete rows')
        return None
    if num_rows < 1:
        # An empty data file would otherwise cause a division by zero below
        warn('Error - File does not contain any rows')
        return None
    self.num_rows = int(num_rows)

    samp_rate = parm_dict['IO_rate_[Hz]']
    ex_freq_nominal = parm_dict['BE_center_frequency_[Hz]']

    # Correct the excitation frequency such that a whole number of periods
    # fits into the duration of one pixel
    pixel_duration = 1.0 * self.points_per_pixel / samp_rate
    num_periods = pixel_duration * ex_freq_nominal
    ex_freq_correct = 1 / (pixel_duration / np.floor(num_periods))

    # correcting the excitation frequency - will be VERY useful during analysis and filtering
    parm_dict['BE_center_frequency_[Hz]'] = ex_freq_correct

    # Some very basic information that can help the processing crew
    parm_dict['num_bins'] = self.points_per_pixel
    parm_dict['grid_num_rows'] = self.num_rows
    parm_dict['data_type'] = 'G_mode_line'

    if self.num_rows != expected_rows:
        print('Note: {} of {} lines found in data file'.format(self.num_rows, expected_rows))

    # Calculate number of points to read per line:
    self.__bytes_per_row__ = int(file_size / self.num_rows)

    # First finish writing all global parameters, create the file too:
    h5_f = h5py.File(h5_path, 'w')
    try:
        write_simple_attrs(h5_f, {'data_type': 'G_mode_line',
                                  'translator': 'G_mode_line'})

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        write_simple_attrs(meas_grp, parm_dict)

        pos_desc = Dimension('Y', 'm', np.arange(self.num_rows))
        spec_desc = Dimension('Excitation', 'V',
                              np.tile(VALUES_DTYPE(be_wave), num_cols))

        # The ancillary datasets are written only once since a single
        # excitation waveform is used for every raw data file.
        h5_pos_inds = h5_pos_vals = h5_spec_inds = h5_spec_vals = None

        for data_path in data_paths.values():
            # Only the space for the main data is allocated per file; the
            # data written into it is what changes from file to file.
            chan_grp = create_indexed_group(meas_grp, 'Channel')

            if h5_pos_inds is None:
                # With several channels the positions and spectra are shared
                # and live in the measurement group; otherwise they are kept
                # next to the only main dataset.
                aux_parent = meas_grp if len(data_paths) > 1 else chan_grp
                h5_pos_inds, h5_pos_vals = write_ind_val_dsets(aux_parent, pos_desc,
                                                               is_spectral=False)
                h5_spec_inds, h5_spec_vals = write_ind_val_dsets(aux_parent, spec_desc,
                                                                 is_spectral=True)

            h5_main = write_main_dataset(chan_grp,
                                         (self.num_rows, self.points_per_pixel * num_cols),
                                         'Raw_Data', 'Deflection', 'V',
                                         None, None,
                                         h5_pos_inds=h5_pos_inds, h5_pos_vals=h5_pos_vals,
                                         h5_spec_inds=h5_spec_inds, h5_spec_vals=h5_spec_vals,
                                         chunks=(1, self.points_per_pixel), dtype=np.float16)

            # Now transfer scan data in the dat file to the h5 file:
            self._read_data(data_path, h5_main)
    finally:
        # Release the file handle even if writing or reading failed
        h5_f.close()

    print('G-Line translation complete!')

    return h5_path
def _translate_image_stack(self, meas_grp, gwy_data, obj, channels):
    """
    Use this function to write data corresponding to a stack of scan images (most common)

    Parameters
    ----------
    meas_grp
        Measurement group under which new 'Channel' groups are created
    gwy_data
        Contents of the gwy file, indexed by object path
    obj : str
        Path of the object within `gwy_data` that should be handled. The
        second '/'-separated token is interpreted as the channel number.
    channels : dict
        Maps channel numbers to the channel groups already created for them.
        Updated in place whenever a new channel group is created.

    Returns
    -------
    channels : dict
        The dictionary that was passed in
    """
    missing_units = 'arb. units'
    current_channel = ''

    gwy_key = obj.split('/')

    # Test whether a new channel needs to be created.
    # Objects whose second token is not a number (such as the 'filename'
    # entry) do not get a channel.
    try:
        chan_number = int(gwy_key[1])
    except ValueError:
        chan_number = None

    if chan_number is not None:
        if chan_number not in channels:
            channels[chan_number] = create_indexed_group(meas_grp, "Channel")
        current_channel = channels[chan_number]

    # The data structure of the gwy file will be used to create the main dataset in the h5 file
    if obj.endswith('data'):
        image = gwy_data[obj]

        x_vals = np.linspace(0, image.get('xreal', 1.0), image['xres'])
        y_vals = np.linspace(0, image.get('yreal', 1.0), image['yres'])

        # Fall back to a placeholder whenever the file does not provide a
        # unit string, exactly as was already done for the spectroscopic
        # dimension, instead of handing None to the writers.
        xy_units = image['si_unit_xy'].get('unitstr', missing_units)
        z_units = image['si_unit_z'].get('unitstr', missing_units)

        pos_desc = [Dimension('X', xy_units, x_vals),
                    Dimension('Y', xy_units, y_vals)]

        title = gwy_data['/{}/data/title'.format(gwy_key[1])]
        spec_desc = Dimension(title, z_units, [0])

        # Flatten the image into a single column: (positions, 1)
        num_positions = len(pos_desc[0].values) * len(pos_desc[1].values)
        flat_image = np.reshape(image['data'], num_positions)

        write_main_dataset(current_channel,
                           np.atleast_2d(flat_image).transpose(),
                           'Raw_Data', title, z_units,
                           pos_desc, spec_desc)
    elif obj.endswith('meta'):
        # NOTE(review): an empty dictionary is written, so nothing from
        # gwy_data[obj] ends up in the file - confirm whether that is intended
        meta = {}
        write_simple_attrs(current_channel, meta, verbose=False)

    return channels
def _parse_3ds_parms(header_dict, signal_dict):
    """
    Parse 3ds files.

    Parameters
    ----------
    header_dict : dict
        Header of the file. Must contain 'channels', 'fixed_parameters',
        'experimental_parameters', 'dim_px' and 'sweep_signal'.
    signal_dict : dict
        Signals of the file. Must contain 'params', 'sweep_signal' and one
        entry per name in ``header_dict['channels']``. The channel entries
        are popped, i.e. this dictionary is modified in place.

    Returns
    -------
    parm_dict : dict
        'meas_parms' (header entries that are not None plus one entry per
        fixed / experimental parameter) and 'channel_parms'
    data_dict : dict
        Channel data keyed by '<name> <direction>', the remaining signals,
        'Position Dimensions' and 'Spectroscopic Dimensions'
    """
    parm_dict = dict()
    data_dict = dict()

    # Create dictionary with measurement parameters
    meas_parms = {key: value for key, value in header_dict.items()
                  if value is not None}
    channels = meas_parms.pop('channels')
    parm_names = meas_parms.pop('fixed_parameters') + \
        meas_parms.pop('experimental_parameters')

    for key, parm_grid in zip(parm_names, signal_dict['params'].T):
        # Collapse the parm_grid along every axis on which it is constant
        if parm_grid.ndim > 1:
            dim_slice = list()
            for idim in range(parm_grid.ndim):
                tmp_grid = np.moveaxis(parm_grid, idim, 0)
                # An axis with a single entry cannot vary. Checking it first
                # also avoids indexing a second entry that does not exist.
                # Otherwise only the first two entries are compared.
                if tmp_grid.shape[0] < 2 or \
                        np.all(np.equal(tmp_grid[0], tmp_grid[1])):
                    dim_slice.append(0)
                else:
                    dim_slice.append(slice(None))
            parm_grid = parm_grid[tuple(dim_slice)]
        meas_parms[key] = parm_grid
    parm_dict['meas_parms'] = meas_parms

    # Create dictionary with channel parameters and
    # save channel data before renaming keys
    data_channel_parms = dict()
    for chan_name in channels:
        # '<name> <unit>' is a forward channel, '<name> <tag> <unit>' a
        # backward channel whose middle token is discarded
        splitted_chan_name = chan_name.split(maxsplit=2)
        if len(splitted_chan_name) == 2:
            direction = 'forward'
        elif len(splitted_chan_name) == 3:
            direction = 'backward'
            splitted_chan_name.pop(1)
        name, unit = splitted_chan_name
        key = ' '.join((name, direction))
        data_channel_parms[key] = {'Name': name,
                                   'Direction': direction,
                                   'Unit': unit.strip('()'),
                                   }
        data_dict[key] = signal_dict.pop(chan_name)
    parm_dict['channel_parms'] = data_channel_parms

    # Add remaining signal_dict elements to data_dict
    data_dict.update(signal_dict)

    # Position dimensions
    nx, ny = header_dict['dim_px']
    # NOTE(review): parm_dict only holds 'meas_parms' and 'channel_parms' at
    # this point, so these two lookups never match and pixel indices are
    # always used - confirm whether meas_parms was meant instead.
    if 'X (m)' in parm_dict:
        row_vals = parm_dict.pop('X (m)')
    else:
        row_vals = np.arange(nx, dtype=np.float32)

    if 'Y (m)' in parm_dict:
        col_vals = parm_dict.pop('Y (m)')
    else:
        col_vals = np.arange(ny, dtype=np.float32)

    # Each axis gets its own dimension. Stacking both axes into one array
    # would only work for grids with as many rows as columns.
    pos_dims = [Dimension('X', 'nm', np.reshape(row_vals, -1)),
                Dimension('Y', 'nm', np.reshape(col_vals, -1))]
    data_dict['Position Dimensions'] = pos_dims

    # Spectroscopic dimensions: '<label> (<unit>)'
    sweep_signal = header_dict['sweep_signal']
    spec_label, spec_unit = sweep_signal.split(maxsplit=1)
    spec_unit = spec_unit.strip('()')
    dc_offset = data_dict['sweep_signal']
    data_dict['Spectroscopic Dimensions'] = Dimension(spec_label, spec_unit,
                                                      dc_offset)

    return parm_dict, data_dict
def _parse_sxm_parms(header_dict, signal_dict):
    """
    Parse sxm files.

    Parameters
    ----------
    header_dict : dict
        Header of the file. Must contain 'data_info', 'scan_dir',
        'scan_pixels' and 'scan_range'.
    signal_dict : dict
        Image data, indexed first by channel name and then by direction

    Returns
    -------
    parm_dict : dict
        'meas_parms' (header entries that are not None, without 'data_info')
        and 'channel_parms' (one entry per channel and direction)
    data_dict : dict
        Image data keyed by '<name> <direction>', 'Position Dimensions' and
        'Spectroscopic Dimensions'
    """
    parm_dict = dict()
    data_dict = dict()

    # Create dictionary with measurement parameters
    meas_parms = {key: value for key, value in header_dict.items()
                  if value is not None}
    info_dict = meas_parms.pop('data_info')
    parm_dict['meas_parms'] = meas_parms

    # 'data_info' holds one sequence per field, ordered like the channel
    # names. Regroup it into one dictionary per channel.
    channel_names = info_dict['Name']
    single_channel_parms = {name: dict() for name in channel_names}
    for field_name, field_value in info_dict.items():
        for channel_name, value in zip(channel_names, field_value):
            single_channel_parms[channel_name][field_name] = value

    # Always store the directions as a list. A bare string would be iterated
    # character by character in the loop below.
    for parms in single_channel_parms.values():
        if parms['Direction'] == 'both':
            parms['Direction'] = ['forward', 'backward']
        else:
            parms['Direction'] = [parms['Direction']]

    # Create dictionary with channel parameters, one entry per direction
    channel_parms = dict()
    scan_dir = meas_parms['scan_dir']
    for name, parms in single_channel_parms.items():
        for direction in parms['Direction']:
            key = ' '.join((name, direction))
            channel_parms[key] = dict(parms)
            channel_parms[key]['Direction'] = direction

            # Flip the images so that every scan direction ends up with the
            # same orientation
            data = signal_dict[name][direction]
            if scan_dir == 'up':
                data = np.flip(data, axis=0)
            if direction == 'backward':
                data = np.flip(data, axis=1)
            data_dict[key] = data
    parm_dict['channel_parms'] = channel_parms

    # Position dimensions. The scan range is multiplied by 1e9 and labelled 'nm'.
    # Each axis is built on its own so that scans with different numbers of
    # rows and columns are supported as well.
    num_cols, num_rows = header_dict['scan_pixels']
    width, height = header_dict['scan_range']
    pos_dims = [Dimension('X', 'nm', np.linspace(0, width, num_cols) * 1e9),
                Dimension('Y', 'nm', np.linspace(0, height, num_rows) * 1e9)]
    data_dict['Position Dimensions'] = pos_dims

    # Spectroscopic dimensions: images only have a single dummy value
    spec_dims = Dimension('arb.', 'a. u.', np.arange(1, dtype=np.float32))
    data_dict['Spectroscopic Dimensions'] = spec_dims

    return parm_dict, data_dict
def translate(self, raw_data_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ------------
    raw_data_path : string / unicode
        Absolute file path of the data .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    raw_data_path = path.abspath(raw_data_path)
    folder_path, file_name = path.split(raw_data_path)

    # The last four characters (the '.mat' extension) are replaced by '.h5'
    h5_path = path.join(folder_path, file_name[:-4] + '.h5')
    if path.exists(h5_path):
        remove(h5_path)
    h5_f = h5py.File(h5_path, 'w')

    h5_raw = None
    self.h5_read = True
    try:
        # First try to read the raw file via h5py. If h5py cannot open it
        # (it reports this with an OSError), fall back to scipy's loadmat.
        try:
            h5_raw = h5py.File(raw_data_path, 'r')
        except (ImportError, OSError):
            self.h5_read = False
            h5_raw = loadmat(raw_data_path)

        excite_cell = h5_raw['dc_amp_cell3']
        test = excite_cell[0][0]
        if self.h5_read:
            # The cell holds a reference; load the referenced values into
            # memory since the raw file is closed before returning
            excitation_vec = np.squeeze(h5_raw[test][()])
        else:
            excitation_vec = np.float32(np.squeeze(test))

        current_cell = h5_raw['current_cell3']

        num_rows = current_cell.shape[0]
        num_cols = current_cell.shape[1]
        num_iv_pts = excitation_vec.size

        current_data = np.zeros(shape=(num_rows * num_cols, num_iv_pts),
                                dtype=np.float32)
        for row_ind in range(num_rows):
            for col_ind in range(num_cols):
                pix_ind = row_ind * num_cols + col_ind
                if self.h5_read:
                    curr_val = np.squeeze(
                        h5_raw[current_cell[row_ind][col_ind]][()])
                else:
                    curr_val = np.float32(
                        np.squeeze(current_cell[row_ind][col_ind]))
                # Scaled by 1E+9 to match the '1E-9 A' units written below
                current_data[pix_ind, :] = 1E+9 * curr_val

        parm_dict = self._read_parms(h5_raw)
        parm_dict.update({'translator': 'FORC_IV'})

        pos_desc = [Dimension('Y', 'm', np.arange(num_rows)),
                    Dimension('X', 'm', np.arange(num_cols))]
        spec_desc = [Dimension('DC Bias', 'V', excitation_vec)]

        meas_grp = create_indexed_group(h5_f, 'Measurement')
        chan_grp = create_indexed_group(meas_grp, 'Channel')

        write_simple_attrs(chan_grp, parm_dict)

        write_main_dataset(chan_grp, current_data, 'Raw_Data',
                           'Current', '1E-9 A',
                           pos_desc, spec_desc)
    finally:
        # Release both file handles, also when the translation failed.
        # The result of loadmat is a plain dictionary and needs no closing.
        if self.h5_read and h5_raw is not None:
            h5_raw.close()
        h5_f.close()

    return h5_path
def _setupH5(self, usize, vsize, data_type, num_images, main_parms):
    """
    Prepare the HDF5 file for a movie: write the attributes, create the
    Measurement and Channel groups and allocate the datasets.

    Parameters
    ----------
    usize : int
        Number of pixel columns in the images
    vsize : int
        Number of pixel rows in the images
    data_type : type
        Data type to save image as
    num_images : int
        Number of images in the movie
    main_parms : dict
        Parameters written to the Measurement group. The image count, image
        sizes, pixel count and translator name are added to it in place.

    Returns
    -------
    h5_main : h5py.Dataset
        HDF5 Dataset that the images will be written into
    h5_mean_spec : h5py.Dataset
        Zero-filled float32 'Spectroscopic_Mean' dataset with one entry per image
    h5_ronch : h5py.Dataset
        Zero-filled float32 'Mean_Ronchigram' dataset with one entry per pixel
    """
    num_pixels = usize * vsize

    # File-level attributes
    write_simple_attrs(self.h5_file, {'data_type': 'PtychographyData'})

    # Measurement-level attributes (the caller's dictionary is extended)
    main_parms.update({'num_images': num_images,
                       'image_size_u': usize,
                       'image_size_v': vsize,
                       'num_pixels': num_pixels,
                       'translator': 'Movie'})
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, main_parms)

    chan_grp = create_indexed_group(meas_grp, 'Channel')

    # Positions are the image pixels, the only spectroscopic dimension is time
    pos_dims = [Dimension('X', 'a.u.', np.arange(usize)),
                Dimension('Y', 'a.u.', np.arange(vsize))]
    spec_dim = Dimension('Time', 's', np.arange(num_images))

    # One chunk always spans a complete image
    bytes_per_value = data_type(0).itemsize
    ds_chunking = calc_chunks([num_pixels, num_images], bytes_per_value,
                              unit_chunks=(num_pixels, 1))

    # Allocate space for the main data and for the two mean datasets
    h5_main = write_main_dataset(chan_grp, (num_pixels, num_images),
                                 'Raw_Data', 'Intensity', 'a.u.',
                                 pos_dims, spec_dim,
                                 chunks=ds_chunking, dtype=data_type)

    empty_image = np.zeros(num_pixels, dtype=np.float32)
    h5_ronch = meas_grp.create_dataset('Mean_Ronchigram', data=empty_image,
                                       dtype=np.float32)

    empty_series = np.zeros(num_images, dtype=np.float32)
    h5_mean_spec = meas_grp.create_dataset('Spectroscopic_Mean',
                                           data=empty_series,
                                           dtype=np.float32)

    self.h5_file.flush()

    return h5_main, h5_mean_spec, h5_ronch
def translate(self, parm_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    parm_path = path.abspath(parm_path)
    parm_dict, excit_wfm = self._read_parms(parm_path)

    self._parse_file_path(parm_path)

    num_dat_files = len(self.file_list)

    # The first data file is only needed to find the size of the spectrogram;
    # close it again right away
    with open(self.file_list[0], 'rb') as f:
        spectrogram_size, count_vals = self._parse_spectrogram_size(f)
    print("spectrogram size:", spectrogram_size)
    num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
    print('Number of pixels: ', num_pixels)
    print('Count Values: ', count_vals)
    if (num_pixels + 1) != count_vals:
        # NOTE(review): only a message is printed; the translation carries on
        print("Data size does not match number of pixels expected. Cannot continue")

    # Now start creating datasets and populating:
    ds_spec_inds, ds_spec_vals = build_ind_val_dsets(Dimension('Bias', 'V', excit_wfm),
                                                     is_spectral=True, verbose=False)
    # The values generated above are replaced by the excitation waveform itself
    ds_spec_vals.data = np.atleast_2d(excit_wfm)

    pos_desc = [Dimension('X', 'a.u.', np.arange(parm_dict['grid_num_cols'])),
                Dimension('Y', 'a.u.', np.arange(parm_dict['grid_num_rows']))]

    ds_pos_ind, ds_pos_val = build_ind_val_dsets(pos_desc, is_spectral=False,
                                                 verbose=False)

    ds_raw_data = VirtualDataset('Raw_Data', data=[],
                                 maxshape=(ds_pos_ind.shape[0], spectrogram_size - 5),
                                 dtype=np.complex64,
                                 chunking=(1, spectrogram_size - 5),
                                 compression='gzip')
    ds_raw_data.attrs['quantity'] = ['Complex']

    aux_ds_names = ['Position_Indices', 'Position_Values',
                    'Spectroscopic_Indices', 'Spectroscopic_Values']

    # Division by 2 due to real/imaginary.
    # Integer division replaces np.int(), which newer NumPy releases no longer provide.
    num_ai_chans = num_dat_files // 2

    # technically should change the date, etc.
    spm_data = VirtualGroup('')
    global_parms = generate_dummy_main_parms()
    global_parms['data_type'] = 'trKPFM'
    global_parms['translator'] = 'trKPFM'
    spm_data.attrs = global_parms
    meas_grp = VirtualGroup('Measurement_000')
    meas_grp.attrs = parm_dict
    spm_data.add_children([meas_grp])

    hdf = HDFwriter(self.h5_path)
    hdf.write(spm_data, print_log=False)

    self.raw_datasets = list()

    for chan_index in range(num_ai_chans):
        chan_grp = VirtualGroup('{:s}{:03d}'.format('Channel_', chan_index),
                                '/Measurement_000/')

        # The first channel is tagged as harmonic 1, every other one as 2
        if chan_index == 0:
            chan_grp.attrs = {'Harmonic': 1}
        else:
            chan_grp.attrs = {'Harmonic': 2}

        chan_grp.add_children([ds_pos_ind, ds_pos_val, ds_spec_inds,
                               ds_spec_vals, ds_raw_data])

        h5_refs = hdf.write(chan_grp, print_log=False)
        h5_raw = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]
        link_h5_objects_as_attrs(h5_raw, get_h5_obj_refs(aux_ds_names, h5_refs))
        # Each dataset is appended twice - presumably one entry per real and
        # per imaginary data file; verify against _read_data
        self.raw_datasets.append(h5_raw)
        self.raw_datasets.append(h5_raw)

    # Now that the N channels have been made, populate them with the actual data....
    self._read_data(parm_dict, parm_path, spectrogram_size)

    hdf.close()
    return self.h5_path
def _build_ancillary_datasets(self):
    """
    Build the descriptions of the position and spectroscopic dimensions.

    Parameters
    ----------
    None

    Returns
    -------
    pos_dims : list of Dimension
        The 'Y' and 'X' dimensions (in this order), each spanning 10 um
        starting at -5 um
    spec_dims_corrected : list of Dimension
        Spectroscopic dimensions. Dimensions of size 1 are left out.
    """
    # Triangular DC waveform with an amplitude of 10
    spec_step = np.arange(0, 1, 1 / self.n_steps)
    V_vec = 10 * np.arcsin(np.sin(self.n_fields * np.pi * spec_step)) * 2 / np.pi

    # build DC vector for typical BEPS
    Vdc_mat = np.vstack((V_vec, np.full(np.shape(V_vec), np.nan)))  # Add out-of-field values
    IF_vec = Vdc_mat.T.flatten()  # Base DC vector
    IF_vec = np.tile(IF_vec, self.n_cycles)  # Now with Cycles
    IF_vec = np.dot(1 + np.arange(self.forc_cycles)[:, None],
                    IF_vec[None, :])  # Do a single FORC
    IF_vec = np.tile(IF_vec.flatten(), self.forc_repeats)  # Repeat the FORC

    # Out-of-field (NaN) entries are replaced by zeros
    Vdc_vec = np.where(np.isnan(IF_vec), 0, IF_vec)

    # Size, label, units, first value and step size of every dimension
    spec_dims = [self.n_fields, self.n_steps, self.n_cycles,
                 self.forc_cycles, self.forc_repeats, self.n_bins]
    spec_labs = ['Field', 'DC_Offset', 'Cycle', 'FORC', 'FORC_repeat', 'Frequency']
    spec_units = ['', 'V', '', '', '', 'Hz']
    spec_start = [0, 0, 0, 0, 0, self.start_freq]
    spec_steps = [1, 1, 1, 1, 1, (self.end_freq - self.start_freq) / self.n_bins]

    spec_dims_corrected = list()
    for dim_size, dim_name, dim_units, step_size, init_val in zip(
            spec_dims, spec_labs, spec_units, spec_steps, spec_start):
        # Remove dimensions with single values. Filtering here works for any
        # number of remaining dimensions, including exactly one.
        if dim_size == 1:
            continue

        if dim_name == 'DC_Offset':
            # The DC offset is not linear: take every other entry of the DC vector
            # NOTE(review): Vdc_vec also spans the cycles and FORC repeats -
            # confirm that its length is what this dimension should have
            value = Vdc_vec[::2]
        else:
            value = np.arange(dim_size) * step_size + init_val
        spec_dims_corrected.append(Dimension(dim_name, dim_units, value))

    pos_dims = list()
    for dim_size, dim_name in zip([self.N_y, self.N_x], ['Y', 'X']):
        pos_dims.append(Dimension(dim_name, 'um',
                                  np.arange(dim_size) * (10 / dim_size) + -5))

    return pos_dims, spec_dims_corrected
def translate(self, file_path, show_plots=True, save_plots=True, do_histogram=False):
    """
    Basic method that translates .dat data file(s) to a single .h5 file

    Parameters
    ----------
    file_path : str
        Absolute file path for one of the data files.
        It is assumed that this file is of the OLD data format.
    show_plots : bool, optional
        Passed on to generatePlotGroups
    save_plots : bool, optional
        Passed on to generatePlotGroups
    do_histogram : bool, optional
        Passed on to generatePlotGroups

    Returns
    -------
    h5_path : str
        Path of the .h5 file that was written. None is returned (after a
        warning) when a user defined voltage spectroscopy waveform was used,
        when the file size does not match the grid, or when the number of
        bins per step is not an integer.

    Raises
    ------
    ValueError
        If the real and imaginary data files differ in size
    """
    file_path = path.abspath(file_path)
    (folder_path, basename) = path.split(file_path)
    (basename, path_dict) = self._parse_file_path(file_path)

    # Any existing file of the same name is replaced
    h5_path = path.join(folder_path, basename + '.h5')
    if path.exists(h5_path):
        remove(h5_path)
    # NOTE(review): the early 'return' statements below leave this file open
    self.h5_file = h5py.File(h5_path, 'w')

    isBEPS = True
    parm_dict = self.__getParmsFromOldMat(path_dict['old_mat_parms'])

    ignored_plt_grps = ['in-field']  # Here we assume that there is no in-field.
    # If in-field data is captured then the translator would have to be modified.

    # Technically, we could do away with this if statement, as isBEPS is always true for this translation
    if isBEPS:
        parm_dict['data_type'] = 'BEPSData'

        std_expt = parm_dict['VS_mode'] != 'load user defined VS Wave from file'

        if not std_expt:
            warn('This translator does not handle user defined voltage spectroscopy')
            return

        spec_label = getSpectroscopicParmLabel(parm_dict['VS_mode'])

    # Check file sizes:
    # NOTE(review): the 'write_*' files are only used for this size check; the
    # data is always read from path_dict['read_real'] / ['read_imag'] further below
    if 'read_real' in path_dict.keys():
        real_size = path.getsize(path_dict['read_real'])
        imag_size = path.getsize(path_dict['read_imag'])
    else:
        real_size = path.getsize(path_dict['write_real'])
        imag_size = path.getsize(path_dict['write_imag'])

    if real_size != imag_size:
        raise ValueError("Real and imaginary file sizes DON'T match!. Ending")

    num_rows = int(parm_dict['grid_num_rows'])
    num_cols = int(parm_dict['grid_num_cols'])
    num_pix = num_rows * num_cols
    tot_bins = real_size / (num_pix * 4)  # Finding bins by simple division of entire datasize

    # Check for case where only a single pixel is missing.
    check_bins = real_size / ((num_pix - 1) * 4)

    if tot_bins % 1 and check_bins % 1:
        # Neither the full grid nor the grid minus one pixel fits the file size
        warn('Aborting! Some parameter appears to have changed in-between')
        return
    elif not tot_bins % 1:
        # Everything's ok
        pass
    elif not check_bins % 1:
        tot_bins = check_bins
        warn('Warning:  A pixel seems to be missing from the data.  File will be padded with zeros.')

    tot_bins = int(tot_bins)
    (bin_inds, bin_freqs, bin_FFT, ex_wfm, dc_amp_vec) = self.__readOldMatBEvecs(path_dict['old_mat_parms'])

    """
    Because this is the old data format and there is a discrepancy in the number of bins (they seem to be 2 less
    than the actual number), we need to re-calculate it based on the available data. This is done below.
    """

    # The frequencies read from the .mat file are replaced by an evenly spaced
    # vector around the center frequency, one value per bin index
    band_width = parm_dict['BE_band_width_[Hz]'] * (0.5 - parm_dict['BE_band_edge_trim'])
    st_f = parm_dict['BE_center_frequency_[Hz]'] - band_width
    en_f = parm_dict['BE_center_frequency_[Hz]'] + band_width
    bin_freqs = np.linspace(st_f, en_f, len(bin_inds), dtype=np.float32)

    # Forcing standardized datatypes:
    bin_inds = np.int32(bin_inds)
    bin_freqs = np.float32(bin_freqs)
    bin_FFT = np.complex64(bin_FFT)
    ex_wfm = np.float32(ex_wfm)

    self.FFT_BE_wave = bin_FFT

    (UDVS_labs, UDVS_units, UDVS_mat) = self.__buildUDVSTable(parm_dict)

    # Remove the unused plot group columns before proceeding:
    (UDVS_mat, UDVS_labs, UDVS_units) = trimUDVS(UDVS_mat, UDVS_labs, UDVS_units, ignored_plt_grps)

    # Row 0 will hold the bin index within a step, row 1 the UDVS step index
    spec_inds = np.zeros(shape=(2, tot_bins), dtype=INDICES_DTYPE)

    # Will assume that all excitation waveforms have same number of bins
    # Here, the denominator is 2 because only out of field measurements. For IF + OF, should be 1
    num_actual_udvs_steps = UDVS_mat.shape[0] / 2
    bins_per_step = tot_bins / num_actual_udvs_steps

    # Some more checks
    if bins_per_step % 1:
        warn('Non integer number of bins per step!')
        return
    else:
        bins_per_step = int(bins_per_step)
        num_actual_udvs_steps = int(num_actual_udvs_steps)

    # Fill the spectroscopic indices step by step; stind is the first column
    # of the step that is currently being filled
    stind = 0
    for step_index in range(UDVS_mat.shape[0]):
        if UDVS_mat[step_index, 2] < 1E-3:  # invalid AC amplitude
            continue  # skip
        spec_inds[0, stind:stind + bins_per_step] = np.arange(bins_per_step, dtype=INDICES_DTYPE)  # Bin step
        spec_inds[1, stind:stind + bins_per_step] = step_index * np.ones(bins_per_step, dtype=INDICES_DTYPE)  # UDVS step
        stind += bins_per_step
    del stind, step_index

    # Some very basic information that can help the processing / analysis crew
    parm_dict['num_bins'] = tot_bins
    parm_dict['num_pix'] = num_pix
    parm_dict['num_udvs_steps'] = num_actual_udvs_steps

    # File-level attributes
    global_parms = dict()
    global_parms['grid_size_x'] = parm_dict['grid_num_cols']
    global_parms['grid_size_y'] = parm_dict['grid_num_rows']
    global_parms['experiment_date'] = parm_dict['File_date_and_time']
    # assuming that the experiment was completed:
    global_parms['current_position_x'] = parm_dict['grid_num_cols'] - 1
    global_parms['current_position_y'] = parm_dict['grid_num_rows'] - 1
    global_parms['data_type'] = parm_dict['data_type']  # self.__class__.__name__
    global_parms['translator'] = 'ODF'
    write_simple_attrs(self.h5_file, global_parms)

    # Create Measurement and Channel groups
    meas_grp = create_indexed_group(self.h5_file, 'Measurement')
    write_simple_attrs(meas_grp, parm_dict)

    chan_grp = create_indexed_group(meas_grp, 'Channel')
    chan_grp.attrs['Channel_Input'] = parm_dict['IO_Analog_Input_1']

    # Create Auxilliary Datasets
    h5_ex_wfm = chan_grp.create_dataset('Excitation_Waveform', data=ex_wfm)

    # One column slice per UDVS label (built here but not used further below)
    udvs_slices = dict()
    for col_ind, col_name in enumerate(UDVS_labs):
        udvs_slices[col_name] = (slice(None), slice(col_ind, col_ind + 1))
    h5_UDVS = chan_grp.create_dataset('UDVS', data=UDVS_mat, dtype=np.float32)
    write_simple_attrs(h5_UDVS, {'labels': UDVS_labs, 'units': UDVS_units})

    h5_bin_steps = chan_grp.create_dataset('Bin_Steps', data=np.arange(bins_per_step, dtype=np.uint32),
                                           dtype=np.uint32)

    # Need to add the Bin Waveform type - infer from UDVS
    exec_bin_vec = self.signal_type * np.ones(len(bin_inds), dtype=np.int32)
    h5_wfm_typ = chan_grp.create_dataset('Bin_Wfm_Type', data=exec_bin_vec, dtype=np.int32)

    h5_bin_inds = chan_grp.create_dataset('Bin_Indices', data=bin_inds, dtype=np.uint32)
    h5_bin_freq = chan_grp.create_dataset('Bin_Frequencies', data=bin_freqs, dtype=np.float32)
    h5_bin_FFT = chan_grp.create_dataset('Bin_FFT', data=bin_FFT, dtype=np.complex64)
    # Noise floor should be of shape: (udvs_steps x 3 x positions)
    # NOTE(review): the dataset created here has the shape (positions, udvs_steps)
    h5_noise_floor = chan_grp.create_dataset('Noise_Floor', shape=(num_pix, num_actual_udvs_steps), dtype=nf32,
                                             chunks=(1, num_actual_udvs_steps))

    """
    ONLY ALLOCATING SPACE FOR MAIN DATA HERE!
    Chunk by each UDVS step - this makes it easy / quick to:
        1. read data for a single UDVS step from all pixels
        2. read an entire / multiple pixels at a time
    The only problem is that a typical UDVS step containing 50 steps occupies only 400 bytes.
    This is smaller than the recommended chunk sizes of 10,000 - 999,999 bytes
    meaning that the metadata would be very substantial.
    This assumption is fine since we almost do not handle any user defined cases
    """

    """
    New Method for chunking the Main_Data dataset.  Chunking is now done in N-by-N squares of UDVS steps by pixels.
    N is determined dinamically based on the dimensions of the dataset.  Currently it is set such that individual
    chunks are less than 10kB in size.

    Chris Smith -- [email protected]
    """

    # The number of columns / rows is passed instead of explicit position values
    pos_dims = [Dimension('X', 'nm', num_cols), Dimension('Y', 'nm', num_rows)]

    # Create Spectroscopic Values and Spectroscopic Values Labels datasets
    spec_vals, spec_inds, spec_vals_labs, spec_vals_units, spec_vals_names = createSpecVals(UDVS_mat, spec_inds,
                                                                                            bin_freqs,
                                                                                            exec_bin_vec,
                                                                                            parm_dict, UDVS_labs,
                                                                                            UDVS_units)

    # One spectroscopic dimension per row of the spectroscopic values
    spec_dims = list()
    for row_ind, row_name in enumerate(spec_vals_labs):
        spec_dims.append(Dimension(row_name, spec_vals_units[row_ind], spec_vals[row_ind]))

    # Square chunks of (pixels x UDVS steps), bounded by the size of the dataset
    # NOTE(review): np.floor returns a float, so 'chunking' may be a float here
    pixel_chunking = maxReadPixels(10240, num_pix * num_actual_udvs_steps, bins_per_step,
                                   np.dtype('complex64').itemsize)
    chunking = np.floor(np.sqrt(pixel_chunking))
    chunking = max(1, chunking)
    chunking = min(num_actual_udvs_steps, num_pix, chunking)
    self.h5_main = write_main_dataset(chan_grp, (num_pix, tot_bins), 'Raw_Data', 'Piezoresponse', 'V', pos_dims,
                                      spec_dims, dtype=np.complex64,
                                      chunks=(chunking, chunking * bins_per_step), compression='gzip')

    # NOTE(review): the main dataset was stored as self.h5_main just above but
    # is read back as self.ds_main from here on - confirm that self.ds_main is
    # set elsewhere
    self.mean_resp = np.zeros(shape=(self.ds_main.shape[1]), dtype=np.complex64)
    self.max_resp = np.zeros(shape=(self.ds_main.shape[0]), dtype=np.float32)
    self.min_resp = np.zeros(shape=(self.ds_main.shape[0]), dtype=np.float32)

    # Now read the raw data files:
    self._read_data(path_dict['read_real'], path_dict['read_imag'], parm_dict)
    self.h5_file.flush()

    generatePlotGroups(self.ds_main, self.mean_resp, folder_path, basename, self.max_resp, self.min_resp,
                       max_mem_mb=self.max_ram, spec_label=spec_label, show_plots=show_plots,
                       save_plots=save_plots, do_histogram=do_histogram)

    self.h5_file.close()

    return h5_path
def _read_data(self, file_list, h5_channels):
    """
    Iterates over the files in `file_list`, reading each dataset, cropping
    and binning it, and writes the flattened images into the matching channel.
    Also builds the Mean_Ronchigram and the Mean_Spectrogram datasets of
    every channel at the same time.

    Parameters
    ----------
    file_list : list of str
        Paths of the files to read. Files ending in '.ndata1' or '.ndata' are
        read as zip archives that contain 'data.npy'; files ending in '.npy'
        are loaded directly.
    h5_channels : list
        Channel groups, one per entry in `file_list`, that receive the
        datasets. The image and scan sizes are written as attributes to the
        parent of each channel group.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If a file has an extension other than the ones listed above
    """
    for this_file, this_channel in zip(file_list, h5_channels):
        _, ext = os.path.splitext(this_file)
        if ext in ['.ndata1', '.ndata']:
            # Read the array straight out of the zip archive. This avoids
            # extracting 'data.npy' into the current working directory (which
            # could overwrite a file of the same name) and makes sure that
            # the archive is closed again.
            with zipfile.ZipFile(this_file, 'r') as this_zip:
                with this_zip.open('data.npy') as npy_file:
                    this_data = np.load(npy_file)
        elif ext == '.npy':
            # Read data directly from npy file
            this_data = np.load(this_file)
        else:
            # Without this check the data of the previous file would silently
            # be written a second time
            raise ValueError('Unsupported file extension "{}" for file: {}'
                             ''.format(ext, this_file))

        # Pad the data with leading axes until it is four dimensional:
        # (scan x, scan y, image u, image v)
        while this_data.ndim < 4:
            this_data = np.expand_dims(this_data, 0)

        this_data = self.crop_ronc(this_data)
        scan_size_x, scan_size_y, usize, vsize = this_data.shape

        # Size of the images after binning
        usize = int(round(1.0 * usize / self.bin_factor[-2]))
        vsize = int(round(1.0 * vsize / self.bin_factor[-1]))

        num_images = scan_size_x * scan_size_y
        num_pixels = usize * vsize

        # Write these attributes to the parent of the channel group
        new_attrs = {'image_size_u': usize,
                     'image_size_v': vsize,
                     'scan_size_x': scan_size_x,
                     'scan_size_y': scan_size_y}
        write_simple_attrs(this_channel.parent, new_attrs)

        # Positions are the scan points; the image pixels are the
        # spectroscopic dimensions
        spec_desc = [Dimension('U', 'pixel', np.arange(usize)),
                     Dimension('V', 'pixel', np.arange(vsize))]
        pos_desc = [Dimension('X', 'pixel', np.arange(scan_size_x)),
                    Dimension('Y', 'pixel', np.arange(scan_size_y))]

        # One chunk always spans a complete image
        ds_chunking = calc_chunks([num_images, num_pixels],
                                  np.float32(0).itemsize,
                                  unit_chunks=(1, num_pixels))

        # Allocate space for the main data and the two mean datasets
        h5_main = write_main_dataset(this_channel, (num_images, num_pixels),
                                     'Raw_Data', 'Intensity', 'a.u.',
                                     pos_desc, spec_desc,
                                     chunks=ds_chunking, dtype=np.float32)

        h5_ronch = this_channel.create_dataset(
            'Mean_Ronchigram', data=np.zeros(num_pixels, dtype=np.float32))
        h5_mean_spec = this_channel.create_dataset(
            'Mean_Spectrogram', data=np.zeros(num_images, dtype=np.float32))

        # Bin the images and flatten them to (images, pixels)
        this_data = self.binning_func(this_data, self.bin_factor,
                                      self.bin_func).reshape(h5_main.shape)

        h5_main[:, :] = this_data
        h5_mean_spec[:] = np.mean(this_data, axis=1)
        h5_ronch[:] = np.mean(this_data, axis=0)

        self.h5_f.flush()

    self.h5_f.flush()
def _write_results_chunk(self):
    """
    Write the SVD results held by this object (U, S and V) into a new
    results group and mark the computation as complete.
    """
    source = self.h5_main
    num_positions = source.shape[0]

    comp_dim = Dimension('Principal Component', 'a. u.', len(self.__s))

    # Results group, provenance and parameters
    h5_svd_group = create_results_group(source, self.process_name,
                                        h5_parent_group=self._h5_target_group)
    self.h5_results_grp = h5_svd_group
    self._write_source_dset_provenance()

    write_simple_attrs(h5_svd_group, self.parms_dict)
    write_simple_attrs(h5_svd_group, {'svd_method': 'sklearn-randomized'})

    # U: positions x components - reuses the position datasets of the source
    u_chunks = calc_chunks(self.__u.shape, np.float32(0).itemsize)
    write_main_dataset(h5_svd_group, np.float32(self.__u), 'U',
                       'Abundance', 'a.u.', None, comp_dim,
                       h5_pos_inds=source.h5_pos_inds,
                       h5_pos_vals=source.h5_pos_vals,
                       dtype=np.float32, chunks=u_chunks)

    # V: components x spectra - reuses the spectroscopic datasets of the source
    v_chunks = calc_chunks(self.__v.shape, source.dtype.itemsize)
    v_quantity = get_attr(source, 'quantity')[0]
    h5_v = write_main_dataset(h5_svd_group, self.__v, 'V',
                              v_quantity, 'a.u.', comp_dim, None,
                              h5_spec_inds=source.h5_spec_inds,
                              h5_spec_vals=source.h5_spec_vals,
                              chunks=v_chunks)

    # No point making this 1D dataset a main dataset
    h5_svd_group.create_dataset('S', data=np.float32(self.__s))

    # Copy the plot group region references of the source dataset over to V,
    # stretched along the first axis to cover all of V
    plot_group_keys = [key for key in source.attrs.keys()
                       if '_Plot_Group' in key]
    last_row = h5_v.shape[0] - 1
    for key in plot_group_keys:
        corners = get_indices_for_region_ref(source, source.attrs[key],
                                             return_method='corners')
        corners = corners.reshape([-1, 2, 2])
        corners[:, 1, 0] = last_row

        h5_v.attrs[key] = create_region_reference(h5_v, corners)

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    all_done = np.ones(num_positions, dtype=np.uint8)
    self._h5_status_dset = h5_svd_group.create_dataset(self._status_dset_name,
                                                       data=all_done)
    # keeping legacy option:
    h5_svd_group.attrs['last_pixel'] = num_positions
def translate(self, parm_path):
    """
    Converts a folder of SPORC .mat result files into a single .h5 file

    The ancillary datasets and an empty main dataset are written first. The
    per-pixel result files ('result_r<row>_c<col>.mat') located next to the
    parameters file are then read one at a time and written into the main
    dataset. An existing file at the output path is removed beforehand.

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    parm_path = path.abspath(parm_path)
    # The .h5 file is named after the folder holding the parameters file
    folder_path = path.split(parm_path)[0]
    base_name = path.split(folder_path)[1]
    h5_path = path.join(folder_path, base_name + '.h5')

    # Read parameters
    print('reading parameter files')
    parm_dict, excit_wfm, spec_ind_mat = self.__readparms(parm_path)
    parm_dict['data_type'] = 'SPORC'

    num_rows = parm_dict['grid_num_rows']
    num_cols = parm_dict['grid_num_cols']
    num_pix = num_rows * num_cols

    # new data format
    spec_ind_mat = np.transpose(VALUES_DTYPE(spec_ind_mat))

    # Now start creating datasets and populating:
    ds_pos_ind, ds_pos_val = build_ind_val_dsets(
        [Dimension('Y', 'm', np.arange(num_rows)),
         Dimension('X', 'm', np.arange(num_cols))],
        is_spectral=False)

    # Each label selects a single row of the spectroscopic matrices
    spec_ind_labels = ['x index', 'y index', 'loop index',
                       'repetition index', 'slope index']
    spec_ind_dict = {label: (slice(idx, idx + 1), slice(None))
                     for idx, label in enumerate(spec_ind_labels)}

    ds_spec_inds = VirtualDataset('Spectroscopic_Indices',
                                  INDICES_DTYPE(spec_ind_mat))
    ds_spec_inds.attrs['labels'] = spec_ind_dict
    ds_spec_vals = VirtualDataset('Spectroscopic_Values', spec_ind_mat)
    ds_spec_vals.attrs['labels'] = spec_ind_dict
    ds_spec_vals.attrs['units'] = ['V', 'V', '', '', '']

    wfm_length = len(excit_wfm)
    ds_excit_wfm = VirtualDataset('Excitation_Waveform',
                                  np.float32(excit_wfm))
    # Empty placeholder; the rows are filled in from the result files below
    ds_raw_data = VirtualDataset('Raw_Data', data=[],
                                 maxshape=(num_pix, wfm_length),
                                 dtype=np.float16,
                                 chunking=(1, wfm_length),
                                 compression='gzip')

    # technically should change the date, etc.
    chan_grp = VirtualGroup('Channel_000')
    chan_grp.attrs = parm_dict
    chan_grp.add_children([ds_pos_ind, ds_pos_val, ds_spec_inds,
                           ds_spec_vals, ds_excit_wfm, ds_raw_data])

    global_parms = {
        'grid_size_x': parm_dict['grid_num_cols'],
        'grid_size_y': parm_dict['grid_num_rows'],
        # assuming that the experiment was completed:
        'current_position_x': parm_dict['grid_num_cols'] - 1,
        'current_position_y': parm_dict['grid_num_rows'] - 1,
        'data_type': parm_dict['data_type'],
        'translator': 'SPORC',
    }

    meas_grp = VirtualGroup('Measurement_000')
    meas_grp.add_children([chan_grp])
    spm_data = VirtualGroup('')
    spm_data.attrs = global_parms
    spm_data.add_children([meas_grp])

    if path.exists(h5_path):
        remove(h5_path)

    # Write everything except for the main data.
    hdf = HDFwriter(h5_path)
    h5_refs = hdf.write(spm_data)

    h5_main = get_h5_obj_refs(['Raw_Data'], h5_refs)[0]

    # Now doing link_h5_objects_as_attrs:
    aux_ds_names = ['Excitation_Waveform', 'Position_Indices',
                    'Position_Values', 'Spectroscopic_Indices',
                    'Spectroscopic_Values']
    link_h5_objects_as_attrs(h5_main,
                             get_h5_obj_refs(aux_ds_names, h5_refs))

    print('reading raw data now...')

    # Now read the raw data files (row / column numbers in the names are 1-based):
    pos_ind = 0
    for row_ind in range(1, num_rows + 1):
        for col_ind in range(1, num_cols + 1):
            file_path = path.join(
                folder_path,
                'result_r{}_c{}.mat'.format(row_ind, col_ind))
            if not path.exists(file_path):
                # The row for a missing file is simply left unwritten
                print('File for row {} col {} not found'.format(
                    row_ind, col_ind))
            else:
                # Load data file
                pix_data = loadmat(file_path, squeeze_me=True)
                # Take the inverse FFT on 1st dimension
                shifted = np.fft.ifftshift(pix_data['data'])
                pix_vec = np.fft.ifft(shifted)
                # Verified with Matlab - no conjugate required here.
                h5_main[pos_ind, :] = np.float16(np.real(pix_vec))
                hdf.flush()  # flush from memory!

            pos_ind += 1
            # Report progress whenever an exact multiple of 10 % is reached
            if (100.0 * pos_ind / num_pix) % 10 == 0:
                print('Finished reading {} % of data'.format(
                    int(100 * pos_ind / num_pix)))

    hdf.close()

    return h5_path
def translate(self, parm_path):
    """
    The main function that translates the provided file into a .h5 file

    Reads the parameters, inspects the first data file to find the
    spectrogram size, creates one Measurement group with one Channel group
    (and one complex-valued 'Raw_Data' main dataset) per analog-input
    channel, and then delegates the reading of the raw data to
    ``self._read_data``.

    Parameters
    ------------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    ----------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    parm_path = path.abspath(parm_path)
    parm_dict, excit_wfm = self._read_parms(parm_path)

    # presumably populates self.file_list and self.h5_path - both are read
    # below but never assigned in this method
    self._parse_file_path(parm_path)

    num_dat_files = len(self.file_list)

    # Only the first data file is inspected here. The context manager makes
    # sure that the handle is released again (it used to be left open).
    with open(self.file_list[0], 'rb') as f:
        spectrogram_size, count_vals = self._parse_spectrogram_size(f)

    print("Excitation waveform shape: ", excit_wfm.shape)
    print("spectrogram size:", spectrogram_size)
    num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
    print('Number of pixels: ', num_pixels)
    print('Count Values: ', count_vals)

    # NOTE: a sanity check of (num_pixels + 1) against count_vals existed
    # here but was disabled; count_vals is currently only reported.

    # Find how many channels we have to make
    num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

    # Now start creating datasets and populating:
    # Start with getting an h5 file. The mode is explicit because h5py >= 3.0
    # opens files read-only by default, which makes group creation fail.
    h5_file = h5py.File(self.h5_path, 'a')
    try:
        # First create a measurement group
        h5_meas_group = create_indexed_group(h5_file, 'Measurement')

        # Parameters that will be written as attributes to this Measurement group
        global_parms = {'data_type': 'trKPFM', 'translator': 'trKPFM'}
        write_simple_attrs(h5_meas_group, global_parms)
        write_simple_attrs(h5_meas_group, parm_dict)

        # Now start building the position and spectroscopic dimension containers
        # The excit_wfm only has the DC values without any information on
        # cycles, time, etc. What we really need is to add the time component.
        # For every DC step there are some time steps.
        # NOTE(review): 5 values of every spectrogram are excluded here and in
        # the dataset shape below - presumably non-data entries; confirm
        # against _parse_spectrogram_size / _read_data.
        # Need to divide by 2 because it considers on and off field
        num_time_steps = (spectrogram_size - 5) // excit_wfm.size // 2

        time_vec = np.linspace(0, parm_dict['IO_time'], num_time_steps)
        print('Num time steps: {}'.format(num_time_steps))
        print('DC Vec size: {}'.format(excit_wfm.shape))
        print('Spectrogram size: {}'.format(spectrogram_size))

        field_vec = np.array([0, 1])

        # There are three spectroscopic axes.
        # NOTE(review): the original comment listed "time, voltage, field" as
        # the fastest-to-slowest order, whereas the list below is
        # Time, Field, Bias - confirm which one is intended.
        spec_dims = [
            Dimension('Time', 's', time_vec),
            Dimension('Field', 'Binary', field_vec),
            Dimension('Bias', 'V', excit_wfm)
        ]

        # NOTE(review): the two position dimensions use different units
        # ('nm' vs 'um') - confirm.
        pos_dims = [
            Dimension('Cols', 'nm', parm_dict['grid_num_cols']),
            Dimension('Rows', 'um', parm_dict['grid_num_rows'])
        ]

        self.raw_datasets = list()

        for chan_index in range(num_ai_chans):
            chan_grp = create_indexed_group(h5_meas_group, 'Channel')

            # The first channel is tagged as harmonic 1, all others as 2
            harmonic = 1 if chan_index == 0 else 2
            write_simple_attrs(chan_grp, {'Harmonic': harmonic})

            h5_raw = write_main_dataset(
                chan_grp,  # parent HDF5 group
                (num_pixels, spectrogram_size - 5),  # shape of Main dataset
                'Raw_Data',  # Name of main dataset
                'Deflection',  # Physical quantity contained in Main dataset
                'V',  # Units for the physical quantity
                pos_dims,  # Position dimensions
                spec_dims,  # Spectroscopic dimensions
                dtype=np.complex64,  # data type / precision
                compression='gzip',
                chunks=(1, spectrogram_size - 5),
                main_dset_attrs={'quantity': 'Complex'})

            # The dataset is deliberately registered twice - presumably once
            # for the real and once for the imaginary data file of this
            # channel, so that raw_datasets lines up with file_list (verify
            # against _read_data).
            self.raw_datasets.append(h5_raw)
            self.raw_datasets.append(h5_raw)

        # Now that the N channels have been made, populate them with the actual data....
        self._read_data(parm_dict, parm_path, spectrogram_size)
    finally:
        # Close the file even when the translation fails part-way
        h5_file.close()

    return self.h5_path
def _write_results_chunk(self):
    """
    Writes the cluster labels and the mean response of each cluster to the
    h5 file and marks every position as completed

    Returns
    ---------
    h5_group : HDF5 Group reference
        Reference to the group that contains the clustering results
    """
    print('Writing clustering results to file.')
    source = self.h5_main
    num_clusters = self.__mean_resp.shape[0]

    results_grp = create_results_group(source, self.process_name)
    write_simple_attrs(results_grp, self.parms_dict)

    # One label per position, stored as a single-column main dataset
    label_column = np.uint32(self.__labels.reshape([-1, 1]))
    write_main_dataset(results_grp, label_column, 'Labels', 'Cluster ID',
                       'a. u.', None, Dimension('Cluster', 'ID', 1),
                       h5_pos_inds=source.h5_pos_inds,
                       h5_pos_vals=source.h5_pos_vals,
                       aux_spec_prefix='Cluster_',
                       dtype=np.uint32)

    if self.num_comps == source.shape[1]:
        # All components were used - share the source's spectroscopic datasets
        spec_inds = source.h5_spec_inds
        spec_vals = source.h5_spec_vals
    else:
        # Setup the Spectroscopic Indices and Values for the Mean Response if
        # we didn't use all components.
        # Note that a sliced spectroscopic matrix may not be contiguous. Let's
        # just lose the spectroscopic data for now until a better method is
        # figured out
        picked = self.data_slice[1]
        if isinstance(picked, np.ndarray):
            picked = picked.tolist()
        picked_vals = source.h5_spec_vals[:, picked].squeeze()
        reduced_dim = Dimension('Original_Spectral_Index', 'a.u.',
                                picked_vals)
        spec_inds, spec_vals = write_ind_val_dsets(results_grp, reduced_dim,
                                                   is_spectral=True)

    # For now, link centroids with default spectroscopic indices and values.
    quantity = get_attr(source, 'quantity')[0]
    units = get_attr(source, 'units')[0]
    cluster_dim = Dimension('Cluster', 'a. u.', np.arange(num_clusters))
    write_main_dataset(results_grp, self.__mean_resp, 'Mean_Response',
                       quantity, units, cluster_dim, None,
                       h5_spec_inds=spec_inds,
                       aux_pos_prefix='Mean_Resp_Pos_',
                       h5_spec_vals=spec_vals)

    # Marking completion:
    num_positions = source.shape[0]
    self._status_dset_name = 'completed_positions'
    self._h5_status_dset = results_grp.create_dataset(
        self._status_dset_name, data=np.ones(num_positions, dtype=np.uint8))
    # keeping legacy option:
    results_grp.attrs['last_pixel'] = num_positions

    return results_grp
def _create_results_datasets(self):
    """
    Creates all the datasets necessary for holding all parameters + data.

    A results group is created for the source dataset and the parameters are
    written to it as attributes. Depending on the configuration of this
    object, the following are created inside that group:

    * 'Composite_Filter' - if ``self.composite_filter`` is a numpy array
    * new position datasets - if ``self.num_effective_pix`` differs from the
      number of rows in the source dataset (otherwise those of the source
      dataset are reused)
    * 'Noise_Floors' - if ``self.noise_threshold`` is not None
    * 'Filtered_Data' - if ``self.write_filtered`` is set
    * 'Condensed_Data' - if ``self.write_condensed`` is set

    Progress messages are only printed by MPI rank 0 in verbose mode.
    """
    self.h5_results_grp = create_results_group(self.h5_main,
                                               self.process_name)

    self.parms_dict.update({
        'last_pixel': 0,
        'algorithm': 'pycroscopy_SignalFilter'
    })
    write_simple_attrs(self.h5_results_grp, self.parms_dict)

    assert isinstance(self.h5_results_grp, h5py.Group)

    # Same expression that was previously repeated before every message
    verbose = self.verbose and self.mpi_rank == 0

    if isinstance(self.composite_filter, np.ndarray):
        self.h5_results_grp.create_dataset(
            'Composite_Filter', data=np.float32(self.composite_filter))

        if verbose:
            print('Rank {} - Finished creating the Composite_Filter dataset'.
                  format(self.mpi_rank))

    # First create the position datsets if the new indices are smaller...
    if self.num_effective_pix != self.h5_main.shape[0]:
        # TODO: Do this part correctly. See past solution:
        #
        # # need to make new position datasets by taking every n'th index / value:
        # new_pos_vals = np.atleast_2d(h5_pos_vals[slice(0, None, self.num_effective_pix), :])
        # pos_descriptor = []
        # for name, units, leng in zip(h5_pos_inds.attrs['labels'], h5_pos_inds.attrs['units'],
        #                              [int(np.unique(h5_pos_inds[:, dim_ind]).size / self.num_effective_pix)
        #                               for dim_ind in range(h5_pos_inds.shape[1])]):
        #     pos_descriptor.append(Dimension(name, units, np.arange(leng)))
        # ds_pos_inds, ds_pos_vals = build_ind_val_dsets(pos_descriptor, is_spectral=False, verbose=self.verbose)
        # h5_pos_vals.data = np.atleast_2d(new_pos_vals)  # The data generated above varies linearly. Override.
        #
        # For now a single generic 'pixel' dimension is written instead.
        h5_pos_inds_new, h5_pos_vals_new = write_ind_val_dsets(
            self.h5_results_grp,
            Dimension('pixel', 'a.u.', self.num_effective_pix),
            is_spectral=False,
            verbose=verbose)
        if verbose:
            print('Rank {} - Created the new position ancillary dataset'.
                  format(self.mpi_rank))
    else:
        h5_pos_inds_new = self.h5_main.h5_pos_inds
        h5_pos_vals_new = self.h5_main.h5_pos_vals
        if verbose:
            print('Rank {} - Reusing source datasets position datasets'.
                  format(self.mpi_rank))

    if self.noise_threshold is not None:
        # One noise floor value per effective pixel
        self.h5_noise_floors = write_main_dataset(
            self.h5_results_grp, (self.num_effective_pix, 1),
            'Noise_Floors', 'Noise', 'a.u.', None,
            Dimension('arb', '', [1]),
            dtype=np.float32,
            aux_spec_prefix='Noise_Spec_',
            h5_pos_inds=h5_pos_inds_new,
            h5_pos_vals=h5_pos_vals_new,
            verbose=verbose)
        if verbose:
            print('Rank {} - Finished creating the Noise_Floors dataset'.
                  format(self.mpi_rank))

    if self.write_filtered:
        # Filtered data is identical to Main_Data in every way - just a duplicate
        self.h5_filtered = create_empty_dataset(
            self.h5_main, self.h5_main.dtype, 'Filtered_Data',
            h5_group=self.h5_results_grp)
        if verbose:
            print('Rank {} - Finished creating the Filtered dataset'.format(
                self.mpi_rank))

    self.hot_inds = None

    if self.write_condensed:
        self.hot_inds = np.where(self.composite_filter > 0)[0]
        # only need to keep half the data
        self.hot_inds = np.uint(
            self.hot_inds[int(0.5 * len(self.hot_inds)):])
        num_hot = len(self.hot_inds)
        # hot_inds has already been halved above, so the spectroscopic
        # dimension must span all of the remaining indices. It was previously
        # halved a second time and no longer matched the dataset width.
        condensed_spec = Dimension('hot_frequencies', '', num_hot)
        self.h5_condensed = write_main_dataset(
            self.h5_results_grp, (self.num_effective_pix, num_hot),
            'Condensed_Data', 'Complex', 'a. u.', None, condensed_spec,
            h5_pos_inds=h5_pos_inds_new,
            h5_pos_vals=h5_pos_vals_new,
            # np.complex (an alias of the builtin complex) was removed in
            # NumPy 1.24; complex128 is the dtype it resolved to
            dtype=np.complex128,
            verbose=verbose)
        if verbose:
            print('Rank {} - Finished creating the Condensed dataset'.format(
                self.mpi_rank))

    # Let every rank finish creating datasets before the file is flushed
    if self.mpi_size > 1:
        self.mpi_comm.Barrier()
    self.h5_main.file.flush()
def _write_results_chunk(self):
    """
    Writes the components and projections of the decomposition to the h5
    file and marks every position as completed

    Returns
    ---------
    h5_group : HDF5 Group reference
        Reference to the group that contains the decomposition results
    """
    h5_decomp_group = create_results_group(
        self.h5_main, self.process_name,
        h5_parent_group=self._h5_target_group)
    # Register the results group BEFORE recording provenance. The SVD writer
    # in this file uses this order; presumably
    # _write_source_dset_provenance() reads self.h5_results_grp, which was
    # previously still unset (or stale) at this point - TODO confirm.
    self.h5_results_grp = h5_decomp_group
    self._write_source_dset_provenance()

    num_components = self.__components.shape[0]

    write_simple_attrs(h5_decomp_group, self.parms_dict)
    write_simple_attrs(h5_decomp_group, {
        'n_components': num_components,
        'n_samples': self.h5_main.shape[0]
    })

    decomp_desc = Dimension('Endmember', 'a. u.', num_components)

    # equivalent to V - compound / complex
    # One row per endmember, sharing the spectroscopic datasets of the source
    write_main_dataset(h5_decomp_group, self.__components, 'Components',
                       get_attr(self.h5_main, 'quantity')[0], 'a.u.',
                       decomp_desc, None,
                       h5_spec_inds=self.h5_main.h5_spec_inds,
                       h5_spec_vals=self.h5_main.h5_spec_vals)

    # equivalent of U - real
    # One row per position, sharing the position datasets of the source
    write_main_dataset(h5_decomp_group, np.float32(self.__projection),
                       'Projection', 'abundance', 'a.u.', None, decomp_desc,
                       dtype=np.float32,
                       h5_pos_inds=self.h5_main.h5_pos_inds,
                       h5_pos_vals=self.h5_main.h5_pos_vals)

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    self._h5_status_dset = h5_decomp_group.create_dataset(
        self._status_dset_name,
        data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
    # keeping legacy option:
    h5_decomp_group.attrs['last_pixel'] = self.h5_main.shape[0]

    # return the h5 group object
    return self.h5_results_grp