def use_partial_computation(self, h5_partial_group=None):
    """
    Extracts the necessary parameters from the provided h5 group to resume computation

    Parameters
    ----------
    h5_partial_group : :class:`h5py.Group`
        Group containing partially computed results
    """
    # Attempt to automatically take partial results
    if h5_partial_group is None:
        if len(self.partial_h5_groups) < 1:
            raise ValueError('No group was found with partial results and no such group was provided')
        h5_partial_group = self.partial_h5_groups[-1]
    else:
        # Make sure that this group is among the legal ones already discovered:
        if h5_partial_group not in self.partial_h5_groups:
            raise ValueError('Provided group does not appear to be in the list of discovered groups')

    self.parms_dict = get_attributes(h5_partial_group)

    self.h5_results_grp = h5_partial_group
def use_partial_computation(self, h5_partial_group=None):
    """
    Extracts the necessary parameters from the provided h5 group to resume computation

    Parameters
    ----------
    h5_partial_group : :class:`h5py.Group`
        Group containing partially computed results
    """
    # Attempt to automatically take partial results
    if h5_partial_group is None:
        if len(self.partial_h5_groups) < 1:
            raise ValueError('No group was found with partial results and no such group was provided')
        h5_partial_group = self.partial_h5_groups[-1]
    else:
        # Make sure that this group is among the legal ones already discovered:
        if h5_partial_group not in self.partial_h5_groups:
            raise ValueError('Provided group does not appear to be in the list of discovered groups')

    self.parms_dict = get_attributes(h5_partial_group)

    # Be careful in assigning the start and end positions - these will be per rank!
    self.__assign_job_indices(start=self.parms_dict.pop('last_pixel'))

    self.h5_results_grp = h5_partial_group
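# --- Usage sketch, not part of the source above ---
# A minimal, hypothetical example of resuming a pyUSID Process from partial
# results. `MyProcess` is a made-up name standing in for any concrete
# Process subclass; the file and dataset paths are illustrative only.
import h5py

with h5py.File('results.h5', mode='r+') as h5_f:
    h5_main = h5_f['Measurement_000/Channel_000/Raw_Data']
    proc = MyProcess(h5_main)  # hypothetical subclass
    # With no argument, the most recently discovered partial group is used;
    # passing a group not in proc.partial_h5_groups raises ValueError.
    proc.use_partial_computation()
    h5_results_grp = proc.compute()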
def test_all(self):
    attrs = {'att_1': 'string_val',
             'att_2': 1.2345,
             'att_3': [1, 2, 3, 4],
             'att_4': ['str_1', 'str_2', 'str_3']}

    with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
        h5_group = h5_f['/Raw_Measurement/source_main-Fitter_000']
        returned_attrs = hdf_utils.get_attributes(h5_group)
        self.assertIsInstance(returned_attrs, dict)
        for key in attrs.keys():
            self.assertTrue(np.all(returned_attrs[key] == attrs[key]))
def translate(self, data_filepath, out_filename, verbose=False, debug=False):
    '''
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    data_filepath : String / unicode
        Absolute path of the data file
    out_filename : String / unicode
        Name for the new generated hdf5 file. The new file will be
        saved in the same folder as the input file with the file name
        "out_filename".
        NOTE: the .h5 extension is automatically added to "out_filename"
    verbose : Boolean (Optional. default is False)
        Whether or not to print extra log statements (currently unused)
    debug : Boolean (Optional. default is False)
        Whether or not to print log statements

    Returns
    -------
    h5_path : String / unicode
        Absolute path of the generated .h5 file
    '''
    self.debug = debug

    # Open the datafile
    try:
        data_filepath = os.path.abspath(data_filepath)
        ARh5_file = h5py.File(data_filepath, 'r')
    except Exception:
        print('Unable to open the file', data_filepath)
        raise

    # Get info from the origin file like Notes and Segments
    self.notes = ARh5_file.attrs['Note']
    self.segments = ARh5_file['ForceMap']['Segments']  # shape: (X, Y, 4)
    self.segments_name = list(ARh5_file['ForceMap'].attrs['Segments'])
    self.map_size['X'] = ARh5_file['ForceMap']['Segments'].shape[0]
    self.map_size['Y'] = ARh5_file['ForceMap']['Segments'].shape[1]
    self.channels_name = list(ARh5_file['ForceMap'].attrs['Channels'])
    try:
        self.points_per_sec = float(self.note_value('ARDoIVPointsPerSec'))
    except NameError:
        self.points_per_sec = float(self.note_value('NumPtsPerSec'))
    if self.debug:
        print('Map size [X, Y]: ', self.map_size)
        print('Channel names: ', self.channels_name)

    # Only the extension 'Ext' segment can change size, so we find the
    # shortest one and trim all the others to match it
    extension_idx = self.segments_name.index('Ext')
    short_ext = np.amin(np.array(self.segments[:, :, extension_idx]))
    longest_ext = np.amax(np.array(self.segments[:, :, extension_idx]))
    difference = longest_ext - short_ext  # this is a difference between integers
    tot_length = (np.amax(self.segments) - difference) + 1
    # +1, otherwise the resulting arrays would be one point too short
    points_trimmed = np.array(self.segments[:, :, extension_idx]) - short_ext
    if self.debug:
        print('Data were trimmed by {} points in the extension segment'.format(difference))

    # Open the output hdf5 file
    folder_path = os.path.dirname(data_filepath)
    h5_path = os.path.join(folder_path, out_filename + '.h5')
    h5_file = h5py.File(h5_path, 'w')

    # Create the measurement group
    h5_meas_group = create_indexed_group(h5_file, 'Measurement')

    # Create all channels and main datasets;
    # at this point the main datasets are just functions of time
    x_dim = np.linspace(0, float(self.note_value('FastScanSize')), self.map_size['X'])
    y_dim = np.linspace(0, float(self.note_value('FastScanSize')), self.map_size['Y'])
    z_dim = np.arange(tot_length) / float(self.points_per_sec)

    pos_dims = [Dimension('Cols', 'm', x_dim),
                Dimension('Rows', 'm', y_dim)]
    spec_dims = [Dimension('Time', 's', z_dim)]

    # This is quite time consuming, but on a magnetic drive the process is
    # disk-limited, so parallelizing these loops would not help
    for index, channel in enumerate(self.channels_name):
        cur_chan = create_indexed_group(h5_meas_group, 'Channel')
        main_dset = np.empty((self.map_size['X'], self.map_size['Y'], tot_length))
        for column in np.arange(self.map_size['X']):
            for row in np.arange(self.map_size['Y']):
                AR_pos_string = str(column) + ':' + str(row)
                seg_start = self.segments[column, row, extension_idx] - short_ext
                main_dset[column, row, :] = ARh5_file['ForceMap'][AR_pos_string][index, seg_start:]

        # Reshape with Fortran order to have the correct position indices
        main_dset = np.reshape(main_dset, (-1, tot_length), order='F')

        if index == 0:
            first_main_dset = cur_chan
            h5_raw = write_main_dataset(cur_chan,  # parent HDF5 group
                                        main_dset,  # 2D array of raw data
                                        'Raw_' + channel,  # Name of main dset
                                        channel,  # Physical quantity
                                        self.get_def_unit(channel),  # Unit
                                        pos_dims,  # position dimensions
                                        spec_dims,  # spectroscopic dimensions
                                        )
        else:
            h5_raw = write_main_dataset(cur_chan,  # parent HDF5 group
                                        main_dset,  # 2D array of raw data
                                        'Raw_' + channel,  # Name of main dset
                                        channel,  # Physical quantity
                                        self.get_def_unit(channel),  # Unit
                                        pos_dims,  # position dimensions
                                        spec_dims,  # spectroscopic dimensions
                                        # Link ancillary dsets to the first channel
                                        h5_pos_inds=first_main_dset['Position_Indices'],
                                        h5_pos_vals=first_main_dset['Position_Values'],
                                        h5_spec_inds=first_main_dset['Spectroscopic_Indices'],
                                        h5_spec_vals=first_main_dset['Spectroscopic_Values'],
                                        )

    # Make channels with IMAGES.
    # Position indices/values are the same as for all other channels;
    # spectroscopic indices/values are just a single dimension
    img_spec_dims = [Dimension('arb', 'a.u.', [1])]
    for index, image in enumerate(ARh5_file['Image'].keys()):
        main_dset = np.reshape(np.array(ARh5_file['Image'][image]), (-1, 1), order='F')
        cur_chan = create_indexed_group(h5_meas_group, 'Channel')
        if index == 0:
            first_image_dset = cur_chan
            h5_raw = write_main_dataset(cur_chan,  # parent HDF5 group
                                        main_dset,  # 2D array of image (shape: P*Q x 1)
                                        'Img_' + image,  # Name of main dset
                                        image,  # Physical quantity
                                        self.get_def_unit(image),  # Unit
                                        pos_dims,  # position dimensions
                                        img_spec_dims,  # spectroscopic dimensions
                                        # Link ancillary dsets to the first channel
                                        h5_pos_inds=first_main_dset['Position_Indices'],
                                        h5_pos_vals=first_main_dset['Position_Values'],
                                        )
        else:
            h5_raw = write_main_dataset(cur_chan,  # parent HDF5 group
                                        main_dset,  # 2D array of image (shape: P*Q x 1)
                                        'Img_' + image,  # Name of main dset
                                        image,  # Physical quantity
                                        self.get_def_unit(image),  # Unit
                                        pos_dims,  # position dimensions
                                        img_spec_dims,  # spectroscopic dimensions
                                        # Link ancillary dsets to the first channel
                                        h5_pos_inds=first_main_dset['Position_Indices'],
                                        h5_pos_vals=first_main_dset['Position_Values'],
                                        h5_spec_inds=first_image_dset['Spectroscopic_Indices'],
                                        h5_spec_vals=first_image_dset['Spectroscopic_Values'],
                                        )

    # Create the new segments that will be stored as attributes
    new_segments = {}
    for seg, name in enumerate(self.segments_name):
        new_segments.update({name: self.segments[0, 0, seg] - short_ext})
    write_simple_attrs(h5_meas_group, {'Segments': new_segments,
                                       'Points_trimmed': points_trimmed,
                                       'Notes': self.notes})
    write_simple_attrs(h5_file, {'translator': 'ARhdf5',
                                 'instrument': 'Asylum Research ' + self.note_value('MicroscopeModel'),
                                 'AR software version': self.note_value('Version')})

    if self.debug:
        print(print_tree(h5_file))
        print('\n')
        for key, val in get_attributes(h5_meas_group).items():
            if key != 'Notes':
                print('{} : {}'.format(key, val))
            else:
                print('{} : {}'.format(key, 'notes string too long to be written here.'))

    # Clean up
    ARh5_file.close()
    h5_file.close()
    self.translated = True
    return h5_path
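# --- Usage sketch, not part of the source above ---
# Assumes this method belongs to pycroscopy's ARhdf5 translator class (the
# 'translator' attribute written above suggests that name). The import path
# is an assumption and the input path is illustrative.
from pycroscopy.io.translators import ARhdf5

translator = ARhdf5()
h5_path = translator.translate('/path/to/AR_force_map.h5', 'force_map_usid',
                               debug=True)
print('USID-formatted copy written to:', h5_path)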
def _create_results_datasets(self):
    """
    Creates hdf5 datasets and datagroups to hold the results
    """
    # create all h5 datasets here:
    num_pos = self.h5_main.shape[0]

    if self.verbose and self.mpi_rank == 0:
        print('Now creating the datasets')

    self.h5_results_grp = create_results_group(self.h5_main, self.process_name)

    write_simple_attrs(self.h5_results_grp, {'algorithm_author': 'Kody J. Law', 'last_pixel': 0})
    write_simple_attrs(self.h5_results_grp, self.parms_dict)

    if self.verbose and self.mpi_rank == 0:
        print('created group: {} with attributes:'.format(self.h5_results_grp.name))
        print(get_attributes(self.h5_results_grp))

    # One of those rare instances when the result is exactly the same as the source
    self.h5_i_corrected = create_empty_dataset(self.h5_main, np.float32, 'Corrected_Current',
                                               h5_group=self.h5_results_grp)

    if self.verbose and self.mpi_rank == 0:
        print('Created I Corrected')
        # print_tree(self.h5_results_grp)

    # For some reason, we cannot specify chunks or compression!
    # The resistance dataset requires the creation of a new spectroscopic dimension
    self.h5_resistance = write_main_dataset(self.h5_results_grp, (num_pos, self.num_x_steps),
                                            'Resistance', 'Resistance', 'GOhms', None,
                                            Dimension('Bias', 'V', self.num_x_steps),
                                            dtype=np.float32,
                                            # chunks=(1, self.num_x_steps),
                                            # compression='gzip',
                                            h5_pos_inds=self.h5_main.h5_pos_inds,
                                            h5_pos_vals=self.h5_main.h5_pos_vals)

    if self.verbose and self.mpi_rank == 0:
        print('Created Resistance')
        # print_tree(self.h5_results_grp)

    assert isinstance(self.h5_resistance, USIDataset)  # only here for PyCharm
    self.h5_new_spec_vals = self.h5_resistance.h5_spec_vals

    # The variance is identical to the resistance dataset
    self.h5_variance = create_empty_dataset(self.h5_resistance, np.float32, 'R_variance')

    if self.verbose and self.mpi_rank == 0:
        print('Created Variance')
        # print_tree(self.h5_results_grp)

    # The capacitance dataset requires new spectroscopic dimensions as well
    self.h5_cap = write_main_dataset(self.h5_results_grp, (num_pos, 1),
                                     'Capacitance', 'Capacitance', 'pF', None,
                                     Dimension('Direction', '', [1]),
                                     h5_pos_inds=self.h5_main.h5_pos_inds,
                                     h5_pos_vals=self.h5_main.h5_pos_vals,
                                     dtype=cap_dtype,
                                     # compression='gzip',
                                     aux_spec_prefix='Cap_Spec_')

    if self.verbose and self.mpi_rank == 0:
        print('Created Capacitance')
        # print_tree(self.h5_results_grp)
        print('Done creating all results datasets!')

    if self.mpi_size > 1:
        self.mpi_comm.Barrier()
    self.h5_main.file.flush()
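# --- Companion sketch, not part of the source above ---
# The 'last_pixel' attribute initialized to 0 above is the checkpoint that
# use_partial_computation() later reads back: get_attributes() returns it
# inside parms_dict, marking how many positions were completed. `h5_grp`
# stands in for a results group written by the method above; the import
# path is an assumption.
from sidpy.hdf.hdf_utils import get_attributes

parms = get_attributes(h5_grp)
start_pixel = parms.pop('last_pixel')  # 0 -> nothing computed yet
if start_pixel < h5_grp['Resistance'].shape[0]:
    print('Computation can resume from position', start_pixel)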
def test_invalid_type_multi(self):
    with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
        h5_group = h5_f['/Raw_Measurement/source_main-Fitter_000']
        with self.assertRaises(TypeError):
            _ = hdf_utils.get_attributes(h5_group, ['att_1', 15])
def test_not_hdf_obj(self):
    with self.assertRaises(TypeError):
        _ = hdf_utils.get_attributes(np.arange(4))
def test_absent_attr(self):
    sub_attrs = ['att_1', 'att_4', 'does_not_exist']
    with h5py.File(data_utils.std_beps_path, mode='r') as h5_f:
        h5_group = h5_f['/Raw_Measurement/source_main-Fitter_000']
        with self.assertRaises(KeyError):
            _ = hdf_utils.get_attributes(h5_group, attr_names=sub_attrs)
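# --- Companion sketch, not part of the source above ---
# Taken together, the tests above pin down the contract of get_attributes():
# it returns a dict, attribute names must be strings (TypeError otherwise),
# absent names raise KeyError, and non-HDF5 inputs raise TypeError. A
# self-contained demo on a throwaway in-memory file (all names here are
# made up; the import path is an assumption):
import h5py
import numpy as np
from sidpy.hdf.hdf_utils import get_attributes

with h5py.File('scratch.h5', mode='w', driver='core',
               backing_store=False) as h5_f:
    h5_grp = h5_f.create_group('Measurement_000')
    h5_grp.attrs['att_1'] = 'string_val'
    h5_grp.attrs['att_3'] = [1, 2, 3, 4]

    all_attrs = get_attributes(h5_grp)          # every attribute as a dict
    subset = get_attributes(h5_grp, ['att_1'])  # only the named attributes
    assert np.all(all_attrs['att_3'] == [1, 2, 3, 4])
    # get_attributes(h5_grp, ['att_1', 15])  # TypeError: non-string name
    # get_attributes(h5_grp, ['missing'])    # KeyError: absent attribute
    # get_attributes(np.arange(4))           # TypeError: not an HDF5 object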