def test_create_indexed_group_invalid_types(self):
    with self.assertRaises(TypeError):
        _ = prov_utils.create_indexed_group(np.arange(4), "fddfd")

    file_path = 'test.h5'
    data_utils.delete_existing_file(file_path)
    with h5py.File(file_path, mode='w') as h5_f:
        with self.assertRaises(TypeError):
            _ = prov_utils.create_indexed_group(h5_f, 1.2343)
    os.remove(file_path)
def test_empty_base_name(self):
    file_path = 'test.h5'
    data_utils.delete_existing_file(file_path)
    with h5py.File(file_path, mode='w') as h5_f:
        with self.assertRaises(ValueError):
            _ = prov_utils.create_indexed_group(h5_f, ' ')
    os.remove(file_path)
def test_second(self):
    file_path = 'test.h5'
    data_utils.delete_existing_file(file_path)
    with h5py.File(file_path, mode='w') as h5_f:
        h5_group_1 = prov_utils.create_indexed_group(h5_f, 'Hello')
        self.assertIsInstance(h5_group_1, h5py.Group)
        self.assertEqual(h5_group_1.name, '/Hello_000')
        self.assertEqual(h5_group_1.parent, h5_f)
        data_utils.verify_book_keeping_attrs(self, h5_group_1)

        h5_group_2 = prov_utils.create_indexed_group(h5_f, 'Hello')
        self.assertIsInstance(h5_group_2, h5py.Group)
        self.assertEqual(h5_group_2.name, '/Hello_001')
        self.assertEqual(h5_group_2.parent, h5_f)
        data_utils.verify_book_keeping_attrs(self, h5_group_2)
    os.remove(file_path)
def test_first_group(self):
    file_path = 'test.h5'
    data_utils.delete_existing_file(file_path)
    with h5py.File(file_path, mode='w') as h5_f:
        h5_group = prov_utils.create_indexed_group(h5_f, 'Hello')
        self.assertIsInstance(h5_group, h5py.Group)
        self.assertEqual(h5_group.name, '/Hello_000')
        self.assertEqual(h5_group.parent, h5_f)
        data_utils.verify_book_keeping_attrs(self, h5_group)

        h5_sub_group = prov_utils.create_indexed_group(h5_group, 'Test')
        self.assertIsInstance(h5_sub_group, h5py.Group)
        self.assertEqual(h5_sub_group.name, '/Hello_000/Test_000')
        self.assertEqual(h5_sub_group.parent, h5_group)
        data_utils.verify_book_keeping_attrs(self, h5_sub_group)
    os.remove(file_path)
def write_results(h5_group, dataset=None, attributes=None, process_name=None):
    """
    Writes results of a processing step back to HDF5 in NSID format

    Parameters
    ----------
    h5_group : h5py.Group
        HDF5 Group into which results will be written
    dataset : sidpy.Dataset or list of sidpy.Dataset, optional. Default = None
        Dataset(s) containing the results of the processing step
    attributes : dict, optional. Default = None
        Metadata regarding processing step
    process_name : str, optional. Default = "Log_"
        Name of the prefix for group containing process results

    Returns
    -------
    log_group : h5py.Group
        HDF5 group containing results
    """
    found_valid_dataset = False
    if dataset is not None:
        if isinstance(dataset, Dataset):
            dataset = [dataset]
        if isinstance(dataset, list):
            if not all([isinstance(itm, Dataset) for itm in dataset]):
                raise TypeError('List contains non-Sidpy dataset entries! '
                                'Should only contain sidpy datasets')
            found_valid_dataset = True

    found_valid_attributes = False
    if attributes is not None:
        if isinstance(attributes, dict):
            if len(attributes) > 0:
                found_valid_attributes = True
        else:
            raise TypeError('Provided attributes is type {} but should be '
                            'type dict'.format(type(attributes)))

    if not (found_valid_dataset or found_valid_attributes):
        raise ValueError('results should contain at least a sidpy Dataset or '
                         'a dictionary in results')

    log_name = 'Log_'
    if process_name is not None:
        log_name = log_name + process_name

    log_group = create_indexed_group(h5_group, log_name)
    write_book_keeping_attrs(log_group)
    write_pynsid_book_keeping_attrs(log_group)

    if found_valid_dataset:
        for dset in dataset:
            write_nsid_dataset(dset, log_group)

    if found_valid_attributes:
        write_simple_attrs(log_group, flatten_dict(attributes))

    return log_group
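
# --- Usage sketch (illustrative only, not part of the module) ---------------
# A minimal example of calling write_results, assuming sidpy and h5py are
# installed. The file name, group name, and attribute values below are
# hypothetical, chosen only to demonstrate the call signature.
def _example_write_results():
    import h5py
    import numpy as np
    import sidpy

    # Wrap a plain numpy array as a sidpy Dataset to stand in for real results
    results = sidpy.Dataset.from_array(np.random.rand(4, 4))

    with h5py.File('example_results.h5', mode='w') as h5_f:
        h5_meas = h5_f.create_group('Measurement_000')
        # Creates an indexed group such as
        # '/Measurement_000/Log_Fitting_000' holding the dataset plus the
        # flattened metadata dictionary as HDF5 attributes
        h5_log = write_results(h5_meas, dataset=results,
                               attributes={'algorithm': 'demo'},
                               process_name='Fitting')
        return h5_log.name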
def translate(self, parm_path):
    """
    The main function that translates the provided file into a .h5 file

    Parameters
    ----------
    parm_path : string / unicode
        Absolute file path of the parameters .mat file.

    Returns
    -------
    h5_path : string / unicode
        Absolute path of the translated h5 file
    """
    parm_path = path.abspath(parm_path)
    parm_dict, excit_wfm = self._read_parms(parm_path)
    excit_wfm = excit_wfm[1::2]
    self._parse_file_path(parm_path)

    num_dat_files = len(self.file_list)

    with open(self.file_list[0], 'rb') as f:
        spectrogram_size, count_vals = self._parse_spectrogram_size(f)
    print("Excitation waveform shape: ", excit_wfm.shape)
    print("Spectrogram size:", spectrogram_size)
    num_pixels = parm_dict['grid_num_rows'] * parm_dict['grid_num_cols']
    print('Number of pixels: ', num_pixels)
    print('Count Values: ', count_vals)
    # if (num_pixels + 1) != count_vals:
    #     print("Data size does not match number of pixels expected. "
    #           "Cannot continue")

    # Find how many channels we have to make
    num_ai_chans = num_dat_files // 2  # Division by 2 due to real/imaginary

    # Now start creating datasets and populating:
    # Start with getting an h5 file
    h5_file = h5py.File(self.h5_path, mode='a')

    # First create a measurement group
    h5_meas_group = create_indexed_group(h5_file, 'Measurement')

    # Set up some parameters that will be written as attributes to this
    # Measurement group
    global_parms = dict()
    global_parms['data_type'] = 'trKPFM'
    global_parms['translator'] = 'trKPFM'
    write_simple_attrs(h5_meas_group, global_parms)
    write_simple_attrs(h5_meas_group, parm_dict)

    # Now start building the position and spectroscopic dimension containers.
    # There is only one spectroscopic dimension and two position dimensions.
    # The excit_wfm only has the DC values without any information on cycles,
    # time, etc. What we really need is to add the time component: for every
    # DC step there are some time steps.
    # Need to divide by 2 because the spectrogram considers on and off field
    num_time_steps = (spectrogram_size - 5) // excit_wfm.size // 2

    # There should be three spectroscopic axes. In order of fastest to
    # slowest varying, we have: time, voltage, field
    time_vec = np.linspace(0, parm_dict['IO_time'], num_time_steps)
    print('Num time steps: {}'.format(num_time_steps))
    print('DC Vec size: {}'.format(excit_wfm.shape))
    print('Spectrogram size: {}'.format(spectrogram_size))
    field_vec = np.array([0, 1])

    spec_dims = [Dimension('Time', 's', time_vec),
                 Dimension('Field', 'Binary', field_vec),
                 Dimension('Bias', 'V', excit_wfm)]
    pos_dims = [Dimension('Cols', 'm', int(parm_dict['grid_num_cols'])),
                Dimension('Rows', 'm', int(parm_dict['grid_num_rows']))]

    self.raw_datasets = list()

    for chan_index in range(num_ai_chans):
        chan_grp = create_indexed_group(h5_meas_group, 'Channel')
        if chan_index == 0:
            write_simple_attrs(chan_grp, {'Harmonic': 1})
        else:
            write_simple_attrs(chan_grp, {'Harmonic': 2})

        h5_raw = write_main_dataset(chan_grp,  # parent HDF5 group
                                    (num_pixels, spectrogram_size - 5),  # shape of Main dataset
                                    'Raw_Data',  # Name of main dataset
                                    'Deflection',  # Physical quantity contained in Main dataset
                                    'V',  # Units for the physical quantity
                                    pos_dims,  # Position dimensions
                                    spec_dims,  # Spectroscopic dimensions
                                    dtype=np.complex64,  # data type / precision
                                    compression='gzip',
                                    chunks=(1, spectrogram_size - 5),
                                    main_dset_attrs={'quantity': 'Complex'})

        self.raw_datasets.append(h5_raw)

    # Now that the N channels have been made, populate them with the
    # actual data....
    self._read_data(parm_dict, parm_path, spectrogram_size)

    h5_file.close()

    return self.h5_path
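
# --- Usage sketch (illustrative only) ----------------------------------------
# How this translate() method would typically be driven. The class name
# TRKPFMTranslator and the parameter file path below are assumptions for the
# sake of the example; substitute the actual class and a real .mat file.
#
#     translator = TRKPFMTranslator()
#     h5_path = translator.translate('/path/to/experiment_parms.mat')
#     print('Translated data written to', h5_path)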