def test_not_numpy_or_dask_array_main(self):
    translator = ArrayTranslator()
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah',
                                 {'This is not a dataset': True},
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
def setUp(self):
    data_utils.make_beps_file()
    self.orig_labels_order = ['X', 'Y', 'Cycle', 'Bias']
    self.h5_file = h5py.File(data_utils.std_beps_path, mode='r')

    h5_grp = self.h5_file['/Raw_Measurement/']
    self.source_nd_s2f = h5_grp['n_dim_form'][()]
    self.source_nd_f2s = self.source_nd_s2f.transpose(1, 0, 3, 2)
    self.h5_source = USIDataset(h5_grp['source_main'])

    self.pos_dims = []
    self.spec_dims = []

    for dim_name, dim_units in zip(self.h5_source.pos_dim_labels,
                                   get_attr(self.h5_source.h5_pos_inds, 'units')):
        self.pos_dims.append(Dimension(dim_name, dim_units, h5_grp[dim_name][()]))

    for dim_name, dim_units in zip(self.h5_source.spec_dim_labels,
                                   get_attr(self.h5_source.h5_spec_inds, 'units')):
        self.spec_dims.append(Dimension(dim_name, dim_units, h5_grp[dim_name][()]))

    res_grp_0 = h5_grp['source_main-Fitter_000']
    self.results_0_nd_s2f = res_grp_0['n_dim_form'][()]
    self.results_0_nd_f2s = self.results_0_nd_s2f.transpose(1, 0, 3, 2)
    self.h5_compound = USIDataset(res_grp_0['results_main'])

    res_grp_1 = h5_grp['source_main-Fitter_001']
    self.results_1_nd_s2f = res_grp_1['n_dim_form'][()]
    self.results_1_nd_f2s = self.results_1_nd_s2f.transpose(1, 0, 3, 2)
    self.h5_complex = USIDataset(res_grp_1['results_main'])
def save_IF(h5_gp, inst_freq, parm_dict):
    """
    Adds Instantaneous Frequency as a main dataset

    :param h5_gp: Group to write the instantaneous frequency data into
    :type h5_gp: h5py.Group
    :param inst_freq: Instantaneous frequency array, shaped
        (num_rows * num_cols, pnts_per_avg)
    :type inst_freq: numpy.ndarray
    :param parm_dict: Experiment parameters; must contain 'num_rows',
        'num_cols', 'pnts_per_avg', 'FastScanSize', 'SlowScanSize',
        and 'total_time'
    :type parm_dict: dict
    :returns: The newly created main dataset containing the instantaneous frequency
    :rtype: USIDataset
    """
    # Error check
    if isinstance(h5_gp, h5py.Dataset):
        raise ValueError('Must pass an h5py Group')

    # Get relevant parameters
    num_rows = parm_dict['num_rows']
    num_cols = parm_dict['num_cols']
    pnts_per_avg = parm_dict['pnts_per_avg']

    h5_meas_group = usid.hdf_utils.create_indexed_group(h5_gp, 'processed')

    # Create dimensions
    pos_desc = [Dimension('X', 'm', np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension('Y', 'm', np.linspace(0, parm_dict['SlowScanSize'], num_rows))]
    # ds_pos_ind, ds_pos_val = build_ind_val_matrices(pos_desc, is_spectral=False)

    spec_desc = [Dimension('Time', 's', np.linspace(0, parm_dict['total_time'], pnts_per_avg))]
    # ds_spec_inds, ds_spec_vals = build_ind_val_matrices(spec_desc, is_spectral=True)

    # Write the main dataset
    h5_if = usid.hdf_utils.write_main_dataset(h5_meas_group,
                                              inst_freq,
                                              'inst_freq',  # Name of main dataset
                                              'Frequency',  # Physical quantity contained in Main dataset
                                              'Hz',  # Units for the physical quantity
                                              pos_desc,  # Position dimensions
                                              spec_desc,  # Spectroscopic dimensions
                                              dtype=np.float32,  # data type / precision
                                              main_dset_attrs=parm_dict)

    usid.hdf_utils.copy_attributes(h5_if, h5_gp)
    h5_if.file.flush()

    return h5_if
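# A minimal usage sketch for save_IF, not taken from the source: the file name,
# group name, and parameter values below are hypothetical, and the instantaneous
# frequency array is random stand-in data of the shape the function expects.
def _example_save_IF():
    parm_dict = {'num_rows': 64, 'num_cols': 128, 'pnts_per_avg': 1000,
                 'FastScanSize': 1e-6, 'SlowScanSize': 1e-6, 'total_time': 1e-3}
    inst_freq = np.random.rand(parm_dict['num_rows'] * parm_dict['num_cols'],
                               parm_dict['pnts_per_avg'])
    with h5py.File('ff_data.h5', mode='r+') as h5_f:  # hypothetical file
        h5_if = save_IF(h5_f['Measurement_000'], inst_freq, parm_dict)
        print(h5_if.shape)  # (num_rows * num_cols, pnts_per_avg)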
def test_not_arrays(self):
    translator = ArrayTranslator()
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 3),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3),
                                 extra_dsets={'Blah_other': 'I am not an array'})
def test_empty_name(self):
    translator = ArrayTranslator()
    with self.assertRaises(ValueError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 3),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3),
                                 extra_dsets={' ': [1, 2, 3]})
def test_spec(self):
    translator = ArrayTranslator()
    with self.assertRaises(ValueError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 13),
                                 'quant', 'unit',
                                 Dimension('Dim_1', 'au', 5),
                                 [Dimension('Spec_Dim', 'au', 3),
                                  Dimension('Dim_2', 'au', 4)])
def test_object_single(self):
    translator = ArrayTranslator()
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 13),
                                 'quant', 'unit',
                                 'my_string_Dimension',
                                 [Dimension('Spec_Dim', 'au', 3),
                                  Dimension('Dim_2', 'au', 4)])
def test_objects(self):
    translator = ArrayTranslator()
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 13),
                                 'quant', 'unit',
                                 Dimension('Dim_1', 'au', 5),
                                 ['blah', Dimension('Dim_2', 'au', 4)])
def test_reserved_names(self):
    translator = ArrayTranslator()
    with self.assertRaises(KeyError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 3),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3),
                                 extra_dsets={'Spectroscopic_Indices': np.arange(4),
                                              'Blah_other': np.arange(15)})
def test_main_dset_1D(self):
    translator = ArrayTranslator()
    with self.assertRaises(ValueError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.arange(4),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
    with self.assertRaises(ValueError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah',
                                 da.from_array(np.arange(4), chunks=(4,)),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
def setUp(self):
    super(TestGetDimsForSliceReal, self).setUp()
    self.pos_dims, self.spec_dims = self.get_all_dimensions()
    self.default_dimension = Dimension('arb.', 'a. u.', [1])
    self.pos_dict = dict()
    self.spec_dict = dict()
    for item in self.pos_dims:
        self.pos_dict[item.name] = item
    for item in self.spec_dims:
        self.spec_dict[item.name] = item
@staticmethod
def get_all_dimensions():
    pos_dims = []
    spec_dims = []
    with h5py.File(test_h5_file_path, mode='r') as h5_f:
        h5_raw_grp = h5_f['Raw_Measurement']
        usi_main = USIDataset(h5_raw_grp['source_main'])
        for dim_name, dim_units in zip(usi_main.pos_dim_labels,
                                       get_attr(usi_main.h5_pos_inds, 'units')):
            pos_dims.append(Dimension(dim_name, dim_units, h5_raw_grp[dim_name][()]))
        for dim_name, dim_units in zip(usi_main.spec_dim_labels,
                                       get_attr(usi_main.h5_spec_inds, 'units')):
            spec_dims.append(Dimension(dim_name, dim_units, h5_raw_grp[dim_name][()]))
    return pos_dims, spec_dims
def test_single_spec_dim_truncated(self):
    new_spec_dims = list()
    for item in self.spec_dims:
        if item.name == 'Bias':
            new_spec_dims.append(Dimension(item.name, item.units,
                                           item.values[slice(1, 7, 3)]))
        else:
            new_spec_dims.append(item)
    self.base({'Bias': slice(1, 7, 3)}, self.pos_dims, new_spec_dims)
def test_not_strings(self):
    translator = ArrayTranslator()
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 1.2345, np.random.rand(5, 3),
                                 'quant', 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 3),
                                 {'quant': 1}, 'unit',
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
    with self.assertRaises(TypeError):
        delete_existing_file(file_path)
        _ = translator.translate(file_path, 'Blah', np.random.rand(5, 3),
                                 'quant', ['unit'],
                                 Dimension('Position_Dim', 'au', 5),
                                 Dimension('Spec_Dim', 'au', 3))
def translate(self, file_path, verbose=False, parm_encoding='utf-8', ftype='FF',
              subfolder='Measurement_000', h5_path='', channel_label_name=True):
    """
    Translates the provided file to .h5

    Adapted heavily from the pycroscopy IBW translator, modified to work
    with the Ginger Lab format

    :param file_path: Absolute path of the .ibw file
    :type file_path: String / unicode
    :param verbose: Whether or not to show print statements for debugging
    :type verbose: boolean, optional
    :param parm_encoding: Codec to be used to decode the bytestrings into
        Python strings if needed. Default 'utf-8'
    :type parm_encoding: str, optional
    :param ftype: Delineates the Ginger Lab imaging file type to be imported
        (not case-sensitive):
        'FF' : FF-trEFM
        'SKPM' : FM-SKPM
        'ringdown' : Ringdown
        'trEFM' : normal trEFM
    :type ftype: str, optional
    :param subfolder: Specifies folder under root (/) to save data in.
        Default is standard pycroscopy format
    :type subfolder: str, optional
    :param h5_path: Existing H5 file to append to
    :type h5_path: str, optional
    :param channel_label_name: If True, uses the Channel as the subfolder
        name (e.g. Height, Phase, Amplitude, Charging)
    :type channel_label_name: bool, optional

    :returns: Absolute path of the .h5 file
    :rtype: String / unicode
    """
    # Prepare the .h5 file:
    if not h5_path:
        folder_path, base_name = path.split(file_path)
        base_name = base_name[:-4]
        h5_path = path.join(folder_path, base_name + '.h5')
        # hard-coded exception, rarely occurs but can be useful
        if path.exists(h5_path):
            h5_path = path.join(folder_path, base_name + '_00.h5')

    h5_file = h5py.File(h5_path, 'w')

    # If subfolder improperly formatted
    if subfolder == '':
        subfolder = '/'

    # Load the ibw file first
    ibw_obj = bw.load(file_path)
    ibw_wave = ibw_obj.get('wave')
    parm_dict = self._read_parms(ibw_wave, parm_encoding)
    chan_labels, chan_units = self._get_chan_labels(ibw_wave, parm_encoding)

    if verbose:
        print('Channels and units found:')
        print(chan_labels)
        print(chan_units)

    # Get the data to figure out if this is an image or a force curve
    images = ibw_wave.get('wData')

    if images.shape[2] != len(chan_labels):
        chan_labels = chan_labels[1:]  # for weird null set errors in older AR software

    # Check if this is a Ginger Lab format ibw (has 'UserIn' in channel labels)
    _is_gl_type = any(['UserIn0' in str(s) for s in chan_labels])
    if _is_gl_type:
        chan_labels = self._get_image_type(chan_labels, ftype)

    if verbose:
        print('Processing image type', ftype, 'with channels', chan_labels)

    type_suffix = 'Image'

    num_rows = ibw_wave['wave_header']['nDim'][1]  # lines
    num_cols = ibw_wave['wave_header']['nDim'][0]  # points
    num_imgs = ibw_wave['wave_header']['nDim'][2]  # layers

    unit_scale = self._get_unit_factor(''.join([str(s)[-2] for s in
                                                ibw_wave['wave_header']['dimUnits'][0][0:2]]))
    data_scale = self._get_unit_factor(str(ibw_wave['wave_header']['dataUnits'][0])[-2])

    parm_dict['FastScanSize'] = unit_scale * num_cols * ibw_wave['wave_header']['sfA'][0]
    parm_dict['SlowScanSize'] = unit_scale * num_rows * ibw_wave['wave_header']['sfA'][1]

    images = images.transpose(2, 0, 1)  # now ordered as [chan, Y, X] image
    images = np.reshape(images, (images.shape[0], -1, 1))  # 3D [chan, Y*X points, 1]

    pos_desc = [Dimension(name='X', units='m',
                          values=np.linspace(0, parm_dict['FastScanSize'], num_cols)),
                Dimension(name='Y', units='m',
                          values=np.linspace(0, parm_dict['SlowScanSize'], num_rows))]
    spec_desc = [Dimension(name='arb', units='a.u.', values=[1])]

    # Create the Position and Spectroscopic datasets
    h5_pos_inds, h5_pos_vals = write_ind_val_dsets(h5_file['/'], pos_desc, is_spectral=False)
    h5_spec_inds, h5_spec_vals = write_ind_val_dsets(h5_file['/'], spec_desc, is_spectral=True)

    # Prepare the list of raw_data datasets
    for chan_data, chan_name, chan_unit in zip(images, chan_labels, chan_units):
        chan_grp = create_indexed_group(h5_file['/'], chan_name)
        write_main_dataset(chan_grp, np.atleast_2d(chan_data), 'Raw_Data',
                           chan_name, chan_unit, pos_desc, spec_desc,
                           dtype=np.float32)

    if verbose:
        print('Finished writing all channels')

    h5_file.close()
    return h5_path
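# A minimal sketch of driving the translator above, not taken from the source:
# it assumes the enclosing class is a pyUSID-style Translator, and both the
# class name GLIBWTranslator and the .ibw path are hypothetical.
def _example_translate():
    translator = GLIBWTranslator()
    h5_path = translator.translate('/path/to/scan.ibw', ftype='trEFM', verbose=True)
    print('Wrote:', h5_path)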
def _write_results_chunk(self):
    """
    Writes the labels and mean response to the h5 file

    Returns
    ---------
    h5_group : HDF5 Group reference
        Reference to the group that contains the decomposition results
    """
    self.h5_results_grp = create_results_group(self.h5_main, self.process_name,
                                               h5_parent_group=self._h5_target_group)
    self._write_source_dset_provenance()
    write_simple_attrs(self.h5_results_grp, self.parms_dict)
    write_simple_attrs(self.h5_results_grp,
                       {'n_components': self.__components.shape[0],
                        'n_samples': self.h5_main.shape[0]})

    decomp_desc = Dimension('Endmember', 'a. u.', self.__components.shape[0])

    # equivalent to V - compound / complex
    h5_components = write_main_dataset(self.h5_results_grp, self.__components,
                                       'Components',
                                       get_attr(self.h5_main, 'quantity')[0],
                                       'a.u.', decomp_desc, None,
                                       h5_spec_inds=self.h5_main.h5_spec_inds,
                                       h5_spec_vals=self.h5_main.h5_spec_vals)

    # equivalent of U - real
    h5_projections = write_main_dataset(self.h5_results_grp,
                                        np.float32(self.__projection),
                                        'Projection', 'abundance', 'a.u.',
                                        None, decomp_desc, dtype=np.float32,
                                        h5_pos_inds=self.h5_main.h5_pos_inds,
                                        h5_pos_vals=self.h5_main.h5_pos_vals)

    # Marking completion:
    self._status_dset_name = 'completed_positions'
    self._h5_status_dset = self.h5_results_grp.create_dataset(
        self._status_dset_name,
        data=np.ones(self.h5_main.shape[0], dtype=np.uint8))
    # keeping legacy option:
    self.h5_results_grp.attrs['last_pixel'] = self.h5_main.shape[0]

    return self.h5_results_grp
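# A minimal sketch of reading back the decomposition results written above,
# not taken from the source. Note the design choice in _write_results_chunk:
# 'Components' reuses the source's spectroscopic ancillary datasets while
# 'Projection' reuses its position ancillary datasets, so both stay valid Main
# datasets without duplicating index tables. The results group name below
# ('source_main-Decomposition_000') is hypothetical.
def _example_read_decomposition(h5_main):
    h5_grp = h5_main.parent['source_main-Decomposition_000']  # hypothetical name
    h5_components = USIDataset(h5_grp['Components'])
    h5_projections = USIDataset(h5_grp['Projection'])
    return h5_components, h5_projections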
def base_translation_tester(self, main_dset_as_dask=False, extra_dsets_type='numpy',
                            use_parm_dict=True):
    data_name = 'My_Awesome_Measurement'

    if use_parm_dict:
        attrs = {'att_1': 'string_val',
                 'att_2': 1.2345,
                 'att_3': [1, 2, 3, 4],
                 'att_4': ['str_1', 'str_2', 'str_3']}
    else:
        attrs = None

    extra_dsets = {}
    if extra_dsets_type is not None:
        ref_dsets = {'dset_1': np.random.rand(5), 'dset_2': np.arange(25)}
        if extra_dsets_type == 'numpy':
            extra_dsets = ref_dsets
        elif extra_dsets_type == 'dask':
            for key, val in ref_dsets.items():
                extra_dsets.update({key: da.from_array(val, chunks=val.shape)})
        else:
            extra_dsets_type = None

    delete_existing_file(file_path)

    main_data = np.random.rand(15, 14)
    if main_dset_as_dask:
        main_data = da.from_array(main_data, chunks=main_data.shape)
    quantity = 'Current'
    units = 'nA'

    pos_sizes = [5, 3]
    pos_names = ['X', 'Y']
    pos_units = ['nm', 'um']
    pos_dims = []
    for name, unit, length in zip(pos_names, pos_units, pos_sizes):
        pos_dims.append(Dimension(name, unit, np.arange(length)))
    pos_data = np.vstack((np.tile(np.arange(5), 3),
                          np.repeat(np.arange(3), 5))).T

    spec_sizes = [7, 2]
    spec_names = ['Bias', 'Cycle']
    spec_units = ['V', '']
    spec_dims = []
    for name, unit, length in zip(spec_names, spec_units, spec_sizes):
        spec_dims.append(Dimension(name, unit, np.arange(length)))
    spec_data = np.vstack((np.tile(np.arange(7), 2),
                           np.repeat(np.arange(2), 7)))

    translator = ArrayTranslator()
    _ = translator.translate(file_path, data_name, main_data, quantity, units,
                             pos_dims, spec_dims, parm_dict=attrs,
                             extra_dsets=extra_dsets)

    with h5py.File(file_path, mode='r') as h5_f:
        # we are not interested in most of the attributes under root besides two:
        self.assertEqual(data_name, hdf_utils.get_attr(h5_f, 'data_type'))
        # self.assertEqual('NumpyTranslator', hdf_utils.get_attr(h5_f, 'translator'))

        # First level should have absolutely nothing besides one group
        self.assertEqual(len(h5_f.items()), 1)
        self.assertTrue('Measurement_000' in h5_f.keys())
        h5_meas_grp = h5_f['Measurement_000']
        self.assertIsInstance(h5_meas_grp, h5py.Group)

        # check the attributes under this group
        # self.assertEqual(len(h5_meas_grp.attrs), len(attrs))
        if use_parm_dict:
            for key, expected_val in attrs.items():
                self.assertTrue(np.all(hdf_utils.get_attr(h5_meas_grp, key) == expected_val))

        # Again, this group should only have one group - Channel_000
        self.assertEqual(len(h5_meas_grp.items()), 1)
        self.assertTrue('Channel_000' in h5_meas_grp.keys())
        h5_chan_grp = h5_meas_grp['Channel_000']
        self.assertIsInstance(h5_chan_grp, h5py.Group)

        # This channel group is not expected to have any (custom) attributes
        # but it will contain the main dataset
        self.assertEqual(len(h5_chan_grp.items()), 5 + len(extra_dsets))
        for dset_name in ['Raw_Data', 'Position_Indices', 'Position_Values',
                          'Spectroscopic_Indices', 'Spectroscopic_Values']:
            self.assertTrue(dset_name in h5_chan_grp.keys())
            h5_dset = h5_chan_grp[dset_name]
            self.assertIsInstance(h5_dset, h5py.Dataset)

        usid_main = USIDataset(h5_chan_grp['Raw_Data'])
        self.assertIsInstance(usid_main, USIDataset)
        self.assertEqual(usid_main.name.split('/')[-1], 'Raw_Data')
        self.assertEqual(usid_main.parent, h5_chan_grp)
        self.assertTrue(np.allclose(main_data, usid_main[()]))

        validate_aux_dset_pair(self, h5_chan_grp, usid_main.h5_pos_inds,
                               usid_main.h5_pos_vals, pos_names, pos_units,
                               pos_data, h5_main=usid_main, is_spectral=False)

        validate_aux_dset_pair(self, h5_chan_grp, usid_main.h5_spec_inds,
                               usid_main.h5_spec_vals, spec_names, spec_units,
                               spec_data, h5_main=usid_main, is_spectral=True)

        # Now validate each of the extra datasets:
        if extra_dsets_type is not None:
            for key, val in extra_dsets.items():
                self.assertTrue(key in h5_chan_grp.keys())
                h5_dset = h5_chan_grp[key]
                self.assertIsInstance(h5_dset, h5py.Dataset)
                if extra_dsets_type == 'dask':
                    val = val.compute()
                self.assertTrue(np.allclose(val, h5_dset[()]))

    os.remove(file_path)