def createHDF5_file(signal, parm_dict, h5_path='', ds_name='FF_Raw'):
    """
    Generates the HDF5 file given path to a specific file and a parameters dictionary

    Parameters
    ----------
    signal : str, ndarray
        Path to the data file to be converted or a workspace array
    parm_dict : dict
        Scan parameters
    h5_path : string, optional
        Path to desired h5 file. Auto-generated from `signal` when empty
        (assumes `signal` is a path string in that case — TODO confirm callers
        never pass an array together with an empty h5_path).
    ds_name : str, optional
        Name of the raw dataset written inside 'FF_Group'

    Returns
    -------
    h5_path : str
        The filename path to the H5 file created
    """
    sg = signal

    # isinstance is the idiomatic type test (was: 'str' in str(type(signal)))
    if isinstance(signal, str):
        sg = load.signal(signal)

    # Plain truthiness replaces the obscure `not any(h5_path)` emptiness test
    if not h5_path:
        # if not passed, auto-generate name from the source file path
        fname = signal.replace('/', '\\')
        h5_path = fname[:-4] + '.h5'

    hdf = px.ioHDF5(h5_path)
    usid.hdf_utils.print_tree(hdf.file)

    ff_group = px.MicroDataGroup('FF_Group', parent='/')
    root_group = px.MicroDataGroup('/')

    ds_raw = px.MicroDataset(ds_name, data=sg, dtype=np.float32, parent=ff_group)

    if 'pnts_per_pixel' not in parm_dict.keys():
        # BUG FIX: read the shape from the loaded array `sg`, not `signal`,
        # which is still a path string when the data came from a file
        parm_dict['pnts_per_avg'] = sg.shape[1]
        parm_dict['pnts_per_pixel'] = 1
        parm_dict['pnts_per_line'] = parm_dict['num_cols']

    ff_group.addChildren([ds_raw])
    ff_group.attrs = parm_dict

    # Get reference for writing the data
    h5_refs = hdf.writeData(ff_group, print_log=True)
    hdf.flush()

    # The docstring promises the path; the original fell off the end (None)
    return h5_path
def _create_results_datasets(self):
    """
    Creates the datasets and datagroups necessary to store the results.

    Just as the raw data is stored in the pycroscopy format, the results
    also need to conform to the same standards. Hence, the create_datasets
    function can appear to be a little longer than one might expect.
    """
    h5_spec_inds = px.hdf_utils.getAuxData(
        self.h5_main, auxDataName=['Spectroscopic_Indices'])[0]
    h5_spec_vals = px.hdf_utils.getAuxData(
        self.h5_main, auxDataName=['Spectroscopic_Values'])[0]

    # A UDVS step begins wherever the first spectroscopic index resets to 0
    self.step_start_inds = np.where(h5_spec_inds[0] == 0)[0]
    self.num_udvs_steps = len(self.step_start_inds)

    # Empty dataset; values are filled in later during the actual fit
    ds_guess = px.MicroDataset('Guess', data=[],
                               maxshape=(self.h5_main.shape[0],
                                         self.num_udvs_steps),
                               chunking=(1, self.num_udvs_steps),
                               dtype=sho32)

    # Reduce the spectroscopic dimensions by dropping 'Frequency'
    not_freq = px.hdf_utils.get_attr(h5_spec_inds, 'labels') != 'Frequency'
    ds_sho_inds, ds_sho_vals = px.hdf_utils.buildReducedSpec(
        h5_spec_inds, h5_spec_vals, not_freq, self.step_start_inds)

    dset_name = self.h5_main.name.split('/')[-1]
    # Trailing '_' in the group name lets ioHDF5 auto-append a run index
    sho_grp = px.MicroDataGroup('-'.join([dset_name, 'SHO_Fit_']),
                                self.h5_main.parent.name[1:])
    sho_grp.addChildren([ds_guess, ds_sho_inds, ds_sho_vals])
    sho_grp.attrs['SHO_guess_method'] = "pycroscopy BESHO"

    h5_sho_grp_refs = self.hdf.writeData(sho_grp)

    self.h5_guess = px.hdf_utils.getH5DsetRefs(['Guess'],
                                               h5_sho_grp_refs)[0]
    self.h5_results_grp = self.h5_guess.parent
    h5_sho_inds = px.hdf_utils.getH5DsetRefs(['Spectroscopic_Indices'],
                                             h5_sho_grp_refs)[0]
    h5_sho_vals = px.hdf_utils.getH5DsetRefs(['Spectroscopic_Values'],
                                             h5_sho_grp_refs)[0]

    # Reference linking before actual fitting
    px.hdf_utils.linkRefs(self.h5_guess, [h5_sho_inds, h5_sho_vals])
    # Linking ancillary position datasets:
    aux_dsets = px.hdf_utils.getAuxData(
        self.h5_main, auxDataName=['Position_Indices', 'Position_Values'])
    px.hdf_utils.linkRefs(self.h5_guess, aux_dsets)

    # fixed typo in user-facing message ('Finshed' -> 'Finished')
    print('Finished creating datasets')
# directly under the root of the file. The MicroDataset class also implements the # compression and chunking parameters from h5py.Dataset. ds_main = px.MicroDataset('Main_Data', data=data1, parent='/') ############################################################################## # We can also create an empty dataset and write the values in later # With this method, it is neccessary to specify the dtype and maxshape kwarg parameters. ds_empty = px.MicroDataset('Empty_Data', data=[], dtype=np.float32, maxshape=[7, 5, 3]) ############################################################################## # We can also create groups and add other MicroData objects as children. # If the group's parent is not given, it will be set to root. data_group = px.MicroDataGroup('Data_Group', parent='/') root_group = px.MicroDataGroup('/') # After creating the group, we then add an existing object as its child. data_group.addChildren([ds_empty]) root_group.addChildren([ds_main, data_group]) ############################################################################## # The showTree method allows us to view the data structure before the hdf5 file is # created. root_group.showTree() ############################################################################## # Now that we have created the objects, we can write them to an hdf5 file
# In the case of K-means it may be the number of clusters. pycroscopy allows all these results to be stored instead # of being overwritten by appending an index number to the end of the group name. Thus, one could have a tree # that contains the following groups: # * Raw_Data-Cluster_000 <--- K-means with 9 clusters # * Raw_Data-Cluster_001 <--- Agglomerative clustering # * Raw_Data-Cluster_002 <--- K-means again with 4 clusters # # Leaving a '_' at the end of the group name will instruct ioHDF5 to look for the last instance of the same # operation being performed on the same dataset. The index will then be updated accordingly source_dset_name = h5_main.name.split('/')[-1] operation_name = 'Cluster' subtree_root_path = h5_main.parent.name[1:] cluster_grp = px.MicroDataGroup(source_dset_name + '-' + operation_name + '_', subtree_root_path) print('New group to be created with name:', cluster_grp.name) print('This group (subtree) will be appended to the H5 file under the group:', subtree_root_path) # Making a tree structure by adding the MicroDataset objects as children of this group cluster_grp.addChildren([ ds_label_mat, ds_cluster_centroids, ds_cluster_inds, ds_cluster_vals, ds_labels_spec_inds, ds_labels_spec_vals ]) print('\nWill write the following tree:') cluster_grp.showTree() cluster_grp.attrs['num_clusters'] = num_clusters cluster_grp.attrs['num_samples'] = h5_main.shape[0]