def pre_process(self):
    # Create the hdf5 output file
    self.hdf5 = Hdf5Utils(self.exp)

    self.in_data = self.get_in_datasets()[0]
    self.data_name = self.in_data.get_name()
    current_pattern = self.__set_current_pattern()
    pattern_idx = {'current': current_pattern, 'next': []}

    self.filename = self.__get_file_name()
    self.group_name = self._get_group_name(self.data_name)

    logging.debug("creating the backing file %s", self.filename)
    self.backing_file = self.hdf5._open_backing_h5(self.filename, 'w')
    group = self.backing_file.create_group(self.group_name)
    group.attrs['NX_class'] = 'NXdata'
    group.attrs['signal'] = 'data'
    self.exp._barrier()

    shape = self.in_data.get_shape()
    chunking = Chunking(self.exp, pattern_idx)
    dtype = self.in_data.data.dtype
    chunks = chunking._calculate_chunking(shape, dtype)
    self.exp._barrier()

    self.out_data = self.hdf5.create_dataset_nofill(
        group, "data", shape, dtype, chunks=chunks)
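# A minimal, self-contained sketch of what a chunked "no-fill" dataset
# creation might look like using h5py's low-level API. This is illustrative
# only: `Hdf5Utils.create_dataset_nofill` is Savu's own helper and its exact
# implementation is not reproduced here. The file, group and dataset names
# below are hypothetical.
import h5py
import numpy as np


def create_dataset_nofill_sketch(group, name, shape, dtype, chunks=None):
    """Create a chunked dataset whose storage is never pre-filled."""
    space = h5py.h5s.create_simple(shape)
    dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    dcpl.set_fill_time(h5py.h5d.FILL_TIME_NEVER)  # skip fill-value writes
    if chunks is not None:
        dcpl.set_chunk(chunks)
    tid = h5py.h5t.py_create(np.dtype(dtype), logical=True)
    dset_id = h5py.h5d.create(group.id, name.encode(), tid, space, dcpl)
    return h5py.Dataset(dset_id)


if __name__ == '__main__':
    with h5py.File('example_nofill.h5', 'w') as f:   # hypothetical file name
        grp = f.create_group('entry/data')
        grp.attrs['NX_class'] = 'NXdata'
        grp.attrs['signal'] = 'data'
        data = create_dataset_nofill_sketch(
            grp, 'data', (100, 64, 64), np.float32, chunks=(1, 64, 64))
        print(data.shape, data.chunks)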
def __get_backing_file(self, data_obj):
    fname = '%s/%s.h5' % \
        (self.exp.get('out_path'), self.parameters['file_name'])

    if os.path.exists(fname):
        return h5py.File(fname, 'r')

    self.hdf5 = Hdf5Utils(self.exp)

    size = tuple(self.parameters['size'])

    patterns = data_obj.get_data_patterns()
    p_name = self.parameters['pattern'] if \
        self.parameters['pattern'] is not None else list(patterns.keys())[0]
    p_dict = patterns[p_name]
    p_dict['max_frames_transfer'] = 1
    nnext = {p_name: p_dict}

    pattern_idx = {'current': nnext, 'next': nnext}
    chunking = Chunking(self.exp, pattern_idx)
    chunks = chunking._calculate_chunking(size, np.int16)

    h5file = self.hdf5._open_backing_h5(fname, 'w')
    dset = h5file.create_dataset('test', size, chunks=chunks)

    # an mpi barrier is needed after creating the file and before
    # populating it
    self.exp._barrier()

    slice_dirs = p_dict['slice_dims']
    nDims = len(dset.shape)
    total_frames = np.prod([dset.shape[i] for i in slice_dirs])
    sub_size = \
        [1 if i in slice_dirs else dset.shape[i] for i in range(nDims)]

    # calculate the first slice
    idx = 0
    sl, total_frames = \
        self.__get_start_slice_list(slice_dirs, dset.shape, total_frames)

    for i in range(total_frames):
        low, high = self.parameters['range']
        dset[tuple(sl)] = np.random.randint(
            low, high=high, size=sub_size, dtype=self.parameters['dtype'])
        if sl[slice_dirs[idx]].stop == dset.shape[slice_dirs[idx]]:
            idx += 1
            if idx == len(slice_dirs):
                break
            tmp = sl[slice_dirs[idx]]
            sl[slice_dirs[idx]] = slice(tmp.start + 1, tmp.stop + 1)

    self.exp._barrier()

    return self.hdf5._open_backing_h5(fname, 'r')
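# A simplified, standalone illustration of the fill pattern above: walk the
# slice dimensions one frame at a time and write a random block into each
# hyperslab. This is not Savu's MPI-aware traversal (there is no
# __get_start_slice_list and no process distribution here); it only shows
# how a slice list selects one frame per iteration. Names are hypothetical.
import itertools

import h5py
import numpy as np


def fill_randomly(dset, slice_dims, low, high, dtype=np.int16):
    """Write random data into `dset`, one frame along `slice_dims` at a time."""
    ndims = len(dset.shape)
    sub_size = [1 if d in slice_dims else dset.shape[d] for d in range(ndims)]
    # every combination of indices over the slice dimensions is one frame
    for idx in itertools.product(*(range(dset.shape[d]) for d in slice_dims)):
        sl = [slice(None)] * ndims
        for d, i in zip(slice_dims, idx):
            sl[d] = slice(i, i + 1)
        dset[tuple(sl)] = np.random.randint(low, high=high, size=sub_size,
                                            dtype=dtype)


if __name__ == '__main__':
    with h5py.File('random_fill_example.h5', 'w') as f:  # hypothetical name
        dset = f.create_dataset('test', (8, 16, 16), dtype=np.int16,
                                chunks=(1, 16, 16))
        fill_randomly(dset, slice_dims=(0,), low=0, high=100)
        print(dset[0, :2, :2])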
def _add_input_data_to_nxs_file(self, transport):
    # save the loaded data to file
    h5 = Hdf5Utils(self)
    for name, data in self.index['in_data'].items():
        self.meta_data.set(['link_type', name], 'input_data')
        self.meta_data.set(['group_name', name], name)
        self.meta_data.set(['filename', name], data.backing_file)
        transport._populate_nexus_file(data)
        h5._link_datafile_to_nexus_file(data)
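# `_link_datafile_to_nexus_file` is a Savu helper; the sketch below only
# shows the underlying h5py mechanism such linking relies on: an external
# HDF5 link from the NeXus file into a separate backing file. All file and
# group names here are hypothetical.
import h5py
import numpy as np

# hypothetical backing file holding the loaded data
with h5py.File('input_data.h5', 'w') as backing:
    grp = backing.create_group('entry/tomo')
    grp.attrs['NX_class'] = 'NXdata'
    grp.attrs['signal'] = 'data'
    grp.create_dataset('data', data=np.zeros((4, 8, 8), dtype=np.float32))

# hypothetical NeXus output file that links back to the backing file
with h5py.File('processed.nxs', 'w') as nxs:
    entry = nxs.create_group('entry')
    entry.attrs['NX_class'] = 'NXentry'
    # the link resolves to entry/tomo inside input_data.h5 when read
    entry['input_data'] = h5py.ExternalLink('input_data.h5', 'entry/tomo')

with h5py.File('processed.nxs', 'r') as nxs:
    print(nxs['entry/input_data/data'].shape)  # follows the external link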
def _transport_pre_plugin_list_run(self):
    # loaders have completed; now revert back to BasicTransport, so any
    # output datasets created by a plugin will use this.
    self.hdf5 = Hdf5Utils(self.exp)
    self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()
    self.exp.meta_data.set('transport', 'basic')
    plist = self.exp.meta_data.plugin_list
    self.n_plugins = plist._get_n_processing_plugins()
    self.final_dict = plist.plugin_list[-1]
def _transport_post_plugin(self):
    if self.count == self.n_plugins - 2:
        self.exp.meta_data.set('transport', 'hdf5')
    elif self.count == self.n_plugins - 1:  # final plugin
        self.h5trans.exp = self.exp
        self.h5trans.hdf5 = Hdf5Utils(self.exp)
        self.h5trans._transport_post_plugin()
    self.count += 1
def _transport_pre_plugin_list_run(self):
    # run through the experiment (no processing) and create the output files
    self.hdf5 = Hdf5Utils(self.exp)
    self.exp_coll = self.exp._get_experiment_collection()
    self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()

    for i in range(len(self.exp_coll['datasets'])):
        self.exp._set_experiment_for_current_plugin(i)
        self.files.append(
            self._get_filenames(self.exp_coll['plugin_dict'][i]))
        self._set_file_details(self.files[i])
        self._setup_h5_files()  # creates the hdf5 files
def _transport_post_plugin(self):
    # revert back to basic if a temporary transport mechanism was used
    if self.hdf5_flag:
        self.__unset_hdf5_transport()

    if self.count == self.n_plugins - 2:
        self.exp.meta_data.set('transport', 'hdf5')

    if self.count == self.n_plugins - 1:  # final plugin
        self.h5trans.exp = self.exp
        self.h5trans.hdf5 = Hdf5Utils(self.exp)
        self.h5trans._transport_post_plugin()

    self.count += 1
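# The `self.exp.meta_data.set('transport', ...)` calls above switch transport
# mechanisms by name. The registry sketch below illustrates that pattern in
# isolation; the class and function names are hypothetical and are not
# Savu's actual transport factory.
class BasicTransport:
    def run(self):
        print('running with in-memory (basic) transport')


class Hdf5Transport:
    def run(self):
        print('running with hdf5-backed transport')


TRANSPORTS = {'basic': BasicTransport, 'hdf5': Hdf5Transport}


def get_transport(meta_data):
    """Look up the transport class named in the metadata dictionary."""
    return TRANSPORTS[meta_data.get('transport', 'basic')]()


if __name__ == '__main__':
    meta_data = {'transport': 'hdf5'}   # e.g. set just before the final plugin
    get_transport(meta_data).run()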
def __init__(self, exp, name='Checkpointing'):
    self._exp = exp
    self._h5 = Hdf5Utils(self._exp)
    self._filename = '_checkpoint.h5'
    self._file = None
    self._start_values = (0, 0, 0)
    self._completed_plugins = 0
    self._level = None
    self._proc_idx = 0
    self._trans_idx = 0
    self._comm = None
    self._timer = None
    self._set_timer()
    self.meta_data = MetaData()
def _transport_pre_plugin_list_run(self):
    # loaders have completed; now revert back to DosnaTransport, so any
    # output datasets created by a plugin will use this.
    self.hdf5 = Hdf5Utils(self.exp)
    exp_coll = self.exp._get_experiment_collection()
    self.data_flow = self.exp.meta_data.plugin_list._get_dataset_flow()
    self.exp.meta_data.set('transport', 'dosna')
    plist = self.exp.meta_data.plugin_list
    self.n_plugins = plist._get_n_processing_plugins()
    self.final_dict = plist.plugin_list[-1]

    for plugin_index in range(self.n_plugins):
        self.exp._set_experiment_for_current_plugin(plugin_index)
        self.files.append(
            self._get_filenames(exp_coll['plugin_dict'][plugin_index]))
        self._set_file_details(self.files[plugin_index])
        self._setup_dosna_objects()  # creates the dosna objects