def expand_hdf5(h5f: File, out_dir: Path, seeds: Optional[Set[str]] = None, jobs: int = 1, progress: bool = False):
    """
    Expand an HDF5 containing code coverage.

    Args:
        h5f: h5py file object.
        out_dir: Directory to extract seed coverage to.
        seeds: An optional seed set. If provided, only these seeds will be extracted.
        jobs: Number of parallel jobs to run.
        progress: Set to `True` to display a progress bar.

    Yields:
        Each extracted seed.
    """
    h5_filename = h5f.filename

    with mpp.Pool(processes=jobs) as pool:
        get_cov = partial(_get_seed_cov, out_dir=out_dir, seeds=seeds)
        h5_iter = zip(repeat(h5_filename), h5f.keys())
        num_seeds = len(seeds) if seeds else len(list(h5f.keys()))
        print('%d seeds to extract' % num_seeds)

        # Wrap the results in a progress bar if requested; otherwise pass them through unchanged.
        iter_func = partial(tqdm, desc='Expanding %s' % h5_filename,
                            total=num_seeds, unit='seeds') if progress else iter
        for seed in iter_func(pool.istarmap(get_cov, h5_iter)):
            if seed:
                yield seed
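# Hedged usage sketch for expand_hdf5 (not from the original source): the coverage
# file name, seed names, and output directory below are made-up. Note that `istarmap`
# is not part of the stock multiprocessing Pool, so this helper presumably relies on
# a patched Pool (imported here as `mpp`) that provides it.
from pathlib import Path
from h5py import File

with File('coverage.h5', 'r') as h5f:
    out_dir = Path('cov_out')
    out_dir.mkdir(exist_ok=True)
    # Extract two specific seeds with four worker processes and a progress bar.
    for seed in expand_hdf5(h5f, out_dir, seeds={'seed_001', 'seed_002'},
                            jobs=4, progress=True):
        print('extracted', seed)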
class CountsHdf5Reader: def __init__(self, filename): self.data = File(filename, 'r') def keys(self): ''' # >>> path = 'fake-files/input/linnarsson/linnarsson.molecules.hdf5' # >>> reader = CountsHdf5Reader(path) # >>> len(reader.keys()) # 39 # >>> sorted(list(reader.keys()))[:2] # ['Acta2_Hybridization5', 'Aldoc_Hybridization1'] ''' return self.data.keys() def __getitem__(self, key): ''' # >>> path = 'fake-files/input/linnarsson/linnarsson.molecules.hdf5' # >>> reader = CountsHdf5Reader(path) # >>> pairs = list(reader['Acta2_Hybridization5']) # >>> len(pairs) # 13052 # >>> pairs[0] # [18215.0, 20052.0] ''' return (list(pair) for pair in self.data[key])
def write_metadata(
    infile: h5py.File,
    outfile: h5py.File,
    links_list: List[str],
    mask: SWIFTMask,
):
    """
    Copy over all the metadata from snapshot to output file

    Parameters
    ----------
    infile : h5py.File
        hdf5 file handle for input snapshot
    outfile : h5py.File
        hdf5 file handle for output snapshot
    links_list : list of str
        names of links found in the snapshot
    mask : SWIFTMask
        the mask being used to define subset
    """
    update_metadata_counts(infile, outfile, mask)

    skip_list = links_list.copy()
    skip_list += ["PartType", "Cells"]
    for field in infile.keys():
        if not any(substr in field for substr in skip_list):
            infile.copy(field, outfile)
def populateFrom(self, importedFile: h5py.File, topGroupKeys: List[str]): # We copy ilastikVersion as well as workflowName because that can influence the way in which the deserializers # interpret the imported data for key in topGroupKeys + self.BASE_KEYS: if key in importedFile.keys(): self.clearValue(key) importedFile.copy(key, self.file["/"])
class TestMapping(BaseTest):

    """
    Test if the registration of Group as a Mapping behaves as expected
    """

    def setUp(self):
        data = ('a', 'b')
        self.f = File('foo.hdf5', 'w')
        self.grp = self.f.create_group('bar')
        self.attr = self.f.attrs.create('x', data)

    def tearDown(self):
        if self.f:
            self.f.close()

    def test_keys(self):
        key_1 = self.f.keys()
        self.assertIsInstance(repr(key_1), str)
        key_2 = self.grp.keys()
        self.assertIsInstance(repr(key_2), str)

    def test_values(self):
        value_1 = self.f.values()
        self.assertIsInstance(repr(value_1), str)
        value_2 = self.grp.values()
        self.assertIsInstance(repr(value_2), str)

    def test_items(self):
        item_1 = self.f.items()
        self.assertIsInstance(repr(item_1), str)
        item_2 = self.grp.items()
        self.assertIsInstance(repr(item_2), str)
def convert_raw_to_img(src_filename, dst_filename, verbose=False, numba=False):
    """
    Converts .raw. data files to .img. data files, preserving all original fields.
    Creates a new file with the same name but a different suffix.
    """
    if verbose:
        print(f'source filename: {src_filename}')
        print(f'destination filename: {dst_filename}')
    from h5py import File
    from lcp_video.analysis import mono12p_to_image, mono12p_to_image_numba
    if numba:
        mono12p_to_image = mono12p_to_image_numba
    # Keep both files open only for as long as the copy takes.
    with File(src_filename, 'r') as src, File(dst_filename, 'w') as dst:
        width = src['image width'][()]
        height = src['image height'][()]
        length = src['images'].shape[0]
        for key in src.keys():
            if "images" not in key:
                dst.create_dataset(key, data=src[key][()])
            else:
                dst.create_dataset('images', (length, height, width), dtype='int16',
                                   chunks=(1, height, width))
                for i in range(length):
                    raw = src['images'][i]
                    dst['images'][i] = mono12p_to_image(raw, height, width).reshape((height, width))
def __init__(self, annot_file, split, tr_percent=0.7): print('Initializing data loader {} from {}'.format(split, annot_file)) f = File(annot_file, 'r') keys = [key for key in f.keys()] annot = {} for key in keys: annot[key] = np.asarray(f[key]).copy() f.close() # Keys: input, output, strike_low, strike_high, start_time, end_time, grid_size_str, grid_size_mat full_data_len = annot['input'].shape[0] ids = np.arange(full_data_len) max_id_te = int((1 - tr_percent) * full_data_len) te_ids = ids[ids < max_id_te] tr_ids = ids[ids >= max_id_te] self.tr_mean_inp = annot['input'][tr_ids].mean(axis=0) self.tr_std_inp = annot['input'][tr_ids].std(axis=0) annot['input'] = (annot['input'] - self.tr_mean_inp) / (self.tr_std_inp + 1e-8) self.tr_mean_opt = annot['output'][tr_ids].mean(axis=0) self.tr_std_opt = annot['output'][tr_ids].std(axis=0) annot['output'] = (annot['output'] - self.tr_mean_opt) / (self.tr_std_opt + 1e-8) for key in keys: if not annot[key].shape == (): annot[key] = annot[key][tr_ids if split == 'train' else te_ids] self.annot = annot self.nSamples = annot['input'].shape[0] print('Loaded {} {} samples'.format(split, self.nSamples))
def get_random_noise_mc_info_extr(input_file):
    """
    Wrapper function that includes the actual mc_info_extr
    for random noise simulations. There are no n_gen like in the neutrino case.

    Parameters
    ----------
    input_file : KM3NeT data file
        Can be online or offline format.

    Returns
    -------
    mc_info_extr : function
        The actual mc_info_extr function that holds the extractions.

    """
    # check if std reco is present
    f = File(input_file, "r")
    has_std_reco = "reco" in f.keys()

    if has_std_reco:
        # also check which rec types are present
        rec_types, rec_parameters_names = get_rec_types_in_file(f)

    def mc_info_extr(blob):
        """
        Processes a blob and creates the y with mc_info and, if existing, std reco.

        For this random noise case it is only general event info, like the id.

        Parameters
        ----------
        blob : dict
            The blob from the pipeline.

        Returns
        -------
        track : dict
            Containing all the specified info the y should have.

        """
        event_info = blob["EventInfo"]

        track = {
            "event_id": event_info.event_id[0],
            "run_id": event_info.run_id[0],
            "particle_type": 0,
        }

        # get all the std reco info
        if has_std_reco:
            std_reco_info = get_std_reco(blob, rec_types, rec_parameters_names)
            track.update(std_reco_info)

        return track

    return mc_info_extr
def create_submap_dataset(h5file: h5py.File): dataset = {} for submap_name in h5file.keys(): submap_dict = {} submap_dict['num_segments'] = np.array(h5file[submap_name + '/num_segments'])[0] segments = [] center_submap_xy = torch.Tensor([0., 0.]) num_points = 0 for i in range(submap_dict['num_segments']): segment_name = submap_name + '/segment_' + str(i) segments.append(np.array(h5file[segment_name])) center_submap_xy += segments[-1].sum(axis=0)[:2] num_points += segments[-1].shape[0] center_submap_xy /= num_points # segments = [np.array(segment - np.hstack([center_submap_xy, 0.])) for segment in segments] segment_centers = np.array([ segment.mean(axis=0) - np.hstack([center_submap_xy, 0.]) for segment in segments ]) submap_dict['segment_centers'] = torch.Tensor(segment_centers) submap_dict['segment_scales'] = torch.Tensor( np.array([np.sqrt(segment.var(axis=0)) for segment in segments])) submap_dict['segments'] = [ torch.Tensor((segment - segment.mean(axis=0)) / np.sqrt(segment.var(axis=0))) for segment in segments ] dataset[submap_name] = submap_dict return dataset
def embed_data(self, h5_file: h5py.File, embedder: EmbeddingModel, save_states: bool = False):
    """Embeds cylinder flow data into a 1D vector representation for the transformer.
    TODO: Remove redundant arguments

    Args:
        h5_file (h5py.File): HDF5 file object of cylinder flow raw data
        embedder (EmbeddingModel): Embedding neural network
        save_states (bool, optional): To save the physical states or not, should be True
            for validation and testing. Defaults to False.
    """
    # Iterate through stored time-series
    samples = 0
    embedder.eval()
    for key in h5_file.keys():
        ux = torch.Tensor(h5_file[key + '/ux'])
        uy = torch.Tensor(h5_file[key + '/uy'])
        p = torch.Tensor(h5_file[key + '/p'])
        data_series = torch.stack([ux, uy, p], dim=1).to(embedder.devices[0])
        visc = (2.0 / float(key)) * torch.ones(ux.size(0), 1).to(embedder.devices[0])
        with torch.no_grad():
            embedded_series = embedder.embed(data_series, visc).cpu()

        # Stride over time-series
        for i in range(0, data_series.size(0) - self.block_size + 1, self.stride):
            # Truncate in block of block_size
            data_series0 = embedded_series[i: i + self.block_size]  # .repeat(1, 4)
            self.examples.append(data_series0)
            self.position_ids.append(torch.arange(0, self.block_size, dtype=torch.long) + i)

            if save_states:
                self.states.append(data_series[i: i + self.block_size].cpu())

        samples = samples + 1
        if (self.ndata > 0 and samples >= self.ndata):  # If we have enough time-series samples break loop
            break
def orderByQmodulus(filename, outfile=None):
    """
    Sassena does not enforce any ordering of the structure factors.
    Here we order by increasing modulus of the Q-vectors.
    """
    from h5py import File
    import numpy
    import os
    f = File(filename, 'r')
    overwrite = False
    if not outfile:
        outfile = tempfile()  # temporary output file
        overwrite = True
    g = File(outfile, 'w')
    ds_q = numpy.array(f["qvectors"])  # shape==(nvectors,3)
    moduli = numpy.square(ds_q).sum(axis=1)  # moduli-squared of the Q-vectors
    rank = numpy.argsort(moduli)  # rank from smallest to greatest
    for dset in ('qvectors', 'fqt', 'fq', 'fq0', 'fq2'):
        if dset in f.keys():
            g[dset] = numpy.array(f[dset])[rank]
    for key, val in f.attrs.items():
        g.attrs[key] = val
    g.close()
    f.close()
    if overwrite:
        os.system('/bin/mv %s %s' % (outfile, filename))
    return None
class H5Writer:
    L = TypeVar("L", List[ndarray], Dict[str, ndarray])

    def __init__(self, filename: str) -> None:
        self.__file = File(filename, 'a')

    def saveImgDataIntoGroup(self, imgData: L, groupName: str, datasetNames: List[str]) -> None:
        group: Group = self.__file.create_group(groupName)
        print('... group was created successfully!')
        assert len(imgData) == len(datasetNames), 'the number of data to save and dataset names are not equal'
        for i in range(len(datasetNames)):
            group.create_dataset(datasetNames[i], data=asarray(imgData[i]),
                                 compression='gzip', compression_opts=9)
            print('... dataset was created successfully!')

    def loadImgDataFromGroup(self, groupName: str = None, datasetNames: str = None) -> Generator:
        keys: List[str] = list(self.__file.keys())
        if keys:
            print(keys)
        if groupName:
            # e.g. group = file.get('group2/subfolder')
            group: Group = self.__file.get(groupName)
            # items is a list of (name, dataset) tuples, e.g. [(u'data3', <HDF5 dataset "data3": ...>)]
            items: List[Tuple] = list(group.items())
            if items:
                print(items)
            for name, _ in items:
                print('recovering group class:', group.get(name))
                yield asarray(group.get(name))
        elif datasetNames:
            yield self.__file.get(datasetNames)

    def closingH5PY(self) -> None:
        self.__file.close()
def __init__(self, embeddings_file: h5py.File): """ :param embeddings_file: an h5py File, aka `h5py.File("/path/to/file.h5")`. """ self._lookup_table = dict( (embeddings_file[new_id].attrs["original_id"], new_id) for new_id in embeddings_file.keys()) self._embeddings_file = embeddings_file
def plot_samples(h5py_file: h5py.File, n_samples: int = 3, dataset_length: int = 4000, cmap: str = 'Greys_r', vmin: float = None, vmax: float = None) -> None: """Plot samples and pixel distributions as they come out of the h5py file directly.""" sample_indices = np.random.choice(dataset_length, n_samples) keys = sorted(list(h5py_file.keys())) for counter, idx in enumerate(sample_indices): fig, axes = plt.subplots(ncols=len(keys) + 1, nrows=2, figsize=(12, 12)) mask = h5py_file['mask'][idx] scan = h5py_file['scan'][idx] masked_scan = np.where(mask.astype(bool), scan, np.zeros(scan.shape)) min_val = np.min(masked_scan) if vmin is None else vmin max_val = np.max(masked_scan) if vmax is None else vmax masked_pixels = scan[mask.astype(bool)].flatten() datasets = [h5py_file[key] for key in keys] + [masked_scan] for dataset_name, dataset, ax in zip(keys + ['masked_scan'], datasets, np.transpose(axes)): if dataset_name != 'masked_scan': array_2d = dataset[idx] else: # actually not a dataset but simply an array already array_2d = dataset im = ax[0].imshow(np.reshape(array_2d, (200, 200)), cmap=cmap, vmin=min_val, vmax=max_val) divider = make_axes_locatable(ax[0]) cax = divider.append_axes("right", size="5%", pad=0.05) plt.colorbar(im, cax=cax) ax[0].axis('off') ax[0].set_title(dataset_name) ax[1].hist( array_2d if dataset_name != 'masked_scan' else masked_pixels, bins=30, density=False) try: description = stats.describe(array_2d if dataset_name != 'masked_scan' else masked_pixels) except ValueError: print( f'Found sample with empty mask. No statistics available.') else: ax[1].set_title( f'mean: {description.mean:.2f}, var: {description.variance:.2f}' ) print( f'{dataset_name:15}: min/max: {description.minmax[0]:.2f}/{description.minmax[1]:.2f}, ' f'mean: {description.mean:.2f}, variance: {description.variance:.2f}' ) plt.tight_layout() plt.show()
def embed_data(self, h5_file: h5py.File, embedder: EmbeddingModel, save_states: bool = False):
    """Embeds cylinder flow data into a 1D vector representation for the transformer.
    TODO: Remove redundant arguments, add minibatch option for the encoding

    Args:
        h5_file (h5py.File): HDF5 file object of raw data
        embedder (EmbeddingModel): Embedding neural network
        save_states (bool, optional): To save the physical states or not, should be True
            for validation and testing. Defaults to False.
    """
    # Iterate through stored time-series
    samples = 0
    embedder.eval()
    logger.info('Parsing hdf5 file and embedding data, this could take a bit...')
    # Loop simulations
    for key in h5_file.keys():
        u = torch.Tensor(h5_file[key + '/u'])
        v = torch.Tensor(h5_file[key + '/v'])
        data_series = torch.stack([u, v], dim=1).to(embedder.devices[0])
        # data_series = torch.nn.functional.interpolate(data_series, (32, 32, 32), mode='trilinear', align_corners=True)
        embedded_series = torch.zeros([data_series.size(0)] + [embedder.embedding_dims])
        with torch.no_grad():
            # Mini-batch embedding due to model size
            for i in range(0, data_series.size(0), 96):
                embedded_series[i:i + 96] = embedder.embed(data_series[i:i + 96]).cpu()

        # Stride over time-series
        for i in range(0, data_series.size(0) - self.block_size + 1, self.stride):
            # Truncate in block of block_size
            data_series0 = embedded_series[i:i + self.block_size]
            self.examples.append(data_series0)
            self.position_ids.append(torch.arange(0, self.block_size, dtype=torch.long) + i)

            if save_states:
                self.states.append(data_series[i:i + self.block_size].cpu())

        samples = samples + 1
        if self.ndata > 0 and samples >= self.ndata:  # If we have enough time-series samples break loop
            break

    logger.info('Collected {:d} time-series from hdf5 file for a total of {:d} training examples.'.format(
        samples, len(self.examples)))
def __init__(self, hdf_path): hdf = File(hdf_path, "r") self.policies = list(hdf.keys()) self.award_amounts = list(hdf[self.policies[0]].keys()) self.pubneg_rates = list( hdf[self.policies[0]][self.award_amounts[0]].keys()) self.fpdrs = list(hdf[self.policies[0]][self.award_amounts[2]][ self.pubneg_rates[0]].keys()) self.hdf = hdf
def time_slice_info(meta_file: h5py.File) -> TimeSliceInfo: """ Assemble information about the event data time slices from the metadata file. Args: meta_file: Metadata ('_meta.h5') file. Assumes metadata version 1. Returns: - List of slice objects used to select each time slice from the virtual source objects in order to populate the virtual layouts. Length and order correspond to 'events_per_ts'. - List of the number of events in each time slice, in the order that the time slices will appear in the VDS. Length is the number of time slices recorded. """ fp_per_module = meta_file["fp_per_module"][()] ts_keys = sorted(filter(ts_key_regex.match, meta_file.keys())) ts_data = [meta_file[ts_key] for ts_key in ts_keys] time_slices = [] num_events_per_ts = [] # Loop through the modules, acting on the time slice metadata for each in turn. for num_files, ts_counts in zip(fp_per_module, ts_data): ts_counts = ts_counts[()] # Reshape the time slice metadata for a single module into a rectangular array # with shape (number of time slices per file, number of files), so as to be # able to generate file-specific slices. num_ts_per_fp = -(-ts_counts.size // num_files) ts_counts.resize(num_ts_per_fp * num_files) # Keep a separate record of each module's array of event counts per time slice. num_events_per_ts.append(ts_counts) ts_counts = ts_counts.reshape(num_ts_per_fp, num_files) # Generate the cumulative count of events per time slice for each file. ts_per_module = np.pad(np.cumsum(ts_counts, axis=0), ((1, 0), (0, 0))) # Turn these counts into slices to select from a virtual source for each file. time_slices.append( map(slice, ts_per_module[:-1].flatten(), ts_per_module[1:].flatten()) ) # Assemble all the source slices into a single list, ordered first # chronologically, then by module number. Where modules have recorded different # numbers of time slices, zip_longest will pad with None. time_slices = list(chain.from_iterable(zip_longest(*time_slices))) # Resize each module's array of event counts per time slice so that their sizes # match. This is achieved by zero-padding to match the None-padding of the list # of time slices by zip_longest. max_size = max(data.size for data in num_events_per_ts) num_events_per_ts = np.column_stack( [np.pad(data, (0, max_size - data.size)) for data in num_events_per_ts] ).flatten() return time_slices, num_events_per_ts
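# Illustration (invented numbers) of the cumulative-sum step in time_slice_info above:
# for one module with 3 time slices per file and 2 files, padding the cumulative event
# counts with a leading row of zeros gives the start/stop pairs of each per-file slice.
import numpy as np

ts_counts = np.array([[5, 7],
                      [0, 2],
                      [3, 1]])  # events per time slice (rows) and file (columns)
cum = np.pad(np.cumsum(ts_counts, axis=0), ((1, 0), (0, 0)))
# cum == [[0, 0], [5, 7], [5, 9], [8, 10]]
slices = list(map(slice, cum[:-1].flatten(), cum[1:].flatten()))
# slices == [slice(0, 5), slice(0, 7), slice(5, 5), slice(7, 9), slice(5, 8), slice(9, 10)]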
def contents(data: h5py.File) -> list: """ Returns the list of the contents of a h5py file. Parameters ---------- data : h5py.File Returns ------- list """ return list(data.keys())
def _get_outputs(self, input_file: h5py.File) -> Union[Any, Tuple]: """Extracts the step output from a given h5 file Args: input_file (h5py.File): File to load from Returns: Union[Any, Tuple]: Previously computed output of the step """ outputs = list() nr_outputs = len(input_file.keys()) # Legacy, remove at some point if nr_outputs == 1 and self.output_key in input_file.keys(): return tuple([input_file[self.output_key][()]]) for i in range(nr_outputs): outputs.append(input_file[f"{self.output_key}_{i}"][()]) if len(outputs) == 1: return outputs[0] else: return tuple(outputs)
def get_song_key_respecting_dic(dataset: h5py.File, dictionary: list) -> list:
    """
    Verify which songs in the dataset respect a given dictionary,
    i.e. contain only characters present in it

    # Arguments:
        dataset (h5py.File): the dataset containing a key->songs mapping
        dictionary (list): the dictionary to use

    # Returns:
        a list of the song keys that respect the dictionary
    """
    song_key_respecting_dic = list()
    print('Starting song analysis from dictionary...')
    # NOTE: only the first 100 songs are scanned here.
    for i, song_key in enumerate(list(dataset.keys())[:100]):
        print('Execution: {:.2f}\r'.format(i / len(dataset.keys()) * 100), end='')
        song = dataset[song_key][0]
        song_respect_dic = True
        for char in song:
            if char not in dictionary:
                song_respect_dic = False
                break
        if song_respect_dic:
            song_key_respecting_dic.append(song_key)
    return song_key_respecting_dic
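# Hedged usage sketch (file name and character dictionary are made-up); assumes each
# top-level key in the HDF5 file maps to a dataset whose first element is the song text,
# as read by the function above.
import h5py

with h5py.File('songs.h5', 'r') as dataset:
    dictionary = list("abcdefghijklmnopqrstuvwxyz '")
    valid_keys = get_song_key_respecting_dic(dataset, dictionary)
    print(len(valid_keys), 'songs use only characters from the dictionary')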
def copy_hdf5(src_file: h5.File, dest_file: h5.File, indices: list): first_dim = len(indices) for key in src_file.keys(): src_data = src_file[key] shape = list(src_data.shape) shape[0] = first_dim dest_data = dest_file.create_dataset(name=key, shape=shape, dtype=src_data.dtype) for dest_i, src_i in enumerate(indices): dest_data[dest_i] = src_data[src_i] print('copied {}'.format(dest_i)) return
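# Hedged usage sketch for copy_hdf5 (file names and indices are made-up): copies rows
# 0, 5 and 42 of every dataset in source.h5 into a new, smaller subset.h5.
import h5py as h5

with h5.File('source.h5', 'r') as src, h5.File('subset.h5', 'w') as dst:
    copy_hdf5(src, dst, [0, 5, 42])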
def pp_keys(self):
    data_file = File(self.data_path + self.log, 'r', libver='latest', swmr=True)
    try:
        # Every top-level key except 'tpts' and 'p_tot'.
        pp_key_vals = [key for key in data_file.keys() if key not in ('tpts', 'p_tot')]
        data_file.close()
        return pp_key_vals
    except Exception:
        self.exit_handler()
        raise Exception('ERROR')
def find_links(
    input_file: h5py.File,
    link_names: Optional[List] = [],
    link_paths: Optional[List] = [],
    path: Optional[str] = None,
) -> (List[str], List[str]):
    """
    Recursively finds all the links in the snapshot and writes them to a list

    Parameters
    ----------
    input_file : h5py.File
        hdf5 file handle for snapshot
    link_names : list of str, optional
        names of links found in the snapshot
    link_paths : list of str, optional
        paths where links found in the snapshot point to
    path : str, optional
        the path to the current location in the snapshot

    Returns
    -------
    link_names, link_paths : list of str, list of str
        lists of the names and links of paths in `input_file`
    """
    if path is not None:
        keys = input_file[path].keys()
    else:
        # Top-level call: start from the file root with fresh lists, so that the
        # recursive calls below can accumulate into them.
        keys = input_file.keys()
        path = ""
        link_names = []
        link_paths = []

    for key in keys:
        subpath = f"{path}/{key}"
        dataset = input_file.get(subpath, getlink=True)
        if isinstance(dataset, h5py.SoftLink):
            link_names.append(subpath.lstrip("/"))
            link_paths.append(dataset.path)
        else:
            try:
                if input_file[subpath].keys() is not None:
                    find_links(input_file, link_names, link_paths, subpath)
            except AttributeError:
                # Datasets have no keys(); nothing further to descend into.
                pass

    return link_names, link_paths
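# Hedged usage sketch for find_links (the snapshot file name is made-up): prints every
# soft link in the file together with the path it points to.
import h5py

with h5py.File('snapshot_0000.hdf5', 'r') as snap:
    names, targets = find_links(snap)
    for name, target in zip(names, targets):
        print(f'{name} -> {target}')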
def iterate_nxs(nxs: h5py.File):
    # Breadth-first traversal yielding (parent, node) pairs, starting from the root groups.
    q = [(None, nxs[group_key]) for group_key in nxs.keys()]
    while len(q) > 0:
        parent, current_group = q.pop(0)
        # Datasets have no keys(); only groups are expanded further.
        next_groups = current_group.keys() if hasattr(current_group, "keys") else []
        # Extend the queue with the children of the current group.
        q.extend([(current_group, current_group[name]) for name in next_groups])
        yield parent, current_group
def process_raw(filename, stats, delete=False):
    """
    Convert a '.raw.hdf5' file into companion '.hits.hdf5' and '.roi.hdf5' files.
    """
    from h5py import File
    fraw = File(filename, 'r')
    raw = fraw['images'][()]
    length = fraw['images'].shape[0]
    width = fraw['image width'][()]
    height = fraw['image height'][()]
    pixel_format = fraw['pixel format'][()]
    # Preparation: create the ROI file and populate it with all fields.
    images = convert_raw_to_images(raw, pixel_format=pixel_format, height=height, width=width, length=length)
    hits_dict = hits_from_chunk(images)
    filename_hits = filename.replace('.raw.hdf5', '.hits.hdf5')
    with File(filename_hits, 'w') as fhits:
        fhits.create_dataset('hits0', data=hits_dict['hits0'], dtype='int32')
        fhits.create_dataset('hits1', data=hits_dict['hits1'], dtype='int32')
        fhits.create_dataset('hits2', data=hits_dict['hits2'], dtype='uint8')
        fhits.create_dataset('hits3', data=hits_dict['hits3'], dtype='uint8', compression='lzf')
    roi = roi_from_hits_and_data(hits2=hits_dict['hits2'], images=images)
    filename_roi = filename.replace('.raw.hdf5', '.roi.hdf5')
    with File(filename_roi, 'w') as froi:
        for key in fraw.keys():
            if key != 'images':
                froi.create_dataset(key, data=fraw[key])
            else:
                froi.create_dataset(key, data=roi, chunks=(1, height, width), compression='lzf', dtype='int16')
    fraw.close()
def create_dict(dump, variables=None):
    '''
    Creates a dictionary of all variables in 'dump'

    Parameters:
        dump        Path to dump to be dictionarized
        variables   List of variables to be saved (default is all) - no packages
    '''
    from numpy import array
    ret = dict()                              # Dictionary to be returned
    f = File(dump, 'r')                       # Open readable dump as f
    for pack in f.keys():                     # Loop through the list of packages
        p = f.get(pack)                       # Get the package object p
        for var in p.keys():                  # Loop through all variables in the package
            if variables is None:             # By default, save all variables to dict
                ret[pack + '.' + var] = array(p.get(var))   # Store the variable to the dictionary as array
            elif var in variables:            # If variables listed, save only listed variables
                ret[pack + '.' + var] = array(p.get(var))   # Store the variable to the dictionary as array
    f.close()                                 # Close the dump file before returning
    return ret
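# Hedged usage sketch for create_dict (the dump path and variable names are made-up):
# loads only the listed variables; keys come back as 'package.variable'.
d = create_dict('run0/dump.h5', variables=['te', 'ti'])
for name, arr in d.items():
    print(name, arr.shape)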
def read_hdf5_content(f: h5py.File, gp_current, gp_max, pattern: Pattern, filename, well_regex: Pattern, normalize_enum: int, terminal_columns: int, verbose: bool, best_well_max, best_well_min): key_list = list(f.keys()) key_list.sort(key=lambda a: int(re.split(pattern, a)[1])) worker_x = [] for k in range(len(key_list)): key = key_list[k] # print("Loading dataset associated with key ", str(key)) current_well = re.split(well_regex, key)[1] if verbose: line_print("Reading data file: " + filename + " - Current dataset key: " + str( key) + " Well: " + current_well + " [" + str(gp_current) + "/" + str(gp_max) + "]", max_width=terminal_columns) current_x = np.array(f[str(key)]) if normalize_enum == 0: pass elif normalize_enum == 1: current_x = normalize_np(current_x, 0, 255) elif normalize_enum == 2: current_x[0] = normalize_np(current_x[0], current_x[0].min(), current_x[0].max()) current_x[1] = normalize_np(current_x[1], current_x[1].min(), current_x[1].max()) current_x[2] = normalize_np(current_x[2], current_x[2].min(), current_x[2].max()) elif normalize_enum == 3: current_x[0] = normalize_np(current_x[0], current_x.min(), current_x.max()) current_x[1] = normalize_np(current_x[1], current_x.min(), current_x.max()) current_x[2] = normalize_np(current_x[2], current_x.min(), current_x.max()) elif normalize_enum == 4: best_well_max[0] = max(best_well_max[0], current_x[0].max()) best_well_max[1] = max(best_well_max[1], current_x[1].max()) best_well_max[2] = max(best_well_max[2], current_x[2].max()) best_well_min[0] = min(best_well_min[0], current_x[0].min()) best_well_min[1] = min(best_well_min[1], current_x[1].min()) best_well_min[2] = min(best_well_min[2], current_x[2].min()) else: raise Exception('Undefined state of normalize_enum') worker_x.append(np.array(current_x)) return worker_x
def get_event_timestamp_ext_link_index(ext_link_file: h5py.File, timestamps: np.ndarray):
    """
    Function which generates an index map from timestamps to index in external link file

    :param ext_link_file: external link file as h5py.File object
    :param timestamps: array of timestamps
    :return: map of indexes in timestamps to indexes in ext_link_file
    """
    # find name of groups to be read
    groups_list = list(ext_link_file.keys())

    timestamps_ext_link = []
    for event in groups_list:
        timestamps_ext_link.append(
            np.datetime64(ext_link_file[event].attrs["Timestamp"].decode('utf8')))
    timestamps_ext_link = np.array(timestamps_ext_link)

    ext_link_index = np.empty_like(timestamps, dtype=int)
    for index, timestamp in enumerate(timestamps):
        ext_link_index[index] = np.where(timestamp == timestamps_ext_link)[0]
    return ext_link_index
def load_container_list(dfile: h5py.File): """ Loads a list of RegionContainer objects from an hdf5 file :param dfile: Handle of hdf5 file from which list should be loaded :return: A list of RegionContainer objects """ container_list = [] for k in dfile.keys(): try: pos = np.array(dfile[k]["positions"]) pos = [(p[0], p[1]) for p in pos] rn = str(np.array(dfile[k]["region_name"])) zi = int(np.array(dfile[k]["z_index"])) rc = RegionContainer(pos, rn, zi) container_list.append(rc) except KeyError: warnings.warn( "Found non RegionContainer object in file {0}".format( dfile.filename)) continue return container_list
def test_tree_to_hdf5_and_back(hdf5_temp: h5py.File): data = { 'a': 1, 'b': 3.14, 'c': 'asdf', 'd': np.full(4, 3.14), 'q': { 'foo': 'bar', 'deep': {} } } tree_to_hdf5(data, hdf5_temp) assert hdf5_temp.attrs == {'a': 1, 'b': 3.14, 'c': 'asdf'} assert hdf5_temp.keys() == {'d', 'q'} assert np.array(hdf5_temp['d']) == approx(np.full(4, 3.14)) assert hdf5_temp['q'].attrs == {'foo': 'bar'} assert hdf5_temp['q'].keys() == {'deep'} assert hdf5_temp['q/deep'].attrs == {} assert hdf5_temp['q/deep'].keys() == set() data['d'] = approx(data['d']) assert hdf5_to_tree(hdf5_temp) == data
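# A minimal sketch (not the project's actual implementation) of the round-trip helpers
# the test above assumes: scalars and strings are stored as HDF5 attributes, numpy
# arrays as datasets, and nested dicts as subgroups. The real code may handle more types.
import numpy as np
import h5py

def tree_to_hdf5(tree: dict, group: h5py.Group) -> None:
    for key, value in tree.items():
        if isinstance(value, dict):
            tree_to_hdf5(value, group.create_group(key))  # nested dict -> subgroup
        elif isinstance(value, np.ndarray):
            group.create_dataset(key, data=value)         # array -> dataset
        else:
            group.attrs[key] = value                      # scalar/str -> attribute

def hdf5_to_tree(group: h5py.Group) -> dict:
    tree = dict(group.attrs)
    for key, item in group.items():
        tree[key] = hdf5_to_tree(item) if isinstance(item, h5py.Group) else np.array(item)
    return tree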
def find_datasets(input_file: h5py.File, dataset_names=[], path=None, recurse=False) -> List[str]: """ Recursively finds all the datasets in the snapshot and writes them to a list Parameters ---------- input_file : h5py.File hdf5 file handle for snapshot dataset_names : list of str, optional names of datasets found in the snapshot path : str, optional the path to the current location in the snapshot recurse : bool, optional flag to indicate whether we're recursing or not Returns ------- dataset_names : list of str names of datasets in `path` in `input_file` """ if not recurse: dataset_names = [] if path is not None: keys = input_file[path].keys() else: keys = input_file.keys() path = "" for key in keys: subpath = f"{path}/{key}" if isinstance(input_file[subpath], h5py.Dataset): dataset_names.append(subpath) elif input_file[subpath].keys() is not None: find_datasets(input_file, dataset_names, subpath, recurse=True) return dataset_names
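# Hedged usage sketch for find_datasets (the snapshot file name is made-up): lists the
# full paths of all datasets found anywhere in the file.
import h5py

with h5py.File('snapshot_0000.hdf5', 'r') as snap:
    names = find_datasets(snap)
    print(len(names), 'datasets found, e.g.', names[:3])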