def getter(self):
    """
    Return the data for ``field``, reading it from ``filename`` on first use.

    The value is cached on the instance under the ``_{name}`` attribute.
    When that attribute is not ``None`` it is returned as-is. Otherwise the
    file is opened read-only, the data are read (restricted to ``mask``
    when one is set), wrapped in a ``cosmo_array`` and stored on the
    instance before being returned.

    Returns
    -------
    The cached or freshly read array, or ``None`` when a ``KeyError`` is
    raised while reading (in which case a message is printed).
    """
    private_name = f"_{name}"

    cached = getattr(self, private_name)
    if cached is not None:
        return cached

    with h5py.File(filename, "r") as handle:
        try:
            dataset = handle[field]

            if mask is not None:
                # The shape of the output may be non-trivial, so inspect
                # the first entry to find the per-row size and the dtype.
                first_entry = dataset[0]
                per_row = first_entry.size
                shape = mask_size if per_row == 1 else (mask_size, per_row)

                values = read_ranges_from_file(
                    dataset,
                    mask,
                    output_shape=shape,
                    output_type=first_entry.dtype,
                    columns=columns,
                )
            elif dataset.ndim > 1:
                # Column selection is only valid for multidimensional data
                values = dataset[:, columns]
            else:
                values = dataset[:]

            setattr(
                self,
                private_name,
                cosmo_array(
                    values, unit, cosmo_factor=cosmo_factor, name=description
                ),
            )
        except KeyError:
            print(f"Could not read {field}")
            return None

    return getattr(self, private_name)
def getter(self):
    """
    Return the data for ``field``, reading it from ``filename`` on first use.

    The value is cached on the instance under the ``_{name}`` attribute.
    When that attribute is not ``None`` it is returned as-is. Otherwise the
    file is opened read-only, the data are read (restricted to ``mask``
    when one is set), wrapped in a ``cosmo_array`` and stored on the
    instance before being returned.

    Returns
    -------
    The cached or freshly read array, or ``None`` when a ``KeyError`` is
    raised while reading (in which case a message is printed).
    """
    current_value = getattr(self, f"_{name}")

    if current_value is not None:
        return current_value

    with h5py.File(filename, "r") as handle:
        try:
            if mask is not None:
                # First, need to calculate data shape (which may be
                # non-trivial), so we read in the first value
                first_value = handle[field][0]

                output_type = first_value.dtype
                output_size = first_value.size

                if output_size != 1:
                    output_shape = (mask_size, output_size)
                else:
                    output_shape = mask_size

                # NOTE: a leftover ``pdb.set_trace()`` used to sit here and
                # dropped every masked read into the interactive debugger.
                setattr(
                    self,
                    f"_{name}",
                    cosmo_array(
                        read_ranges_from_file(
                            handle[field],
                            mask,
                            output_shape=output_shape,
                            output_type=output_type,
                        ),
                        unit,
                        cosmo_factor=cosmo_factor,
                    ),
                )
            else:
                setattr(
                    self,
                    f"_{name}",
                    cosmo_array(handle[field][...], unit, cosmo_factor=cosmo_factor),
                )
        except KeyError:
            print(f"Could not read {field}")

            return None

    return getattr(self, f"_{name}")
def write_datasubset(
    infile: h5py.File,
    outfile: h5py.File,
    mask: SWIFTMask,
    dataset_names: List[str],
    links_list: List[str],
):
    """
    Copies the masked subset of every dataset in a snapshot to a new file

    Datasets whose name contains any entry of ``links_list``, or either of
    "Cells" and "SubgridScheme", are skipped. Nothing is written when
    ``mask`` is ``None``. The attributes of each copied dataset are copied
    along with its data.

    Parameters
    ----------
    infile : h5py.File
        hdf5 file handle for input snapshot
    outfile : h5py.File
        hdf5 file handle for output snapshot
    mask : SWIFTMask
        the mask used to define subset that is written to new snapshot
    dataset_names : list of str
        names of datasets found in the snapshot
    links_list : list of str
        names of links found in the snapshot
    """
    # Work on a copy so the caller's list of links is left untouched
    excluded = links_list.copy() + ["Cells", "SubgridScheme"]

    if mask is None:
        return

    for name in dataset_names:
        if any(fragment for fragment in excluded if fragment in name):
            continue

        source = infile[name]

        # The first entry tells us the dtype and the number of values per row
        first_entry = source[0]
        per_row = first_entry.size

        n_selected = get_dataset_mask(mask, name, suffix="_size")
        shape = n_selected if per_row == 1 else (n_selected, per_row)

        selected = read_ranges_from_file(
            source,
            get_dataset_mask(mask, name),
            output_shape=shape,
            output_type=first_entry.dtype,
        )

        # Write the subset, then carry the attributes across
        written = outfile.create_dataset(name, data=selected)
        for attr_name, attr_value in source.attrs.items():
            written.attrs.create(attr_name, attr_value)
def test_read_ranges_from_file():
    """
    Checks read_ranges_from_file using a plain numpy array as a stand in
    for the dataset.

    The expected values treat every ``[start, end]`` pair in ``ranges`` as
    including both of its end points.
    """
    dataset = np.arange(1000)
    ranges = np.array([[77, 79], [88, 98], [204, 204]])

    # 3 values from the first range, 11 from the second, 1 from the third
    expected = np.array(
        [77, 78, 79, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 204]
    )

    result = read_ranges_from_file(
        dataset, ranges, expected.size, type(dataset[0])
    )

    assert (expected == result).all()
def test_read_ranges_from_file():
    """
    Tests the reading of ranges from file using a dataset stored in an
    in-memory hdf5 file.

    The expected values treat every ``[start, end]`` pair in ``ranges`` as
    excluding its end point, so ``[77, 79]`` contributes 77 and 78 while
    ``[81, 81]`` and ``[204, 204]`` contribute nothing.
    """
    # In memory hdf5 file
    file_handle = create_in_memory_hdf5()

    try:
        handle = file_handle.create_dataset("test", data=np.arange(1000))
        ranges = np.array([[77, 79], [81, 81], [88, 98], [204, 204]])
        output_size = 2 + 10
        output_type = type(handle[0])

        expected = np.array([77, 78, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97])
        out = read_ranges_from_file(handle, ranges, output_size, output_type)

        assert (out == expected).all()
    finally:
        # Close the file even when the read or the assertion fails, so a
        # failing test does not leave the handle open.
        file_handle.close()