def sampletime(t: h5py.Dataset, bstride):
    """
    Pick out the pulse times that belong to the selected beam.

    t: 2-D numpy array or h5py dataset of times (the original author notes
       these are UTC Unix epoch values)
    bstride: boolean array with the same number of rows as ``t``

    returns:
      * None when ``bstride`` selects nothing,
      * ``t[bstride]`` when ``t`` and ``bstride`` have the same shape,
      * column 0 of the rows of ``t`` where ``bstride`` has any True value
        when ``t`` has exactly two columns,
      * ``t`` unchanged otherwise.
    """
    assert isinstance(t, (np.ndarray, h5py.Dataset)), "Numpy or h5py array only"
    assert t.ndim == 2
    assert bstride.dtype == "bool"
    assert t.shape[0] == bstride.shape[0]  # number of times

    # selected beam was never used in this file
    if bstride.sum() == 0:
        return None

    # usual case: the mask lines up element-for-element with the times
    if t.shape == bstride.shape:
        selected = t[bstride]
        if selected.max() > 1.01 * selected.mean():
            logging.warning("at least one time gap in radar detected")
        return selected

    # improvised case for the oldest AMISR files
    if t.shape[1] == 2:
        logging.info(
            "improvised time method for very old AMISR files 2006-2007, may be inaccurate time"
        )
        assert (bstride.sum(axis=1) <= 1).all(), "were some times without pulses?"
        rows_with_pulse = bstride.any(axis=1)
        return t[rows_with_pulse, 0]

    # no rule matched: hand the input back untouched
    return t
def extend_dataset(dataset: h5py.Dataset, data: Union[np.ndarray, List]):
    """Append ``data`` to the end of ``dataset`` along its first axis.

    Parameters
    ----------
    dataset
        Dataset to grow; it is resized along axis 0 by ``len(data)`` rows.
    data
        Rows to append. Plain Python sequences (lists, list subclasses and
        tuples) are converted to a numpy array first; anything else is
        expected to already expose ``.shape``.

    Returns
    -------
    None
        The dataset is modified in place. Nothing happens when ``data``
        has zero rows.
    """
    # isinstance (rather than an exact ``type(...) is list`` check) also
    # accepts list subclasses and tuples.
    if isinstance(data, (list, tuple)):
        data = np.array(data)

    n_new = data.shape[0]
    if n_new == 0:
        return

    dataset.resize(dataset.shape[0] + n_new, axis=0)
    # After the resize the last ``n_new`` rows are the freshly added ones.
    dataset[-n_new:] = data
def extend_dataset(dataset: h5py.Dataset, data: np.ndarray,) -> h5py.Dataset:
    """Extend a dataset by appending ``data`` along its first axis.

    Used to update the images in a split.

    The dataset is resized so that its first dimension grows by
    ``data.shape[0]`` (all other dimensions are kept), ``data`` is written
    into the newly added rows, and the same dataset object is returned.
    """
    old_length = dataset.shape[0]
    trailing_dims = dataset.shape[1:]

    dataset.resize((old_length + data.shape[0], *trailing_dims))
    # Everything from the previous end onwards is the new region.
    dataset[old_length:] = data
    return dataset
def append_buffer_to_dataset(dset: h5py.Dataset, buffer: ListBuffer):
    """Append the buffered values to a resizable h5py dataset.

    The dataset is resized so that its first dimension equals
    ``buffer.end`` and the buffer contents are written starting at
    ``buffer.start``. An empty buffer only produces a warning.
    """
    if not len(buffer):
        logging.warning("Buffer is empty.")
        return

    # Buffer is not empty
    logging.info("")
    values = np.array(buffer)
    # First axis is dictated by the buffer; remaining axes follow the data.
    dset.resize((buffer.end, *values.shape[1:]))
    dset[buffer.start:] = values
def read_c4_dataset_as_c8(ds: h5py.Dataset, key=np.s_[...]):
    """
    Read a complex float16 HDF5 dataset as a numpy.complex64 array.

    Avoids h5py/numpy dtype bugs and uses numpy float16 -> float32 conversions
    which are about 10x faster than HDF5 ones.

    Parameters
    ----------
    ds : h5py.Dataset
        Dataset to read from.
    key : index expression, optional
        Selection applied to the dataset; defaults to everything.

    Returns
    -------
    numpy.ndarray
        The selected data viewed as ``numpy.complex64``.
    """
    # Reading through an explicit dtype avoids the h5py exception:
    # TypeError: data type '<c4' not understood
    converted = ds.astype(complex32)
    if hasattr(converted, "__getitem__"):
        # Newer h5py: the object returned by astype() is sliced directly.
        # Its use as a context manager was deprecated and later removed.
        z = converted[key]
    else:
        # Older h5py: astype() only works as a context manager.
        with converted:
            z = ds[key]
    # Define a similar datatype for complex64 to be sure we cast safely.
    complex64 = np.dtype([("r", np.float32), ("i", np.float32)])
    # Cast safely and then view as native complex64 numpy dtype.
    return z.astype(complex64).view(np.complex64)
def add_default_attributes(self, hdf5_group: h5py.Dataset):
    """Write the default grid attributes onto ``hdf5_group``.

    Reads the interpolation bounds and spacings from
    ``self.info["interpolation"]`` and the dilatation factor from the
    global configuration, then sets the ``Dimension``, ``Discretization``,
    ``Origin`` and ``Interpolation_Method`` attributes. The origin is moved
    back by half of the extra length introduced by the dilatation on each
    axis, and the spacings are scaled by the dilatation factor.
    """
    dilatation_factor = float(globals.config.general.dilatation_factor)
    interp = self.info["interpolation"]

    def _half_growth(lower, upper):
        # Half the difference between the dilatated and original extents.
        extent = np.abs(upper - lower)
        return (extent * dilatation_factor - extent) / 2.0

    x_min = interp["x_min"]
    y_min = interp["y_min"]

    delta_x = _half_growth(x_min, interp["x_max"])
    dx = interp["d_x"] * dilatation_factor
    delta_y = _half_growth(y_min, interp["y_max"])
    dy = interp["d_y"] * dilatation_factor

    attrs = hdf5_group.attrs
    attrs.create('Dimension', "XY", dtype="S3")
    attrs["Discretization"] = [dx, dy]
    attrs["Origin"] = [x_min - delta_x, y_min - delta_y]
    attrs["Interpolation_Method"] = "STEP"
def convert_header_to_hdf5(dataset: h5py.Dataset, header: Stats):
    """
    Convert an :class:`~obspy.core.Stats` object and adds it to the provided
    hdf5 dataset.

    Each header item is written as an attribute of ``dataset``.
    ``UTCDateTime`` values are converted to strings first. Items whose type
    cannot be stored raise a :class:`UserWarning` and are skipped.

    :param dataset: the dataset that the header should be added to
    :type dataset: h5py.Dataset
    :param header: The trace's header
    :type header: Stats
    """
    # Work on a plain-dict copy so the caller's header is never modified.
    for key, value in dict(header).items():
        try:
            if isinstance(value, UTCDateTime):
                # convert time to string
                value = value.format_fissures()
            dataset.attrs[key] = value
        except TypeError:
            # Adjacent literals are used instead of a backslash continuation
            # inside the string, which leaked stray characters into the
            # message.
            warnings.warn(
                'The header contains an item of type %s. Information '
                'of this type cannot be written to an hdf5 file.'
                % str(type(value)), UserWarning)
def read_dataset(dataset: h5py.Dataset):
    """Read the full contents of ``dataset`` and normalise string data.

    * With h5py v3, UTF-8 string datasets are read as ``str``.
    * Values without a ``dtype`` attribute are returned as read.
    * Byte-string arrays are converted to ``str``; a one-element result is
      unwrapped to that element (backwards compatibility).
    * Compound dtypes are passed through ``_from_fixed_length_strings``
      and, with h5py v3, ``_decode_structured_array``.
    * Zero-dimensional results are unwrapped to a scalar.
    """
    if H5PY_V3:
        string_dtype = h5py.check_string_dtype(dataset.dtype)
        if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
            dataset = dataset.asstr()
    value = dataset[()]
    if not hasattr(value, "dtype"):
        return value
    elif isinstance(value.dtype, str):
        pass
    # ``np.bytes_`` is the same scalar type that ``np.string_`` used to alias;
    # the ``np.string_`` name no longer exists in NumPy 2.0.
    elif issubclass(value.dtype.type, np.bytes_):
        value = value.astype(str)
        # Backwards compat, old datasets have strings as one element 1d arrays
        if len(value) == 1:
            return value[0]
    elif len(value.dtype.descr) > 1:  # Compound dtype
        # For backwards compat, now strings are written as variable length
        dtype = value.dtype
        value = _from_fixed_length_strings(value)
        if H5PY_V3:
            value = _decode_structured_array(value, dtype=dtype)
    if value.shape == ():
        value = value[()]
    return value
def test_appropriate_low_level_id(self):
    """Binding Dataset to a non-DatasetID identifier fails with ValueError"""
    def bind_to_root_id():
        # The identifier of '/' is looked up on the test's open file and
        # handed to the Dataset constructor.
        return Dataset(self.f['/'].id)

    self.assertRaises(ValueError, bind_to_root_id)
def maybe_resize(examples: h5py.Dataset, index: int, resize_chunk: int):
    """Grow ``examples`` by ``resize_chunk`` rows when ``index`` is past its end.

    Nothing happens while ``index`` is still inside the first dimension.
    Otherwise the first dimension is enlarged by exactly ``resize_chunk``
    and all other dimensions are kept.
    """
    n_rows = examples.shape[0]
    if index < n_rows:
        return

    grown_shape = [n_rows + resize_chunk, *examples.shape[1:]]
    examples.resize(grown_shape)
def prop_to_dataframe(dset: h5py.Dataset, dtype: DTypeLike = None) -> pd.DataFrame:
    """Convert the passed property Dataset into a DataFrame.

    Examples
    --------
    .. testsetup:: python

        >>> from dataCAT.testing_utils import HDF5_READ as hdf5_file

    .. code:: python

        >>> import h5py
        >>> from dataCAT import prop_to_dataframe

        >>> hdf5_file = str(...)  # doctest: +SKIP

        >>> with h5py.File(hdf5_file, 'r') as f:
        ...     dset = f['ligand/properties/E_solv']
        ...     df = prop_to_dataframe(dset)
        ...     print(df)  # doctest: +NORMALIZE_WHITESPACE
        E_solv_names             water  methanol   ethanol
        ligand ligand anchor
        O=C=O  O1            -0.918837 -0.151129 -0.177396
               O3            -0.221182 -0.261591 -0.712906
        CCCO   O4            -0.314799 -0.784353 -0.190898

    Parameters
    ----------
    dset : :class:`h5py.Dataset`
        The property-containing Dataset of interest.
    dtype : dtype-like, optional
        The data type of the to-be returned DataFrame.
        Use :data:`None` to default to the data type of **dset**.

    Returns
    -------
    :class:`pandas.DataFrame`
        A DataFrame constructed from the passed **dset**.

    """  # noqa: E501
    # Construct the index
    dim0 = dset.dims[0]
    scale0 = dim0[0]
    index = index_to_pandas(scale0)

    # Construct the columns
    if dset.ndim == 1:
        # A 1-D dataset becomes a single column named after the dataset
        full_name = dset.name
        name = full_name.rsplit('/', 1)[-1]
        columns = pd.Index([name])
    else:
        dim1 = dset.dims[1]
        scale1 = dim1[0]
        columns = pd.Index(scale1[:].astype(str), name=dim1.label)

    # Create and return the dataframe
    if dtype is None:
        return pd.DataFrame(dset[:], index=index, columns=columns)

    # If possible, let h5py handle the datatype conversion by slicing the
    # object returned by ``astype()``; using that object as a context manager
    # is no longer supported by recent h5py releases.
    # The conversion will often fail when dset.dtype consists of
    # variable-length bytes-strings (and on old h5py versions whose
    # ``astype()`` result cannot be sliced); numpy does the cast in that case.
    try:
        data = dset.astype(dtype)[:]
    except (ValueError, TypeError):
        data = dset[:].astype(dtype)
    return pd.DataFrame(data, index=index, columns=columns)
def _resize_prop_dset(dset: h5py.Dataset) -> None:
    """Grow **dset** along axis 0 until it matches its first dimensional scale.

    The dataset is left untouched when it is already at least as long as
    the scale attached to its first dimension.
    """
    target_length = len(dset.dims[0][0])
    if len(dset) >= target_length:
        return
    dset.resize(target_length, axis=0)
# Open and initialize the file with h5py.File(raw_file, "w") as file: # Add some attributes to the file. file = generate_top_level_attributes(file) """ Here the datasets are created, that are then filled with random data. The structures used here are defined in the structure_definitions.py file. """ # Create the ego dataset ego_space = h5py.h5s.create_simple((1, ), (h5py.h5s.UNLIMITED, )) ego_plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) ego_plist.set_chunk((500, )) ego_plist.set_deflate(9) ego_plist.set_fill_value(c_ego_fill) ego_type = h5py.h5t.py_create(c_ego, logical=1) d_ego_id = h5py.h5d.create(file.id, str_ego.encode(), ego_type, ego_space, ego_plist) d_ego = Dataset(d_ego_id) print("Created egoVehicle dataset with a size of {0} bytes per timestamp".format(c_ego.itemsize)) d_ego = generate_attributes(d_ego, str_ego) # Create the lane dataset lane_space = h5py.h5s.create_simple((1, ), (h5py.h5s.UNLIMITED, )) lane_plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE) lane_plist.set_chunk((500, )) lane_plist.set_deflate(9) lane_plist.set_fill_value(c_lane_fill) lane_type = h5py.h5t.py_create(c_lane, logical=1) d_lane_id = h5py.h5d.create(file.id, str_lan.encode(), lane_type, lane_space, lane_plist) d_lane = Dataset(d_lane_id) print("Created lanes dataset with a size of {0} bytes per timestamp".format(c_lane.itemsize)) d_lane = generate_attributes(d_lane, str_lan)