Example #1
def sampletime(t: h5py.Dataset, bstride):
    """
    read the time of the pulses to the microsecond level
    t: h5py variable
    bstride: 2-D boolean

    returns: 2-D single of UTC time unix epoch
    """
    assert isinstance(t,
                      (np.ndarray, h5py.Dataset)), "Numpy or h5py array only"
    assert t.ndim == 2
    assert bstride.dtype == "bool"

    assert t.shape[0] == bstride.shape[0]  # number of times

    if bstride.sum() == 0:  # selected beam was never used in this file
        t = None
    elif t.shape == bstride.shape:  # usual case
        t = t[bstride]
        if t.max() > 1.01 * t.mean():
            logging.warning("at least one time gap in radar detected")
    elif t.shape[1] == 2:  # improvised case for the oldest AMISR files
        logging.info(
            "improvised time method for very old AMISR files 2006-2007, may be inaccurate time"
        )
        assert (bstride.sum(axis=1) <=
                1).all(), "were some times without pulses?"
        bstride = bstride.any(axis=1)

        t = t[bstride, 0]

    return t
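A hedged usage sketch; the file name, dataset path, and beam-mask layout below are invented for illustration, not implied by the function:

import h5py
import numpy as np

with h5py.File("amisr.h5", "r") as f:          # hypothetical file
    t = f["Time/UnixTime"]                     # hypothetical path, shape (Ntimes, Npulses)
    bstride = np.zeros(t.shape, dtype=bool)
    bstride[:, 0] = True                       # select the first beam's pulses
    times = sampletime(t, bstride)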
Example #2
def extend_dataset(dataset: h5py.Dataset, data: Union[np.ndarray, List]):
    if isinstance(data, list):
        data = np.array(data)

    if data.shape[0] == 0:  # nothing to append
        return

    # Grow axis 0 by the number of new rows, then write them at the end.
    dataset.resize(dataset.shape[0] + data.shape[0], axis=0)
    dataset[-data.shape[0]:] = data
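This only works if the dataset was created resizable along axis 0 (maxshape with None); a minimal sketch with invented names:

import h5py
import numpy as np

with h5py.File("log.h5", "w") as f:            # hypothetical file
    dset = f.create_dataset("samples", shape=(0, 3), maxshape=(None, 3))
    extend_dataset(dset, np.ones((4, 3)))
    extend_dataset(dset, [[7.0, 8.0, 9.0]])    # lists are converted first
    print(dset.shape)                          # (5, 3)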
Example #3
def extend_dataset(dataset: h5py.Dataset, data: np.ndarray) -> h5py.Dataset:
    """Extend a dataset in the input HDF5 group.

    Used to update the images in a split.
    """
    shape = dataset.shape
    newshape = (dataset.shape[0] + data.shape[0], *dataset.shape[1:])
    dataset.resize(newshape)
    dataset[shape[0] :] = data
    return dataset
Example #4
def append_buffer_to_dataset(dset: h5py.Dataset, buffer: ListBuffer):
    """Append values to resizable h5py dataset."""
    if len(buffer):  # Buffer is not empty
        logging.info("")
        values = np.array(buffer)
        new_shape = (buffer.end,) + values.shape[1:]
        dset.resize(new_shape)
        dset[buffer.start:] = values
    else:
        logging.warning("Buffer is empty.")
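ListBuffer is project-specific and not shown here; from the calls above it needs len(), iteration, and start/end row indices. A hypothetical minimal stand-in:

class ListBuffer(list):
    """Hypothetical sketch: a list that records which rows of the target
    dataset it covers (start inclusive, end exclusive)."""

    def __init__(self, start: int = 0):
        super().__init__()
        self.start = start

    @property
    def end(self) -> int:
        return self.start + len(self)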
Example #5
def read_c4_dataset_as_c8(ds: h5py.Dataset, key=np.s_[...]):
    """
    Read a complex float16 HDF5 dataset as a numpy.complex64 array.

    Avoids h5py/numpy dtype bugs and uses numpy float16 -> float32 conversions
    which are about 10x faster than HDF5 ones.
    """
    # This context manager avoids h5py exception:
    # TypeError: data type '<c4' not understood
    with ds.astype(complex32):
        z = ds[key]
    # Define a similar datatype for complex64 to be sure we cast safely.
    complex64 = np.dtype([("r", np.float32), ("i", np.float32)])
    # Cast safely and then view as native complex64 numpy dtype.
    return z.astype(complex64).view(np.complex64)
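A hedged round trip; the compound layout matches the docstring's on-disk format, while the file and dataset names are invented:

import h5py
import numpy as np

complex32 = np.dtype([("r", np.float16), ("i", np.float16)])
with h5py.File("slc.h5", "w") as f:            # hypothetical file
    raw = np.zeros(4, dtype=complex32)
    raw["r"], raw["i"] = [1, 2, 3, 4], [5, 6, 7, 8]
    f.create_dataset("data", data=raw)

with h5py.File("slc.h5", "r") as f:
    z = read_c4_dataset_as_c8(f["data"])
    print(z.dtype, z[0])                       # complex64 (1+5j)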
Example #6
    def add_default_attributes(self, hdf5_group: h5py.Dataset):
        dilatation_factor = float(globals.config.general.dilatation_factor)
        l_x = np.abs(self.info["interpolation"]["x_max"] -
                     self.info["interpolation"]["x_min"])
        l_x_dilatated = np.abs(
            self.info["interpolation"]["x_max"] -
            self.info["interpolation"]["x_min"]) * dilatation_factor
        delta_x = (l_x_dilatated - l_x) / 2.0
        dx = self.info["interpolation"]["d_x"] * dilatation_factor
        l_y = np.abs(self.info["interpolation"]["y_max"] -
                     self.info["interpolation"]["y_min"])
        l_y_dilatated = np.abs(
            self.info["interpolation"]["y_max"] -
            self.info["interpolation"]["y_min"]) * dilatation_factor
        delta_y = (l_y_dilatated - l_y) / 2.0
        dy = self.info["interpolation"]["d_y"] * dilatation_factor

        hdf5_group.attrs.create('Dimension', "XY", dtype="S3")
        hdf5_group.attrs["Discretization"] = [dx, dy]
        hdf5_group.attrs["Origin"] = [
            self.info["interpolation"]["x_min"] - delta_x,
            self.info["interpolation"]["y_min"] - delta_y
        ]
        hdf5_group.attrs["Interpolation_Method"] = "STEP"
Example #7
def convert_header_to_hdf5(dataset: h5py.Dataset, header: Stats):
    """
    Convert an :class:`~obspy.core.Stats` object and add it to the provided
    hdf5 dataset.

    :param dataset: the dataset that the header should be added to
    :type dataset: h5py.Dataset
    :param header: The trace's header
    :type header: Stats
    """
    header = dict(header)
    for key in header:
        try:
            if isinstance(header[key], UTCDateTime):
                # convert time to string
                header[key] = header[key].format_fissures()
            dataset.attrs[key] = header[key]
        except TypeError:
            warnings.warn(
                'The header contains an item of type %s. Information '
                'of this type cannot be written to an hdf5 file.'
                % str(type(header[key])), UserWarning)
            continue
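A hedged usage sketch with obspy's bundled example stream; the HDF5 file and dataset names are invented:

import h5py
from obspy import read

tr = read()[0]                                 # first trace of obspy's example data
with h5py.File("waveforms.h5", "w") as f:      # hypothetical file
    dset = f.create_dataset("trace0", data=tr.data)
    convert_header_to_hdf5(dset, tr.stats)
    print(dset.attrs["sampling_rate"])         # 100.0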
Example #8
def read_dataset(dataset: h5py.Dataset):
    if H5PY_V3:
        string_dtype = h5py.check_string_dtype(dataset.dtype)
        if (string_dtype is not None) and (string_dtype.encoding == "utf-8"):
            dataset = dataset.asstr()
    value = dataset[()]
    if not hasattr(value, "dtype"):
        return value
    elif isinstance(value.dtype, str):
        pass
    elif issubclass(value.dtype.type, np.string_):
        value = value.astype(str)
        # Backwards compat, old datasets have strings as one element 1d arrays
        if len(value) == 1:
            return value[0]
    elif len(value.dtype.descr) > 1:  # Compound dtype
        # For backwards compat, now strings are written as variable length
        dtype = value.dtype
        value = _from_fixed_length_strings(value)
        if H5PY_V3:
            value = _decode_structured_array(value, dtype=dtype)
    if value.shape == ():
        value = value[()]
    return value
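H5PY_V3 and the _from_fixed_length_strings/_decode_structured_array helpers are module-level details of the source project; the version flag can be reproduced as below, which is enough to exercise the variable-length string path (file name invented):

import h5py
import numpy as np

H5PY_V3 = int(h5py.__version__.split(".")[0]) >= 3

with h5py.File("strings.h5", "w") as f:        # hypothetical file
    f.create_dataset("names", data=["a", "bc"],
                     dtype=h5py.string_dtype(encoding="utf-8"))
with h5py.File("strings.h5", "r") as f:
    print(read_dataset(f["names"]))            # str values, not bytes, on h5py 3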
Example #9
    def test_appropriate_low_level_id(self):
        """ Binding Dataset to a non-DatasetID identifier fails with ValueError """
        with self.assertRaises(ValueError):
            Dataset(self.f['/'].id)
Example #10
def maybe_resize(examples: h5py.Dataset, index: int, resize_chunk: int):
    if index >= examples.shape[0]:
        current_shape = list(examples.shape)
        current_shape[0] += resize_chunk
        examples.resize(current_shape)
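A hedged growth loop: the dataset starts empty and is grown 1000 rows at a time whenever the write index runs past its current length (names invented):

import h5py
import numpy as np

with h5py.File("examples.h5", "w") as f:       # hypothetical file
    examples = f.create_dataset("x", shape=(0, 8), maxshape=(None, 8))
    for i in range(2500):
        maybe_resize(examples, i, resize_chunk=1000)
        examples[i] = np.random.rand(8)
    examples.resize((2500, 8))                 # trim the unused tail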
Example #11
def prop_to_dataframe(dset: h5py.Dataset,
                      dtype: DTypeLike = None) -> pd.DataFrame:
    """Convert the passed property Dataset into a DataFrame.

    Examples
    --------
    .. testsetup:: python

        >>> from dataCAT.testing_utils import HDF5_READ as hdf5_file

    .. code:: python

        >>> import h5py
        >>> from dataCAT import prop_to_dataframe

        >>> hdf5_file = str(...)  # doctest: +SKIP

        >>> with h5py.File(hdf5_file, 'r') as f:
        ...     dset = f['ligand/properties/E_solv']
        ...     df = prop_to_dataframe(dset)
        ...     print(df)  # doctest: +NORMALIZE_WHITESPACE
        E_solv_names             water  methanol   ethanol
        ligand ligand anchor
        O=C=O  O1            -0.918837 -0.151129 -0.177396
               O3            -0.221182 -0.261591 -0.712906
        CCCO   O4            -0.314799 -0.784353 -0.190898

    Parameters
    ----------
    dset : :class:`h5py.Dataset`
        The property-containing Dataset of interest.
    dtype : dtype-like, optional
        The data type of the to-be returned DataFrame.
        Use :data:`None` to default to the data type of **dset**.

    Returns
    -------
    :class:`pandas.DataFrame`
        A DataFrame constructed from the passed **dset**.

    """  # noqa: E501
    # Construct the index
    dim0 = dset.dims[0]
    scale0 = dim0[0]
    index = index_to_pandas(scale0)

    # Construct the columns
    if dset.ndim == 1:
        full_name = dset.name
        name = full_name.rsplit('/', 1)[-1]
        columns = pd.Index([name])

    else:
        dim1 = dset.dims[1]
        scale1 = dim1[0]
        columns = pd.Index(scale1[:].astype(str), name=dim1.label)

    # Create and return the dataframe
    if dtype is None:
        return pd.DataFrame(dset[:], index=index, columns=columns)

    # If possible, let h5py handle the datatype conversion
    # This will often fail when dset.dtype consists of variable-length bytes-strings
    try:
        with dset.astype(dtype):
            return pd.DataFrame(dset[:], index=index, columns=columns)
    except (ValueError, TypeError):
        return pd.DataFrame(dset[:].astype(dtype),
                            index=index,
                            columns=columns)
Example #12
def _resize_prop_dset(dset: h5py.Dataset) -> None:
    """Ensure that **dset** is as long as its dimensional scale."""
    scale = dset.dims[0][0]
    n = len(scale)
    if n > len(dset):
        dset.resize(n, axis=0)
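This assumes dset has an HDF5 dimension scale attached on axis 0; a minimal sketch of that relationship with invented names:

import h5py

with h5py.File("props.h5", "w") as f:          # hypothetical file
    scale = f.create_dataset("index", shape=(5,), maxshape=(None,), dtype="i8")
    dset = f.create_dataset("E_solv", shape=(3,), maxshape=(None,), dtype="f8")
    scale.make_scale("index")
    dset.dims[0].attach_scale(scale)

    _resize_prop_dset(dset)                    # grows dset from 3 to 5 rows
    print(dset.shape)                          # (5,)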
Example #13
# Open and initialize the file
with h5py.File(raw_file, "w") as file:
    # Add some attributes to the file.
    file = generate_top_level_attributes(file)
    """ Here the datasets are created, that are then filled with random data. The structures used here are defined in 
        the structure_definitions.py file. """

    # Create the ego dataset
    ego_space = h5py.h5s.create_simple((1, ), (h5py.h5s.UNLIMITED, ))
    ego_plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    ego_plist.set_chunk((500, ))
    ego_plist.set_deflate(9)
    ego_plist.set_fill_value(c_ego_fill)
    ego_type = h5py.h5t.py_create(c_ego, logical=1)
    d_ego_id = h5py.h5d.create(file.id, str_ego.encode(), ego_type, ego_space, ego_plist)
    d_ego = Dataset(d_ego_id)
    print("Created egoVehicle dataset with a size of {0} bytes per timestamp".format(c_ego.itemsize))
    d_ego = generate_attributes(d_ego, str_ego)

    # Create the lane dataset
    lane_space = h5py.h5s.create_simple((1, ), (h5py.h5s.UNLIMITED, ))
    lane_plist = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    lane_plist.set_chunk((500, ))
    lane_plist.set_deflate(9)
    lane_plist.set_fill_value(c_lane_fill)
    lane_type = h5py.h5t.py_create(c_lane, logical=1)
    d_lane_id = h5py.h5d.create(file.id, str_lan.encode(), lane_type, lane_space, lane_plist)
    d_lane = Dataset(d_lane_id)
    print("Created lanes dataset with a size of {0} bytes per timestamp".format(c_lane.itemsize))
    d_lane = generate_attributes(d_lane, str_lan)
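For comparison, the high-level API can express roughly the same creation (unlimited first axis, 500-element chunks, gzip level 9, custom fill value) in one call; a hedged equivalent reusing the script's names:

d_ego = file.create_dataset(
    str_ego,
    shape=(1,),
    maxshape=(None,),                          # h5py.h5s.UNLIMITED at the low level
    dtype=c_ego,
    chunks=(500,),
    compression="gzip",                        # set_deflate(9) at the low level
    compression_opts=9,
    fillvalue=c_ego_fill,
)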