Exemple #1
0
def read_c4_dataset_as_c8(ds: h5py.Dataset, key=np.s_[...]):
    """
    Read a complex float16 HDF5 dataset as a numpy.complex64 array.

    Avoids h5py/numpy dtype bugs and uses numpy float16 -> float32 conversions
    which are about 10x faster than HDF5 ones.

    Parameters
    ----------
    ds : h5py.Dataset
        Dataset to read. It is read through ``ds.astype(complex32)``, where
        ``complex32`` is a dtype defined at module level (outside this
        function).
    key : index expression, optional
        Selection applied when reading the dataset. Defaults to
        ``np.s_[...]``, i.e. everything.

    Returns
    -------
    numpy.ndarray
        The selected data, cast to a (float32, float32) structured dtype and
        then viewed as ``numpy.complex64``.
    """
    # Reading through the astype() wrapper avoids h5py exception:
    # TypeError: data type '<c4' not understood
    reader = ds.astype(complex32)
    if hasattr(reader, "__getitem__"):
        # h5py >= 3.0: the wrapper can be sliced directly.  The context
        # manager form was deprecated in 3.0 and removed in 3.9.
        z = reader[key]
    else:
        # h5py < 3.0: astype() only works as a context manager.
        with reader:
            z = ds[key]
    # Define a similar datatype for complex64 to be sure we cast safely.
    complex64 = np.dtype([("r", np.float32), ("i", np.float32)])
    # Cast safely and then view as native complex64 numpy dtype.
    return z.astype(complex64).view(np.complex64)
Exemple #2
0
def prop_to_dataframe(dset: h5py.Dataset,
                      dtype: DTypeLike = None) -> pd.DataFrame:
    """Convert the passed property Dataset into a DataFrame.

    The index is built from the first dimension scale attached to axis 0 of
    **dset**.
    For 1D datasets the single column is named after the last component of
    the dataset's HDF5 path; otherwise the columns are taken from the first
    dimension scale attached to axis 1.

    Examples
    --------
    .. testsetup:: python

        >>> from dataCAT.testing_utils import HDF5_READ as hdf5_file

    .. code:: python

        >>> import h5py
        >>> from dataCAT import prop_to_dataframe

        >>> hdf5_file = str(...)  # doctest: +SKIP

        >>> with h5py.File(hdf5_file, 'r') as f:
        ...     dset = f['ligand/properties/E_solv']
        ...     df = prop_to_dataframe(dset)
        ...     print(df)  # doctest: +NORMALIZE_WHITESPACE
        E_solv_names             water  methanol   ethanol
        ligand ligand anchor
        O=C=O  O1            -0.918837 -0.151129 -0.177396
               O3            -0.221182 -0.261591 -0.712906
        CCCO   O4            -0.314799 -0.784353 -0.190898

    Parameters
    ----------
    dset : :class:`h5py.Dataset`
        The property-containing Dataset of interest.
    dtype : dtype-like, optional
        The data type of the to-be returned DataFrame.
        Use :data:`None` to default to the data type of **dset**.

    Returns
    -------
    :class:`pandas.DataFrame`
        A DataFrame constructed from the passed **dset**.

    """  # noqa: E501
    # Construct the index
    dim0 = dset.dims[0]
    scale0 = dim0[0]
    index = index_to_pandas(scale0)

    # Construct the columns
    if dset.ndim == 1:
        full_name = dset.name
        name = full_name.rsplit('/', 1)[-1]
        columns = pd.Index([name])

    else:
        dim1 = dset.dims[1]
        scale1 = dim1[0]
        columns = pd.Index(scale1[:].astype(str), name=dim1.label)

    # Read the data, converting it to the requested data type (if any)
    if dtype is None:
        data = dset[:]
    else:
        # If possible, let h5py handle the datatype conversion
        # This will often fail when dset.dtype consists of variable-length bytes-strings
        try:
            converter = dset.astype(dtype)
            if hasattr(converter, '__getitem__'):
                # h5py >= 3.0: the wrapper can be sliced directly.  The context
                # manager form was deprecated in 3.0 and removed in 3.9.
                data = converter[:]
            else:
                # h5py < 3.0: astype() only works as a context manager
                with converter:
                    data = dset[:]
        except (ValueError, TypeError):
            # Fall back to reading the raw data and converting it with numpy
            data = dset[:].astype(dtype)

    # Create and return the dataframe
    return pd.DataFrame(data, index=index, columns=columns)