Example #1
File: utils.py Project: minaskar/thejoker
def read_batch_slice(prior_samples_file, columns, slice, units=None):
    """
    Read a batch (row block) of prior samples into a plain numpy array,
    converting units where necessary.
    """

    path = JokerSamples._hdf5_path

    batch = None
    with tb.open_file(prior_samples_file, mode='r') as f:

        for i, name in enumerate(columns):
            arr = f.root[path].read(slice.start, slice.stop, slice.step,
                                    field=name)
            if batch is None:
                batch = np.zeros((len(arr), len(columns)), dtype=arr.dtype)
            batch[:, i] = arr

        if units is not None:
            table_units = table_header_to_units(f.root[meta_path(path)])
            for i, name in enumerate(columns):
                if name in units:
                    batch[:, i] *= table_units[name].to(units[name])

    return batch
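
A minimal usage sketch for read_batch_slice above. The file name, column names, and unit mapping are illustrative, and the module-level imports used in utils.py (numpy as np, PyTables as tb) are assumed:

import astropy.units as u

# 'prior_samples.hdf5' is a hypothetical cache file written by thejoker.
batch = read_batch_slice('prior_samples.hdf5',
                         ['P', 'e'],           # column names (illustrative)
                         slice(0, 1000),       # row block: rows 0-999
                         units={'P': u.day})   # convert 'P' into days
print(batch.shape)  # (1000, 2) if the table has at least 1000 rows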
Example #2
def test_preserve_serialized(tmpdir):
    test_file = str(tmpdir.join('test.hdf5'))

    t1 = Table()
    t1['a'] = Column(data=[1, 2, 3], unit="s")
    t1['a'].meta['a0'] = "A0"
    t1['a'].meta['a1'] = {"a1": [0, 1]}
    t1['a'].format = '7.3f'
    t1['a'].description = 'A column'
    t1.meta['b'] = 1
    t1.meta['c'] = {"c0": [0, 1]}

    t1.write(test_file, path='the_table', serialize_meta=True, overwrite=True)

    t2 = Table.read(test_file, path='the_table')

    assert t1['a'].unit == t2['a'].unit
    assert t1['a'].format == t2['a'].format
    assert t1['a'].description == t2['a'].description
    assert t1['a'].meta == t2['a'].meta
    assert t1.meta == t2.meta

    # Check that the meta table is fixed-width bytes (see #11299)
    h5 = h5py.File(test_file, 'r')
    meta_lines = h5[meta_path('the_table')]
    assert meta_lines.dtype.kind == 'S'
Example #3
File: utils.py Project: minaskar/thejoker
def table_contains_column(root, column):
    from .samples import JokerSamples

    path = meta_path(JokerSamples._hdf5_path)
    header = get_header_from_yaml(h.decode('utf-8') for h in root[path])

    columns = []
    for row in header['datatype']:
        columns.append(row['name'])

    return column in columns
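
A short usage sketch for table_contains_column. The file name and the queried column name are illustrative; tb is assumed to be PyTables, as in the other examples:

with tb.open_file('prior_samples.hdf5', mode='r') as f:
    # True if the serialized table header lists a column named 'P'
    has_period = table_contains_column(f.root, 'P')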
Example #4
def test_table_header_to_units(tmpdir):
    filename = str(tmpdir / 'test.hdf5')

    tbl = QTable()
    tbl['a'] = np.arange(10) * u.kpc
    tbl['b'] = np.arange(10) * u.km / u.s
    tbl['c'] = np.arange(10) * u.day
    tbl.write(filename, path='test', serialize_meta=True)

    with tb.open_file(filename, mode='r') as f:
        units = table_header_to_units(f.root[meta_path('test')])

    for col in tbl.colnames:
        assert tbl[col].unit == units[col]
Example #5
File: utils.py Project: minaskar/thejoker
def read_batch_idx(prior_samples_file, columns, idx, units=None):
    """
    Read a batch (row block) of prior samples specified by the input index
    array, ``idx``, into a plain numpy array, converting units where necessary.
    """
    path = JokerSamples._hdf5_path

    batch = np.zeros((len(idx), len(columns)))
    with tb.open_file(prior_samples_file, mode='r') as f:
        for i, name in enumerate(columns):
            batch[:, i] = f.root[path].read_coordinates(idx, field=name)

        if units is not None:
            table_units = table_header_to_units(f.root[meta_path(path)])
            for i, name in enumerate(columns):
                if name in units:
                    batch[:, i] *= table_units[name].to(units[name])

    return batch
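
A minimal usage sketch for read_batch_idx, analogous to Example #1 but selecting rows by an index array. File name, columns, and units are illustrative; numpy as np and astropy.units as u are assumed:

idx = np.array([0, 10, 42, 512])            # arbitrary row indices
batch = read_batch_idx('prior_samples.hdf5',
                       ['P', 'e'],
                       idx,
                       units={'P': u.day})
# batch has shape (len(idx), 2) and dtype float64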
Example #6
def write_table_hdf5(table,
                     output,
                     path=None,
                     compression=False,
                     append=False,
                     overwrite=False,
                     serialize_meta=False,
                     metadata_conflicts='error',
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py:File` or :class:`h5py:Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
        If not specified, defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should be
        one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and the
        integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.
        If ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    metadata_conflicts : str
        How to proceed with metadata conflicts when appending. This should be
        one of:
            * ``'silent'``: silently pick the last conflicting meta-data value
            * ``'warn'``: pick the last conflicting meta-data value, but emit a
              warning
            * ``'error'``: raise an exception (default).
    **create_dataset_kwargs
        Additional keyword arguments are passed to `h5py.File.create_dataset`.
    """

    from astropy.table import meta
    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # '__astropy_table__' is just an arbitrary, hardcoded default path.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(f"File exists: {output}")

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table,
                                    f,
                                    path=path,
                                    compression=compression,
                                    append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta,
                                    metadata_conflicts=metadata_conflicts,
                                    **create_dataset_kwargs)
        finally:
            f.close()

    else:

        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    existing_header = None
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
        elif append:
            # Data table exists, so we interpret "append" to mean "extend
            # existing table with the table passed in". However, this requires
            # the table to have been written by this function in the past, so it
            # should have a metadata header
            if meta_path(name) not in output_group:
                raise ValueError("No metadata exists for existing table. We "
                                 "can only append tables if metadata "
                                 "is consistent for all tables")

            # Load existing table header:
            existing_header = get_header_from_yaml(
                h.decode('utf-8') for h in output_group[meta_path(name)])
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode string columns can't be written directly to
    # HDF5, so make a shallow copy of the table with those columns converted
    # to bytestrings and write that copy instead.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    if existing_header is None:  # Just write the table and metadata
        # Write the table to the file
        if compression:
            if compression is True:
                compression = 'gzip'
            dset = output_group.create_dataset(name,
                                               data=table.as_array(),
                                               compression=compression,
                                               **create_dataset_kwargs)
        else:
            dset = output_group.create_dataset(name,
                                               data=table.as_array(),
                                               **create_dataset_kwargs)

        if serialize_meta:
            header_yaml = meta.get_yaml_from_table(table)

            header_encoded = [h.encode('utf-8') for h in header_yaml]
            output_group.create_dataset(meta_path(name), data=header_encoded)

        else:
            # Write the Table meta dict key:value pairs to the file as HDF5
            # attributes.  This works only for a limited set of scalar data types
            # like numbers, strings, etc., but not any complex types.  This path
            # also ignores column meta like unit or format.
            for key in table.meta:
                val = table.meta[key]
                try:
                    dset.attrs[key] = val
                except TypeError:
                    warnings.warn(
                        "Attribute `{}` of type {} cannot be written to "
                        "HDF5 files - skipping. (Consider specifying "
                        "serialize_meta=True to write all meta data)".format(
                            key, type(val)), AstropyUserWarning)

    else:  # We need to append the tables!
        try:
            # FIXME: do something with the merged metadata!
            metadata.merge(existing_header['meta'],
                           table.meta,
                           metadata_conflicts=metadata_conflicts)
        except metadata.MergeConflictError:
            raise metadata.MergeConflictError(
                "Cannot append table to existing file because "
                "the existing file table metadata and this "
                "table object's metadata do not match. If you "
                "want to ignore this issue, or change to a "
                "warning, set metadata_conflicts='silent' or 'warn'.")

        # Now compare datatype of this object and on disk
        this_header = get_header_from_yaml(get_yaml_from_table(table))

        if not _custom_tbl_dtype_compare(existing_header['datatype'],
                                         this_header['datatype']):
            raise ValueError(
                "Cannot append table to existing file because "
                "the existing file table datatype and this "
                "object's table datatype do not match. "
                f"{existing_header['datatype']} vs. {this_header['datatype']}")

        # If we got here, we can now try to append:
        current_size = len(output_group[name])
        output_group[name].resize((current_size + len(table), ))
        output_group[name][current_size:] = table.as_array()
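
A short usage sketch for write_table_hdf5. The file name, path, and columns are illustrative, and the helper functions the excerpt relies on (meta_path, _encode_mixins, get_header_from_yaml, ...) are assumed to be importable as in the source module. Passing maxshape through **create_dataset_kwargs on the first write makes the dataset resizable, so a later append=True call can extend it:

import numpy as np
import astropy.units as u
from astropy.table import QTable

t = QTable({'x': np.arange(5) * u.kpc,
            'v': np.arange(5) * u.km / u.s})

# Initial write; serialize_meta=True stores the unit/meta header that the
# append branch later checks for.
write_table_hdf5(t, 'data.hdf5', path='group/table',
                 serialize_meta=True, overwrite=True, maxshape=(None,))

# Extend the existing dataset with more rows (datatypes must match).
write_table_hdf5(t, 'data.hdf5', path='group/table',
                 serialize_meta=True, append=True)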