def read_batch_slice(prior_samples_file, columns, slice, units=None):
    """
    Read a batch (row block) of prior samples into a plain numpy array,
    converting units where necessary.
    """
    from .samples import JokerSamples
    path = JokerSamples._hdf5_path

    batch = None
    with tb.open_file(prior_samples_file, mode='r') as f:
        for i, name in enumerate(columns):
            arr = f.root[path].read(slice.start, slice.stop, slice.step,
                                    field=name)
            if batch is None:
                # Allocate the output array once the row count is known:
                batch = np.zeros((len(arr), len(columns)), dtype=arr.dtype)
            batch[:, i] = arr

        if units is not None:
            # Convert each requested column from its on-disk unit to the
            # requested unit:
            table_units = table_header_to_units(f.root[meta_path(path)])
            for i, name in enumerate(columns):
                if name in units:
                    batch[:, i] *= table_units[name].to(units[name])

    return batch
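# A minimal usage sketch for read_batch_slice, assuming the prior-samples
# cache was written by thejoker with a serialized metadata header. The file
# name and column names below are hypothetical:
def _example_read_batch_slice():
    import astropy.units as u

    # Read rows 0-999 of the 'P' and 'e' columns, converting 'P' to days:
    batch = read_batch_slice('prior-samples.hdf5',  # hypothetical path
                             columns=['P', 'e'],
                             slice=slice(0, 1000),
                             units={'P': u.day})
    # ``batch`` is a plain (1000, 2) ndarray with no attached units.
    return batch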
def test_preserve_serialized(tmpdir):
    test_file = str(tmpdir.join('test.hdf5'))

    t1 = Table()
    t1['a'] = Column(data=[1, 2, 3], unit="s")
    t1['a'].meta['a0'] = "A0"
    t1['a'].meta['a1'] = {"a1": [0, 1]}
    t1['a'].format = '7.3f'
    t1['a'].description = 'A column'
    t1.meta['b'] = 1
    t1.meta['c'] = {"c0": [0, 1]}

    t1.write(test_file, path='the_table', serialize_meta=True,
             overwrite=True)

    t2 = Table.read(test_file, path='the_table')

    assert t1['a'].unit == t2['a'].unit
    assert t1['a'].format == t2['a'].format
    assert t1['a'].description == t2['a'].description
    assert t1['a'].meta == t2['a'].meta
    assert t1.meta == t2.meta

    # Check that the meta table is fixed-width bytes (see #11299)
    with h5py.File(test_file, 'r') as h5:
        meta_lines = h5[meta_path('the_table')]
        assert meta_lines.dtype.kind == 'S'
def table_contains_column(root, column):
    """
    Check whether the prior samples cache table contains a column with
    the given name, using the serialized metadata header.
    """
    from .samples import JokerSamples
    path = meta_path(JokerSamples._hdf5_path)

    header = get_header_from_yaml(h.decode('utf-8') for h in root[path])
    columns = [row['name'] for row in header['datatype']]
    return column in columns
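# A minimal usage sketch for table_contains_column, assuming ``root`` is the
# root group of an open PyTables file on a cache written by thejoker. The
# file and column names are hypothetical:
def _example_table_contains_column():
    with tb.open_file('prior-samples.hdf5', mode='r') as f:
        if table_contains_column(f.root, 's'):
            print("cache includes a column named 's'")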
def test_table_header_to_units(tmpdir):
    filename = str(tmpdir / 'test.hdf5')

    tbl = QTable()
    tbl['a'] = np.arange(10) * u.kpc
    tbl['b'] = np.arange(10) * u.km / u.s
    tbl['c'] = np.arange(10) * u.day
    tbl.write(filename, path='test', serialize_meta=True)

    with tb.open_file(filename, mode='r') as f:
        units = table_header_to_units(f.root[meta_path('test')])

    for col in tbl.colnames:
        assert tbl[col].unit == units[col]
def read_batch_idx(prior_samples_file, columns, idx, units=None):
    """
    Read a batch (row block) of prior samples specified by the input
    index array, ``idx``, into a plain numpy array, converting units
    where necessary.
    """
    from .samples import JokerSamples
    path = JokerSamples._hdf5_path

    batch = np.zeros((len(idx), len(columns)))
    with tb.open_file(prior_samples_file, mode='r') as f:
        for i, name in enumerate(columns):
            batch[:, i] = f.root[path].read_coordinates(idx, field=name)

        if units is not None:
            # Convert each requested column from its on-disk unit to the
            # requested unit:
            table_units = table_header_to_units(f.root[meta_path(path)])
            for i, name in enumerate(columns):
                if name in units:
                    batch[:, i] *= table_units[name].to(units[name])

    return batch
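# A minimal usage sketch for read_batch_idx. Unlike read_batch_slice, rows
# are selected with an explicit (possibly non-contiguous) integer index
# array, and the output array is always float. Names below are hypothetical:
def _example_read_batch_idx():
    import astropy.units as u

    idx = np.array([3, 17, 256, 1024])  # arbitrary row indices
    batch = read_batch_idx('prior-samples.hdf5',  # hypothetical path
                           columns=['P', 'e'],
                           idx=idx,
                           units={'P': u.day})
    # ``batch`` has shape (len(idx), 2).
    return batch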
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False, serialize_meta=False,
                     metadata_conflicts='error',
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file.

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py:File` or :class:`h5py:Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
        If not specified, defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should
        be one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and
        the integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.
        If ``append=True`` and ``overwrite=True`` then only the dataset will
        be replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the table:
        the table (and column) meta, units, formats, and descriptions are
        stored as a YAML header in a separate dataset next to the table.
    metadata_conflicts : str
        How to proceed with metadata conflicts. This should be one of:
            * ``'silent'``: silently pick the last conflicting meta-data
              value
            * ``'warn'``: pick the last conflicting meta-data value, but
              emit a warning
            * ``'error'``: raise an exception (default)
    **create_dataset_kwargs
        Additional keyword arguments are passed to
        `h5py.File.create_dataset`.
    """
    from astropy.table import meta

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):
        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(f"File exists: {output}")

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table, f, path=path,
                                    compression=compression, append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta,
                                    metadata_conflicts=metadata_conflicts,
                                    **create_dataset_kwargs)
        finally:
            f.close()

    else:
        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    existing_header = None
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
        elif append:
            # Data table exists, so we interpret "append" to mean "extend
            # existing table with the table passed in". However, this
            # requires the table to have been written by this function in
            # the past, so it should have a metadata header
            if meta_path(name) not in output_group:
                raise ValueError("No metadata exists for existing table. We "
                                 "can only append tables if metadata "
                                 "is consistent for all tables")

            # Load existing table header:
            existing_header = get_header_from_yaml(
                h.decode('utf-8') for h in output_group[meta_path(name)])
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode strings can't be written to HDF5, so such a
    # table is copied (without copying the data) and its columns are
    # converted to bytestrings before writing.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False. This is
    # hardcoded to the set difference between column info attributes and
    # what HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', "
                        "'format', 'description', or 'meta' info attributes. "
                        "These will be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    if existing_header is None:  # Just write the table and metadata

        # Write the table to the file
        if compression:
            if compression is True:
                compression = 'gzip'
            dset = output_group.create_dataset(name, data=table.as_array(),
                                               compression=compression,
                                               **create_dataset_kwargs)
        else:
            dset = output_group.create_dataset(name, data=table.as_array(),
                                               **create_dataset_kwargs)

        if serialize_meta:
            header_yaml = meta.get_yaml_from_table(table)
            header_encoded = [h.encode('utf-8') for h in header_yaml]
            output_group.create_dataset(meta_path(name),
                                        data=header_encoded)
        else:
            # Write the Table meta dict key:value pairs to the file as HDF5
            # attributes. This works only for a limited set of scalar data
            # types like numbers, strings, etc., but not any complex types.
            # This path also ignores column meta like unit or format.
            for key in table.meta:
                val = table.meta[key]
                try:
                    dset.attrs[key] = val
                except TypeError:
                    warnings.warn(
                        "Attribute `{}` of type {} cannot be written to "
                        "HDF5 files - skipping. (Consider specifying "
                        "serialize_meta=True to write all meta data)".format(
                            key, type(val)), AstropyUserWarning)

    else:  # We need to append the tables!
        try:
            # FIXME: do something with the merged metadata!
            metadata.merge(existing_header['meta'], table.meta,
                           metadata_conflicts=metadata_conflicts)
        except metadata.MergeConflictError:
            raise metadata.MergeConflictError(
                "Cannot append table to existing file because "
                "the existing file table metadata and this "
                "table object's metadata do not match. If you "
                "want to ignore this issue, or change to a "
                "warning, set metadata_conflicts='silent' or 'warn'.")

        # Now compare the datatype of this object and the one on disk:
        this_header = get_header_from_yaml(get_yaml_from_table(table))

        if not _custom_tbl_dtype_compare(existing_header['datatype'],
                                         this_header['datatype']):
            raise ValueError(
                "Cannot append table to existing file because "
                "the existing file table datatype and this "
                "object's table datatype do not match. "
                f"{existing_header['datatype']} vs. "
                f"{this_header['datatype']}")

        # If we got here, we can now try to append:
        current_size = len(output_group[name])
        output_group[name].resize((current_size + len(table), ))
        output_group[name][current_size:] = table.as_array()