def write_table(self, table_name, table, metadata=None):
    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode strings cannot be written directly to HDF5,
    # so make a shallow copy with those columns converted to bytestrings.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    self._entry.create_dataset(str(table_name), data=table.as_array())

    # Serialize the table header as UTF-8 encoded YAML lines.
    header_yaml = meta.get_yaml_from_table(table)
    header_encoded = [h.encode('utf-8') for h in header_yaml]
    self._entry.create_dataset(str(table_name) + '.' + META_KEY,
                               data=header_encoded)

    # Also write each column to its own group, storing the data under
    # 'value' and the unit (if any) under 'unit'.
    tg = self._entry.create_group('{}_to_group'.format(str(table_name)))
    for col in table.keys():
        tg_c = tg.create_group(str(col))
        tg_c.create_dataset('value', data=table[col])
        if table[col].unit is not None:
            tg_c.create_dataset('unit', data=str(table[col].unit))
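
# Illustrative sketch (not part of the writer class above): reading back one
# column from the per-column '{table_name}_to_group' layout that write_table()
# produces.  The function name and the 'entry' argument (standing in for
# whatever group self._entry pointed at) are hypothetical.
def _example_read_column(entry, table_name, col_name):
    import astropy.units as u

    tg_c = entry['{}_to_group'.format(table_name)][col_name]
    value = tg_c['value'][()]
    if 'unit' in tg_c:
        # Units were stored as plain strings, so parse them back into an
        # astropy unit and attach it to the data.
        return value * u.Unit(tg_c['unit'][()].decode('utf-8'))
    return value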
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False, serialize_meta=False,
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py.File` or :class:`h5py.Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file. This
        should be relative to the input file or group. If not specified,
        defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should
        be one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and
        the integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning. If
        ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the HDF5 file,
        in particular such data required to write and read back mixin columns
        like ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    **create_dataset_kwargs
        Additional keyword arguments are passed to
        ``h5py.File.create_dataset()`` or ``h5py.Group.create_dataset()``.
    """
    from astropy.table import meta

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(NOT_OVERWRITING_MSG.format(output))

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table, f, path=path,
                                    compression=compression, append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta)
        finally:
            f.close()

    else:
        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
            if serialize_meta and name + '.__table_column_meta__' in output_group:
                del output_group[name + '.__table_column_meta__']
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode strings cannot be written directly to HDF5,
    # so make a shallow copy of the table with those columns converted to
    # bytestrings, and write that copy instead.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    # Write the table to the file
    if compression:
        if compression is True:
            compression = 'gzip'
        dset = output_group.create_dataset(name, data=table.as_array(),
                                           compression=compression,
                                           **create_dataset_kwargs)
    else:
        dset = output_group.create_dataset(name, data=table.as_array(),
                                           **create_dataset_kwargs)

    if serialize_meta:
        header_yaml = meta.get_yaml_from_table(table)
        header_encoded = np.array([h.encode('utf-8') for h in header_yaml])
        output_group.create_dataset(meta_path(name), data=header_encoded)
    else:
        # Write the Table meta dict key:value pairs to the file as HDF5
        # attributes.  This works only for a limited set of scalar data types
        # like numbers, strings, etc., but not any complex types.  This path
        # also ignores column meta like unit or format.
        for key in table.meta:
            val = table.meta[key]

            try:
                dset.attrs[key] = val
            except TypeError:
                warnings.warn(
                    "Attribute `{}` of type {} cannot be written to "
                    "HDF5 files - skipping. (Consider specifying "
                    "serialize_meta=True to write all meta data)".format(
                        key, type(val)), AstropyUserWarning)
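
# Usage sketch for write_table_hdf5() above, assuming it is called directly
# rather than through Table.write().  The filename and path are illustrative.
def _example_write_hdf5():
    from astropy.table import Table
    from astropy.time import Time

    t = Table()
    t['index'] = [1, 2]
    t['time'] = Time(['2020-01-01', '2020-01-02'])

    # Per the docstring above, serialize_meta=True is required to write and
    # read back mixin columns such as Time.
    write_table_hdf5(t, 'observations.h5', path='group/data',
                     serialize_meta=True, overwrite=True)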
def _encode_mixins(tbl):
    """Encode a Table ``tbl`` that may have mixin columns to a Table with only
    astropy Columns + appropriate meta-data to allow subsequent decoding.
    """
    # Determine if information will be lost without serializing meta.  This is
    # hardcoded to the set difference between column info attributes and what
    # FITS can store natively (name, dtype, unit).  See _get_col_attributes()
    # in table/meta.py for where this comes from.
    info_lost = any(any(getattr(col.info, attr, None) not in (None, {})
                        for attr in ('description', 'meta'))
                    for col in tbl.itercols())

    # Convert the table to one with no mixins, only Column objects.  This adds
    # meta data which is extracted with meta.get_yaml_from_table.  This ignores
    # Time-subclass columns and leaves them in the table so that the downstream
    # FITS Time handling does the right thing.
    with serialize_context_as('fits'):
        encode_tbl = serialize.represent_mixins_as_columns(
            tbl, exclude_classes=(Time,))

    # If the encoded table is unchanged then there were no mixins.  But if
    # there is column metadata (format, description, meta) that would be lost,
    # then still go through the serialized columns machinery.
    if encode_tbl is tbl and not info_lost:
        return tbl

    # Copy the meta dict if it was not copied by represent_mixins_as_columns.
    # We will modify .meta['comments'] below and we do not want to see these
    # comments in the input table.
    if encode_tbl is tbl:
        meta_copy = deepcopy(tbl.meta)
        encode_tbl = Table(tbl.columns, meta=meta_copy, copy=False)

    # Get the YAML serialization of information describing the table columns.
    # This is re-using ECSV code that combined existing table.meta with the
    # extra __serialized_columns__ key.  For FITS the table.meta is handled by
    # the native FITS connect code, so don't include that in the YAML output.
    ser_col = '__serialized_columns__'

    # encode_tbl might not have a __serialized_columns__ key if there were no
    # mixins, but machinery below expects it to be available, so just make an
    # empty dict.
    encode_tbl.meta.setdefault(ser_col, {})

    tbl_meta_copy = encode_tbl.meta.copy()
    try:
        encode_tbl.meta = {ser_col: encode_tbl.meta[ser_col]}
        meta_yaml_lines = meta.get_yaml_from_table(encode_tbl)
    finally:
        encode_tbl.meta = tbl_meta_copy
    del encode_tbl.meta[ser_col]

    if 'comments' not in encode_tbl.meta:
        encode_tbl.meta['comments'] = []
    encode_tbl.meta['comments'].append('--BEGIN-ASTROPY-SERIALIZED-COLUMNS--')

    for line in meta_yaml_lines:
        if len(line) == 0:
            lines = ['']
        else:
            # Split line into 70 character chunks for COMMENT cards
            idxs = list(range(0, len(line) + 70, 70))
            lines = [line[i0:i1] + '\\' for i0, i1 in zip(idxs[:-1], idxs[1:])]
            lines[-1] = lines[-1][:-1]
        encode_tbl.meta['comments'].extend(lines)

    encode_tbl.meta['comments'].append('--END-ASTROPY-SERIALIZED-COLUMNS--')

    return encode_tbl
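
# Illustrative sketch of what _encode_mixins() does to a mixin column: a
# Quantity column becomes a plain Column, and the YAML needed to rebuild it
# is stashed in meta['comments'] between the BEGIN/END markers.  The column
# name and data here are for demonstration only.
def _example_encode_mixins():
    import astropy.units as u
    from astropy.table import Table

    t = Table({'flux': [1.0, 2.0] * u.Jy})
    enc = _encode_mixins(t)
    print(type(enc['flux']))        # plain Column rather than Quantity
    print(enc.meta['comments'][0])  # '--BEGIN-ASTROPY-SERIALIZED-COLUMNS--'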
def _encode_mixins(tbl):
    """Encode a Table ``tbl`` that may have mixin columns to a Table with only
    astropy Columns + appropriate meta-data to allow subsequent decoding.
    """
    # Determine if information will be lost without serializing meta.  This is
    # hardcoded to the set difference between column info attributes and what
    # FITS can store natively (name, dtype, unit).  See _get_col_attributes()
    # in table/meta.py for where this comes from.
    info_lost = any(any(getattr(col.info, attr, None) not in (None, {})
                        for attr in ('description', 'meta'))
                    for col in tbl.itercols())

    # If PyYAML is not available then check to see if there are any mixin
    # columns that *require* YAML serialization.  FITS already has support for
    # Time and Quantity, so if those are the only mixins then proceed without
    # doing the YAML bit, for backward compatibility (i.e. not requiring YAML
    # to write Time or Quantity).  In this case other mixin column meta (e.g.
    # description or meta) will be silently dropped, consistent with
    # astropy <= 2.0 behavior.
    try:
        import yaml  # noqa
    except ImportError:
        for col in tbl.itercols():
            if (has_info_class(col, MixinInfo) and
                    col.__class__ not in (u.Quantity, Time)):
                raise TypeError("cannot write type {} column '{}' "
                                "to FITS without PyYAML installed."
                                .format(col.__class__.__name__, col.info.name))
        else:
            if info_lost:
                warnings.warn("table contains column(s) with defined 'format',"
                              " 'description', or 'meta' info attributes. These"
                              " will be dropped unless you install PyYAML.",
                              AstropyUserWarning)
            return tbl

    # Convert the table to one with no mixins, only Column objects.  This adds
    # meta data which is extracted with meta.get_yaml_from_table.  This ignores
    # Time-subclass columns and leaves them in the table so that the downstream
    # FITS Time handling does the right thing.
    with serialize_context_as('fits'):
        encode_tbl = serialize._represent_mixins_as_columns(
            tbl, exclude_classes=(Time,))

    # If the encoded table is unchanged then there were no mixins.  But if
    # there is column metadata (format, description, meta) that would be lost,
    # then still go through the serialized columns machinery.
    if encode_tbl is tbl and not info_lost:
        return tbl

    # Get the YAML serialization of information describing the table columns.
    # This is re-using ECSV code that combined existing table.meta with the
    # extra __serialized_columns__ key.  For FITS the table.meta is handled by
    # the native FITS connect code, so don't include that in the YAML output.
    ser_col = '__serialized_columns__'

    # encode_tbl might not have a __serialized_columns__ key if there were no
    # mixins, but machinery below expects it to be available, so just make an
    # empty dict.
    encode_tbl.meta.setdefault(ser_col, {})

    tbl_meta_copy = encode_tbl.meta.copy()
    try:
        encode_tbl.meta = {ser_col: encode_tbl.meta[ser_col]}
        meta_yaml_lines = meta.get_yaml_from_table(encode_tbl)
    finally:
        encode_tbl.meta = tbl_meta_copy
    del encode_tbl.meta[ser_col]

    if 'comments' not in encode_tbl.meta:
        encode_tbl.meta['comments'] = []
    encode_tbl.meta['comments'].append('--BEGIN-ASTROPY-SERIALIZED-COLUMNS--')

    for line in meta_yaml_lines:
        if len(line) == 0:
            lines = ['']
        else:
            # Split line into 70 character chunks for COMMENT cards
            idxs = list(range(0, len(line) + 70, 70))
            lines = [line[i0:i1] + '\\' for i0, i1 in zip(idxs[:-1], idxs[1:])]
            lines[-1] = lines[-1][:-1]
        encode_tbl.meta['comments'].extend(lines)

    encode_tbl.meta['comments'].append('--END-ASTROPY-SERIALIZED-COLUMNS--')

    return encode_tbl
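
# Standalone sketch of the 70-character chunking used in both versions above:
# long YAML lines are split into 70-char pieces for FITS COMMENT cards, each
# continued with a trailing backslash except the last.  The helper name is
# hypothetical.
def _example_chunk_for_comment_cards(line, width=70):
    if len(line) == 0:
        return ['']
    idxs = list(range(0, len(line) + width, width))
    lines = [line[i0:i1] + '\\' for i0, i1 in zip(idxs[:-1], idxs[1:])]
    lines[-1] = lines[-1][:-1]  # the final chunk has no continuation marker
    return lines

# e.g. _example_chunk_for_comment_cards('a' * 75) == ['a' * 70 + '\\', 'aaaaa']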
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False, serialize_meta=False,
                     compatibility_mode=False):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py:File` or :class:`h5py:Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file. This
        should be relative to the input file or group. If not specified,
        defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should
        be one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and
        the integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning. If
        ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the HDF5 file,
        in particular such data required to write and read back mixin columns
        like ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    compatibility_mode : bool
        If `True`, store the serialized metadata in the dataset's attributes
        rather than in a separate dataset (the legacy layout; deprecated, and
        emits a deprecation warning as coded below).
    """
    from astropy.table import meta

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError("File exists: {0}".format(output))

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table, f, path=path,
                                    compression=compression, append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta,
                                    compatibility_mode=compatibility_mode)
        finally:
            f.close()

    else:
        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
        else:
            raise OSError("Table {0} already exists".format(path))

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode strings cannot be written directly to HDF5,
    # so make a shallow copy of the table with those columns converted to
    # bytestrings, and write that copy instead.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    # Write the table to the file
    if compression:
        if compression is True:
            compression = 'gzip'
        dset = output_group.create_dataset(name, data=table.as_array(),
                                           compression=compression)
    else:
        dset = output_group.create_dataset(name, data=table.as_array())

    if serialize_meta:
        header_yaml = meta.get_yaml_from_table(table)
        header_encoded = [h.encode('utf-8') for h in header_yaml]

        if compatibility_mode:
            warnings.warn("compatibility mode for writing is deprecated",
                          AstropyDeprecationWarning)
            try:
                dset.attrs[META_KEY] = header_encoded
            except Exception as e:
                warnings.warn(
                    "Attributes could not be written to the output HDF5 "
                    "file: {0}".format(e))
        else:
            output_group.create_dataset(meta_path(name), data=header_encoded)
    else:
        # Write the Table meta dict key:value pairs to the file as HDF5
        # attributes.  This works only for a limited set of scalar data types
        # like numbers, strings, etc., but not any complex types.  This path
        # also ignores column meta like unit or format.
        for key in table.meta:
            val = table.meta[key]

            try:
                dset.attrs[key] = val
            except TypeError:
                warnings.warn("Attribute `{0}` of type {1} cannot be written "
                              "to HDF5 files - skipping. (Consider specifying "
                              "serialize_meta=True to write all meta data)"
                              .format(key, type(val)), AstropyUserWarning)
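
# Usage sketch for this older variant: compress the dataset with gzip and
# opt into the legacy attribute-based metadata layout (which, as coded above,
# emits a deprecation warning).  Filename and path are illustrative.
def _example_write_hdf5_compat():
    from astropy.table import Table

    t = Table({'a': [1, 2, 3]})
    write_table_hdf5(t, 'data_compat.h5', path='obs/table',
                     compression='gzip', serialize_meta=True,
                     compatibility_mode=True, overwrite=True)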
def write_table_parquet(table, output, overwrite=False):
    """
    Write a Table object to a Parquet file

    This requires `pyarrow <https://arrow.apache.org/docs/python/>`_
    to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or path-like
        The filename to write the table to.
    overwrite : bool, optional
        Whether to overwrite any existing file without warning. Default
        `False`.
    """
    from astropy.table import meta, serialize
    from astropy.utils.data_info import serialize_context_as

    pa, parquet, writer_version = get_pyarrow()

    if not isinstance(output, (str, os.PathLike)):
        raise TypeError(
            f'`output` should be a string or path-like, not {output}')

    # Convert all compound columns into serialized column names, where
    # e.g. 'time' becomes ['time.jd1', 'time.jd2'].
    with serialize_context_as('parquet'):
        encode_table = serialize.represent_mixins_as_columns(table)

    # We store the encoded serialization metadata as a yaml string.
    meta_yaml = meta.get_yaml_from_table(encode_table)
    meta_yaml_str = '\n'.join(meta_yaml)

    metadata = {}
    for name, col in encode_table.columns.items():
        # Parquet will retain the datatypes of columns, but string and
        # byte column length is lost.  Therefore, we special-case these
        # types to record the length for precise round-tripping.
        if col.dtype.type is np.str_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize // 4)
        elif col.dtype.type is np.bytes_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize)

    metadata['table_meta_yaml'] = meta_yaml_str

    # Pyarrow stores all metadata as byte strings, so we explicitly encode
    # our unicode strings in metadata as UTF-8 byte strings here.
    metadata_encode = {
        k.encode('UTF-8'): v.encode('UTF-8') for k, v in metadata.items()}

    # Build the pyarrow schema by converting from the numpy dtype of each
    # column to an equivalent pyarrow type with from_numpy_dtype()
    type_list = [(name, pa.from_numpy_dtype(encode_table.dtype[name].type))
                 for name in encode_table.dtype.names]
    schema = pa.schema(type_list, metadata=metadata_encode)

    if os.path.exists(output):
        if overwrite:
            # We must remove the file prior to writing below.
            os.remove(output)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    # We use version='2.0' for full support of datatypes including uint32.
    with parquet.ParquetWriter(output, schema,
                               version=writer_version) as writer:
        # Convert each Table column to a pyarrow array
        arrays = [pa.array(col) for col in encode_table.itercols()]
        # Create a pyarrow table from the list of arrays and the schema
        pa_table = pa.Table.from_arrays(arrays, schema=schema)
        # Write the pyarrow table to a file
        writer.write_table(pa_table)
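
# Usage sketch for write_table_parquet().  A fixed-width string column
# exercises the 'table::len::' metadata recorded above; the filename is
# illustrative.
def _example_write_parquet():
    import numpy as np
    from astropy.table import Table

    t = Table({'name': np.array(['a', 'bb']),
               'value': np.array([1.5, 2.5])})
    write_table_parquet(t, 'catalog.parquet', overwrite=True)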
def write_table_hdf5(table, output, path=None, compression=False,
                     append=False, overwrite=False, serialize_meta=False,
                     metadata_conflicts='error', **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py:File` or :class:`h5py:Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file. This
        should be relative to the input file or group. If not specified,
        defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should
        be one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and
        the integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning. If
        ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the HDF5 file,
        in particular such data required to write and read back mixin columns
        like ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    metadata_conflicts : str
        How to proceed with metadata conflicts. This should be one of:
            * ``'silent'``: silently pick the last conflicting meta-data value
            * ``'warn'``: pick the last conflicting meta-data value, but emit
              a warning
            * ``'error'``: raise an exception (default).
    **create_dataset_kwargs
        Additional keyword arguments are passed to
        `h5py.File.create_dataset`.
    """
    from astropy.table import meta

    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(f"File exists: {output}")

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table, f, path=path,
                                    compression=compression, append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta,
                                    **create_dataset_kwargs)
        finally:
            f.close()

    else:
        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    existing_header = None
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
        elif append:
            # Data table exists, so we interpret "append" to mean "extend
            # existing table with the table passed in".  However, this
            # requires the table to have been written by this function in
            # the past, so it should have a metadata header.
            if meta_path(name) not in output_group:
                raise ValueError("No metadata exists for existing table. We "
                                 "can only append tables if metadata "
                                 "is consistent for all tables")

            # Load existing table header:
            existing_header = get_header_from_yaml(
                h.decode('utf-8') for h in output_group[meta_path(name)])
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Tables with numpy unicode strings cannot be written directly to HDF5,
    # so make a shallow copy of the table with those columns converted to
    # bytestrings, and write that copy instead.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    if existing_header is None:  # Just write the table and metadata
        # Write the table to the file
        if compression:
            if compression is True:
                compression = 'gzip'
            dset = output_group.create_dataset(name, data=table.as_array(),
                                               compression=compression,
                                               **create_dataset_kwargs)
        else:
            dset = output_group.create_dataset(name, data=table.as_array(),
                                               **create_dataset_kwargs)

        if serialize_meta:
            header_yaml = meta.get_yaml_from_table(table)
            header_encoded = [h.encode('utf-8') for h in header_yaml]
            output_group.create_dataset(meta_path(name), data=header_encoded)
        else:
            # Write the Table meta dict key:value pairs to the file as HDF5
            # attributes.  This works only for a limited set of scalar data
            # types like numbers, strings, etc., but not any complex types.
            # This path also ignores column meta like unit or format.
            for key in table.meta:
                val = table.meta[key]

                try:
                    dset.attrs[key] = val
                except TypeError:
                    warnings.warn(
                        "Attribute `{}` of type {} cannot be written to "
                        "HDF5 files - skipping. (Consider specifying "
                        "serialize_meta=True to write all meta data)".format(
                            key, type(val)), AstropyUserWarning)
    else:  # We need to append the tables!
        try:
            # FIXME: do something with the merged metadata!
            metadata.merge(existing_header['meta'], table.meta,
                           metadata_conflicts=metadata_conflicts)
        except metadata.MergeConflictError:
            raise metadata.MergeConflictError(
                "Cannot append table to existing file because "
                "the existing file table metadata and this "
                "table object's metadata do not match. If you "
                "want to ignore this issue, or change to a "
                "warning, set metadata_conflicts='silent' or 'warn'.")

        # Now compare datatype of this object and on disk
        this_header = get_header_from_yaml(get_yaml_from_table(table))

        if not _custom_tbl_dtype_compare(existing_header['datatype'],
                                         this_header['datatype']):
            raise ValueError(
                "Cannot append table to existing file because "
                "the existing file table datatype and this "
                "object's table datatype do not match. "
                f"{existing_header['datatype']} vs. {this_header['datatype']}")

        # If we got here, we can now try to append:
        current_size = len(output_group[name])
        output_group[name].resize((current_size + len(table), ))
        output_group[name][current_size:] = table.as_array()
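
# Usage sketch for the append path above: write once, then extend the same
# dataset in place.  Both calls use serialize_meta=True so the metadata header
# needed for the consistency checks exists on disk, and the first write passes
# h5py's maxshape through **create_dataset_kwargs so the dataset is resizable
# (otherwise the resize() above would fail).  Filename and path are
# illustrative.
def _example_append_hdf5():
    from astropy.table import Table

    t1 = Table({'a': [1, 2]})
    t2 = Table({'a': [3, 4]})

    write_table_hdf5(t1, 'grow.h5', path='data', serialize_meta=True,
                     overwrite=True, maxshape=(None,))
    write_table_hdf5(t2, 'grow.h5', path='data', serialize_meta=True,
                     append=True)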