Example #1
def write_table_fits(input, output, overwrite=False, append=False):
    """
    Write a Table object to a FITS file

    Parameters
    ----------
    input : Table
        The table to write out.
    output : str
        The filename to write the table to.
    overwrite : bool
        Whether to overwrite any existing file without warning.
    append : bool
        Whether to append the table to an existing file.
    """

    # Encode any mixin columns into standard Columns.
    input = _encode_mixins(input)

    table_hdu = table_to_hdu(input, character_as_bytes=True)

    # Check if output file already exists
    if isinstance(output, str) and os.path.exists(output):
        if overwrite:
            os.remove(output)
        elif not append:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    if append:
        # verify=False stops it reading and checking the existing file.
        fits_append(output, table_hdu.data, table_hdu.header, verify=False)
    else:
        table_hdu.writeto(output)
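
A minimal usage sketch for the FITS writer above, reached through the unified Table.write interface; the file name is illustrative, and the overwrite/append behaviour assumed here follows the checks shown in the function.

from astropy.table import Table

t = Table({'a': [1, 2, 3]})
t.write('data.fits', format='fits')                  # creates the file (raises OSError if it already exists)
t.write('data.fits', format='fits', overwrite=True)  # existing file is removed, then rewritten
t.write('data.fits', format='fits', append=True)     # table is appended as a new HDU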
Example #2
File: ui.py Project: maxnoe/astropy
def write(table, output=None, format=None, Writer=None, fast_writer=True, *,
          overwrite=False, **kwargs):
    # Docstring inserted below

    _validate_read_write_kwargs('write', format=format, fast_writer=fast_writer,
                                overwrite=overwrite, **kwargs)

    if isinstance(output, (str, bytes, os.PathLike)):
        output = os.path.expanduser(output)
        if not overwrite and os.path.lexists(output):
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    if output is None:
        output = sys.stdout

    # Ensure that `table` is a Table subclass.
    names = kwargs.get('names')
    if isinstance(table, Table):
        # While we are only going to read data from columns, we may need to
        # adjust info attributes such as format, so we make a shallow copy.
        table = table.__class__(table, names=names, copy=False)
    else:
        # Otherwise, create a table from the input.
        table = Table(table, names=names, copy=False)

    table0 = table[:0].copy()
    core._apply_include_exclude_names(table0, kwargs.get('names'),
                                      kwargs.get('include_names'), kwargs.get('exclude_names'))
    diff_format_with_names = set(kwargs.get('formats', [])) - set(table0.colnames)

    if diff_format_with_names:
        warnings.warn(
            'The key(s) {} specified in the formats argument do not match a column name.'
            .format(diff_format_with_names), AstropyWarning)

    if table.has_mixin_columns:
        fast_writer = False

    Writer = _get_format_class(format, Writer, 'Writer')
    writer = get_writer(Writer=Writer, fast_writer=fast_writer, **kwargs)
    if writer._format_name in core.FAST_CLASSES:
        writer.write(table, output)
        return

    lines = writer.write(table)

    # Write the lines to output
    outstr = os.linesep.join(lines)
    if not hasattr(output, 'write'):
        # NOTE: we need to specify newline='', otherwise the default
        # behavior is for Python to translate \r\n (which we write because
        # of os.linesep) into \r\r\n. Specifying newline='' disables any
        # auto-translation.
        output = open(output, 'w', newline='')
        output.write(outstr)
        output.write(os.linesep)
        output.close()
    else:
        output.write(outstr)
        output.write(os.linesep)
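
A sketch of the corresponding public calls (file name illustrative); omitting the output argument sends the formatted lines to sys.stdout, and an unmatched key in formats triggers the AstropyWarning issued above.

import sys
from astropy.table import Table
from astropy.io import ascii

t = Table({'x': [1, 2], 'y': [3.0, 4.0]})
ascii.write(t, 'values.csv', format='csv', overwrite=True)  # replaces an existing file
ascii.write(t)                                              # no output given: lines go to sys.stdout
ascii.write(t, sys.stdout, formats={'z': '%.3f'})           # 'z' is not a column name -> AstropyWarning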
Example #3
def test_logging(capsys, tmp_path):

    # Run skypy with default verbosity and check log is empty
    config_filename = get_pkg_data_filename('data/test_config.yml')
    output_filename = str(tmp_path / 'logging.fits')
    skypy.main([config_filename, output_filename])
    out, err = capsys.readouterr()
    assert (not err)

    # Run again with increased verbosity and capture log. Force an exception by
    # not using the "--overwrite" flag when the output file already exists.
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '--verbose'])
    out, err = capsys.readouterr()

    # Determine all DAG jobs and function calls from config
    config = load_skypy_yaml(config_filename)
    cosmology = config.pop('cosmology', None)
    tables = config.pop('tables', {})
    config.update({k: v.pop('.init', Call(Table)) for k, v in tables.items()})
    columns = [f'{t}.{c}' for t, cols in tables.items() for c in cols]
    functions = [f for f in config.values() if isinstance(f, Call)]
    functions += [
        f for t, cols in tables.items() for f in cols.values()
        if isinstance(f, Call)
    ]

    # Check all jobs appear in the log
    for job in list(config) + list(tables) + columns:
        log_string = f"[INFO] skypy.pipeline: Generating {job}"
        assert (log_string in err)

    # Check all functions appear in the log
    for f in functions:
        log_string = f"[INFO] skypy.pipeline: Calling {f.function.__name__}"
        assert (log_string in err)

    # Check cosmology appears in the log
    if cosmology:
        assert ("[INFO] skypy.pipeline: Setting cosmology" in err)

    # Check writing output file is in the log
    assert (f"[INFO] skypy: Writing {output_filename}" in err)

    # Check error for existing output file is in the log
    try:
        # New error message introduced in astropy PR #12179
        from astropy.utils.misc import NOT_OVERWRITING_MSG
        error_string = NOT_OVERWRITING_MSG.format(output_filename)
    except ImportError:
        # Fallback on old error message from astropy v4.x
        error_string = f"[ERROR] skypy: File {output_filename!r} already exists."
    assert (error_string in err)

    # Run again with decreased verbosity and check the log is empty
    with pytest.raises(SystemExit):
        skypy.main([config_filename, output_filename, '-qq'])
    out, err = capsys.readouterr()
    assert (not err)
Example #4
def write_table_votable(input,
                        output,
                        table_id=None,
                        overwrite=False,
                        tabledata_format=None):
    """
    Write a Table object to a VO table file

    Parameters
    ----------
    input : Table
        The table to write out.

    output : str
        The filename to write the table to.

    table_id : str, optional
        The table ID to use. If this is not specified, the 'ID' keyword in the
        ``meta`` object of the table will be used.

    overwrite : bool, optional
        Whether to overwrite any existing file without warning.

    tabledata_format : str, optional
        The format of table data to write.  Must be one of ``tabledata``
        (text representation), ``binary`` or ``binary2``.  Default is
        ``tabledata``.  See :ref:`astropy:votable-serialization`.
    """

    # Only those columns which are instances of BaseColumn or Quantity can be written
    unsupported_cols = input.columns.not_isinstance((BaseColumn, Quantity))
    if unsupported_cols:
        unsupported_names = [col.info.name for col in unsupported_cols]
        raise ValueError(
            'cannot write table with mixin column(s) {} to VOTable'.format(
                unsupported_names))

    # Check if output file already exists
    if isinstance(output, str) and os.path.exists(output):
        if overwrite:
            os.remove(output)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    # Create a new VOTable file
    table_file = from_table(input, table_id=table_id)

    # Write out file
    table_file.to_xml(output, tabledata_format=tabledata_format)
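
A usage sketch for the VOTable writer above via Table.write; the file name and column names are illustrative.

from astropy.table import Table

t = Table({'ra': [10.5, 11.2], 'dec': [41.2, 42.0]})
t.write('catalog.xml', format='votable', tabledata_format='binary2')  # fails if catalog.xml already exists
t.write('catalog.xml', format='votable', overwrite=True)              # existing file is removed first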
Example #5
    def _overwrite_existing(self, overwrite, fileobj, closed):
        """Overwrite an existing file if ``overwrite`` is ``True``, otherwise
        raise an OSError.  The exact behavior of this method depends on the
        _File object state and is only meant for use within the ``_open_*``
        internal methods.
        """

        # The file will be overwritten...
        if ((self.file_like and hasattr(fileobj, 'len') and fileobj.len > 0) or
            (os.path.exists(self.name) and os.path.getsize(self.name) != 0)):
            if overwrite:
                if self.file_like and hasattr(fileobj, 'truncate'):
                    fileobj.truncate(0)
                else:
                    if not closed:
                        fileobj.close()
                    os.remove(self.name)
            else:
                raise OSError(NOT_OVERWRITING_MSG.format(self.name))
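
This helper sits behind the public FITS writers; a short sketch of the behaviour it enforces when the target file already exists and is non-empty (file name illustrative).

import numpy as np
from astropy.io import fits

hdu = fits.PrimaryHDU(data=np.arange(10))
hdu.writeto('image.fits')                   # creates the file
hdu.writeto('image.fits', overwrite=True)   # existing file is truncated or removed, then rewritten
hdu.writeto('image.fits')                   # raises OSError with NOT_OVERWRITING_MSG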
Example #6
def _pandas_write(fmt, tbl, filespec, overwrite=False, **kwargs):
    """Provide io Table connector to write table using pandas.

    """
    pandas_fmt = fmt[len(PANDAS_PREFIX):]  # chop the 'pandas.' in front

    # Get defaults and then override with user-supplied values
    write_kwargs = PANDAS_FMTS[pandas_fmt]['write'].copy()
    write_kwargs.update(kwargs)

    df = tbl.to_pandas()
    write_method = getattr(df, 'to_' + pandas_fmt)

    if not overwrite:
        try:  # filespec is not always a path-like
            exists = os.path.exists(filespec)
        except TypeError:  # skip invalid arguments
            pass
        else:
            if exists:  # only error if file already exists
                raise OSError(NOT_OVERWRITING_MSG.format(filespec))

    return write_method(filespec, **write_kwargs)
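
A usage sketch assuming the pandas connector above is reached through the registered 'pandas.*' format names; extra keyword arguments are forwarded to the matching DataFrame.to_* method.

from astropy.table import Table

t = Table({'a': [1, 2, 3], 'b': ['x', 'y', 'z']})
t.write('table.csv', format='pandas.csv', overwrite=True)    # dispatched to DataFrame.to_csv
t.write('table.json', format='pandas.json', overwrite=True)  # dispatched to DataFrame.to_json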
Example #7
def write_table_hdf5(table,
                     output,
                     path=None,
                     compression=False,
                     append=False,
                     overwrite=False,
                     serialize_meta=False,
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py.File` or :class:`h5py.Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
        If not specified, defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should be
        one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and the
        integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.
        If ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the HDF5 file, in
        particular such data required to write and read back mixin columns like
        ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    **create_dataset_kwargs
        Additional keyword arguments are passed to
        ``h5py.File.create_dataset()`` or ``h5py.Group.create_dataset()``.
    """

    from astropy.table import meta
    try:
        import h5py
    except ImportError:
        raise Exception("h5py is required to read and write HDF5 files")

    if path is None:
        # table is just an arbitrary, hardcoded string here.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite and not append:
                os.remove(output)
            else:
                raise OSError(NOT_OVERWRITING_MSG.format(output))

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table,
                                    f,
                                    path=path,
                                    compression=compression,
                                    append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta)
        finally:
            f.close()

    else:

        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
            if serialize_meta and name + '.__table_column_meta__' in output_group:
                del output_group[name + '.__table_column_meta__']
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Table with numpy unicode strings can't be written in HDF5 so
    # to write such a table a copy of table is made containing columns as
    # bytestrings.  Now this copy of the table can be written in HDF5.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    # Write the table to the file
    if compression:
        if compression is True:
            compression = 'gzip'
        dset = output_group.create_dataset(name,
                                           data=table.as_array(),
                                           compression=compression,
                                           **create_dataset_kwargs)
    else:
        dset = output_group.create_dataset(name,
                                           data=table.as_array(),
                                           **create_dataset_kwargs)

    if serialize_meta:
        header_yaml = meta.get_yaml_from_table(table)
        header_encoded = np.array([h.encode('utf-8') for h in header_yaml])
        output_group.create_dataset(meta_path(name), data=header_encoded)

    else:
        # Write the Table meta dict key:value pairs to the file as HDF5
        # attributes.  This works only for a limited set of scalar data types
        # like numbers, strings, etc., but not any complex types.  This path
        # also ignores column meta like unit or format.
        for key in table.meta:
            val = table.meta[key]
            try:
                dset.attrs[key] = val
            except TypeError:
                warnings.warn(
                    "Attribute `{}` of type {} cannot be written to "
                    "HDF5 files - skipping. (Consider specifying "
                    "serialize_meta=True to write all meta data)".format(
                        key, type(val)), AstropyUserWarning)
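
A sketch of the append/overwrite combinations handled above, through Table.write with an illustrative file name and dataset paths.

from astropy.table import Table

t = Table({'a': [1, 2, 3]})
t.write('data.hdf5', format='hdf5', path='group/table1')                               # creates the file
t.write('data.hdf5', format='hdf5', path='group/table2', append=True)                  # adds a second dataset
t.write('data.hdf5', format='hdf5', path='group/table1', append=True, overwrite=True)  # replaces only that dataset
t.write('data.hdf5', format='hdf5', path='group/table1')                               # raises OSError: file already exists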
Example #8
def write_table_parquet(table, output, overwrite=False):
    """
    Write a Table object to a Parquet file

    This requires `pyarrow <https://arrow.apache.org/docs/python/>`_
    to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or path-like
        The filename to write the table to.
    overwrite : bool, optional
        Whether to overwrite any existing file without warning. Default `False`.
    """

    from astropy.table import meta, serialize
    from astropy.utils.data_info import serialize_context_as

    pa, parquet, writer_version = get_pyarrow()

    if not isinstance(output, (str, os.PathLike)):
        raise TypeError(
            f'`output` should be a string or path-like, not {output}')

    # Convert all compound columns into serialized column names, where
    # e.g. 'time' becomes ['time.jd1', 'time.jd2'].
    with serialize_context_as('parquet'):
        encode_table = serialize.represent_mixins_as_columns(table)
    # We store the encoded serialization metadata as a yaml string.
    meta_yaml = meta.get_yaml_from_table(encode_table)
    meta_yaml_str = '\n'.join(meta_yaml)

    metadata = {}
    for name, col in encode_table.columns.items():
        # Parquet will retain the datatypes of columns, but string and
        # byte column length is lost.  Therefore, we special-case these
        # types to record the length for precise round-tripping.
        if col.dtype.type is np.str_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize // 4)
        elif col.dtype.type is np.bytes_:
            metadata[f'table::len::{name}'] = str(col.dtype.itemsize)

        metadata['table_meta_yaml'] = meta_yaml_str

    # Pyarrow stores all metadata as byte strings, so we explicitly encode
    # our unicode strings in metadata as UTF-8 byte strings here.
    metadata_encode = {
        k.encode('UTF-8'): v.encode('UTF-8')
        for k, v in metadata.items()
    }

    # Build the pyarrow schema by converting from the numpy dtype of each
    # column to an equivalent pyarrow type with from_numpy_dtype()
    type_list = [(name, pa.from_numpy_dtype(encode_table.dtype[name].type))
                 for name in encode_table.dtype.names]
    schema = pa.schema(type_list, metadata=metadata_encode)

    if os.path.exists(output):
        if overwrite:
            # We must remove the file prior to writing below.
            os.remove(output)
        else:
            raise OSError(NOT_OVERWRITING_MSG.format(output))

    # We use version='2.0' for full support of datatypes including uint32.
    with parquet.ParquetWriter(output, schema,
                               version=writer_version) as writer:
        # Convert each Table column to a pyarrow array
        arrays = [pa.array(col) for col in encode_table.itercols()]
        # Create a pyarrow table from the list of arrays and the schema
        pa_table = pa.Table.from_arrays(arrays, schema=schema)
        # Write the pyarrow table to a file
        writer.write_table(pa_table)
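
A usage sketch for the Parquet writer above (requires pyarrow); the file name is illustrative.

from astropy.table import Table

t = Table({'name': ['a', 'bb'], 'val': [1.5, 2.5]})
t.write('data.parquet', format='parquet')                   # creates the file (fails if it already exists)
t.write('data.parquet', format='parquet', overwrite=True)   # existing file is removed before writing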