Example #1
def test_metadata_merging_conflict_exception():
    """Regression test for issue #3294.

    Ensure that an exception is raised when a metadata conflict exists
    and ``metadata_conflicts='error'`` has been set.
    """
    data1 = ExampleData()
    data2 = ExampleData()
    data1.meta['somekey'] = {'x': 1, 'y': 1}
    data2.meta['somekey'] = {'x': 1, 'y': 999}
    with pytest.raises(MergeConflictError):
        merge(data1.meta, data2.meta, metadata_conflicts='error')
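For context, here is a minimal sketch of the other two ``metadata_conflicts`` modes of ``astropy.utils.metadata.merge``, using the same conflicting dicts as the test above; 'warn' and 'silent' both keep the right-hand value, and only 'error' raises:

import warnings
from astropy.utils.metadata import MergeConflictWarning, merge

meta1 = {'somekey': {'x': 1, 'y': 1}}
meta2 = {'somekey': {'x': 1, 'y': 999}}

# 'warn' keeps the second (right-hand) value and emits a MergeConflictWarning.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    out = merge(meta1, meta2, metadata_conflicts='warn')
assert out['somekey']['y'] == 999
assert any(issubclass(w.category, MergeConflictWarning) for w in caught)

# 'silent' keeps the second value without emitting anything.
out = merge(meta1, meta2, metadata_conflicts='silent')
assert out['somekey']['y'] == 999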
Example #2
def test_metadata_merging_new_strategy():
    original_merge_strategies = list(metadata.MERGE_STRATEGIES)

    class MergeNumbersAsList(metadata.MergeStrategy):
        """
        Scalar float or int values are joined in a list.
        """
        types = ((int, float), (int, float))

        @classmethod
        def merge(cls, left, right):
            return [left, right]

    class MergeConcatStrings(metadata.MergePlus):
        """
        Scalar string values are concatenated.
        """
        types = (str, str)
        enabled = False

    # Normally can't merge two scalar types
    meta1 = {'k1': 1, 'k2': 'a'}
    meta2 = {'k1': 2, 'k2': 'b'}

    # Enable new merge strategy
    with enable_merge_strategies(MergeNumbersAsList, MergeConcatStrings):
        assert MergeNumbersAsList.enabled
        assert MergeConcatStrings.enabled
        out = merge(meta1, meta2, metadata_conflicts='error')
    assert out['k1'] == [1, 2]
    assert out['k2'] == 'ab'
    assert not MergeNumbersAsList.enabled
    assert not MergeConcatStrings.enabled

    # Confirm the default enabled=False behavior
    with pytest.raises(MergeConflictError):
        merge(meta1, meta2, metadata_conflicts='error')

    # Enable all MergeStrategy subclasses
    with enable_merge_strategies(metadata.MergeStrategy):
        assert MergeNumbersAsList.enabled
        assert MergeConcatStrings.enabled
        out = merge(meta1, meta2, metadata_conflicts='error')
    assert out['k1'] == [1, 2]
    assert out['k2'] == 'ab'
    assert not MergeNumbersAsList.enabled
    assert not MergeConcatStrings.enabled

    metadata.MERGE_STRATEGIES = original_merge_strategies
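The same strategy classes work outside a test as well. A minimal standalone sketch follows; note that subclassing ``MergeStrategy`` with a ``types`` attribute auto-registers the strategy, and it stays disabled until wrapped in ``enable_merge_strategies``:

from astropy.utils.metadata import MergeStrategy, enable_merge_strategies, merge

class MergeNumbersAsList(MergeStrategy):
    """Scalar int/float values are combined into a list."""
    # (left types, right types) accepted by this strategy
    types = ((int, float), (int, float))

    @classmethod
    def merge(cls, left, right):
        return [left, right]

# Outside the context manager the two scalars would conflict; inside,
# the newly enabled strategy resolves them into a list.
with enable_merge_strategies(MergeNumbersAsList):
    out = merge({'k': 1}, {'k': 2})
assert out['k'] == [1, 2]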
Example #3
def _merge_table_meta(out, tables, metadata_conflicts='warn'):
    out_meta = deepcopy(tables[0].meta)
    for table in tables[1:]:
        out_meta = metadata.merge(out_meta,
                                  table.meta,
                                  metadata_conflicts=metadata_conflicts)
    out.meta.update(out_meta)
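This helper folds ``metadata.merge`` across the table list left to right. The same fold can be written with ``functools.reduce``; a sketch with illustrative plain dicts standing in for ``table.meta``:

from copy import deepcopy
from functools import reduce
from astropy.utils import metadata

metas = [{'a': 1}, {'b': 2}, {'c': 3}]
# Fold merge over the remaining metas, starting from a copy of the first.
out_meta = reduce(
    lambda acc, m: metadata.merge(acc, m, metadata_conflicts='warn'),
    metas[1:], deepcopy(metas[0]))
assert out_meta == {'a': 1, 'b': 2, 'c': 3}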
Example #4
def _model_tree_evaluate_metadata(root):
    # Not a CompoundModel, grab metadata and be done.
    if not hasattr(root, 'op'):
        return _get_meta(root)

    m1 = _model_tree_evaluate_metadata(root.left)
    m2 = _model_tree_evaluate_metadata(root.right)
    return metadata.merge(m1, m2, metadata_conflicts='silent')
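The recursion walks a binary model tree post-order and merges leaf metadata pairwise. Below is a self-contained sketch of that walk using hypothetical stand-ins (``SimpleNamespace`` leaves and a local ``_get_meta``) rather than real model classes:

from types import SimpleNamespace
from astropy.utils import metadata

def _get_meta(leaf):
    return leaf.meta

def evaluate_metadata(root):
    # Leaf node: no 'op' attribute, so just return its metadata.
    if not hasattr(root, 'op'):
        return _get_meta(root)
    m1 = evaluate_metadata(root.left)
    m2 = evaluate_metadata(root.right)
    return metadata.merge(m1, m2, metadata_conflicts='silent')

left = SimpleNamespace(meta={'name': 'a', 'k': 1})
right = SimpleNamespace(meta={'name': 'b'})
tree = SimpleNamespace(op='+', left=left, right=right)
# 'silent' resolves the conflicting 'name' by keeping the right-hand value.
assert evaluate_metadata(tree) == {'name': 'b', 'k': 1}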
Example #5
def test_metadata_merging():
    # Recursive merge
    meta1 = {'k1': {'k1': [1, 2],
                    'k2': 2},
             'k2': 2,
             'k4': (1, 2)}
    meta2 = {'k1': {'k1': [3]},
             'k3': 3,
             'k4': (3,)}
    out = merge(meta1, meta2, metadata_conflicts='error')
    assert out == {'k1': {'k2': 2,
                          'k1': [1, 2, 3]},
                   'k2': 2,
                   'k3': 3,
                   'k4': (1, 2, 3)}

    # Merge two ndarrays
    meta1 = {'k1': np.array([1, 2])}
    meta2 = {'k1': np.array([3])}
    out = merge(meta1, meta2, metadata_conflicts='error')
    assert np.all(out['k1'] == np.array([1, 2, 3]))

    # Merge list and np.ndarray
    meta1 = {'k1': [1, 2]}
    meta2 = {'k1': np.array([3])}
    out = merge(meta1, meta2, metadata_conflicts='error')
    assert np.all(out['k1'] == np.array([1, 2, 3]))

    # Can't merge two scalar types
    meta1 = {'k1': 1}
    meta2 = {'k1': 2}
    with pytest.raises(MergeConflictError):
        merge(meta1, meta2, metadata_conflicts='error')

    # Conflicting shape
    meta1 = {'k1': np.array([1, 2])}
    meta2 = {'k1': np.array([[3]])}
    with pytest.raises(MergeConflictError):
        merge(meta1, meta2, metadata_conflicts='error')

    # Conflicting array type
    meta1 = {'k1': np.array([1, 2])}
    meta2 = {'k1': np.array(['3'])}
    with pytest.raises(MergeConflictError):
        merge(meta1, meta2, metadata_conflicts='error')

    # Conflicting array type with 'silent' merging
    meta1 = {'k1': np.array([1, 2])}
    meta2 = {'k1': np.array(['3'])}
    out = merge(meta1, meta2, metadata_conflicts='silent')
    assert np.all(out['k1'] == np.array(['3']))
Example #6
def _merge_meta(model1, model2):
    """Simple merge of samplesets."""
    w1 = _get_meta(model1)
    w2 = _get_meta(model2)
    return metadata.merge(w1, w2, metadata_conflicts='silent')
Example #7
def _merge_meta_compat(model1, model2):
    # Simple merge of samplesets.
    m1 = _get_meta_compat(model1)
    m2 = _get_meta_compat(model2)
    return metadata.merge(m1, m2, metadata_conflicts='silent')
Example #8
def write_table_hdf5(table,
                     output,
                     path=None,
                     compression=False,
                     append=False,
                     overwrite=False,
                     serialize_meta=False,
                     metadata_conflicts='error',
                     **create_dataset_kwargs):
    """
    Write a Table object to an HDF5 file.

    This requires `h5py <http://www.h5py.org/>`_ to be installed.

    Parameters
    ----------
    table : `~astropy.table.Table`
        Data table that is to be written to file.
    output : str or :class:`h5py:File` or :class:`h5py:Group`
        If a string, the filename to write the table to. If an h5py object,
        either the file or the group object to write the table to.
    path : str
        The path to which to write the table inside the HDF5 file.
        This should be relative to the input file or group.
        If not specified, defaults to ``__astropy_table__``.
    compression : bool or str or int
        Whether to compress the table inside the HDF5 file. If set to `True`,
        ``'gzip'`` compression is used. If a string is specified, it should be
        one of ``'gzip'``, ``'szip'``, or ``'lzf'``. If an integer is
        specified (in the range 0-9), ``'gzip'`` compression is used, and the
        integer denotes the compression level.
    append : bool
        Whether to append the table to an existing HDF5 file.
    overwrite : bool
        Whether to overwrite any existing file without warning.
        If ``append=True`` and ``overwrite=True`` then only the dataset will be
        replaced; the file/group will not be overwritten.
    serialize_meta : bool
        Whether to serialize rich table meta-data when writing the table, in
        particular such data required to write and read back mixin columns
        like ``Time``, ``SkyCoord``, or ``Quantity`` to the file.
    metadata_conflicts : str
        How to proceed with metadata conflicts. This should be one of:
            * ``'silent'``: silently pick the last conflicting meta-data value
            * ``'warn'``: pick the last conflicting meta-data value, but emit a
              warning
            * ``'error'``: raise an exception (default).
    **create_dataset_kwargs
        Additional keyword arguments are passed to `h5py.File.create_dataset`.
    """

    from astropy.table import meta
    try:
        import h5py
    except ImportError:
        raise ModuleNotFoundError(
            "h5py is required to read and write HDF5 files")

    if path is None:
        # '__astropy_table__' is just an arbitrary, hardcoded default path.
        path = '__astropy_table__'
    elif path.endswith('/'):
        raise ValueError("table path should end with table name, not /")

    if '/' in path:
        group, name = path.rsplit('/', 1)
    else:
        group, name = None, path

    if isinstance(output, (h5py.File, h5py.Group)):
        if len(list(output.keys())) > 0 and name == '__astropy_table__':
            raise ValueError("table path should always be set via the "
                             "path= argument when writing to existing "
                             "files")
        elif name == '__astropy_table__':
            warnings.warn("table path was not set via the path= argument; "
                          "using default path {}".format(path))

        if group:
            try:
                output_group = output[group]
            except (KeyError, ValueError):
                output_group = output.create_group(group)
        else:
            output_group = output

    elif isinstance(output, str):

        if os.path.exists(output) and not append:
            if overwrite:
                os.remove(output)
            else:
                raise OSError(f"File exists: {output}")

        # Open the file for appending or writing
        f = h5py.File(output, 'a' if append else 'w')

        # Recursively call the write function
        try:
            return write_table_hdf5(table,
                                    f,
                                    path=path,
                                    compression=compression,
                                    append=append,
                                    overwrite=overwrite,
                                    serialize_meta=serialize_meta,
                                    metadata_conflicts=metadata_conflicts,
                                    **create_dataset_kwargs)
        finally:
            f.close()

    else:

        raise TypeError('output should be a string or an h5py File or '
                        'Group object')

    # Check whether table already exists
    existing_header = None
    if name in output_group:
        if append and overwrite:
            # Delete only the dataset itself
            del output_group[name]
        elif append:
            # Data table exists, so we interpret "append" to mean "extend
            # existing table with the table passed in". However, this requires
            # the table to have been written by this function in the past, so it
            # should have a metadata header
            if meta_path(name) not in output_group:
                raise ValueError("No metadata exists for existing table. We "
                                 "can only append tables if metadata "
                                 "is consistent for all tables")

            # Load existing table header:
            existing_header = get_header_from_yaml(
                h.decode('utf-8') for h in output_group[meta_path(name)])
        else:
            raise OSError(f"Table {path} already exists")

    # Encode any mixin columns as plain columns + appropriate metadata
    table = _encode_mixins(table)

    # Table with numpy unicode strings can't be written in HDF5 so
    # to write such a table a copy of table is made containing columns as
    # bytestrings.  Now this copy of the table can be written in HDF5.
    if any(col.info.dtype.kind == 'U' for col in table.itercols()):
        table = table.copy(copy_data=False)
        table.convert_unicode_to_bytestring()

    # Warn if information will be lost when serialize_meta=False.  This is
    # hardcoded to the set difference between column info attributes and what
    # HDF5 can store natively (name, dtype) with no meta.
    if serialize_meta is False:
        for col in table.itercols():
            for attr in ('unit', 'format', 'description', 'meta'):
                if getattr(col.info, attr, None) not in (None, {}):
                    warnings.warn(
                        "table contains column(s) with defined 'unit', 'format',"
                        " 'description', or 'meta' info attributes. These will"
                        " be dropped since serialize_meta=False.",
                        AstropyUserWarning)

    if existing_header is None:  # Just write the table and metadata
        # Write the table to the file
        if compression:
            if compression is True:
                compression = 'gzip'
            dset = output_group.create_dataset(name,
                                               data=table.as_array(),
                                               compression=compression,
                                               **create_dataset_kwargs)
        else:
            dset = output_group.create_dataset(name,
                                               data=table.as_array(),
                                               **create_dataset_kwargs)

        if serialize_meta:
            header_yaml = meta.get_yaml_from_table(table)

            header_encoded = [h.encode('utf-8') for h in header_yaml]
            output_group.create_dataset(meta_path(name), data=header_encoded)

        else:
            # Write the Table meta dict key:value pairs to the file as HDF5
            # attributes.  This works only for a limited set of scalar data types
            # like numbers, strings, etc., but not any complex types.  This path
            # also ignores column meta like unit or format.
            for key in table.meta:
                val = table.meta[key]
                try:
                    dset.attrs[key] = val
                except TypeError:
                    warnings.warn(
                        "Attribute `{}` of type {} cannot be written to "
                        "HDF5 files - skipping. (Consider specifying "
                        "serialize_meta=True to write all meta data)".format(
                            key, type(val)), AstropyUserWarning)

    else:  # We need to append the tables!
        try:
            # FIXME: do something with the merged metadata!
            metadata.merge(existing_header['meta'],
                           table.meta,
                           metadata_conflicts=metadata_conflicts)
        except metadata.MergeConflictError:
            raise metadata.MergeConflictError(
                "Cannot append table to existing file because "
                "the existing file table metadata and this "
                "table object's metadata do not match. If you "
                "want to ignore this issue, or change to a "
                "warning, set metadata_conflicts='silent' or 'warn'.")

        # Now compare datatype of this object and on disk
        this_header = get_header_from_yaml(get_yaml_from_table(table))

        if not _custom_tbl_dtype_compare(existing_header['datatype'],
                                         this_header['datatype']):
            raise ValueError(
                "Cannot append table to existing file because "
                "the existing file table datatype and this "
                "object's table datatype do not match. "
                f"{existing_header['datatype']} vs. {this_header['datatype']}")

        # If we got here, we can now try to append:
        current_size = len(output_group[name])
        output_group[name].resize((current_size + len(table), ))
        output_group[name][current_size:] = table.as_array()
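A hedged usage sketch of the append path above (the file name and data values are illustrative). h5py can only ``resize`` datasets created with a ``maxshape``, so the first write forwards ``maxshape=(None,)`` through ``**create_dataset_kwargs``; the second write relies on ``metadata_conflicts`` being forwarded in the recursive call, as fixed above:

from astropy.table import Table

t1 = Table({'a': [1, 2]})
t1.meta['run'] = 'night1'
# serialize_meta=True writes the YAML header that later appends require;
# maxshape=(None,) makes the dataset resizable.
write_table_hdf5(t1, 'example.h5', path='data', overwrite=True,
                 serialize_meta=True, maxshape=(None,))

t2 = Table({'a': [3, 4]})
t2.meta['run'] = 'night2'
# The conflicting 'run' values would raise MergeConflictError under the
# default metadata_conflicts='error'; 'silent' lets the append proceed.
write_table_hdf5(t2, 'example.h5', path='data', append=True,
                 metadata_conflicts='silent')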