Exemple #1
0
    def _validate_table(self, table_name, table_meta, table_data=None):
        """Validate table metadata.

        Validate the type and subtype combination for each field in ``table_meta``.
        If a field has type ``id``, validate that it either is the ``primary_key`` or
        has a ``ref`` entry.

        If the table has ``primary_key``, make sure that the corresponding field exists
        and its type is ``id``.

        If ``table_data`` is provided, also check that the list of columns corresponds
        to the ones indicated in the metadata and that all the dtypes are valid.

        Args:
            table_name (str):
                Name of the table to validate.
            table_meta (dict):
                Metadata of the table to validate.
            table_data (pandas.DataFrame):
                If provided, make sure that the data matches the one described
                on the metadata.

        Raises:
            MetadataError:
                If there is any error in the metadata or the data does not
                match the metadata description.
        """
        dtypes = self.get_dtypes(table_name, ids=True)

        # Primary key field exists and its type is 'id'
        primary_key = table_meta.get('primary_key')
        if primary_key:
            pk_field = table_meta['fields'].get(primary_key)

            if not pk_field:
                raise MetadataError('Primary key is not an existing field.')

            if pk_field['type'] != 'id':
                raise MetadataError('Primary key is not of type `id`.')

        if table_data is not None:
            for column in table_data:
                try:
                    dtype = dtypes.pop(column)
                    table_data[column].dropna().astype(dtype)
                except KeyError:
                    message = 'Unexpected column in table `{}`: `{}`'.format(
                        table_name, column)
                    raise MetadataError(message) from None
                except ValueError as ve:
                    message = 'Invalid values found in column `{}` of table `{}`: `{}`'.format(
                        column, table_name, ve)
                    raise MetadataError(message) from None

            # assert all dtypes are in data
            if dtypes:
                raise MetadataError('Missing columns on table {}: {}.'.format(
                    table_name, list(dtypes.keys())))
Exemple #2
0
    def validate(self, tables=None):
        """Validate this metadata, optionally against the table data.

        For every table found under the ``tables`` entry of the metadata:

            * If table data is available, check that the table exists in it.
            * Call ``_validate_table`` with the table metadata and the table
              data, if any.
            * Call ``_validate_circular_relationships`` for the table.

        All the error messages are gathered in a single list which is shared
        with both helpers and reported together at the end.

        Args:
            tables (bool, dict):
                If a dict of tables is passed, validate the metadata against it.
                Any other truthy value, such as ``True``, makes the tables be
                loaded with ``self.load_tables()``. If ``None`` or empty, no
                table data is passed to the table validation.
                Defaults to ``None``.

        Raises:
            MetadataError:
                If the metadata has no ``tables`` entry, or it is empty, or if
                any error message has been collected during the validation.
        """
        tables_meta = self._metadata.get('tables')
        if not tables_meta:
            raise MetadataError('"tables" entry not found in Metadata.')

        # Anything truthy that is not already a dict means "load the data"
        if tables and not isinstance(tables, dict):
            tables = self.load_tables()

        problems = []
        for name, meta in tables_meta.items():
            data = None
            if tables:
                data = tables.get(name)
                if data is None:
                    problems.append(
                        'Table `{}` not found in tables'.format(name))

            self._validate_table(name, meta, data, problems)
            self._validate_circular_relationships(name, errors=problems)

        if not problems:
            return

        raise MetadataError('Invalid Metadata specification:\n - ' +
                            '\n - '.join(problems))
Exemple #3
0
    def get_dtypes(self, table_name, ids=False):
        """Build a ``dict`` with the ``dtype`` of each field of a given table.

        The dtype of each field is looked up in ``self._DTYPES`` using its
        ``(type, subtype)`` pair. Fields of type ``id`` are skipped unless
        ``ids`` is ``True``, in which case each of them must be the
        ``primary_key`` of the table, have a ``ref`` entry, or match the value
        returned by ``self.get_foreign_key`` for one of the children of the table.

        Args:
            table_name (str):
                Table name for which to retrieve the ``dtypes``.
            ids (bool):
                Whether or not to include the id fields. Defaults to ``False``.

        Returns:
            dict:
                Dictionary that maps the field names to their data types.

        Raises:
            MetadataError:
                If a field has an invalid type and subtype combination or if,
                when ``ids`` is ``True``, an id field is neither a primary nor
                a foreign key. Errors raised by ``self.get_table_meta`` for the
                given table name are not handled here.
        """
        table_meta = self.get_table_meta(table_name)

        result = {}
        for field_name, field_meta in table_meta['fields'].items():
            kind = field_meta['type']
            subkind = field_meta.get('subtype')
            dtype = self._DTYPES.get((kind, subkind))
            if not dtype:
                raise MetadataError(
                    'Invalid type and subtype combination for field {}: ({}, {})'
                    .format(field_name, kind, subkind))

            if kind == 'id':
                if not ids:
                    # id fields were not requested: no checks and no entry
                    continue

                declared_key = (
                    field_name == table_meta.get('primary_key')
                    or field_meta.get('ref')
                )
                # Only ask the children when the field is not declared as a key
                if not declared_key and not any(
                    field_name == self.get_foreign_key(table_name, child)
                    for child in self.get_children(table_name)
                ):
                    raise MetadataError(
                        'id field `{}` is neither a primary or a foreign key'
                        .format(field_name))

            result[field_name] = dtype

        return result
Exemple #4
0
    def _get_field_dtype(self, field_name, field_metadata):
        field_type = field_metadata['type']
        field_subtype = field_metadata.get('subtype')
        dtype = self._TYPES_TO_DTYPES.get((field_type, field_subtype))
        if not dtype:
            raise MetadataError(
                'Invalid type and subtype combination for field {}: ({}, {})'.
                format(field_name, field_type, field_subtype))

        return dtype
Exemple #5
0
    def _validate_circular_relationships(self, parent, children=None):
        """Validate that there is no circular relatioship in the metadata."""
        if children is None:
            children = self.get_children(parent)

        if parent in children:
            raise MetadataError('Circular relationship found for table "{}"'.format(parent))

        for child in children:
            self._validate_circular_relationships(parent, self.get_children(child))