Esempio n. 1
0
    def create(self, bucket, descriptor, force=False):
        """Create one or more buckets backed by empty dataframes.

        See https://github.com/frictionlessdata/tableschema-pandas-py#storage
        """

        # Normalize the scalar forms to parallel lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor

        # Refuse to clobber existing buckets unless force is set
        for name in buckets:
            if name not in self.buckets:
                continue
            if not force:
                raise tableschema.exceptions.StorageError(
                    'Bucket "%s" already exists' % name)
            self.delete(name)

        # Register a validated descriptor and a fresh dataframe per bucket
        for name, desc in zip(buckets, descriptors):
            tableschema.validate(desc)
            self.__descriptors[name] = desc
            self.__dataframes[name] = pd.DataFrame()
    def create(self, bucket, descriptor, force=False):
        """Create one or more buckets backed by empty dataframes.

        See https://github.com/frictionlessdata/tableschema-pandas-py#storage
        """

        # Normalize the scalar forms to parallel lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor

        # Refuse to clobber existing buckets unless force is set
        for name in buckets:
            if name not in self.buckets:
                continue
            if not force:
                raise tableschema.exceptions.StorageError(
                    'Bucket "%s" already exists' % name)
            self.delete(name)

        # Register a validated descriptor and a fresh dataframe per bucket
        for name, desc in zip(buckets, descriptors):
            tableschema.validate(desc)
            self.__descriptors[name] = desc
            self.__dataframes[name] = pd.DataFrame()
    def create(self, bucket, descriptor, force=False):
        """Create one or more buckets as CKAN datastore resources.

        # Arguments
            bucket (str/str[]): bucket name or list of bucket names
            descriptor (dict/dict[]): Table Schema descriptor(s), one per bucket
            force (bool): delete and re-create buckets that already exist

        # Raises
            tableschema.exceptions.StorageError:
                if a bucket exists and ``force`` is not set
        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)

        # Iterate over buckets/descriptors
        for bucket, descriptor in zip(buckets, descriptors):
            # Define resources
            tableschema.validate(descriptor)
            self.__descriptors[bucket] = descriptor
            # Build the datastore_create payload and POST it to CKAN
            datastore_dict = \
                self.__mapper.descriptor_to_datastore_dict(descriptor, bucket)
            datastore_create_url = \
                "{}/datastore_create".format(self.__base_endpoint)
            self._make_ckan_request(datastore_create_url,
                                    method='POST',
                                    json=datastore_dict)

        # Invalidate cache so self.buckets reflects the new resources
        self.__bucket_cache = None
Esempio n. 4
0
def validate(schema):
    """Validate that a supposed schema is in fact a Table Schema.

    Echoes ``True`` when the schema is valid; otherwise echoes ``False``
    followed by the list of validation errors.
    """
    try:
        tableschema.validate(schema)
        # BUG FIX: validate() returns without raising for a valid schema,
        # so report success here (the original echoed False — inverted).
        click.echo(True)
    except tableschema.exceptions.ValidationError as exception:
        click.echo(False)
        click.echo(exception.errors)
Esempio n. 5
0
def validate(schema):
    """Validate that a supposed schema is in fact a Table Schema.

    Echoes ``True`` when the schema is valid; otherwise echoes ``False``
    followed by the list of validation errors.
    """
    try:
        tableschema.validate(schema)
        # BUG FIX: validate() returns without raising for a valid schema,
        # so report success here (the original echoed False — inverted).
        click.echo(True)
    except tableschema.exceptions.ValidationError as exception:
        click.echo(False)
        click.echo(exception.errors)
Esempio n. 6
0
 def test_schema_multiple_errors_no_fail_fast_true(self):
     """With no_fail_fast, validation collects all errors before raising."""
     filepath = os.path.join(self.data_dir,
                             'schema_invalid_multiple_errors.json')
     with io.open(filepath) as stream:
         schema = json.load(stream)
         try:
             tableschema.validate(schema, no_fail_fast=True)
         except exceptions.MultipleInvalid as exception:
             # assertEquals is a deprecated alias (removed in Python 3.12)
             self.assertEqual(3, len(exception.errors))
         else:
             # BUG FIX: the test previously passed vacuously when no
             # exception was raised at all.
             self.fail('MultipleInvalid was not raised')
Esempio n. 7
0
def validate(schema):
    """Check that *schema* is a Table Schema; exit 0 on success, 1 on failure."""
    try:
        tableschema.validate(schema)
    except tableschema.exceptions.ValidationError as exception:
        click.echo("Schema is not valid")
        click.echo(exception.errors)
        sys.exit(1)
    click.echo("Schema is valid")
    sys.exit(0)
Esempio n. 8
0
 def check_schema(self, filename):
     """Validate *filename* as a Table Schema.

     Raises exceptions.InvalidSchemaException (carrying all validation
     error reprs) when the schema is invalid.
     """
     try:
         tableschema.validate(self.filepath(filename))
     except tableschema.exceptions.ValidationError as e:
         # BUG FIX: the original comprehension used "for e in e.errors",
         # shadowing the caught exception inside the comprehension scope.
         errors = "; ".join([repr(error) for error in e.errors])
         message = "Schema %s is not a valid TableSchema schema. Errors: %s" % (
             filename,
             errors,
         )
         raise exceptions.InvalidSchemaException(self.repo, message)
Esempio n. 9
0
    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """Create bucket

        # Arguments
            bucket (str/str[]): bucket name or list of bucket names
            descriptor (dict/dict[]): descriptor or list of descriptors
            force (bool): delete and re-create buckets that already exist
            indexes_fields (str[]):
                list of tuples containing field names, or list of such lists

        # Raises
            tableschema.exceptions.StorageError:
                on argument dimension mismatch, or when a bucket exists
                and ``force`` is not set

        """

        # Make lists
        buckets = bucket
        if isinstance(bucket, six.string_types):
            buckets = [bucket]
        descriptors = descriptor
        if isinstance(descriptor, dict):
            descriptors = [descriptor]
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]

        # Check dimensions
        if not (len(buckets) == len(descriptors) == len(indexes_fields)):
            raise tableschema.exceptions.StorageError('Wrong argument dimensions')

        # Check buckets for existence
        for bucket in reversed(self.buckets):
            if bucket in buckets:
                if not force:
                    message = 'Bucket "%s" already exists.' % bucket
                    raise tableschema.exceptions.StorageError(message)
                self.delete(bucket)

        # Define buckets
        for bucket, descriptor, index_fields in zip(buckets, descriptors, indexes_fields):
            tableschema.validate(descriptor)
            table_name = self.__mapper.convert_bucket(bucket)
            autoincrement = self.__get_autoincrement_for_bucket(bucket)
            columns, constraints, indexes, fallbacks, table_comment = self.__mapper \
                .convert_descriptor(bucket, descriptor, index_fields, autoincrement)
            Table(table_name, self.__metadata, *(columns + constraints + indexes),
                  comment=table_comment)
            self.__descriptors[bucket] = descriptor
            self.__fallbacks[bucket] = fallbacks

        # Create tables, update metadata
        try:
            self.__metadata.create_all()
        except sqlalchemy.exc.ProgrammingError as exception:
            if 'there is no unique constraint matching given keys' in str(exception):
                message = 'Foreign keys can only reference primary key or unique fields\n%s'
                six.raise_from(
                    tableschema.exceptions.ValidationError(message % str(exception)),
                    None)
            # BUG FIX: previously any ProgrammingError that did not match
            # the message above was silently swallowed; re-raise it so
            # real database errors surface to the caller.
            raise
Esempio n. 10
0
def resource_schema_validator(value, context):
    """Validate a resource schema value and normalize it to a JSON string.

    Accepts a string (URLs starting with "http" are returned unchanged,
    anything else is parsed as JSON), raw bytes (decoded as UTF-8 with
    single quotes converted to double quotes), or an already-parsed dict.
    Raises ``Invalid`` for malformed JSON, a non-dict descriptor, or a
    descriptor that fails Table Schema validation.
    """

    # Empty values pass through untouched (implicitly returns None)
    if not value:
        return

    msg = None

    if isinstance(value, string_types):

        if value.lower().startswith('http'):
            return value

        try:
            descriptor = json.loads(str(value))
            if not isinstance(descriptor, dict):
                msg = u'Invalid Table Schema descriptor: {}'.format(value)
                raise Invalid(msg)

        except ValueError as e:
            msg = u'JSON error in Table Schema descriptor: {}'.format(e)
            raise Invalid(msg)

    elif isinstance(value, binary_type):
        try:
            # Decode UTF-8 bytes to Unicode, and convert single quotes
            # to double quotes to make it valid JSON
            decoded_value = value.decode('utf8').replace("'", '"')

            descriptor = json.loads(decoded_value)
            if not isinstance(descriptor, dict):
                msg = u'Invalid Table Schema descriptor: {}'.format(value)
                raise Invalid(msg)
        except ValueError as e:
            msg = u'JSON error in Table Schema descriptor: {}'.format(e)
            raise Invalid(msg)
    else:
        # Anything else (normally a dict) is validated directly
        descriptor = value

    try:
        tableschema.validate(descriptor)
    except tableschema.exceptions.ValidationError as e:
        # Collect all error messages; raising is deferred to the check below
        errors = []
        for error in e.errors:
            errors.append(str(error))
        msg = u'Invalid Table Schema: {}'.format(u', '.join(errors))

    if msg:
        raise Invalid(msg)

    return json.dumps(descriptor)
Esempio n. 11
0
def test_validate_error_message():
    """The first validation error pinpoints descriptor and profile paths."""
    descriptor = {'fields': [{'name': 'name', 'type': 'other'}]}
    with pytest.raises(exceptions.ValidationError) as excinfo:
        validate(descriptor)
    message = str(excinfo.value.errors[0])
    for fragment in (
            'Descriptor validation error',
            'at "fields/0" in descriptor',
            'at "properties/fields/items/anyOf" in profile'):
        assert fragment in message
 def _filter_row(self, row, **kwargs):
     """Upsert a single row, lazily creating the DB table on first use.

     Pops the configured id field out of *row* (if present) and yields
     the upsert result when there is one.
     """
     id = int(row.pop(self._id_field_name)) if self._id_field_name in row else None
     values = self._get_values(row)
     if self.db_table is None:
         # First row seen: validate the schema, then build and create the table
         tableschema.validate(self._table_schema)
         prefix, bucket = "", self.table_name
         index_fields = []
         autoincrement = None
         tablename = mappers.bucket_to_tablename(prefix, bucket)
         columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(prefix, bucket,
                                                                                       self._table_schema,
                                                                                       index_fields,
                                                                                       autoincrement)
         self.db_table = Table(tablename, self.db_meta, *(columns + constraints + indexes))
         self.db_table.create()
         logging.info("Created DB table {}".format(tablename))
     res = self._upsert(id, values)
     if res:
         yield res
Esempio n. 13
0
    def create(self, bucket, descriptor, force=False, indexes_fields=None):
        """https://github.com/frictionlessdata/tableschema-sql-py#storage
        """

        # Coerce scalar arguments into parallel lists
        buckets = [bucket] if isinstance(bucket, six.string_types) else bucket
        descriptors = [descriptor] if isinstance(descriptor, dict) else descriptor
        if indexes_fields is None or len(indexes_fields) == 0:
            indexes_fields = [()] * len(descriptors)
        elif type(indexes_fields[0][0]) not in {list, tuple}:
            indexes_fields = [indexes_fields]

        # All three lists must line up one-to-one
        if len(buckets) != len(descriptors) or len(descriptors) != len(indexes_fields):
            raise tableschema.exceptions.StorageError(
                'Wrong argument dimensions')

        # Existing buckets are an error unless force allows re-creation
        for existing in reversed(self.buckets):
            if existing in buckets:
                if not force:
                    raise tableschema.exceptions.StorageError(
                        'Bucket "%s" already exists.' % existing)
                self.delete(existing)

        # Validate each descriptor and register its table metadata
        for name, desc, index_fields in zip(buckets, descriptors,
                                            indexes_fields):
            tableschema.validate(desc)
            table_name = self.__mapper.convert_bucket(name)
            columns, constraints, indexes, fallbacks = self.__mapper.convert_descriptor(
                name, desc, index_fields, self.__autoincrement)
            Table(table_name, self.__metadata,
                  *(columns + constraints + indexes))
            self.__descriptors[name] = desc
            self.__fallbacks[name] = fallbacks

        # Create tables, update metadata
        self.__metadata.create_all()
 def _filter_row(self, row, **kwargs):
     """Upsert a single row, lazily creating the DB table on first use.

     Pops the configured id field out of *row* (if present) and yields
     the upsert result when there is one.
     """
     id = int(row.pop(
         self._id_field_name)) if self._id_field_name in row else None
     values = self._get_values(row)
     if self.db_table is None:
         # First row seen: validate the schema, then build and create the table
         tableschema.validate(self._table_schema)
         prefix, bucket = "", self.table_name
         index_fields = []
         autoincrement = None
         tablename = mappers.bucket_to_tablename(prefix, bucket)
         columns, constraints, indexes = mappers.descriptor_to_columns_and_constraints(
             prefix, bucket, self._table_schema, index_fields,
             autoincrement)
         self.db_table = Table(tablename, self.db_meta,
                               *(columns + constraints + indexes))
         self.db_table.create()
         logging.info("Created DB table {}".format(tablename))
     res = self._upsert(id, values)
     if res:
         yield res
Esempio n. 15
0
    def query(
        self,
        data_model,
        data_resource_name,
        restricted_fields,
        table_schema,
        request_obj,
    ):
        """Query the data resource.

        Args:
            data_model (object): SQLAlchemy ORM model.
            data_resource_name (str): Name of the data resource.
            restricted_fields (list): Field names callers may not filter on.
            table_schema (dict): The Table Schema object to use for validation.
            request_obj: HTTP request whose JSON body holds the filters.

        Return:
            dict, int: The response object and the HTTP status code.
        """

        try:
            request_obj = request_obj.json
        except Exception:
            raise ApiError("No request body found.", 400)

        errors = []
        _ = Schema(table_schema)
        accepted_fields = []
        response = OrderedDict()
        response["results"] = []

        # Guard clause: the schema itself must validate before its field
        # list can be trusted.
        if not validate(table_schema):
            raise SchemaValidationFailure()

        # Only non-restricted schema fields may be used as filters
        for field in table_schema["fields"]:
            if field["name"] not in restricted_fields:
                accepted_fields.append(field["name"])
        for field in request_obj.keys():
            if field not in accepted_fields:
                errors.append(
                    "Unknown or restricted field '{}' found.".format(field))
        if len(errors) > 0:
            raise ApiUnhandledError("Invalid request body.", 400, errors)

        try:
            session = Session()
            results = session.query(data_model).filter_by(**request_obj)
            for row in results:
                response["results"].append(
                    self.build_json_from_object(row, restricted_fields))

            if len(response["results"]) == 0:
                return {"message": "No matches found"}, 404
            return response, 200
        except Exception:
            # BUG FIX: the message previously said "Failed to create new
            # resource." — copy-pasted from the insert path.
            raise ApiUnhandledError("Failed to query the data resource.",
                                    400)
        finally:
            session.close()
        # NOTE: the original ended with an unreachable
        # `return {"message": "querying data resource"}, 200` — removed.
Esempio n. 16
0
 def test_primary_key_is_not_a_valid_type(self):
     """An invalid primary-key type yields exactly two validation errors."""
     filepath = os.path.join(self.data_dir,
                             'schema_invalid_pk_is_wrong_type.json')
     with io.open(filepath) as stream:
         schema = json.load(stream)
         try:
             # Consume the generator so validation actually runs
             errors = [
                 i for i in tableschema.validate(schema, no_fail_fast=True)
             ]
         except exceptions.MultipleInvalid as error:
             # assertEquals is a deprecated alias (removed in Python 3.12)
             self.assertEqual(2, len(error.errors))
         else:
             # BUG FIX: the test previously passed vacuously when no
             # exception was raised at all.
             self.fail('MultipleInvalid was not raised')
def resource_schema_validator(value, context):
    '''
    Validate a resource schema value and normalize it to a JSON string.

    Python 2 variant: accepts a ``basestring`` (URLs starting with "http"
    are returned unchanged, anything else is parsed as JSON) or an
    already-parsed dict.  Raises ``Invalid`` for malformed JSON, a
    non-dict descriptor, or a descriptor that fails Table Schema
    validation.
    '''
    # Empty values pass through untouched (implicitly returns None)
    if not value:
        return

    msg = None

    if isinstance(value, basestring):

        if value.lower().startswith('http'):
            return value

        try:
            descriptor = json.loads(str(value))
            if not isinstance(descriptor, dict):
                msg = u'Invalid Table Schema descriptor: {}'.format(value)
                raise Invalid(msg)

        except ValueError as e:
            msg = u'JSON error in Table Schema descriptor: {}'.format(e)
            raise Invalid(msg)
    else:
        # Anything else (normally a dict) is validated directly
        descriptor = value

    try:
        tableschema.validate(descriptor)
    except tableschema.exceptions.ValidationError as e:
        # Collect every error message; raising is deferred to the check below
        errors = []
        for error in e.errors:
            errors.append(error.message)
        msg = u'Invalid Table Schema: {}'.format(u', '.join(errors))

    if msg:
        raise Invalid(msg)

    return json.dumps(descriptor)
def load_data_from_local_csv(csv_file=ASSET_DATA_FILE):
    """Yield keyed rows from a local CSV once its schema validates.

    Validation errors are printed; cast errors are routed to
    write_skipped_assets (or printed when no per-row errors exist).
    """
    table = Table(csv_file, schema=SCHEMA_FILE)
    try:
        if validate(table.schema.descriptor):
            for row in table.iter(keyed=True):
                yield row
    except exceptions.ValidationError as exception:
        for error in exception.errors:
            print(error)
    except exceptions.CastError as exception:
        if not exception.errors:
            print(exception)
        for error in exception.errors:
            write_skipped_assets(error, [])
Esempio n. 19
0
    def update_one(
        self,
        id,
        data_model,
        data_resource_name,
        table_schema,
        restricted_fields,
        request_obj,
        mode="PATCH",
    ):
        """Update a single object from the data model based on it's primary
        key.

        Args:
            id (any): The primary key for the specific object.
            data_model (object): SQLAlchemy ORM model.
            data_resource_name (str): Name of the data resource.
            table_schema (dict): The Table Schema object to use for validation.
            restricted_fields (list): Field names that may not be updated.
            request_obj: HTTP request whose JSON body holds the new values.
            mode (str): "PATCH" applies only the supplied fields; "PUT"
                additionally requires every required field to be present.

        Return:
            dict, int: The response object and the HTTP status code.
        """
        try:
            request_obj = request_obj.json
        except Exception:
            raise ApiError("No request body found.", 400)

        try:
            # Look the object up by the schema-declared primary key
            primary_key = table_schema["primaryKey"]
            session = Session()
            data_obj = (session.query(data_model).filter(
                getattr(data_model, primary_key) == id).first())
            if data_obj is None:
                session.close()
                raise ApiUnhandledError(f"Resource with id '{id}' not found.",
                                        404)
        except Exception:
            raise ApiUnhandledError(f"Resource with id '{id}' not found.", 404)

        # The schema itself must validate before its field list is trusted
        _ = Schema(table_schema)
        errors = []
        accepted_fields = []
        if validate(table_schema):
            for field in table_schema["fields"]:
                accepted_fields.append(field["name"])
            for field in request_obj.keys():
                if field not in accepted_fields:
                    errors.append(f"Unknown field '{field}' found.")
                elif field in restricted_fields:
                    errors.append(f"Cannot update restricted field '{field}'.")
        else:
            session.close()
            raise ApiError("Data schema validation error.", 400)

        if len(errors) > 0:
            session.close()
            raise ApiError("Invalid request body.", 400, errors)

        if mode == "PATCH":
            # Partial update: apply only the supplied fields
            for key, value in request_obj.items():
                setattr(data_obj, key, value)
            session.commit()
        elif mode == "PUT":
            # Full replace: every required field must be present
            for field in table_schema["fields"]:
                if field["required"] and field["name"] not in request_obj.keys(
                ):
                    errors.append(
                        f"Required field '{field['name']}' is missing.")

            if len(errors) > 0:
                session.close()
                raise ApiError("Invalid request body.", 400, errors)

            for key, value in request_obj.items():
                setattr(data_obj, key, value)
            session.commit()

        session.close()
        return {"message": f"Successfully updated resource '{id}'."}, 201
Esempio n. 20
0
def test_schema_invalid_fk_no_reference():
    """A foreign key without a reference must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (the original bound it to
        # an unused local `valid`).
        validate('data/schema_invalid_fk_no_reference.json')
Esempio n. 21
0
def _validate_schema(schema):
    """Validate *schema*, surfacing the first underlying error.

    Raises the first entry of ``exception.errors`` when present.
    BUG FIX: the original iterated the errors list and so silently
    swallowed the ValidationError whenever the list was empty; now the
    ValidationError itself is re-raised in that case.
    """
    try:
        validate(schema)
    except exceptions.ValidationError as exception:
        if exception.errors:
            raise exception.errors[0]
        raise
Esempio n. 22
0
    def insert_one(self, data_model, data_resource_name, table_schema,
                   request_obj):
        """Insert a new object.

        Args:
            data_model (object): SQLAlchemy ORM model.
            data_resource_name (str): Name of the data resource.
            table_schema (dict): The Table Schema object to use for validation.
            request_obj (dict): HTTP request object.

        Return:
            dict, int: The response object and associated HTTP status code.
        """
        try:
            request_obj = request_obj.json
        except Exception:
            raise ApiError("No request body found.", 400)

        _ = Schema(table_schema)
        errors = []
        accepted_fields = []

        # The schema itself must validate before its field list is trusted
        if not validate(table_schema):
            raise SchemaValidationFailure()

        # Check for required fields
        for field in table_schema["fields"]:
            accepted_fields.append(field["name"])

            if field["required"] and not field["name"] in request_obj.keys():
                errors.append(f"Required field '{field['name']}' is missing.")

        valid_fields = []
        many_query = []

        for field in request_obj.keys():
            if field in accepted_fields:
                valid_fields.append(field)
            else:
                # Unknown fields may still map to a many-to-many junction
                # table for this resource
                junc_table = JuncHolder.lookup_table(field, data_resource_name)

                if junc_table is not None:
                    values = request_obj[field]
                    if not isinstance(values, list):
                        values = [values]
                    many_query.append([field, values, junc_table])
                else:
                    errors.append(f"Unknown field '{field}' found.")

        if len(errors) > 0:
            raise ApiError("Invalid request body.", 400, errors)

        try:
            session = Session()
            new_object = data_model()
            for field in valid_fields:
                value = request_obj[field]
                setattr(new_object, field, value)
            session.add(new_object)
            session.commit()
            # Read the primary key only after commit, when the database
            # has assigned any autoincrement value
            id_value = getattr(new_object, table_schema["primaryKey"])

            # process the many_query
            for field, values, table in many_query:
                self.process_many_query(session, table, id_value, field,
                                        data_resource_name, values)

            return {
                "message": "Successfully added new resource.",
                "id": id_value
            }, 201
        except Exception:
            raise ApiUnhandledError("Failed to create new resource.", 400)
        finally:
            session.close()
Esempio n. 23
0
def test_schema_invalid_fk_reference_array_number_mismatch():
    """Mismatched FK fields/reference array lengths must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_fk_array_wrong_number.json')
Esempio n. 24
0
def test_schema_invalid_pk_string():
    """This invalid primary-key fixture must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_pk_string.json')
Esempio n. 25
0
def test_primary_key_is_not_a_valid_type():
    """An invalid primary-key type yields exactly two validation errors."""
    with pytest.raises(exceptions.ValidationError) as excinfo:
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_pk_is_wrong_type.json')
    assert len(excinfo.value.errors) == 2
Esempio n. 26
0
def test_schema_valid_full():
    """A fully-featured valid schema passes validation."""
    assert validate('data/schema_valid_full.json')
Esempio n. 27
0
def test_schema_invalid_wrong_type():
    """A non-dict schema (here a list) must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (unused local removed)
        validate([])
Esempio n. 28
0
def test_schema_invalid_fk_reference_is_a_string_fields_is_an_array():
    """Array FK fields with a plain-string reference must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_fk_array_string_ref.json')
Esempio n. 29
0
def test_schema_invalid_fk_array():
    """This invalid array foreign-key fixture must fail validation."""
    with pytest.raises(exceptions.ValidationError):
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_fk_array.json')
Esempio n. 30
0
 def test_schema_valid_fk_array(self):
     """A schema with an array foreign key is valid."""
     filepath = os.path.join(self.data_dir, 'schema_valid_fk_array.json')
     with io.open(filepath) as stream:
         schema = json.load(stream)
     self.assertTrue(tableschema.validate(schema))
Esempio n. 31
0
def test_schema_valid_fk_array():
    """A schema with an array foreign key is valid."""
    assert validate('data/schema_valid_fk_array.json')
Esempio n. 32
0
# schema validator. use this to validate the schema before using it

from tableschema import validate, exceptions
import sys

try:
    # validate the schema; ValidationError is raised when it is invalid
    validate('ugms_inbound_table_schema_swt_v0.01.json')
    print('OK')
    sys.exit(0)
except exceptions.ValidationError as exception:
    for error in exception.errors:
        print(error)
    sys.exit(1)
Esempio n. 33
0
def test_schema_multiple_errors_no_fail_fast_true():
    """Validation collects every error instead of stopping at the first."""
    with pytest.raises(exceptions.ValidationError) as excinfo:
        # Return value intentionally discarded (unused local removed)
        validate('data/schema_invalid_multiple_errors.json')
    assert len(excinfo.value.errors) == 5