Example #1
0
 def test_validate(self):
     """Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

     A pair counts as passed when ``io.validate`` completes without raising
     ``io.AvroTypeException``; the test fails unless every pair passes.
     """
     print_test_name('TEST VALIDATE')
     passed = 0
     for example_schema, datum in SCHEMAS_TO_VALIDATE:
         # A parenthesised single-argument print behaves the same as a
         # Python 2 print statement and is valid Python 3 as well.
         print('Schema: %s' % example_schema)
         print('Datum: %s' % datum)
         try:
             io.validate(schema.parse(example_schema), datum)
         except io.AvroTypeException:
             validated = False
         else:
             validated = True
         print('Valid: %s' % validated)
         if validated:
             passed += 1
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
 def _validate(self, schema, datum):
     """
     Check whether `datum` conforms to `schema`.
     :param schema: Avro schema the `datum` is checked against
     :param datum: Data to check
     :return: whatever the module-level `validate` returns for the pair
     """
     outcome = validate(schema, datum)
     return outcome
 def _serialize_union(self, schema, datum):
     """
     Serialize `datum` using the first branch of a union schema it fits.
     The branches in `schema.schemas` are tried in order; the first one
     that validates `datum` is used.
     Union serialization:
     if null:
         "null"
     else:
         {"<type>": value}
     For named schemas the key is the schema name, otherwise the type name.
     :param schema: Avro schema for this union
     :param datum: Data to serialize
     :return: dict {"type": value} or the result of `_serialize_null()`
     :raises AvroTypeException: if no branch of the union validates `datum`
     """
     for candidate_schema in schema.schemas:
         if not validate(candidate_schema, datum):
             continue
         if candidate_schema.type == "null":
             return self._serialize_null()
         field_type_name = candidate_schema.type
         if isinstance(candidate_schema, avro.schema.NamedSchema):
             field_type_name = candidate_schema.name
         return {
             field_type_name: self._serialize_data(candidate_schema, datum)
         }
     # `schema` is the union schema argument, not a module, so the exception
     # class must not be looked up on it.
     raise AvroTypeException(schema, datum)
Example #4
0
    def __validate(self, writers_schema, datum):
        """Validate `datum` against `writers_schema`, honouring logical types.

        If the schema declares a ``logicalType`` that is registered in
        ``self.logical_types`` and that logical type can convert the schema,
        its own ``validate`` decides the result. Otherwise container schemas
        (array, map, union, record) are checked recursively through this
        method, and every other schema is delegated to ``io.validate``.

        :param writers_schema: Avro schema to validate against
        :param datum: Data to validate
        :return: True if `datum` is valid for the schema, False otherwise
        """
        logical_type = writers_schema.props.get('logicalType')
        if logical_type:
            lt = self.logical_types.get(logical_type)
            if lt and lt.can_convert(writers_schema):
                return bool(lt.validate(writers_schema, datum))

        schema_type = writers_schema.type
        # Generators with all()/any() stop at the first decisive element
        # instead of validating every element into a temporary list.
        if schema_type == 'array':
            return (isinstance(datum, list) and all(
                self.__validate(writers_schema.items, d) for d in datum))
        elif schema_type == 'map':
            return (isinstance(datum, dict)
                    and all(isinstance(k, basestring) for k in datum.keys())
                    and all(
                        self.__validate(writers_schema.values, v)
                        for v in datum.values()))
        elif schema_type in ['union', 'error_union']:
            return any(
                self.__validate(s, datum) for s in writers_schema.schemas)
        elif schema_type in ['record', 'error', 'request']:
            return (isinstance(datum, dict) and all(
                self.__validate(f.type, datum.get(f.name))
                for f in writers_schema.fields))

        return io.validate(writers_schema, datum)
Example #5
0
 def _serialize_union(self, schema, datum):
     """
     Pick the first union branch that validates `datum` and serialize with it.
     Union serialization:
     if null:
         "null"
     else:
         {"<type>": value}
     The key is the schema name for named schemas, the type name otherwise.
     :param schema: Avro schema for this union
     :param datum: Data to serialize
     :return: dict {"type": value} or the result of `_serialize_null()`
     :raises AvroTypeException: if `datum` fits none of the union branches
     """
     for candidate_schema in schema.schemas:
         if not validate(candidate_schema, datum):
             continue
         if candidate_schema.type == "null":
             return self._serialize_null()
         if isinstance(candidate_schema, avro.schema.NamedSchema):
             field_type_name = candidate_schema.name
         else:
             field_type_name = candidate_schema.type
         return {
             field_type_name:
             self._serialize_data(candidate_schema, datum)
         }
     # The union schema argument has no `AvroTypeException` attribute;
     # raise the exception class itself.
     raise AvroTypeException(schema, datum)
    def _validate(self, schema, datum):
        """
        Check that a datum matches a schema.
        A datum equal to `self.UNSET` never validates. Fixed/bytes data is
        first passed through `_deserialize_binary_string`. Containers are
        checked recursively; everything else is delegated to `validate`.
        :param schema: Avro schema to match against the `datum`
        :param datum: Data to validate
        :return: truth value telling whether `datum` fits `schema`
        """
        if datum == self.UNSET:
            return False

        kind = schema.type

        # Deserialized as unicode, convert to str for avro validation
        if kind in ['fixed', 'bytes']:
            datum = self._deserialize_binary_string(schema, datum)

        # From the `avro.io.Validate` function in avro-python3.
        # Recursive calls replaced so missing field values and binary fields in containers
        # are handled properly (see self.UNSET and above binary handling).
        if kind == 'array':
            if not isinstance(datum, list):
                return False
            return all(self._validate(schema.items, item) for item in datum)

        if kind == 'map':
            if not isinstance(datum, dict):
                return False
            if not all(isinstance(key, basestring) for key in datum.keys()):
                return False
            return all(
                self._validate(schema.values, entry)
                for entry in datum.values())

        if kind in ['union', 'error_union']:
            return any(
                self._validate_union(branch, datum)
                for branch in schema.schemas)

        if kind in ['record', 'error', 'request']:
            if not isinstance(datum, dict):
                return False
            return all(
                self._validate(field.type, datum.get(field.name, self.UNSET))
                for field in schema.fields)

        return validate(schema, datum)
Example #7
0
    def __validate(self, writers_schema, datum):
        """Validate `datum` against `writers_schema`, honouring logical types.

        A ``logicalType`` declared on the schema is looked up in
        ``self.logical_types``; when found and able to convert the schema, its
        ``validate`` result is returned as a bool. Otherwise arrays, maps,
        unions and records recurse through this method, and any other schema
        is handed to ``io.validate``.

        :param writers_schema: Avro schema to validate against
        :param datum: Data to validate
        :return: True if `datum` is valid for the schema, False otherwise
        """
        logical_type = writers_schema.props.get('logicalType')
        if logical_type:
            lt = self.logical_types.get(logical_type)
            if lt and lt.can_convert(writers_schema):
                return bool(lt.validate(writers_schema, datum))

        schema_type = writers_schema.type
        # all()/any() over generators short-circuit, so no temporary result
        # lists are built and validation stops at the first decisive element.
        if schema_type == 'array':
            return (isinstance(datum, list) and
                    all(self.__validate(writers_schema.items, d) for d in datum))
        elif schema_type == 'map':
            return (isinstance(datum, dict) and
                    all(isinstance(k, basestring) for k in datum.keys()) and
                    all(self.__validate(writers_schema.values, v) for v in datum.values()))
        elif schema_type in ['union', 'error_union']:
            return any(self.__validate(s, datum) for s in writers_schema.schemas)
        elif schema_type in ['record', 'error', 'request']:
            return (isinstance(datum, dict) and
                    all(self.__validate(f.type, datum.get(f.name))
                        for f in writers_schema.fields))

        return io.validate(writers_schema, datum)
    def _validate(self, schema, datum):
        """
        Tell whether a datum matches a schema.
        `self.UNSET` is always rejected, fixed/bytes values are converted via
        `_deserialize_binary_string` before checking, container schemas are
        walked recursively and all remaining schemas go to `validate`.
        :param schema: Avro schema to match against the `datum`
        :param datum: Data to validate
        :return: truth value telling whether `datum` fits `schema`
        """
        if datum == self.UNSET:
            return False

        type_name = schema.type

        # Deserialized as unicode, convert to str for avro validation
        if type_name in ['fixed', 'bytes']:
            datum = self._deserialize_binary_string(schema, datum)

        # From the `avro.io.Validate` function in avro-python3.
        # Recursive calls replaced so missing field values and binary fields in containers
        # are handled properly (see self.UNSET and above binary handling).
        if type_name == 'array':
            if not isinstance(datum, list):
                return False
            return all(self._validate(schema.items, element) for element in datum)

        if type_name == 'map':
            if not isinstance(datum, dict):
                return False
            if not all(isinstance(name, basestring) for name in datum.keys()):
                return False
            return all(self._validate(schema.values, item) for item in datum.values())

        if type_name in ['union', 'error_union']:
            return any(self._validate_union(option, datum) for option in schema.schemas)

        if type_name in ['record', 'error', 'request']:
            if not isinstance(datum, dict):
                return False
            return all(self._validate(fld.type, datum.get(fld.name, self.UNSET))
                       for fld in schema.fields)

        return validate(schema, datum)
 def _validate(self, schema, datum):
     """
     Delegate schema validation of `datum` to the module-level `validate`.
     :param schema: Avro schema to match against the `datum`
     :param datum: Data to validate
     :return: the result of `validate(schema, datum)`
     """
     verdict = validate(schema, datum)
     return verdict
Example #10
0
def _is_valid(schema, d):
    """Return avro's verdict on whether `d` conforms to `schema`.

    The validator is imported on each call: ``avro.io.Validate`` is tried
    first, and ``avro.io.validate`` is used when that name cannot be imported.
    """
    try:
        from avro.io import Validate as avro_validate
    except ImportError:
        from avro.io import validate as avro_validate
    # NOTE: a DeprecationWarning ("Avro support is deprecated and will be
    # removed") used to be sketched here but is not emitted.
    return avro_validate(schema, d)
Example #11
0
 def test_validate(self):
     """Every (schema, datum) pair in SCHEMAS_TO_VALIDATE must validate."""
     outcomes = []
     for schema_json, sample in SCHEMAS_TO_VALIDATE:
         logging.debug('Schema: %r', schema_json)
         logging.debug('Datum: %r', sample)
         is_valid = avro_io.validate(schema.parse(schema_json), sample)
         logging.debug('Valid: %s', is_valid)
         outcomes.append(True if is_valid else False)
     # The number of truthy results must equal the number of examples.
     self.assertEqual(outcomes.count(True), len(SCHEMAS_TO_VALIDATE))
Example #12
0
 def test_validate(self):
     """Count the examples that validate and require that all of them do."""
     expected = len(SCHEMAS_TO_VALIDATE)
     passed = 0
     for raw_schema, example in SCHEMAS_TO_VALIDATE:
         logging.debug('Schema: %r', raw_schema)
         logging.debug('Datum: %r', example)
         ok = avro_io.validate(schema.parse(raw_schema), example)
         logging.debug('Valid: %s', ok)
         passed += 1 if ok else 0
     self.assertEqual(passed, expected)
Example #13
0
 def test_validate(self):
     """Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

     Each schema is parsed and the datum is checked with ``io.validate``;
     the test fails unless the validator returns a truthy result for all pairs.
     """
     print_test_name('TEST VALIDATE')
     passed = 0
     for example_schema, datum in SCHEMAS_TO_VALIDATE:
         print('Schema: %s' % example_schema)
         print('Datum: %s' % datum)
         validated = io.validate(schema.parse(example_schema), datum)
         print('Valid: %s' % validated)
         if validated:
             passed += 1
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
Example #14
0
 def test_validate(self):
     """Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

     Each schema is parsed and the datum is checked with ``io.validate``;
     the test fails unless the validator returns a truthy result for all pairs.
     """
     print_test_name('TEST VALIDATE')
     passed = 0
     for example_schema, datum in SCHEMAS_TO_VALIDATE:
         # A parenthesised single-argument print behaves the same as a
         # Python 2 print statement and is valid Python 3 as well.
         print('Schema: %s' % example_schema)
         print('Datum: %s' % datum)
         validated = io.validate(schema.parse(example_schema), datum)
         print('Valid: %s' % validated)
         if validated:
             passed += 1
     # assertEquals is a deprecated alias of assertEqual.
     self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
 def from_json(self, json_str):
     """
     Decode a JSON string into a datum that matches `self._avro_schema`.
     The string is parsed with `self._json_decoder` (which receives the
     schema as the `schema` keyword), converted by `_deserialize_data`
     and finally validated against the schema.
     :param json_str: JSON text to decode
     :return: the deserialized datum
     :raises AvroTypeException: if the resulting datum does not validate
     """
     annotated_datum = json.loads(json_str,
                                  cls=self._json_decoder,
                                  schema=self._avro_schema)
     datum = self._deserialize_data(self._avro_schema, annotated_datum)
     if not validate(self._avro_schema, datum):
         # Report the schema that was actually validated against; there is
         # no local named `schema` in this method.
         raise AvroTypeException(self._avro_schema, datum)
     return datum
Example #16
0
 def __init__(self, value, evaluator, schema, input_binding=None, key=''):
     """
     Store the evaluator, schema, adapter and resolved value for an input.
     :param value: raw input value; resolved through `evaluator.resolve`
     :param evaluator: object providing `resolve(value)`
     :param schema: schema of the value; a UnionSchema is narrowed to the
         first branch that validates `value`
     :param input_binding: explicit adapter; when falsy and `schema` is a
         Schema, the schema's 'inputBinding' property is used instead
     :param key: stored as `self.key`
     """
     self.evaluator = evaluator
     self.schema = schema
     adapter = input_binding
     if not adapter and isinstance(self.schema, Schema):
         adapter = self.schema.props.get('inputBinding')
     # `x or (isinstance(...) and ...)` yielded False for non-Schema
     # schemas, which made `has_adapter` True although no adapter existed.
     self.has_adapter = adapter is not None
     self.adapter = adapter or {}
     self.key = key
     if isinstance(self.schema, UnionSchema):
         for opt in self.schema.schemas:
             if validate(opt, value):
                 self.schema = opt
                 break
     self.value = evaluator.resolve(value)
Example #17
0
def _validate(schema, msg, d):
    """Validate a python dict against an avro schema.

    :param schema: avro schema handed to avro's validate function
    :param msg: label interpolated into the error message
    :param d: python dict to validate
    :return: True when `d` validates
    :raises IOError: when `d` does not validate, or when neither
        ``avro.io.Validate`` nor ``avro.io.validate`` can be imported
    """
    try:
        try:
            from avro.io import Validate as validate
        except ImportError:
            from avro.io import validate
    except ImportError:
        # IOError is kept for callers that already catch it, but the message
        # no longer claims the data itself is invalid.
        raise IOError(
            "Unable to validate {m}: avro.io validate could not be imported".format(m=msg))
    # FIXME(mkocher)(2016-7-16) Add a better error message than "Invalid"
    if not validate(schema, d):
        raise IOError("Invalid {m} ".format(m=msg))
    return True
Example #18
0
 def __init__(self, value, evaluator, schema, input_binding=None, key=''):
     """
     Record the evaluator, schema, adapter and resolved value for an input.
     :param value: raw input value; resolved through `evaluator.resolve`
     :param evaluator: object providing `resolve(value)`
     :param schema: schema of the value; a UnionSchema is replaced by the
         first of its branches that validates `value`
     :param input_binding: explicit adapter; when falsy and `schema` is a
         Schema, the schema's 'inputBinding' property is used instead
     :param key: stored as `self.key`
     """
     self.evaluator = evaluator
     self.schema = schema
     adapter = input_binding
     if not adapter and isinstance(self.schema, Schema):
         adapter = self.schema.props.get('inputBinding')
     # The former `x or (isinstance(...) and ...)` expression evaluated to
     # False for non-Schema schemas, so `has_adapter` was wrongly True.
     self.has_adapter = adapter is not None
     self.adapter = adapter or {}
     self.key = key
     if isinstance(self.schema, UnionSchema):
         for opt in self.schema.schemas:
             if validate(opt, value):
                 self.schema = opt
                 break
     self.value = evaluator.resolve(value)
Example #19
0
    def _serialize_data(self, schema, datum):
        """
        Generic serialization entry point.
        Validates `datum`, then dispatches on the schema type: primitive
        types are returned unchanged, complex types go through the matching
        entry of `COMPLEX_CONVERTERS`.
        :param schema: Avro schema of the `datum`
        :param datum: Data to serialize
        """
        if not validate(schema, datum):
            raise AvroTypeException(schema, datum)

        kind = schema.type
        if kind in AvroJsonSerializer.PRIMITIVE_CONVERTERS:
            return datum

        if kind in AvroJsonSerializer.COMPLEX_CONVERTERS:
            convert = self.COMPLEX_CONVERTERS[kind]
            return convert(self, schema, datum)

        raise avro.schema.AvroException("Unknown type: %s" % kind)
    def _serialize_data(self, schema, datum):
        """
        Generic serialization entry point.
        Validates `datum`, then looks the schema type up in the primitive and
        complex converter tables and applies the converter that is found.
        :param schema: Avro schema of the `datum`
        :param datum: Data to serialize
        """
        if not validate(schema, datum):
            raise AvroTypeException(schema, datum)

        type_name = schema.type
        if type_name in AvroJsonSerializer.PRIMITIVE_CONVERTERS:
            to_primitive = self.PRIMITIVE_CONVERTERS[type_name]
            return to_primitive(datum)

        if type_name in AvroJsonSerializer.COMPLEX_CONVERTERS:
            to_complex = self.COMPLEX_CONVERTERS[type_name]
            return to_complex(self, schema, datum)

        raise avro.schema.AvroException("Unknown type: %s" % type_name)
Example #21
0
def construct_files(val, schema):
    """Walk `val` along `schema`, converting 'File' records on the way.

    Arrays are mapped element-wise, records field by field (only fields
    named in the schema are kept), and unions follow the first branch that
    validates `val`. Truthy 'File' records become ``map_rec_list(File, val)``;
    anything else is returned unchanged.
    """
    kind = schema.type

    if kind == 'array':
        return [construct_files(item, schema.items) for item in val]

    if kind == 'record':
        if schema.name != 'File':
            return {
                field.name: construct_files(val.get(field.name), field.type)
                for field in schema.fields
            }
        if not val:
            return val
        return map_rec_list(File, val)

    if kind == 'union':
        for branch in schema.schemas:
            if validate(branch, val):
                return construct_files(val, branch)

    return val
Example #22
0
def construct_files(val, schema):
    """Rebuild `val` following `schema`, wrapping 'File' records in ``File``.

    Arrays are processed element-wise and records field by field (only the
    schema's fields are kept). A union delegates to the first branch that
    validates `val`. A truthy 'File' record becomes ``File(val)``; any other
    value is returned as is.
    """
    schema_type = schema.type

    if schema_type == 'array':
        return [construct_files(element, schema.items) for element in val]

    if schema_type == 'record':
        if schema.name != 'File':
            rebuilt = {}
            for field in schema.fields:
                rebuilt[field.name] = construct_files(
                    val.get(field.name), field.type)
            return rebuilt
        if val:
            return File(val)
        return val

    if schema_type == 'union':
        for option in schema.schemas:
            if validate(option, val):
                return construct_files(val, option)

    return val
Example #23
0
    def __init__(self, value, evaluator, schema, input_binding=None, key=''):
        """Set up evaluator, schema, adapter, key and value for an input.

        When `input_binding` is None and `schema` is a Schema, the schema's
        'inputBinding' property serves as the adapter. A UnionSchema is
        narrowed to the first branch that validates `value`. If the adapter
        has a truthy 'valueFrom', the value is the evaluator's resolution of
        that expression (given `value.to_dict()` when available, else
        `value`); otherwise `value` is stored as is.
        """
        self.evaluator = evaluator
        self.schema = schema

        binding = input_binding
        if binding is None and isinstance(self.schema, Schema):
            binding = self.schema.props.get('inputBinding')

        self.has_adapter = binding is not None
        self.adapter = binding or {}
        self.key = key

        if isinstance(self.schema, UnionSchema):
            for branch in self.schema.schemas:
                if validate(branch, value):
                    self.schema = branch
                    break

        value_from = self.adapter.get('valueFrom')
        if hasattr(value, 'to_dict'):
            context = value.to_dict()
        else:
            context = value
        if value_from:
            self.value = evaluator.resolve(value_from, context)
        else:
            self.value = value
Example #24
0
    def __init__(self, value, evaluator, schema, input_binding=None, key=''):
        """Initialise evaluator, schema, adapter, key and value.

        The adapter is `input_binding`, or the schema's 'inputBinding'
        property when `input_binding` is None and `schema` is a Schema. A
        UnionSchema is replaced by its first branch that validates `value`.
        A truthy 'valueFrom' entry in the adapter is resolved through the
        evaluator against `value.to_dict()` (or `value` when it has no
        `to_dict`); without it the raw `value` is kept.
        """
        self.evaluator = evaluator
        self.schema = schema

        adapter = input_binding
        if adapter is None and isinstance(self.schema, Schema):
            adapter = self.schema.props.get('inputBinding')

        self.has_adapter = adapter is not None
        self.adapter = adapter or {}
        self.key = key

        if isinstance(self.schema, UnionSchema):
            for option in self.schema.schemas:
                if validate(option, value):
                    self.schema = option
                    break

        expression = self.adapter.get('valueFrom')
        payload = value.to_dict() if hasattr(value, 'to_dict') else value
        if expression:
            self.value = evaluator.resolve(expression, payload)
        else:
            self.value = value
Example #25
0
def validate(schema, obj):
    """Validate an object against its schema.

    Right now, this just checks it against the Avro schema; however in
    the future we will want to impose additional constraints which
    aren't expressible in the schema.

    :param schema: schema name; 'uuid' (with a ``uuid.UUID`` object) and
        'Readings' are accepted without an Avro check
    :param obj: object to validate
    :return: True for the special cases above, otherwise the result of
        ``io.validate``; False if conversion or validation raises
    """
    if schema == 'uuid' and isinstance(obj, uuid.UUID):
        return True
    elif schema == 'Readings':
        return True

    s = SCHEMA_NAMES.get_name(schema, None)
    # swap the uuid for the byte-packed encoding we use with avro
    try:
        original_uuid = convert_uuids(obj)
        try:
            return io.validate(s, obj)
        finally:
            # Put the caller's uuid back even when validation raises, so a
            # failed check does not leave `obj` modified.
            if original_uuid:
                obj['uuid'] = original_uuid
    except Exception:
        # Any failure counts as "not valid"; unlike a bare `except:` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return False
Example #26
0
def main(argv):
    """Validate JSON files against an Avro schema and print a summary.

    :param argv: argument list; ``argv[1]`` is the schema file and
        ``argv[2:]`` are the JSON files to check. With fewer than three
        entries only a usage message and an empty summary are printed.

    Files are sorted into three groups: valid, invalid against the schema,
    and not loadable (a ValueError was raised while processing them).
    """
    valid = set()
    invalid_avro = set()
    invalid_json = set()

    if len(argv) < 3:
        # Parenthesised prints work on both Python 2 and Python 3.
        print("Give me an avro schema file and a whitespace-separated list of json files to validate against it.")
    else:
        # Context managers close the file handles deterministically.
        with open(argv[1]) as schema_file:
            schema = parse(schema_file.read())
        for arg in argv[2:]:
            try:
                with open(arg, 'r') as json_file:
                    document = loads(json_file.read())
                if validate(schema, document):
                    valid.add(arg)
                else:
                    invalid_avro.add(arg)
            except ValueError:
                invalid_json.add(arg)
    print('Valid files:\n\t' + '\n\t'.join(valid))
    print('Invalid avro:\n\t' + '\n\t'.join(invalid_avro))
    print('Invalid json:\n\t' + '\n\t'.join(invalid_json))
Example #27
0
def main(argv):
    """Check JSON files against an Avro schema and print the outcome.

    :param argv: argument list; ``argv[1]`` names the schema file and
        ``argv[2:]`` the JSON files. With fewer than three entries a usage
        message is printed, followed by an empty summary.

    Each file ends up in one of three groups: valid, invalid against the
    schema, or invalid JSON (a ValueError was raised while processing it).
    """
    valid = set()
    invalid_avro = set()
    invalid_json = set()

    if len(argv) < 3:
        # Parenthesised prints work on both Python 2 and Python 3.
        print("Give me an avro schema file and a whitespace-separated list of json files to validate against it.")
    else:
        # `with` guarantees the schema and data files are closed again.
        with open(argv[1]) as schema_handle:
            schema = parse(schema_handle.read())
        for arg in argv[2:]:
            try:
                with open(arg, 'r') as data_handle:
                    parsed = loads(data_handle.read())
                if validate(schema, parsed):
                    valid.add(arg)
                else:
                    invalid_avro.add(arg)
            except ValueError:
                invalid_json.add(arg)
    print('Valid files:\n\t' + '\n\t'.join(valid))
    print('Invalid avro:\n\t' + '\n\t'.join(invalid_avro))
    print('Invalid json:\n\t' + '\n\t'.join(invalid_json))
 def _serialize_union(self, schema, datum):
     """
     With union schema has multiple possible schemas.
     We iterate over possible schemas and use the first one that fits the
     `datum` passed.
     Union serialization:
     if null:
         "null"
     else:
         value
     :param schema: Avro schema for this union
     :param datum: Data to serialize
     :return: the result of `_process_data` for the matching branch, or of
         `_process_null()` when the matching branch is the null type
     :raises AvroTypeException: if no branch of the union validates `datum`
     """
     for candidate_schema in schema.schemas:
         if not validate(candidate_schema, datum):
             continue
         if candidate_schema.type == "null":
             return self._process_null()
         # Return the processed value; without the `return` the result was
         # dropped and a matching datum still ended in AvroTypeException.
         return self._process_data(candidate_schema, datum)
     raise AvroTypeException(schema, datum)
def validate_schema(request):
    """Validate the JSON request body against the content type's schema.

    The repository is located under the configured 'repo.storage_path'
    using the 'name' route parameter, and the schema is fetched for the
    'content_type' route parameter. On a schema mismatch, or when the
    payload's 'uuid' differs from a non-None 'uuid' route parameter, a 400
    error is recorded on ``request.errors``. Otherwise the schema (as JSON)
    and the parsed data are attached to the request.
    """
    settings = get_config(request)
    storage_root = settings.get('repo.storage_path')
    repo_path = os.path.join(storage_root, request.matchdict['name'])
    repo = get_repository(repo_path)
    expected_uuid = request.matchdict['uuid']
    content_type = request.matchdict['content_type']
    schema = get_schema(repo, content_type)
    payload = json.loads(request.body)

    if not validate(schema, payload):
        request.errors.status = 400
        request.errors.add(
            'body', 'schema',
            'Data does not match the schema for %s' % (content_type, ))
        return

    if expected_uuid is not None and payload['uuid'] != expected_uuid:
        request.errors.status = 400
        request.errors.add('body', 'uuid',
                           'Payload UUID does not match URL UUID.')
        return

    request.schema = schema.to_json()
    request.schema_data = payload
Example #30
0
def _validate_with_schema(schema, d):
    """Run `validate` on `d` and hand `d` back unchanged.

    The return value of `validate` is not inspected here.
    NOTE(review): if `validate` reports failure by returning False rather
    than raising, invalid data passes through silently - confirm.
    """
    checked = d
    validate(schema, checked)
    return checked
Example #31
0
def _validate_with_schema(schema, d):
    """Call `validate(schema, d)` and return `d` itself.

    Whatever `validate` returns is discarded.
    NOTE(review): this only rejects bad data if `validate` raises on
    failure - confirm against the imported `validate`.
    """
    payload = d
    validate(schema, payload)
    return payload
Example #32
0
def _validate(schema, d):
    """Check a python dict against an avro schema and return the result."""
    result = validate(schema, d)
    return result
Example #33
0
def _validate(schema, msg, d):
    """Check a python dict against an avro schema.

    Returns True when `d` validates; otherwise raises IOError with `msg`
    interpolated into the message.
    """
    if validate(schema, d):
        return True
    # FIXME(mkocher)(2016-7-16) Add a better error message than "Invalid"
    raise IOError("Invalid {m} ".format(m=msg))
Example #34
0
def _is_valid(schema, d):
    """Return the result of `validate(schema, d)`."""
    verdict = validate(schema, d)
    return verdict
Example #35
0
def _validate(schema, msg, d):
    """Validate a python dict against an avro schema, raising on failure.

    :return: True when `d` validates
    :raises IOError: when `d` does not validate; `msg` is part of the text
    """
    is_ok = validate(schema, d)
    if is_ok:
        return True
    # FIXME(mkocher)(2016-7-16) Add a better error message than "Invalid"
    raise IOError("Invalid {m} ".format(m=msg))
Example #36
0
def _is_valid(schema, d):
    """Tell whether `d` is valid for `schema`, as judged by `validate`."""
    outcome = validate(schema, d)
    return outcome