def test_validate(self):
    """
    Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

    A pair counts as valid when `io.validate` completes without raising
    `io.AvroTypeException`; the return value of `io.validate` is not
    inspected here.
    """
    print_test_name('TEST VALIDATE')
    passed = 0
    for example_schema, datum in SCHEMAS_TO_VALIDATE:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Schema: %s' % example_schema)
        print('Datum: %s' % datum)
        validated = False
        try:
            io.validate(schema.parse(example_schema), datum)
            validated = True
        except io.AvroTypeException:
            # A type mismatch is an expected "not valid" outcome, not an error.
            pass
        print('Valid: %s' % validated)
        if validated:
            passed += 1
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
def _validate(self, schema, datum):
    """
    Check whether `datum` conforms to `schema`.

    Thin wrapper that defers entirely to the module-level `validate`.

    :param schema: Avro schema the `datum` is checked against
    :param datum: Data to check
    :return: whatever `validate` returns for this schema/datum pair
    """
    outcome = validate(schema, datum)
    return outcome
def _serialize_union(self, schema, datum):
    """
    Serialize `datum` with the first branch of a union schema that accepts it.

    The branches in `schema.schemas` are tried in order and the first one
    for which `validate` succeeds is used.

    Union serialization:
        if null:
            "null"
        else:
            {"<type>": value}

    :param schema: Avro schema for this union
    :param datum: Data to serialize
    :return: dict {"type": value} or "null"
    :raises AvroTypeException: if no branch of the union accepts `datum`
    """
    for candidate_schema in schema.schemas:
        if not validate(candidate_schema, datum):
            continue
        if candidate_schema.type == "null":
            return self._serialize_null()
        # Named schemas are keyed by their name, all others by their type.
        field_type_name = candidate_schema.type
        if isinstance(candidate_schema, avro.schema.NamedSchema):
            field_type_name = candidate_schema.name
        return {
            field_type_name: self._serialize_data(candidate_schema, datum)
        }

    # `schema` is the union schema argument here, not a module, so the
    # exception class cannot be looked up on it; import it explicitly.
    from avro.io import AvroTypeException
    raise AvroTypeException(schema, datum)
def __validate(self, writers_schema, datum):
    """
    Validate `datum` against `writers_schema`, honouring logical types.

    If the schema declares a `logicalType` that is registered in
    `self.logical_types` and can convert this schema, the logical type's own
    `validate` decides the outcome. Otherwise containers (array, map, union,
    record) are validated recursively and everything else is delegated to
    `io.validate`.

    :param writers_schema: Avro schema to validate against
    :param datum: Data to validate
    :return: True if the datum is accepted, False otherwise (for the
        fall-through case, whatever `io.validate` returns)
    """
    logical_name = writers_schema.props.get('logicalType')
    if logical_name:
        handler = self.logical_types.get(logical_name)
        # NOTE(review): the logical type only decides when it is registered
        # and can_convert() accepts the schema; otherwise we fall through to
        # structural validation -- confirm against the original nesting.
        if handler and handler.can_convert(writers_schema):
            return True if handler.validate(writers_schema, datum) else False

    kind = writers_schema.type

    if kind == 'array':
        if not isinstance(datum, list):
            return False
        item_results = [
            self.__validate(writers_schema.items, item) for item in datum
        ]
        return False not in item_results

    if kind == 'map':
        if not isinstance(datum, dict):
            return False
        key_results = [isinstance(key, basestring) for key in datum.keys()]
        if False in key_results:
            return False
        value_results = [
            self.__validate(writers_schema.values, value)
            for value in datum.values()
        ]
        return False not in value_results

    if kind in ('union', 'error_union'):
        branch_results = [
            self.__validate(branch, datum) for branch in writers_schema.schemas
        ]
        return True in branch_results

    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, dict):
            return False
        # Missing fields are validated as None (dict.get default).
        field_results = [
            self.__validate(field.type, datum.get(field.name))
            for field in writers_schema.fields
        ]
        return False not in field_results

    return io.validate(writers_schema, datum)
def _validate(self, schema, datum):
    """
    Validate a datum matches a schema.

    A datum equal to `self.UNSET` is always rejected. Values for `fixed` and
    `bytes` schemas are first passed through
    `self._deserialize_binary_string`. Containers are walked recursively
    through this method; anything else is delegated to `validate`.

    :param schema: Avro schema to match against the `datum`
    :param datum: Data to validate
    :return: True if the datum matches, False otherwise (for the
        fall-through case, whatever `validate` returns)
    """
    if datum == self.UNSET:
        return False

    kind = schema.type

    # Deserialized as unicode, convert to str for avro validation
    if kind in ('fixed', 'bytes'):
        datum = self._deserialize_binary_string(schema, datum)

    # From the `avro.io.Validate` function in avro-python3.
    # Recursive calls replaced so missing field values and binary fields in
    # containers are handled properly (see self.UNSET and above binary
    # handling).
    if kind == 'array':
        if not isinstance(datum, list):
            return False
        for item in datum:
            if not self._validate(schema.items, item):
                return False
        return True

    if kind == 'map':
        if not isinstance(datum, dict):
            return False
        # All keys are checked before any value is validated.
        for key in datum.keys():
            if not isinstance(key, basestring):
                return False
        for value in datum.values():
            if not self._validate(schema.values, value):
                return False
        return True

    if kind in ('union', 'error_union'):
        for branch in schema.schemas:
            if self._validate_union(branch, datum):
                return True
        return False

    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, dict):
            return False
        for field in schema.fields:
            # Absent fields are represented by the UNSET marker, which the
            # guard at the top of this method rejects.
            field_value = datum.get(field.name, self.UNSET)
            if not self._validate(field.type, field_value):
                return False
        return True

    return validate(schema, datum)
def __validate(self, writers_schema, datum):
    """
    Validate `datum` against `writers_schema`, honouring logical types.

    A registered logical type that can convert the schema decides the result
    on its own. Otherwise arrays, maps, unions and records are validated
    recursively and all remaining types are delegated to `io.validate`.

    :param writers_schema: Avro schema to validate against
    :param datum: Data to validate
    :return: True if the datum is accepted, False otherwise (for the
        fall-through case, whatever `io.validate` returns)
    """
    logical_name = writers_schema.props.get('logicalType')
    if logical_name:
        handler = self.logical_types.get(logical_name)
        # NOTE(review): only a registered handler whose can_convert()
        # accepts the schema short-circuits here; every other case falls
        # through -- confirm against the original nesting.
        if handler and handler.can_convert(writers_schema):
            if handler.validate(writers_schema, datum):
                return True
            return False

    kind = writers_schema.type

    if kind == 'array':
        if not isinstance(datum, list):
            return False
        checks = [self.__validate(writers_schema.items, elem) for elem in datum]
        return False not in checks

    if kind == 'map':
        if not isinstance(datum, dict):
            return False
        key_checks = [isinstance(name, basestring) for name in datum.keys()]
        if False in key_checks:
            return False
        value_checks = [
            self.__validate(writers_schema.values, entry)
            for entry in datum.values()
        ]
        return False not in value_checks

    if kind in ('union', 'error_union'):
        checks = [self.__validate(alt, datum) for alt in writers_schema.schemas]
        return True in checks

    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, dict):
            return False
        # Missing fields are validated as None (dict.get default).
        checks = [
            self.__validate(fld.type, datum.get(fld.name))
            for fld in writers_schema.fields
        ]
        return False not in checks

    return io.validate(writers_schema, datum)
def _validate(self, schema, datum):
    """
    Validate a datum matches a schema.

    Rejects any datum equal to `self.UNSET`, converts `fixed`/`bytes` values
    with `self._deserialize_binary_string`, recurses into containers, and
    hands every other type to `validate`.

    :param schema: Avro schema to match against the `datum`
    :param datum: Data to validate
    :return: True if the datum matches, False otherwise (for the
        fall-through case, whatever `validate` returns)
    """
    if datum == self.UNSET:
        return False

    type_name = schema.type

    # Deserialized as unicode, convert to str for avro validation
    if type_name in ('fixed', 'bytes'):
        datum = self._deserialize_binary_string(schema, datum)

    # From the `avro.io.Validate` function in avro-python3.
    # Recursive calls replaced so missing field values and binary fields in
    # containers are handled properly (see self.UNSET and above binary
    # handling).
    if type_name == 'array':
        if not isinstance(datum, list):
            return False
        return all(self._validate(schema.items, elem) for elem in datum)

    if type_name == 'map':
        if not isinstance(datum, dict):
            return False
        if not all(isinstance(name, basestring) for name in datum.keys()):
            return False
        return all(
            self._validate(schema.values, entry) for entry in datum.values()
        )

    if type_name in ('union', 'error_union'):
        return any(self._validate_union(alt, datum) for alt in schema.schemas)

    if type_name in ('record', 'error', 'request'):
        if not isinstance(datum, dict):
            return False
        # Absent fields are looked up as self.UNSET, which is rejected above.
        return all(
            self._validate(fld.type, datum.get(fld.name, self.UNSET))
            for fld in schema.fields
        )

    return validate(schema, datum)
def _is_valid(schema, d): try: from avro.io import Validate as validate except ImportError: from avro.io import validate # warnings.warn("Avro support is deprecated and will be removed", # DeprecationWarning) return validate(schema, d)
def test_validate(self):
    """Every (schema, datum) pair in SCHEMAS_TO_VALIDATE must validate."""
    outcomes = []
    for schema_json, sample in SCHEMAS_TO_VALIDATE:
        logging.debug('Schema: %r', schema_json)
        logging.debug('Datum: %r', sample)
        is_valid = avro_io.validate(schema.parse(schema_json), sample)
        logging.debug('Valid: %s', is_valid)
        outcomes.append(is_valid)

    # Count the truthy results and require that none were rejected.
    passed = sum(1 for ok in outcomes if ok)
    self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
def test_validate(self):
    """
    Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

    Each schema is parsed with `schema.parse` and the datum is checked with
    `io.validate`; the number of truthy results must equal the number of
    pairs.
    """
    print_test_name('TEST VALIDATE')
    passed = 0
    for example_schema, datum in SCHEMAS_TO_VALIDATE:
        print('Schema: %s' % example_schema)
        print('Datum: %s' % datum)
        validated = io.validate(schema.parse(example_schema), datum)
        print('Valid: %s' % validated)
        if validated:
            passed += 1
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
def test_validate(self):
    """
    Check that every (schema, datum) pair in SCHEMAS_TO_VALIDATE validates.

    Each schema is parsed with `schema.parse` and the datum is checked with
    `io.validate`; the number of truthy results must equal the number of
    pairs.
    """
    print_test_name('TEST VALIDATE')
    passed = 0
    for example_schema, datum in SCHEMAS_TO_VALIDATE:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Schema: %s' % example_schema)
        print('Datum: %s' % datum)
        validated = io.validate(schema.parse(example_schema), datum)
        print('Valid: %s' % validated)
        if validated:
            passed += 1
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(passed, len(SCHEMAS_TO_VALIDATE))
def from_json(self, json_str):
    """
    Decode a JSON string into a datum and validate it against the schema.

    The string is parsed with `self._json_decoder` (which receives the Avro
    schema as the `schema` keyword), converted with
    `self._deserialize_data`, and finally checked with `validate`.

    :param json_str: JSON text to decode
    :return: the deserialized datum
    :raises AvroTypeException: if the deserialized datum does not match
        `self._avro_schema`
    """
    annotated_datum = json.loads(
        json_str, cls=self._json_decoder, schema=self._avro_schema)
    datum = self._deserialize_data(self._avro_schema, annotated_datum)
    if not validate(self._avro_schema, datum):
        # Report the schema that was actually validated against; there is no
        # local called `schema` in this method.
        raise AvroTypeException(self._avro_schema, datum)
    return datum
def __init__(self, value, evaluator, schema, input_binding=None, key=''):
    """
    Bind a value to its schema and input-binding adapter.

    The adapter is `input_binding` when it is truthy; otherwise, if `schema`
    is a `Schema`, it is taken from the schema's `inputBinding` property.
    `has_adapter` records whether an adapter was found (is not None), and
    `adapter` is always a dict-like object ({} when none was found).

    For a `UnionSchema` the first branch that validates `value` replaces
    `self.schema`. The stored value is `evaluator.resolve(value)`.

    :param value: raw value to wrap
    :param evaluator: object providing `resolve`
    :param schema: schema describing `value`
    :param input_binding: explicit adapter overriding the schema's own
    :param key: stored on the instance as `self.key`
    """
    self.evaluator = evaluator
    self.schema = schema
    self.key = key

    adapter = input_binding
    if not adapter and isinstance(self.schema, Schema):
        adapter = self.schema.props.get('inputBinding')
    # Computed from the looked-up adapter itself: the previous
    # `x or (isinstance(...) and ...)` form produced False for non-Schema
    # inputs, and False "is not None", which reported a phantom adapter.
    self.has_adapter = adapter is not None
    self.adapter = adapter or {}

    if isinstance(self.schema, UnionSchema):
        # Narrow the union to the first branch that accepts the value.
        for opt in self.schema.schemas:
            if validate(opt, value):
                self.schema = opt
                break

    self.value = evaluator.resolve(value)
def _validate(schema, msg, d): try: """Validate a python dict against a avro schema""" try: from avro.io import Validate as validate except ImportError: from avro.io import validate # warnings.warn("Avro support is deprecated and will be removed", # DeprecationWarning) # FIXME(mkocher)(2016-7-16) Add a better error message than "Invalid" if not validate(schema, d): raise IOError("Invalid {m} ".format(m=msg)) return True except ImportError: raise IOError("Invalid {m} ".format(m=msg))
def __init__(self, value, evaluator, schema, input_binding=None, key=''):
    """
    Bind a value to its schema and input-binding adapter.

    The adapter is `input_binding` when it is truthy; otherwise, if `schema`
    is a `Schema`, it is taken from the schema's `inputBinding` property.
    `has_adapter` records whether an adapter was found (is not None), and
    `adapter` is always a dict-like object ({} when none was found).

    For a `UnionSchema` the first branch that validates `value` replaces
    `self.schema`. The stored value is `evaluator.resolve(value)`.

    :param value: raw value to wrap
    :param evaluator: object providing `resolve`
    :param schema: schema describing `value`
    :param input_binding: explicit adapter overriding the schema's own
    :param key: stored on the instance as `self.key`
    """
    self.evaluator = evaluator
    self.schema = schema
    self.key = key

    adapter = input_binding
    if not adapter and isinstance(self.schema, Schema):
        adapter = self.schema.props.get('inputBinding')
    # Computed from the looked-up adapter itself: the previous
    # `x or (isinstance(...) and ...)` form produced False for non-Schema
    # inputs, and False "is not None", which reported a phantom adapter.
    self.has_adapter = adapter is not None
    self.adapter = adapter or {}

    if isinstance(self.schema, UnionSchema):
        # Narrow the union to the first branch that accepts the value.
        for opt in self.schema.schemas:
            if validate(opt, value):
                self.schema = opt
                break

    self.value = evaluator.resolve(value)
def _serialize_data(self, schema, datum):
    """
    Generic serialization entry point.

    Validates `datum` first, then dispatches on `schema.type`: primitive
    types are returned unchanged, complex types go through the matching
    entry of `COMPLEX_CONVERTERS`.

    :param schema: Avro schema of the `datum`
    :param datum: Data to serialize
    :return: the serialized representation of `datum`
    :raises AvroTypeException: if `datum` does not match `schema`
    :raises avro.schema.AvroException: if `schema.type` is not a known type
    """
    if not validate(schema, datum):
        raise AvroTypeException(schema, datum)

    kind = schema.type
    if kind in AvroJsonSerializer.PRIMITIVE_CONVERTERS:
        return datum
    if kind in AvroJsonSerializer.COMPLEX_CONVERTERS:
        convert = self.COMPLEX_CONVERTERS[kind]
        return convert(self, schema, datum)

    raise avro.schema.AvroException("Unknown type: %s" % kind)
def _serialize_data(self, schema, datum):
    """
    Generic serialization entry point.

    Validates `datum` first, then dispatches on `schema.type` to the
    matching entry of `PRIMITIVE_CONVERTERS` (called with the datum only) or
    `COMPLEX_CONVERTERS` (called with self, schema and datum).

    :param schema: Avro schema of the `datum`
    :param datum: Data to serialize
    :return: the serialized representation of `datum`
    :raises AvroTypeException: if `datum` does not match `schema`
    :raises avro.schema.AvroException: if `schema.type` is not a known type
    """
    if not validate(schema, datum):
        raise AvroTypeException(schema, datum)

    kind = schema.type
    if kind in AvroJsonSerializer.PRIMITIVE_CONVERTERS:
        convert = self.PRIMITIVE_CONVERTERS[kind]
        return convert(datum)
    if kind in AvroJsonSerializer.COMPLEX_CONVERTERS:
        convert = self.COMPLEX_CONVERTERS[kind]
        return convert(self, schema, datum)

    raise avro.schema.AvroException("Unknown type: %s" % kind)
def construct_files(val, schema):
    """
    Walk `val` along `schema` and build File objects where the schema says so.

    - array: each element is processed with the array's item schema
    - record named 'File': a truthy value is passed to
      `map_rec_list(File, val)`; a falsy value is returned unchanged
    - other record: a new dict with one entry per schema field
    - union: processed with the first branch that validates the value
    - anything else (or a union with no matching branch): returned as is

    :param val: value to convert
    :param schema: schema describing `val`
    :return: the converted value
    """
    kind = schema.type

    if kind == 'array':
        return [construct_files(item, schema.items) for item in val]

    if kind == 'record':
        if schema.name == 'File':
            if val:
                return map_rec_list(File, val)
            return val
        # Only fields declared in the schema are carried over.
        return dict(
            (field.name, construct_files(val.get(field.name), field.type))
            for field in schema.fields
        )

    if kind == 'union':
        for branch in schema.schemas:
            if validate(branch, val):
                return construct_files(val, branch)

    return val
def construct_files(val, schema):
    """
    Walk `val` along `schema` and build File objects where the schema says so.

    - array: each element is processed with the array's item schema
    - record named 'File': a truthy value is wrapped as `File(val)`; a falsy
      value is returned unchanged
    - other record: a new dict with one entry per schema field
    - union: processed with the first branch that validates the value
    - anything else (or a union with no matching branch): returned as is

    :param val: value to convert
    :param schema: schema describing `val`
    :return: the converted value
    """
    kind = schema.type

    if kind == 'array':
        return [construct_files(item, schema.items) for item in val]

    if kind == 'record':
        if schema.name == 'File':
            if val:
                return File(val)
            return val
        # Only fields declared in the schema are carried over.
        return dict(
            (field.name, construct_files(val.get(field.name), field.type))
            for field in schema.fields
        )

    if kind == 'union':
        for branch in schema.schemas:
            if validate(branch, val):
                return construct_files(val, branch)

    return val
def __init__(self, value, evaluator, schema, input_binding=None, key=''):
    """
    Bind a value to its schema and input-binding adapter.

    When `input_binding` is None and `schema` is a `Schema`, the adapter is
    read from the schema's `inputBinding` property. `has_adapter` is True
    when the resulting binding is not None; `adapter` falls back to {}.

    For a `UnionSchema` the first branch that validates `value` replaces
    `self.schema`. If the adapter has a truthy `valueFrom` entry, the stored
    value is `evaluator.resolve(valueFrom, context)`, where the context is
    `value.to_dict()` when available and `value` otherwise; without
    `valueFrom` the value is stored unchanged.

    :param value: raw value to wrap
    :param evaluator: object providing `resolve`
    :param schema: schema describing `value`
    :param input_binding: explicit adapter overriding the schema's own
    :param key: stored on the instance as `self.key`
    """
    self.key = key
    self.evaluator = evaluator
    self.schema = schema

    binding = input_binding
    if binding is None and isinstance(self.schema, Schema):
        binding = self.schema.props.get('inputBinding')
    self.has_adapter = binding is not None
    self.adapter = binding if binding else {}

    if isinstance(self.schema, UnionSchema):
        # Narrow the union to the first branch that accepts the value.
        for candidate in self.schema.schemas:
            if validate(candidate, value):
                self.schema = candidate
                break

    value_from = self.adapter.get('valueFrom')
    # The context is built even when there is no expression to evaluate.
    if hasattr(value, 'to_dict'):
        context = value.to_dict()
    else:
        context = value

    if value_from:
        self.value = evaluator.resolve(value_from, context)
    else:
        self.value = value
def validate(schema, obj):
    """Validate an object against its schema.

    Right now, this just checks it against the Avro schema; however
    in the future we will want to impose additional constraints which
    aren't expressable in the schema.

    :param schema: schema name; 'uuid' (with a `uuid.UUID` object) and
        'Readings' are accepted without consulting Avro
    :param obj: object to validate
    :return: True/False for the two special cases, otherwise the result of
        `io.validate`; False if conversion or validation raises
    """
    if schema == 'uuid' and isinstance(obj, uuid.UUID):
        return True
    if schema == 'Readings':
        return True

    avro_schema = SCHEMA_NAMES.get_name(schema, None)
    try:
        # swap the uuid for the byte-packed encoding we use with avro
        original_uuid = convert_uuids(obj)
        try:
            return io.validate(avro_schema, obj)
        finally:
            # Put the caller's uuid back even when validation raises, so the
            # object is never left in its byte-packed form.
            if original_uuid:
                obj['uuid'] = original_uuid
    except Exception:
        # Any failure while converting or validating means "not valid";
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        return False
def main(argv):
    """
    Validate JSON files against an Avro schema and print a summary.

    `argv[1]` is the schema file and `argv[2:]` are the JSON files. Each
    file ends up in exactly one of three groups: valid, invalid avro
    (parsed but rejected by `validate`) or invalid json (a ValueError was
    raised while loading or validating). With fewer than three arguments a
    usage message is printed instead.

    :param argv: command-line argument list, program name first
    """
    valid = set()
    invalid_avro = set()
    invalid_json = set()

    if len(argv) < 3:
        print("Give me an avro schema file and a whitespace-separated list of json files to validate against it.")
    else:
        # Context managers close the files promptly instead of leaking them.
        with open(argv[1]) as schema_file:
            schema = parse(schema_file.read())
        for arg in argv[2:]:
            try:
                with open(arg, 'r') as json_file:
                    document = loads(json_file.read())
                if validate(schema, document):
                    valid.add(arg)
                else:
                    invalid_avro.add(arg)
            except ValueError:
                invalid_json.add(arg)

    # NOTE(review): the summary is printed unconditionally, matching the
    # result sets being initialised before the argument check -- confirm.
    # Sorting keeps the report stable between runs.
    print('Valid files:\n\t' + '\n\t'.join(sorted(valid)))
    print('Invalid avro:\n\t' + '\n\t'.join(sorted(invalid_avro)))
    print('Invalid json:\n\t' + '\n\t'.join(sorted(invalid_json)))
def _serialize_union(self, schema, datum):
    """
    Serialize `datum` with the first branch of a union schema that accepts it.

    The branches in `schema.schemas` are tried in order and the first one
    for which `validate` succeeds is used.

    Union serialization:
        if null:
            "null"
        else:
            value

    :param schema: Avro schema for this union
    :param datum: Data to serialize
    :return: value or "null"
    :raises AvroTypeException: if no branch of the union accepts `datum`
    """
    for candidate_schema in schema.schemas:
        if validate(candidate_schema, datum):
            if candidate_schema.type == "null":
                return self._process_null()
            # The processed value must be returned; without the return the
            # result was dropped and the method ended in the raise below.
            return self._process_data(candidate_schema, datum)
    raise AvroTypeException(schema, datum)
def validate_schema(request):
    """
    Validate the JSON request body against the schema of its content type.

    The repository is located from the configured `repo.storage_path` and
    the `name` URL segment; the schema is looked up for the `content_type`
    segment. On failure a 400 status and an error entry are recorded on
    `request.errors`; on success the schema (as JSON) and the parsed body
    are attached to the request as `schema` and `schema_data`.

    :param request: incoming request carrying `matchdict`, `body`, `errors`
    """
    settings = get_config(request)
    repo_root = settings.get('repo.storage_path')
    repo = get_repository(os.path.join(repo_root, request.matchdict['name']))

    expected_uuid = request.matchdict['uuid']
    content_type = request.matchdict['content_type']
    schema = get_schema(repo, content_type)
    payload = json.loads(request.body)

    if not validate(schema, payload):
        request.errors.status = 400
        request.errors.add(
            'body', 'schema',
            'Data does not match the schema for %s' % (content_type, ))
        return

    # The uuid in the payload has to agree with the one in the URL, if any.
    if expected_uuid is not None and payload['uuid'] != expected_uuid:
        request.errors.status = 400
        request.errors.add(
            'body', 'uuid', 'Payload UUID does not match URL UUID.')
        return

    request.schema = schema.to_json()
    request.schema_data = payload
def _validate_with_schema(schema, d):
    """Run `validate` on `d` and hand `d` back unchanged.

    NOTE(review): the result of `validate` is discarded, so this only fails
    if `validate` itself raises -- confirm that this is intended.
    """
    _ = validate(schema, d)
    return d
def _validate(schema, d):
    """Check a python dict against an avro schema.

    Returns whatever the module-level `validate` returns for the pair.
    """
    outcome = validate(schema, d)
    return outcome
def _validate(schema, msg, d):
    """Check a python dict against an avro schema, raising when it is invalid.

    :param schema: avro schema to validate against
    :param msg: label interpolated into the error message
    :param d: python dict to validate
    :return: True when `d` is valid
    :raises IOError: when `validate` rejects `d`
    """
    if validate(schema, d):
        return True
    # FIXME(mkocher)(2016-7-16) Add a better error message than "Invalid"
    raise IOError("Invalid {m} ".format(m=msg))
def _is_valid(schema, d):
    """Return the result of `validate` for `d` against `schema`."""
    verdict = validate(schema, d)
    return verdict