def test_user_record(self):
    """Round-trip the ``User`` record from the Avro getting-started guide.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

    Optional union fields that are omitted from the input are expected to
    serialize as ``null`` and to come back as explicit ``None`` values.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # One optional field missing.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {"name": "Alyssa", "favorite_number": 256, "favorite_color": None}
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # Every field present.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both optional fields missing.
    lion = {"name": "Lion"}
    lion_full = {"name": "Lion", "favorite_number": None, "favorite_color": None}
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def _encode_payload(schema_cache, topic, batch):
    """Build the encoded request body for producing ``batch`` to ``topic``.

    Args:
        schema_cache: Mapping from topic name to a dict holding the topic's
            'value' schema, optionally a 'key' schema, and optionally the
            'value-id' / 'key-id' entries.
        topic: Topic whose cached schemas are used.
        batch: Iterable of messages exposing ``value``, ``key`` and
            ``partition`` attributes.

    Returns:
        Whatever ``json_encode`` returns for the assembled body.

    Raises:
        ValueError: If a message carries a key but the topic has no cached
            key schema to serialize it with.
    """
    topic_schemas = schema_cache[topic]

    value_schema = avro.schema.make_avsc_object(topic_schemas['value'], avro.schema.Names())
    value_serializer = AvroJsonSerializer(value_schema)

    # A key serializer exists only for topics that have a cached key schema.
    key_serializer = None
    if topic_schemas.get('key') is not None:
        key_schema = avro.schema.make_avsc_object(topic_schemas['key'], avro.schema.Names())
        key_serializer = AvroJsonSerializer(key_schema)

    def _encode_key(key):
        # Keyless messages are passed through as None.
        if key is None:
            return None
        # Fail with a clear error instead of a NameError on an unbound serializer.
        if key_serializer is None:
            raise ValueError(
                "message has a key but topic %r has no key schema" % (topic,))
        return key_serializer.to_ordered_dict(key)

    body = {'records': [{'value': value_serializer.to_ordered_dict(message.value),
                         'key': _encode_key(message.key),
                         'partition': message.partition}
                        for message in batch]}

    # The REST proxy's API requires us to double-encode the schemas.
    # Don't ask why, because I have no idea.
    if topic_schemas.get('value-id') is None:
        body['value_schema'] = json_encode(topic_schemas['value'])
    else:
        body['value_schema_id'] = topic_schemas['value-id']

    if topic_schemas.get('key') is not None:
        if topic_schemas.get('key-id') is None:
            body['key_schema'] = json_encode(topic_schemas['key'])
        else:
            body['key_schema_id'] = topic_schemas['key-id']

    return json_encode(body)
def dict_to_json(data: Dict):
    """Serialize ``data`` to Avro JSON and pretty-print the result.

    The schema is parsed from the file ``rate.avsc``, resolved relative to
    the current working directory.

    Args:
        data: Record to serialize; it has to conform to the schema.
    """
    # Use a context manager so the schema file handle is closed promptly
    # instead of being left to the garbage collector.
    with open("rate.avsc", "rb") as schema_file:
        avro_schema = schema.Parse(schema_file.read())

    serializer = AvroJsonSerializer(avro_schema)
    json_str = serializer.to_json(data)
    pretty_print(json_str)
def test_records_union(self):
    """Check that a union of records is tagged with the matching record name.

    An integer ``field`` is expected under ``rec1`` and a string ``field``
    under ``rec2``.
    """
    avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA, avro.schema.Names())

    data = {"funion_rec": {"field": 1}}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")

    data_another_record = {"funion_rec": {"field": "hi"}}
    another_record_json = AvroJsonSerializer(avro_schema).to_json(data_another_record)
    self.assertEqual(another_record_json, """{"funion_rec":{"rec2":{"field":"hi"}}}""")
def test_fixed_ascii(self):
    """Check that an ASCII-only fixed value serializes as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"fixed text here!"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(json_data, """{"ffixed":"fixed text here!"}""")
def test_fixed_ascii(self):
    """Check that an ASCII-only fixed value serializes as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    # Bytes literal: identical to a plain str on Python 2, and the type that
    # Avro's fixed validation accepts on Python 3.
    data = {"ffixed": b"fixed text here!"}
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(json_data, """{"ffixed":"fixed text here!"}""")
def test_fixed_non_ascii(self):
    """Check that non-ASCII and control bytes in a fixed value are ``\\uXXXX``-escaped."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    # Bytes literal: identical to a plain str on Python 2, and the type that
    # Avro's fixed validation accepts on Python 3.
    data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(
        json_data,
        """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}""")
def test_fixed_non_ascii(self):
    """Check that non-ASCII and control bytes in a fixed value are ``\\uXXXX``-escaped."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(
        json_data,
        """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}""")
def test_nested_union_records(self):
    """Round-trip records nested inside nullable unions two levels deep.

    Each non-null union branch is expected to be tagged with the record's
    namespaced name (``nested.MiddleType`` / ``nested.InnerType``).
    """
    inner_type = {
        "name": "InnerType",
        "type": "record",
        "fields": [{"name": "inner", "type": "int"}],
    }
    middle_type = {
        "name": "MiddleType",
        "type": "record",
        "fields": [{"name": "middle", "type": ["null", inner_type]}],
    }
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{"name": "outer", "type": ["null", middle_type]}],
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def test_bytes_field_ascii(self):
    """Check that an ASCII-only bytes value serializes as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    data = {"fbytes": b"this is some long bytes field"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(json_data, """{"fbytes":"this is some long bytes field"}""")
class AvroSerializer(EventSerializer):
    """Serialize events against one Avro schema, as Avro binary or Avro JSON."""

    def __init__(self, schema: Schema):
        """Store the schema; the concrete writer is created on first use."""
        self._schema: Schema = schema
        # Holds the writer for the most recently used serialization type.
        # None until the first call to serialize().
        self._serializer: Union[DatumWriter, AvroJsonSerializer, None] = None

    def serialize(self, event, serialization_type: SerializationType) -> bytes:
        """Serialize ``event`` in the requested Avro format.

        Raises:
            UnsupportedSerializationTypeException: If ``serialization_type``
                is neither ``AVRO_BINARY`` nor ``AVRO_JSON``.
        """
        if serialization_type is SerializationType.AVRO_BINARY:
            return self._serialize_avro_binary(event)
        elif serialization_type is SerializationType.AVRO_JSON:
            return self._serialize_avro_json(event)
        else:
            raise UnsupportedSerializationTypeException(
                f"The serialization type {serialization_type} is not supported for Avro"
            )

    def _serialize_avro_binary(self, event_data: object) -> bytes:
        """Return ``event_data`` encoded in the Avro binary format."""
        # isinstance() is False for None, so this also covers the unset case.
        if not isinstance(self._serializer, DatumWriter):
            self._serializer = avro.io.DatumWriter(self._schema)

        bytes_writer = BytesIO()
        encoder = avro.io.BinaryEncoder(bytes_writer)
        self._serializer.write(event_data, encoder)
        bytes_writer.flush()
        return bytes_writer.getvalue()

    def _serialize_avro_json(self, event_data: object) -> bytes:
        """Return ``event_data`` as UTF-8 encoded Avro JSON."""
        # isinstance() is False for None, so this also covers the unset case.
        if not isinstance(self._serializer, AvroJsonSerializer):
            self._serializer = AvroJsonSerializer(self._schema)

        return self._serializer.to_json(event_data).encode("utf-8")
def test_fails_validation(self):
    """Serializing a string in the ``ffloat`` field must raise ``AvroTypeException``."""
    schema_obj = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    # Copy the valid fixture and break exactly one field.
    invalid_record = dict(self.VALID_DATA_ALL_FIELDS, ffloat="hi")
    json_serializer = AvroJsonSerializer(schema_obj)
    with self.assertRaises(avro.io.AvroTypeException):
        json_serializer.to_json(invalid_record)
def test_user_record(self):
    """Round-trip the ``User`` record from the Avro getting-started guide.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

    Optional union fields that are omitted from the input are expected to
    serialize as ``null`` and to come back as explicit ``None`` values.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # One optional field missing.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {
        "name": "Alyssa",
        "favorite_number": 256,
        "favorite_color": None,
    }
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # Every field present.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both optional fields missing.
    lion = {"name": "Lion"}
    lion_full = {
        "name": "Lion",
        "favorite_number": None,
        "favorite_color": None,
    }
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def test_union_serialization_not_null(self):
    """Round-trip a non-null union value; it is expected to be tagged as ``int``."""
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    data = {"funion_null": 1}

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_bytes_field_ascii(self):
    """Check that an ASCII-only bytes value serializes as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    # Bytes literal: identical to a plain str on Python 2, and the type that
    # Avro's bytes validation accepts on Python 3.
    data = {"fbytes": b"this is some long bytes field"}
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    self.assertEqual(json_data, """{"fbytes":"this is some long bytes field"}""")
def test_all_supported_types(self):
    """Serialize the all-fields fixture and compare against the exact expected JSON."""
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(self.VALID_DATA_ALL_FIELDS)
    self.assertEqual(
        avro_json,
        """{"fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0}"""
    )
def test_user_record(self):
    """Serialize the ``User`` record from the Avro getting-started guide.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

    Optional union fields that are omitted from the input are expected to
    serialize as ``null``.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    }
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)

    # One optional field missing.
    self.assertEqual(
        serializer.to_json({"name": "Alyssa", "favorite_number": 256}),
        """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}""")

    # Every field present.
    self.assertEqual(
        serializer.to_json({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}),
        """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}""")

    # Both optional fields missing.
    self.assertEqual(
        serializer.to_json({"name": "Lion"}),
        """{"name":"Lion","favorite_number":null,"favorite_color":null}""")
def test_bytes_field_non_ascii(self):
    """Round-trip a bytes value with non-ASCII and control bytes.

    Such bytes are expected to appear as ``\\uXXXX`` escapes in the JSON and
    to deserialize back to the original bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    self.assertEqual(
        avro_json,
        """{"fbytes":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000\\u000b+\\u0000\\u0000"}""")

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_array(self):
    """Check that an array of ints serializes as a plain JSON array."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_ARRAY_INT],
    }
    data = {"intarr": [1, 2, 3]}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
def test_individually_allowed_fields_separately(self):
    """Round-trip each individually serializable field type as its own schema.

    For every entry of ``INDIVIDUALLY_SERIALIZABLE`` the matching value from
    ``VALID_DATA_ALL_FIELDS`` is serialized, deserialized, and compared with
    the original.
    """
    for field in self.INDIVIDUALLY_SERIALIZABLE:
        # unwrap enum, fixed, array, and map but save the name for value lookup
        name = field['name']
        schema_source = field['type'] if isinstance(field['type'], dict) else field

        avro_schema = make_avsc_object(schema_source, avro.schema.Names())
        data = self.VALID_DATA_ALL_FIELDS[name]

        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
        self.assertEqual(json_data, data)
def test_map(self):
    """Check that a map of ints serializes as a JSON object."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_MAP_INT],
    }
    data = {"intmap": {"one": 1, "two": 2}}
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # The order of the map's keys follows dict iteration order, which differs
    # between interpreters, so accept either ordering.
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))
def test_bytes_field_non_ascii(self):
    """Round-trip a bytes value with non-ASCII and control bytes.

    Such bytes are expected to appear as ``\\uXXXX`` escapes in the JSON and
    to deserialize back to the original bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    self.assertEqual(
        avro_json,
        """{"fbytes":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000\\u000b+\\u0000\\u0000"}"""
    )

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_all_supported_types(self):
    """Round-trip the all-fields fixture and pin the exact serialized JSON."""
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    data = self.VALID_DATA_ALL_FIELDS

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(
        avro_json,
        """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}"""
    )

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def _encode_payload(schema_cache, topic, batch):
    """Build the encoded request body for producing ``batch`` to ``topic``.

    Args:
        schema_cache: Mapping from topic name to a dict holding the topic's
            'value' schema, optionally a 'key' schema, and optionally the
            'value-id' / 'key-id' entries.
        topic: Topic whose cached schemas are used.
        batch: Iterable of messages exposing ``value``, ``key`` and
            ``partition`` attributes.

    Returns:
        Whatever ``json_encode`` returns for the assembled body.

    Raises:
        ValueError: If a message carries a key but the topic has no cached
            key schema to serialize it with.
    """
    topic_schemas = schema_cache[topic]

    value_schema = avro.schema.make_avsc_object(topic_schemas['value'],
                                                avro.schema.Names())
    value_serializer = AvroJsonSerializer(value_schema)

    # A key serializer exists only for topics that have a cached key schema.
    key_serializer = None
    if topic_schemas.get('key') is not None:
        key_schema = avro.schema.make_avsc_object(topic_schemas['key'],
                                                  avro.schema.Names())
        key_serializer = AvroJsonSerializer(key_schema)

    def _encode_key(key):
        # Keyless messages are passed through as None.
        if key is None:
            return None
        # Fail with a clear error instead of a NameError on an unbound serializer.
        if key_serializer is None:
            raise ValueError(
                "message has a key but topic %r has no key schema" % (topic,))
        return key_serializer.to_ordered_dict(key)

    body = {
        'records': [{
            'value': value_serializer.to_ordered_dict(message.value),
            'key': _encode_key(message.key),
            'partition': message.partition
        } for message in batch]
    }

    # The REST proxy's API requires us to double-encode the schemas.
    # Don't ask why, because I have no idea.
    if topic_schemas.get('value-id') is None:
        body['value_schema'] = json_encode(topic_schemas['value'])
    else:
        body['value_schema_id'] = topic_schemas['value-id']

    if topic_schemas.get('key') is not None:
        if topic_schemas.get('key-id') is None:
            body['key_schema'] = json_encode(topic_schemas['key'])
        else:
            body['key_schema_id'] = topic_schemas['key-id']

    return json_encode(body)
def test_user_record(self):
    """Serialize the ``User`` record from the Avro getting-started guide.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

    Optional union fields that are omitted from the input are expected to
    serialize as ``null``.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    }
    avro_schema = avro.schema.make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)

    # One optional field missing.
    self.assertEqual(serializer.to_json({"name": "Alyssa", "favorite_number": 256}),
                     """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}""")

    # Every field present.
    self.assertEqual(serializer.to_json({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}),
                     """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}""")

    # Both optional fields missing.
    self.assertEqual(serializer.to_json({"name": "Lion"}),
                     """{"name":"Lion","favorite_number":null,"favorite_color":null}""")
def test_map(self):
    """A map of ints must serialize to a JSON object, in either key order."""
    record_schema = make_avsc_object(
        {"type": "record", "name": "rec", "fields": [self.FIELD_MAP_INT]},
        avro.schema.Names(),
    )
    record = {"intmap": {"one": 1, "two": 2}}
    serialized = AvroJsonSerializer(record_schema).to_json(record)

    # Dictionaries are unsorted, so both key orders are acceptable.
    accepted_outputs = (
        """{"intmap":{"one":1,"two":2}}""",
        """{"intmap":{"two":2,"one":1}}""",
    )
    self.assertIn(serialized, accepted_outputs)
def test_nested_union_records(self):
    """Round-trip records nested inside nullable unions two levels deep.

    Each non-null union branch is expected to be tagged with the record's
    namespaced name (``nested.MiddleType`` / ``nested.InnerType``).
    """
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{
            "name": "outer",
            "type": ["null", {
                "name": "MiddleType",
                "type": "record",
                "fields": [{
                    "name": "middle",
                    "type": ["null", {
                        "name": "InnerType",
                        "type": "record",
                        "fields": [{"name": "inner", "type": "int"}],
                    }],
                }],
            }],
        }],
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def test_map(self):
    """Round-trip a map of ints, including input with mixed str/unicode keys."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_MAP_INT],
    }
    data = {"intmap": {"one": 1, "two": 2}}
    unicode_dict = {'intmap': {'one': 1, u'two': 2}}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # Dictionaries are unsorted
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))

    deserializer = AvroJsonDeserializer(avro_schema)
    json_data = deserializer.from_json(avro_json)
    self.assertEqual(json_data, data)

    # from_dict is expected to normalize the mixed-key input to plain data.
    mixed_unicode = deserializer.from_dict(unicode_dict)
    self.assertEqual(mixed_unicode, data)
def test_union_serialization_invalid(self):
    """Serializing the string ``"hi"`` in ``funion_null`` must raise ``AvroTypeException``."""
    schema_obj = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    invalid_record = {"funion_null": "hi"}
    json_serializer = AvroJsonSerializer(schema_obj)
    with self.assertRaises(avro.io.AvroTypeException):
        json_serializer.to_json(invalid_record)
def test_union_serialization_null(self):
    """Check that a ``None`` union value serializes as a bare JSON ``null``."""
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    data = {"funion_null": None}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"funion_null":null}""")
{"name": "uint8_t", "type":"fixed", "size":1}, {"name": "uint16_t", "type":"fixed", "size":2}, {"name": "uint32_t", "type":"fixed", "size":4}, "uint64_t", {"name": "int8_t", "type":"fixed", "size":1}, {"name": "int16_t", "type":"fixed", "size":2}, {"name": "int32_t", "type":"fixed", "size":4}, "int64_t"]}, {"name": "tags", "type": ["null", {"type": "map", "values": "string"}]}, {"name": "metadata", "type": ["null", {"type": "map", "values": "string"}]} ] } # Create avro schema and serializer for schema avro_schema = avro.schema.make_avsc_object(schema, avro.schema.Names()) serializer = AvroJsonSerializer(avro_schema) # Create record (JSON) record = { "metric": 'hey', "timestamp": 983993882838, "value": 9, "tags": { "hey": "there", } } # Serialize JSON record, this will add proper union formatting (this will return a string) record = serializer.to_json(record) # Add proper 'value' wrapper around record (must json.loads(record) because record is a string) message = {'value':json.loads(record)}
def serialize_to_json(self, result_dict):
    """Return ``result_dict`` serialized to Avro JSON using ``self.avro_schema``."""
    # The schema object is rebuilt from self.avro_schema on every call.
    parsed_schema = make_avsc_object(self.avro_schema, avro.schema.Names())
    return AvroJsonSerializer(parsed_schema).to_json(result_dict)
def serialize(schema, data):
    """Return ``data`` serialized to Avro JSON according to ``schema``.

    ``schema`` is the JSON-data form of the schema, as accepted by
    ``SchemaFromJSONData``.
    """
    parsed_schema = SchemaFromJSONData(schema, avro.schema.Names())
    return AvroJsonSerializer(parsed_schema).to_json(data)
def _serialize_avro_json(self, event_data: object) -> bytes:
    """Return ``event_data`` as UTF-8 encoded Avro JSON."""
    # (Re)create the JSON serializer unless one is already held;
    # isinstance() is False for None, so the unset case is covered too.
    if not isinstance(self._serializer, AvroJsonSerializer):
        self._serializer = AvroJsonSerializer(self._schema)

    json_text = self._serializer.to_json(event_data)
    return json_text.encode("utf-8")