def test_dict_with_unicode_bytes(self):
    """Check how ``from_dict`` treats byte and unicode values in a ``bytes`` field."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithBytes",
        "fields": [{
            "type": "bytes",
            "name": "fbytes"
        }]
    }

    # byte arrays should be left alone
    byte_data = {
        "fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        AvroJsonDeserializer(avro_schema).from_dict(byte_data), byte_data)

    # unicode strings should be turned into iso-8859-1 bytes
    iso8859_data = {'fbytes': b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    unicode_data = {u'fbytes': u'(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00'}
    self.assertEqual(
        AvroJsonDeserializer(avro_schema).from_dict(unicode_data),
        iso8859_data)
def test_user_record(self):
    """Round-trip the ``User`` record through Avro JSON.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]}
        ]
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # One optional field omitted: it is serialized as null and comes back
    # as an explicit None.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {"name": "Alyssa", "favorite_number": 256,
                   "favorite_color": None}
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # Every field present: union values are wrapped with their type name.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both optional fields omitted.
    lion = {"name": "Lion"}
    lion_full = {"name": "Lion", "favorite_number": None,
                 "favorite_color": None}
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def test_map(self):
    """Round-trip an int map and deserialize a dict with mixed str/unicode keys."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [
            self.FIELD_MAP_INT
        ]
    }
    data = {
        "intmap": {
            "one": 1,
            "two": 2
        }
    }
    unicode_dict = {
        'intmap': {
            'one': 1,
            u'two': 2
        }
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)

    # Dictionaries are unsorted
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))

    deserializer = AvroJsonDeserializer(avro_schema)
    json_data = deserializer.from_json(avro_json)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(json_data, data)

    mixed_unicode = deserializer.from_dict(unicode_dict)
    self.assertEqual(mixed_unicode, data)
def test_nested_union_records(self):
    """Round-trip records nested two levels deep inside nullable unions."""
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{
            "name": "outer",
            "type": [
                "null",
                {
                    "name": "MiddleType",
                    "type": "record",
                    "fields": [{
                        "name": "middle",
                        "type": [
                            "null",
                            {
                                "name": "InnerType",
                                "type": "record",
                                "fields": [{
                                    "name": "inner",
                                    "type": "int"
                                }]
                            }
                        ]
                    }]
                }
            ]
        }]
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    # Expected JSON wraps each record union branch with its namespaced name.
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def test_user_record(self):
    """Round-trip the ``User`` record through Avro JSON.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [{
            "name": "name",
            "type": "string"
        }, {
            "name": "favorite_number",
            "type": ["int", "null"]
        }, {
            "name": "favorite_color",
            "type": ["string", "null"]
        }]
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # One optional field omitted: it is serialized as null and comes back
    # as an explicit None.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {
        "name": "Alyssa",
        "favorite_number": 256,
        "favorite_color": None
    }
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # Every field present: union values are wrapped with their type name.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both optional fields omitted.
    lion = {"name": "Lion"}
    lion_full = {
        "name": "Lion",
        "favorite_number": None,
        "favorite_color": None
    }
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def test_union_serialization_not_null(self):
    """Round-trip a non-null int through the ``funion_null`` union field."""
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA,
                                   avro.schema.Names())
    data = {"funion_null": 1}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # The non-null branch is wrapped with its type name in the JSON form.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_records_union(self):
    """Round-trip both record branches of the ``funion_rec`` union field."""
    avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA,
                                   avro.schema.Names())

    # An int-valued "field" is expected to be tagged as "rec1".
    data = {"funion_rec": {"field": 1}}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)

    # A string-valued "field" is expected to be tagged as "example.avro.rec2".
    data_another_record = {"funion_rec": {"field": "hi"}}
    another_record_json = AvroJsonSerializer(avro_schema).to_json(
        data_another_record)
    self.assertEqual(
        another_record_json,
        """{"funion_rec":{"example.avro.rec2":{"field":"hi"}}}""")
    another_json_data = AvroJsonDeserializer(avro_schema).from_json(
        another_record_json)
    self.assertEqual(another_json_data, data_another_record)
def test_individually_allowed_fields_separately(self):
    """Round-trip each ``INDIVIDUALLY_SERIALIZABLE`` entry as its own schema."""
    for field in self.INDIVIDUALLY_SERIALIZABLE:
        # unwrap enum, fixed, array, and map but save the name for value lookup
        name = field['name']
        # A separate local avoids rebinding the loop variable.
        schema_source = field['type'] if isinstance(field['type'], dict) else field
        avro_schema = make_avsc_object(schema_source, avro.schema.Names())
        data = self.VALID_DATA_ALL_FIELDS[name]
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(json_data, data)
def test_all_supported_types(self):
    """Round-trip ``VALID_DATA_ALL_FIELDS`` through ``ALL_FIELDS_SCHEMA``."""
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA,
                                   avro.schema.Names())
    data = self.VALID_DATA_ALL_FIELDS
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        avro_json,
        """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}"""
    )
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_array(self):
    """Round-trip a record holding a single int array field."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_ARRAY_INT]
    }
    data = {"intarr": [1, 2, 3]}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_map(self):
    """Round-trip an int map and deserialize a dict with mixed str/unicode keys."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_MAP_INT]
    }
    data = {"intmap": {"one": 1, "two": 2}}
    unicode_dict = {'intmap': {'one': 1, u'two': 2}}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)

    # Dictionaries are unsorted
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))

    deserializer = AvroJsonDeserializer(avro_schema)
    json_data = deserializer.from_json(avro_json)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(json_data, data)

    mixed_unicode = deserializer.from_dict(unicode_dict)
    self.assertEqual(mixed_unicode, data)
def test_unknown_fields_are_ignored(self):
    """Check that JSON keys absent from the schema are dropped by ``from_json``."""
    schema_dict = {
        "type": "record",
        "name": "BasicName",
        "fields": [{
            "type": "string",
            "name": "name"
        }]
    }
    # "age" is not declared in the schema above.
    avro_json = """{"name":"todd","age":1}"""
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(json_data, {"name": "todd"})
def test_fixed_ascii(self):
    """Round-trip an ASCII-only value through the ``ffixed`` field."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED]
    }
    data = {"ffixed": b"fixed text here!"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(avro_json, """{"ffixed":"fixed text here!"}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_nested_union_records(self):
    """Round-trip records nested two levels deep inside nullable unions."""
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{
            "name": "outer",
            "type": ["null", {
                "name": "MiddleType",
                "type": "record",
                "fields": [{
                    "name": "middle",
                    "type": ["null", {
                        "name": "InnerType",
                        "type": "record",
                        "fields": [{
                            "name": "inner",
                            "type": "int"
                        }]
                    }]
                }]
            }]
        }]
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    # Expected JSON wraps each record union branch with its namespaced name.
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    serializer = AvroJsonSerializer(avro_schema)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def test_fixed_non_ascii(self):
    """Round-trip a value with non-ASCII and control bytes through ``ffixed``."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED]
    }
    data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # Non-printable and high bytes are expected as \uXXXX escapes in the JSON.
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(
        avro_json,
        """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}"""
    )
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_bytes_field_ascii(self):
    """Round-trip an ASCII-only value through a ``bytes`` field."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{
            "type": "bytes",
            "name": "fbytes"
        }]
    }
    data = {"fbytes": b"this is some long bytes field"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(avro_json,
                     """{"fbytes":"this is some long bytes field"}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_missing_nullable_field(self):
    """Check that ``from_json`` raises when a nullable field is absent.

    The ``version`` field is a ``["null", "int"]`` union with a ``None``
    default, and the input JSON does not contain it.
    """
    name_field = {"type": "string", "name": "name"}
    version_field = {
        "type": ["null", "int"],
        "name": "version",
        "default": None
    }
    record_schema = {
        "type": "record",
        "name": "WithDefault",
        "fields": [name_field, version_field]
    }
    payload = """{"name":"mcnameface"}"""
    parsed_schema = make_avsc_object(record_schema, avro.schema.Names())
    reader = AvroJsonDeserializer(parsed_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        reader.from_json(payload)
def json_to_faust(self, record):
    """Return ``record`` deserialized by ``AvroJsonDeserializer.from_json``.

    The deserializer is built from ``self.avro_schema`` on every call.
    """
    deserializer = AvroJsonDeserializer(self.avro_schema)
    return deserializer.from_json(record)