def test_aliases_not_present():
    """Resolution must fail when no reader-field alias matches the writer field."""
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "double"}],
    }
    reader_schema = {
        "type": "record",
        "fields": [
            {"name": "newtest", "type": "double", "aliases": ["testX"]},
        ],
    }
    buf = MemoryIO()
    fastavro.writer(buf, writer_schema, [{"test": 1.2}])
    buf.seek(0)
    reader = fastavro.reader(buf, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(reader)
def test_schema_migration_schema_mismatch():
    """Migrating a record writer schema to an enum reader schema must fail."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_schema_mismatch",
        "fields": [{"name": "test", "type": "string"}],
    }
    reader_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }
    buf = MemoryIO()
    fastavro.writer(buf, writer_schema, [{"test": "test"}])
    buf.seek(0)
    migrated = fastavro.reader(buf, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(migrated)
def test_schema_migration_remove_field():
    """A field absent from the reader schema is dropped during migration."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_remove_field",
        "fields": [{"name": "test", "type": "string"}],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_remove_field_new",
        "fields": [],
    }
    buf = MemoryIO()
    fastavro.writer(buf, writer_schema, [{'test': 'test'}])
    buf.seek(0)
    migrated = fastavro.reader(buf, reader_schema)
    # The record comes back with its only field removed.
    assert list(migrated) == [{}]
def test_write_union_tuple_primitive():
    '''Tuple-style union writing (see function `write_union` in `_write`)
    must work for primitive branches, not only for records.
    '''
    schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [{'name': 'val', 'type': ['string', 'int']}],
    }
    written = [
        {"val": ("int", 1)},
        {"val": ("string", "string")},
    ]
    expected = [
        {"val": 1},
        {"val": "string"},
    ]
    buf = MemoryIO()
    fastavro.writer(buf, schema, written)
    buf.seek(0)
    assert list(fastavro.reader(buf)) == expected
def test_schema_migration_array_failure():
    """Array items can't migrate to a union with no compatible branch."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_array_failure",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "int"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_array_failure_new",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "boolean"]},
        }],
    }
    buf = MemoryIO()
    fastavro.writer(buf, writer_schema, [{"test": [1, 2, 3]}])
    buf.seek(0)
    migrated = fastavro.reader(buf, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(migrated)
def test_schema_migration_enum_failure():
    """Enum migration must fail when a written symbol ("FOO") is missing
    from the reader schema's symbol list.
    """
    schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }
    new_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["BAZ", "BAR"],
    }
    new_file = MemoryIO()
    records = ["FOO"]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # pytest.raises replaces the try/except/else/assert-False idiom, and the
    # public fastavro.read module is used instead of the private
    # fastavro._reader alias, matching the other migration tests in this file.
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(new_reader)
def test_write_long_union_type():
    """A large long inside a ['null', 'long'] union survives a round trip."""
    schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [
            {'name': 'time', 'type': ['null', 'long']},
        ],
    }
    buf = MemoryIO()
    fastavro.writer(buf, schema, [{u'time': 809066167221092352}])
    buf.seek(0)
    assert list(fastavro.reader(buf)) == [{u'time': 809066167221092352}]
def test_schema_migration_union_failure():
    """A writer boolean cannot resolve to a reader union with no boolean
    branch, so reading must raise SchemaResolutionError.
    """
    schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "boolean"}]
    }
    new_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": ["string", "int"]}]
    }
    new_file = MemoryIO()
    records = [{"test": True}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # The original test consumed the reader without asserting the failure its
    # name promises. Per Avro schema resolution, "boolean" matches neither
    # "string" nor "int", so this must raise.
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(new_reader)
def test_schema_migration_array_failure():
    """Array items can't migrate to a union with no compatible branch.

    NOTE(review): this duplicates (and, being defined later in the module,
    shadows) an earlier test with the same name; consider renaming or
    removing one of the two.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "int"]},
        }]
    }
    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "boolean"]},
        }]
    }
    new_file = MemoryIO()
    records = [{"test": [1, 2, 3]}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # pytest.raises replaces the try/except/else/assert-False idiom and the
    # private fastavro._reader alias.
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(new_reader)
def test_schema_migration_array_with_union_promotion():
    """Union branches inside array items may be promoted (long -> float)."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_array_with_union_promotion",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["boolean", "long"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_array_with_union_promotion_new",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "float"]},
        }],
    }
    buf = MemoryIO()
    expected = [{"test": [1, 2, 3]}]
    fastavro.writer(buf, writer_schema, expected)
    buf.seek(0)
    assert list(fastavro.reader(buf, reader_schema)) == expected
def test_schema_migration_writer_union():
    """A writer union value resolves against a plain (non-union) reader type."""
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": ["string", "int"]}]
    }
    reader_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "int"}]
    }
    buf = MemoryIO()
    expected = [{"test": 1}]
    fastavro.writer(buf, writer_schema, expected)
    buf.seek(0)
    assert list(fastavro.reader(buf, reader_schema)) == expected
def test_schema_migration_maps_with_union_promotion():
    """Union branches inside map values may be promoted (int -> long)."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_maps_with_union_promotion",
        "fields": [{
            "name": "test",
            "type": {"type": "map", "values": ["string", "int"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_maps_with_union_promotion_new",
        "fields": [{
            "name": "test",
            "type": {"type": "map", "values": ["string", "long"]},
        }],
    }
    buf = MemoryIO()
    expected = [{"test": {"foo": 1}}]
    fastavro.writer(buf, writer_schema, expected)
    buf.seek(0)
    assert list(fastavro.reader(buf, reader_schema)) == expected
def snappy_read_block(decoder):
    """Read a "snappy"-codec block and return it as an in-memory buffer."""
    block_len = read_long(decoder)
    # The last 4 bytes of the block are a CRC; they are consumed to advance
    # the stream but not verified here.
    compressed = decoder.read_fixed(block_len - 4)
    decoder.read_fixed(4)  # CRC
    return MemoryIO(snappy.decompress(compressed))
def bzip2_read_block(decoder):
    """Read block in "bzip2" codec."""
    compressed = decoder.read_bytes()
    return MemoryIO(bz2.decompress(compressed))
def deflate_read_block(decoder):
    """Read block in "deflate" codec."""
    compressed = decoder.read_bytes()
    # wbits=-15: the negative window-size log selects "raw" deflate data
    # with no zlib header/trailer. See zlib.h.
    return MemoryIO(zlib.decompress(compressed, -15))
def null_read_block(decoder):
    """Read block in "null" (uncompressed) codec."""
    raw = decoder.read_bytes()
    return MemoryIO(raw)
def snappy_read_block(fo):
    """Read a "snappy"-codec block from a raw file-like object."""
    block_len = read_long(fo)
    compressed = fo.read(block_len - 4)
    fo.read(4)  # trailing CRC; skipped, not verified
    return MemoryIO(snappy.decompress(compressed))
def __init__(self, *args):
    # Delegate storage to an in-memory buffer; any constructor arguments
    # are forwarded unchanged to MemoryIO.
    self.underlying = MemoryIO(*args)
def zstandard_read_block(decoder):
    """Read block in "zstandard" codec."""
    block_len = read_long(decoder)
    compressed = decoder.read_fixed(block_len)
    return MemoryIO(zstd.ZstdDecompressor().decompress(compressed))
def test_empty():
    """Reading an empty stream must report a missing avro header."""
    io = MemoryIO()
    with pytest.raises(ValueError) as exc:
        fastavro.reader(io)
    # Assert on str(exc.value) — the exception message itself. str(exc) on a
    # pytest ExceptionInfo is location-dependent and deprecated by pytest.
    assert 'cannot read header - is it an avro file?' in str(exc.value)
def test_json_encoding(record):
    """Round-trip *record* through the schemaless JSON writer and reader."""
    schema = {
        "type": "record",
        "namespace": "com.example",
        "name": "NameUnion",
        "fields": [{
            "name": "name",
            "type": [
                "null",
                {
                    "type": "record",
                    "namespace": "com.example",
                    "name": "FullName",
                    "fields": [
                        {"name": "first", "type": "string"},
                        {"name": "last", "type": "string"},
                    ],
                },
                {
                    "type": "record",
                    "namespace": "com.example",
                    "name": "ConcatenatedFullName",
                    "fields": [
                        {"name": "entireName", "type": "string"},
                    ],
                },
                {"type": "array", "items": "com.example.ConcatenatedFullName"},
                {"type": "map", "values": "com.example.ConcatenatedFullName"},
                {"type": "boolean"},
                {"type": "int"},
                {"type": "long"},
                {"type": "float"},
                {"type": "double"},
                {"type": "string"},
                {"type": "bytes"},
            ],
        }],
    }
    buf = MemoryIO()
    fastavro.schemaless_json_writer(buf, schema, record)
    buf.seek(0)
    assert fastavro.schemaless_json_reader(buf, schema) == record
def roundtrip(schema, record):
    """Serialize *record* with *schema* and read it back (schemaless)."""
    buf = MemoryIO()
    fastavro.schemaless_writer(buf, schema, record)
    buf.seek(0)
    return fastavro.schemaless_reader(buf, schema)
def test_schemaless_writer_and_reader_with_union():
    """With return_record_name=True the reader yields union record values
    as (record-name, value) tuples, matching the tuple-style written input.
    """
    schema = {
        "name": "Message",
        "type": "record",
        "namespace": "test",
        "fields": [
            {"name": "id", "type": "long"},
            {
                "name": "payload",
                "type": [
                    {
                        "name": "ApplicationCreated",
                        "type": "record",
                        "fields": [
                            {"name": "applicationId", "type": "string"},
                            {"name": "data", "type": "string"},
                        ],
                    },
                    {
                        "name": "ApplicationSubmitted",
                        "type": "record",
                        "fields": [
                            {"name": "applicationId", "type": "string"},
                            {"name": "data", "type": "string"},
                        ],
                    },
                ],
            },
        ],
    }
    record = {
        "id": 123,
        "payload": ("test.ApplicationSubmitted", {
            "applicationId": "123456789UT",
            "data": "...",
        }),
    }
    buf = MemoryIO()
    fastavro.schemaless_writer(buf, schema, record)
    buf.seek(0)
    decoded = fastavro.schemaless_reader(buf, schema, return_record_name=True)
    assert decoded == record
def xz_read_block(decoder):
    """Read block in "xz" codec."""
    block_len = read_long(decoder)
    compressed = decoder.read_fixed(block_len)
    return MemoryIO(lzma.decompress(compressed))
def test_xz_works_by_default_on_windows_python3():
    """On Python 3 the xz codec works out of the box; on Python 2 requesting
    it must raise a ValueError pointing at the missing dependency.
    """
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }
    records = [
        {"station": "011990-99999", "temp": 0, "time": 1433269388},
        {"station": "011990-99999", "temp": 22, "time": 1433270389},
        {"station": "011990-99999", "temp": -11, "time": 1433273379},
        {"station": "012650-99999", "temp": 111, "time": 1433275478},
    ]
    file = MemoryIO()
    if sys.version_info >= (3, 0):
        fastavro.writer(file, schema, records, codec="xz")
        file.seek(0)
        assert records == list(fastavro.reader(file))
    else:
        with pytest.raises(
                ValueError,
                match="xz codec is supported but you need to install"):
            fastavro.writer(file, schema, records, codec="xz")
def test_repo_caching_issue():
    """Named-schema caching must not leak between writer calls: record "C"
    is first written with a string field, then redefined with an int field
    inside another schema, and both round trips must succeed.
    """
    schema = {
        "type": "record",
        "name": "B",
        "fields": [{
            "name": "b",
            "type": {
                "type": "record",
                "name": "C",
                "fields": [{"name": "c", "type": "string"}],
            },
        }],
    }
    buf = MemoryIO()
    expected = [{"b": {"c": "test"}}]
    fastavro.writer(buf, schema, expected)
    buf.seek(0)
    assert list(fastavro.reader(buf)) == expected

    other_schema = {
        "name": "A",
        "type": "record",
        "fields": [
            {
                "name": "a",
                "type": {
                    "type": "record",
                    "name": "B",
                    "fields": [{
                        "name": "b",
                        "type": {
                            "type": "record",
                            "name": "C",
                            "fields": [{"name": "c", "type": "int"}],
                        },
                    }],
                },
            },
            {"name": "aa", "type": "B"},
        ],
    }
    buf = MemoryIO()
    expected = [{"a": {"b": {"c": 1}}, "aa": {"b": {"c": 2}}}]
    fastavro.writer(buf, other_schema, expected)
    buf.seek(0)
    assert list(fastavro.reader(buf)) == expected
def test_unsupported_codec():
    """Both the writer and the reader must reject a codec they don't know."""
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }
    records = [
        {"station": "011990-99999", "temp": 0, "time": 1433269388},
        {"station": "011990-99999", "temp": 22, "time": 1433270389},
        {"station": "011990-99999", "temp": -11, "time": 1433273379},
        {"station": "012650-99999", "temp": 111, "time": 1433275478},
    ]
    file = MemoryIO()
    with pytest.raises(ValueError, match="unrecognized codec"):
        fastavro.writer(file, schema, records, codec="unsupported")

    file = MemoryIO()
    fastavro.writer(file, schema, records, codec="deflate")
    # Rewrite the avro header metadata so the file claims it was written
    # with a codec called `unsupported`.
    modified_avro = file.getvalue().replace(b"\x0edeflate",
                                            b"\x16unsupported")
    modified_file = MemoryIO(modified_avro)
    with pytest.raises(ValueError, match="Unrecognized codec"):
        list(fastavro.reader(modified_file))
def null_read_block(fo):
    """Read block in "null" (uncompressed) codec."""
    raw = read_bytes(fo)
    return MemoryIO(raw)
def test_union_records():
    """A union of two record branches round-trips via the matching branch."""
    schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [{
            'name': 'val',
            'type': [
                {
                    'name': 'a',
                    'namespace': 'common',
                    'type': 'record',
                    'fields': [
                        {'name': 'x', 'type': 'int'},
                        {'name': 'y', 'type': 'int'},
                    ],
                },
                {
                    'name': 'b',
                    'namespace': 'common',
                    'type': 'record',
                    'fields': [
                        {'name': 'x', 'type': 'int'},
                        {'name': 'y', 'type': 'int'},
                        {'name': 'z', 'type': ['null', 'int']},
                    ],
                },
            ],
        }],
    }
    payload = [{'val': {'x': 3, 'y': 4, 'z': 5}}]
    buf = MemoryIO()
    fastavro.writer(buf, schema, payload)
    buf.seek(0)
    assert list(fastavro.reader(buf)) == payload