예제 #1
0
def test_aliases_not_present():
    """Check that reading raises when no reader field or alias matches.

    The data is written with a field named ``test`` while the reader schema
    only declares ``newtest`` with the alias ``testX``.
    """
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "double"}],
    }
    reader_schema = {
        "type": "record",
        "fields": [
            {"name": "newtest", "type": "double", "aliases": ["testX"]},
        ],
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": 1.2}])
    buffer.seek(0)

    reader = fastavro.reader(buffer, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(reader)
예제 #2
0
def test_schema_migration_schema_mismatch():
    """Check that a record file cannot be read with an enum reader schema."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_schema_mismatch",
        "fields": [{"name": "test", "type": "string"}],
    }
    reader_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": "test"}])
    buffer.seek(0)

    migrated = fastavro.reader(buffer, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(migrated)
예제 #3
0
def test_schema_migration_remove_field():
    """Check that a field absent from the reader schema is dropped on read."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_remove_field",
        "fields": [{"name": "test", "type": "string"}],
    }
    # The reader schema declares no fields at all.
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_remove_field_new",
        "fields": [],
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": "test"}])
    buffer.seek(0)

    migrated = list(fastavro.reader(buffer, reader_schema))
    assert migrated == [{}]
예제 #4
0
def test_write_union_tuple_primitive():
    '''
    Test that the tuple style of writing unions (see function
    `write_union` in `_write`) can be used with primitives,
    not only with records.
    '''
    union_schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [{'name': 'val', 'type': ['string', 'int']}],
    }

    # Each value is written as a (branch name, value) tuple ...
    tagged_values = [
        {"val": ("int", 1)},
        {"val": ("string", "string")},
    ]
    # ... and is expected back as the bare value.
    plain_values = [
        {"val": 1},
        {"val": "string"},
    ]

    buffer = MemoryIO()
    fastavro.writer(buffer, union_schema, tagged_values)
    buffer.seek(0)

    read_back = list(fastavro.reader(buffer))
    assert read_back == plain_values
예제 #5
0
def test_schema_migration_array_failure():
    """Check that int array items cannot be read as ``string``/``boolean``."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_array_failure",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "int"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_array_failure_new",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "boolean"]},
        }],
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": [1, 2, 3]}])
    buffer.seek(0)

    migrated = fastavro.reader(buffer, reader_schema)
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(migrated)
예제 #6
0
def test_schema_migration_enum_failure():
    """Check that an enum symbol missing from the reader schema fails.

    ``FOO`` is written, but the reader schema only declares ``BAZ`` and
    ``BAR``.
    """
    schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    new_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["BAZ", "BAR"],
    }

    new_file = MemoryIO()
    records = ["FOO"]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)

    # pytest.raises reports a clear "DID NOT RAISE" failure, whereas a bare
    # ``assert False`` carries no message and is stripped under ``-O``.
    with pytest.raises(fastavro._reader.SchemaResolutionError):
        list(new_reader)
예제 #7
0
def test_write_long_union_type():
    """Round-trip a large integer through a ``['null', 'long']`` union."""
    union_schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [{'name': 'time', 'type': ['null', 'long']}],
    }
    written = [{u'time': 809066167221092352}]

    buffer = MemoryIO()
    fastavro.writer(buffer, union_schema, written)
    buffer.seek(0)

    read_back = list(fastavro.reader(buffer))
    assert read_back == [{u'time': 809066167221092352}]
예제 #8
0
def test_schema_migration_union_failure():
    """Check that a boolean cannot be read through a string/int union.

    The writer schema declares ``test`` as ``boolean``; no branch of the
    reader's ``["string", "int"]`` union matches it, so schema resolution
    is expected to fail.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": "boolean"
        }]
    }

    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": ["string", "int"]
        }]
    }

    new_file = MemoryIO()
    records = [{"test": True}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)

    # Assert the failure explicitly; without this the test either errors out
    # or silently passes without checking anything.
    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(new_reader)
예제 #9
0
def test_schema_migration_array_failure():
    """Check that int array items cannot be read as ``string``/``boolean``.

    The items are written through a ``["string", "int"]`` union while the
    reader schema only offers ``["string", "boolean"]``.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "int"]
            },
        }]
    }

    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "boolean"]
            },
        }]
    }

    new_file = MemoryIO()
    records = [{"test": [1, 2, 3]}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)

    # pytest.raises reports a clear "DID NOT RAISE" failure, whereas a bare
    # ``assert False`` carries no message and is stripped under ``-O``.
    with pytest.raises(fastavro._reader.SchemaResolutionError):
        list(new_reader)
예제 #10
0
def test_schema_migration_array_with_union_promotion():
    """Read ``boolean``/``long`` array items via a ``string``/``float`` union.

    The values read back must compare equal to the integers written.
    """
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_array_with_union_promotion",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["boolean", "long"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_array_with_union_promotion_new",
        "fields": [{
            "name": "test",
            "type": {"type": "array", "items": ["string", "float"]},
        }],
    }
    written = [{"test": [1, 2, 3]}]

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, written)
    buffer.seek(0)

    migrated = list(fastavro.reader(buffer, reader_schema))
    assert migrated == written
예제 #11
0
def test_schema_migration_writer_union():
    """Read an int written through a union with a plain ``int`` reader."""
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": ["string", "int"]}],
    }
    reader_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "int"}],
    }
    written = [{"test": 1}]

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, written)
    buffer.seek(0)

    migrated = list(fastavro.reader(buffer, reader_schema))
    assert migrated == written
예제 #12
0
def test_schema_migration_maps_with_union_promotion():
    """Read ``string``/``int`` map values via a ``string``/``long`` union."""
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_maps_with_union_promotion",
        "fields": [{
            "name": "test",
            "type": {"type": "map", "values": ["string", "int"]},
        }],
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_maps_with_union_promotion_new",
        "fields": [{
            "name": "test",
            "type": {"type": "map", "values": ["string", "long"]},
        }],
    }
    written = [{"test": {"foo": 1}}]

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, written)
    buffer.seek(0)

    migrated = list(fastavro.reader(buffer, reader_schema))
    assert migrated == written
예제 #13
0
def snappy_read_block(decoder):
    """Read block in "snappy" codec.

    The length-prefixed block ends with a 4-byte CRC, which is consumed but
    not verified here.
    """
    block_size = read_long(decoder)
    # The last 4 bytes of the block are the CRC, not compressed payload.
    compressed = decoder.read_fixed(block_size - 4)
    decoder.read_fixed(4)  # skip the CRC
    decompressed = snappy.decompress(compressed)
    return MemoryIO(decompressed)
예제 #14
0
def bzip2_read_block(decoder):
    """Read block in "bzip2" codec."""
    compressed = decoder.read_bytes()
    decompressed = bz2.decompress(compressed)
    return MemoryIO(decompressed)
예제 #15
0
def deflate_read_block(decoder):
    """Read block in "deflate" codec."""
    compressed = decoder.read_bytes()
    # A negative value selects "raw" inflate (no zlib headers); 15 is the
    # log of the window size.  See zlib.h.
    raw_wbits = -15
    return MemoryIO(zlib.decompress(compressed, raw_wbits))
예제 #16
0
def null_read_block(decoder):
    """Read block in "null" codec."""
    # The "null" codec stores the bytes as-is, so no decompression step.
    payload = decoder.read_bytes()
    return MemoryIO(payload)
예제 #17
0
def snappy_read_block(fo):
    """Read block in "snappy" codec.

    The length-prefixed block ends with a 4-byte CRC, which is consumed but
    not verified here.
    """
    block_size = read_long(fo)
    # The last 4 bytes of the block are the CRC, not compressed payload.
    compressed = fo.read(block_size - 4)
    fo.read(4)  # skip the CRC
    decompressed = snappy.decompress(compressed)
    return MemoryIO(decompressed)
예제 #18
0
 def __init__(self, *args):
     """Wrap a ``MemoryIO`` constructed from the given positional args."""
     wrapped = MemoryIO(*args)
     self.underlying = wrapped
예제 #19
0
def zstandard_read_block(decoder):
    """Read block in "zstandard" codec."""
    block_size = read_long(decoder)
    compressed = decoder.read_fixed(block_size)
    decompressor = zstd.ZstdDecompressor()
    return MemoryIO(decompressor.decompress(compressed))
예제 #20
0
def test_empty():
    """Check that an empty stream is rejected with a ``ValueError``."""
    empty_file = MemoryIO()
    with pytest.raises(ValueError) as exc:
        fastavro.reader(empty_file)

    # Inspect the raised exception itself: str() of the ExceptionInfo wrapper
    # is not guaranteed to be the exception message.
    assert 'cannot read header - is it an avro file?' in str(exc.value)
예제 #21
0
def test_json_encoding(record):
    """Round-trip ``record`` through the schemaless JSON writer and reader.

    The single ``name`` field is a union of null, two records, an array, a
    map and the primitive types.
    """
    full_name = {
        "type": "record",
        "namespace": "com.example",
        "name": "FullName",
        "fields": [
            {"name": "first", "type": "string"},
            {"name": "last", "type": "string"},
        ],
    }
    concatenated_full_name = {
        "type": "record",
        "namespace": "com.example",
        "name": "ConcatenatedFullName",
        "fields": [{"name": "entireName", "type": "string"}],
    }
    name_union = [
        "null",
        full_name,
        concatenated_full_name,
        {"type": "array", "items": "com.example.ConcatenatedFullName"},
        {"type": "map", "values": "com.example.ConcatenatedFullName"},
        {"type": "boolean"},
        {"type": "int"},
        {"type": "long"},
        {"type": "float"},
        {"type": "double"},
        {"type": "string"},
        {"type": "bytes"},
    ]
    schema = {
        "type": "record",
        "namespace": "com.example",
        "name": "NameUnion",
        "fields": [{"name": "name", "type": name_union}],
    }

    buffer = MemoryIO()
    fastavro.schemaless_json_writer(buffer, schema, record)
    buffer.seek(0)

    decoded = fastavro.schemaless_json_reader(buffer, schema)
    assert record == decoded
예제 #22
0
def roundtrip(schema, record):
    """Write ``record`` schemaless with ``schema`` and return it read back."""
    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)
    return fastavro.schemaless_reader(buffer, schema)
예제 #23
0
def test_schemaless_writer_and_reader_with_union():
    """Test the schemaless reader on a union of records when the
    return_record_name option is enabled.
    """
    application_fields = [
        {"name": "applicationId", "type": "string"},
        {"name": "data", "type": "string"},
    ]
    application_created = {
        "name": "ApplicationCreated",
        "type": "record",
        "fields": list(application_fields),
    }
    application_submitted = {
        "name": "ApplicationSubmitted",
        "type": "record",
        "fields": list(application_fields),
    }
    schema = {
        "name": "Message",
        "type": "record",
        "namespace": "test",
        "fields": [
            {"name": "id", "type": "long"},
            {
                "name": "payload",
                "type": [application_created, application_submitted],
            },
        ],
    }

    # The payload is given as a (record name, value) tuple.
    payload = (
        "test.ApplicationSubmitted",
        {"applicationId": "123456789UT", "data": "..."},
    )
    record = {"id": 123, "payload": payload}

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)

    decoded = fastavro.schemaless_reader(
        buffer, schema, return_record_name=True
    )
    assert record == decoded
예제 #24
0
def xz_read_block(decoder):
    """Read block in "xz" codec."""
    block_size = read_long(decoder)
    compressed = decoder.read_fixed(block_size)
    decompressed = lzma.decompress(compressed)
    return MemoryIO(decompressed)
예제 #25
0
def test_xz_works_by_default_on_windows_python3():
    """Exercise the "xz" codec.

    On Python 3 the records must round-trip; on older interpreters the
    writer is expected to raise a ``ValueError`` about installing support.
    """
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }
    records = [
        {"station": "011990-99999", "temp": 0, "time": 1433269388},
        {"station": "011990-99999", "temp": 22, "time": 1433270389},
        {"station": "011990-99999", "temp": -11, "time": 1433273379},
        {"station": "012650-99999", "temp": 111, "time": 1433275478},
    ]

    buffer = MemoryIO()

    if sys.version_info < (3, 0):
        expected_message = "xz codec is supported but you need to install"
        with pytest.raises(ValueError, match=expected_message):
            fastavro.writer(buffer, schema, records, codec="xz")
    else:
        fastavro.writer(buffer, schema, records, codec="xz")

        buffer.seek(0)
        read_back = list(fastavro.reader(buffer))
        assert records == read_back
예제 #26
0
def test_repo_caching_issue():
    """Round-trip two schemas that reuse the record names ``B`` and ``C``.

    In the first schema ``C.c`` is a string; in the second it is an int.
    """
    first_schema = {
        "type": "record",
        "name": "B",
        "fields": [{
            "name": "b",
            "type": {
                "type": "record",
                "name": "C",
                "fields": [{"name": "c", "type": "string"}],
            },
        }],
    }
    first_records = [{"b": {"c": "test"}}]

    first_file = MemoryIO()
    fastavro.writer(first_file, first_schema, first_records)
    first_file.seek(0)
    assert list(fastavro.reader(first_file)) == first_records

    # The same names "B" and "C" are declared again, with a different type
    # for "c", and "B" is then referenced by name in field "aa".
    second_schema = {
        "name": "A",
        "type": "record",
        "fields": [
            {
                "name": "a",
                "type": {
                    "type": "record",
                    "name": "B",
                    "fields": [{
                        "name": "b",
                        "type": {
                            "type": "record",
                            "name": "C",
                            "fields": [{"name": "c", "type": "int"}],
                        },
                    }],
                },
            },
            {"name": "aa", "type": "B"},
        ],
    }
    second_records = [{"a": {"b": {"c": 1}}, "aa": {"b": {"c": 2}}}]

    second_file = MemoryIO()
    fastavro.writer(second_file, second_schema, second_records)
    second_file.seek(0)
    assert list(fastavro.reader(second_file)) == second_records
예제 #27
0
def test_unsupported_codec():
    """Check that unknown codecs are rejected when writing and reading."""
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }
    records = [
        {"station": "011990-99999", "temp": 0, "time": 1433269388},
        {"station": "011990-99999", "temp": 22, "time": 1433270389},
        {"station": "011990-99999", "temp": -11, "time": 1433273379},
        {"station": "012650-99999", "temp": 111, "time": 1433275478},
    ]

    # Writing with an unknown codec name is rejected up front.
    rejected_target = MemoryIO()
    with pytest.raises(ValueError, match="unrecognized codec"):
        fastavro.writer(rejected_target, schema, records, codec="unsupported")

    deflate_file = MemoryIO()
    fastavro.writer(deflate_file, schema, records, codec="deflate")

    # Change the avro binary to act as if it were written with a codec called
    # `unsupported`
    deflate_bytes = deflate_file.getvalue()
    tampered_bytes = deflate_bytes.replace(
        b"\x0edeflate", b"\x16unsupported"
    )
    tampered_file = MemoryIO(tampered_bytes)

    with pytest.raises(ValueError, match="Unrecognized codec"):
        list(fastavro.reader(tampered_file))
예제 #28
0
def null_read_block(fo):
    """Read block in "null" codec."""
    # The "null" codec stores the bytes as-is, so no decompression step.
    payload = read_bytes(fo)
    return MemoryIO(payload)
예제 #29
0
def test_union_records():
    """Round-trip a value through a union of two similar record types.

    Records ``a`` and ``b`` both have ``x`` and ``y``; only ``b`` has ``z``.
    """
    record_a = {
        'name': 'a',
        'namespace': 'common',
        'type': 'record',
        'fields': [
            {'name': 'x', 'type': 'int'},
            {'name': 'y', 'type': 'int'},
        ],
    }
    record_b = {
        'name': 'b',
        'namespace': 'common',
        'type': 'record',
        'fields': [
            {'name': 'x', 'type': 'int'},
            {'name': 'y', 'type': 'int'},
            {'name': 'z', 'type': ['null', 'int']},
        ],
    }
    schema = {
        'name': 'test_name',
        'namespace': 'test',
        'type': 'record',
        'fields': [{'name': 'val', 'type': [record_a, record_b]}],
    }

    written = [{'val': {'x': 3, 'y': 4, 'z': 5}}]

    buffer = MemoryIO()
    fastavro.writer(buffer, schema, written)
    buffer.seek(0)

    read_back = list(fastavro.reader(buffer))
    assert read_back == written