Ejemplo n.º 1
0
def test_schema_migration_maps_with_union_promotion():
    """Map values written as string/int read back equal under string/long.

    A record with a single map field is written using the first schema and
    read with the second one; the decoded records must equal the input.
    """
    def map_record(value_types):
        # Anonymous record with one map-typed field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": {
                    "type": "map",
                    "values": value_types
                },
            }]
        }

    writer_schema = map_record(["string", "int"])
    reader_schema = map_record(["string", "long"])

    buffer = MemoryIO()
    expected = [{"test": {"foo": 1}}]
    fastavro.writer(buffer, writer_schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded == expected
Ejemplo n.º 2
0
def check(filename):
    """Round-trip the Avro file at ``filename`` and compare the results.

    The file is read, its records are rewritten into memory with the same
    schema and codec, and that copy is read back twice through a wrapper
    that refuses ``seek``: once without and once with an explicit reader
    schema. Files whose base name is in ``NO_DATA`` are only checked for
    the presence of a schema.
    """
    with open(filename, 'rb') as source:
        reader = fastavro.reader(source)
        assert hasattr(reader, 'schema'), 'no schema on file'

        if basename(filename) in NO_DATA:
            return

        records = list(reader)
        assert len(records) > 0, 'no records found'

    # Re-encode everything that was read into an in-memory file.
    buffer = MemoryIO()
    fastavro.writer(buffer, reader.schema, records, reader.codec)
    payload = buffer.getvalue()

    # Plain re-read: schema, codec and records must survive the round trip.
    reread = fastavro.reader(NoSeekMemoryIO(payload))
    assert hasattr(reread, 'schema'), "schema wasn't written"
    assert reread.schema == reader.schema
    assert reread.codec == reader.codec
    assert list(reread) == records

    # Test schema migration with the same schema
    migrated = fastavro.reader(NoSeekMemoryIO(payload), reader.schema)
    assert migrated.reader_schema == reader.schema
    assert list(migrated) == records
Ejemplo n.º 3
0
def test_schema_migration_array_failure():
    """Array items written as string/int must not resolve to string/boolean.

    Consuming the reader is expected to raise SchemaResolutionError; the
    test fails if iteration completes instead.
    """
    def array_record(item_types):
        # Anonymous record with one array-typed field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": {
                    "type": "array",
                    "items": item_types
                },
            }]
        }

    writer_schema = array_record(["string", "int"])
    reader_schema = array_record(["string", "boolean"])

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": [1, 2, 3]}])
    buffer.seek(0)
    reader = fastavro.reader(buffer, reader_schema)

    resolution_failed = False
    try:
        list(reader)
    except fastavro._reader.SchemaResolutionError:
        resolution_failed = True
    assert resolution_failed
Ejemplo n.º 4
0
def test_schema_migration_array_with_union_promotion():
    """Array items written as boolean/long read back equal under string/float.

    The integers written with the first schema must compare equal to the
    values decoded with the second schema.
    """
    def array_record(item_types):
        # Anonymous record with one array-typed field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": {
                    "type": "array",
                    "items": item_types
                },
            }]
        }

    writer_schema = array_record(["boolean", "long"])
    reader_schema = array_record(["string", "float"])

    buffer = MemoryIO()
    expected = [{"test": [1, 2, 3]}]
    fastavro.writer(buffer, writer_schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded == expected
Ejemplo n.º 5
0
def test_schema_migration_array_failure():
    """Reading array items with an incompatible reader union must fail.

    Records are written with array items of ["string", "int"] and read back
    with a reader schema whose items are ["string", "boolean"]. Consuming
    the reader is expected to raise SchemaResolutionError.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "int"]
            },
        }]
    }

    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "boolean"]
            },
        }]
    }

    new_file = MemoryIO()
    records = [{"test": [1, 2, 3]}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # Assert the failure explicitly: without this the expected error would
    # escape the function and the test could never pass on its own.
    try:
        list(new_reader)
    except fastavro._reader.SchemaResolutionError:
        pass
    else:
        raise AssertionError(
            "expected SchemaResolutionError for incompatible array items")
Ejemplo n.º 6
0
def test_schema_migration_maps_failure():
    """Reading string map values with a "long" reader schema must fail.

    Records are written with map values of type "string" and read back
    with a reader schema whose map values are "long". Consuming the reader
    is expected to raise SchemaResolutionError.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "map",
                "values": "string"
            },
        }]
    }

    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "map",
                "values": "long"
            },
        }]
    }

    new_file = MemoryIO()
    records = [{"test": {"foo": "a"}}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # Assert the failure explicitly: without this the expected error would
    # escape the function and the test could never pass on its own.
    try:
        list(new_reader)
    except fastavro._reader.SchemaResolutionError:
        pass
    else:
        raise AssertionError(
            "expected SchemaResolutionError for incompatible map values")
Ejemplo n.º 7
0
def test_write_long_union_type():
    """A large integer in a null/long union field is written and read back.

    Writing must not raise ValueError, and the decoded record must carry
    the same integer value.
    """
    big_value = 809066167221092352
    schema = {
        'name': 'test_name',
        'namespace': 'test_ns',
        'type': 'record',
        'fields': [
            {'name': 'time', 'type': ['null', 'long']},
        ],
    }

    buffer = MemoryIO()
    records = [{u'time': big_value}]
    try:
        fastavro.writer(buffer, schema, records)
    except ValueError:
        assert False
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer))
    assert decoded == [{u'time': big_value}]
Ejemplo n.º 8
0
def test_schema_migration_maps_failure():
    """Map values written as "string" must not resolve to "long".

    Consuming the reader is expected to raise SchemaResolutionError; the
    test fails if iteration completes instead.
    """
    def map_record(value_type):
        # Anonymous record with one map-typed field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": {
                    "type": "map",
                    "values": value_type
                },
            }]
        }

    writer_schema = map_record("string")
    reader_schema = map_record("long")

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": {"foo": "a"}}])
    buffer.seek(0)
    reader = fastavro.reader(buffer, reader_schema)

    resolution_failed = False
    try:
        list(reader)
    except fastavro._reader.SchemaResolutionError:
        resolution_failed = True
    assert resolution_failed
Ejemplo n.º 9
0
def test_schema_migration_schema_mismatch():
    """Data written as a record must not resolve against an enum schema.

    Consuming the reader is expected to raise SchemaResolutionError; the
    test fails if iteration completes instead.
    """
    record_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "string"}]
    }
    enum_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, record_schema, [{"test": "test"}])
    buffer.seek(0)
    reader = fastavro.reader(buffer, enum_schema)

    resolution_failed = False
    try:
        list(reader)
    except fastavro._reader.SchemaResolutionError:
        resolution_failed = True
    assert resolution_failed
Ejemplo n.º 10
0
def test_schema_migration_union_failure():
    """A boolean field must not resolve against a string/int reader union.

    Consuming the reader is expected to raise SchemaResolutionError.
    """
    def single_field_record(field_type):
        # Anonymous record with one field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": field_type
            }]
        }

    writer_schema = single_field_record("boolean")
    reader_schema = single_field_record(["string", "int"])

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": True}])
    buffer.seek(0)
    reader = fastavro.reader(buffer, reader_schema)

    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(reader)
Ejemplo n.º 11
0
def test_aliases_not_present():
    """A reader field whose alias does not match the written field must fail.

    The writer field is named "test" while the reader field "newtest" only
    lists the alias "testX"; consuming the reader is expected to raise
    SchemaResolutionError.
    """
    writer_schema = {
        "type": "record",
        "name": "test_aliases_not_present",
        "fields": [{"name": "test", "type": "double"}]
    }
    reader_schema = {
        "type": "record",
        "name": "test_aliases_not_present_new",
        "fields": [
            {"name": "newtest", "type": "double", "aliases": ["testX"]},
        ]
    }

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": 1.2}])
    buffer.seek(0)
    avro_reader = fastavro.reader(buffer, reader_schema)

    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(avro_reader)
Ejemplo n.º 12
0
def test_fastavro_errors_read_enum():
    """Reading an enum symbol missing from the reader schema reports '<enum>'.

    "CLUBS" is written with a four-symbol schema and read with a schema that
    lacks it; the AvroValueError message must contain '<enum>'.
    """
    stream = MemoryIO()

    all_suits = ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
    writer_schema = {"type": "enum", "name": "Suit", "symbols": all_suits}
    # The reader schema lacks the last symbol, which is the one written.
    reader_schema = {
        "type": "enum",
        "name": "Suit",
        "symbols": all_suits[:-1],
    }

    write_data(stream, "CLUBS", writer_schema)
    stream.seek(0)
    try:
        read_data(stream, reader_schema)
    except AvroValueError as err:
        assert '<enum>' in str(err)
    else:
        assert False, 'bad schema did not raise!'
Ejemplo n.º 13
0
def test_schema_migration_maps_failure():
    """String map values read with a "long" map reader schema must fail.

    The test passes only when consuming the reader raises
    SchemaResolutionError.
    """
    writer_field = {
        "name": "test",
        "type": {"type": "map", "values": "string"},
    }
    reader_field = {
        "name": "test",
        "type": {"type": "map", "values": "long"},
    }
    writer_schema = {"type": "record", "fields": [writer_field]}
    reader_schema = {"type": "record", "fields": [reader_field]}

    stream = MemoryIO()
    written = [{"test": {"foo": "a"}}]
    fastavro.writer(stream, writer_schema, written)
    stream.seek(0)
    migrating_reader = fastavro.reader(stream, reader_schema)

    saw_error = False
    try:
        list(migrating_reader)
    except fastavro._reader.SchemaResolutionError:
        saw_error = True
    assert saw_error
Ejemplo n.º 14
0
def test_schema_migration_schema_mismatch():
    """A record-typed file read with an enum reader schema must fail.

    The test passes only when consuming the reader raises
    SchemaResolutionError.
    """
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "string"}]
    }
    reader_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    stream = MemoryIO()
    written = [{"test": "test"}]
    fastavro.writer(stream, writer_schema, written)
    stream.seek(0)
    migrating_reader = fastavro.reader(stream, reader_schema)

    saw_error = False
    try:
        list(migrating_reader)
    except fastavro._reader.SchemaResolutionError:
        saw_error = True
    assert saw_error
Ejemplo n.º 15
0
def test_file(filename):
    """Round-trip the Avro file at ``filename`` and compare the results.

    Records read from the file are rewritten into memory with the original
    schema and codec, then decoded again through a wrapper that refuses
    ``seek``, both without and with an explicit reader schema. Files whose
    base name is in ``NO_DATA`` are only checked for a schema.
    """
    with open(filename, 'rb') as source:
        original = fastavro.reader(source)
        assert hasattr(original, 'schema'), 'no schema on file'

        if basename(filename) in NO_DATA:
            return

        records = list(original)
        assert len(records) > 0, 'no records found'

    # Re-encode what was read; only the raw bytes are needed afterwards.
    sink = MemoryIO()
    fastavro.writer(sink, original.schema, records, original.codec)
    encoded = sink.getvalue()

    # Decode without a reader schema.
    copy_reader = fastavro.reader(NoSeekMemoryIO(encoded))
    assert hasattr(copy_reader, 'schema'), "schema wasn't written"
    assert copy_reader.schema == original.schema
    assert copy_reader.codec == original.codec
    assert list(copy_reader) == records

    # Test schema migration with the same schema
    same_schema_reader = fastavro.reader(NoSeekMemoryIO(encoded),
                                         original.schema)
    assert same_schema_reader.reader_schema == original.schema
    assert list(same_schema_reader) == records
Ejemplo n.º 16
0
def test_schema_migration_union_failure():
    """A boolean field read with a string/int reader union must fail.

    The test passes only when consuming the reader raises
    SchemaResolutionError.
    """
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "boolean"}]
    }
    reader_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": ["string", "int"]}]
    }

    stream = MemoryIO()
    written = [{"test": True}]
    fastavro.writer(stream, writer_schema, written)
    stream.seek(0)
    migrating_reader = fastavro.reader(stream, reader_schema)

    saw_error = False
    try:
        list(migrating_reader)
    except fastavro._reader.SchemaResolutionError:
        saw_error = True
    assert saw_error
Ejemplo n.º 17
0
def test_nullable_values():
    """A missing value for a string/null union field is read back as None.

    Two records are written: one that omits "nullable_field" and one that
    sets it. The decoded records must contain None and the given string
    respectively.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "nullable_field",
            "type": ["string", "null"]
        }, {
            "name": "field",
            "type": "string"
        }]
    }
    new_file = MemoryIO()
    records = [{"field": "val"}, {"field": "val", "nullable_field": "no_null"}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file)
    new_records = list(new_reader)
    # The leftover debug print of new_records was dropped; a failing assert
    # already reports the values.
    assert new_records == [{
        'nullable_field': None,
        'field': 'val'
    }, {
        'nullable_field': 'no_null',
        'field': 'val'
    }]
Ejemplo n.º 18
0
def test_schema_migration_reader_union():
    """An int field read with a string/int reader union decodes unchanged.

    The writer and reader schemas carry different record names; the decoded
    records must equal the written ones.
    """
    writer_schema = {
        "type": "record",
        "name": "test_schema_migration_reader_union",
        "fields": [{"name": "test", "type": "int"}]
    }
    reader_schema = {
        "type": "record",
        "name": "test_schema_migration_reader_union_new",
        "fields": [{"name": "test", "type": ["string", "int"]}]
    }

    buffer = MemoryIO()
    expected = [{"test": 1}]
    fastavro.writer(buffer, writer_schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded == expected
Ejemplo n.º 19
0
def test_ordered_dict_record():
    """
    Round-trip a record supplied as an OrderedDict rather than a plain dict.
    Guards against a bug where only dict, and not dict-like types, was
    accepted.
    """
    schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [{"name": "field", "type": {"type": "string"}}]
    }

    buffer = MemoryIO()
    expected = [OrderedDict([("field", "foobar")])]
    fastavro.writer(buffer, schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer))
    assert decoded == expected
Ejemplo n.º 20
0
def test_schema_migration_maps_failure():
    """String map values read with a "long" map reader schema must fail.

    Consuming the reader is expected to raise SchemaResolutionError.
    """
    def map_record(value_type):
        # Anonymous record with one map-typed field named "test".
        return {
            "type": "record",
            "fields": [{
                "name": "test",
                "type": {
                    "type": "map",
                    "values": value_type
                },
            }]
        }

    writer_schema = map_record("string")
    reader_schema = map_record("long")

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{"test": {"foo": "a"}}])
    buffer.seek(0)
    reader = fastavro.reader(buffer, reader_schema)

    with pytest.raises(fastavro.read.SchemaResolutionError):
        list(reader)
Ejemplo n.º 21
0
def test_aliases_in_reader_schema():
    """A reader field aliased to the written field name receives its value.

    The writer field is "test"; the reader field "newtest" lists "test" as
    an alias, so the decoded "newtest" must equal the written "test".
    """
    writer_schema = {
        "type": "record",
        "name": "test_aliases_in_reader_schema",
        "fields": [{"name": "test", "type": "int"}]
    }
    reader_schema = {
        "type": "record",
        "name": "test_aliases_in_reader_schema_new",
        "fields": [{"name": "newtest", "type": "int", "aliases": ["test"]}]
    }

    buffer = MemoryIO()
    written = [{"test": 1}]
    fastavro.writer(buffer, writer_schema, written)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded[0]["newtest"] == written[0]["test"]
Ejemplo n.º 22
0
def roundtrip(record, writer_schema, reader_schema):
    """Write one record with ``writer_schema`` and read it via ``reader_schema``.

    Returns the first record decoded from the in-memory file.
    """
    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [record])
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    return decoded[0]
Ejemplo n.º 23
0
def test_schema_migration_maps_with_union_promotion():
    """Map values written as string/int decode equal under string/long.

    The decoded records must equal the records that were written.
    """
    writer_field = {
        "name": "test",
        "type": {"type": "map", "values": ["string", "int"]},
    }
    reader_field = {
        "name": "test",
        "type": {"type": "map", "values": ["string", "long"]},
    }
    writer_schema = {"type": "record", "fields": [writer_field]}
    reader_schema = {"type": "record", "fields": [reader_field]}

    stream = MemoryIO()
    written = [{"test": {"foo": 1}}]
    fastavro.writer(stream, writer_schema, written)
    stream.seek(0)

    migrated = list(fastavro.reader(stream, reader_schema))
    assert migrated == written
Ejemplo n.º 24
0
def test_schema_migration_array_with_union_promotion():
    """Array items written as boolean/long decode equal under string/float.

    The decoded records must compare equal to the integers that were
    written.
    """
    writer_field = {
        "name": "test",
        "type": {"type": "array", "items": ["boolean", "long"]},
    }
    reader_field = {
        "name": "test",
        "type": {"type": "array", "items": ["string", "float"]},
    }
    writer_schema = {"type": "record", "fields": [writer_field]}
    reader_schema = {"type": "record", "fields": [reader_field]}

    stream = MemoryIO()
    written = [{"test": [1, 2, 3]}]
    fastavro.writer(stream, writer_schema, written)
    stream.seek(0)

    migrated = list(fastavro.reader(stream, reader_schema))
    assert migrated == written
Ejemplo n.º 25
0
def test_schema_is_custom_dict_type():
    """https://github.com/tebeka/fastavro/issues/168

    A schema wrapped in a dict subclass must work as the reader's schema:
    the record written with the plain-dict schema is read back unchanged.
    """
    class CustomDict(dict):
        pass

    description_type = [
        "null",
        {"type": "array", "items": "string"},
        "string",
    ]
    schema = {
        'type': 'record',
        'fields': [{
            'name': 'description',
            "type": description_type,
        }],
        "name": "description",
        "doc": "A description of the thing."
    }
    wrapped_schema = CustomDict(schema)

    record = {'description': 'value'}

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)
    decoded = fastavro.schemaless_reader(buffer, wrapped_schema)
    assert record == decoded
Ejemplo n.º 26
0
def test_schema_migration_array_failure():
    """Reading array items with an incompatible reader union must fail.

    Records are written with array items of ["string", "int"] and read back
    with a reader schema whose items are ["string", "boolean"]. Consuming
    the reader is expected to raise SchemaResolutionError.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "int"]
            },
        }]
    }

    new_schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {
                "type": "array",
                "items": ["string", "boolean"]
            },
        }]
    }

    new_file = MemoryIO()
    records = [{"test": [1, 2, 3]}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    # Assert the failure explicitly: without this the expected error would
    # escape the function and the test could never pass on its own.
    try:
        list(new_reader)
    except fastavro._reader.SchemaResolutionError:
        pass
    else:
        raise AssertionError(
            "expected SchemaResolutionError for incompatible array items")
Ejemplo n.º 27
0
def test_ordered_dict_map():
    """
    Round-trip a record whose map field is an OrderedDict instead of a plain
    dict. Guards against a bug where only dict, and not dict-like types, was
    accepted.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": {"type": "map", "values": ["string", "int"]},
        }]
    }

    buffer = MemoryIO()
    expected = [{"test": OrderedDict([("foo", 1)])}]
    fastavro.writer(buffer, schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer))
    assert decoded == expected
Ejemplo n.º 28
0
def test_compression_level():
    """https://github.com/fastavro/fastavro/issues/377

    Records written with the deflate codec and codec_compression_level=9
    must be read back unchanged.
    """
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }

    # (station, temp, time) triples expanded into record dicts below.
    readings = [
        ("011990-99999", 0, 1433269388),
        ("011990-99999", 22, 1433270389),
        ("011990-99999", -11, 1433273379),
        ("012650-99999", 111, 1433275478),
    ]
    records = [
        {"station": station, "temp": temp, "time": timestamp}
        for station, temp, timestamp in readings
    ]

    buffer = MemoryIO()
    fastavro.writer(
        buffer,
        schema,
        records,
        codec="deflate",
        codec_compression_level=9,
    )

    buffer.seek(0)
    decoded = list(fastavro.reader(buffer))
    assert records == decoded
Ejemplo n.º 29
0
def test_write_union_shortcut():
    """A (name, value) tuple selects the union branch when writing.

    Field "a" is a union of records "B" and "C"; the value is written as
    ("B", {...}) and must be read back as the bare record dict.
    """
    def string_record(record_name, field_name):
        # Named record with a single string field.
        return {
            "type": "record",
            "name": record_name,
            "fields": [{"name": field_name, "type": "string"}]
        }

    schema = {
        "type": "record",
        "name": "A",
        "fields": [{
            "name": "a",
            "type": [string_record("B", "b"), string_record("C", "c")]
        }]
    }

    buffer = MemoryIO()
    written = [{"a": ("B", {"b": "test"})}]
    fastavro.writer(buffer, schema, written)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer))
    assert decoded == [{"a": {"b": "test"}}]
Ejemplo n.º 30
0
def test_write_read():
    """Write the module-level records with parsed_schema and read them back."""
    buffer = MemoryIO()
    writer(buffer, parsed_schema, records)
    buffer.seek(0)

    decoded = list(reader(buffer, parsed_schema))
    assert decoded == records
Ejemplo n.º 31
0
def test_schemaless_writer_and_reader_with_union():
    """Round-trip a union-of-records payload given as a (name, value) tuple.

    The payload is written as ("test.ApplicationSubmitted", {...}) and the
    record read back must equal the input, tuple included. Per the original
    docstring this exercises the reader's return_record_name option.
    """
    schema = {
        "name": "Message",
        "type": "record",
        "namespace": "test",
        "fields": [{
            "name": "id",
            "type": "long"
        }, {
            "name": "payload",
            "type": [
                {
                    "name": "ApplicationCreated",
                    "type": "record",
                    "fields": [{
                        "name": "applicationId",
                        "type": "string"
                    }, {
                        "name": "data",
                        "type": "string"
                    }]
                },
                {
                    "name": "ApplicationSubmitted",
                    "type": "record",
                    "fields": [{
                        "name": "applicationId",
                        "type": "string"
                    }, {
                        "name": "data",
                        "type": "string"
                    }]
                },
            ]
        }]
    }
    # The unused ``input_record`` alias of this dict was removed.
    record = {
        "id": 123,
        "payload": ("test.ApplicationSubmitted", {
            "applicationId": "123456789UT",
            "data": "..."
        })
    }
    new_file = MemoryIO()
    fastavro.schemaless_writer(new_file, schema, record)
    new_file.seek(0)
    # NOTE(review): the trailing positional arguments are presumably
    # reader_schema=None and return_record_name=True; confirm against the
    # installed fastavro signature.
    new_record = fastavro.schemaless_reader(new_file, schema, None, True)
    assert record == new_record
Ejemplo n.º 32
0
def roundtrip(schema, records, new_schema):
    """Write ``records`` with ``schema`` and return them as read via ``new_schema``."""
    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records)
    buffer.seek(0)

    return list(fastavro.reader(buffer, new_schema))
Ejemplo n.º 33
0
def roundtrip(schema, records):
    """Write ``records`` with ``schema`` to memory and return them as read back."""
    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records)
    buffer.seek(0)

    return list(fastavro.reader(buffer))
Ejemplo n.º 34
0
def test_default_values():
    """An empty record is read back with the field's declared default."""
    default_field = {
        "name": "default_field",
        "type": "string",
        "default": "default_value",
    }
    schema = {"type": "record", "fields": [default_field]}

    buffer = MemoryIO()
    fastavro.writer(buffer, schema, [{}])
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer))
    assert decoded == [{"default_field": "default_value"}]
Ejemplo n.º 35
0
def test_metadata():
    """Metadata passed to the writer is exposed on the reader."""
    empty_record_schema = {"type": "record", "fields": []}
    extra = {"key": "value"}

    buffer = MemoryIO()
    fastavro.writer(buffer, empty_record_schema, [{}], metadata=extra)
    buffer.seek(0)

    avro_reader = fastavro.reader(buffer)
    assert avro_reader.metadata["key"] == extra["key"]
Ejemplo n.º 36
0
def test_metadata():
    """The 'key' metadata entry given to the writer is readable afterwards."""
    stream = MemoryIO()
    written_metadata = {'key': 'value'}
    fastavro.writer(
        stream,
        {"type": "record", "fields": []},
        [{}],
        metadata=written_metadata,
    )
    stream.seek(0)

    read_metadata = fastavro.reader(stream).metadata
    assert read_metadata['key'] == written_metadata['key']
Ejemplo n.º 37
0
def test_builtin_codecs(codec):
    """Records written with the given ``codec`` must be read back unchanged."""
    schema = {
        "doc": "A weather reading.",
        "name": "Weather",
        "namespace": "test",
        "type": "record",
        "fields": [
            {"name": "station", "type": "string"},
            {"name": "time", "type": "long"},
            {"name": "temp", "type": "int"},
        ],
    }

    # (station, temp, time) triples expanded into record dicts below.
    readings = [
        ("011990-99999", 0, 1433269388),
        ("011990-99999", 22, 1433270389),
        ("011990-99999", -11, 1433273379),
        ("012650-99999", 111, 1433275478),
    ]
    records = [
        {"station": station, "temp": temp, "time": timestamp}
        for station, temp, timestamp in readings
    ]

    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records, codec=codec)

    buffer.seek(0)
    decoded = list(fastavro.reader(buffer))
    assert records == decoded
Ejemplo n.º 38
0
def test_schema_migration_reader_union():
    """An int field read with a string/int reader union decodes unchanged."""
    writer_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": "int"}],
    }
    reader_schema = {
        "type": "record",
        "fields": [{"name": "test", "type": ["string", "int"]}],
    }

    buffer = MemoryIO()
    expected = [{"test": 1}]
    fastavro.writer(buffer, writer_schema, expected)
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded == expected
Ejemplo n.º 39
0
def test_schema_migration_add_default_field():
    """A field added by the reader schema is filled from its default."""
    writer_schema = {"type": "record", "fields": []}
    added_field = {"name": "test", "type": "string", "default": "default"}
    reader_schema = {"type": "record", "fields": [added_field]}

    buffer = MemoryIO()
    fastavro.writer(buffer, writer_schema, [{}])
    buffer.seek(0)

    decoded = list(fastavro.reader(buffer, reader_schema))
    assert decoded == [{"test": "default"}]
Ejemplo n.º 40
0
def roundtrip(schema, records, pass_schema_to_reader=False):
    """Write ``records`` with ``schema`` and return them as read back.

    When ``pass_schema_to_reader`` is true, the same schema is also handed
    to the reader as its second argument.
    """
    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records)
    buffer.seek(0)

    reader_args = (buffer, schema) if pass_schema_to_reader else (buffer,)
    return list(fastavro.reader(*reader_args))
Ejemplo n.º 41
0
def test_schemaless_writer_and_reader():
    """A record written schemaless is read back equal with the same schema."""
    string_field = {"name": "field", "type": {"type": "string"}}
    schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [string_field],
    }
    record = {"field": "test"}

    buffer = MemoryIO()
    fastavro.schemaless_writer(buffer, schema, record)
    buffer.seek(0)

    decoded = fastavro.schemaless_reader(buffer, schema)
    assert record == decoded
Ejemplo n.º 42
0
class NoSeekMemoryIO(object):
    """Wrapper around MemoryIO that exposes ``read`` and ``tell`` only.

    ``seek`` always fails, which lets tests verify that the code under test
    does not rely on seeking.
    """

    def __init__(self, *args):
        # All constructor arguments are forwarded to the wrapped MemoryIO.
        stream = MemoryIO(*args)
        self.underlying = stream

    def read(self, n):
        """Return the result of reading ``n`` from the wrapped stream."""
        data = self.underlying.read(n)
        return data

    def tell(self):
        """Return the wrapped stream's current position."""
        position = self.underlying.tell()
        return position

    def seek(self, *args):
        """Always raise AssertionError; arguments are ignored."""
        message = "fastavro reader should not depend on seek"
        raise AssertionError(message)
Ejemplo n.º 43
0
def test_appending_records_with_io_stream():
    """https://github.com/fastavro/fastavro/issues/276

    A second write to the same stream appends records; a write after
    seeking back to the start replaces the contents.
    """
    schema = {
        "type": "record",
        "name": "test_appending_records_with_io_stream",
        "fields": [{"name": "field", "type": "string"}]
    }

    buffer = MemoryIO()

    fastavro.writer(buffer, schema, [{"field": "foo"}])

    # Should be able to append to the existing stream
    fastavro.writer(buffer, schema, [{"field": "bar"}])

    buffer.seek(0)
    appended = list(fastavro.reader(buffer))
    assert appended == [{"field": "foo"}, {"field": "bar"}]

    # If we seek to the beginning and write, it will be treated like a brand
    # new file
    long_value = "abcdefghijklmnopqrstuvwxyz"
    buffer.seek(0)
    fastavro.writer(buffer, schema, [{"field": long_value}])

    buffer.seek(0)
    rewritten = list(fastavro.reader(buffer))
    assert rewritten == [{"field": long_value}]
Ejemplo n.º 44
0
def make_blocks(num_records=2000, codec='null'):
    """Write generated records to memory and read them back as blocks.

    Returns a ``(blocks, records)`` tuple: the blocks produced by
    ``fastavro.block_reader`` and the records that were written.
    """
    records = make_records(num_records)

    buffer = MemoryIO()
    fastavro.writer(buffer, schema, records, codec=codec)
    buffer.seek(0)

    # Collect every block before the buffer is closed.
    blocks = list(fastavro.block_reader(buffer, schema))
    buffer.close()

    return blocks, records
Ejemplo n.º 45
0
def check_concatenate(source_codec='null', output_codec='null'):
    """Concatenate blocks from two generated files and verify the records.

    Blocks produced with ``source_codec`` are written one by one through a
    Writer using ``output_codec``; reading the result must yield the
    records of both sources in order.
    """
    first_blocks, first_records = make_blocks(codec=source_codec)
    second_blocks, second_records = make_blocks(codec=source_codec)

    buffer = MemoryIO()
    block_writer = fastavro.write.Writer(buffer, schema, codec=output_codec)
    for blocks in (first_blocks, second_blocks):
        for block in blocks:
            block_writer.write_block(block)

    # Read the file back to make sure we get back the same stuff
    buffer.seek(0)
    decoded = list(fastavro.reader(buffer, schema))
    assert decoded == first_records + second_records
Ejemplo n.º 46
0
def check_concatenate(source_codec='null', output_codec='null'):
    """Write the blocks of two generated files into one and compare records.

    Source blocks use ``source_codec``; the combined file is written with
    ``output_codec``. The combined file must decode to the records of the
    first source followed by those of the second.
    """
    blocks_a, records_a = make_blocks(codec=source_codec)
    blocks_b, records_b = make_blocks(codec=source_codec)

    combined = MemoryIO()
    out = fastavro.write.Writer(combined, schema, codec=output_codec)
    for source_blocks in (blocks_a, blocks_b):
        for blk in source_blocks:
            out.write_block(blk)

    # Read the file back to make sure we get back the same stuff
    combined.seek(0)
    expected = records_a + records_b
    assert list(fastavro.reader(combined, schema)) == expected
Ejemplo n.º 47
0
def test_newer_versions_of_named_schemas():
    """https://github.com/fastavro/fastavro/issues/450

    Parses two versions of the same pair of named schemas, differing only in
    the type of ``Location.city``, then writes one record with each parsed
    schema (v2 first, then v1). The test passes if neither write raises.
    """
    def named_schemas(city_type):
        # "Weather" refers to "Location" by name only.
        return [
            {
                "name": "Location",
                "type": "record",
                "fields": [{"name": "city", "type": city_type}],
            },
            {
                "name": "Weather",
                "type": "record",
                "fields": [{"name": "of", "type": "Location"}],
            },
        ]

    # Parse v1 before v2, matching the order the records are not written in.
    parsed_v1 = fastavro.parse_schema(named_schemas("string"))
    parsed_v2 = fastavro.parse_schema(named_schemas("long"))

    fastavro.schemaless_writer(MemoryIO(), parsed_v2, {"of": {"city": 123}})
    fastavro.schemaless_writer(MemoryIO(), parsed_v1, {"of": {"city": "London"}})
Ejemplo n.º 48
0
def test_schema_migration_enum_failure():
    """Reading "FOO" with a reader enum that lacks that symbol must fail.

    The file is written with symbols FOO/BAR and read with BAZ/BAR; consuming
    the reader is required to raise ``SchemaResolutionError``.
    """
    writer_schema = {"type": "enum", "name": "test", "symbols": ["FOO", "BAR"]}
    reader_schema = {"type": "enum", "name": "test", "symbols": ["BAZ", "BAR"]}

    avro_file = MemoryIO()
    fastavro.writer(avro_file, writer_schema, ["FOO"])
    avro_file.seek(0)
    migrating_reader = fastavro.reader(avro_file, reader_schema)

    resolution_failed = False
    try:
        list(migrating_reader)
    except fastavro._reader.SchemaResolutionError:
        resolution_failed = True
    assert resolution_failed
Ejemplo n.º 49
0
def test_fastavro_errors_read_record():
    """A read error inside a nested record must report the path to the field.

    Data is written with ``inner.y`` declared as ``int`` and read with it
    declared as ``float``; the resulting ``AvroValueError`` message has to
    contain ``<record>.x.<record>.y``.
    """
    def extension_schema(y_type):
        # Writer and reader schemas differ only in the type of inner field "y".
        return {
            "type": "record",
            "name": "extension_test",
            "doc": "Complex schema with avro extensions",
            "fields": [
                {"name": "x",
                 "type": {
                     "type": "record",
                     "name": "inner",
                     "fields": [{"name": "y", "type": y_type}],
                 }},
            ],
        }

    stream = MemoryIO()
    write_data(stream, {"x": {"y": 0}}, extension_schema("int"))
    stream.seek(0)
    try:
        read_data(stream, extension_schema("float"))
        assert False, 'bad schema did not raise!'
    except AvroValueError as err:
        assert '<record>.x.<record>.y' in str(err)
Ejemplo n.º 50
0
def test_schema_migration_enum_failure():
    """Reading an enum value the reader schema does not know must fail.

    "FOO" is written with symbols FOO/BAR and read back with a reader schema
    whose symbols are BAZ/BAR. Schema resolution is expected to raise
    ``SchemaResolutionError``; the test fails if reading succeeds.
    """
    schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    new_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["BAZ", "BAR"],
    }

    new_file = MemoryIO()
    records = ["FOO"]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    try:
        list(new_reader)
    except fastavro._reader.SchemaResolutionError:
        # Expected outcome: "FOO" is not a symbol of the reader schema.
        pass
    else:
        # Raise explicitly so the check also holds when asserts are disabled.
        raise AssertionError(
            "reading 'FOO' with a reader enum lacking that symbol did not raise"
        )
Ejemplo n.º 51
0
def test_fastavro_complex_nested():
    """Round-trip one nested record using the schema in complex-nested.avsc.

    The record is written and read with ``enable_extensions=True``. The
    reader must expose the schema that was loaded from disk and return the
    record unchanged.
    """
    schema_path = join(data_dir, 'complex-nested.avsc')
    with open(schema_path) as schema_file:
        schema = json.load(schema_file)

    default_level = {
        "test_int_def": 1,
        "test_string_def": "nope",
    }
    second_level = {
        "test_int2": 100,
        "test_string2": "asdf",
        "default_level": default_level,
    }
    expected = [{
        "test_boolean": True,
        "test_int": 10,
        "test_long": 20,
        "test_float": 2.0,
        "test_double": 2.0,
        "test_bytes": b'asdf',
        "test_string": 'qwerty',
        "second_level": second_level,
        "fixed_int8": 1,
        "fixed_int16": 2,
        "fixed_int32": 3,
        "fixed_int64": 4,
        "fixed_uint8": 1,
        "fixed_uint16": 2,
        "fixed_uint32": 3,
        "fixed_uint64": 4,
        "fixed_int8_2": 12,
    }]

    avro_file = MemoryIO()
    fastavro.writer(avro_file, schema, expected, enable_extensions=True)

    avro_file.seek(0)
    nested_reader = fastavro.reader(avro_file, enable_extensions=True)

    assert nested_reader.schema == schema
    assert list(nested_reader) == expected
Ejemplo n.º 52
0
def test_schema_migration_schema_mismatch():
    """Reading a record file with an enum reader schema must fail.

    A record is written and then read with a reader schema of a different
    kind (an enum). The read is expected to raise ``SchemaResolutionError``;
    the test fails if reading succeeds.

    NOTE(review): the exception type is taken from fastavro's schema
    resolution behaviour rather than from this block - confirm against the
    fastavro version in use.
    """
    schema = {
        "type": "record",
        "fields": [{
            "name": "test",
            "type": "string",
        }]
    }

    new_schema = {
        "type": "enum",
        "name": "test",
        "symbols": ["FOO", "BAR"],
    }

    new_file = MemoryIO()
    records = [{"test": "test"}]
    fastavro.writer(new_file, schema, records)
    new_file.seek(0)
    new_reader = fastavro.reader(new_file, new_schema)
    try:
        list(new_reader)
    except fastavro._reader.SchemaResolutionError:
        # Expected outcome: writer and reader schemas are of different kinds.
        pass
    else:
        # Raise explicitly so the check also holds when asserts are disabled.
        raise AssertionError(
            "reading a record with an enum reader schema did not raise"
        )
Ejemplo n.º 53
0
class NoSeekMemoryIO(object):
    """Wrapper around a MemoryIO that only supports ``read``.

    ``seek`` always raises, so a test using this object proves the reader
    does not rely on the seek API.
    """

    def __init__(self, *initial):
        # All positional arguments are forwarded to MemoryIO unchanged.
        self.underlying = MemoryIO(*initial)

    def read(self, n):
        """Return the result of reading ``n`` from the wrapped stream."""
        chunk = self.underlying.read(n)
        return chunk

    def seek(self, *args):
        """Reject every seek attempt with an AssertionError."""
        message = "fastavro reader should not depend on seek"
        raise AssertionError(message)
Ejemplo n.º 54
0
def test_default_values_in_reader():
    """A reader-only field with a default is filled in on read.

    The record is written with a schema holding only ``good_field``. It is
    read with a reader schema that adds ``good_compatible_field`` (default
    1), and the decoded record must contain both fields.
    """
    writer_schema = {
        'name': 'name1',
        'type': 'record',
        'namespace': 'namespace1',
        'fields': [
            {'doc': 'test', 'type': 'int', 'name': 'good_field'},
        ],
        'doc': 'test',
    }

    reader_schema = {
        'name': 'name1',
        'doc': 'test',
        'namespace': 'namespace1',
        'fields': [
            {'name': 'good_field', 'doc': 'test', 'type': 'int'},
            {
                'name': 'good_compatible_field',
                'doc': 'test',
                'default': 1,
                'type': 'int',
            },
        ],
        'type': 'record',
    }

    encoded = MemoryIO()
    fastavro.schemaless_writer(encoded, writer_schema, {'good_field': 1})
    encoded.seek(0)
    decoded = fastavro.schemaless_reader(encoded, writer_schema, reader_schema)

    expected = {'good_field': 1, 'good_compatible_field': 1}
    assert decoded == expected
Ejemplo n.º 55
0
def test_fastavro_errors_read_map():
    """A read error inside a map must report the key and the value type.

    A map is written with ``float`` values and read with ``double`` values;
    the resulting ``AvroValueError`` message has to contain
    ``<map>.x.<double>``.
    """
    stream = MemoryIO()

    float_map = dict(type="map", values="float")
    double_map = dict(type="map", values="double")

    write_data(stream, {"x": 0}, float_map)
    stream.seek(0)
    try:
        read_data(stream, double_map)
        assert False, 'bad schema did not raise!'
    except AvroValueError as err:
        assert '<map>.x.<double>' in str(err)
Ejemplo n.º 56
0
def check(filename):
    """Read an Avro file, rewrite it in memory, and compare the two.

    The file must expose a schema. Files whose base name is listed in
    ``NO_DATA`` are only checked for that; all others must contain at least
    one record. The records are then written to a new in-memory file with
    the same schema and codec, and reading that copy back must give the same
    schema, codec and records.
    """
    with open(filename, 'rb') as avro_file:
        source = fastavro.reader(avro_file)
        assert hasattr(source, 'schema'), 'no schema on file'

        if basename(filename) in NO_DATA:
            return

        original = list(source)
        assert original, 'no records found'

    rewritten = MemoryIO()
    fastavro.writer(rewritten, source.schema, original, source.codec)
    rewritten.seek(0)

    copy_reader = fastavro.reader(rewritten)
    assert hasattr(copy_reader, 'schema'), "schema wasn't written"
    assert copy_reader.schema == source.schema
    assert copy_reader.codec == source.codec

    assert list(copy_reader) == original
Ejemplo n.º 57
0
def test_repo_caching_issue():
    """Round-trip two schemas that reuse the record names "B" and "C".

    The first schema defines B containing C with a string field. The second
    defines A containing a different B/C (C has an int field) and refers to
    B again by name. Each schema must round-trip its own records.
    """
    def assert_round_trip(avro_schema, expected):
        # Write the records, rewind, and check they read back unchanged.
        avro_file = MemoryIO()
        fastavro.writer(avro_file, avro_schema, expected)
        avro_file.seek(0)
        assert list(fastavro.reader(avro_file)) == expected

    string_schema = {
        "type": "record",
        "name": "B",
        "fields": [{
            "name": "b",
            "type": {
                "type": "record",
                "name": "C",
                "fields": [{"name": "c", "type": "string"}],
            },
        }],
    }
    assert_round_trip(string_schema, [{"b": {"c": "test"}}])

    int_schema = {
        "name": "A",
        "type": "record",
        "fields": [{
            "name": "a",
            "type": {
                "type": "record",
                "name": "B",
                "fields": [{
                    "name": "b",
                    "type": {
                        "type": "record",
                        "name": "C",
                        "fields": [{"name": "c", "type": "int"}],
                    },
                }],
            },
        }, {
            # Second use of "B", this time by name reference.
            "name": "aa",
            "type": "B",
        }],
    }
    assert_round_trip(
        int_schema,
        [{"a": {"b": {"c": 1}}, "aa": {"b": {"c": 2}}}],
    )
Ejemplo n.º 58
0
def test_fastavro_errors_read_array():
    """A read error inside an array must report the index and the item type.

    An array is written with ``int`` items and read with ``float`` items;
    the resulting ``AvroValueError`` message has to contain
    ``<array>.[1].<float>``.
    """
    stream = MemoryIO()

    int_array = dict(type="array", items="int")
    float_array = dict(type="array", items="float")

    write_data(stream, [10, 20, 30], int_array)
    stream.seek(0)
    try:
        read_data(stream, float_array)
        assert False, 'bad schema did not raise!'
    except AvroValueError as err:
        # .[1] because the first element is read successfully
        # (but would be corrupt)
        assert '<array>.[1].<float>' in str(err)
Ejemplo n.º 59
0
def test_boolean_roundtrip():
    """Both boolean values survive a schemaless write followed by a read.

    ``True`` is checked first, then ``False``, each with a fresh in-memory
    file.
    """
    schema = {"type": "record", "fields": [{"name": "field", "type": "boolean"}]}

    for value in (True, False):
        record = {"field": value}
        new_file = MemoryIO()
        fastavro.schemaless_writer(new_file, schema, record)
        new_file.seek(0)
        new_record = fastavro.schemaless_reader(new_file, schema)
        assert record == new_record