Esempio n. 1
0
 def parse_avro(schema_str: str):  # pylint: disable=inconsistent-return-statements
     """Parse ``schema_str`` as an Avro schema and wrap it in a ``TypedSchema``.

     The returned ``TypedSchema`` is built from the parsed definition,
     ``SchemaType.AVRO`` and the original schema string.

     Raises:
         InvalidSchema: when a ``SchemaParseException`` is raised while the
             schema is being built; the original exception is chained as
             the cause.
     """
     try:
         definition = parse_avro_schema_definition(schema_str)
         return TypedSchema(definition, SchemaType.AVRO, schema_str)
     except SchemaParseException as parse_error:
         raise InvalidSchema from parse_error
Esempio n. 2
0
def test_union_to_simple_comparison(field):
    """Check that a ``["null", <type>]`` reader field can read the plain writer field.

    ``field`` is the single field definition placed in the writer record; it
    must provide a ``"type"`` entry. The reader declares one field named
    ``"fn"`` whose type is the union of ``"null"`` and ``field["type"]``.
    NOTE(review): presumably ``field["name"]`` is also ``"fn"`` -- confirm
    against the fixture/parametrization that supplies ``field``.
    """

    def record_of(only_field):
        # Both sides share the same record envelope; only the field differs.
        return {
            "type": "record",
            "name": "name",
            "namespace": "namespace",
            "fields": [only_field],
        }

    writer_definition = record_of(field)
    reader_definition = record_of({"type": ["null", field["type"]], "name": "fn"})

    reader_schema = parse_avro_schema_definition(json.dumps(reader_definition))
    writer_schema = parse_avro_schema_definition(json.dumps(writer_definition))
    assert are_compatible(reader_schema, writer_schema)
Esempio n. 3
0
def test_schema_compatibility():
    """Run ``are_compatible`` over a series of reader/writer schema pairs.

    The first part builds schemas inline (records with missing, extra and
    defaulted fields, arrays vs. maps, primitives, unions) and asserts the
    expected outcome for each pair. The second part walks a table of
    ``(reader, writer)`` pairs that must all be reported as compatible.
    """

    def load(definition):
        # Round-trip through JSON so the parser receives a schema string.
        return parse_avro_schema_definition(json.dumps(definition))

    def record(*fields):
        # Record named "Record" holding exactly the given field definitions.
        return load({"type": "record", "name": "Record", "fields": list(fields)})

    old_field1 = {"name": "oldField1", "type": "int"}
    old_field2 = {"name": "oldField2", "type": "string"}
    string_array = {"type": "array", "items": {"type": "string"}}
    string_map = {"type": "map", "values": {"type": "string"}}

    # testValidateSchemaPairMissingField
    two_field_writer = record(old_field1, old_field2)
    first_field_reader = record(old_field1)
    assert are_compatible(first_field_reader, two_field_writer)

    # testValidateSchemaPairMissingSecondField
    second_field_reader = record(old_field2)
    assert are_compatible(second_field_reader, two_field_writer)

    # testValidateSchemaPairAllFields
    all_fields_reader = record(old_field1, old_field2)
    assert are_compatible(all_fields_reader, two_field_writer)

    # testValidateSchemaNewFieldWithDefault
    defaulted_new_field_reader = record(
        old_field1,
        {"name": "newField2", "type": "int", "default": 42},
    )
    assert are_compatible(defaulted_new_field_reader, two_field_writer)

    # testValidateSchemaNewField
    new_field_reader = record(old_field1, {"name": "newField2", "type": "int"})
    assert not are_compatible(new_field_reader, two_field_writer)

    # testValidateArrayWriterSchema
    array_writer = load(string_array)
    array_reader = load(string_array)
    assert are_compatible(array_reader, array_writer)
    map_reader = load(string_map)
    assert not are_compatible(map_reader, array_writer)

    # testValidatePrimitiveWriterSchema
    string_writer = load({"type": "string"})
    string_reader = load({"type": "string"})
    assert are_compatible(string_reader, string_writer)
    int_reader = load({"type": "int"})
    assert not are_compatible(int_reader, string_writer)

    # testUnionReaderWriterSubsetIncompatibility
    # cannot have a union as a top level data type, so the unions are wrapped
    # in a record field and extracted again below
    wide_union_record = load({
        "name": "Record",
        "type": "record",
        "fields": [{"name": "f1", "type": ["int", "string", "long"]}],
    })
    narrow_union_record = load({
        "name": "Record",
        "type": "record",
        "fields": [{"name": "f1", "type": ["int", "string"]}],
    })
    union_reader = narrow_union_record.fields[0].type
    union_writer = wide_union_record.fields[0].type
    assert isinstance(union_reader, UnionSchema)
    assert isinstance(union_writer, UnionSchema)
    assert not are_compatible(union_reader, union_writer)

    # testReaderWriterCompatibility
    compatible_pairs = [
        (BOOLEAN_SCHEMA, BOOLEAN_SCHEMA),
        (INT_SCHEMA, INT_SCHEMA),
        (LONG_SCHEMA, INT_SCHEMA),
        (LONG_SCHEMA, LONG_SCHEMA),
        (FLOAT_SCHEMA, INT_SCHEMA),
        (FLOAT_SCHEMA, LONG_SCHEMA),
        (DOUBLE_SCHEMA, LONG_SCHEMA),
        (DOUBLE_SCHEMA, INT_SCHEMA),
        (DOUBLE_SCHEMA, FLOAT_SCHEMA),
        (STRING_SCHEMA, STRING_SCHEMA),
        (BYTES_SCHEMA, BYTES_SCHEMA),
        (STRING_SCHEMA, BYTES_SCHEMA),
        (BYTES_SCHEMA, STRING_SCHEMA),
        (INT_ARRAY_SCHEMA, INT_ARRAY_SCHEMA),
        (LONG_ARRAY_SCHEMA, INT_ARRAY_SCHEMA),
        (INT_MAP_SCHEMA, INT_MAP_SCHEMA),
        (LONG_MAP_SCHEMA, INT_MAP_SCHEMA),
        (ENUM1_AB_SCHEMA, ENUM1_AB_SCHEMA),
        (ENUM1_ABC_SCHEMA, ENUM1_AB_SCHEMA),
        # Union related pairs
        (EMPTY_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, INT_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, LONG_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, INT_LONG_UNION_SCHEMA),
        (INT_UNION_SCHEMA, INT_UNION_SCHEMA),
        (INT_STRING_UNION_SCHEMA, STRING_INT_UNION_SCHEMA),
        (INT_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (LONG_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (LONG_UNION_SCHEMA, INT_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, INT_UNION_SCHEMA),
        (DOUBLE_UNION_SCHEMA, INT_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, LONG_UNION_SCHEMA),
        (DOUBLE_UNION_SCHEMA, LONG_UNION_SCHEMA),
        (FLOAT_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (DOUBLE_UNION_SCHEMA, FLOAT_UNION_SCHEMA),
        (STRING_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (STRING_UNION_SCHEMA, BYTES_UNION_SCHEMA),
        (BYTES_UNION_SCHEMA, EMPTY_UNION_SCHEMA),
        (BYTES_UNION_SCHEMA, STRING_UNION_SCHEMA),
        (DOUBLE_UNION_SCHEMA, INT_FLOAT_UNION_SCHEMA),
        # Readers capable of reading all branches of a union are compatible
        (FLOAT_SCHEMA, INT_FLOAT_UNION_SCHEMA),
        (LONG_SCHEMA, INT_LONG_UNION_SCHEMA),
        (DOUBLE_SCHEMA, INT_FLOAT_UNION_SCHEMA),
        (DOUBLE_SCHEMA, INT_LONG_FLOAT_DOUBLE_UNION_SCHEMA),
        # Special case of singleton unions:
        (FLOAT_SCHEMA, FLOAT_UNION_SCHEMA),
        (INT_UNION_SCHEMA, INT_SCHEMA),
        (INT_SCHEMA, INT_UNION_SCHEMA),
        # Fixed types
        (FIXED_4_BYTES, FIXED_4_BYTES),
        # Tests involving records:
        (EMPTY_RECORD1, EMPTY_RECORD1),
        (EMPTY_RECORD1, A_INT_RECORD1),
        (A_INT_RECORD1, A_INT_RECORD1),
        (A_DINT_RECORD1, A_INT_RECORD1),
        (A_DINT_RECORD1, A_DINT_RECORD1),
        (A_INT_RECORD1, A_DINT_RECORD1),
        (A_LONG_RECORD1, A_INT_RECORD1),
        (A_INT_RECORD1, A_INT_B_INT_RECORD1),
        (A_DINT_RECORD1, A_INT_B_INT_RECORD1),
        (A_INT_B_DINT_RECORD1, A_INT_RECORD1),
        (A_DINT_B_DINT_RECORD1, EMPTY_RECORD1),
        (A_DINT_B_DINT_RECORD1, A_INT_RECORD1),
        (A_INT_B_INT_RECORD1, A_DINT_B_DINT_RECORD1),
        (load({"type": "null"}), load({"type": "null"})),
        (INT_LIST_RECORD, INT_LIST_RECORD),
        (LONG_LIST_RECORD, LONG_LIST_RECORD),
        (LONG_LIST_RECORD, INT_LIST_RECORD),
        (NULL_SCHEMA, NULL_SCHEMA),
        (ENUM_AB_ENUM_DEFAULT_A_RECORD, ENUM_ABC_ENUM_DEFAULT_A_RECORD),
        (
            ENUM_AB_FIELD_DEFAULT_A_ENUM_DEFAULT_B_RECORD,
            ENUM_ABC_FIELD_DEFAULT_B_ENUM_DEFAULT_A_RECORD,
        ),
        (NS_RECORD1, NS_RECORD2),
    ]

    for reader_schema, writer_schema in compatible_pairs:
        assert are_compatible(reader_schema, writer_schema)
Esempio n. 4
0
def test_simple_schema_promotion():
    """Check reader/writer compatibility for field drops, aliases and added optional fields.

    Covered, in order:
    * a reader using a field alias and a reader using a record alias are both
      compatible with a writer that has the fields ``f1`` and ``f2``;
    * a reader with only ``f1`` is compatible with that writer, while swapping
      the two roles is not reported as compatible;
    * a reader that adds a nullable field with a ``None`` default is
      compatible with a writer lacking that field.
    """

    def load(definition):
        # Round-trip through JSON so the parser receives a schema string.
        return parse_avro_schema_definition(json.dumps(definition))

    def nullable(name, value_type):
        # Optional field: union with "null", defaulting to null.
        return {"type": ["null", value_type], "name": name, "default": None}

    int_f1 = {"type": "int", "name": "f1"}

    narrow_reader = load({"name": "foo", "type": "record", "fields": [int_f1]})
    field_alias_reader = load({
        "name": "foo",
        "type": "record",
        "fields": [{"type": "int", "name": "bar", "aliases": ["f1"]}],
    })
    record_alias_reader = load({
        "name": "other",
        "type": "record",
        "fields": [int_f1],
        "aliases": ["foo"],
    })
    wide_writer = load({
        "name": "foo",
        "type": "record",
        "fields": [int_f1, {"type": "string", "name": "f2"}],
    })

    # alias testing
    outcome = ReaderWriterCompatibilityChecker().get_compatibility(field_alias_reader, wide_writer)
    assert outcome.compatibility is SchemaCompatibilityType.compatible, outcome.locations
    outcome = ReaderWriterCompatibilityChecker().get_compatibility(record_alias_reader, wide_writer)
    assert outcome.compatibility is SchemaCompatibilityType.compatible, outcome.locations

    # Dropping a field on the reader side is fine; the reverse direction is not.
    outcome = ReaderWriterCompatibilityChecker().get_compatibility(narrow_reader, wide_writer)
    assert outcome == SchemaCompatibilityResult.compatible(), outcome
    outcome = ReaderWriterCompatibilityChecker().get_compatibility(wide_writer, narrow_reader)
    assert outcome != SchemaCompatibilityResult.compatible(), outcome

    # Fields shared by the "CA" writer and reader records.
    shared_fields = [
        {"type": "string", "name": "provider"},
        nullable("name", "string"),
        nullable("phone", "string"),
        nullable("email", "string"),
        nullable("reference", "string"),
        nullable("price", "double"),
    ]
    ca_writer = load({
        "type": "record",
        "name": "CA",
        "namespace": "ns1",
        "fields": shared_fields,
    })
    ca_reader = load({
        "type": "record",
        "name": "CA",
        "namespace": "ns1",
        "fields": shared_fields + [nullable("status_date", "string")],
    })
    outcome = ReaderWriterCompatibilityChecker().get_compatibility(writer=ca_writer, reader=ca_reader)
    assert outcome == SchemaCompatibilityResult.compatible(), outcome
Esempio n. 5
0
"""
    These are duplicates of other test_schema.py tests, but do not make use of the registry client fixture
    and are here for debugging and speed, and as an initial sanity check
"""
from avro.schema import ArraySchema, Field, MapSchema, RecordSchema, Schema, UnionSchema
from karapace.avro_compatibility import (parse_avro_schema_definition,
                                         ReaderWriterCompatibilityChecker,
                                         SchemaCompatibilityResult,
                                         SchemaCompatibilityType)

import json
import pytest

# Schemas defined in AvroCompatibilityTest.java. Used here to ensure compatibility with the schema-registry
# Record "myrecord" with a single string field "f1".
schema1 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":['
    '{"type":"string","name":"f1"}'
    ']}'
)
# Adds a string field "f2" with the default "foo".
schema2 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":['
    '{"type":"string","name":"f1"},'
    '{"type":"string","name":"f2","default":"foo"}'
    ']}'
)
# Adds a string field "f2" without a default.
schema3 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":['
    '{"type":"string","name":"f1"},'
    '{"type":"string","name":"f2"}'
    ']}'
)
# Single string field "f1_new" carrying the alias "f1".
schema4 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":['
    '{"type":"string","name":"f1_new","aliases":["f1"]}'
    ']}'
)
# Field "f1" typed as the union ["null", "string"], with a doc string.
schema6 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":['
    '{"type":["null","string"],"name":"f1","doc":"doc of f1"}'
    ']}'
)
schema7 = parse_avro_schema_definition(
    '{"type":"record","name":"myrecord","fields":[{"type":["null","string","int"],"name":"f1","doc":"doc of f1"}]}'