def test_render_metadata():
    """
    ``get_metadata`` should return the field metadata as a list of
    ``(key, value)`` tuples, and an empty list when no metadata was given.
    """
    single = fields.AvroField("first_name", str, metadata={"desc": "English Language Name"})
    assert [("desc", "English Language Name")] == single.get_metadata()

    without_metadata = fields.AvroField("engine_name", str)
    assert [] == without_metadata.get_metadata()

    breed_metadata = {
        "encoding": "some_exotic_encoding",
        "doc": "Official Breed Name",
    }
    several = fields.AvroField("breed_name", str, metadata=breed_metadata)
    pairs = [("encoding", "some_exotic_encoding"), ("doc", "Official Breed Name")]
    assert pairs == several.get_metadata()
def test_mapping_logical_type(mapping, python_primitive_type, python_type_str, value):
    """
    A mapping of ``str`` to a logical type should render as an Avro ``map``
    whose ``values`` attribute is ``python_type_str``.

    Three cases are checked: no default, ``None`` as default (rendered as
    ``{}``) and a ``default_factory`` whose values are expected to be
    converted with the ``to_avro`` of the matching logical type class.
    """
    field_name = "a_map_field"
    map_annotation = mapping[str, python_primitive_type]
    avro_map = {"type": "map", "name": field_name, "values": python_type_str}

    no_default = fields.AvroField(field_name, map_annotation, dataclasses.MISSING)
    assert {"name": field_name, "type": avro_map} == no_default.to_dict()

    none_default = fields.AvroField(field_name, map_annotation, None)
    assert {"name": field_name, "type": avro_map, "default": {}} == none_default.to_dict()

    values = {"key": value}
    with_factory = fields.AvroField(
        field_name,
        map_annotation,
        default=dataclasses.MISSING,
        default_factory=lambda: values,
    )
    converter = fields.LOGICAL_TYPES_FIELDS_CLASSES[python_primitive_type]
    avro_default = {key: converter.to_avro(item) for key, item in values.items()}
    rendered = {"name": field_name, "type": avro_map, "default": avro_default}
    assert rendered == with_factory.to_dict()
def test_sequence_with_logical_type(sequence, python_primitive_type, python_type_str, value):
    """
    A sequence of a logical type should render as an Avro ``array`` whose
    ``items`` attribute is ``python_type_str``.

    Three cases are checked: no default, ``None`` as default (rendered as
    ``[]``) and an explicit default whose items are expected to be converted
    with the ``to_avro`` of the matching logical type class.
    """
    field_name = "an_array_field"
    array_annotation = sequence[python_primitive_type]
    avro_array = {"type": "array", "name": field_name, "items": python_type_str}

    no_default = fields.AvroField(field_name, array_annotation, dataclasses.MISSING)
    assert {"name": field_name, "type": avro_array} == no_default.to_dict()

    none_default = fields.AvroField(field_name, array_annotation, None)
    assert {"name": field_name, "type": avro_array, "default": []} == none_default.to_dict()

    values = [value]
    with_values = fields.AvroField(
        field_name,
        array_annotation,
        default=values,
        default_factory=lambda: values,
    )
    converter = fields.LOGICAL_TYPES_FIELDS_CLASSES[python_primitive_type]
    avro_default = [converter.to_avro(item) for item in values]
    rendered = {"name": field_name, "type": avro_array, "default": avro_default}
    assert rendered == with_values.to_dict()
def test_mapping_type(mapping, python_primitive_type, python_type_str):
    """
    A mapping of ``str`` to a primitive type should render as an Avro ``map``
    with the ``values`` attribute set to ``python_type_str``.

    Checks a field without default, with ``None`` as default (rendered as
    ``{}``) and with an explicit default combined with a ``default_factory``.
    """
    field_name = "a_map_field"
    map_annotation = mapping[str, python_primitive_type]
    avro_map = {"type": "map", "name": field_name, "values": python_type_str}

    no_default = fields.AvroField(field_name, map_annotation, dataclasses.MISSING)
    assert {"name": field_name, "type": avro_map} == no_default.to_dict()

    none_default = fields.AvroField(field_name, map_annotation, None)
    assert {"name": field_name, "type": avro_map, "default": {}} == none_default.to_dict()

    if python_type_str != fields.BYTES:
        factory_value = declared_default = faker.pydict(2, True, python_primitive_type)
    else:
        # The factory yields bytes while the declared (and expected) default
        # holds the str form of the same value.
        factory_value = {"hola": b"hi"}
        declared_default = {"hola": "hi"}

    with_default = fields.AvroField(
        field_name,
        map_annotation,
        default=declared_default,
        default_factory=lambda: factory_value,
    )
    rendered = {"name": field_name, "type": avro_map, "default": declared_default}
    assert rendered == with_default.to_dict()
def test_sequence_type(sequence, python_primitive_type, python_type_str):
    """
    A sequence of a primitive type should render as an Avro ``array`` with
    the ``items`` attribute set to ``python_type_str``.

    Checks a field without default, with ``None`` as default (rendered as
    ``[]``) and with an explicit default combined with a ``default_factory``.
    """
    field_name = "an_array_field"
    array_annotation = sequence[python_primitive_type]
    avro_array = {"type": "array", "name": field_name, "items": python_type_str}

    no_default = fields.AvroField(field_name, array_annotation, dataclasses.MISSING)
    assert {"name": field_name, "type": avro_array} == no_default.to_dict()

    none_default = fields.AvroField(field_name, array_annotation, None)
    assert {"name": field_name, "type": avro_array, "default": []} == none_default.to_dict()

    if python_type_str != fields.BYTES:
        factory_values = declared_default = faker.pylist(2, True, python_primitive_type)
    else:
        # The factory yields bytes while the declared (and expected) default
        # holds the str form of the same items.
        factory_values = [b"hola", b"hi"]
        declared_default = ["hola", "hi"]

    with_default = fields.AvroField(
        field_name,
        array_annotation,
        default=declared_default,
        default_factory=lambda: factory_values,
    )
    rendered = {"name": field_name, "type": avro_array, "default": declared_default}
    assert rendered == with_default.to_dict()
def test_enum_type():
    """
    When the type is types.Enum, the Avro field type should be enum with the
    symbols attribute present. Namespace, aliases and default are rendered
    only when provided, and a default that is not one of the symbols is
    expected to fail with an AssertionError.
    """
    field_name = "an_enum_field"
    enum_namespace = "my_emum"
    enum_aliases = ["enum", "first enum"]
    suits = ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
    enum_type = types.Enum

    full_enum = types.Enum(
        list(suits),
        namespace=enum_namespace,
        aliases=enum_aliases,
        default="CLUBS",
    )
    full_field = fields.AvroField(field_name, enum_type, full_enum)
    full_schema = {
        "name": field_name,
        "type": {
            "type": "enum",
            "name": field_name,
            "symbols": full_enum.symbols,
            "namespace": enum_namespace,
            "aliases": enum_aliases,
        },
        "default": full_enum.default,
    }
    assert full_schema == full_field.to_dict()

    plain_enum = types.Enum(list(suits))
    plain_field = fields.AvroField(field_name, enum_type, plain_enum)
    plain_schema = {
        "name": field_name,
        "type": {
            "type": "enum",
            "name": field_name,
            "symbols": plain_enum.symbols,
        },
    }
    assert plain_schema == plain_field.to_dict()

    # Construction and rendering both stay inside the context manager, so the
    # error may come from any of the three steps.
    with pytest.raises(AssertionError):
        bad_enum = types.Enum(list(suits), default="BLUE")
        bad_field = fields.AvroField(field_name, enum_type, bad_enum)
        bad_field.to_dict()
# NOTE(review): another function named ``test_invalid_default_values`` appears
# later in this source; if both live in the same module the later definition
# replaces this one — confirm and rename one of them if so.
def test_invalid_default_values(primitive_type, invalid_default):
    """
    Rendering a primitive field whose default does not match its type should
    raise an AssertionError mentioning the expected type.
    """
    bad_field = fields.AvroField("a_field", primitive_type, invalid_default)
    expected_error = f"Invalid default type. Default should be {primitive_type}"

    with pytest.raises(AssertionError, match=expected_error):
        bad_field.to_dict()
def test_logical_type_time_with_default():
    """
    A ``datetime.time`` default should be rendered as an int with the
    time-millis logical type, expressed as milliseconds since midnight.
    """
    field_name = "a time"
    default_time = consts.now.time()
    field = fields.AvroField(field_name, datetime.time, default_time)

    seconds_since_midnight = (
        (default_time.hour * 60 + default_time.minute) * 60 + default_time.second
    )
    # Microseconds contribute fractional milliseconds; int() truncates them.
    milliseconds = int(seconds_since_midnight * 1000 + default_time.microsecond / 1000)

    rendered = {
        "name": field_name,
        "type": {"type": fields.INT, "logicalType": fields.TIME_MILLIS},
        "default": milliseconds,
    }
    assert rendered == field.to_dict()
def test_invalid_default_values(logical_type, invalid_default, msg):
    """
    Rendering a logical-type field with an invalid default should raise an
    AssertionError. ``msg`` is the expected error pattern; when it is falsy
    the generic "invalid default type" message is expected instead.
    """
    bad_field = fields.AvroField("a_field", logical_type, invalid_default)

    if msg:
        expected_error = msg
    else:
        expected_error = f"Invalid default type. Default should be {logical_type}"

    with pytest.raises(AssertionError, match=expected_error):
        bad_field.to_dict()
def test_invalid_type_container_field():
    """
    Creating a field with ``typing.Set`` as its type should raise a
    ValueError listing the accepted container types.
    """
    field_name = "test_field"
    expected_error = (
        f"Invalid Type for field {field_name}. "
        "Accepted types are list, tuple, dict or typing.Union"
    )

    with pytest.raises(ValueError, match=expected_error):
        fields.AvroField(field_name, typing.Set, dataclasses.MISSING)
def test_union_type(primitive_types, avro_types, default):
    """
    A ``typing.Union`` of primitive types should render as a list with the
    corresponding Avro types. ``default`` is accepted but not used here.
    """
    field_name = "an_union_field"
    union_annotation = typing.Union[primitive_types]

    field = fields.AvroField(field_name, union_annotation)

    rendered = {"name": field_name, "type": list(avro_types)}
    assert rendered == field.to_dict()
def test_logical_types(python_type, avro_type, logical_type):
    """
    A logical Python type without default should render as a dict holding
    the underlying Avro ``type`` and its ``logicalType``.
    """
    name = "a logical type"
    field = fields.AvroField(name, python_type)

    expected = {
        "name": name,
        "type": {"type": avro_type, "logicalType": logical_type},
    }

    assert expected == field.to_dict()
def test_sequence_with_union_type(union, items, default):
    """
    A ``typing.List`` of a union should render as an Avro ``array`` whose
    ``items`` attribute is the list of Avro types of the union.

    Checks a field without default, with a ``default_factory`` and with
    ``None`` as default; in the last case ``null`` is expected as the first
    item type and the default is rendered as ``[]``.
    """
    name = "an_array_field"
    python_type = typing.List[union]

    field = fields.AvroField(name, python_type, default=dataclasses.MISSING)
    expected = {
        "name": name,
        "type": {"type": "array", "name": name, "items": items},
    }
    assert expected == field.to_dict()

    field = fields.AvroField(name, python_type, default_factory=lambda: default)
    expected = {
        "name": name,
        "type": {"type": "array", "name": name, "items": items},
        "default": default,
    }
    assert expected == field.to_dict()

    field = fields.AvroField(name, python_type, default=None)
    # Build a new list instead of inserting into ``items``: the argument is
    # supplied by the caller and must not be mutated by the test.
    nullable_items = [fields.NULL, *items]
    expected = {
        "name": name,
        "type": {"type": "array", "name": name, "items": nullable_items},
        "default": [],
    }
    assert expected == field.to_dict()
def test_primitive_types_with_default_value_none(primitive_type):
    """
    A primitive field with ``None`` as default should render as a union of
    ``null`` and the Avro type, with ``None`` as the default value.
    """
    field_name = "a_field"
    field = fields.AvroField(field_name, primitive_type, None)

    nullable_type = [fields.NULL, fields.PYTHON_TYPE_TO_AVRO[primitive_type]]
    rendered = {"name": field_name, "type": nullable_type, "default": None}

    assert rendered == field.to_dict()
def test_render():
    """
    ``render`` should return the field name and type, plus the default and
    every metadata entry as top-level keys when they are provided.
    """
    described = fields.AvroField("first_name", str, metadata={"desc": "English Language Name"})
    assert {
        "name": "first_name",
        "type": "string",
        "desc": "English Language Name",
    } == described.render()

    bare = fields.AvroField("engine_name", str)
    assert {"name": "engine_name", "type": "string"} == bare.render()

    breed_metadata = {
        "encoding": "some_exotic_encoding",
        "doc": "Official Breed Name",
    }
    with_default = fields.AvroField("breed_name", str, "test", metadata=breed_metadata)
    assert {
        "name": "breed_name",
        "type": "string",
        "default": "test",
        "encoding": "some_exotic_encoding",
        "doc": "Official Breed Name",
    } == with_default.render()
def test_logical_type_uuid_with_default(python_type):
    """
    A UUID default should be rendered as its string form on a string field
    carrying the uuid logical type.
    """
    field_name = "a uuid"
    default_uuid = uuid.UUID("d793fc4f-2eef-440a-af8b-a8e884d7b1a8")

    field = fields.AvroField(field_name, python_type, default_uuid)

    rendered = {
        "name": field_name,
        "type": {"type": fields.STRING, "logicalType": fields.UUID},
        "default": str(default_uuid),
    }
    assert rendered == field.to_dict()
def test_logical_types_with_null_as_default(python_type, avro_type, logical_type):
    """
    A logical Python type with ``None`` as default should render as a union
    of ``"null"`` and the logical type dict, with ``None`` as the default.
    """
    name = "a logical type"
    field = fields.AvroField(name, python_type, None)

    expected = {
        "name": name,
        "type": ["null", {"type": avro_type, "logicalType": logical_type}],
        "default": None,
    }

    assert expected == field.to_dict()
def test_primitive_types_with_default_value(primitive_type, default):
    """
    A primitive field with a default should render the Avro type together
    with that default; a ``bytes`` default is expected in its decoded form.
    """
    field_name = "a_field"
    field = fields.AvroField(field_name, primitive_type, default)

    expected_default = default.decode() if primitive_type is bytes else default
    rendered = {
        "name": field_name,
        "type": fields.PYTHON_TYPE_TO_AVRO[primitive_type],
        "default": expected_default,
    }

    assert rendered == field.to_dict()
def test_union_as_optional_with_primitives(primitive_type, avro_type):
    """
    Test the case where typing.Optional is used.

    typing.Optional[Any] is equivalent to typing.Union[Any, NoneType], with
    NoneType always placed last, so "null" is expected as the last type of
    the rendered union.
    """
    field_name = "an_optional_union_field"
    optional_annotation = typing.Optional[primitive_type]

    field = fields.AvroField(field_name, optional_annotation)

    rendered = {"name": field_name, "type": [avro_type, "null"]}
    assert rendered == field.to_dict()
def test_union_type_with_default(primitive_types, avro_types, default):
    """
    A union field with a default should render the list of Avro types plus
    the default: datetimes are expected as milliseconds since 1970-01-01,
    bytes in decoded form, and any other value unchanged.
    """
    field_name = "an_union_field"
    union_annotation = typing.Union[primitive_types]
    field = fields.AvroField(field_name, union_annotation, default=default)

    expected_default = default
    if isinstance(default, datetime.datetime):
        epoch = datetime.datetime(1970, 1, 1)
        expected_default = (default - epoch).total_seconds() * 1000
    elif isinstance(default, bytes):
        expected_default = default.decode()

    rendered = {
        "name": field_name,
        "type": list(avro_types),
        "default": expected_default,
    }
    assert rendered == field.to_dict()
def test_logical_type_datetime_with_default():
    """
    A ``datetime.datetime`` default should be rendered as a long with the
    timestamp-millis logical type, expressed as milliseconds since 1970-01-01.
    """
    field_name = "a datetime"
    field = fields.AvroField(field_name, datetime.datetime, consts.now)

    epoch = datetime.datetime(1970, 1, 1)
    seconds_since_epoch = (consts.now - epoch).total_seconds()

    rendered = {
        "name": field_name,
        "type": {"type": fields.LONG, "logicalType": fields.TIMESTAMP_MILLIS},
        "default": seconds_since_epoch * 1000,
    }
    assert rendered == field.to_dict()
def test_logical_type_date_with_default():
    """
    A ``datetime.date`` default should be rendered as an int with the date
    logical type, expressed as days since 1970-01-01.
    """
    field_name = "a date"
    field = fields.AvroField(field_name, datetime.date, consts.now.date())

    midnight = datetime.datetime.min.time()
    start_of_day = datetime.datetime.combine(consts.now, midnight)
    seconds_since_epoch = (start_of_day - datetime.datetime(1970, 1, 1)).total_seconds()
    seconds_per_day = 3600 * 24

    rendered = {
        "name": field_name,
        "type": {"type": fields.INT, "logicalType": fields.DATE},
        "default": seconds_since_epoch / seconds_per_day,
    }
    assert rendered == field.to_dict()
def test_union_with_maps(complex_type, avro_types):
    """
    A union whose first member is a mapping should render that member as an
    Avro ``map`` (values taken from ``avro_types[0]``) followed by
    ``avro_types[1]``.
    """
    field_name = "an_union_field"
    union_annotation = typing.Union[complex_type]
    field = fields.AvroField(field_name, union_annotation)

    values_type, second_type = avro_types[0], avro_types[1]
    map_schema = {"type": "map", "name": field_name, "values": values_type}

    rendered = {"name": field_name, "type": [map_schema, second_type]}
    assert rendered == field.to_dict()
def test_union_type_with_records():
    """
    A union of two AvroModel classes should render as a list of record
    schemas, each named after the field and the class and using the class
    docstring as ``doc``.
    """

    class User(AvroModel):
        "User"
        first_name: str

    class Car(AvroModel):
        "Car"
        engine_name: str

    field_name = "an_union_field"
    field = fields.AvroField(field_name, typing.Union[User, Car])

    user_record = {
        "name": "an_union_field_user_record",
        "type": "record",
        "doc": "User",
        "fields": [{"name": "first_name", "type": "string"}],
    }
    car_record = {
        "name": "an_union_field_car_record",
        "type": "record",
        "doc": "Car",
        "fields": [{"name": "engine_name", "type": "string"}],
    }

    rendered = {"name": field_name, "type": [user_record, car_record]}
    assert rendered == field.to_dict()
def test_fixed_type():
    """
    When the type is types.Fixed, the Avro field type should be fixed with
    the size attribute present, along with the namespace and aliases that
    were given.
    """
    field_name = "a_fixed_field"
    fixed_namespace = "md5"
    fixed_aliases = ["md5", "hash"]

    fixed_default = types.Fixed(16, namespace=fixed_namespace, aliases=fixed_aliases)
    field = fields.AvroField(field_name, types.Fixed, fixed_default)

    fixed_schema = {
        "type": "fixed",
        "name": field_name,
        "size": fixed_default.size,
        "namespace": fixed_namespace,
        "aliases": fixed_aliases,
    }
    assert {"name": field_name, "type": fixed_schema} == field.to_dict()
def test_union_type_with_record_default():
    """
    A union of two AvroModel classes with ``None`` as default should render
    ``null`` first, followed by the record schemas, with ``None`` as default.
    With a ``default_factory`` returning a dict, the records are rendered
    without ``null`` and the dict is used as the default.
    """

    class User(AvroModel):
        "User"
        first_name: str

    class Car(AvroModel):
        "Car"
        engine_name: str

    field_name = "an_union_field"
    union_annotation = typing.Union[User, Car]

    user_record = {
        "name": "an_union_field_user_record",
        "type": "record",
        "doc": "User",
        "fields": [{"name": "first_name", "type": "string"}],
    }
    car_record = {
        "name": "an_union_field_car_record",
        "type": "record",
        "doc": "Car",
        "fields": [{"name": "engine_name", "type": "string"}],
    }

    nullable_field = fields.AvroField(field_name, union_annotation, None)
    nullable_schema = {
        "name": field_name,
        "type": [fields.NULL, user_record, car_record],
        "default": None,
    }
    assert nullable_schema == nullable_field.to_dict()

    factory_field = fields.AvroField(
        field_name,
        union_annotation,
        default=dataclasses.MISSING,
        default_factory=lambda: {"first_name": "a name"},
    )
    factory_schema = {
        "name": field_name,
        "type": [user_record, car_record],
        "default": {"first_name": "a name"},
    }
    assert factory_schema == factory_field.to_dict()
def test_primitive_types(primitive_type):
    """
    A primitive field without default should render only its name and the
    Avro type mapped from the Python type.
    """
    field_name = "a_field"
    field = fields.AvroField(field_name, primitive_type, dataclasses.MISSING)

    rendered = {
        "name": field_name,
        "type": fields.PYTHON_TYPE_TO_AVRO[primitive_type],
    }
    assert rendered == field.to_dict()
def test_decimal_type():
    """
    When the type is decimal.Decimal, the Avro field type should be bytes,
    with logicalType=decimal and the precision and scale attributes present
    as ints. Invalid scale/precision combinations, a missing default and
    defaults that do not fit the declared precision/scale should raise a
    ValueError.
    """
    field_name = "a_decimal_field"
    decimal_type = decimal.Decimal

    def decimal_schema(precision, scale):
        # Avro type dict expected for a decimal with the given settings.
        return {
            "type": "bytes",
            "logicalType": "decimal",
            "precision": precision,
            "scale": scale,
        }

    def render(make_default):
        # Build the default, the field and the schema in one go so that an
        # error raised by any of the three steps surfaces to the caller.
        field = fields.AvroField(field_name, decimal_type, make_default())
        return field.to_dict()

    # A plain decimal.Decimal default sets precision and scale implicitly
    implicit = {
        "name": field_name,
        "type": decimal_schema(precision=3, scale=2),
        "default": "\\u013a",
    }
    assert implicit == render(lambda: decimal.Decimal("3.14"))

    # types.Decimal sets precision and scale explicitly
    explicit = {
        "name": field_name,
        "type": decimal_schema(precision=7, scale=5),
    }
    assert explicit == render(lambda: types.Decimal(scale=5, precision=7))

    explicit_with_default = {
        "name": field_name,
        "type": decimal_schema(precision=7, scale=5),
        "default": "\\u04ca90",
    }
    assert explicit_with_default == render(
        lambda: types.Decimal(scale=5, precision=7, default=decimal.Decimal("3.14"))
    )

    # A missing default is deliberately not given a fallback schema (scale 0
    # and the precision of the default decimal context): that would turn the
    # field into a very wide integer, which is likely not what is wanted, so
    # the dev is forced to provide a default (see the MissingSentinel case).

    # Validate 0 <= scale <= precision
    with pytest.raises(
        ValueError,
        match="Scale must be zero or a positive integer less than or equal to the precision.",
    ):
        render(lambda: types.Decimal(scale=-1, precision=1))

    # Validate 0 <= scale <= precision
    with pytest.raises(
        ValueError,
        match="Scale must be zero or a positive integer less than or equal to the precision.",
    ):
        render(lambda: types.Decimal(scale=3, precision=1))

    # Validate precision >= 0
    with pytest.raises(ValueError, match="Precision must be a positive integer greater than zero"):
        render(lambda: types.Decimal(scale=2, precision=-1))

    # Require a default be provided for decimal.Decimal
    with pytest.raises(ValueError):
        render(lambda: types.MissingSentinel)

    # Catch unexpected default value for decimal.Decimal
    with pytest.raises(ValueError):
        render(lambda: 7)

    # Default decimal.Decimal has more digits than listed precision
    with pytest.raises(ValueError):
        render(lambda: types.Decimal(scale=2, precision=3, default=decimal.Decimal("3.14159")))

    # Default decimal.Decimal has more digits past decimal than scale
    with pytest.raises(ValueError):
        render(lambda: types.Decimal(scale=1, precision=3, default=decimal.Decimal("3.14")))

    # Just for code coverage (same inputs as the previous case)
    with pytest.raises(ValueError):
        render(lambda: types.Decimal(scale=1, precision=3, default=decimal.Decimal("3.14")))
def test_invalid_type():
    """
    Creating a field with ``typing.Any`` as its type should raise a
    ValueError pointing to the documentation of the valid types.
    """
    expected_error = f"Type {typing.Any} is unknown. Please check the valid types at https://marcosschroh.github.io/dataclasses-avroschema/fields_specification/#avro-field-and-python-types-summary"

    with pytest.raises(ValueError, match=expected_error):
        fields.AvroField("a_field", typing.Any)