def _get_decoder_func(self, schema_id, payload, is_key=False):
        """Return a decoder callable for ``schema_id``, building it on first use.

        The decoder takes a file-like payload positioned at the start of the
        Avro body and returns the decoded datum. Built decoders are cached in
        ``self.id_to_decoder_func``.

        :param schema_id: id of the writer schema in the schema registry.
        :param payload: seekable binary stream; its position is restored
            before this method returns a newly built decoder.
        :param is_key: select ``self.reader_key_schema`` instead of
            ``self.reader_value_schema`` as the reader schema.
        :raises SerializerError: if the writer schema cannot be fetched.

        NOTE(review): the cache is keyed by ``schema_id`` only, so a decoder
        built for one ``is_key`` value is reused for the other -- confirm that
        keys and values never share a schema id with different reader schemas.
        """
        if schema_id in self.id_to_decoder_func:
            return self.id_to_decoder_func[schema_id]

        # fetch writer schema from schema reg
        try:
            writer_schema_obj = self.registry_client.get_by_id(schema_id)
        except ClientError as e:
            raise SerializerError("unable to fetch schema with id %d: %s" %
                                  (schema_id, str(e)))

        if writer_schema_obj is None:
            raise SerializerError("unable to fetch schema with id %d" %
                                  (schema_id))

        # Remember where the Avro body starts so the probe read below can be
        # undone.
        curr_pos = payload.tell()

        reader_schema_obj = (self.reader_key_schema
                             if is_key else self.reader_value_schema)

        if HAS_FAST:
            # try to use fast avro
            try:
                fast_avro_writer_schema = parse_schema(
                    writer_schema_obj.to_json())
                # The reader schema may be absent. Calling .to_json() on None
                # would raise inside this try block and silently force the
                # slow path, so only parse it when one is present.
                if reader_schema_obj is not None:
                    fast_avro_reader_schema = parse_schema(
                        reader_schema_obj.to_json())
                else:
                    fast_avro_reader_schema = None
                # Probe: make sure fastavro can actually decode this payload.
                schemaless_reader(payload, fast_avro_writer_schema)

                # If we reach this point, this means we have fastavro and it can
                # do this deserialization. Rewind since this method just determines
                # the reader function and we need to deserialize again along the
                # normal path.
                payload.seek(curr_pos)

                self.id_to_decoder_func[
                    schema_id] = lambda p: schemaless_reader(
                        p, fast_avro_writer_schema, fast_avro_reader_schema)
                return self.id_to_decoder_func[schema_id]
            except Exception:
                # Fast avro failed, fall thru to standard avro below.
                # Deliberately best-effort: any failure here just selects the
                # slower decoder.
                pass

        # here means we should just delegate to slow avro
        # rewind
        payload.seek(curr_pos)
        # Avro DatumReader py2/py3 inconsistency, hence no param keywords
        # should be revisited later
        # https://github.com/apache/avro/blob/master/lang/py3/avro/io.py#L459
        # https://github.com/apache/avro/blob/master/lang/py/src/avro/io.py#L423
        # def __init__(self, writers_schema=None, readers_schema=None)
        # def __init__(self, writer_schema=None, reader_schema=None)
        avro_reader = avro.io.DatumReader(writer_schema_obj, reader_schema_obj)

        def decoder(p):
            bin_decoder = avro.io.BinaryDecoder(p)
            return avro_reader.read(bin_decoder)

        self.id_to_decoder_func[schema_id] = decoder
        return self.id_to_decoder_func[schema_id]
Exemple #2
0
def test_unknown_type():
    """A schema whose ``type`` is ``"unknown"`` must raise UnknownType."""
    bogus_schema = {"type": "unknown"}
    with pytest.raises(UnknownType):
        parse_schema(bogus_schema)
Exemple #3
0
def test_union_schemas_must_have_names_in_order():
    """Named types in a schema list must be defined before they are used.

    See https://github.com/fastavro/fastavro/issues/450
    """

    def make_location():
        # Fresh dict on every call so each parse gets its own input objects.
        return {
            "name": "Location",
            "type": "record",
            "fields": [{"name": "city", "type": "long"}],
        }

    def make_weather():
        # References the "Location" record by name.
        return {
            "name": "Weather",
            "type": "record",
            "fields": [{"name": "of", "type": "Location"}],
        }

    # Location comes first, so the reference inside Weather resolves.
    defined_before_use = [make_location(), make_weather()]
    parse_schema(defined_before_use)

    # Weather comes first, so Location is still unknown when it is referenced.
    used_before_defined = [make_weather(), make_location()]
    with pytest.raises(UnknownType):
        parse_schema(used_before_defined)
Exemple #4
0
def test_decimal_fixed_accommodates_precision():
    """A fixed-backed decimal whose precision does not fit its size is rejected.

    See https://github.com/fastavro/fastavro/issues/457
    """
    fixed_decimal_type = {
        "name": "fixed_decimal",
        "logicalType": "decimal",
        "precision": 10,
        "scale": 2,
        "type": "fixed",
        "size": 2,
    }
    record_schema = {
        "type": "record",
        "name": "test_scale_is_an_int",
        "fields": [{"name": "field", "type": fixed_decimal_type}],
    }
    expected_message = (
        r"decimal precision of \d+ doesn't fit into array of length \d+")

    with pytest.raises(SchemaParseException, match=expected_message):
        parse_schema(record_schema)
Exemple #5
0
def test_aliases_is_a_list():
    """A field whose ``aliases`` is a plain string must fail to parse.

    See https://github.com/fastavro/fastavro/issues/206
    """
    field_with_string_aliases = {
        "name": "field",
        "type": "string",
        "aliases": "foobar",
    }
    record_schema = {
        "type": "record",
        "name": "test_parse_schema",
        "fields": [field_with_string_aliases],
    }

    with pytest.raises(SchemaParseException):
        parse_schema(record_schema)
Exemple #6
0
def test_parse_schema():
    """Parsing marks the result as parsed, and re-parsing it yields an equal schema."""
    raw_schema = {
        "type": "record",
        "name": "test_parse_schema",
        "fields": [{"name": "field", "type": "string"}],
    }

    first_pass = parse_schema(raw_schema)
    assert "__fastavro_parsed" in first_pass

    # Feeding an already-parsed schema back in must give an equal result.
    second_pass = parse_schema(first_pass)
    assert second_pass == first_pass
Exemple #7
0
def test_parse_schema_includes_hint_with_list():
    """Every schema in a parsed list carries the ``__fastavro_parsed`` hint.

    See https://github.com/fastavro/fastavro/issues/444
    """
    first_record = {
        "type": "record",
        "name": "test_parse_schema_includes_hint_with_list_1",
        "doc": "blah",
        "fields": [{"name": "field1", "type": "string", "default": ""}],
    }
    second_record = {
        "type": "record",
        "name": "test_parse_schema_includes_hint_with_list_2",
        "doc": "blah",
        "fields": [{"name": "field2", "type": "string", "default": ""}],
    }

    parsed_list = parse_schema([first_record, second_record])
    for parsed_entry in parsed_list:
        assert "__fastavro_parsed" in parsed_entry
Exemple #8
0
def test_enum_named_type():
    """An enum defined inline in one field can be referenced by name in a later field.

    See https://github.com/fastavro/fastavro/issues/450
    """
    enum_definition = {
        "type": "enum",
        "name": "my_enum",
        "symbols": ["FOO", "BAR"],
    }
    record_schema = {
        "type": "record",
        "name": "test_enum_named_type",
        "fields": [
            {"name": "test1", "type": enum_definition},
            # Second field refers to the enum by name only.
            {"name": "test2", "type": "my_enum"},
        ],
    }
    records = [{"test1": "FOO", "test2": "BAR"}]

    parsed = parse_schema(record_schema)
    round_tripped = roundtrip(parsed, records)
    assert records == round_tripped
Exemple #9
0
def test_fixed_named_type():
    """A fixed type defined inline in one field can be referenced by name in a later field.

    See https://github.com/fastavro/fastavro/issues/450
    """
    fixed_definition = {
        "type": "fixed",
        "name": "my_fixed",
        "size": 4,
    }
    record_schema = {
        "type": "record",
        "name": "test_fixed_named_type",
        "fields": [
            {"name": "test1", "type": fixed_definition},
            # Second field refers to the fixed type by name only.
            {"name": "test2", "type": "my_fixed"},
        ],
    }
    records = [{"test1": b"1234", "test2": b"4321"}]

    parsed = parse_schema(record_schema)
    round_tripped = roundtrip(parsed, records)
    assert records == round_tripped
Exemple #10
0
def test_record_named_type():
    """A record defined inline in one field can be referenced by name in a later field.

    See https://github.com/fastavro/fastavro/issues/450
    """
    nested_record = {
        "type": "record",
        "name": "my_record",
        "fields": [{"name": "field1", "type": "string"}],
    }
    outer_schema = {
        "type": "record",
        "name": "test_record_named_type",
        "fields": [
            {"name": "test1", "type": nested_record},
            # Second field refers to the nested record by name only.
            {"name": "test2", "type": "my_record"},
        ],
    }
    records = [{"test1": {"field1": "foo"}, "test2": {"field1": "bar"}}]

    parsed = parse_schema(outer_schema)
    round_tripped = roundtrip(parsed, records)
    assert records == round_tripped
 def _get_encoder_func(self, writer_schema):
     """Build a ``(record, fp)`` callable that writes ``record`` to ``fp``.

     Uses fastavro's schemaless writer when ``HAS_FAST`` is truthy, otherwise
     an ``avro.io.DatumWriter`` over ``writer_schema``.
     """
     if not HAS_FAST:
         datum_writer = avro.io.DatumWriter(writer_schema)

         def slow_encode(record, fp):
             return datum_writer.write(record, avro.io.BinaryEncoder(fp))

         return slow_encode

     # Parse once up front so every call of the encoder reuses the result.
     fast_schema = parse_schema(writer_schema.to_json())

     def fast_encode(record, fp):
         return schemaless_writer(fp, fast_schema, record)

     return fast_encode
Exemple #12
0
def write_read(in_data):
    """Write ``in_data`` to an in-memory buffer and return what ``reader`` yields from it.

    The schema comes from ``rec_avro_schema()`` and is passed through
    ``schema.parse_schema`` before writing.
    """
    stream = io.BytesIO()
    parsed = schema.parse_schema(rec_avro_schema())
    writer(stream, parsed, in_data)

    # Rewind so the reader starts at the beginning of what was just written.
    stream.seek(0)
    return list(reader(stream))
Exemple #13
0
def test_with_dependent_schema():
    """Round-trip a record whose field references a separately defined record.

    Both schemas are parsed together as a list, dependency first.
    See https://github.com/fastavro/fastavro/issues/418
    """
    dependency_schema = {
        "type": "record",
        "name": "Dependency",
        "namespace": "test",
        "fields": [{"name": "_name", "type": "string"}],
    }
    dependent_schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [
            {"name": "_name", "type": "string"},
            {"name": "_dependency", "type": "Dependency"},
        ],
    }
    records = [{"_name": "parent", "_dependency": {"_name": "child"}}]

    combined = parse_schema([dependency_schema, dependent_schema])
    round_tripped = roundtrip(combined, records)

    assert records == round_tripped
Exemple #14
0
def test_doc_left_in_parse_schema():
    """With ``_write_hint=False`` the parsed schema equals the input, ``doc`` included."""
    original = {
        "type": "record",
        "name": "test_doc_left_in_parse_schema",
        "doc": "blah",
        "fields": [{"name": "field1", "type": "string", "default": ""}],
    }
    parsed = parse_schema(original, _write_hint=False)
    assert original == parsed
    def _get_encoder_func(self, writer_schema: str) -> callable:
        """Build a ``(record, outf)`` callable that writes ``record`` to ``outf``.

        ``writer_schema`` is a JSON schema string. When ``HAS_FAST`` is truthy
        it is decoded with ``json.loads`` and used with fastavro's schemaless
        writer; otherwise it is parsed with ``avro.schema.parse`` and written
        through an ``avro.io.DatumWriter``.
        """
        if not HAS_FAST:
            datum_writer = avro.io.DatumWriter(avro.schema.parse(writer_schema))

            def slow_encode(record, outf):
                return datum_writer.write(record, avro.io.BinaryEncoder(outf))

            return slow_encode

        # Parse once up front so every call of the encoder reuses the result.
        fast_schema = parse_schema(json.loads(writer_schema))

        def fast_encode(record, outf):
            return schemaless_writer(outf, fast_schema, record)

        return fast_encode
Exemple #16
0
def test_scale_is_an_int():
    """A decimal whose ``scale`` is a string must fail to parse.

    See https://github.com/fastavro/fastavro/issues/262
    """
    decimal_type = {
        "logicalType": "decimal",
        "precision": 5,
        "scale": "2",  # deliberately a string instead of an int
        "type": "bytes",
    }
    record_schema = {
        "type": "record",
        "name": "test_scale_is_an_int",
        "fields": [{"name": "field", "type": decimal_type}],
    }
    expected_message = "decimal scale must be a postive integer"

    with pytest.raises(SchemaParseException, match=expected_message):
        parse_schema(record_schema)
Exemple #17
0
def test_enum_symbols_validation__uniqueness():
    """An enum listing the same symbol twice is rejected with a specific message.

    See https://github.com/fastavro/fastavro/issues/551
    """
    enum_with_duplicate = {
        "name": "my_enum",
        "type": "enum",
        "symbols": ["FOO", "BAR", "FOO"],
    }
    record_schema = {
        "type": "record",
        "name": "my_schema",
        "fields": [{"name": "enum_field", "type": enum_with_duplicate}],
    }

    with pytest.raises(SchemaParseException) as caught:
        parse_schema(record_schema)

    message = str(caught.value)
    assert message == "All symbols in an enum must be unique"
Exemple #18
0
def test_using_named_schemas_to_handle_references():
    """A shared ``named_schemas`` dict lets one schema reference another by name.

    Without that dict, parsing the referencing schema raises UnknownType.
    """
    location = {
        "name": "Location",
        "type": "record",
        "fields": [{"name": "city", "type": "long"}],
    }
    weather = {
        "name": "Weather",
        "type": "record",
        "fields": [{"name": "of", "type": "Location"}],
    }

    # The same dict is handed to both calls, so Weather can resolve Location.
    shared_names = {}
    for schema_def in (location, weather):
        parse_schema(schema_def, shared_names)

    # This should not work because the named schemas are not supplied.
    with pytest.raises(UnknownType):
        parse_schema(weather)
Exemple #19
0
def test_explicit_null_namespace_2():
    """A record with an explicit ``None`` namespace parses and keeps its bare name.

    See https://github.com/fastavro/fastavro/issues/537
    """
    record_schema = {
        "type": "record",
        "name": "my_schema",
        "namespace": None,
        "fields": [{"name": "subfield", "type": "string"}],
    }

    parsed = parse_schema(record_schema)

    assert parsed["name"] == "my_schema"
Exemple #20
0
def test_aliases_are_preserved():
    """Field-level ``aliases`` are still present on the parsed schema."""
    aliased_field = {
        "name": "field",
        "type": "string",
        "aliases": ["test"],
    }
    record_schema = {
        "type": "record",
        "name": "test_parse_schema",
        "fields": [aliased_field],
    }

    parsed = parse_schema(record_schema)
    first_field = parsed["fields"][0]
    assert "aliases" in first_field
Exemple #21
0
def test_enum_symbols_validation__correct(symbol):
    """An enum containing the given valid ``symbol`` must parse without error.

    See https://github.com/fastavro/fastavro/issues/551
    """
    enum_type = {
        "name": "my_enum",
        "type": "enum",
        "symbols": [symbol],
    }
    schema_under_test = {
        "type": "record",
        "name": "my_schema",
        "fields": [{"name": "enum_field", "type": enum_type}],
    }

    try:
        parse_schema(schema_under_test)
    except SchemaParseException:
        # Turn the parse error into an explicit test failure with context.
        pytest.fail(
            f"valid symbol {symbol} has been incorrectly marked as invalid.")
Exemple #22
0
def test_precision_is_an_int():
    """A decimal whose ``precision`` is a string must fail to parse.

    See https://github.com/fastavro/fastavro/issues/262
    """
    schema = {
        "type": "record",
        "name": "test_scale_is_an_int",
        "fields": [{
            "name": "field",
            "type": {
                "logicalType": "decimal",
                "precision": "5",  # deliberately a string instead of an int
                "scale": 2,
                "type": "bytes",
            },
        }],
    }

    with pytest.raises(SchemaParseException) as exc:
        parse_schema(schema)

    # Inspect the raised exception itself rather than the string form of
    # pytest's ExceptionInfo wrapper, whose format varies between pytest
    # versions.
    assert "decimal precision must be a postive integer" in str(exc.value)
Exemple #23
0
  def test_writer_open_and_close(self):
    """Open an Avro sink writer on a temp file and close it again.

    The temp file is removed even when building, opening or closing the sink
    raises, so a failing run does not leave files behind.
    """
    # Create and then close a temp file so we can manually open it later
    dst = tempfile.NamedTemporaryFile(delete=False)
    dst.close()

    try:
      schema = parse_schema(json.loads(self.SCHEMA_STRING))
      sink = _create_avro_sink(
          'some_avro_sink', schema, 'null', '.end', 0, None,
          'application/x-avro')

      w = sink.open(dst.name)

      sink.close(w)
    finally:
      # delete=False above means nothing else will clean this file up.
      os.unlink(dst.name)
Exemple #24
0
def test_schema_expansion_3():
    """expand_schema inlines a type that is only available through ``named_schemas``.

    See https://github.com/fastavro/fastavro/issues/538
    """
    references = {
        "com.namespace.dependencies.Dependency": {
            "name": "Dependency",
            "namespace": "com.namespace.dependencies",
            "type": "record",
            "fields": [{"name": "sub_field_1", "type": "string"}],
        }
    }

    original_schema = {
        "name": "MasterSchema",
        "namespace": "com.namespace.master",
        "type": "record",
        "fields": [{
            "name": "field_2",
            "type": "com.namespace.dependencies.Dependency",
        }],
    }

    # After expansion the by-name reference should be the full definition.
    expected_fields = [{
        "name": "field_2",
        "type": {
            "name": "Dependency",
            "namespace": "com.namespace.dependencies",
            "type": "record",
            "fields": [{"name": "sub_field_1", "type": "string"}],
        },
    }]

    assert isinstance(original_schema, dict)

    try:
        parsed = parse_schema(original_schema, named_schemas=references)
        expanded_fields = expand_schema(parsed)["fields"]
        assert expected_fields == expanded_fields
    except UnknownType:
        pytest.fail(
            "expand_schema raised UnknownType even though referenced type is part of named_schemas"
        )
Exemple #25
0
def test_decimal_precision_is_greater_than_scale():
    """A decimal whose scale exceeds its precision must fail to parse.

    See https://github.com/fastavro/fastavro/issues/457
    """
    decimal_type = {
        "logicalType": "decimal",
        "precision": 5,
        "scale": 10,
        "type": "bytes",
    }
    record_schema = {
        "type": "record",
        "name": "test_scale_is_an_int",
        "fields": [{"name": "field", "type": decimal_type}],
    }
    expected_message = "decimal scale must be less than or equal to"

    with pytest.raises(SchemaParseException, match=expected_message):
        parse_schema(record_schema)
Exemple #26
0
def test_enum_symbols_validation__invalid(symbol):
    """An enum containing the given invalid ``symbol`` is rejected with a specific message.

    See https://github.com/fastavro/fastavro/issues/551
    """
    enum_type = {
        "name": "my_enum",
        "type": "enum",
        "symbols": [symbol],
    }
    record_schema = {
        "type": "record",
        "name": "my_schema",
        "fields": [{"name": "enum_field", "type": enum_type}],
    }

    with pytest.raises(SchemaParseException) as caught:
        parse_schema(record_schema)

    message = str(caught.value)
    assert (
        message ==
        "Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]*"
    )
Exemple #27
0
def test_with_dependent_schema():
    """Round-trip a record whose field references a separately parsed record.

    See https://github.com/fastavro/fastavro/issues/418

    NOTE(review): the two schemas are parsed in separate calls, the results
    are discarded, and the raw ``schema`` dict is what gets round-tripped.
    This presumably relies on ``parse_schema`` remembering named types
    between calls -- confirm against the fastavro version in use.
    """
    dependency = {
        "type": "record",
        "name": "Dependency",
        "namespace": "test",
        "fields": [{"name": "_name", "type": "string"}],
    }
    schema = {
        "type": "record",
        "name": "Test",
        "namespace": "test",
        "fields": [
            {"name": "_name", "type": "string"},
            {"name": "_dependency", "type": "Dependency"},
        ],
    }
    records = [{'_name': 'parent', '_dependency': {'_name': 'child'}}]

    # Dependency first, then the schema that refers to it.
    for schema_def in (dependency, schema):
        parse_schema(schema_def)

    round_tripped = roundtrip(schema, records)
    assert records == round_tripped
Exemple #28
0
def test_named_type_cannot_be_redefined():
    """Defining the same named type twice in one schema is a parse error.

    Three cases are covered: a nested record reusing the outer record's name,
    a record name reused by an enum, and a record name reused by a fixed.
    """
    # Case 1: a record inside the union reuses the enclosing record's name.
    error_record = {
        "type": "record",
        "name": "ErrorRecord",
        "fields": [{
            "name": "errors",
            "type": {"type": "map", "values": "string"},
            "doc": "doc",
        }],
    }
    record_redefines_itself = {
        "type": "record",
        "namespace": "test.avro.training",
        "name": "SomeMessage",
        "fields": [
            {"name": "is_error", "type": "boolean", "default": False},
            {
                "name": "outcome",
                "type": [
                    {"type": "record", "name": "SomeMessage", "fields": []},
                    error_record,
                ],
            },
        ],
    }
    with pytest.raises(
            SchemaParseException,
            match="redefined named type: test.avro.training.SomeMessage",
    ):
        parse_schema(record_redefines_itself)

    # Case 2: a record and an enum share the name "ThisName".
    record_then_enum = {
        "type": "record",
        "name": "SomeMessage",
        "fields": [
            {
                "name": "field1",
                "type": {"type": "record", "name": "ThisName", "fields": []},
            },
            {
                "name": "field2",
                "type": {
                    "type": "enum",
                    "name": "ThisName",
                    "symbols": ["FOO", "BAR"],
                },
            },
        ],
    }
    with pytest.raises(SchemaParseException,
                       match="redefined named type: ThisName"):
        parse_schema(record_then_enum)

    # Case 3: a record and a fixed share the name "ThatName".
    record_then_fixed = {
        "type": "record",
        "name": "SomeMessage",
        "fields": [
            {
                "name": "field1",
                "type": {"type": "record", "name": "ThatName", "fields": []},
            },
            {
                "name": "field2",
                "type": {"type": "fixed", "name": "ThatName", "size": 8},
            },
        ],
    }
    with pytest.raises(SchemaParseException,
                       match="redefined named type: ThatName"):
        parse_schema(record_then_fixed)
Exemple #29
0
def test_unknown_type():
    """A schema whose ``type`` is ``"unknown"`` must raise UnknownType."""
    unrecognised_schema = {"type": "unknown"}

    with pytest.raises(UnknownType):
        parse_schema(unrecognised_schema)
Exemple #30
0
 def __init__(self, methodName='runTest'):
   """Set up the test case with fastavro enabled and a parsed schema.

   ``self.SCHEMA`` is the result of running ``parse_schema`` on the JSON in
   ``self.SCHEMA_STRING``.
   """
   super(TestFastAvro, self).__init__(methodName)
   self.use_fastavro = True
   schema_dict = json.loads(self.SCHEMA_STRING)
   self.SCHEMA = parse_schema(schema_dict)