Example #1
0
    async def compatibility_check(self, content_type, *, subject, version,
                                  request):
        """Report whether the posted schema is compatible with a stored one.

        The stored schema is fetched with ``subject_version_get`` for the
        given ``subject``/``version``; the candidate comes from
        ``request.json``. Both are parsed and compared under the mode returned
        by ``_get_compatibility_mode``, and the verdict is sent through
        ``self.r`` as ``{"is_compatible": <bool>}``.

        NOTE(review): the flow relies on ``self.r`` not returning (presumably
        it raises to deliver the response) -- confirm against its definition.
        """
        payload = request.json
        self.log.info(
            "Got request to check subject: %r, version_id: %r compatibility",
            subject, version)
        stored = await self.subject_version_get(content_type=content_type,
                                                subject=subject,
                                                version=version,
                                                return_dict=True)
        self.log.info("Existing schema: %r, new_schema: %r", stored["schema"],
                      payload["schema"])

        def reject_as_invalid():
            # Shared 422 reply used when either schema fails to parse.
            self.r(
                body={
                    "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message": "Invalid Avro schema",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )

        try:
            candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
            candidate = TypedSchema.parse(candidate_type, payload["schema"])
        except InvalidSchema:
            self.log.warning("Invalid schema: %r", payload["schema"])
            reject_as_invalid()
        try:
            existing_type = SchemaType(stored.get("schemaType", "AVRO"))
            existing = TypedSchema.parse(existing_type, stored["schema"])
        except InvalidSchema:
            self.log.warning("Invalid existing schema: %r", stored["schema"])
            reject_as_invalid()

        mode = self._get_compatibility_mode(subject=stored,
                                            content_type=content_type)
        verdict = check_compatibility(
            old_schema=existing,
            new_schema=candidate,
            compatibility_mode=mode,
        )
        if is_incompatible(verdict):
            self.log.warning(
                "Invalid schema %s found by compatibility check: old: %s new: %s",
                verdict, existing, candidate)
            self.r({"is_compatible": False}, content_type)
        self.r({"is_compatible": True}, content_type)
    async def compatibility_check(self, content_type, *, subject, version,
                                  request):
        """Answer whether the request's schema is compatible with a stored one.

        Looks up the stored schema for ``subject``/``version``, parses it and
        the schema from ``request.json``, then runs ``Compatibility.check``
        using the subject's ``compatibility`` setting (falling back to
        ``self.ksr.config["compatibility"]``). The result is sent through
        ``self.r`` as ``{"is_compatible": <bool>}``; a schema that fails to
        parse yields a 422 reply with error code 44201.

        NOTE(review): assumes ``self.r`` aborts the handler (e.g. by raising)
        -- confirm against its definition.
        """
        payload = request.json
        self.log.info(
            "Got request to check subject: %r, version_id: %r compatibility",
            subject, version)
        stored = await self.subject_version_get(content_type=content_type,
                                                subject=subject,
                                                version=version,
                                                return_dict=True)
        self.log.info("Existing schema: %r, new_schema: %r", stored["schema"],
                      payload["schema"])

        try:
            candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
            candidate = TypedSchema.parse(candidate_type, payload["schema"])
        except InvalidSchema:
            self.log.warning("Invalid schema: %r", payload["schema"])
            invalid_candidate = {
                "error_code": 44201,
                "message": "Invalid Avro schema"
            }
            self.r(body=invalid_candidate,
                   content_type=content_type,
                   status=422)

        try:
            existing_type = SchemaType(stored.get("schemaType", "AVRO"))
            existing = TypedSchema.parse(existing_type, stored["schema"])
        except InvalidSchema:
            self.log.warning("Invalid existing schema: %r", stored["schema"])
            invalid_existing = {
                "error_code": 44201,
                "message": "Invalid Avro schema"
            }
            self.r(body=invalid_existing,
                   content_type=content_type,
                   status=422)

        checker = Compatibility(
            source=existing,
            target=candidate,
            compatibility=stored.get("compatibility",
                                     self.ksr.config["compatibility"]))
        try:
            checker.check()
        except IncompatibleSchema as ex:
            self.log.warning(
                "Invalid schema %s found by compatibility check: old: %s new: %s",
                ex, existing, candidate)
            self.r({"is_compatible": False}, content_type)
        self.r({"is_compatible": True}, content_type)
Example #3
0
 def get_subject_name(self, topic_name: str, schema: str, subject_type: str,
                      schema_type: SchemaType) -> str:
     """Build the subject name for ``topic_name`` from the schema's namespace.

     The namespace is read from the parsed schema for AVRO and JSONSCHEMA and
     is the literal ``"dummy"`` for any other schema type. The result is the
     configured ``subject_name_strategy`` output followed by ``-`` and
     ``subject_type``.
     """
     parsed = TypedSchema.parse(schema_type, schema)
     if schema_type is SchemaType.JSONSCHEMA:
         namespace = parsed.to_json().get("namespace", "dummy")
     elif schema_type is SchemaType.AVRO:
         namespace = parsed.schema.namespace
     else:
         namespace = "dummy"
     base_name = self.subject_name_strategy(topic_name, namespace)
     return f"{base_name}-{subject_type}"
Example #4
0
 async def serialize(self, schema: TypedSchema, value: dict) -> bytes:
     """Encode ``value`` as a header plus the schema-encoded payload.

     The header is ``struct.pack(HEADER_FORMAT, START_BYTE, schema_id)``,
     where ``schema_id`` is looked up in ``self.schemas_to_ids`` by the
     schema's string form.

     Raises:
         InvalidMessageSchema: if ``write_value`` raises
             ``avro.io.AvroTypeException`` for ``value``.
     """
     schema_id = self.schemas_to_ids[str(schema)]
     header = struct.pack(HEADER_FORMAT, START_BYTE, schema_id)
     with io.BytesIO() as buffer:
         buffer.write(header)
         try:
             write_value(schema, buffer, value)
         except avro.io.AvroTypeException as e:
             raise InvalidMessageSchema(
                 "Object does not fit to stored schema") from e
         return buffer.getvalue()
Example #5
0
 async def post_new_schema(self, subject: str, schema: TypedSchema) -> int:
     """POST ``schema`` as a new version of ``subject`` and return its id.

     Raises:
         SchemaRetrievalError: if the response is not ``ok``; the decoded
             response body is passed as the error argument.
     """
     request_body = {
         "schema": json_encode(schema.to_json()),
         "schemaType": schema.schema_type.value
     }
     endpoint = f"subjects/{quote(subject)}/versions"
     response = await self.client.post(endpoint, json=request_body)
     if response.ok:
         return response.json()["id"]
     raise SchemaRetrievalError(response.json())
Example #6
0
 def get_subject_name(self, topic_name: str, schema: str, subject_type: str,
                      schema_type: SchemaType) -> str:
     """Return ``<strategy(topic_name, namespace)>-<subject_type>``.

     ``namespace`` comes from the parsed schema for AVRO and JSONSCHEMA;
     every other schema type keeps the placeholder ``"dummy"``.
     """
     parsed = TypedSchema.parse(schema_type, schema)
     # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO
     namespace = "dummy"
     if schema_type is SchemaType.AVRO:
         namespace = parsed.schema.namespace
     elif schema_type is SchemaType.JSONSCHEMA:
         namespace = parsed.to_json().get("namespace", "dummy")
     return "{}-{}".format(
         self.subject_name_strategy(topic_name, namespace), subject_type)
Example #7
0
 async def subjects_schema_post(self, content_type, *, subject, request):
     """Look up the posted schema among the versions stored for ``subject``.

     Replies through ``self.r`` with the matching version's ``subject``,
     ``version``, ``id`` and ``schema`` (plus ``schemaType`` for non-AVRO
     schemas). Replies with SCHEMA_NOT_FOUND / 404 when nothing matches, and
     with an internal-server-error body when the request has no ``schema``
     key or the schema cannot be parsed.

     NOTE(review): assumes ``self.r`` aborts the handler (e.g. by raising)
     -- confirm against its definition.
     """
     payload = request.json
     self._validate_schema_request_body(content_type, payload)
     subject_data = self._subject_get(subject, content_type)

     def fail_lookup():
         # Shared 500 reply for a missing or unparsable schema.
         self.r(
             body={
                 "error_code":
                 SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
                 "message":
                 f"Error while looking up schema under subject {subject}",
             },
             content_type=content_type,
             status=HTTPStatus.INTERNAL_SERVER_ERROR,
         )

     wanted = None
     if "schema" not in payload:
         fail_lookup()
     raw_schema = payload["schema"]
     schema_type = SchemaType(payload.get("schemaType", "AVRO"))
     try:
         wanted = TypedSchema.parse(schema_type, raw_schema)
     except InvalidSchema:
         self.log.exception("No proper parser found")
         fail_lookup()

     for entry in subject_data["schemas"].values():
         stored = entry["schema"]
         if not stored == wanted:
             self.log.debug("Schema %r did not match %r", entry, stored)
             continue
         found = {
             "subject": subject,
             "version": entry["version"],
             "id": entry["id"],
             "schema": stored.schema_str,
         }
         if schema_type is not SchemaType.AVRO:
             found["schemaType"] = schema_type
         self.r(found, content_type)

     self.r(
         body={
             "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,
             "message": "Schema not found",
         },
         content_type=content_type,
         status=HTTPStatus.NOT_FOUND,
     )
Example #8
0
async def test_remote_client(registry_async_client):
    """Round-trip the Avro fixture schema through ``SchemaRegistryClient``."""
    expected = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    registry = SchemaRegistryClient()
    registry.client = registry_async_client

    # Register the schema and make sure an id comes back.
    schema_id = await registry.post_new_schema("foo", expected)
    assert schema_id >= 0

    # Fetching by id must return an equal schema.
    by_id = await registry.get_schema_for_id(schema_id)
    assert by_id == expected, f"stored schema {by_id.to_json()} is not {expected.to_json()}"

    # The latest version of the subject must be the one just registered.
    latest_id, latest = await registry.get_latest_schema("foo")
    assert latest_id == schema_id
    assert latest == expected
Example #9
0
async def test_remote_client_protobuf(registry_async_client):
    """Round-trip the Protobuf fixture schema through ``SchemaRegistryClient``."""
    expected = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain)
    registry = SchemaRegistryClient()
    registry.client = registry_async_client
    subject_name = new_random_name("subject")

    # Register the schema under a fresh subject and expect an id back.
    schema_id = await registry.post_new_schema(subject_name, expected)
    assert schema_id >= 0

    # Fetching by id must return an equal schema.
    by_id = await registry.get_schema_for_id(schema_id)
    assert by_id == expected, f"stored schema {by_id} is not {expected}"

    # The subject's latest version must be the one just registered.
    latest_id, latest = await registry.get_latest_schema(subject_name)
    assert latest_id == schema_id
    assert latest == expected
Example #10
0
 async def get_id_for_schema(self, schema: str, subject: str,
                             schema_type: SchemaType) -> int:
     """Return the registry id for ``schema``, registering it when unknown.

     The parsed schema's string form is the cache key into
     ``self.schemas_to_ids``. On a miss the schema is posted under
     ``subject`` and both lookup tables are updated while holding
     ``self.state_lock``.

     Raises:
         InvalidPayload: if ``schema`` cannot be parsed as ``schema_type``.
     """
     try:
         parsed = TypedSchema.parse(schema_type, schema)
     except InvalidSchema as e:
         raise InvalidPayload(f"Schema string {schema} is invalid") from e

     cache_key = str(parsed)
     if cache_key in self.schemas_to_ids:
         return self.schemas_to_ids[cache_key]

     new_id = await self.registry_client.post_new_schema(subject, parsed)
     async with self.state_lock:
         self.schemas_to_ids[cache_key] = new_id
         self.ids_to_schemas[new_id] = parsed
     return new_id
Example #11
0
 async def get_schema_for_id(self, schema_id: int) -> TypedSchema:
     """Fetch the schema stored under ``schema_id`` and parse it.

     The schema type is taken from the response's ``schemaType`` field and
     defaults to ``"AVRO"`` when the field is absent.

     Raises:
         SchemaRetrievalError: if the request is not ``ok``, the response has
             no ``schema`` field, or the schema string cannot be parsed.
     """
     result = await self.client.get(f"schemas/ids/{schema_id}")
     json_result = result.json()
     if not result.ok:
         # Prefer the "message" field, but fall back to the whole payload so
         # an unexpected error body still surfaces as SchemaRetrievalError
         # instead of a KeyError/TypeError that hides the real failure.
         if isinstance(json_result, dict) and "message" in json_result:
             raise SchemaRetrievalError(json_result["message"])
         raise SchemaRetrievalError(json_result)
     if "schema" not in json_result:
         raise SchemaRetrievalError(f"Invalid result format: {json_result}")
     try:
         schema_type = SchemaType(json_result.get("schemaType", "AVRO"))
         return TypedSchema.parse(schema_type, json_result["schema"])
     except InvalidSchema as e:
         raise SchemaRetrievalError(
             f"Failed to parse schema string from response: {json_result}"
         ) from e
async def test_remote_client(karapace, aiohttp_client):
    """Round-trip the Avro fixture schema through ``SchemaRegistryClient``.

    Builds a ``Client`` on top of the ``aiohttp_client`` test client for the
    app returned by the ``karapace`` fixture, registers the schema under
    subject ``"foo"`` and checks it can be read back by id and as the latest
    version.
    """
    kc, _ = karapace()
    client = await aiohttp_client(kc.app)
    c = Client(client=client)
    try:
        schema_avro = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
        reg_cli = SchemaRegistryClient()
        reg_cli.client = c
        sc_id = await reg_cli.post_new_schema("foo", schema_avro)
        assert sc_id >= 0
        stored_schema = await reg_cli.get_schema_for_id(sc_id)
        assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}"
        stored_id, stored_schema = await reg_cli.get_latest_schema("foo")
        assert stored_id == sc_id
        assert stored_schema == schema_avro
    finally:
        # Close the client even when a request or assertion above fails, so
        # a failing test does not leave it open.
        await c.close()
 async def subjects_schema_post(self, content_type, *, subject, request):
     """Find the posted schema among the versions stored for ``subject``.

     Replies through ``self.r`` with the matching version's ``subject``,
     ``version``, ``id`` and ``schema`` (plus ``schemaType`` for non-AVRO
     schemas). Replies with 40403 / 404 when no version matches, and with a
     500 body when the request lacks ``schema`` or the schema cannot be
     parsed.

     NOTE(review): assumes ``self.r`` aborts the handler (e.g. by raising)
     -- confirm against its definition.
     """
     payload = request.json
     self._validate_schema_request_body(content_type, payload)
     subject_data = self._subject_get(subject, content_type)
     wanted = None

     if "schema" not in payload:
         missing_schema = {
             "error_code": 500,
             "message": "Internal Server Error"
         }
         self.r(missing_schema, content_type, status=500)

     raw_schema = payload["schema"]
     schema_type = SchemaType(payload.get("schemaType", "AVRO"))
     try:
         wanted = TypedSchema.parse(schema_type, raw_schema)
     except InvalidSchema:
         self.log.exception("No proper parser found")
         unparsable = {
             "error_code":
             500,
             "message":
             f"Error while looking up schema under subject {subject}"
         }
         self.r(unparsable, content_type, status=500)

     for entry in subject_data["schemas"].values():
         stored = entry["schema"]
         if not stored == wanted:
             self.log.debug("Schema %r did not match %r", entry, stored)
             continue
         found = {
             "subject": subject,
             "version": entry["version"],
             "id": entry["id"],
             "schema": str(stored),
         }
         if schema_type is not SchemaType.AVRO:
             found["schemaType"] = schema_type
         self.r(found, content_type)

     not_found = {"error_code": 40403, "message": "Schema not found"}
     self.r(not_found, content_type, status=404)
Example #14
0
 async def get_latest_schema(self, subject: str) -> (int, TypedSchema):
     """Fetch the latest version of ``subject`` as ``(id, parsed schema)``.

     The schema type comes from the response's ``schemaType`` field and
     defaults to ``"AVRO"`` when the field is absent.

     Raises:
         SchemaRetrievalError: if the request is not ``ok``, the response
             lacks ``id`` or ``schema``, or the schema cannot be parsed.
     """
     response = await self.client.get(
         f"subjects/{quote(subject)}/versions/latest")
     if not response.ok:
         raise SchemaRetrievalError(response.json())

     data = response.json()
     if not ("id" in data and "schema" in data):
         raise SchemaRetrievalError(f"Invalid result format: {data}")

     try:
         schema_type = SchemaType(data.get("schemaType", "AVRO"))
         latest_id = data["id"]
         parsed = TypedSchema.parse(schema_type, data["schema"])
         return latest_id, parsed
     except InvalidSchema as e:
         raise SchemaRetrievalError(
             f"Failed to parse schema string from response: {data}"
         ) from e
Example #15
0
 async def get_id_for_schema(self, schema: str, subject: str,
                             schema_type: SchemaType) -> int:
     """Return the registry id for ``schema``, registering it when unknown.

     The parsed schema's string form is the cache key into
     ``self.schemas_to_ids``. On a miss the schema is posted under
     ``subject`` and both lookup tables are updated while holding
     ``self.state_lock``.

     Raises:
         InvalidPayload: if ``schema`` cannot be parsed as ``schema_type``.
     """
     assert self.registry_client, "must not call this method after the object is closed."
     try:
         parsed = TypedSchema.parse(schema_type, schema)
     except InvalidSchema as e:
         raise InvalidPayload(f"Schema string {schema} is invalid") from e

     cache_key = str(parsed)
     if cache_key in self.schemas_to_ids:
         return self.schemas_to_ids[cache_key]

     new_id = await self.registry_client.post_new_schema(subject, parsed)
     async with self.state_lock:
         self.schemas_to_ids[cache_key] = new_id
         self.ids_to_schemas[new_id] = parsed
     return new_id
 def send_schema_message(
     self,
     *,
     subject: str,
     schema: TypedSchema,
     schema_id: int,
     version: int,
     deleted: bool,
 ):
     """Publish a schema record through ``send_kafka_message``.

     The key is a compact JSON object with ``subject``, ``version``,
     ``magic`` and ``keytype``. The value carries ``subject``, ``version``,
     ``id``, the JSON-encoded schema and the ``deleted`` flag, plus
     ``schemaType`` for non-AVRO schemas.

     Returns:
         Whatever ``send_kafka_message`` returns.
     """
     import json  # local import: only needed to escape the subject below

     # Escape the subject as a JSON string so that quotes or backslashes in
     # it cannot produce an invalid key. ensure_ascii=False keeps non-ASCII
     # subjects unescaped, so subjects without characters that need JSON
     # escaping yield exactly the same key as plain interpolation.
     key = '{{"subject":{},"version":{},"magic":1,"keytype":"SCHEMA"}}'.format(
         json.dumps(subject, ensure_ascii=False), version)
     value = {
         "subject": subject,
         "version": version,
         "id": schema_id,
         "schema": json_encode(schema.to_json(), compact=True),
         "deleted": deleted
     }
     if schema.schema_type is not SchemaType.AVRO:
         value["schemaType"] = schema.schema_type
     return self.send_kafka_message(key, json_encode(value, compact=True))
    def write_new_schema_local(self, subject, body, content_type):
        """Since we're the master we get to write the new schema

        Parses ``body["schema"]`` as ``body["schemaType"]`` (default AVRO),
        then either reuses the id of an equal schema already stored for
        ``subject`` or registers a new version via ``send_schema_message``.
        Every outcome is reported through ``self.r``:

        * 422 / error code 44201 when the schema cannot be parsed,
        * 409 when the schema is incompatible with an earlier version,
        * ``{"id": ...}`` otherwise.

        NOTE(review): the control flow only makes sense if ``self.r`` aborts
        the handler (presumably by raising) -- confirm against its definition.
        """
        self.log.info("Writing new schema locally since we're the master")
        schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
        try:
            new_schema = TypedSchema.parse(schema_type=schema_type,
                                           schema_str=body["schema"])
        except (InvalidSchema, InvalidSchemaType):
            self.log.warning("Invalid schema: %r",
                             body["schema"],
                             exc_info=True)
            self.r(body={
                "error_code": 44201,
                "message": f"Invalid {schema_type} schema"
            },
                   content_type=content_type,
                   status=422)
        # Unknown subject, or a subject with no schema entries: this becomes
        # version 1.
        if subject not in self.ksr.subjects or not self.ksr.subjects.get(
                subject)["schemas"]:
            schema_id = self.ksr.get_schema_id(new_schema)
            version = 1
            self.log.info(
                "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
                subject, version, new_schema.to_json(), schema_id)
        else:
            # First check if any of the existing schemas for the subject match
            subject_data = self.ksr.subjects[subject]
            schemas = self.ksr.get_schemas(subject)
            if not schemas:  # Previous ones have been deleted by the user.
                # Continue numbering after the highest version key still
                # recorded for the subject.
                version = max(self.ksr.subjects[subject]["schemas"]) + 1
                schema_id = self.ksr.get_schema_id(new_schema)
                self.log.info(
                    "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                    subject, schema_id, version, new_schema.to_json(),
                    schema_id)
                self.send_schema_message(
                    subject=subject,
                    schema=new_schema,
                    schema_id=schema_id,
                    version=version,
                    deleted=False,
                )
                self.r({"id": schema_id}, content_type)

            schema_versions = sorted(list(schemas))
            # Go through these in version order
            for version in schema_versions:
                schema = subject_data["schemas"][version]
                if schema["schema"] == new_schema:
                    # An equal schema is already stored: answer with its id.
                    self.r({"id": schema["id"]}, content_type)
                else:
                    self.log.debug("schema: %s did not match with: %s", schema,
                                   new_schema)

            # Subject-level setting wins; otherwise use the global config.
            compatibility = subject_data.get("compatibility",
                                             self.ksr.config["compatibility"])

            # Run a compatibility check between on file schema(s) and the one being submitted now
            # the check is either towards the latest one or against all previous ones in case of
            # transitive mode
            if compatibility in TRANSITIVE_MODES:
                check_against = schema_versions
            else:
                check_against = [schema_versions[-1]]

            for old_version in check_against:
                old_schema = subject_data["schemas"][old_version]["schema"]
                compat = Compatibility(old_schema,
                                       new_schema,
                                       compatibility=compatibility)
                try:
                    compat.check()
                except IncompatibleSchema as ex:
                    self.log.warning("Incompatible schema: %s", ex)
                    self.r(body={
                        "error_code":
                        409,
                        "message":
                        "Schema being registered is incompatible with an earlier schema"
                    },
                           content_type=content_type,
                           status=409)

            # We didn't find an existing schema and the schema is compatible so go and create one
            schema_id = self.ksr.get_schema_id(new_schema)
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject, schema_id, version, new_schema.to_json(), schema_id)
        # Reached from the "version 1" branch and from the "new compatible
        # version" branch: publish the record and answer with its id.
        self.send_schema_message(
            subject=subject,
            schema=new_schema,
            schema_id=schema_id,
            version=version,
            deleted=False,
        )
        self.r({"id": schema_id}, content_type)
Example #18
0
 async def get_schema_for_id(self, *args, **kwargs):
     """Stub: ignore all arguments and return the parsed Avro fixture schema."""
     fixture_schema = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
     return fixture_schema
Example #19
0
 async def get_latest_schema(self, *args, **kwargs):
     """Stub: ignore all arguments and return id 1 with the Avro fixture schema."""
     fixture_schema = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
     return 1, fixture_schema
Example #20
0
    def write_new_schema_local(self, subject, body, content_type):
        """Since we're the master we get to write the new schema

        Parses ``body["schema"]`` as ``body["schemaType"]`` (default AVRO),
        then either reuses the id of an equal schema already stored for
        ``subject`` or registers a new version via ``send_schema_message``.
        Every outcome is reported through ``self.r``:

        * UNPROCESSABLE_ENTITY with INVALID_AVRO_SCHEMA when parsing fails,
        * CONFLICT with HTTP_CONFLICT when the schema is incompatible with an
          earlier version,
        * ``{"id": ...}`` otherwise.

        NOTE(review): the control flow only makes sense if ``self.r`` aborts
        the handler (presumably by raising) -- confirm against its definition.
        """
        self.log.info("Writing new schema locally since we're the master")
        schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
        try:
            new_schema = TypedSchema.parse(schema_type=schema_type,
                                           schema_str=body["schema"])
        except (InvalidSchema, InvalidSchemaType) as e:
            self.log.warning("Invalid schema: %r",
                             body["schema"],
                             exc_info=True)
            # Surface the underlying parser message when the chained cause is
            # one of the known parse errors; otherwise use a generic text.
            if isinstance(e.__cause__,
                          (SchemaParseException, JSONDecodeError)):
                human_error = f"{e.__cause__.args[0]}"  # pylint: disable=no-member
            else:
                human_error = "Provided schema is not valid"
            self.r(
                body={
                    "error_code":
                    SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message":
                    f"Invalid {schema_type} schema. Error: {human_error}",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )
        # Unknown subject, or a subject with no schema entries: this becomes
        # version 1.
        if subject not in self.ksr.subjects or not self.ksr.subjects.get(
                subject)["schemas"]:
            schema_id = self.ksr.get_schema_id(new_schema)
            version = 1
            self.log.info(
                "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
                subject, version, new_schema.schema_str, schema_id)
        else:
            # First check if any of the existing schemas for the subject match
            subject_data = self.ksr.subjects[subject]
            schemas = self.ksr.get_schemas(subject)
            if not schemas:  # Previous ones have been deleted by the user.
                # Continue numbering after the highest version key still
                # recorded for the subject.
                version = max(self.ksr.subjects[subject]["schemas"]) + 1
                schema_id = self.ksr.get_schema_id(new_schema)
                self.log.info(
                    "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                    subject, schema_id, version, new_schema.schema_str,
                    schema_id)
                self.send_schema_message(
                    subject=subject,
                    schema=new_schema,
                    schema_id=schema_id,
                    version=version,
                    deleted=False,
                )
                self.r({"id": schema_id}, content_type)

            schema_versions = sorted(list(schemas))
            # Go through these in version order
            for version in schema_versions:
                schema = subject_data["schemas"][version]
                if schema["schema"] == new_schema:
                    # An equal schema is already stored: answer with its id.
                    self.r({"id": schema["id"]}, content_type)
                else:
                    self.log.debug("schema: %s did not match with: %s", schema,
                                   new_schema)

            compatibility_mode = self._get_compatibility_mode(
                subject=subject_data, content_type=content_type)

            # Run a compatibility check between on file schema(s) and the one being submitted now
            # the check is either towards the latest one or against all previous ones in case of
            # transitive mode
            if compatibility_mode.is_transitive():
                check_against = schema_versions
            else:
                check_against = [schema_versions[-1]]

            for old_version in check_against:
                old_schema = subject_data["schemas"][old_version]["schema"]
                result = check_compatibility(
                    old_schema=old_schema,
                    new_schema=new_schema,
                    compatibility_mode=compatibility_mode,
                )
                if is_incompatible(result):
                    # NOTE(review): set(...).pop() returns an arbitrary
                    # element, so which message is reported is not fixed when
                    # several distinct messages exist.
                    message = set(
                        result.messages).pop() if result.messages else ""
                    self.log.warning("Incompatible schema: %s", result)
                    self.r(
                        body={
                            "error_code":
                            SchemaErrorCodes.HTTP_CONFLICT.value,
                            "message":
                            f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                        },
                        content_type=content_type,
                        status=HTTPStatus.CONFLICT,
                    )

            # We didn't find an existing schema and the schema is compatible so go and create one
            schema_id = self.ksr.get_schema_id(new_schema)
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            # NOTE(review): this log call uses new_schema.to_json() while the
            # two log calls above use new_schema.schema_str -- confirm whether
            # the difference is intentional.
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject, schema_id, version, new_schema.to_json(), schema_id)
        # Reached from the "version 1" branch and from the "new compatible
        # version" branch: publish the record and answer with its id.
        self.send_schema_message(
            subject=subject,
            schema=new_schema,
            schema_id=schema_id,
            version=version,
            deleted=False,
        )
        self.r({"id": schema_id}, content_type)