Esempio n. 1
0
    async def compatibility_check(self, content_type, *, subject, version,
                                  request):
        """Answer whether the posted schema is compatible with a stored version.

        The stored entry is fetched with ``subject_version_get`` for the given
        ``subject``/``version``; the candidate comes from the ``schema`` key of
        the request JSON.  Both are parsed with ``TypedSchema.parse`` and then
        compared with ``check_compatibility`` under the mode returned by
        ``_get_compatibility_mode``.  Every outcome is delivered via ``self.r``.

        NOTE(review): the flow presumably relies on ``self.r`` not returning
        normally (e.g. by raising the HTTP response) -- confirm against its
        definition; otherwise statements after an error reply keep running.
        """
        payload = request.json
        self.log.info(
            "Got request to check subject: %r, version_id: %r compatibility",
            subject, version)
        stored = await self.subject_version_get(
            content_type=content_type,
            subject=subject,
            version=version,
            return_dict=True,
        )
        self.log.info("Existing schema: %r, new_schema: %r", stored["schema"],
                      payload["schema"])

        # Candidate schema from the request body; its type defaults to AVRO.
        try:
            candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
            candidate = TypedSchema.parse(candidate_type, payload["schema"])
        except InvalidSchema:
            self.log.warning("Invalid schema: %r", payload["schema"])
            candidate_rejection = {
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            }
            self.r(
                body=candidate_rejection,
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )

        # Stored schema, parsed with the type recorded on the stored entry.
        try:
            stored_type = SchemaType(stored.get("schemaType", "AVRO"))
            existing = TypedSchema.parse(stored_type, stored["schema"])
        except InvalidSchema:
            self.log.warning("Invalid existing schema: %r", stored["schema"])
            stored_rejection = {
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            }
            self.r(
                body=stored_rejection,
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )

        mode = self._get_compatibility_mode(subject=stored,
                                            content_type=content_type)
        verdict = check_compatibility(
            old_schema=existing,
            new_schema=candidate,
            compatibility_mode=mode,
        )
        if is_incompatible(verdict):
            self.log.warning(
                "Invalid schema %s found by compatibility check: old: %s new: %s",
                verdict, existing, candidate)
            self.r({"is_compatible": False}, content_type)
        # Only reached when the incompatible reply above did not end the call.
        self.r({"is_compatible": True}, content_type)
    async def compatibility_check(self, content_type, *, subject, version,
                                  request):
        """Answer whether the posted schema is compatible with a stored version.

        The stored entry is fetched with ``subject_version_get``; the candidate
        comes from the ``schema`` key of the request JSON.  Both are parsed
        with ``TypedSchema.parse`` and handed to ``Compatibility``, whose
        ``check()`` raising ``IncompatibleSchema`` yields the negative answer.
        The compatibility level is the stored entry's ``compatibility`` value,
        falling back to ``self.ksr.config["compatibility"]``.

        NOTE(review): the flow presumably relies on ``self.r`` not returning
        normally (e.g. by raising the HTTP response) -- confirm against its
        definition; otherwise statements after an error reply keep running.
        """
        payload = request.json
        self.log.info(
            "Got request to check subject: %r, version_id: %r compatibility",
            subject, version)
        stored = await self.subject_version_get(
            content_type=content_type,
            subject=subject,
            version=version,
            return_dict=True,
        )
        self.log.info("Existing schema: %r, new_schema: %r", stored["schema"],
                      payload["schema"])

        # Candidate schema from the request body; its type defaults to AVRO.
        try:
            candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
            candidate = TypedSchema.parse(candidate_type, payload["schema"])
        except InvalidSchema:
            self.log.warning("Invalid schema: %r", payload["schema"])
            candidate_rejection = {
                "error_code": 44201,
                "message": "Invalid Avro schema"
            }
            self.r(body=candidate_rejection,
                   content_type=content_type,
                   status=422)

        # Stored schema, parsed with the type recorded on the stored entry.
        try:
            stored_type = SchemaType(stored.get("schemaType", "AVRO"))
            existing = TypedSchema.parse(stored_type, stored["schema"])
        except InvalidSchema:
            self.log.warning("Invalid existing schema: %r", stored["schema"])
            stored_rejection = {
                "error_code": 44201,
                "message": "Invalid Avro schema"
            }
            self.r(body=stored_rejection,
                   content_type=content_type,
                   status=422)

        checker = Compatibility(
            source=existing,
            target=candidate,
            compatibility=stored.get("compatibility",
                                     self.ksr.config["compatibility"]),
        )
        try:
            checker.check()
        except IncompatibleSchema as failure:
            self.log.warning(
                "Invalid schema %s found by compatibility check: old: %s new: %s",
                failure, existing, candidate)
            self.r({"is_compatible": False}, content_type)
        # Only reached when the incompatible reply above did not end the call.
        self.r({"is_compatible": True}, content_type)
 def _validate_schema_type(self, content_type, body):
     """Reject a request body whose ``schemaType`` is not supported.

     The type is read from ``body["schemaType"]`` and defaults to
     ``SchemaType.AVRO.value`` when the key is absent.  Anything other than
     JSONSCHEMA or AVRO is answered with a 422 reply through ``self.r``.

     Args:
         content_type: Content type forwarded unchanged to ``self.r``.
         body: Request JSON mapping that may carry a ``schemaType`` key.
     """
     requested = body.get("schemaType", SchemaType.AVRO.value)
     try:
         schema_type = SchemaType(requested)
     except ValueError:
         # Looking an Enum member up by an unknown value raises ValueError
         # (assumes SchemaType is an Enum, as its ``.value`` use suggests).
         # Without this guard an unknown type escaped as an unhandled
         # exception instead of producing the 422 reply below.
         schema_type = None
     if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}:
         # Show the member for known-but-unsupported types (as before) and
         # the raw request value when it is not a known type at all.
         shown = requested if schema_type is None else schema_type
         self.r(body={
             "error_code": 422,
             "message": f"unrecognized schemaType: {shown}"
         },
                content_type=content_type,
                status=422)
Esempio n. 4
0
 async def subjects_schema_post(self, content_type, *, subject, request):
     """Look up the posted schema among the versions stored for ``subject``.

     The request JSON is validated with ``_validate_schema_request_body``
     and the subject data is obtained from ``_subject_get``.  The posted
     ``schema`` text is parsed with ``TypedSchema.parse`` (type defaults to
     AVRO) and compared for equality with every stored schema.  A match is
     answered with its subject, version, id and schema string (plus
     ``schemaType`` for non-AVRO types); no match yields a 404 reply.  A
     missing ``schema`` key or an unparsable schema yields a 500 reply.

     NOTE(review): the flow presumably relies on ``self.r`` not returning
     normally (e.g. by raising the HTTP response) -- confirm against its
     definition; otherwise statements after a reply keep running.
     """
     body = request.json
     self._validate_schema_request_body(content_type, body)
     subject_data = self._subject_get(subject, content_type)
     new_schema = None
     if "schema" not in body:
         self.r(
             body={
                 "error_code":
                 SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
                 "message":
                 f"Error while looking up schema under subject {subject}",
             },
             content_type=content_type,
             status=HTTPStatus.INTERNAL_SERVER_ERROR,
         )
     schema_str = body["schema"]
     schema_type = SchemaType(body.get("schemaType", "AVRO"))
     try:
         new_schema = TypedSchema.parse(schema_type, schema_str)
     except InvalidSchema:
         self.log.exception("No proper parser found")
         self.r(
             body={
                 "error_code":
                 SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
                 "message":
                 f"Error while looking up schema under subject {subject}",
             },
             content_type=content_type,
             status=HTTPStatus.INTERNAL_SERVER_ERROR,
         )
     for schema in subject_data["schemas"].values():
         typed_schema = schema["schema"]
         if typed_schema == new_schema:
             ret = {
                 "subject": subject,
                 "version": schema["version"],
                 "id": schema["id"],
                 "schema": typed_schema.schema_str,
             }
             # The schema type is only reported for non-AVRO schemas.
             if schema_type is not SchemaType.AVRO:
                 ret["schemaType"] = schema_type
             self.r(ret, content_type)
         else:
             # Log the submitted schema against the stored one; previously
             # the stored entry was logged against its own schema, so the
             # submitted schema never appeared in the message.
             self.log.debug("Schema %r did not match %r", new_schema,
                            typed_schema)
     self.r(
         body={
             "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,
             "message": "Schema not found",
         },
         content_type=content_type,
         status=HTTPStatus.NOT_FOUND,
     )
Esempio n. 5
0
 def _validate_schema_type(self, content_type, body) -> None:
     """Reject a request body whose ``schemaType`` is not supported.

     The type is read from ``body["schemaType"]`` and defaults to
     ``SchemaType.AVRO.value`` when the key is absent.  Anything other than
     JSONSCHEMA, AVRO or PROTOBUF is answered with an UNPROCESSABLE_ENTITY
     reply through ``self.r``.

     Args:
         content_type: Content type forwarded unchanged to ``self.r``.
         body: Request JSON mapping that may carry a ``schemaType`` key.
     """
     requested = body.get("schemaType", SchemaType.AVRO.value)
     try:
         schema_type = SchemaType(requested)
     except ValueError:
         # Looking an Enum member up by an unknown value raises ValueError
         # (assumes SchemaType is an Enum, as its ``.value`` use suggests).
         # Without this guard an unknown type escaped as an unhandled
         # exception instead of producing the reply below.
         schema_type = None
     if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}:
         # Show the member for known-but-unsupported types (as before) and
         # the raw request value when it is not a known type at all.
         shown = requested if schema_type is None else schema_type
         self.r(
             body={
                 "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
                 "message": f"unrecognized schemaType: {shown}",
             },
             content_type=content_type,
             status=HTTPStatus.UNPROCESSABLE_ENTITY,
         )
Esempio n. 6
0
 async def get_schema_for_id(self, schema_id: int) -> TypedSchema:
     """Fetch ``schemas/ids/<schema_id>`` via ``self.client`` and parse it.

     The schema type is taken from the response's ``schemaType`` entry and
     defaults to AVRO when that entry is absent.

     Raises:
         SchemaRetrievalError: If the response is not ok, carries no
             ``schema`` entry, or its schema text fails to parse.
     """
     response = await self.client.get(f"schemas/ids/{schema_id}")
     if not response.ok:
         raise SchemaRetrievalError(response.json()["message"])

     payload = response.json()
     if "schema" not in payload:
         raise SchemaRetrievalError(f"Invalid result format: {payload}")

     try:
         kind = SchemaType(payload.get("schemaType", "AVRO"))
         parsed = TypedSchema.parse(kind, payload["schema"])
     except InvalidSchema as parse_error:
         raise SchemaRetrievalError(
             f"Failed to parse schema string from response: {payload}"
         ) from parse_error
     return parsed
 async def subjects_schema_post(self, content_type, *, subject, request):
     """Look up the posted schema among the versions stored for ``subject``.

     The request JSON is validated with ``_validate_schema_request_body``
     and the subject data is obtained from ``_subject_get``.  The posted
     ``schema`` text is parsed with ``TypedSchema.parse`` (type defaults to
     AVRO) and compared for equality with every stored schema.  A match is
     answered with its subject, version, id and ``str()`` of the schema
     (plus ``schemaType`` for non-AVRO types); no match yields a 404 reply
     with error code 40403.  A missing ``schema`` key or an unparsable
     schema yields a 500 reply.

     NOTE(review): the flow presumably relies on ``self.r`` not returning
     normally (e.g. by raising the HTTP response) -- confirm against its
     definition; otherwise statements after a reply keep running.
     """
     body = request.json
     self._validate_schema_request_body(content_type, body)
     subject_data = self._subject_get(subject, content_type)
     new_schema = None
     if "schema" not in body:
         self.r({
             "error_code": 500,
             "message": "Internal Server Error"
         },
                content_type,
                status=500)
     schema_str = body["schema"]
     schema_type = SchemaType(body.get("schemaType", "AVRO"))
     try:
         new_schema = TypedSchema.parse(schema_type, schema_str)
     except InvalidSchema:
         self.log.exception("No proper parser found")
         self.r(
             {
                 "error_code":
                 500,
                 "message":
                 f"Error while looking up schema under subject {subject}"
             },
             content_type,
             status=500)
     for schema in subject_data["schemas"].values():
         typed_schema = schema["schema"]
         if typed_schema == new_schema:
             ret = {
                 "subject": subject,
                 "version": schema["version"],
                 "id": schema["id"],
                 "schema": str(typed_schema),
             }
             # The schema type is only reported for non-AVRO schemas.
             if schema_type is not SchemaType.AVRO:
                 ret["schemaType"] = schema_type
             self.r(ret, content_type)
         else:
             # Log the submitted schema against the stored one; previously
             # the stored entry was logged against its own schema, so the
             # submitted schema never appeared in the message.
             self.log.debug("Schema %r did not match %r", new_schema,
                            typed_schema)
     self.r({
         "error_code": 40403,
         "message": "Schema not found"
     },
            content_type,
            status=404)
Esempio n. 8
0
 async def get_latest_schema(self, subject: str) -> (int, TypedSchema):
     """Fetch the latest version of ``subject`` and return ``(id, schema)``.

     The subject is URL-quoted into ``subjects/<subject>/versions/latest``
     and requested through ``self.client``.  The schema type is taken from
     the response's ``schemaType`` entry and defaults to AVRO.

     Raises:
         SchemaRetrievalError: If the response is not ok, lacks the ``id``
             or ``schema`` entry, or its schema text fails to parse.
     """
     # NOTE(review): the return annotation is a tuple literal rather than a
     # typing construct; it is left untouched to keep the signature as is.
     response = await self.client.get(
         f"subjects/{quote(subject)}/versions/latest")
     if not response.ok:
         raise SchemaRetrievalError(response.json())

     payload = response.json()
     if not ("id" in payload and "schema" in payload):
         raise SchemaRetrievalError(f"Invalid result format: {payload}")

     try:
         kind = SchemaType(payload.get("schemaType", "AVRO"))
         latest_id = payload["id"]
         parsed = TypedSchema.parse(kind, payload["schema"])
     except InvalidSchema as parse_error:
         raise SchemaRetrievalError(
             f"Failed to parse schema string from response: {payload}"
         ) from parse_error
     return latest_id, parsed
Esempio n. 9
0
    def write_new_schema_local(self, subject, body, content_type):
        """Register the schema in ``body`` under ``subject`` on this node.

        Steps, in order:

        1. Parse ``body["schema"]`` with ``TypedSchema.parse``; a parse failure
           is answered with an UNPROCESSABLE_ENTITY reply.
        2. If the subject is unknown or has no schemas, use version 1.
        3. Otherwise, if ``self.ksr.get_schemas(subject)`` is empty, register
           the next version number right away and reply with the id.
        4. Otherwise reply with the id of an identical stored schema if one
           exists; else run the compatibility check (against every version in
           transitive mode, against the latest only otherwise) and answer
           CONFLICT on the first incompatible result.
        5. Send the schema message and reply with the new schema id.

        NOTE(review): the flow presumably relies on ``self.r`` not returning
        normally (e.g. by raising the HTTP response) -- confirm against its
        definition; otherwise statements after a reply keep running.

        Args:
            subject: Subject name the schema is registered under.
            body: Request JSON mapping with ``schema`` and optional
                ``schemaType`` (defaults to ``SchemaType.AVRO``).
            content_type: Content type forwarded unchanged to ``self.r``.
        """
        self.log.info("Writing new schema locally since we're the master")
        schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
        try:
            new_schema = TypedSchema.parse(schema_type=schema_type,
                                           schema_str=body["schema"])
        except (InvalidSchema, InvalidSchemaType) as e:
            self.log.warning("Invalid schema: %r",
                             body["schema"],
                             exc_info=True)
            # Surface the underlying parser message only when the chained
            # cause is one of the two known parse errors; otherwise fall back
            # to a generic text.
            if isinstance(e.__cause__,
                          (SchemaParseException, JSONDecodeError)):
                human_error = f"{e.__cause__.args[0]}"  # pylint: disable=no-member
            else:
                human_error = "Provided schema is not valid"
            self.r(
                body={
                    "error_code":
                    SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message":
                    f"Invalid {schema_type} schema. Error: {human_error}",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )
        # Unknown subject, or a known subject without any schema entries:
        # this becomes version 1.
        if subject not in self.ksr.subjects or not self.ksr.subjects.get(
                subject)["schemas"]:
            schema_id = self.ksr.get_schema_id(new_schema)
            version = 1
            self.log.info(
                "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
                subject, version, new_schema.schema_str, schema_id)
        else:
            # First check if any of the existing schemas for the subject match
            subject_data = self.ksr.subjects[subject]
            schemas = self.ksr.get_schemas(subject)
            if not schemas:  # Previous ones have been deleted by the user.
                # The subject still has entries in ``subjects[...]["schemas"]``
                # (checked above), so continue numbering after the highest key.
                version = max(self.ksr.subjects[subject]["schemas"]) + 1
                schema_id = self.ksr.get_schema_id(new_schema)
                self.log.info(
                    "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                    subject, schema_id, version, new_schema.schema_str,
                    schema_id)
                self.send_schema_message(
                    subject=subject,
                    schema=new_schema,
                    schema_id=schema_id,
                    version=version,
                    deleted=False,
                )
                self.r({"id": schema_id}, content_type)

            schema_versions = sorted(list(schemas))
            # Go through these in version order
            for version in schema_versions:
                schema = subject_data["schemas"][version]
                # An identical schema is already stored: reply with its id.
                if schema["schema"] == new_schema:
                    self.r({"id": schema["id"]}, content_type)
                else:
                    self.log.debug("schema: %s did not match with: %s", schema,
                                   new_schema)

            compatibility_mode = self._get_compatibility_mode(
                subject=subject_data, content_type=content_type)

            # Run a compatibility check between on file schema(s) and the one being submitted now
            # the check is either towards the latest one or against all previous ones in case of
            # transitive mode
            if compatibility_mode.is_transitive():
                check_against = schema_versions
            else:
                check_against = [schema_versions[-1]]

            for old_version in check_against:
                old_schema = subject_data["schemas"][old_version]["schema"]
                result = check_compatibility(
                    old_schema=old_schema,
                    new_schema=new_schema,
                    compatibility_mode=compatibility_mode,
                )
                if is_incompatible(result):
                    # Only one message is reported: an arbitrary element of the
                    # de-duplicated message set, or "" when there are none.
                    message = set(
                        result.messages).pop() if result.messages else ""
                    self.log.warning("Incompatible schema: %s", result)
                    self.r(
                        body={
                            "error_code":
                            SchemaErrorCodes.HTTP_CONFLICT.value,
                            "message":
                            f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                        },
                        content_type=content_type,
                        status=HTTPStatus.CONFLICT,
                    )

            # We didn't find an existing schema and the schema is compatible so go and create one
            schema_id = self.ksr.get_schema_id(new_schema)
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject, schema_id, version, new_schema.to_json(), schema_id)
        # Shared tail for the "version 1" branch and the "compatible new
        # version" branch: send the schema message and reply with the id.
        self.send_schema_message(
            subject=subject,
            schema=new_schema,
            schema_id=schema_id,
            version=version,
            deleted=False,
        )
        self.r({"id": schema_id}, content_type)
Esempio n. 10
0
    def write_new_schema_local(self, subject, body, content_type):
        """Register the schema in ``body`` under ``subject`` on this node.

        Steps, in order:

        1. Parse ``body["schema"]`` with ``TypedSchema.parse``; a parse failure
           is answered with a 422 reply (error code 44201).
        2. If the subject is unknown or has no schemas, use version 1.
        3. Otherwise, if ``self.ksr.get_schemas(subject)`` is empty, register
           the next version number right away and reply with the id.
        4. Otherwise reply with the id of an identical stored schema if one
           exists; else run ``Compatibility(...).check()`` (against every
           version when the level is in ``TRANSITIVE_MODES``, against the
           latest only otherwise) and answer 409 on ``IncompatibleSchema``.
        5. Send the schema message and reply with the new schema id.

        NOTE(review): the flow presumably relies on ``self.r`` not returning
        normally (e.g. by raising the HTTP response) -- confirm against its
        definition; otherwise statements after a reply keep running.

        Args:
            subject: Subject name the schema is registered under.
            body: Request JSON mapping with ``schema`` and optional
                ``schemaType`` (defaults to ``SchemaType.AVRO``).
            content_type: Content type forwarded unchanged to ``self.r``.
        """
        self.log.info("Writing new schema locally since we're the master")
        schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
        try:
            new_schema = TypedSchema.parse(schema_type=schema_type,
                                           schema_str=body["schema"])
        except (InvalidSchema, InvalidSchemaType):
            self.log.warning("Invalid schema: %r",
                             body["schema"],
                             exc_info=True)
            self.r(body={
                "error_code": 44201,
                "message": f"Invalid {schema_type} schema"
            },
                   content_type=content_type,
                   status=422)
        # Unknown subject, or a known subject without any schema entries:
        # this becomes version 1.
        if subject not in self.ksr.subjects or not self.ksr.subjects.get(
                subject)["schemas"]:
            schema_id = self.ksr.get_schema_id(new_schema)
            version = 1
            self.log.info(
                "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
                subject, version, new_schema.to_json(), schema_id)
        else:
            # First check if any of the existing schemas for the subject match
            subject_data = self.ksr.subjects[subject]
            schemas = self.ksr.get_schemas(subject)
            if not schemas:  # Previous ones have been deleted by the user.
                # The subject still has entries in ``subjects[...]["schemas"]``
                # (checked above), so continue numbering after the highest key.
                version = max(self.ksr.subjects[subject]["schemas"]) + 1
                schema_id = self.ksr.get_schema_id(new_schema)
                self.log.info(
                    "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                    subject, schema_id, version, new_schema.to_json(),
                    schema_id)
                self.send_schema_message(
                    subject=subject,
                    schema=new_schema,
                    schema_id=schema_id,
                    version=version,
                    deleted=False,
                )
                self.r({"id": schema_id}, content_type)

            schema_versions = sorted(list(schemas))
            # Go through these in version order
            for version in schema_versions:
                schema = subject_data["schemas"][version]
                # An identical schema is already stored: reply with its id.
                if schema["schema"] == new_schema:
                    self.r({"id": schema["id"]}, content_type)
                else:
                    self.log.debug("schema: %s did not match with: %s", schema,
                                   new_schema)

            # Subject-level compatibility setting, falling back to the
            # registry-wide configuration value.
            compatibility = subject_data.get("compatibility",
                                             self.ksr.config["compatibility"])

            # Run a compatibility check between on file schema(s) and the one being submitted now
            # the check is either towards the latest one or against all previous ones in case of
            # transitive mode
            if compatibility in TRANSITIVE_MODES:
                check_against = schema_versions
            else:
                check_against = [schema_versions[-1]]

            for old_version in check_against:
                old_schema = subject_data["schemas"][old_version]["schema"]
                compat = Compatibility(old_schema,
                                       new_schema,
                                       compatibility=compatibility)
                try:
                    compat.check()
                except IncompatibleSchema as ex:
                    self.log.warning("Incompatible schema: %s", ex)
                    self.r(body={
                        "error_code":
                        409,
                        "message":
                        "Schema being registered is incompatible with an earlier schema"
                    },
                           content_type=content_type,
                           status=409)

            # We didn't find an existing schema and the schema is compatible so go and create one
            schema_id = self.ksr.get_schema_id(new_schema)
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject, schema_id, version, new_schema.to_json(), schema_id)
        # Shared tail for the "version 1" branch and the "compatible new
        # version" branch: send the schema message and reply with the id.
        self.send_schema_message(
            subject=subject,
            schema=new_schema,
            schema_id=schema_id,
            version=version,
            deleted=False,
        )
        self.r({"id": schema_id}, content_type)