async def compatibility_check(self, content_type, *, subject, version, request):
    """Report whether the posted schema is compatible with a stored subject version.

    The stored version is fetched through ``subject_version_get`` and both it
    and the schema in the request body are parsed with ``TypedSchema.parse``
    (``schemaType`` falls back to ``"AVRO"`` on either side).  Every outcome is
    delivered through ``self.r``: an ``UNPROCESSABLE_ENTITY`` payload when one
    of the two schemas fails to parse, otherwise ``{"is_compatible": <bool>}``.

    NOTE(review): the back-to-back ``self.r`` calls only make sense if
    ``self.r`` aborts the handler (presumably by raising) -- confirm against
    its definition before reordering anything here.
    """
    payload = request.json
    self.log.info("Got request to check subject: %r, version_id: %r compatibility", subject, version)
    stored = await self.subject_version_get(
        content_type=content_type,
        subject=subject,
        version=version,
        return_dict=True,
    )
    self.log.info("Existing schema: %r, new_schema: %r", stored["schema"], payload["schema"])

    # Parse the schema submitted by the caller.
    try:
        candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
        candidate = TypedSchema.parse(candidate_type, payload["schema"])
    except InvalidSchema:
        self.log.warning("Invalid schema: %r", payload["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    # Parse the schema already on file for the requested version.
    try:
        stored_type = SchemaType(stored.get("schemaType", "AVRO"))
        stored_schema = TypedSchema.parse(stored_type, stored["schema"])
    except InvalidSchema:
        self.log.warning("Invalid existing schema: %r", stored["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    mode = self._get_compatibility_mode(subject=stored, content_type=content_type)
    outcome = check_compatibility(
        old_schema=stored_schema,
        new_schema=candidate,
        compatibility_mode=mode,
    )
    if is_incompatible(outcome):
        self.log.warning(
            "Invalid schema %s found by compatibility check: old: %s new: %s",
            outcome,
            stored_schema,
            candidate,
        )
        self.r({"is_compatible": False}, content_type)
    self.r({"is_compatible": True}, content_type)
async def compatibility_check(self, content_type, *, subject, version, request):
    """Report whether the posted schema is compatible with a stored subject version.

    The stored version comes from ``subject_version_get``; it and the schema in
    the request body are parsed with ``TypedSchema.parse`` (``schemaType``
    falls back to ``"AVRO"``).  A ``Compatibility`` checker is then run using
    the stored entry's ``"compatibility"`` value, or the registry-wide
    ``self.ksr.config["compatibility"]`` when the entry has none.  Results are
    sent through ``self.r``: a 422 payload for an unparsable schema, otherwise
    ``{"is_compatible": <bool>}``.

    NOTE(review): the back-to-back ``self.r`` calls only make sense if
    ``self.r`` aborts the handler (presumably by raising) -- confirm against
    its definition before reordering anything here.
    """
    payload = request.json
    self.log.info("Got request to check subject: %r, version_id: %r compatibility", subject, version)
    stored = await self.subject_version_get(
        content_type=content_type,
        subject=subject,
        version=version,
        return_dict=True,
    )
    self.log.info("Existing schema: %r, new_schema: %r", stored["schema"], payload["schema"])

    # Parse the schema submitted by the caller.
    try:
        candidate_type = SchemaType(payload.get("schemaType", "AVRO"))
        candidate = TypedSchema.parse(candidate_type, payload["schema"])
    except InvalidSchema:
        self.log.warning("Invalid schema: %r", payload["schema"])
        self.r(
            body={"error_code": 44201, "message": "Invalid Avro schema"},
            content_type=content_type,
            status=422,
        )

    # Parse the schema already on file for the requested version.
    try:
        stored_type = SchemaType(stored.get("schemaType", "AVRO"))
        stored_schema = TypedSchema.parse(stored_type, stored["schema"])
    except InvalidSchema:
        self.log.warning("Invalid existing schema: %r", stored["schema"])
        self.r(
            body={"error_code": 44201, "message": "Invalid Avro schema"},
            content_type=content_type,
            status=422,
        )

    checker = Compatibility(
        source=stored_schema,
        target=candidate,
        compatibility=stored.get("compatibility", self.ksr.config["compatibility"]),
    )
    try:
        checker.check()
    except IncompatibleSchema as incompatibility:
        self.log.warning(
            "Invalid schema %s found by compatibility check: old: %s new: %s",
            incompatibility,
            stored_schema,
            candidate,
        )
        self.r({"is_compatible": False}, content_type)
    self.r({"is_compatible": True}, content_type)
def _validate_schema_type(self, content_type, body):
    """Reject a request body whose ``schemaType`` is not supported.

    ``schemaType`` defaults to ``SchemaType.AVRO.value`` when absent.  A 422
    response is sent through ``self.r`` when the value is not a known
    ``SchemaType`` at all, or when it is a known member other than
    ``JSONSCHEMA`` / ``AVRO``.

    Args:
        content_type: Content type forwarded to ``self.r`` for the error reply.
        body: Parsed request body; only its ``"schemaType"`` key is read.
    """
    raw_type = body.get("schemaType", SchemaType.AVRO.value)
    try:
        schema_type = SchemaType(raw_type)
    except ValueError:
        # Assumes SchemaType is an enum.Enum (it exposes ``.AVRO.value``):
        # looking up an unknown value raises ValueError, which previously
        # escaped before the "unrecognized schemaType" reply could be sent.
        self.r(
            body={"error_code": 422, "message": f"unrecognized schemaType: {raw_type}"},
            content_type=content_type,
            status=422,
        )
        # Only reached if self.r returns instead of aborting the request.
        return
    if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO}:
        self.r(
            body={"error_code": 422, "message": f"unrecognized schemaType: {schema_type}"},
            content_type=content_type,
            status=422,
        )
async def subjects_schema_post(self, content_type, *, subject, request):
    """Look up the schema from the request body among those stored under ``subject``.

    The body is first checked by ``_validate_schema_request_body`` and the
    subject's data is loaded with ``_subject_get``.  The submitted schema is
    parsed with ``TypedSchema.parse`` (``schemaType`` defaults to ``"AVRO"``)
    and compared against every stored entry.  All replies go through
    ``self.r``:

    * a matching entry yields its ``subject`` / ``version`` / ``id`` / ``schema``
      (plus ``schemaType`` for non-Avro schemas);
    * a missing ``"schema"`` key or an unparsable schema yields an
      ``INTERNAL_SERVER_ERROR`` reply;
    * no match yields a ``NOT_FOUND`` reply.

    NOTE(review): the flow assumes ``self.r`` aborts the handler (presumably
    by raising) -- confirm against its definition.
    """
    body = request.json
    self._validate_schema_request_body(content_type, body)
    subject_data = self._subject_get(subject, content_type)
    new_schema = None
    if "schema" not in body:
        self.r(
            body={
                "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
                "message": f"Error while looking up schema under subject {subject}",
            },
            content_type=content_type,
            status=HTTPStatus.INTERNAL_SERVER_ERROR,
        )
    schema_str = body["schema"]
    schema_type = SchemaType(body.get("schemaType", "AVRO"))
    try:
        new_schema = TypedSchema.parse(schema_type, schema_str)
    except InvalidSchema:
        self.log.exception("No proper parser found")
        self.r(
            body={
                "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
                "message": f"Error while looking up schema under subject {subject}",
            },
            content_type=content_type,
            status=HTTPStatus.INTERNAL_SERVER_ERROR,
        )
    for schema in subject_data["schemas"].values():
        typed_schema = schema["schema"]
        if typed_schema == new_schema:
            ret = {
                "subject": subject,
                "version": schema["version"],
                "id": schema["id"],
                "schema": typed_schema.schema_str,
            }
            if schema_type is not SchemaType.AVRO:
                ret["schemaType"] = schema_type
            self.r(ret, content_type)
        else:
            # Log the submitted schema as the second operand; the previous
            # arguments printed the stored entry next to its own schema.
            self.log.debug("Schema %r did not match %r", schema, new_schema)
    self.r(
        body={
            "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,
            "message": "Schema not found",
        },
        content_type=content_type,
        status=HTTPStatus.NOT_FOUND,
    )
def _validate_schema_type(self, content_type, body) -> None:
    """Reject a request body whose ``schemaType`` is not supported.

    ``schemaType`` defaults to ``SchemaType.AVRO.value`` when absent.  An
    ``UNPROCESSABLE_ENTITY`` response is sent through ``self.r`` when the
    value is not a known ``SchemaType`` at all, or when it is a known member
    other than ``JSONSCHEMA`` / ``AVRO`` / ``PROTOBUF``.

    Args:
        content_type: Content type forwarded to ``self.r`` for the error reply.
        body: Parsed request body; only its ``"schemaType"`` key is read.
    """
    raw_type = body.get("schemaType", SchemaType.AVRO.value)
    try:
        schema_type = SchemaType(raw_type)
    except ValueError:
        # Assumes SchemaType is an enum.Enum (it exposes ``.AVRO.value``):
        # looking up an unknown value raises ValueError, which previously
        # escaped before the "unrecognized schemaType" reply could be sent.
        self.r(
            body={
                "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
                "message": f"unrecognized schemaType: {raw_type}",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
        # Only reached if self.r returns instead of aborting the request.
        return
    if schema_type not in {SchemaType.JSONSCHEMA, SchemaType.AVRO, SchemaType.PROTOBUF}:
        self.r(
            body={
                "error_code": SchemaErrorCodes.HTTP_UNPROCESSABLE_ENTITY.value,
                "message": f"unrecognized schemaType: {schema_type}",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
async def get_schema_for_id(self, schema_id: int) -> TypedSchema:
    """Fetch the schema registered under ``schema_id`` and parse it.

    Issues ``GET schemas/ids/<schema_id>`` through ``self.client`` and parses
    the returned ``"schema"`` string with ``TypedSchema.parse``; the response's
    ``"schemaType"`` defaults to ``"AVRO"`` when absent.

    Raises:
        SchemaRetrievalError: when the response is not ``ok``, lacks a
            ``"schema"`` key, names an unknown ``schemaType``, or carries a
            schema that cannot be parsed.
    """
    result = await self.client.get(f"schemas/ids/{schema_id}")
    if not result.ok:
        raise SchemaRetrievalError(result.json()["message"])
    json_result = result.json()
    if "schema" not in json_result:
        raise SchemaRetrievalError(f"Invalid result format: {json_result}")
    try:
        schema_type = SchemaType(json_result.get("schemaType", "AVRO"))
        return TypedSchema.parse(schema_type, json_result["schema"])
    except (InvalidSchema, ValueError) as e:
        # ValueError covers an unrecognised schemaType (assuming SchemaType is
        # an enum.Enum); wrap it so callers see one retrieval error type.
        raise SchemaRetrievalError(f"Failed to parse schema string from response: {json_result}") from e
async def subjects_schema_post(self, content_type, *, subject, request):
    """Look up the schema from the request body among those stored under ``subject``.

    The body is first checked by ``_validate_schema_request_body`` and the
    subject's data is loaded with ``_subject_get``.  The submitted schema is
    parsed with ``TypedSchema.parse`` (``schemaType`` defaults to ``"AVRO"``)
    and compared against every stored entry.  All replies go through
    ``self.r``:

    * a matching entry yields its ``subject`` / ``version`` / ``id`` / ``schema``
      (plus ``schemaType`` for non-Avro schemas);
    * a missing ``"schema"`` key or an unparsable schema yields a 500 reply;
    * no match yields a 404 reply with error code 40403.

    NOTE(review): the flow assumes ``self.r`` aborts the handler (presumably
    by raising) -- confirm against its definition.
    """
    body = request.json
    self._validate_schema_request_body(content_type, body)
    subject_data = self._subject_get(subject, content_type)
    new_schema = None
    if "schema" not in body:
        self.r({"error_code": 500, "message": "Internal Server Error"}, content_type, status=500)
    schema_str = body["schema"]
    schema_type = SchemaType(body.get("schemaType", "AVRO"))
    try:
        new_schema = TypedSchema.parse(schema_type, schema_str)
    except InvalidSchema:
        self.log.exception("No proper parser found")
        self.r(
            {
                "error_code": 500,
                "message": f"Error while looking up schema under subject {subject}",
            },
            content_type,
            status=500,
        )
    for schema in subject_data["schemas"].values():
        typed_schema = schema["schema"]
        if typed_schema == new_schema:
            ret = {
                "subject": subject,
                "version": schema["version"],
                "id": schema["id"],
                "schema": str(typed_schema),
            }
            if schema_type is not SchemaType.AVRO:
                ret["schemaType"] = schema_type
            self.r(ret, content_type)
        else:
            # Log the submitted schema as the second operand; the previous
            # arguments printed the stored entry next to its own schema.
            self.log.debug("Schema %r did not match %r", schema, new_schema)
    self.r({"error_code": 40403, "message": "Schema not found"}, content_type, status=404)
async def get_latest_schema(self, subject: str) -> (int, TypedSchema):
    """Fetch and parse the latest schema version registered under ``subject``.

    Issues ``GET subjects/<quoted subject>/versions/latest`` through
    ``self.client`` and parses the returned ``"schema"`` string with
    ``TypedSchema.parse``; the response's ``"schemaType"`` defaults to
    ``"AVRO"`` when absent.

    Returns:
        A ``(schema id, parsed schema)`` pair.

    Raises:
        SchemaRetrievalError: when the response is not ``ok``, lacks the
            ``"id"`` or ``"schema"`` key, names an unknown ``schemaType``, or
            carries a schema that cannot be parsed.

    NOTE(review): the return annotation is a tuple expression rather than a
    typing construct; left untouched here to keep the signature unchanged.
    """
    result = await self.client.get(f"subjects/{quote(subject)}/versions/latest")
    if not result.ok:
        raise SchemaRetrievalError(result.json())
    json_result = result.json()
    if "id" not in json_result or "schema" not in json_result:
        raise SchemaRetrievalError(f"Invalid result format: {json_result}")
    try:
        schema_type = SchemaType(json_result.get("schemaType", "AVRO"))
        return json_result["id"], TypedSchema.parse(schema_type, json_result["schema"])
    except (InvalidSchema, ValueError) as e:
        # ValueError covers an unrecognised schemaType (assuming SchemaType is
        # an enum.Enum); wrap it so callers see one retrieval error type.
        raise SchemaRetrievalError(f"Failed to parse schema string from response: {json_result}") from e
def write_new_schema_local(self, subject, body, content_type):
    """Register the schema in ``body`` under ``subject`` on this (master) node.

    Steps, in order:

    1. Parse ``body["schema"]`` (``schemaType`` defaults to ``SchemaType.AVRO``);
       a parse failure is answered with an ``UNPROCESSABLE_ENTITY`` reply.
    2. For a subject that is unknown or has no stored schemas, use version 1.
    3. Otherwise, if ``get_schemas`` returns nothing, register the next version
       straight away; else reply with the existing id when an identical schema
       is already stored, and run the compatibility check (against all stored
       versions in transitive mode, against the latest one otherwise),
       replying with ``CONFLICT`` on the first incompatibility.
    4. Publish the schema with ``send_schema_message`` and reply with its id.

    NOTE(review): every reply goes through ``self.r`` and the flow assumes that
    call aborts the handler (presumably by raising) -- confirm against its
    definition before reordering any statement.
    """
    self.log.info("Writing new schema locally since we're the master")
    schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
    try:
        candidate = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"])
    except (InvalidSchema, InvalidSchemaType) as parse_error:
        self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
        cause = parse_error.__cause__
        # Surface the underlying parser message when the cause is a known parse error.
        human_error = (
            f"{cause.args[0]}"
            if isinstance(cause, (SchemaParseException, JSONDecodeError))
            else "Provided schema is not valid"
        )
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": f"Invalid {schema_type} schema. Error: {human_error}",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    is_new_subject = subject not in self.ksr.subjects or not self.ksr.subjects.get(subject)["schemas"]
    if is_new_subject:
        schema_id = self.ksr.get_schema_id(candidate)
        version = 1
        self.log.info(
            "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
            subject,
            version,
            candidate.schema_str,
            schema_id,
        )
    else:
        # First check if any of the existing schemas for the subject match
        subject_data = self.ksr.subjects[subject]
        schemas = self.ksr.get_schemas(subject)
        if not schemas:
            # Previous ones have been deleted by the user.
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            schema_id = self.ksr.get_schema_id(candidate)
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject,
                schema_id,
                version,
                candidate.schema_str,
                schema_id,
            )
            self.send_schema_message(
                subject=subject,
                schema=candidate,
                schema_id=schema_id,
                version=version,
                deleted=False,
            )
            self.r({"id": schema_id}, content_type)

        # Go through these in version order
        schema_versions = sorted(schemas)
        for known_version in schema_versions:
            entry = subject_data["schemas"][known_version]
            if entry["schema"] == candidate:
                self.r({"id": entry["id"]}, content_type)
            else:
                self.log.debug("schema: %s did not match with: %s", entry, candidate)

        compatibility_mode = self._get_compatibility_mode(subject=subject_data, content_type=content_type)

        # Run a compatibility check between on file schema(s) and the one being submitted now
        # the check is either towards the latest one or against all previous ones in case of
        # transitive mode
        check_against = schema_versions if compatibility_mode.is_transitive() else [schema_versions[-1]]
        for old_version in check_against:
            old_schema = subject_data["schemas"][old_version]["schema"]
            result = check_compatibility(
                old_schema=old_schema,
                new_schema=candidate,
                compatibility_mode=compatibility_mode,
            )
            if is_incompatible(result):
                message = set(result.messages).pop() if result.messages else ""
                self.log.warning("Incompatible schema: %s", result)
                self.r(
                    body={
                        "error_code": SchemaErrorCodes.HTTP_CONFLICT.value,
                        "message": f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                    },
                    content_type=content_type,
                    status=HTTPStatus.CONFLICT,
                )

        # We didn't find an existing schema and the schema is compatible so go and create one
        schema_id = self.ksr.get_schema_id(candidate)
        version = max(self.ksr.subjects[subject]["schemas"]) + 1
        self.log.info(
            "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
            subject,
            schema_id,
            version,
            candidate.to_json(),
            schema_id,
        )

    # Shared tail for both the new-subject and the new-version path.
    self.send_schema_message(
        subject=subject,
        schema=candidate,
        schema_id=schema_id,
        version=version,
        deleted=False,
    )
    self.r({"id": schema_id}, content_type)
def write_new_schema_local(self, subject, body, content_type):
    """Register the schema in ``body`` under ``subject`` on this (master) node.

    Steps, in order:

    1. Parse ``body["schema"]`` (``schemaType`` defaults to ``SchemaType.AVRO``);
       a parse failure is answered with a 422 reply.
    2. For a subject that is unknown or has no stored schemas, use version 1.
    3. Otherwise, if ``get_schemas`` returns nothing, register the next version
       straight away; else reply with the existing id when an identical schema
       is already stored, and run ``Compatibility.check`` (against all stored
       versions when the mode is in ``TRANSITIVE_MODES``, against the latest
       one otherwise), replying with 409 on the first incompatibility.
    4. Publish the schema with ``send_schema_message`` and reply with its id.

    NOTE(review): every reply goes through ``self.r`` and the flow assumes that
    call aborts the handler (presumably by raising) -- confirm against its
    definition before reordering any statement.
    """
    self.log.info("Writing new schema locally since we're the master")
    schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
    try:
        candidate = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"])
    except (InvalidSchema, InvalidSchemaType):
        self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
        self.r(
            body={"error_code": 44201, "message": f"Invalid {schema_type} schema"},
            content_type=content_type,
            status=422,
        )

    is_new_subject = subject not in self.ksr.subjects or not self.ksr.subjects.get(subject)["schemas"]
    if is_new_subject:
        schema_id = self.ksr.get_schema_id(candidate)
        version = 1
        self.log.info(
            "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
            subject,
            version,
            candidate.to_json(),
            schema_id,
        )
    else:
        # First check if any of the existing schemas for the subject match
        subject_data = self.ksr.subjects[subject]
        schemas = self.ksr.get_schemas(subject)
        if not schemas:
            # Previous ones have been deleted by the user.
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            schema_id = self.ksr.get_schema_id(candidate)
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject,
                schema_id,
                version,
                candidate.to_json(),
                schema_id,
            )
            self.send_schema_message(
                subject=subject,
                schema=candidate,
                schema_id=schema_id,
                version=version,
                deleted=False,
            )
            self.r({"id": schema_id}, content_type)

        # Go through these in version order
        schema_versions = sorted(schemas)
        for known_version in schema_versions:
            entry = subject_data["schemas"][known_version]
            if entry["schema"] == candidate:
                self.r({"id": entry["id"]}, content_type)
            else:
                self.log.debug("schema: %s did not match with: %s", entry, candidate)

        compatibility = subject_data.get("compatibility", self.ksr.config["compatibility"])

        # Run a compatibility check between on file schema(s) and the one being submitted now
        # the check is either towards the latest one or against all previous ones in case of
        # transitive mode
        check_against = schema_versions if compatibility in TRANSITIVE_MODES else [schema_versions[-1]]
        for old_version in check_against:
            old_schema = subject_data["schemas"][old_version]["schema"]
            checker = Compatibility(old_schema, candidate, compatibility=compatibility)
            try:
                checker.check()
            except IncompatibleSchema as incompatibility:
                self.log.warning("Incompatible schema: %s", incompatibility)
                self.r(
                    body={
                        "error_code": 409,
                        "message": "Schema being registered is incompatible with an earlier schema",
                    },
                    content_type=content_type,
                    status=409,
                )

        # We didn't find an existing schema and the schema is compatible so go and create one
        schema_id = self.ksr.get_schema_id(candidate)
        version = max(self.ksr.subjects[subject]["schemas"]) + 1
        self.log.info(
            "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
            subject,
            schema_id,
            version,
            candidate.to_json(),
            schema_id,
        )

    # Shared tail for both the new-subject and the new-version path.
    self.send_schema_message(
        subject=subject,
        schema=candidate,
        schema_id=schema_id,
        version=version,
        deleted=False,
    )
    self.r({"id": schema_id}, content_type)