async def compatibility_check(self, content_type, *, subject, version, request):
    """Check a submitted schema for compatibility with a stored subject version.

    The stored version is fetched with ``self.subject_version_get`` and both it
    and the submitted schema are parsed with ``TypedSchema.parse``. The parsed
    pair is handed to ``check_compatibility`` under the mode returned by
    ``self._get_compatibility_mode``.

    Every outcome is reported through ``self.r``:

    * an ``UNPROCESSABLE_ENTITY`` error body naming the offending schema type
      when parsing either schema raises ``InvalidSchema``,
    * ``{"is_compatible": False}`` when ``is_incompatible(result)`` is true,
    * ``{"is_compatible": True}`` otherwise.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns (presumably it raises the HTTP
    response) -- confirm against its definition.

    Args:
        content_type: Passed through to ``self.r`` and the helper lookups.
        subject: Subject whose stored schema is compared against.
        version: Version of ``subject`` to compare against.
        request: Incoming request; ``request.json`` must contain ``"schema"``
            and may contain ``"schemaType"`` (defaults to ``"AVRO"``).
    """
    body = request.json
    self.log.info("Got request to check subject: %r, version_id: %r compatibility", subject, version)
    old = await self.subject_version_get(
        content_type=content_type,
        subject=subject,
        version=version,
        return_dict=True,
    )
    self.log.info("Existing schema: %r, new_schema: %r", old["schema"], body["schema"])

    try:
        schema_type = SchemaType(body.get("schemaType", "AVRO"))
        new_schema = TypedSchema.parse(schema_type, body["schema"])
    except InvalidSchema:
        self.log.warning("Invalid schema: %r", body["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                # Name the type that was actually parsed instead of always
                # claiming "Avro"; the schema may be JSONSCHEMA or PROTOBUF.
                "message": f"Invalid {schema_type.value} schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    try:
        old_schema_type = SchemaType(old.get("schemaType", "AVRO"))
        old_schema = TypedSchema.parse(old_schema_type, old["schema"])
    except InvalidSchema:
        self.log.warning("Invalid existing schema: %r", old["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": f"Invalid {old_schema_type.value} schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    compatibility_mode = self._get_compatibility_mode(subject=old, content_type=content_type)
    result = check_compatibility(
        old_schema=old_schema,
        new_schema=new_schema,
        compatibility_mode=compatibility_mode,
    )
    if is_incompatible(result):
        self.log.warning(
            "Invalid schema %s found by compatibility check: old: %s new: %s", result, old_schema, new_schema
        )
        self.r({"is_compatible": False}, content_type)
    self.r({"is_compatible": True}, content_type)
async def compatibility_check(self, content_type, *, subject, version, request):
    """Check a submitted schema for compatibility with a stored subject version.

    The stored version is fetched with ``self.subject_version_get``; both it
    and the submitted schema are parsed with ``TypedSchema.parse`` and then
    compared by ``Compatibility(...).check()``. The compatibility level is the
    stored entry's ``"compatibility"`` value, falling back to
    ``self.ksr.config["compatibility"]``.

    Every outcome is reported through ``self.r``: a 422 error body naming the
    offending schema type when parsing raises ``InvalidSchema``,
    ``{"is_compatible": False}`` when ``check()`` raises
    ``IncompatibleSchema``, and ``{"is_compatible": True}`` otherwise.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns (presumably it raises the HTTP
    response) -- confirm against its definition.

    Args:
        content_type: Passed through to ``self.r`` and the stored-schema lookup.
        subject: Subject whose stored schema is compared against.
        version: Version of ``subject`` to compare against.
        request: Incoming request; ``request.json`` must contain ``"schema"``
            and may contain ``"schemaType"`` (defaults to ``"AVRO"``).
    """
    body = request.json
    self.log.info("Got request to check subject: %r, version_id: %r compatibility", subject, version)
    old = await self.subject_version_get(
        content_type=content_type,
        subject=subject,
        version=version,
        return_dict=True,
    )
    self.log.info("Existing schema: %r, new_schema: %r", old["schema"], body["schema"])

    try:
        schema_type = SchemaType(body.get("schemaType", "AVRO"))
        new = TypedSchema.parse(schema_type, body["schema"])
    except InvalidSchema:
        self.log.warning("Invalid schema: %r", body["schema"])
        self.r(
            body={
                "error_code": 44201,
                # Name the type that was actually parsed instead of always
                # claiming "Avro"; the schema may be of another type.
                "message": f"Invalid {schema_type.value} schema",
            },
            content_type=content_type,
            status=422,
        )

    try:
        old_schema_type = SchemaType(old.get("schemaType", "AVRO"))
        old_schema = TypedSchema.parse(old_schema_type, old["schema"])
    except InvalidSchema:
        self.log.warning("Invalid existing schema: %r", old["schema"])
        self.r(
            body={
                "error_code": 44201,
                "message": f"Invalid {old_schema_type.value} schema",
            },
            content_type=content_type,
            status=422,
        )

    compat = Compatibility(
        source=old_schema,
        target=new,
        compatibility=old.get("compatibility", self.ksr.config["compatibility"]),
    )
    try:
        compat.check()
    except IncompatibleSchema as ex:
        self.log.warning("Invalid schema %s found by compatibility check: old: %s new: %s", ex, old_schema, new)
        self.r({"is_compatible": False}, content_type)
    self.r({"is_compatible": True}, content_type)
def get_subject_name(self, topic_name: str, schema: str, subject_type: str, schema_type: SchemaType) -> str:
    """Build the subject name for ``schema`` on ``topic_name``.

    The schema is parsed with ``TypedSchema.parse`` and a namespace is derived
    from it: the parsed schema's ``namespace`` attribute for AVRO, the
    ``"namespace"`` key of ``to_json()`` (default ``"dummy"``) for JSONSCHEMA,
    and ``"dummy"`` for every other type. The result is
    ``self.subject_name_strategy(topic_name, namespace)`` suffixed with
    ``-<subject_type>``.
    """
    parsed = TypedSchema.parse(schema_type, schema)
    if schema_type is SchemaType.AVRO:
        namespace = parsed.schema.namespace
    elif schema_type is SchemaType.JSONSCHEMA:
        namespace = parsed.to_json().get("namespace", "dummy")
    else:
        namespace = "dummy"
    base_name = self.subject_name_strategy(topic_name, namespace)
    return f"{base_name}-{subject_type}"
def get_subject_name(self, topic_name: str, schema: str, subject_type: str, schema_type: SchemaType) -> str:
    """Return ``<strategy(topic_name, namespace)>-<subject_type>`` for ``schema``.

    ``schema`` is parsed with ``TypedSchema.parse``. The namespace passed to
    ``self.subject_name_strategy`` is the parsed schema's ``namespace``
    attribute for AVRO, the ``"namespace"`` key of ``to_json()`` for
    JSONSCHEMA (``"dummy"`` when absent), and ``"dummy"`` otherwise.
    """
    typed = TypedSchema.parse(schema_type, schema)
    if schema_type is SchemaType.AVRO:
        namespace = typed.schema.namespace
    elif schema_type is SchemaType.JSONSCHEMA:
        namespace = typed.to_json().get("namespace", "dummy")
    else:
        # TODO: PROTOBUF* Seems protobuf does not use namespaces in terms of AVRO
        namespace = "dummy"
    prefix = self.subject_name_strategy(topic_name, namespace)
    return f"{prefix}-{subject_type}"
async def subjects_schema_post(self, content_type, *, subject, request):
    """Look up the schema in the request body among ``subject``'s stored schemas.

    The body is validated with ``self._validate_schema_request_body`` and the
    subject loaded with ``self._subject_get``. The submitted schema is parsed
    and compared with ``==`` against each stored entry's ``"schema"``. All
    results go through ``self.r``:

    * ``INTERNAL_SERVER_ERROR`` when ``"schema"`` is missing or parsing raises
      ``InvalidSchema``,
    * the matching entry's subject/version/id/schema string (plus
      ``"schemaType"`` for non-AVRO types) on a match,
    * ``NOT_FOUND`` after the loop.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns -- confirm against its definition.
    """
    payload = request.json
    self._validate_schema_request_body(content_type, payload)
    stored = self._subject_get(subject, content_type)
    new_schema = None

    if "schema" not in payload:
        missing_schema_error = {
            "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
            "message": f"Error while looking up schema under subject {subject}",
        }
        self.r(
            body=missing_schema_error,
            content_type=content_type,
            status=HTTPStatus.INTERNAL_SERVER_ERROR,
        )

    schema_str = payload["schema"]
    schema_type = SchemaType(payload.get("schemaType", "AVRO"))
    try:
        new_schema = TypedSchema.parse(schema_type, schema_str)
    except InvalidSchema:
        self.log.exception("No proper parser found")
        parse_error = {
            "error_code": SchemaErrorCodes.HTTP_INTERNAL_SERVER_ERROR.value,
            "message": f"Error while looking up schema under subject {subject}",
        }
        self.r(
            body=parse_error,
            content_type=content_type,
            status=HTTPStatus.INTERNAL_SERVER_ERROR,
        )

    for entry in stored["schemas"].values():
        typed_schema = entry["schema"]
        if typed_schema == new_schema:
            found = {
                "subject": subject,
                "version": entry["version"],
                "id": entry["id"],
                "schema": typed_schema.schema_str,
            }
            # AVRO is the default type and is left implicit in the response.
            if schema_type is not SchemaType.AVRO:
                found["schemaType"] = schema_type
            self.r(found, content_type)
        else:
            self.log.debug("Schema %r did not match %r", entry, typed_schema)

    not_found = {
        "error_code": SchemaErrorCodes.SCHEMA_NOT_FOUND.value,
        "message": "Schema not found",
    }
    self.r(
        body=not_found,
        content_type=content_type,
        status=HTTPStatus.NOT_FOUND,
    )
async def test_remote_client(registry_async_client):
    """Round-trip an Avro schema through ``SchemaRegistryClient``.

    Posts the schema under subject ``"foo"``, then checks that fetching it by
    id and fetching the latest version both return an equal schema and the
    same id.
    """
    expected = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    registry = SchemaRegistryClient()
    registry.client = registry_async_client

    posted_id = await registry.post_new_schema("foo", expected)
    assert posted_id >= 0

    by_id = await registry.get_schema_for_id(posted_id)
    assert by_id == expected, f"stored schema {by_id.to_json()} is not {expected.to_json()}"

    latest_id, latest_schema = await registry.get_latest_schema("foo")
    assert latest_id == posted_id
    assert latest_schema == expected
async def test_remote_client_protobuf(registry_async_client):
    """Round-trip a Protobuf schema through ``SchemaRegistryClient``.

    Posts the schema under a freshly generated subject name, then checks that
    fetching it by id and fetching the latest version both return an equal
    schema and the same id.
    """
    expected = TypedSchema.parse(SchemaType.PROTOBUF, schema_protobuf_plain)
    registry = SchemaRegistryClient()
    registry.client = registry_async_client
    subject = new_random_name("subject")

    posted_id = await registry.post_new_schema(subject, expected)
    assert posted_id >= 0

    by_id = await registry.get_schema_for_id(posted_id)
    assert by_id == expected, f"stored schema {by_id} is not {expected}"

    latest_id, latest_schema = await registry.get_latest_schema(subject)
    assert latest_id == posted_id
    assert latest_schema == expected
async def get_id_for_schema(self, schema: str, subject: str, schema_type: SchemaType) -> int:
    """Return the registry id for ``schema``, registering it if not cached.

    The schema string is parsed with ``TypedSchema.parse``; its ``str()`` form
    is the key into ``self.schemas_to_ids``. On a cache miss the parsed schema
    is posted with ``self.registry_client.post_new_schema`` and both caches
    (``schemas_to_ids`` and ``ids_to_schemas``) are updated under
    ``self.state_lock``.

    Args:
        schema: Schema definition as a string.
        subject: Subject to register the schema under on a cache miss.
        schema_type: Type used to parse ``schema``.

    Returns:
        The cached or newly assigned schema id.

    Raises:
        InvalidPayload: If parsing ``schema`` raises ``InvalidSchema``.
    """
    try:
        # Bind the parsed object to its own name so the ``str``-annotated
        # parameter is not rebound to a different type.
        schema_typed = TypedSchema.parse(schema_type, schema)
    except InvalidSchema as e:
        raise InvalidPayload(f"Schema string {schema} is invalid") from e

    schema_ser = str(schema_typed)
    if schema_ser in self.schemas_to_ids:
        return self.schemas_to_ids[schema_ser]

    schema_id = await self.registry_client.post_new_schema(subject, schema_typed)
    async with self.state_lock:
        self.schemas_to_ids[schema_ser] = schema_id
        self.ids_to_schemas[schema_id] = schema_typed
    return schema_id
async def get_schema_for_id(self, schema_id: int) -> TypedSchema:
    """Fetch ``schemas/ids/<schema_id>`` and parse the returned schema.

    The schema type is read from the response's ``"schemaType"`` key and
    defaults to ``"AVRO"``.

    Raises:
        SchemaRetrievalError: If the response is not ``ok`` (carrying the
            response's ``"message"``), if it has no ``"schema"`` key, or if
            parsing raises ``InvalidSchema``.
    """
    response = await self.client.get(f"schemas/ids/{schema_id}")
    if not response.ok:
        raise SchemaRetrievalError(response.json()["message"])

    payload = response.json()
    if "schema" not in payload:
        raise SchemaRetrievalError(f"Invalid result format: {payload}")

    try:
        declared_type = SchemaType(payload.get("schemaType", "AVRO"))
        parsed = TypedSchema.parse(declared_type, payload["schema"])
    except InvalidSchema as e:
        raise SchemaRetrievalError(f"Failed to parse schema string from response: {payload}") from e
    return parsed
async def test_remote_client(karapace, aiohttp_client):
    """Round-trip an Avro schema through ``SchemaRegistryClient``.

    Builds a ``Client`` on top of the aiohttp test client for the app returned
    by the ``karapace`` fixture, posts the schema under subject ``"foo"`` and
    checks that fetching by id and fetching the latest version both return an
    equal schema and the same id.
    """
    kc, _ = karapace()
    client = await aiohttp_client(kc.app)
    c = Client(client=client)
    try:
        schema_avro = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
        reg_cli = SchemaRegistryClient()
        reg_cli.client = c
        sc_id = await reg_cli.post_new_schema("foo", schema_avro)
        assert sc_id >= 0
        stored_schema = await reg_cli.get_schema_for_id(sc_id)
        assert stored_schema == schema_avro, f"stored schema {stored_schema.to_json()} is not {schema_avro.to_json()}"
        stored_id, stored_schema = await reg_cli.get_latest_schema("foo")
        assert stored_id == sc_id
        assert stored_schema == schema_avro
    finally:
        # Close the client even when an assertion or request above fails, so
        # a failing test does not leak it.
        await c.close()
async def subjects_schema_post(self, content_type, *, subject, request):
    """Look up the schema in the request body among ``subject``'s stored schemas.

    The body is validated with ``self._validate_schema_request_body`` and the
    subject loaded with ``self._subject_get``. The submitted schema is parsed
    and compared with ``==`` against each stored entry's ``"schema"``. All
    results go through ``self.r``: status 500 when ``"schema"`` is missing or
    parsing raises ``InvalidSchema``, the matching entry (plus
    ``"schemaType"`` for non-AVRO types) on a match, and status 404 with error
    code 40403 after the loop.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns -- confirm against its definition.
    """
    payload = request.json
    self._validate_schema_request_body(content_type, payload)
    stored = self._subject_get(subject, content_type)
    new_schema = None

    if "schema" not in payload:
        missing_schema_error = {"error_code": 500, "message": "Internal Server Error"}
        self.r(missing_schema_error, content_type, status=500)

    schema_str = payload["schema"]
    schema_type = SchemaType(payload.get("schemaType", "AVRO"))
    try:
        new_schema = TypedSchema.parse(schema_type, schema_str)
    except InvalidSchema:
        self.log.exception("No proper parser found")
        parse_error = {
            "error_code": 500,
            "message": f"Error while looking up schema under subject {subject}",
        }
        self.r(parse_error, content_type, status=500)

    for entry in stored["schemas"].values():
        typed_schema = entry["schema"]
        if typed_schema == new_schema:
            found = {
                "subject": subject,
                "version": entry["version"],
                "id": entry["id"],
                "schema": str(typed_schema),
            }
            # AVRO is the default type and is left implicit in the response.
            if schema_type is not SchemaType.AVRO:
                found["schemaType"] = schema_type
            self.r(found, content_type)
        else:
            self.log.debug("Schema %r did not match %r", entry, typed_schema)

    not_found = {"error_code": 40403, "message": "Schema not found"}
    self.r(not_found, content_type, status=404)
# The return annotation is quoted so it is never evaluated at definition time;
# the previous ``(int, TypedSchema)`` was a tuple object, not a type.
async def get_latest_schema(self, subject: str) -> "tuple[int, TypedSchema]":
    """Fetch the latest registered schema of ``subject``.

    Requests ``subjects/<quoted subject>/versions/latest`` and parses the
    returned schema. The schema type is read from the response's
    ``"schemaType"`` key and defaults to ``"AVRO"``.

    Args:
        subject: Subject name; URL-quoted before being placed in the path.

    Returns:
        A ``(schema_id, parsed_schema)`` pair.

    Raises:
        SchemaRetrievalError: If the response is not ``ok`` (carrying the
            decoded response body), if ``"id"`` or ``"schema"`` is missing
            from it, or if parsing raises ``InvalidSchema``.
    """
    result = await self.client.get(f"subjects/{quote(subject)}/versions/latest")
    if not result.ok:
        raise SchemaRetrievalError(result.json())
    json_result = result.json()
    if "id" not in json_result or "schema" not in json_result:
        raise SchemaRetrievalError(f"Invalid result format: {json_result}")
    try:
        schema_type = SchemaType(json_result.get("schemaType", "AVRO"))
        return json_result["id"], TypedSchema.parse(schema_type, json_result["schema"])
    except InvalidSchema as e:
        raise SchemaRetrievalError(f"Failed to parse schema string from response: {json_result}") from e
async def get_id_for_schema(self, schema: str, subject: str, schema_type: SchemaType) -> int:
    """Return the registry id for ``schema``, registering it if not cached.

    The schema string is parsed with ``TypedSchema.parse``; its ``str()`` form
    is the key into ``self.schemas_to_ids``. On a cache miss the parsed schema
    is posted with ``self.registry_client.post_new_schema`` and both caches
    (``schemas_to_ids`` and ``ids_to_schemas``) are updated under
    ``self.state_lock``.

    Args:
        schema: Schema definition as a string.
        subject: Subject to register the schema under on a cache miss.
        schema_type: Type used to parse ``schema``.

    Returns:
        The cached or newly assigned schema id.

    Raises:
        AssertionError: If ``self.registry_client`` is falsy (the assertion
            message says the object has been closed).
        InvalidPayload: If parsing ``schema`` raises ``InvalidSchema``.
    """
    # Kept as an ``assert`` so callers see the same exception type as before.
    assert self.registry_client, "must not call this method after the object is closed."
    try:
        schema_typed = TypedSchema.parse(schema_type, schema)
    except InvalidSchema as e:
        raise InvalidPayload(f"Schema string {schema} is invalid") from e

    schema_ser = str(schema_typed)
    if schema_ser in self.schemas_to_ids:
        return self.schemas_to_ids[schema_ser]

    schema_id = await self.registry_client.post_new_schema(subject, schema_typed)
    async with self.state_lock:
        self.schemas_to_ids[schema_ser] = schema_id
        self.ids_to_schemas[schema_id] = schema_typed
    return schema_id
async def get_schema_for_id(self, *args, **kwargs):
    """Return the parsed ``schema_avro_json`` Avro schema, ignoring all arguments."""
    canned_schema = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    return canned_schema
def write_new_schema_local(self, subject, body, content_type):
    """Register the schema in ``body`` under ``subject`` on the master node.

    Flow, with every response sent through ``self.r``:

    1. Parse ``body["schema"]`` as the type in ``body["schemaType"]`` (default
       AVRO); a parse failure yields ``UNPROCESSABLE_ENTITY``.
    2. If the subject is unknown or has no schemas, register it as version 1.
    3. Otherwise, if ``self.ksr.get_schemas(subject)`` is empty, register the
       schema as the next version and respond with its id.
    4. Otherwise respond with the id of any stored schema equal to the new one.
    5. Otherwise run ``check_compatibility`` against the latest version, or
       against all versions when the mode is transitive; an incompatible
       result yields ``CONFLICT``.
    6. Finally register the schema as the next version via
       ``self.send_schema_message`` and respond with its id.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns (presumably it raises the HTTP
    response) -- confirm against its definition.
    """
    self.log.info("Writing new schema locally since we're the master")
    schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
    try:
        new_schema = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"])
    except (InvalidSchema, InvalidSchemaType) as e:
        self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
        cause = e.__cause__
        # Only surface the parser's own message for the two known cause types.
        if isinstance(cause, (SchemaParseException, JSONDecodeError)):
            human_error = f"{cause.args[0]}"
        else:
            human_error = "Provided schema is not valid"
        invalid_schema_error = {
            "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
            "message": f"Invalid {schema_type} schema. Error: {human_error}",
        }
        self.r(
            body=invalid_schema_error,
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )

    is_new_subject = subject not in self.ksr.subjects or not self.ksr.subjects.get(subject)["schemas"]
    if is_new_subject:
        schema_id = self.ksr.get_schema_id(new_schema)
        version = 1
        self.log.info(
            "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
            subject,
            version,
            new_schema.schema_str,
            schema_id,
        )
    else:
        # First check if any of the existing schemas for the subject match
        subject_data = self.ksr.subjects[subject]
        schemas = self.ksr.get_schemas(subject)
        if not schemas:
            # Previous ones have been deleted by the user.
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            schema_id = self.ksr.get_schema_id(new_schema)
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject,
                schema_id,
                version,
                new_schema.schema_str,
                schema_id,
            )
            self.send_schema_message(
                subject=subject,
                schema=new_schema,
                schema_id=schema_id,
                version=version,
                deleted=False,
            )
            self.r({"id": schema_id}, content_type)

        # Go through these in version order
        schema_versions = sorted(schemas)
        for existing_version in schema_versions:
            existing = subject_data["schemas"][existing_version]
            if existing["schema"] == new_schema:
                self.r({"id": existing["id"]}, content_type)
            else:
                self.log.debug("schema: %s did not match with: %s", existing, new_schema)

        compatibility_mode = self._get_compatibility_mode(subject=subject_data, content_type=content_type)

        # Run a compatibility check between on file schema(s) and the one being submitted now
        # the check is either towards the latest one or against all previous ones in case of
        # transitive mode
        check_against = schema_versions if compatibility_mode.is_transitive() else [schema_versions[-1]]
        for old_version in check_against:
            old_schema = subject_data["schemas"][old_version]["schema"]
            result = check_compatibility(
                old_schema=old_schema,
                new_schema=new_schema,
                compatibility_mode=compatibility_mode,
            )
            if is_incompatible(result):
                if result.messages:
                    message = set(result.messages).pop()
                else:
                    message = ""
                self.log.warning("Incompatible schema: %s", result)
                conflict_error = {
                    "error_code": SchemaErrorCodes.HTTP_CONFLICT.value,
                    "message": f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                }
                self.r(
                    body=conflict_error,
                    content_type=content_type,
                    status=HTTPStatus.CONFLICT,
                )

        # We didn't find an existing schema and the schema is compatible so go and create one
        schema_id = self.ksr.get_schema_id(new_schema)
        version = max(self.ksr.subjects[subject]["schemas"]) + 1
        self.log.info(
            "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
            subject,
            schema_id,
            version,
            new_schema.to_json(),
            schema_id,
        )

    # Shared tail: reached by the new-subject branch and by the
    # compatible-new-version branch.
    self.send_schema_message(
        subject=subject,
        schema=new_schema,
        schema_id=schema_id,
        version=version,
        deleted=False,
    )
    self.r({"id": schema_id}, content_type)
async def get_latest_schema(self, *args, **kwargs):
    """Return id ``1`` with the parsed ``schema_avro_json`` Avro schema, ignoring all arguments."""
    canned_schema = TypedSchema.parse(SchemaType.AVRO, schema_avro_json)
    return 1, canned_schema
def write_new_schema_local(self, subject, body, content_type):
    """Register the schema in ``body`` under ``subject`` on the master node.

    Flow, with every response sent through ``self.r``:

    1. Parse ``body["schema"]`` as the type in ``body["schemaType"]`` (default
       AVRO); a parse failure yields status 422.
    2. If the subject is unknown or has no schemas, register it as version 1.
    3. Otherwise, if ``self.ksr.get_schemas(subject)`` is empty, register the
       schema as the next version and respond with its id.
    4. Otherwise respond with the id of any stored schema equal to the new one.
    5. Otherwise run ``Compatibility(...).check()`` against the latest
       version, or against all versions when the compatibility level is in
       ``TRANSITIVE_MODES``; ``IncompatibleSchema`` yields status 409.
    6. Finally register the schema as the next version via
       ``self.send_schema_message`` and respond with its id.

    NOTE(review): the statements following each ``self.r`` call only make
    sense if ``self.r`` never returns (presumably it raises the HTTP
    response) -- confirm against its definition.
    """
    self.log.info("Writing new schema locally since we're the master")
    schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
    try:
        new_schema = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"])
    except (InvalidSchema, InvalidSchemaType):
        self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
        invalid_schema_error = {
            "error_code": 44201,
            "message": f"Invalid {schema_type} schema",
        }
        self.r(body=invalid_schema_error, content_type=content_type, status=422)

    is_new_subject = subject not in self.ksr.subjects or not self.ksr.subjects.get(subject)["schemas"]
    if is_new_subject:
        schema_id = self.ksr.get_schema_id(new_schema)
        version = 1
        self.log.info(
            "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
            subject,
            version,
            new_schema.to_json(),
            schema_id,
        )
    else:
        # First check if any of the existing schemas for the subject match
        subject_data = self.ksr.subjects[subject]
        schemas = self.ksr.get_schemas(subject)
        if not schemas:
            # Previous ones have been deleted by the user.
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            schema_id = self.ksr.get_schema_id(new_schema)
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject,
                schema_id,
                version,
                new_schema.to_json(),
                schema_id,
            )
            self.send_schema_message(
                subject=subject,
                schema=new_schema,
                schema_id=schema_id,
                version=version,
                deleted=False,
            )
            self.r({"id": schema_id}, content_type)

        # Go through these in version order
        schema_versions = sorted(schemas)
        for existing_version in schema_versions:
            existing = subject_data["schemas"][existing_version]
            if existing["schema"] == new_schema:
                self.r({"id": existing["id"]}, content_type)
            else:
                self.log.debug("schema: %s did not match with: %s", existing, new_schema)

        compatibility = subject_data.get("compatibility", self.ksr.config["compatibility"])

        # Run a compatibility check between on file schema(s) and the one being submitted now
        # the check is either towards the latest one or against all previous ones in case of
        # transitive mode
        check_against = schema_versions if compatibility in TRANSITIVE_MODES else [schema_versions[-1]]
        for old_version in check_against:
            old_schema = subject_data["schemas"][old_version]["schema"]
            compat = Compatibility(old_schema, new_schema, compatibility=compatibility)
            try:
                compat.check()
            except IncompatibleSchema as ex:
                self.log.warning("Incompatible schema: %s", ex)
                conflict_error = {
                    "error_code": 409,
                    "message": "Schema being registered is incompatible with an earlier schema",
                }
                self.r(body=conflict_error, content_type=content_type, status=409)

        # We didn't find an existing schema and the schema is compatible so go and create one
        schema_id = self.ksr.get_schema_id(new_schema)
        version = max(self.ksr.subjects[subject]["schemas"]) + 1
        self.log.info(
            "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
            subject,
            schema_id,
            version,
            new_schema.to_json(),
            schema_id,
        )

    # Shared tail: reached by the new-subject branch and by the
    # compatible-new-version branch.
    self.send_schema_message(
        subject=subject,
        schema=new_schema,
        schema_id=schema_id,
        version=version,
        deleted=False,
    )
    self.r({"id": schema_id}, content_type)