async def compatibility_check(self, content_type, *, subject, version, request):
    """Check for schema compatibility.

    Parses the schema submitted in the request body and the existing schema
    stored under ``subject``/``version``, then runs a compatibility check
    between them according to the subject's configured compatibility mode.
    Responds with ``{"is_compatible": bool}``.

    NOTE(review): ``self.r(...)`` appears to send the HTTP response and abort
    further processing (the code falls through after calling it) — confirm it
    raises, otherwise the error paths below would continue executing.
    """
    body = request.json
    self.log.info(
        "Got request to check subject: %r, version_id: %r compatibility", subject, version)
    # Fetch the stored schema for the requested subject/version as a dict.
    old = await self.subject_version_get(content_type=content_type, subject=subject, version=version, return_dict=True)
    self.log.info("Existing schema: %r, new_schema: %r", old["schema"], body["schema"])
    try:
        # Schema type defaults to AVRO when the request omits "schemaType".
        schema_type = SchemaType(body.get("schemaType", "AVRO"))
        new_schema = TypedSchema.parse(schema_type, body["schema"])
    except InvalidSchema:
        self.log.warning("Invalid schema: %r", body["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    try:
        # The stored schema is re-parsed the same way; it may have been
        # written with a different schemaType than the submitted one.
        old_schema_type = SchemaType(old.get("schemaType", "AVRO"))
        old_schema = TypedSchema.parse(old_schema_type, old["schema"])
    except InvalidSchema:
        self.log.warning("Invalid existing schema: %r", old["schema"])
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": "Invalid Avro schema",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    # Compatibility mode is resolved from the stored subject data (per-subject
    # override or global default — resolved inside the helper).
    compatibility_mode = self._get_compatibility_mode(
        subject=old, content_type=content_type)
    result = check_compatibility(
        old_schema=old_schema,
        new_schema=new_schema,
        compatibility_mode=compatibility_mode,
    )
    if is_incompatible(result):
        self.log.warning(
            "Invalid schema %s found by compatibility check: old: %s new: %s", result, old_schema, new_schema)
        self.r({"is_compatible": False}, content_type)
    # Reached only when the check passed (self.r above ends the request).
    self.r({"is_compatible": True}, content_type)
def compatibility_object(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check two JSON object schemas for compatibility.

    ``reader_schema`` is the new (candidate) schema, ``writer_schema`` the
    existing one; the result accumulates every incompatibility found together
    with its JSON-pointer-like ``location``.

    https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.5
    """
    result = SchemaCompatibilityResult.compatible()
    assert get_type_of(reader_schema) == Instance.OBJECT, "types should have been previously checked"
    assert get_type_of(writer_schema) == Instance.OBJECT, "types should have been previously checked"

    properties_location = location + [Keyword.PROPERTIES.value]
    reader_properties = reader_schema.get(Keyword.PROPERTIES.value)
    writer_properties = writer_schema.get(Keyword.PROPERTIES.value)

    reader_property_set = set(reader_properties) if reader_properties else set()
    writer_property_set = set(writer_properties) if writer_properties else set()

    # These properties are unknown in the sense they don't have a direct
    # schema, however there may be an indirect schema (patternProperties or
    # additionalProperties)
    properties_unknown_to_writer = reader_property_set - writer_property_set
    properties_unknown_to_reader = writer_property_set - reader_property_set

    # Properties present on both sides: check required-ness and recurse into
    # the per-property schemas.
    for common_property in reader_property_set & writer_property_set:
        this_property_location = properties_location + [common_property]

        reader_property = reader_properties[common_property]
        writer_property = writer_properties[common_property]

        is_required_by_reader = reader_property.get(Keyword.REQUIRED.value)
        is_required_by_writer = writer_property.get(Keyword.REQUIRED.value)
        if not is_required_by_writer and is_required_by_reader:
            result.add_incompatibility(
                incompat_type=Incompatibility.required_attribute_added,
                message=f"Property {common_property} became required",
                location=this_property_location,
            )

        rec_result = compatibility_rec(
            reader_schema=reader_property,
            writer_schema=writer_property,
            location=this_property_location,
        )
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    # With an open content model any property can be added without breaking
    # compatibility because those do not have assertions, so only check if the
    # reader is using a closed model
    if properties_unknown_to_reader and not is_object_content_model_open(reader_schema):
        for unknown_property_to_reader in properties_unknown_to_reader:
            schema_for_property = schema_from_partially_open_content_model(reader_schema, unknown_property_to_reader)
            if schema_for_property is None:
                result.add_incompatibility(
                    incompat_type=Incompatibility.property_removed_from_closed_content_model,
                    message=f"The property {unknown_property_to_reader} is not accepted anymore",
                    location=properties_location,
                )
            else:
                rec_result = compatibility_rec(
                    reader_schema=schema_for_property,
                    writer_schema=writer_properties[unknown_property_to_reader],
                    location=properties_location,
                )
                if is_incompatible(rec_result):
                    result = result.merged_with(rec_result)
                    result.add_incompatibility(
                        incompat_type=Incompatibility.property_removed_not_covered_by_partially_open_content_model,
                        message=f"property {unknown_property_to_reader} is not compatible",
                        location=properties_location,
                    )
    elif properties_unknown_to_writer:
        is_writer_open_model = is_object_content_model_open(writer_schema)

        if is_writer_open_model:
            properties = ', '.join(properties_unknown_to_writer)
            message_property_added_to_open_content_model = (
                f"Restricting acceptable values of properties is an incompatible "
                f"change. The following properties {properties} accepted any "
                f"value because of the lack of validation (the object schema had "
                f"neither patternProperties nor additionalProperties), now "
                f"these values are restricted."
            )
            result.add_incompatibility(
                incompat_type=Incompatibility.property_added_to_open_content_model,
                message=message_property_added_to_open_content_model,
                location=properties_location
            )

        if not is_writer_open_model:
            for unknown_property_to_writer in properties_unknown_to_writer:
                schema_for_property = schema_from_partially_open_content_model(writer_schema, unknown_property_to_writer)

                schema_for_property_exists = schema_for_property is not None
                schema_allows_writes = not is_false_schema(schema_for_property)
                if schema_for_property_exists and schema_allows_writes:
                    rec_result = compatibility_rec(
                        reader_schema=reader_properties[unknown_property_to_writer],
                        writer_schema=schema_for_property,
                        location=properties_location,
                    )
                    if is_incompatible(rec_result):
                        result.add_incompatibility(
                            incompat_type=Incompatibility.property_added_not_covered_by_partially_open_content_model,
                            message="incompatible schemas",
                            location=properties_location,
                        )

                # BUG FIX: the second clause previously looked up
                # reader_properties.get(Keyword.REQUIRED.value) — a list of
                # required names — so the DEFAULT check never inspected the new
                # property's own schema. It must look at the property schema
                # itself to see whether it declares a "default".
                new_property_is_required_without_default = (
                    unknown_property_to_writer in reader_schema.get(Keyword.REQUIRED.value, list())
                    and Keyword.DEFAULT.value not in reader_properties.get(unknown_property_to_writer, dict())
                )
                if new_property_is_required_without_default:
                    result.add_incompatibility(
                        incompat_type=Incompatibility.required_property_added_to_unopen_content_model,
                        message=f"Property {unknown_property_to_writer} added without a default",
                        location=properties_location,
                    )

    reader_attribute_dependencies_schema = reader_schema.get(Keyword.DEPENDENCIES.value, dict())
    writer_attribute_dependencies_schema = writer_schema.get(Keyword.DEPENDENCIES.value, dict())

    for writer_attribute_dependency_name, writer_attribute_dependencies in writer_attribute_dependencies_schema.items():
        reader_attribute_dependencies = reader_attribute_dependencies_schema.get(writer_attribute_dependency_name)

        if not reader_attribute_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_added,
                message="incompatible dependency array",
                location=location,
            )
            # BUG FIX: without this continue, set(None) below raised TypeError
            # whenever the reader had no dependency array for this attribute.
            continue

        new_dependencies = set(writer_attribute_dependencies) - set(reader_attribute_dependencies)
        if new_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_extended,
                message=f"new dependencies {new_dependencies}",
                location=location
            )

    reader_dependent_schemas = reader_schema.get(Keyword.DEPENDENT_SCHEMAS.value, dict())
    writer_dependent_schemas = writer_schema.get(Keyword.DEPENDENT_SCHEMAS.value, dict())

    for writer_dependent_schema_name, writer_dependent_schema in writer_dependent_schemas.items():
        reader_dependent_schema = reader_dependent_schemas.get(writer_dependent_schema_name)
        # BUG FIX: previously compared against the whole writer_dependent_schemas
        # mapping instead of the single schema for this name.
        if introduced_constraint(reader_dependent_schema, writer_dependent_schema):
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_schema_added,
                message=f"new dependency schema {writer_dependent_schema_name}",
                location=location
            )

        rec_result = compatibility_rec(reader_dependent_schema, writer_dependent_schema, location)
        result = result.merged_with(rec_result)

    # maxProperties / minProperties assertion narrowing.
    checks: List[AssertionCheck] = [MAX_PROPERTIES_CHECK, MIN_PROPERTIES_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    reader_additional_properties = reader_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    writer_additional_properties = writer_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    location_additional_properties = location + [Keyword.ADDITIONAL_PROPERTIES.value]

    if introduced_constraint(reader_additional_properties, writer_additional_properties):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_properties_narrowed,
            # typo fix: "instroduced" -> "introduced"
            message="additionalProperties introduced",
            location=location_additional_properties,
        )

    if reader_additional_properties and writer_additional_properties:
        rec_result = compatibility_rec(
            reader_additional_properties, writer_additional_properties, location_additional_properties
        )
        result = result.merged_with(rec_result)

    return result
def compatibility_array(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check two JSON array schemas for compatibility.

    Handles both the plain-array form (``items`` is a single schema) and the
    tuple form (``items`` is a list of schemas), including ``additionalItems``
    and the maxItems/minItems/uniqueItems assertions.

    https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.4
    """
    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)
    assert reader_type == Instance.ARRAY, "types should have been previously checked"
    assert writer_type == Instance.ARRAY, "types should have been previously checked"

    reader_items = reader_schema.get(Keyword.ITEMS.value)
    writer_items = writer_schema.get(Keyword.ITEMS.value)

    result = SchemaCompatibilityResult.compatible()

    reader_is_tuple = is_tuple(reader_schema)
    writer_is_tuple = is_tuple(writer_schema)

    # A tuple and a plain array are structurally different types.
    if reader_is_tuple != writer_is_tuple:
        return type_mismatch(reader_type, writer_type, location)

    # Extend the array iterator to match the tuple size
    if reader_is_tuple and writer_is_tuple:
        reader_items_iter = iter(reader_items)
        writer_items_iter = iter(writer_items)
        reader_requires_more_items = len(reader_items) > len(writer_items)
        writer_has_more_items = len(writer_items) > len(reader_items)
    else:
        reader_items_iter = iter([reader_items])
        writer_items_iter = iter([writer_items])
        reader_requires_more_items = False
        writer_has_more_items = False

    # Compare positions common to both tuples (or the single items schema).
    pos = 0
    for pos, (reader_item, writer_item) in enumerate(zip(reader_items_iter, writer_items_iter), start=pos):
        rec_result = compatibility_rec(reader_item, writer_item, location + ["items", f"{pos}"])
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    reader_additional_items = reader_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    reader_restricts_additional_items = not is_true_schema(reader_additional_items)
    location_additional_items = location + [Keyword.ADDITIONAL_ITEMS.value]

    if writer_has_more_items and reader_restricts_additional_items:
        # BUG FIX: is_false_schema was called on the derived boolean flag
        # instead of the additionalItems schema itself, so "closed" tuples
        # were never detected.
        reader_rejects_additional_items = is_false_schema(reader_additional_items)
        if reader_rejects_additional_items:
            result.add_incompatibility(
                incompat_type=Incompatibility.item_removed_from_closed_content_model,
                message=f"Elements starting from index {pos} are not allowed",
                location=location + [Keyword.ADDITIONAL_ITEMS.value],
            )
        else:
            # The writer's extra positions must be valid against the reader's
            # additionalItems schema.
            for pos, writer_item in enumerate(writer_items_iter, start=pos):
                # BUG FIX: the recursion was given the boolean flag, not the
                # reader's additionalItems schema.
                rec_result = compatibility_rec(reader_additional_items, writer_item, location_additional_items)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_removed_not_covered_by_partially_open_content_model,
                        message=f"Item in position {pos} is not compatible",
                        location=location_additional_items,
                    )

    writer_additional_items = writer_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    writer_restricts_additional_items = not is_true_schema(writer_additional_items)

    if reader_requires_more_items:
        # This is just for more detailed diagnostics
        if writer_restricts_additional_items:
            for pos, reader_item in enumerate(reader_items_iter, start=pos):
                location_reader_item = location + ["items", f"{pos}"]
                rec_result = compatibility_rec(reader_item, writer_additional_items, location_reader_item)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_added_not_covered_by_partially_open_content_model,
                        message="New element schema incompatible with the other version",
                        location=location_reader_item,
                    )

        result.add_incompatibility(
            incompat_type=Incompatibility.item_added_to_open_content_model,
            message=f"Elements starting from index {pos} are now required",
            location=location,
        )

    if is_tuple_without_additional_items(reader_schema) and not is_tuple_without_additional_items(writer_schema):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            # typo fix: "not longer" -> "no longer"
            message="Additional items are no longer allowed",
            location=location_additional_items,
        )

    # BUG FIX: the lookups used the enum member (Keyword.ITEMS) as the dict
    # key instead of its string value, so they always returned None.
    reader_additional_items = reader_schema.get(Keyword.ITEMS.value)
    writer_additional_items = writer_schema.get(Keyword.ITEMS.value)

    if introduced_constraint(reader_additional_items, writer_additional_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            message="Items are now restricted, old values may not be valid anymore",
            location=location_additional_items,
        )

    rec_result = compatibility_rec(reader_additional_items, writer_additional_items, location_additional_items)
    result = result.merged_with(rec_result)

    # maxItems / minItems assertion narrowing.
    checks: List[AssertionCheck] = [MAX_ITEMS_CHECK, MIN_ITEMS_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    # BUG FIX: writer_unique_items was read from reader_schema, so the
    # constraint comparison compared a value against itself and never fired;
    # both lookups also omitted the enum's .value.
    reader_unique_items = reader_schema.get(Keyword.UNIQUE_ITEMS.value)
    writer_unique_items = writer_schema.get(Keyword.UNIQUE_ITEMS.value)

    if introduced_constraint(reader_unique_items, writer_unique_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.unique_items_added,
            message=INTRODUCED_INCOMPATIBILITY_MSG_FMT.format(
                assert_name=Keyword.UNIQUE_ITEMS.value,
                introduced_value=writer_unique_items,
            ),
            location=location,
        )

    return result
def write_new_schema_local(self, subject, body, content_type):
    """Since we're the master we get to write the new schema

    Parses the submitted schema, deduplicates against the subject's existing
    versions, runs the configured compatibility check, and finally persists a
    new version via ``send_schema_message``. Responds with ``{"id": schema_id}``.

    NOTE(review): the flow relies on ``self.r(...)`` terminating the request
    (errors, duplicate match, early return) — confirm it raises; otherwise the
    code after those calls would keep executing.
    """
    self.log.info("Writing new schema locally since we're the master")
    schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
    try:
        new_schema = TypedSchema.parse(schema_type=schema_type, schema_str=body["schema"])
    except (InvalidSchema, InvalidSchemaType) as e:
        self.log.warning("Invalid schema: %r", body["schema"], exc_info=True)
        # Surface the parser's own message when it is one we know how to read.
        if isinstance(e.__cause__, (SchemaParseException, JSONDecodeError)):
            human_error = f"{e.__cause__.args[0]}"  # pylint: disable=no-member
        else:
            human_error = "Provided schema is not valid"
        self.r(
            body={
                "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                "message": f"Invalid {schema_type} schema. Error: {human_error}",
            },
            content_type=content_type,
            status=HTTPStatus.UNPROCESSABLE_ENTITY,
        )
    # Brand-new subject (or one whose schema map is empty): register version 1
    # without any compatibility checking.
    if subject not in self.ksr.subjects or not self.ksr.subjects.get(
            subject)["schemas"]:
        schema_id = self.ksr.get_schema_id(new_schema)
        version = 1
        self.log.info(
            "Registering new subject: %r with version: %r to schema %r, schema_id: %r", subject, version,
            new_schema.schema_str, schema_id)
    else:
        # First check if any of the existing schemas for the subject match
        subject_data = self.ksr.subjects[subject]
        schemas = self.ksr.get_schemas(subject)
        if not schemas:  # Previous ones have been deleted by the user.
            # Versions keep increasing monotonically even after deletions.
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            schema_id = self.ksr.get_schema_id(new_schema)
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r", subject, schema_id,
                version, new_schema.schema_str, schema_id)
            self.send_schema_message(
                subject=subject,
                schema=new_schema,
                schema_id=schema_id,
                version=version,
                deleted=False,
            )
            self.r({"id": schema_id}, content_type)

        schema_versions = sorted(list(schemas))
        # Go through these in version order
        for version in schema_versions:
            schema = subject_data["schemas"][version]
            # An exact match means the schema is already registered: return
            # its existing id instead of creating a new version.
            if schema["schema"] == new_schema:
                self.r({"id": schema["id"]}, content_type)
            else:
                self.log.debug("schema: %s did not match with: %s", schema, new_schema)

        compatibility_mode = self._get_compatibility_mode(
            subject=subject_data, content_type=content_type)

        # Run a compatibility check between on file schema(s) and the one being submitted now
        # the check is either towards the latest one or against all previous ones in case of
        # transitive mode
        if compatibility_mode.is_transitive():
            check_against = schema_versions
        else:
            check_against = [schema_versions[-1]]

        for old_version in check_against:
            old_schema = subject_data["schemas"][old_version]["schema"]
            result = check_compatibility(
                old_schema=old_schema,
                new_schema=new_schema,
                compatibility_mode=compatibility_mode,
            )
            if is_incompatible(result):
                # Pick one (arbitrary) message from the result set for the
                # HTTP error body.
                message = set(
                    result.messages).pop() if result.messages else ""
                self.log.warning("Incompatible schema: %s", result)
                self.r(
                    body={
                        "error_code": SchemaErrorCodes.HTTP_CONFLICT.value,
                        "message": f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                    },
                    content_type=content_type,
                    status=HTTPStatus.CONFLICT,
                )

        # We didn't find an existing schema and the schema is compatible so go and create one
        schema_id = self.ksr.get_schema_id(new_schema)
        version = max(self.ksr.subjects[subject]["schemas"]) + 1
        self.log.info(
            "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r", subject, schema_id,
            version, new_schema.to_json(), schema_id)
    # Persist the new version (reached from both branches above).
    self.send_schema_message(
        subject=subject,
        schema=new_schema,
        schema_id=schema_id,
        version=version,
        deleted=False,
    )
    self.r({"id": schema_id}, content_type)