Example #1
    async def compatibility_check(self, content_type, *, subject, version,
                                  request):
        """Check for schema compatibility"""
        body = request.json
        self.log.info(
            "Got request to check subject: %r, version_id: %r compatibility",
            subject, version)
        old = await self.subject_version_get(content_type=content_type,
                                             subject=subject,
                                             version=version,
                                             return_dict=True)
        self.log.info("Existing schema: %r, new_schema: %r", old["schema"],
                      body["schema"])
        try:
            schema_type = SchemaType(body.get("schemaType", "AVRO"))
            new_schema = TypedSchema.parse(schema_type, body["schema"])
        except InvalidSchema:
            self.log.warning("Invalid schema: %r", body["schema"])
            self.r(
                body={
                    "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message": "Invalid Avro schema",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )
        try:
            old_schema_type = SchemaType(old.get("schemaType", "AVRO"))
            old_schema = TypedSchema.parse(old_schema_type, old["schema"])
        except InvalidSchema:
            self.log.warning("Invalid existing schema: %r", old["schema"])
            self.r(
                body={
                    "error_code": SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message": "Invalid Avro schema",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )

        compatibility_mode = self._get_compatibility_mode(
            subject=old, content_type=content_type)

        result = check_compatibility(
            old_schema=old_schema,
            new_schema=new_schema,
            compatibility_mode=compatibility_mode,
        )
        if is_incompatible(result):
            self.log.warning(
                "Invalid schema %s found by compatibility check: old: %s new: %s",
                result, old_schema, new_schema)
            self.r({"is_compatible": False}, content_type)
        self.r({"is_compatible": True}, content_type)
Example #2
def compatibility_object(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.5
    result = SchemaCompatibilityResult.compatible()

    assert get_type_of(reader_schema) == Instance.OBJECT, "types should have been previously checked"
    assert get_type_of(writer_schema) == Instance.OBJECT, "types should have been previously checked"

    properties_location = location + [Keyword.PROPERTIES.value]
    reader_properties = reader_schema.get(Keyword.PROPERTIES.value)
    writer_properties = writer_schema.get(Keyword.PROPERTIES.value)

    reader_property_set = set(reader_properties) if reader_properties else set()
    writer_property_set = set(writer_properties) if writer_properties else set()

    # These properties are unknown in the sense they don't have a direct
    # schema, however there may be an indirect schema (patternProperties or
    # additionalProperties)
    properties_unknown_to_writer = reader_property_set - writer_property_set
    properties_unknown_to_reader = writer_property_set - reader_property_set

    for common_property in reader_property_set & writer_property_set:
        this_property_location = properties_location + [common_property]

        reader_property = reader_properties[common_property]
        writer_property = writer_properties[common_property]

        is_required_by_reader = reader_property.get(Keyword.REQUIRED.value)
        is_required_by_writer = writer_property.get(Keyword.REQUIRED.value)
        if not is_required_by_writer and is_required_by_reader:
            result.add_incompatibility(
                incompat_type=Incompatibility.required_attribute_added,
                message=f"Property {common_property} became required",
                location=this_property_location,
            )

        rec_result = compatibility_rec(
            reader_schema=reader_property,
            writer_schema=writer_property,
            location=this_property_location,
        )
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    # With an open content model any property can be added without breaking
    # compatibility because those do not have assertions, so only check if the
    # reader is using a closed model
    if properties_unknown_to_reader and not is_object_content_model_open(reader_schema):
        for unknown_property_to_reader in properties_unknown_to_reader:
            schema_for_property = schema_from_partially_open_content_model(reader_schema, unknown_property_to_reader)

            if schema_for_property is None:
                result.add_incompatibility(
                    incompat_type=Incompatibility.property_removed_from_closed_content_model,
                    message=f"The property {unknown_property_to_reader} is not accepted anymore",
                    location=properties_location,
                )
            else:
                rec_result = compatibility_rec(
                    reader_schema=schema_for_property,
                    writer_schema=writer_properties[unknown_property_to_reader],
                    location=properties_location,
                )
                if is_incompatible(rec_result):
                    result = result.merged_with(rec_result)
                    result.add_incompatibility(
                        incompat_type=Incompatibility.property_removed_not_covered_by_partially_open_content_model,
                        message=f"property {unknown_property_to_reader} is not compatible",
                        location=properties_location,
                    )

    elif properties_unknown_to_writer:
        is_writer_open_model = is_object_content_model_open(writer_schema)

        if is_writer_open_model:
            properties = ', '.join(properties_unknown_to_writer)
            message_property_added_to_open_content_model = (
                f"Restricting acceptable values of properties is an incompatible "
                f"change. The following properties {properties} accepted any "
                f"value because of the lack of validation (the object schema had "
                f"neither patternProperties nor additionalProperties), now "
                f"these values are restricted."
            )
            result.add_incompatibility(
                incompat_type=Incompatibility.property_added_to_open_content_model,
                message=message_property_added_to_open_content_model,
                location=properties_location
            )

        if not is_writer_open_model:
            for unknown_property_to_writer in properties_unknown_to_writer:
                schema_for_property = schema_from_partially_open_content_model(writer_schema, unknown_property_to_writer)

                schema_for_property_exists = schema_for_property is not None
                schema_allows_writes = not is_false_schema(schema_for_property)

                if schema_for_property_exists and schema_allows_writes:
                    rec_result = compatibility_rec(
                        reader_schema=reader_properties[unknown_property_to_writer],
                        writer_schema=schema_for_property,
                        location=properties_location,
                    )
                    if is_incompatible(rec_result):
                        result.add_incompatibility(
                            incompat_type=Incompatibility.property_added_not_covered_by_partially_open_content_model,
                            message="incompatible schemas",
                            location=properties_location,
                        )

                new_property_is_required_without_default = (
                    unknown_property_to_writer in reader_schema.get(Keyword.REQUIRED.value, list())
                    # The reader's subschema for the new property must declare a default,
                    # otherwise objects written with the old schema lack a required value.
                    and Keyword.DEFAULT.value not in reader_properties.get(unknown_property_to_writer, dict())
                )
                if new_property_is_required_without_default:
                    result.add_incompatibility(
                        incompat_type=Incompatibility.required_property_added_to_unopen_content_model,
                        message=f"Property {unknown_property_to_writer} added without a default",
                        location=properties_location,
                    )

    reader_attribute_dependencies_schema = reader_schema.get(Keyword.DEPENDENCIES.value, dict())
    writer_attribute_dependencies_schema = writer_schema.get(Keyword.DEPENDENCIES.value, dict())

    for writer_attribute_dependency_name, writer_attribute_dependencies in writer_attribute_dependencies_schema.items():
        reader_attribute_dependencies = reader_attribute_dependencies_schema.get(writer_attribute_dependency_name)

        if not reader_attribute_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_added,
                message="incompatible dependency array",
                location=location,
            )
            # Skip the set difference below, which would fail on None.
            continue

        new_dependencies = set(writer_attribute_dependencies) - set(reader_attribute_dependencies)
        if new_dependencies:
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_array_extended,
                message=f"new dependencies {new_dependencies}",
                location=location
            )

    reader_dependent_schemas = reader_schema.get(Keyword.DEPENDENT_SCHEMAS.value, dict())
    writer_dependent_schemas = writer_schema.get(Keyword.DEPENDENT_SCHEMAS.value, dict())

    for writer_dependent_schema_name, writer_dependent_schema in writer_dependent_schemas.items():
        reader_dependent_schema = reader_dependent_schemas.get(writer_dependent_schema_name)
        if introduced_constraint(reader_dependent_schema, writer_dependent_schema):
            result.add_incompatibility(
                incompat_type=Incompatibility.dependency_schema_added,
                message=f"new dependency schema {writer_dependent_schema_name}",
                location=location
            )

        rec_result = compatibility_rec(reader_dependent_schema, writer_dependent_schema, location)
        result = result.merged_with(rec_result)

    checks: List[AssertionCheck] = [MAX_PROPERTIES_CHECK, MIN_PROPERTIES_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    reader_additional_properties = reader_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    writer_additional_properties = writer_schema.get(Keyword.ADDITIONAL_PROPERTIES.value)
    location_additional_properties = location + [Keyword.ADDITIONAL_PROPERTIES.value]

    if introduced_constraint(reader_additional_properties, writer_additional_properties):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_properties_narrowed,
            message="additionalProperties instroduced",
            location=location_additional_properties,
        )

    if reader_additional_properties and writer_additional_properties:
        rec_result = compatibility_rec(
            reader_additional_properties, writer_additional_properties, location_additional_properties
        )
        result = result.merged_with(rec_result)

    return result
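
A minimal sketch of how compatibility_object flags a removed property when the reader closes its content model. It assumes plain dict schemas are accepted, as in the snippet, that get_type_of recognizes "type": "object", and that the functions above are importable in the same module (paths are not shown).

# Hypothetical input: the writer may still produce an "extra" property, but the
# reader drops it and closes its content model, so the check should report an
# incompatibility.
reader = {
    "type": "object",
    "properties": {"name": {"type": "string"}},
    "additionalProperties": False,
}
writer = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "extra": {"type": "integer"}},
}

result = compatibility_object(reader, writer, location=[])
print(is_incompatible(result))  # expected: True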
Example #3
def compatibility_array(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.4
    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)
    assert reader_type == Instance.ARRAY, "types should have been previously checked"
    assert writer_type == Instance.ARRAY, "types should have been previously checked"

    reader_items = reader_schema.get(Keyword.ITEMS.value)
    writer_items = writer_schema.get(Keyword.ITEMS.value)

    result = SchemaCompatibilityResult.compatible()

    reader_is_tuple = is_tuple(reader_schema)
    writer_is_tuple = is_tuple(writer_schema)

    if reader_is_tuple != writer_is_tuple:
        return type_mismatch(reader_type, writer_type, location)

    # For tuple typing compare the items pairwise and note which side has extra
    # items; otherwise wrap the single `items` schema so the same loop applies
    if reader_is_tuple and writer_is_tuple:
        reader_items_iter = iter(reader_items)
        writer_items_iter = iter(writer_items)
        reader_requires_more_items = len(reader_items) > len(writer_items)
        writer_has_more_items = len(writer_items) > len(reader_items)
    else:
        reader_items_iter = iter([reader_items])
        writer_items_iter = iter([writer_items])
        reader_requires_more_items = False
        writer_has_more_items = False

    pos = 0
    for pos, (reader_item, writer_item) in enumerate(zip(reader_items_iter, writer_items_iter), start=pos):
        rec_result = compatibility_rec(reader_item, writer_item, location + ["items", f"{pos}"])
        if is_incompatible(rec_result):
            result = result.merged_with(rec_result)

    reader_additional_items = reader_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    reader_restricts_additional_items = not is_true_schema(reader_additional_items)
    location_additional_items = location + [Keyword.ADDITIONAL_ITEMS.value]

    if writer_has_more_items and reader_restricts_additional_items:
        reader_rejects_additional_items = is_false_schema(reader_additional_items)
        if reader_rejects_additional_items:
            result.add_incompatibility(
                incompat_type=Incompatibility.item_removed_from_closed_content_model,
                message=f"Elements starting from index {pos} are not allowed",
                location=location + [Keyword.ADDITIONAL_ITEMS.value],
            )
        else:
            for pos, writer_item in enumerate(writer_items_iter, start=pos):
                rec_result = compatibility_rec(reader_additional_items, writer_item, location_additional_items)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_removed_not_covered_by_partially_open_content_model,
                        message=f"Item in position {pos} is not compatible",
                        location=location_additional_items,
                    )

    writer_additional_items = writer_schema.get(Keyword.ADDITIONAL_ITEMS.value, True)
    writer_restricts_additional_items = not is_true_schema(writer_additional_items)

    if reader_requires_more_items:
        # This is just for more detailed diagnostics
        if writer_restricts_additional_items:
            for pos, reader_item in enumerate(reader_items_iter, start=pos):
                location_reader_item = location + ["items", f"{pos}"]
                rec_result = compatibility_rec(reader_item, writer_additional_items, location_reader_item)
                if is_incompatible(rec_result):
                    result.add_incompatibility(
                        incompat_type=Incompatibility.item_added_not_covered_by_partially_open_content_model,
                        message="New element schema incompatible with the other version",
                        location=location_reader_item,
                    )

        result.add_incompatibility(
            incompat_type=Incompatibility.item_added_to_open_content_model,
            message=f"Elements starting from index {pos} are now required",
            location=location,
        )

    if is_tuple_without_additional_items(reader_schema) and not is_tuple_without_additional_items(writer_schema):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            message="Additional items are not longer allowed",
            location=location_additional_items,
        )

    # Re-read the items schema to detect a newly introduced constraint
    reader_additional_items = reader_schema.get(Keyword.ITEMS.value)
    writer_additional_items = writer_schema.get(Keyword.ITEMS.value)
    if introduced_constraint(reader_additional_items, writer_additional_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.additional_items_removed,
            message="Items are now restricted, old values may not be valid anymore",
            location=location_additional_items,
        )

    rec_result = compatibility_rec(reader_additional_items, writer_additional_items, location_additional_items)
    result = result.merged_with(rec_result)

    checks: List[AssertionCheck] = [MAX_ITEMS_CHECK, MIN_ITEMS_CHECK]
    for assertion_check in checks:
        check_result = check_assertion_compatibility(
            reader_schema,
            writer_schema,
            assertion_check,
            location,
        )
        result = result.merged_with(check_result)

    reader_unique_items = reader_schema.get(Keyword.UNIQUE_ITEMS.value)
    writer_unique_items = writer_schema.get(Keyword.UNIQUE_ITEMS.value)

    if introduced_constraint(reader_unique_items, writer_unique_items):
        result.add_incompatibility(
            incompat_type=Incompatibility.unique_items_added,
            message=INTRODUCED_INCOMPATIBILITY_MSG_FMT.format(
                assert_name=Keyword.UNIQUE_ITEMS.value,
                introduced_value=writer_unique_items,
            ),
            location=location,
        )

    return result
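
A similar sketch for compatibility_array: narrowing the element type should be reported as incompatible, since arrays written under the old schema may no longer validate. Plain dict schemas and same-module imports are assumed here as well.

# Hypothetical input: the reader narrows the element type from integer to string.
reader = {"type": "array", "items": {"type": "string"}}
writer = {"type": "array", "items": {"type": "integer"}}

result = compatibility_array(reader, writer, location=[])
print(is_incompatible(result))  # expected: True (items type mismatch)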
Example #4
    def write_new_schema_local(self, subject, body, content_type):
        """Since we're the master we get to write the new schema"""
        self.log.info("Writing new schema locally since we're the master")
        schema_type = SchemaType(body.get("schemaType", SchemaType.AVRO))
        try:
            new_schema = TypedSchema.parse(schema_type=schema_type,
                                           schema_str=body["schema"])
        except (InvalidSchema, InvalidSchemaType) as e:
            self.log.warning("Invalid schema: %r",
                             body["schema"],
                             exc_info=True)
            if isinstance(e.__cause__,
                          (SchemaParseException, JSONDecodeError)):
                human_error = f"{e.__cause__.args[0]}"  # pylint: disable=no-member
            else:
                human_error = "Provided schema is not valid"
            self.r(
                body={
                    "error_code":
                    SchemaErrorCodes.INVALID_AVRO_SCHEMA.value,
                    "message":
                    f"Invalid {schema_type} schema. Error: {human_error}",
                },
                content_type=content_type,
                status=HTTPStatus.UNPROCESSABLE_ENTITY,
            )
        if subject not in self.ksr.subjects or not self.ksr.subjects.get(
                subject)["schemas"]:
            schema_id = self.ksr.get_schema_id(new_schema)
            version = 1
            self.log.info(
                "Registering new subject: %r with version: %r to schema %r, schema_id: %r",
                subject, version, new_schema.schema_str, schema_id)
        else:
            # First check if any of the existing schemas for the subject match
            subject_data = self.ksr.subjects[subject]
            schemas = self.ksr.get_schemas(subject)
            if not schemas:  # Previous ones have been deleted by the user.
                version = max(self.ksr.subjects[subject]["schemas"]) + 1
                schema_id = self.ksr.get_schema_id(new_schema)
                self.log.info(
                    "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                    subject, schema_id, version, new_schema.schema_str,
                    schema_id)
                self.send_schema_message(
                    subject=subject,
                    schema=new_schema,
                    schema_id=schema_id,
                    version=version,
                    deleted=False,
                )
                self.r({"id": schema_id}, content_type)

            schema_versions = sorted(list(schemas))
            # Go through these in version order
            for version in schema_versions:
                schema = subject_data["schemas"][version]
                if schema["schema"] == new_schema:
                    self.r({"id": schema["id"]}, content_type)
                else:
                    self.log.debug("schema: %s did not match with: %s", schema,
                                   new_schema)

            compatibility_mode = self._get_compatibility_mode(
                subject=subject_data, content_type=content_type)

            # Run a compatibility check between the on-file schema(s) and the one being
            # submitted now. The check is against the latest version only, or against all
            # previous versions when the compatibility mode is transitive.
            if compatibility_mode.is_transitive():
                check_against = schema_versions
            else:
                check_against = [schema_versions[-1]]

            for old_version in check_against:
                old_schema = subject_data["schemas"][old_version]["schema"]
                result = check_compatibility(
                    old_schema=old_schema,
                    new_schema=new_schema,
                    compatibility_mode=compatibility_mode,
                )
                if is_incompatible(result):
                    message = set(
                        result.messages).pop() if result.messages else ""
                    self.log.warning("Incompatible schema: %s", result)
                    self.r(
                        body={
                            "error_code":
                            SchemaErrorCodes.HTTP_CONFLICT.value,
                            "message":
                            f"Incompatible schema, compatibility_mode={compatibility_mode.value} {message}",
                        },
                        content_type=content_type,
                        status=HTTPStatus.CONFLICT,
                    )

            # We didn't find an existing schema and the schema is compatible so go and create one
            schema_id = self.ksr.get_schema_id(new_schema)
            version = max(self.ksr.subjects[subject]["schemas"]) + 1
            self.log.info(
                "Registering subject: %r, id: %r new version: %r with schema %r, schema_id: %r",
                subject, schema_id, version, new_schema.to_json(), schema_id)
        self.send_schema_message(
            subject=subject,
            schema=new_schema,
            schema_id=schema_id,
            version=version,
            deleted=False,
        )
        self.r({"id": schema_id}, content_type)