def type_mismatch(reader_type, writer_type, location: List[str]) -> SchemaCompatibilityResult:
    """Build the standard incompatibility result for a reader/writer type change.

    :param reader_type: type expected by the new (reader) schema.
    :param writer_type: type produced by the old (writer) schema.
    :param location: JSON-pointer-like path to the offending schema node.
    """
    description = f'type {reader_type} is not compatible with type {writer_type}'
    return SchemaCompatibilityResult.incompatible(
        incompat_type=Incompatibility.type_changed,
        message=description,
        location=location,
    )
def compatibility_enum(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Check enum compatibility: the reader must still accept every writer option.

    https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.1.2

    :param reader_schema: schema with an ``enum`` keyword (asserted below).
    :param writer_schema: schema with an ``enum`` keyword (asserted below).
    :param location: path to the current schema node, extended with ``enum``
        in the returned incompatibility.
    """
    assert Keyword.ENUM.value in reader_schema, "types should have been previously checked"
    assert Keyword.ENUM.value in writer_schema, "types should have been previously checked"

    # NOTE(review): enum members that are not hashable (arrays/objects are
    # legal enum values in JSON Schema) would make set() raise TypeError —
    # confirm whether such enums can reach this code path.
    options_removed_by_reader = set(writer_schema[Keyword.ENUM.value]) - set(reader_schema[Keyword.ENUM.value])
    if options_removed_by_reader:
        # str() each option: enum values are not guaranteed to be strings
        # (e.g. integer enums), and ", ".join() raises TypeError on
        # non-string items.
        options = ", ".join(str(option) for option in options_removed_by_reader)
        return SchemaCompatibilityResult.incompatible(
            Incompatibility.enum_array_narrowed,
            message=f"some of enum options are no longer valid {options}",
            location=location + [Keyword.ENUM.value],
        )
    return SchemaCompatibilityResult.compatible()
def compatibility_boolean_schema(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Compare two boolean schemas (``true`` / ``false``).

    :param reader_schema: boolean schema of the new version (asserted below).
    :param writer_schema: boolean schema of the old version (asserted below).
    :param location: path to the current schema node.
    """
    assert get_type_of(reader_schema) is BooleanSchema, "types should have been previously checked"
    assert get_type_of(writer_schema) is BooleanSchema, "types should have been previously checked"

    # A `true` reader accepts every instance; a `false` writer never produces
    # one. Either way there is nothing left to reject.
    if is_true_schema(reader_schema):
        return SchemaCompatibilityResult.compatible()
    if is_false_schema(writer_schema):
        return SchemaCompatibilityResult.compatible()

    # Remaining case: the reader rejects everything while the writer may
    # still emit values. Point at the `not` keyword when the reader was
    # spelled as {"not": {}} rather than a bare `false`.
    spelled_with_not = isinstance(reader_schema, dict) and Subschema.NOT.value in reader_schema
    error_location = location + [Subschema.NOT.value] if spelled_with_not else location
    return SchemaCompatibilityResult.incompatible(
        incompat_type=Incompatibility.type_changed,
        message=f"All new values are rejected by {reader_schema}",
        location=error_location,
    )
def compatibility_subschemas(reader_schema, writer_schema, location: List[str]) -> SchemaCompatibilityResult:
    """Compatibility check when either side uses a composition keyword.

    Handles ``allOf``/``anyOf``/``oneOf`` (and their interaction with plain
    schemas) per the composition rules in:
    https://json-schema.org/draft/2020-12/json-schema-core.html#rfc.section.10

    :param reader_schema: new schema; may or may not be a composed schema.
    :param writer_schema: old schema; may or may not be a composed schema.
    :param location: path to the current schema node; the reader type's name
        is appended for all results produced here.
    """
    # pylint: disable=too-many-return-statements
    reader_subschemas_and_type = maybe_get_subschemas_and_type(reader_schema)
    writer_subschemas_and_type = maybe_get_subschemas_and_type(writer_schema)

    # Unpack the (subschemas, type) pair for the reader, falling back to the
    # plain type when the reader is not a composed schema.
    reader_subschemas: Optional[List[Any]]
    reader_type: JSONSCHEMA_TYPES
    if reader_subschemas_and_type is not None:
        reader_subschemas = reader_subschemas_and_type[0]
        reader_type = reader_subschemas_and_type[1]
        reader_has_subschema = reader_type in (Subschema.ALL_OF, Subschema.ANY_OF, Subschema.ONE_OF)
    else:
        reader_subschemas = None
        reader_type = get_type_of(reader_schema)
        reader_has_subschema = False

    # Same unpacking for the writer side.
    writer_subschemas: Optional[List[Any]]
    writer_type: JSONSCHEMA_TYPES
    if writer_subschemas_and_type is not None:
        writer_subschemas = writer_subschemas_and_type[0]
        writer_type = writer_subschemas_and_type[1]
        writer_has_subschema = writer_type in (Subschema.ALL_OF, Subschema.ANY_OF, Subschema.ONE_OF)
    else:
        writer_subschemas = None
        writer_type = get_type_of(writer_schema)
        writer_has_subschema = False

    # "Simple subschema" special case: exactly one side is a composition
    # wrapping a single schema, which can be compared directly against the
    # other side's plain schema.
    is_reader_special_case = reader_has_subschema and not writer_has_subschema and is_simple_subschema(reader_schema)
    is_writer_special_case = not reader_has_subschema and writer_has_subschema and is_simple_subschema(writer_schema)

    subschema_location = location + [get_name_of(reader_type)]

    if is_reader_special_case:
        assert reader_subschemas
        return check_simple_subschema(reader_subschemas[0], writer_schema, reader_type, writer_type, subschema_location)

    if is_writer_special_case:
        assert writer_subschemas
        return check_simple_subschema(reader_schema, writer_subschemas[0], reader_type, writer_type, subschema_location)

    # anyOf/oneOf reader vs plain writer: compatible if at least one of the
    # reader's alternatives accepts the writer.
    if reader_type in (Subschema.ANY_OF, Subschema.ONE_OF) and not writer_has_subschema:
        assert isinstance(reader_type, Subschema)
        for reader_subschema in reader_schema[reader_type.value]:
            rec_result = compatibility_rec(reader_subschema, writer_schema, subschema_location)
            if is_compatible(rec_result):
                return rec_result
        return type_mismatch(reader_type, writer_type, subschema_location)

    # Both sides are composed schemas.
    if reader_subschemas is not None and writer_subschemas is not None:
        # A reader anyOf accepts any other composition; otherwise the
        # composition keyword itself must not change.
        if reader_type not in (Subschema.ANY_OF, writer_type):
            return SchemaCompatibilityResult.incompatible(
                Incompatibility.combined_type_changed,
                message=f"incompatible subschema change, from {reader_type} to {writer_type}",
                location=subschema_location,
            )

        len_reader_subschemas = len(reader_subschemas)
        len_writer_subschemas = len(writer_subschemas)

        # allOf: the reader may not *add* required schemas.
        if reader_type == Subschema.ALL_OF and len_writer_subschemas < len_reader_subschemas:
            msg = (
                f"Not all required schemas were provided, number of required "
                f"schemas increased from {len_writer_subschemas} to "
                f"{len_reader_subschemas}"
            )
            return SchemaCompatibilityResult.incompatible(
                Incompatibility.product_type_extended,
                message=msg,
                location=subschema_location,
            )

        # oneOf/anyOf differ on annotation collection not validation.
        if reader_type in (Subschema.ANY_OF, Subschema.ONE_OF) and len_writer_subschemas > len_reader_subschemas:
            msg = (
                f"Not all schemas are accepted, number of schemas "
                f"reduced from {len_writer_subschemas} to "
                f"{len_reader_subschemas}"
            )
            return SchemaCompatibilityResult.incompatible(
                Incompatibility.sum_type_narrowed,
                message=msg,
                location=subschema_location,
            )

        # allOf must satisfy every reader schema; anyOf/oneOf must cover
        # every writer schema.
        if reader_type == Subschema.ALL_OF:
            qty_of_required_compatible_subschemas = len_reader_subschemas
        else:
            qty_of_required_compatible_subschemas = len_writer_subschemas

        compatible_schemas_count = count_uniquely_compatible_schemas(
            reader_type,
            reader_subschemas,
            writer_subschemas,
            subschema_location,
        )
        if compatible_schemas_count < qty_of_required_compatible_subschemas:
            return SchemaCompatibilityResult.incompatible(
                Incompatibility.combined_type_subschemas_changed,
                message="subschemas are incompatible",
                location=subschema_location,
            )
        return SchemaCompatibilityResult.compatible()

    return type_mismatch(reader_type, writer_type, subschema_location)
def compatibility_rec(
    reader_schema: Optional[Any], writer_schema: Optional[Any], location: List[str]
) -> SchemaCompatibilityResult:
    """Recursively check that `reader_schema` accepts what `writer_schema` produces.

    Dispatches on the reader's type to the specialized compatibility checks.

    :param reader_schema: new schema node (or None when absent).
    :param writer_schema: old schema node (or None when absent).
    :param location: path to the current schema node, used in error results.
    :raises ValueError: when the reader's type is not one of the known types.
    """
    if introduced_constraint(reader_schema, writer_schema):
        return SchemaCompatibilityResult.incompatible(
            incompat_type=Incompatibility.schema_added,
            message="schema added, previously used values may not be valid anymore",
            location=location,
        )

    # Note: This is not always an incompatible change, jsonschema accepts
    # values unless there is an explicit assertion to reject it, meaning the
    # reader_schema would have to be `false` instead of undefined. However, on
    # some code paths this is really an incompatible change, especially when
    # the reader has type `array` to represent a list, and the writer is
    # either a different type or it is also an `array` but now it represents
    # a tuple.
    if reader_schema is None and writer_schema is not None:
        # NOTE(review): this logs get_type_of(reader_schema) but reader_schema
        # is None in this branch — the writer's type was likely intended.
        LOG.debug("Schema removed reader_schema.type='%r'", get_type_of(reader_schema))
        return SchemaCompatibilityResult.incompatible(
            incompat_type=Incompatibility.schema_removed,
            message="schema removed",
            location=location,
        )

    # The type of reader_schema and writer_schema may vary wildly. Example:
    #
    #   reader_schema = {"additionalProperties": {"type": "integer"}, ...}
    #   writer_schema = {"additionalProperties": false, ...}
    #
    # When recursing `reader_schema` will be Instance.INTEGER and
    # `writer_schema` will be BooleanSchema
    #
    reader_type = get_type_of(reader_schema)
    writer_type = get_type_of(writer_schema)

    reader_is_number = reader_type in (Instance.NUMBER, Instance.INTEGER)
    writer_is_number = writer_type in (Instance.NUMBER, Instance.INTEGER)
    both_are_numbers = reader_is_number and writer_is_number

    reader_has_subschema = reader_type in (Subschema.ALL_OF, Subschema.ANY_OF, Subschema.ONE_OF)
    writer_has_subschema = writer_type in (Subschema.ALL_OF, Subschema.ANY_OF, Subschema.ONE_OF)
    either_has_subschema = reader_has_subschema or writer_has_subschema

    # A `true` schema accepts/produces objects too, so it participates in the
    # object comparison below. (The original assigned reader_is_true_schema
    # twice; the duplicate assignment was removed.)
    reader_is_object = reader_type == Instance.OBJECT
    reader_is_true_schema = is_true_schema(reader_schema)
    writer_is_object = writer_type == Instance.OBJECT
    writer_is_true_schema = is_true_schema(writer_schema)
    both_are_object = (reader_is_object or reader_is_true_schema) and (writer_is_object or writer_is_true_schema)

    # https://json-schema.org/draft/2020-12/json-schema-validation.html#rfc.section.6.1.1
    if not both_are_numbers and not either_has_subschema and not both_are_object and reader_type != writer_type:
        result = type_mismatch(reader_type, writer_type, location)
    elif both_are_numbers:
        result = compatibility_numerical(reader_schema, writer_schema, location)
    elif either_has_subschema:
        result = compatibility_subschemas(reader_schema, writer_schema, location)
    elif both_are_object:
        # Normalize a `true` schema into an explicit empty object schema so
        # compatibility_object only has to handle dicts.
        if reader_is_true_schema:
            reader_schema = {"type": Instance.OBJECT.value}
        if writer_is_true_schema:
            writer_schema = {"type": Instance.OBJECT.value}
        result = compatibility_object(reader_schema, writer_schema, location)
    elif reader_type is BooleanSchema:
        result = SchemaCompatibilityResult.compatible()
    elif reader_type is Subschema.NOT:
        assert reader_schema, "if just one schema is NOT the result should have been a type_mismatch"
        assert writer_schema, "if just one schema is NOT the result should have been a type_mismatch"
        location_not = location + [Subschema.NOT.value]
        return compatibility_rec(
            reader_schema[Subschema.NOT.value],
            writer_schema[Subschema.NOT.value],
            location_not,
        )
    elif reader_type == Instance.BOOLEAN:
        result = SchemaCompatibilityResult.compatible()
    elif reader_type == Instance.STRING:
        result = compatibility_string(reader_schema, writer_schema, location)
    elif reader_type == Instance.ARRAY:
        result = compatibility_array(reader_schema, writer_schema, location)
    elif reader_type == Keyword.ENUM:
        result = compatibility_enum(reader_schema, writer_schema, location)
    elif reader_type is Instance.NULL:
        result = SchemaCompatibilityResult.compatible()
    else:
        raise ValueError(f"unknown type {reader_type}")

    return result
def check_compatibility(
    source: TypedSchema, target: TypedSchema, compatibility_mode: CompatibilityModes
) -> SchemaCompatibilityResult:
    """Check that `source` and `target` schemas are compatible under `compatibility_mode`.

    BACKWARD modes read old data with the new schema (reader=target),
    FORWARD modes read new data with the old schema (reader=source), and
    FULL modes require both directions.

    NOTE(review): a second function with this same name is defined later in
    this file and shadows this one at import time — confirm which definition
    is intended to survive.

    :param source: existing (writer) schema.
    :param target: proposed (reader) schema.
    :param compatibility_mode: which direction(s) of compatibility to enforce.
    """
    # Different schema types (e.g. AVRO vs JSONSCHEMA) are never compatible.
    if source.schema_type is not target.schema_type:
        return SchemaCompatibilityResult.incompatible(
            incompat_type=SchemaIncompatibilityType.type_mismatch,
            message=f"Comparing different schema types: {source.schema_type} with {target.schema_type}",
            location=[],
        )

    if compatibility_mode is CompatibilityModes.NONE:
        LOG.info("Compatibility level set to NONE, no schema compatibility checks performed")
        return SchemaCompatibilityResult.compatible()

    if source.schema_type is SchemaType.AVRO:
        if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}:
            result = check_avro_compatibility(reader_schema=target.schema, writer_schema=source.schema)
        elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}:
            result = check_avro_compatibility(reader_schema=source.schema, writer_schema=target.schema)
        elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}:
            result = check_avro_compatibility(reader_schema=target.schema, writer_schema=source.schema)
            result = result.merged_with(
                check_avro_compatibility(reader_schema=source.schema, writer_schema=target.schema)
            )
    elif source.schema_type is SchemaType.JSONSCHEMA:
        if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}:
            result = check_jsonschema_compatibility(reader=target.schema, writer=source.schema)
        elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}:
            result = check_jsonschema_compatibility(reader=source.schema, writer=target.schema)
        elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}:
            result = check_jsonschema_compatibility(reader=target.schema, writer=source.schema)
            result = result.merged_with(
                check_jsonschema_compatibility(reader=source.schema, writer=target.schema)
            )
    elif source.schema_type is SchemaType.PROTOBUF:
        if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}:
            result = check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema)
        elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}:
            result = check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema)
        elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}:
            result = check_protobuf_compatibility(reader_schema=target.schema, writer_schema=source.schema)
            result = result.merged_with(
                check_protobuf_compatibility(reader_schema=source.schema, writer_schema=target.schema)
            )
    else:
        # Fixed typo in the error message: "Unknow" -> "Unknown".
        result = SchemaCompatibilityResult.incompatible(
            incompat_type=SchemaIncompatibilityType.type_mismatch,
            message=f"Unknown schema_type {source.schema_type}",
            location=[],
        )

    return result
def check_compatibility(
    old_schema: TypedSchema, new_schema: TypedSchema, compatibility_mode: CompatibilityModes
) -> SchemaCompatibilityResult:
    """Check that `old_schema` and `new_schema` are compatible under `compatibility_mode`.

    BACKWARD modes read old data with the new schema (reader=new_schema),
    FORWARD modes read new data with the old schema (reader=old_schema), and
    FULL modes require both directions.

    NOTE(review): this redefinition shadows the earlier `check_compatibility`
    in this file, and unlike it has no `SchemaType.PROTOBUF` branch — protobuf
    schemas fall through to the unknown-type result. Confirm this is intended.

    :param old_schema: existing (writer) schema.
    :param new_schema: proposed (reader) schema.
    :param compatibility_mode: which direction(s) of compatibility to enforce.
    """
    # Different schema types (e.g. AVRO vs JSONSCHEMA) are never compatible.
    if old_schema.schema_type is not new_schema.schema_type:
        return SchemaCompatibilityResult.incompatible(
            incompat_type=SchemaIncompatibilityType.type_mismatch,
            message=f"Comparing different schema types: {old_schema.schema_type} with {new_schema.schema_type}",
            location=[],
        )

    if compatibility_mode is CompatibilityModes.NONE:
        LOG.info("Compatibility level set to NONE, no schema compatibility checks performed")
        return SchemaCompatibilityResult.compatible()

    if old_schema.schema_type is SchemaType.AVRO:
        if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}:
            result = check_avro_compatibility(
                reader_schema=new_schema.schema,
                writer_schema=old_schema.schema,
            )
        elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}:
            result = check_avro_compatibility(
                reader_schema=old_schema.schema,
                writer_schema=new_schema.schema,
            )
        elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}:
            result = check_avro_compatibility(
                reader_schema=new_schema.schema,
                writer_schema=old_schema.schema,
            )
            result = result.merged_with(
                check_avro_compatibility(
                    reader_schema=old_schema.schema,
                    writer_schema=new_schema.schema,
                )
            )
    elif old_schema.schema_type is SchemaType.JSONSCHEMA:
        if compatibility_mode in {CompatibilityModes.BACKWARD, CompatibilityModes.BACKWARD_TRANSITIVE}:
            result = check_jsonschema_compatibility(
                reader=new_schema.schema,
                writer=old_schema.schema,
            )
        elif compatibility_mode in {CompatibilityModes.FORWARD, CompatibilityModes.FORWARD_TRANSITIVE}:
            result = check_jsonschema_compatibility(
                reader=old_schema.schema,
                writer=new_schema.schema,
            )
        elif compatibility_mode in {CompatibilityModes.FULL, CompatibilityModes.FULL_TRANSITIVE}:
            result = check_jsonschema_compatibility(
                reader=new_schema.schema,
                writer=old_schema.schema,
            )
            result = result.merged_with(
                check_jsonschema_compatibility(
                    reader=old_schema.schema,
                    writer=new_schema.schema,
                )
            )
    else:
        # Fixed typo in the error message: "Unknow" -> "Unknown".
        result = SchemaCompatibilityResult.incompatible(
            incompat_type=SchemaIncompatibilityType.type_mismatch,
            message=f"Unknown schema_type {old_schema.schema_type}",
            location=[],
        )

    return result