def canonicalize_dict(d, outer_key=None):
    """Canonicalize a schema dict, dispatching on its shape.

    When ``outer_key`` is ``"properties"`` or ``"patternProperties"``, ``d``
    is treated as a plain name -> schema container: every value is
    canonicalized, written back into ``d`` and ``d`` itself is returned.

    Otherwise a dict that holds none of ``definitions.Jkeywords`` yields
    ``TOP``.  Any other dict is deep-copied and the copy is handed to the
    connector / enum / single-type / list-of-types canonicalizer.
    """
    container_keys = ["properties", "patternProperties"]
    if outer_key in container_keys:
        # Plain container: canonicalize each member schema in place.
        for name, subschema in list(d.items()):
            d[name] = canonicalize_dict(subschema)
        return d

    # A dict without any known schema keyword is the top schema.
    if not definitions.Jkeywords.intersection(d.keys()):
        return TOP

    declared_type = d.get("type")
    connectors_present = definitions.Jconnectors.intersection(d.keys())

    # From here on work on a private copy, not on the caller's dict.
    d = copy.deepcopy(d)

    if connectors_present:
        return canonicalize_connectors(d)
    if "enum" in d:
        return canonicalize_enum(d)
    if utils.is_str(declared_type):
        return canonicalize_single_type(d)
    if not utils.is_list(declared_type):
        # No usable "type": fall back to every type in definitions.Jtypes.
        d["type"] = definitions.Jtypes
    return canonicalize_list_of_types(d)
def canonicalize_single_type(d):
    """Canonicalize a schema dict whose ``"type"`` is a single type name.

    Keywords that are neither in ``definitions.Jcommonkw`` nor listed for
    the type in ``definitions.JtypesToKeywords`` are removed from ``d``.
    Dict values are canonicalized recursively, ``"required"`` is
    de-duplicated and sorted, ``"enum"`` is narrowed with
    ``utils.get_typed_enum_vals`` and any other list has each element
    canonicalized.

    Returns ``BOT`` when the narrowed enum is empty, ``rewrite_enum(d)`` when
    an ``"enum"`` key remains, and ``d`` otherwise.  A type that is not in
    ``definitions.Jtypes`` is reported on stdout and the process exits with
    status 1.
    """
    schema_type = d.get("type")
    if schema_type not in definitions.Jtypes:
        # TODO: or just return?
        print("Unknown schema type {} at:".format(schema_type))
        print(d)
        print("Exiting...")
        sys.exit(1)

    type_keywords = definitions.JtypesToKeywords.get(schema_type)

    # Walk a snapshot of the items because entries may be popped on the way.
    for key, value in list(d.items()):
        relevant = key in definitions.Jcommonkw or key in type_keywords
        if not relevant:
            d.pop(key)
            continue
        if utils.is_dict(value):
            d[key] = canonicalize_dict(value, key)
            continue
        if not utils.is_list(value):
            continue

        if key == "enum":
            typed_values = utils.get_typed_enum_vals(value, schema_type)
            if not typed_values:
                return BOT
            d[key] = typed_values
        elif key == "required":
            d[key] = sorted(set(value))
        else:
            # "list" must be operand of boolean connectors
            d[key] = [canonicalize_dict(member) for member in value]

    if "enum" in d:
        return rewrite_enum(d)
    return d
def canonicalize_single_type(d):
    """Canonicalize a schema dict whose ``"type"`` is a single type name.

    A keyword is kept when it appears in ``definitions.Jcommonkw``, in the
    ``definitions.JtypesToKeywords`` entry for the type, or in
    ``definitions.JNonValidation``; every other keyword is removed from
    ``d``.  Dict values are canonicalized recursively, ``"required"`` is
    de-duplicated and sorted, ``"enum"`` is replaced by the result of
    ``utils.get_typed_enum_vals`` (even when that result is empty) and any
    other list has each element canonicalized.

    Returns ``rewrite_enum(d)`` when an ``"enum"`` key remains, else ``d``.
    """
    schema_type = d.get("type")
    if schema_type not in definitions.Jtypes:
        # NOTE(review): an unknown type falls through and yields None here;
        # confirm this is what callers expect.
        return None

    type_keywords = definitions.JtypesToKeywords.get(schema_type)

    # Walk a snapshot of the items because entries may be popped on the way.
    for key, value in list(d.items()):
        relevant = (key in definitions.Jcommonkw
                    or key in type_keywords
                    or key in definitions.JNonValidation)
        if not relevant:
            d.pop(key)
            continue
        if utils.is_dict(value):
            d[key] = canonicalize_dict(value, key)
            continue
        if not utils.is_list(value):
            continue

        if key == "enum":
            # The narrowed enum is stored as is, empty or not.
            d[key] = utils.get_typed_enum_vals(value, schema_type)
        elif key == "required":
            d[key] = sorted(set(value))
        else:
            # "list" must be operand of boolean connectors
            d[key] = [canonicalize_dict(member) for member in value]

    if "enum" in d:
        return rewrite_enum(d)
    return d
def canoncalize_single_type(d):
    """Canonicalize a single-typed schema dict and wrap it in its checker.

    Keywords that are neither in ``definitions.Jcommonkw`` nor listed for the
    type in ``definitions.JtypesToKeywords`` are removed from ``d``.  Dict
    values are canonicalized recursively and lists other than ``"enum"`` and
    ``"required"`` have each element canonicalized.

    Returns ``JSONbot()`` when ``"enum"`` has no value accepted by
    ``utils.get_valid_enum_vals``; otherwise the result of calling the
    ``typeToConstructor`` entry for the type on ``d``.  A type that is not in
    ``typeToConstructor`` is reported on stdout and the process exits with
    status 1.
    """
    t = d.get("type")
    if t in typeToConstructor:
        # remove irrelevant keywords
        for k, v in list(d.items()):
            if k not in definitions.Jcommonkw and k not in definitions.JtypesToKeywords.get(t):
                d.pop(k)
            elif utils.is_dict(v):
                d[k] = canoncalize_dict(v, k)
            elif utils.is_list(v):
                if k == "enum":
                    v = utils.get_valid_enum_vals(v, d)
                    # if we have a schema with enum key and the
                    # enum does not have any valid value against the schema,
                    # then this entire schema with the enum is uninhabited
                    if v:
                        d[k] = v
                    else:
                        return JSONbot()
                elif k == "required":
                    # De-duplicate AND order the names so that two schemas
                    # with the same required set compare equal as dicts.
                    # A bare list(set(v)) has no defined order.
                    d[k] = sorted(set(v))
                else:
                    d[k] = [canoncalize_dict(i) for i in v]
        return typeToConstructor[t](d)
    else:
        # TODO: or just return?
        print("Unknown schema type {} at:".format(t))
        print(d)
        print("Exiting...")
        sys.exit(1)
def simplify_schema_and_embed_checkers(s):
    """Replace a canonicalized schema dict by checker objects, recursively.

    The schema ``s`` is assumed to be canonicalized already, so it must be
    a dict.  Nested schemas under ``items``, ``additionalItems``,
    ``properties``, ``patternProperties`` and ``additionalProperties`` are
    converted first and written back into ``s``; then ``s`` itself is
    wrapped according to its ``type``, ``not``, ``anyOf`` or ``allOf`` key
    (checked in that order).
    """
    recurse = simplify_schema_and_embed_checkers

    if s == {} or not definitions.Jkeywords.intersection(s.keys()):
        return JSONtop()
    if "not" in s and s["not"] == {}:
        return JSONbot()

    # json.array specific
    if "items" in s:
        item_schema = s["items"]
        if utils.is_dict(item_schema):
            s["items"] = recurse(item_schema)
        elif utils.is_list(item_schema):
            s["items"] = [recurse(member) for member in item_schema]
    if "additionalItems" in s and utils.is_dict(s["additionalItems"]):
        s["additionalItems"] = recurse(s["additionalItems"])

    # json.object specific
    for container in ("properties", "patternProperties"):
        if container in s:
            s[container] = {
                name: recurse(subschema)
                for name, subschema in s[container].items()
            }
    if "additionalProperties" in s and utils.is_dict(
            s["additionalProperties"]):
        s["additionalProperties"] = recurse(s["additionalProperties"])

    # Wrap this level in its checker.
    if "type" in s:
        return typeToConstructor.get(s["type"])(s)
    if "not" in s:
        negated = s["not"]
        return typeToConstructor.get(negated["type"]).neg(negated)
    for connector in ("anyOf", "allOf"):
        if connector in s:
            operands = [recurse(member) for member in s[connector]]
            return boolToConstructor.get(connector)({connector: operands})
def canonicalize_dict(d, outer_key=None):
    """Canonicalize a schema dict, dispatching on its shape.

    ``{}`` and ``{"not": {}}`` are returned untouched.  When ``outer_key``
    is ``"properties"`` or ``"patternProperties"`` every value of ``d`` is
    canonicalized in place; when it is ``"dependencies"`` only the values
    that are dicts are.  In both cases ``d`` itself is returned.

    Otherwise a dict holding none of ``definitions.Jkeywords`` is returned
    as is, and any other dict is deep-copied and the copy handed to the
    connector / enum / single-type / list-of-types canonicalizer.
    """
    # Not actually needed, but lets canonicalization tests work properly.
    if d in ({}, {"not": {}}):
        return d

    # Validation keywords that sit next to a "$ref" are not dropped here:
    # jsonref currently handles that case properly.  This may have to be
    # handled here again once recursive "$ref" is resolved independently
    # of jsonref.

    # properties, patternProperties and dependencies are ordinary dict
    # containers, so normal dict canonicalization is skipped for them and
    # only their member schemas are canonicalized.
    if outer_key in ["properties", "patternProperties"]:
        for name, subschema in list(d.items()):
            d[name] = canonicalize_dict(subschema)
        return d
    if outer_key == "dependencies":
        for name, dependency in list(d.items()):
            if utils.is_dict(dependency):
                d[name] = canonicalize_dict(dependency)
        return d

    # A dict without any known schema keyword is left unchanged.
    if not definitions.Jkeywords.intersection(d.keys()):
        return d

    declared_type = d.get("type")
    connectors_present = definitions.Jconnectors.intersection(d.keys())

    # From here on work on a private copy, not on the caller's dict.
    d = copy.deepcopy(d)

    if connectors_present:
        return canonicalize_connectors(d)
    if "enum" in d:
        return canonicalize_enum(d)
    if utils.is_str(declared_type):
        return canonicalize_single_type(d)
    if not utils.is_list(declared_type):
        # No usable "type": fall back to every type in definitions.Jtypes.
        d["type"] = definitions.Jtypes
    return canonicalize_list_of_types(d)
def canoncalize_untyped_enum(d):
    """Derive ``"type"`` for an enum-only schema from its enum values.

    Each value in ``d["enum"]`` is classified by the first matching
    ``utils`` predicate; the distinct type names found are stored as a list
    under ``d["type"]`` and the dict is passed on to
    ``canoncalize_list_of_types``.
    """
    # The order matters: the first predicate that accepts a value wins.
    classifiers = (
        (utils.is_str, "string"),
        (utils.is_bool, "boolean"),
        (utils.is_int, "integer"),
        (utils.is_float, "number"),
        (utils.is_null, "null"),
        (utils.is_list, "array"),
        (utils.is_dict, "object"),
    )

    seen_types = set()
    for value in d.get("enum"):
        for accepts, type_name in classifiers:
            if accepts(value):
                seen_types.add(type_name)
                break

    d["type"] = list(seen_types)
    return canoncalize_list_of_types(d)
def meet_arrayItems_dict_list(s1, s2, ret):
    """Fill ``ret`` with the meet of a dict-``items_`` and a list-``items_`` array.

    ``s1.items_`` must be a dict and ``s2.items_`` a list.  Each entry of
    ``s2.items_`` is met with ``s1.items_``; collection stops at the first
    result that is bot or reports itself uninhabited, and the results
    gathered so far become ``ret.items_``.  ``ret.additionalItems`` is then
    derived from ``s2.additionalItems``.  Returns ``ret``.
    """
    assert utils.is_dict(s1.items_) and utils.is_list(
        s2.items_
    ), "Violating meet_arrayItems_dict_list condition: 's1.items is dict' and 's2.items is list'"

    met_prefix = []
    for positional in s2.items_:
        met = positional.meet(s1.items_)
        if is_bot(met) or met.checkUninhabited():
            # Keep only the prefix before the first uninhabited position.
            break
        met_prefix.append(met)
    ret.items_ = met_prefix

    extra = s2.additionalItems
    if extra == True:
        # Anything was allowed past the list, so s1.items_ alone constrains it.
        ret.additionalItems = copy.deepcopy(s1.items_)
    elif extra == False:
        ret.additionalItems = False
    elif utils.is_dict(extra):
        met_extra = extra.meet(s1.items_)
        if is_bot(met_extra):
            ret.additionalItems = False
        else:
            ret.additionalItems = met_extra
    return ret
def _isArraySubtype(s1, s2):
    """Decide whether array schema ``s1`` is a subtype of schema ``s2``.

    Checks, in order: that ``s2.type`` is ``"array"``; that ``s1.interval``
    is contained in ``s2.interval``; that ``s1`` is not non-unique while
    ``s2`` requires ``uniqueItems``; and finally the ``items_`` /
    ``additionalItems`` combination, where ``items_`` may be a dict (one
    schema for every element) or a list (positional schemas).

    Returns True or False on every branch that is handled explicitly.
    Combinations not covered by any branch (for example ``items_`` that is
    neither a dict nor a list) fall off the end and yield None.
    """
    if s2.type != "array":
        return False
    #
    # -- minItems and maxItems
    # presumably the interval encodes [minItems, maxItems] -- TODO confirm
    is_sub_interval = s1.interval in s2.interval
    if not is_sub_interval:
        print_db("__01__")
        return False
    #
    # -- uniqueItems
    # TODO Double-check. Could be more subtle?
    if not s1.uniqueItems and s2.uniqueItems:
        print_db("__02__")
        return False
    #
    # -- items = {not empty}
    # no need to check additionalItems
    if utils.is_dict(s1.items_):
        if utils.is_dict(s2.items_):
            # dict vs dict: plain subtype check between the two schemas
            print_db(s1.items_)
            print_db(s2.items_)
            if s1.items_.isSubtype(s2.items_):
                print_db("__05__")
                return True
            else:
                print_db("__06__")
                return False
        elif utils.is_list(s2.items_):
            # dict vs list: s1 places no bound on length via items_, so
            # the outcome depends on what s2 allows past its list
            if s2.additionalItems == False:
                print_db("__07__")
                return False
            elif s2.additionalItems == True:
                for i in s2.items_:
                    if not s1.items_.isSubtype(i):
                        print_db("__08__")
                        return False
                print_db("__09__")
                return True
            elif utils.is_dict(s2.additionalItems):
                # s1.items_ must fit every position of s2 and also s2's
                # schema for additional items
                for i in s2.items_:
                    if not s1.items_.isSubtype(i):
                        print_db("__10__")
                        return False
                print_db(type(s1.items_), s1.items_)
                print_db(type(s2.additionalItems), s2.additionalItems)
                if s1.items_.isSubtype(s2.additionalItems):
                    print_db("__11__")
                    return True
                else:
                    print_db("__12__")
                    return False
    #
    elif utils.is_list(s1.items_):
        print_db("lhs is list")
        if utils.is_dict(s2.items_):
            # list vs dict: every positional schema of s1 must fit
            # s2.items_, and so must whatever s1 allows past its list
            if s1.additionalItems == False:
                for i in s1.items_:
                    if not i.isSubtype(s2.items_):
                        print_db("__13__")
                        return False
                print_db("__14__")
                return True
            elif s1.additionalItems == True:
                for i in s1.items_:
                    if not i.isSubtype(s2.items_):
                        return False
                # since s1.additional items is True,
                # then TOP should also be a subtype of
                # s2.items
                if JSONtop().isSubtype(s2.items_):
                    return True
                return False
            elif utils.is_dict(s1.additionalItems):
                for i in s1.items_:
                    if not i.isSubtype(s2.items_):
                        return False
                if s1.additionalItems.isSubtype(s2.items_):
                    return True
                else:
                    return False
        # now lhs and rhs are lists
        elif utils.is_list(s2.items_):
            print_db("lhs & rhs are lists")
            len1 = len(s1.items_)
            len2 = len(s2.items_)
            # the shared prefix must match position by position
            for i, j in zip(s1.items_, s2.items_):
                if not i.isSubtype(j):
                    return False
            if len1 == len2:
                print_db("len1 == len2")
                if s1.additionalItems == s2.additionalItems:
                    return True
                elif s1.additionalItems == True and s2.additionalItems == False:
                    return False
                elif s1.additionalItems == False and s2.additionalItems == True:
                    return True
                else:
                    return s1.additionalItems.isSubtype(s2.additionalItems)
            elif len1 > len2:
                diff = len1 - len2
                # range(len1 - diff, len1) is range(len2, len1): the
                # positions s1 lists beyond the end of s2's list, which
                # are compared against s2.additionalItems
                for i in range(len1 - diff, len1):
                    if s2.additionalItems == False:
                        return False
                    elif s2.additionalItems == True:
                        return True
                    elif not s1.items_[i].isSubtype(s2.additionalItems):
                        print_db("9999")
                        return False
                print_db("8888")
                return True
            else:
                # len2 > len 1
                diff = len2 - len1
                # range(len2 - diff, len2) is range(len1, len2): the
                # positions s2 lists beyond the end of s1's list, which
                # s1 can only fill through its additionalItems
                for i in range(len2 - diff, len2):
                    if s1.additionalItems == False:
                        return True
                    elif s1.additionalItems == True:
                        return False
                    elif not s1.additionalItems.isSubtype(s2.items_[i]):
                        return False
                return s1.additionalItems.isSubtype(s2.additionalItems)
def _meetArray(s1, s2):
    """Compute the meet of array schema ``s1`` with schema ``s2``.

    When ``s2.type`` is not ``"array"`` the result is ``JSONbot()``.
    Otherwise a fresh ``JSONTypeArray`` is returned whose ``minItems`` is
    the larger minimum, whose ``maxItems`` is the smaller maximum, whose
    ``uniqueItems`` is the ``or`` of both, and whose ``items_`` /
    ``additionalItems`` are built from the dict-or-list shape of the two
    ``items_`` values.  Combinations where an ``items_`` is neither a dict
    nor a list leave those two attributes as the constructor set them.
    """
    if s2.type == "array":
        ret = JSONTypeArray({})
        ret.minItems = max(s1.minItems, s2.minItems)
        ret.maxItems = min(s1.maxItems, s2.maxItems)
        ret.uniqueItems = s1.uniqueItems or s2.uniqueItems

        def meet_arrayItems_dict_list(s1, s2, ret):
            # One side has a single schema for all elements (dict), the
            # other has positional schemas (list).
            assert utils.is_dict(s1.items_) and utils.is_list(
                s2.items_
            ), "Violating meet_arrayItems_dict_list condition: 's1.items is dict' and 's2.items is list'"

            itms = []
            for i in s2.items_:
                r = i.meet(s1.items_)
                if not (is_bot(r) or r.checkUninhabited()):
                    itms.append(r)
                else:
                    # Stop at the first position nothing can satisfy.
                    break

            ret.items_ = itms

            if s2.additionalItems == True:
                ret.additionalItems = copy.deepcopy(s1.items_)
            elif s2.additionalItems == False:
                ret.additionalItems = False
            elif utils.is_dict(s2.additionalItems):
                addItms = s2.additionalItems.meet(s1.items_)
                ret.additionalItems = False if is_bot(
                    addItms) else addItms
            return ret

        if utils.is_dict(s1.items_):
            if utils.is_dict(s2.items_):
                # Store under items_, the attribute every other branch
                # reads and writes.  The previous code assigned to
                # ``ret.items``, which left items_ unset for this case.
                ret.items_ = s1.items_.meet(s2.items_)
            elif utils.is_list(s2.items_):
                ret = meet_arrayItems_dict_list(s1, s2, ret)

        elif utils.is_list(s1.items_):
            if utils.is_dict(s2.items_):
                ret = meet_arrayItems_dict_list(s2, s1, ret)

            elif utils.is_list(s2.items_):
                self_len = len(s1.items_)
                s_len = len(s2.items_)

                def meet_arrayAdditionalItems_list_list(s1, s2):
                    # Meet of the two additionalItems values, each of
                    # which may be a bool or a schema.
                    if utils.is_bool(
                            s1.additionalItems) and utils.is_bool(
                                s2.additionalItems):
                        ad = s1.additionalItems and s2.additionalItems
                    elif utils.is_dict(s1.additionalItems):
                        ad = s1.additionalItems.meet(
                            s2.additionalItems)
                    elif utils.is_dict(s2.additionalItems):
                        ad = s2.additionalItems.meet(
                            s1.additionalItems)
                    return False if is_bot(ad) else ad

                def meet_array_longlist_shorterlist(s1, s2, ret):
                    # s1 lists more positions than s2: the shared prefix
                    # is met pairwise, the tail of s1 is met with
                    # s2.additionalItems.
                    s1_len = len(s1.items_)
                    s2_len = len(s2.items_)
                    assert s1_len > s2_len, "Violating meet_array_longlist_shorterlist condition: 's1.len > s2.len'"
                    itms = []
                    for i, j in zip(s1.items_, s2.items_):
                        r = i.meet(j)
                        if not (is_bot(r) or r.checkUninhabited()):
                            itms.append(r)
                        else:
                            ad = False
                            break
                    else:
                        # Runs only when the prefix loop did not break.
                        for i in range(s2_len, s1_len):
                            r = s1.items_[i].meet(s2.additionalItems)
                            if not (is_bot(r) or r.checkUninhabited()):
                                itms.append(r)
                            else:
                                ad = False
                                break
                        else:
                            # Runs only when the tail loop did not break.
                            ad = meet_arrayAdditionalItems_list_list(
                                s1, s2)

                    ret.additionalItems = ad
                    ret.items_ = itms
                    return ret

                if self_len == s_len:
                    itms = []
                    for i, j in zip(s1.items_, s2.items_):
                        r = i.meet(j)
                        if not (is_bot(r) or r.checkUninhabited()):
                            itms.append(r)
                        else:
                            ad = False
                            break
                    else:
                        # Runs only when the loop did not break.
                        ad = meet_arrayAdditionalItems_list_list(
                            s1, s2)

                    ret.additionalItems = ad
                    ret.items_ = itms

                elif self_len > s_len:
                    ret = meet_array_longlist_shorterlist(s1, s2, ret)

                elif self_len < s_len:
                    ret = meet_array_longlist_shorterlist(s2, s1, ret)

        return ret

    else:
        return JSONbot()
def _isUninhabited(self):
    """Report whether this array schema is treated as uninhabited.

    True when ``minItems`` exceeds ``maxItems``; or when ``items_`` is a
    list and either ``additionalItems`` equals False while ``minItems``
    exceeds the list length, or the list is empty.
    """
    if self.minItems > self.maxItems:
        return True
    if not utils.is_list(self.items_):
        return False
    # items_ is a list from here on.
    if self.additionalItems == False and self.minItems > len(self.items_):
        return True
    return len(self.items_) == 0
def compute_actual_maxItems(self):
    """Lower ``maxItems`` to the length of a list-valued ``items_``.

    Applies only when ``items_`` is a list and ``is_bot`` accepts
    ``additionalItems``; ``maxItems`` is rewritten only when the capped
    value differs from the current one.
    """
    if not (utils.is_list(self.items_) and is_bot(self.additionalItems)):
        return
    capped = min(self.maxItems, len(self.items_))
    if capped != self.maxItems:
        self.maxItems = capped