Ejemplo n.º 1
0
 def _isNumberSubtype(s1, s2):
     if s2.type == "number":
         is_sub_interval = s1.interval in s2.interval
         if not is_sub_interval:
             print_db("num__00")
             return False
         #
         if (s1.multipleOf == s2.multipleOf) \
                 or (s1.multipleOf != None and s2.multipleOf == None) \
                 or (s1.multipleOf != None and s2.multipleOf != None and s1.multipleOf % s2.multipleOf == 0) \
                 or (s1.multipleOf == None and s2.multipleOf == 1):
             print_db("num__02")
             return True
     else:
         return False
Ejemplo n.º 2
0
 def _isIntegerSubtype(s1, s2):
     ''' Decide whether integer schema s1 is a subtype of schema s2.

         Returns True only when s2.type is one of definitions.Jnumeric,
         s1.interval is contained in s2.interval and the multipleOf
         constraints are compatible. Returns False in every other case
         (never None).
     '''
     if s2.type not in definitions.Jnumeric:
         return False
     #
     # -- range check: lhs interval must lie inside rhs interval
     if s1.interval not in s2.interval:
         print_db("num__00")
         return False
     #
     # -- multipleOf check; compatible when
     #    * both sides carry the same value (including both unset), or
     #    * only the lhs is constrained, or
     #    * the lhs factor is an exact multiple of the rhs factor, or
     #    * the lhs is unconstrained and the rhs factor is 1.
     lhs_factor = s1.multipleOf
     rhs_factor = s2.multipleOf
     if (lhs_factor == rhs_factor) \
             or (lhs_factor is not None and rhs_factor is None) \
             or (lhs_factor is not None and rhs_factor is not None
                 and lhs_factor % rhs_factor == 0) \
             or (lhs_factor is None and rhs_factor == 1):
         print_db("num__02")
         return True
     #
     # Everything else is incompatible: an unconstrained lhs against a
     # constrained rhs (already False before), and two factors where the
     # lhs is not a multiple of the rhs (previously fell through and
     # returned None).
     return False
Ejemplo n.º 3
0
def isSubschema(s1, s2):
    ''' Top-level entry point: is schema s1 a subschema of schema s2?

        Each operand is passed to validate_schema and then canonicalized,
        lhs first and rhs second, since subtyping of invalid schemas is
        erroneous. The result is whatever isSubtype of the canonical lhs
        returns for the canonical rhs.
    '''

    def _checked_canonical(side, schema):
        # Validate before canonicalizing; log the schema before and after.
        validate_schema(schema)
        print_db(side, schema)
        canonical = canoncalize_json(schema)
        print_db(side + "_canonical", canonical)
        return canonical

    lhs = _checked_canonical("LHS", s1)
    rhs = _checked_canonical("RHS", s2)
    return lhs.isSubtype(rhs)
Ejemplo n.º 4
0
 def _isArraySubtype(s1, s2):
     if s2.type != "array":
         return False
     #
     # -- minItems and maxItems
     is_sub_interval = s1.interval in s2.interval
     if not is_sub_interval:
         print_db("__01__")
         return False
     #
     # -- uniqueItemsue
     # TODO Double-check. Could be more subtle?
     if not s1.uniqueItems and s2.uniqueItems:
         print_db("__02__")
         return False
     #
     # -- items = {not empty}
     # no need to check additionalItems
     if utils.is_dict(s1.items_):
         if utils.is_dict(s2.items_):
             print_db(s1.items_)
             print_db(s2.items_)
             if s1.items_.isSubtype(s2.items_):
                 print_db("__05__")
                 return True
             else:
                 print_db("__06__")
                 return False
         elif utils.is_list(s2.items_):
             if s2.additionalItems == False:
                 print_db("__07__")
                 return False
             elif s2.additionalItems == True:
                 for i in s2.items_:
                     if not s1.items_.isSubtype(i):
                         print_db("__08__")
                         return False
                 print_db("__09__")
                 return True
             elif utils.is_dict(s2.additionalItems):
                 for i in s2.items_:
                     if not s1.items_.isSubtype(i):
                         print_db("__10__")
                         return False
                 print_db(type(s1.items_), s1.items_)
                 print_db(type(s2.additionalItems), s2.additionalItems)
                 if s1.items_.isSubtype(s2.additionalItems):
                     print_db("__11__")
                     return True
                 else:
                     print_db("__12__")
                     return False
     #
     elif utils.is_list(s1.items_):
         print_db("lhs is list")
         if utils.is_dict(s2.items_):
             if s1.additionalItems == False:
                 for i in s1.items_:
                     if not i.isSubtype(s2.items_):
                         print_db("__13__")
                         return False
                 print_db("__14__")
                 return True
             elif s1.additionalItems == True:
                 for i in s1.items_:
                     if not i.isSubtype(s2.items_):
                         return False
                     # since s1.additional items is True,
                     # then TOP should also be a subtype of
                     # s2.items
                 if JSONtop().isSubtype(s2.items_):
                     return True
                 return False
             elif utils.is_dict(s1.additionalItems):
                 for i in s1.items_:
                     if not i.isSubtype(s2.items_):
                         return False
                 if s1.additionalItems.isSubtype(s2.items_):
                     return True
                 else:
                     return False
         # now lhs and rhs are lists
         elif utils.is_list(s2.items_):
             print_db("lhs & rhs are lists")
             len1 = len(s1.items_)
             len2 = len(s2.items_)
             for i, j in zip(s1.items_, s2.items_):
                 if not i.isSubtype(j):
                     return False
             if len1 == len2:
                 print_db("len1 == len2")
                 if s1.additionalItems == s2.additionalItems:
                     return True
                 elif s1.additionalItems == True and s2.additionalItems == False:
                     return False
                 elif s1.additionalItems == False and s2.additionalItems == True:
                     return True
                 else:
                     return s1.additionalItems.isSubtype(
                         s2.additionalItems)
             elif len1 > len2:
                 diff = len1 - len2
                 for i in range(len1 - diff, len1):
                     if s2.additionalItems == False:
                         return False
                     elif s2.additionalItems == True:
                         return True
                     elif not s1.items_[i].isSubtype(
                             s2.additionalItems):
                         print_db("9999")
                         return False
                 print_db("8888")
                 return True
             else:  # len2 > len 1
                 diff = len2 - len1
                 for i in range(len2 - diff, len2):
                     if s1.additionalItems == False:
                         return True
                     elif s1.additionalItems == True:
                         return False
                     elif not s1.additionalItems.isSubtype(
                             s2.items_[i]):
                         return False
                 return s1.additionalItems.isSubtype(s2.additionalItems)
Ejemplo n.º 5
0
        def _isObjectSubtype(s1, s2):
            ''' The general intuition is that a json object with more keys is more restrictive 
                than a similar object with fewer keys. 
                
                E.g.: if corresponding keys have same shcemas, then 
                {name: {..}, age: {..}} <: {name: {..}}
                {name: {..}, age: {..}} />: {name: {..}}

                So the subtype checking is divided into two major parts:
                I) lhs keys/patterns/additional should be a superset of rhs
                II) schemas of comparable keys should have lhs <: rhs
            '''
            if s2.type != "object":
                return False

            # Check properties range
            is_sub_interval = s1.interval in s2.interval
            if not is_sub_interval:
                print_db("__00__")
                return False
            #
            else:
                # If ranges are ok, check another trivial case of almost identical objects.
                # This is some sort of performance heuristic.
                if set(s1.required).issuperset(s2.required) \
                    and s1.properties == s2.properties \
                    and s1.patternProperties == s2.patternProperties \
                    and (s1.additionalProperties == s2.additionalProperties
                         or (utils.is_dict(s1.additionalProperties)
                             and s1.additionalProperties.isSubtype(s2.additionalProperties))):
                    print_db("__01__")
                    return True
            #

            def get_schema_for_key(k, s):
                ''' Searches for matching key and get the corresponding schema(s).
                    Returns iterable because if a key matches more than one pattern, 
                    that key schema has to match all corresponding patterns schemas.
                '''
                if k in s.properties.keys():
                    return [k.properties[k]]
                else:
                    ret = []
                    for k_ in s.patternProperties.keys():
                        if utils.regex_matches_string(k_, k):
                            # in case a key has to be checked against patternProperties,
                            # it has to adhere to all schemas which have pattern matching the key.
                            ret.append(k.patternProperties[k_])
                    if ret:
                        return ret

                return [s.additionalProperties]

            # Check that required keys satisfy subtyping.
            # lhs required keys should be superset of rhs required keys.
            if not set(s1.required).issuperset(s2.required):
                print_db("__02__")
                return False
            # If required keys are properly defined, check their corresponding
            # schemas and make sure they are subtypes.
            # This is required because you could have a required key which does not
            # have an explicit schema defined by the json object.

            else:
                for k in set(s1.required).intersection(s2.required):
                    for lhs_ in get_schema_for_key(k, s1):
                        for rhs_ in get_schema_for_key(k, s2):
                            if lhs_:
                                if rhs_:
                                    if not lhs_.isSubtype(rhs_):
                                        print_db("__03__")
                                        return False
                                else:
                                    print_db("__04__")
                                    return False

            # Missing keys on the rhs
            # I) Simple case:
            # lhs = {"properties": {p1: {string}}
            # rhs = {"properties": {p1: {string}, p2: {int}}}
            # >> this means lhs isNOT subtype of rhs cuz lhs
            # would accept any p2 that does not necesaarily match
            # the type int of the p2 on the rhs
            # II) what if
            # lhs = {"properties": {p1: {string},
            #        "patternProperties": {p2: {int}}}
            # again, ideally this means lhs isNOT subtype of rhs
            # because lhs accept any property name with pattern .*p2.*
            # III) however, the tricky case is: it could happend that
            # every string matched by patternProperties on the lhs exist as a property
            # or property pattern on the rhs, then we need to do picky and enumerative
            # checks cuz it could be that indeed lhs isSubtype of rhs.

            # break it down to subcases
            # if set(s1.properties.keys()).issubset(s2.properties.keys()) \
            #     and len(s1.properties.keys()) < len(s2.properties.keys()) \
            #     and len(s1.patternProperties.keys()) == 0:

            # TODO: The following is very inefficient. Can we do better?
            # lhs_keys = "|".join(k for k in s1.properties.keys(
            # )) + "|".join(utils.regex_unanchor(k) for k in s1.patternProperties.keys())
            # rhs_keys = "|".join(k for k in s2.properties.keys(
            # )) + "|".join(utils.regex_unanchor(k) for k in s2.patternProperties.keys())
            # lhs_keys_proper_subset_rhs_keys = utils.regex_isProperSubset(
            #     lhs_keys, rhs_keys)
            # if lhs_keys_proper_subset_rhs_keys:
            #     print_db("__05__")
            #     return False

            extra_keys_on_rhs = set(s2.properties.keys()).difference(
                s1.properties.keys())
            for k in extra_keys_on_rhs.copy():
                for k_ in s1.patternProperties.keys():
                    if utils.regex_matches_string(k_, k):
                        extra_keys_on_rhs.remove(k)
            if extra_keys_on_rhs:
                if not s1.additionalProperties:
                    print_db("__05__")
                    return False
                else:
                    for k in extra_keys_on_rhs:
                        if not s1.additionalProperties.isSubtype(
                                s2.properties[k]):
                            print_db("__06__")
                            return False

            extra_patterns_on_rhs = set(
                s2.patternProperties.keys()).difference(
                    s1.patternProperties.keys())
            for k in extra_patterns_on_rhs.copy():
                for k_ in s1.patternProperties.keys():
                    if utils.regex_isSubset(k, k_):
                        extra_patterns_on_rhs.remove(k)
            if extra_patterns_on_rhs:
                if not s1.additionalProperties:
                    print_db("__07__")
                    return False
                else:
                    for k in extra_patterns_on_rhs:
                        if not s1.additionalProperties.isSubtype(
                                s2.patternProperties[k]):
                            try:  # means regex k is infinite
                                parse(k).cardinality()
                            except OverflowError:
                                print_db("__08__")
                                return False
            #
            # missing_props_from_lhs = set(
            #     s2.properties.keys()) - set(s1.properties.keys())
            # for k in missing_props_from_lhs:
            #     for k_ in s1.patternProperties.keys():
            #         if utils.regex_matches_string(k_, k):
            #             if not s1.patternProperties[k_].isSubtype(s2.properties[k]):
            #                 return False

            # Now, lhs has a patternProperty which is subtype of a property on the rhs.
            # Idealy, at this point, I'd like to check that EVERY property matched by
            # this pattern also exist on the rhs.
            # from greenery.lego import parse
            # p = parse(k_)
            # try:
            # p.cardinality

            # first, matching properties should be subtype pairwise
            unmatched_lhs_props_keys = set(s1.properties.keys())
            for k in s1.properties.keys():
                if k in s2.properties.keys():
                    unmatched_lhs_props_keys.discard(k)
                    if not s1.properties[k].isSubtype(s2.properties[k]):
                        return False
                # for the remaining keys, make sure they either don't exist
                # in rhs or if they, then their schemas should be sub-type
                else:
                    for k_ in s2.patternProperties:
                        # if utils.regex_isSubset(k, k_):
                        if utils.regex_matches_string(k_, k):
                            unmatched_lhs_props_keys.discard(k)
                            if not s1.properties[k].isSubtype(
                                    s2.patternProperties[k_]):
                                return False

            # second, matching patternProperties should be subtype pairwise
            unmatched_lhs_pProps_keys = set(s1.patternProperties.keys())
            for k in s1.patternProperties.keys():
                for k_ in s2.patternProperties.keys():
                    if utils.regex_isSubset(k_, k):
                        unmatched_lhs_pProps_keys.discard(k)
                        if not s1.patternProperties[k].isSubtype(
                                s2.patternProperties[k_]):
                            return False
            # third,

            # fourth,
            if s2.additionalProperties == True:
                return True
            elif s2.additionalProperties == False:
                if s1.additionalProperties == True:
                    return False
                elif unmatched_lhs_props_keys or unmatched_lhs_pProps_keys:
                    return False
                else:
                    return True
            else:
                for k in unmatched_lhs_props_keys:
                    if not s1.properties[k].isSubtype(s2.additionalProperties):
                        return False
                for k in unmatched_lhs_pProps_keys:
                    if not s1.patternProperties[k].isSubtype(
                            s2.additionalProperties):
                        return False
                if s1.additionalProperties == True:
                    return False
                elif s1.additionalProperties == False:
                    return True
                else:
                    return s1.additionalProperties.isSubtype(
                        s2.additionalProperties)
Ejemplo n.º 6
0
def prepare_operands(s1, s2):
    ''' Prepare the lhs and rhs schemas for subtype checking.

        Both schemas first go through jsonref.JsonRef.replace_refs so that
        $ref entries are resolved, then each one is canonicalized and
        passed to simplify_schema_and_embed_checkers (lhs first, rhs
        second). Returns the pair (lhs, rhs) of processed schemas.
    '''
    # Resolve $ref on both operands before any canonicalization.
    # At the moment, recursive/circular refs give undefined behaviour.
    # Doing this eagerly is not very efficient; it could perhaps be lazy
    # (jsonref.loads(json.dumps(...)) was the earlier alternative).
    resolved_lhs = jsonref.JsonRef.replace_refs(s1)
    resolved_rhs = jsonref.JsonRef.replace_refs(s2)

    def _canonical_with_checkers(side, schema):
        # Canonicalize and embed checkers, logging before and after.
        # According to the original note, this step also validates the
        # input schema and its canonical form.
        print_db(side, schema)
        print_db()
        prepared = simplify_schema_and_embed_checkers(
            canonicalize_schema(schema))
        print_db(side + "_canonical", prepared)
        print_db()
        return prepared

    lhs = _canonical_with_checkers("LHS", resolved_lhs)
    rhs = _canonical_with_checkers("RHS", resolved_rhs)
    return lhs, rhs