Ejemplo n.º 1
0
def nodeSatisfiesDataType(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.3 Datatype Constraints <http://shex.io/shex-semantics/#datatype>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n is an Literal with the datatype v and, if v is in
    the set of SPARQL operand data types[sparql11-query], an XML schema string with a value of the lexical form of
    n can be cast to the target type v per XPath Functions 3.1 section 19 Casting[xpath-functions]. Only datatypes
    supported by SPARQL MUST be tested but ShEx extensions MAY add support for other datatypes.

    :param cntxt: evaluation context; receives ``fail_reason`` and supplies ``n3_mapper`` / ``dump_bnode``
    :param n: node being tested
    :param nc: node constraint; only its ``datatype`` attribute is read here
    :param c: debug context; the datatype is printed when ``c.debug`` is true
    :return: True when there is no datatype constraint or ``n`` satisfies it, otherwise False
    """
    if nc.datatype is None:
        return True
    if c.debug:
        print(f" Datatype: {nc.datatype}")
    if not isinstance(n, Literal):
        # Formatted rather than concatenated so a non-str operand cannot raise TypeError while reporting
        cntxt.fail_reason = f"Datatype constraint {nc.datatype} does not match " \
                            f"{type(n).__name__} {cntxt.n3_mapper.n3(n)}"
        cntxt.dump_bnode(n)
        return False
    actual_datatype = _datatype(n)
    # Passes on an exact datatype match, or when the lexical form can be cast to a SPARQL operand datatype
    if actual_datatype == str(nc.datatype) or \
            (is_sparql_operand_datatype(nc.datatype) and can_cast_to(n, nc.datatype)):
        return True
    # NOTE(review): _datatype is not visible here; formatting tolerates it returning a non-str such as None
    cntxt.fail_reason = f"Datatype mismatch - expected: {nc.datatype} actual: {actual_datatype}"
    return False
def _fail_triples(cntxt: Context, T: RDFGraph) -> None:
    """ Record a short listing of the triples in ``T`` as failure reasons on ``cntxt``.

    Nothing is recorded for an empty ``T``.  Otherwise a ``Triples:`` header is recorded, followed by at most
    five triples in sorted order, followed by an ellipsis line when more than five triples were supplied.

    :param cntxt: evaluation context; each assignment to ``fail_reason`` contributes one line
    :param T: triples to report
    """
    max_listed = 5
    tlist = list(T)
    if tlist:
        cntxt.fail_reason = "Triples:"
        # Only the first few are listed; the ellipsis below signals that the rest were left out
        for t in sorted(tlist)[:max_listed]:
            cntxt.fail_reason = f"      {cntxt.n3_mapper.n3(t)}"
        if len(tlist) > max_listed:
            cntxt.fail_reason = "      ...   "
Ejemplo n.º 3
0
def nodeSatisfiesStringFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema String Facet Constraints <http://shex.io/shex-semantics/#xs-string>`_

    String facet constraints apply to the lexical form of the RDF Literals and IRIs and blank node
    identifiers (see note below regarding access to blank node identifiers).

    :param cntxt: evaluation context; receives ``fail_reason`` on the first facet that fails
    :param n: node being tested; its ``str()`` form is used as the lexical value
    :param nc: node constraint; ``length``, ``minlength``, ``maxlength``, ``pattern`` and ``flags`` are read
    :param _c: debug context (unused)
    :return: True when no string facet is present or every present facet passes, otherwise False
    """
    if nc.length is None and nc.minlength is None and nc.maxlength is None and nc.pattern is None:
        return True

    # Let lex =
    #
    #  * if the value n is an RDF Literal, the lexical form of the literal (see[rdf11-concepts] section 3.3 Literals).
    #  * if the value n is an IRI, the IRI string (see[rdf11-concepts] section 3.2 IRIs).
    #  * if the value n is a blank node, the blank node identifier (see[rdf11-concepts] section 3.4 Blank Nodes).
    lex = str(n)
    lex_len = len(lex)

    # Facets are checked in a fixed order and the first failure is the one reported:
    #  * "length":    len(lex) == v
    #  * "minlength": len(lex) >= v
    #  * "maxlength": len(lex) <= v
    #  * "pattern":   pattern_match(v, flags, lex) is true
    # TODO: Figure out whether we need to connect this to the lxml exslt functions
    # TODO: Map flags if not
    if nc.length is not None and lex_len != nc.length:
        cntxt.fail_reason = f"String length mismatch - expected: {nc.length} actual: {lex_len}"
        return False
    if nc.minlength is not None and lex_len < nc.minlength:
        cntxt.fail_reason = f"String length violation - minimum: {nc.minlength} actual: {lex_len}"
        return False
    if nc.maxlength is not None and lex_len > nc.maxlength:
        cntxt.fail_reason = f"String length violation - maximum: {nc.maxlength} actual: {lex_len}"
        return False
    # The pattern is evaluated once, and only after the length facets have passed
    if nc.pattern is not None and not pattern_match(nc.pattern, nc.flags, lex):
        cntxt.fail_reason = f"Pattern match failure - pattern: {nc.pattern} flags:{nc.flags} string: {lex}"
        return False
    return True
def matchesCardinality(cntxt: Context,
                       T: RDFGraph,
                       expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext,
                       extras: Optional[Set[URIRef]] = None) -> bool:
    """ Check the triples in ``T`` against ``expr`` and its min/max cardinality.

    A missing ``min`` or ``max`` on ``expr`` defaults to 1 and a ``max`` of -1 means unbounded.  For a
    TripleConstraint the triple count is compared with the bounds and the triples are tested individually; for any
    other expression ``T`` is partitioned and each candidate partition is tried until one has every part matching.

    :param cntxt: evaluation context; receives ``fail_reason`` entries on failure
    :param T: candidate triples
    :param expr: triple expression carrying the optional ``min`` / ``max`` attributes
    :param c: debug context; a summary line is printed when ``c.debug`` is true
    :param extras: predicates whose triples are left out of the maximum check for a TripleConstraint
    :return: True if ``T`` matches ``expr`` within its cardinality
    """
    # TODO: Cardinality defaults into spec
    lower = 1 if expr.min is None else expr.min
    upper = 1 if expr.max is None else expr.max
    upper_text = '*' if upper == -1 else upper
    cardinality_text = f"{{{lower},{upper_text}}}"

    # An optional expression with no triples at all passes trivially (and silently)
    trivially_satisfied = lower == 0 and len(T) == 0
    if c.debug and not trivially_satisfied:
        print(f"{cardinality_text} matching {len(T)} triples")
    if trivially_satisfied:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Succeed on the first partition in which every part matches the expression
        if any(all(matchesExpr(cntxt, part, expr) for part in grouping)
               for grouping in _partitions(T, lower, upper)):
            return True
        if lower != 1 or upper != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    triple_count = len(T)
    if triple_count < lower:
        if triple_count > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {triple_count} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f"   No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Triples whose predicate is listed in extras do not count towards the maximum
    candidates = RDFGraph([t for t in T if t.p not in extras]) if extras else T
    if 0 <= upper < len(candidates):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f"   {len(T)} triples exceeds max {cardinality_text}"
        return False
    if len(candidates):
        return all(matchesTripleConstraint(cntxt, t, expr) for t in candidates)
    # Everything was an extra: one matching triple is enough
    return any(matchesTripleConstraint(cntxt, t, expr) for t in T)
Ejemplo n.º 5
0
def nodeSatisfiesNumericFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema Numeric Facet Constraints <http://shex.io/shex-semantics/#xs-numeric>`_

    Numeric facet constraints apply to the numeric value of RDF Literals with datatypes listed in SPARQL 1.1
    Operand Data Types[sparql11-query]. Numeric constraints on non-numeric values fail. totaldigits and
    fractiondigits constraints on values not derived from xsd:decimal fail.

    :param cntxt: evaluation context; receives ``fail_reason`` describing the first facet that fails
    :param n: node being tested; ``n.value`` supplies the number once ``is_numeric(n)`` holds
    :param nc: node constraint; the min/max inclusive/exclusive, ``totaldigits`` and ``fractiondigits`` facets are read
    :param _c: debug context (unused)
    :return: True when no numeric facet is present or every present facet passes, otherwise False
    """
    if nc.mininclusive is None and nc.minexclusive is None and nc.maxinclusive is None \
            and nc.maxexclusive is None and nc.totaldigits is None and nc.fractiondigits is None:
        return True

    if not is_numeric(n):
        cntxt.fail_reason = f"Numeric test on non-number: {n}"
        return False
    v = n.value
    if not isinstance(v, numbers.Number):
        cntxt.fail_reason = f"Numeric test on non-number: {v}"
        return False

    # Each bound is written as "not <pass condition>" so that a value for which every comparison is false
    # (e.g. NaN) is still rejected, with a specific reason
    if nc.mininclusive is not None and not v >= nc.mininclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum inclusive: {nc.mininclusive} actual: {v}"
        return False
    if nc.minexclusive is not None and not v > nc.minexclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum exclusive: {nc.minexclusive} actual: {v}"
        return False
    if nc.maxinclusive is not None and not v <= nc.maxinclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum inclusive: {nc.maxinclusive} actual: {v}"
        return False
    if nc.maxexclusive is not None and not v < nc.maxexclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum exclusive: {nc.maxexclusive} actual: {v}"
        return False

    # A digit count of None fails the facet
    if nc.totaldigits is not None:
        digits = total_digits(n)
        if digits is None or digits > nc.totaldigits:
            cntxt.fail_reason = f"Numeric value violation - max total digits: {nc.totaldigits} value: {v}"
            return False
    if nc.fractiondigits is not None:
        digits = fraction_digits(n)
        if digits is None or digits > nc.fractiondigits:
            cntxt.fail_reason = f"Numeric value violation - max fractional digits: {nc.fractiondigits} value: {v}"
            return False
    return True
def matchesTripleConstraint(cntxt: Context, t: RDFTriple,
                            expr: ShExJ.TripleConstraint,
                            c: DebugContext) -> bool:
    """
    expr is a TripleConstraint and:

    * t is a triple
    * t's predicate equals expr's predicate.
      Let value be t's subject if inverse is true, else t's object.
    * if inverse is true, t is in arcsIn, else t is in arcsOut.

    :param cntxt: evaluation context; receives ``fail_reason`` on a predicate mismatch
    :param t: triple to test (``t.s``, ``t.p`` and ``t.o`` are read)
    :param expr: triple constraint supplying ``predicate``, ``inverse`` and the optional ``valueExpr``
    :param c: debug context; the triple and the constraint JSON are printed when ``c.debug`` is true
    :return: True if the predicate matches and the selected value satisfies ``valueExpr`` (when present)
    """
    # Imported at call time rather than at module level -- presumably to avoid a circular import; TODO confirm
    from pyshex.shape_expressions_language.p5_3_shape_expressions import satisfies

    if c.debug:
        # t is formatted, not concatenated: "+" between a str and a triple object raises TypeError
        print(c.i(1, f" triple: {t}"))
        print(c.i(1, '', expr._as_json_dumps().split('\n')))

    if uriref_matches_iriref(t.p, expr.predicate):
        value = t.s if expr.inverse else t.o
        return expr.valueExpr is None or satisfies(cntxt, value, expr.valueExpr)

    cntxt.fail_reason = f"Predicate mismatch: {t.p} ≠ {expr.predicate}"
    return False
def matchesCardinality(cntxt: Context, T: RDFGraph,
                       expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; receives ``fail_reason`` entries on failure
    :param T: candidate triples
    :param expr: triple expression carrying the optional ``min`` / ``max`` attributes (each defaults to 1)
    :param c: debug context; a summary line is printed when ``c.debug`` is true
    :return: True if ``T`` matches ``expr`` within its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = expr.min if expr.min is not None else 1
    max_ = expr.max if expr.max is not None else 1

    # Rendered as "{min,max}" with "*" standing for an unbounded maximum
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"
    if c.debug and (min_ != 0 or len(T) != 0):
        print(f"{cardinality_text} matching {len(T)} triples")
    if min_ == 0 and len(T) == 0:
        return True
    if isinstance(expr, ShExJ.TripleConstraint):
        if len(T) < min_:
            if len(T) > 0:
                _fail_triples(cntxt, T)
                cntxt.fail_reason = f"   {len(T)} triples less than {cardinality_text}"
            else:
                cntxt.fail_reason = \
                    f"   No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
            return False
        elif 0 <= max_ < len(T):
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {len(T)} triples exceeds max {cardinality_text}"
            return False
        else:
            return all(matchesTripleConstraint(cntxt, t, expr) for t in T)
    else:
        # Succeed on the first partition in which every part matches the expression
        for partition in _partitions(T, min_, max_):
            if all(matchesExpr(cntxt, part, expr) for part in partition):
                return True
        if min_ != 1 or max_ != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
        return False
def matchesTripleExprLabel(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, c: DebugContext) -> bool:
    """ Resolve a triple expression label and evaluate the expression it names against ``T``.

    :param cntxt: evaluation context; resolves the label and receives ``fail_reason`` when it is unknown
    :param T: candidate triples
    :param expr: label of the triple expression to look up
    :param c: debug context; the label is printed when ``c.debug`` is true
    :return: the result of ``matchesCardinality`` on the resolved expression, or False if the lookup is falsy
    """
    if c.debug:
        print(f" {expr}")
    resolved = cntxt.tripleExprFor(expr)
    if not resolved:
        cntxt.fail_reason = f"{expr}: Labeled triple expression not found"
        return False
    return matchesCardinality(cntxt, T, resolved)
def matchesTripleExprRef(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, _: DebugContext) -> bool:
    """
    expr is an tripleExprRef and satisfies(value, tripleExprWithId(tripleExprRef), G, m).
    The tripleExprWithId function is defined in Triple Expression Reference Requirement below.

    :param cntxt: evaluation context; resolves the reference and receives ``fail_reason`` when it is unknown
    :param T: candidate triples; every one must match the referenced expression
    :param expr: label of the referenced triple expression
    :param _: debug context (unused)
    :return: True if the reference resolves and every triple in ``T`` matches it
    """
    # Kept under its own name so the label in ``expr`` is still available for the failure message
    referenced = cntxt.tripleExprFor(expr)
    if referenced is None:
        cntxt.fail_reason = f"{expr}: Reference not found"
        return False
    return all(matchesTripleConstraint(cntxt, t, referenced) for t in T)
Ejemplo n.º 10
0
def satisfiesShapeExprRef(cntxt: Context, n: Node, se: ShExJ.shapeExprLabel,
                          c: DebugContext) -> bool:
    """ Se is a shapeExprRef and there exists in the schema a shape expression se2 with that id
     and satisfies(n, se2, G, m).

    :param cntxt: evaluation context; ``cntxt.schema.shapes`` is searched for the referenced shape
    :param n: node being tested
    :param se: id of the referenced shape expression
    :param c: debug context; the id is printed when ``c.debug`` is true
    :return: result of ``satisfies`` on the first shape whose id equals ``se``, or False if there is none
    """
    if c.debug:
        print(f"id: {se}")
    referenced = next((candidate for candidate in cntxt.schema.shapes if candidate.id == se), None)
    if referenced is None:
        cntxt.fail_reason = f"{se}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, referenced)
Ejemplo n.º 11
0
def satisfiesExternal(cntxt: Context, n: Node, se: ShExJ.ShapeExternal,
                      c: DebugContext) -> bool:
    """ Se is a ShapeExternal and implementation-specific mechanisms not defined in this specification indicate
     success.

    :param cntxt: evaluation context; ``external_shape_for`` is asked for the shape named by ``se.id``
    :param n: node being tested
    :param se: external shape reference
    :param c: debug context; the id is printed when ``c.debug`` is true
    :return: result of ``satisfies`` on the resolved shape, or False if the lookup result is falsy
    """
    if c.debug:
        print(f"id: {se.id}")
    resolved = cntxt.external_shape_for(se.id)
    if not resolved:
        cntxt.fail_reason = f"{se.id}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, resolved)
Ejemplo n.º 12
0
def isValid(cntxt: Context, m: FixedShapeMap) -> Tuple[bool, List[str]]:
    """`5.2 Validation Definition <http://shex.io/shex-semantics/#validation>`_

    The expression isValid(G, m) indicates that for every nodeSelector/shapeLabel pair (n, s) in m, s has a
        corresponding shape expression se and satisfies(n, se, G, m). satisfies is defined below for each form
        of shape expression

    Evaluation stops at the first pair that fails.

    :param cntxt: evaluation context - includes graph and schema
    :param m: list of NodeShape pairs to test
    :return: Success/failure indicator and, if fail, a list of failure reasons
    """
    if not cntxt.is_valid:
        return False, cntxt.error_list

    parse_nodes = []
    for pair in m:
        focus = pair.nodeSelector
        if not isinstance_(focus, Node):
            return False, [f"{focus}: Triple patterns are not implemented"]

        # The focus is missing when it is neither a subject nor an object of any triple.  The last clause is
        # because the spec asserts that completely empty graphs pass in certain circumstances.
        focus_missing = (not next(cntxt.graph.predicate_objects(focus), None)
                         and not next(cntxt.graph.subject_predicates(focus), None)
                         and next(cntxt.graph.triples((None, None, None)), None))
        if focus_missing:
            return False, [f"Focus: {focus} not in graph"]

        label = pair.shapeLabel
        use_start = label is None or label is START
        s = cntxt.shapeExprFor(START if use_start else label)
        # The parse node is installed before the lookup result is inspected, so it is in place when
        # fail_reason is assigned below
        cntxt.current_node = ParseNode(satisfies, s, focus, cntxt)
        if not s:
            if use_start:
                cntxt.fail_reason = "START node is not specified or is invalid"
            else:
                cntxt.fail_reason = f"Shape: {label} not found in Schema"
            return False, cntxt.process_reasons()

        parse_nodes.append(cntxt.current_node)
        if satisfies(cntxt, focus, s):
            cntxt.current_node.result = True
        else:
            cntxt.current_node.result = False
            return False, cntxt.process_reasons()
    return True, []
Ejemplo n.º 13
0
def nodeSatisfiesValues(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 Values Constraint <http://shex.io/shex-semantics/#values>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v.

    :param cntxt: evaluation context; receives ``fail_reason`` when no value matches
    :param n: node being tested
    :param nc: node constraint; only ``values`` is tested, the whole constraint is serialised for the message
    :param _c: debug context (unused)
    :return: True when there is no value set or ``n`` matches one of its entries, otherwise False
    """
    if nc.values is None:
        return True
    for vsv in nc.values:
        if _nodeSatisfiesValue(cntxt, n, vsv):
            return True
    node_text = cntxt.n3_mapper.n3(n)
    # Only the first 60 characters of the serialised constraint are shown
    constraint_preview = as_json(cntxt.type_last(nc), indent=None)[:60]
    cntxt.fail_reason = "".join(("Node: ", node_text, " not in value set:\n\t", constraint_preview, "..."))
    return False
Ejemplo n.º 14
0
def nodeSatisfiesNodeKind(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint,
                          c: DebugContext) -> bool:
    """ `5.4.2 Node Kind Constraints <http://shex.io/shex-semantics/#nodeKind>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if:

        * v = "iri" and n is an IRI.
        * v = "bnode" and n is a blank node.
        * v = "literal" and n is a Literal.
        * v = "nonliteral" and n is an IRI or blank node.

    A constraint without a node kind is always satisfied; any other node kind value fails.

    :param cntxt: evaluation context; receives ``fail_reason`` on a mismatch
    :param n: node being tested
    :param nc: node constraint; only ``nodeKind`` is read
    :param c: debug context; the node kind is printed when ``c.debug`` is true and a kind is present
    :return: True if ``n`` is of the required kind
    """
    kind = nc.nodeKind
    if c.debug and kind is not None:
        print(f" Kind: {kind}")
    if kind is None:
        return True

    if kind == 'iri':
        kind_matches = isinstance(n, URIRef)
    elif kind == 'bnode':
        kind_matches = isinstance(n, BNode)
    elif kind == 'literal':
        kind_matches = isinstance(n, Literal)
    elif kind == 'nonliteral':
        kind_matches = isinstance(n, (URIRef, BNode))
    else:
        kind_matches = False

    if kind_matches:
        return True
    cntxt.fail_reason = f"Node kind mismatch have: {type(n).__name__} expected: {kind}"
    return False
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape,
                   c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    :param n: focus node
    :param S: Shape to be satisfied
    :param cntxt: Evaluation context
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """

    # Recursion detection.  If start_evaluating returns a boolean value, this is the assumed result of the shape
    # evaluation.  If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)

    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency.  If you are interested in
        #       getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Collect every triple that uses one of the shape's predicates in the direction(s) the shape uses it
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(
                    g.triples((n if direction.is_fwd else None,
                               iriref_to_uriref(predicate),
                               n if direction.is_rev else None)))

        if c.debug:
            print(c.i(1, "predicates:", sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(c.i(1, "matchables:", sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph([t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:"
                # Triples are formatted, not concatenated: "+" between a str and a triple raises TypeError
                cntxt.fail_reason = '\n'.join(f"\t{t}" for t in non_matchables)
                if c.debug:
                    # One message string; elsewhere in this file the third argument of c.i is a list
                    print(c.i(0, f"<--- Satisfies shape {c.d()} FAIL - "
                                 f"{len(non_matchables)} non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # NOTE(review): this return leaves (n, S.id) on cntxt.evaluate_stack and skips
                # cntxt.done_evaluating -- confirm whether that is intended
                return False

        # Evaluate the actual expression.  Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e) for e in S.extra} if S.extra is not None else set()
                if extras:
                    # Triples on EXTRA predicates may be left out of the match: retry with every subset of
                    # them (as produced by partition_2) added to the triples that must always take part
                    permutable_matchables = RDFGraph([t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph(
                        [t for t in matchables if t not in permutable_matchables])
                    if c.debug:
                        print(c.i(1, "Complete match failed -- evaluating extras", list(extras)))
                    for matched, _remainder in partition_2(permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                # rslt is still None here if nothing matched; normalise it to False
                rslt = rslt or False
        else:
            rslt = True  # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt