Beispiel #1
0
def directed_predicates_in_expression(
        expression: ShExJ.shapeExpr,
        cntxt: Context) -> Dict[IRIREF, PredDirection]:
    """ Collect the predicates used by the triple constraints of a shape expression, together with the
    direction information recorded for each of them.

    :param expression: shape expression to scan
    :param cntxt: evaluation context; supplies the shape visitor and the triple expression visitor
    :return: map from predicate to the PredDirection that was updated once for every triple constraint
             using that predicate
    """

    def note_direction(found: Dict[IRIREF, PredDirection],
                       candidate: ShExJ.TripleConstraint, _: Context) -> None:
        # The visitor may be handed any triple expression; only triple constraints are recorded
        if not isinstance(candidate, ShExJ.TripleConstraint):
            return
        direction = found.setdefault(candidate.predicate, PredDirection())
        # True is passed when "inverse" is absent or false, False when the constraint is inverse
        direction.dir(candidate.inverse is None or not candidate.inverse)

    def scan_shape(found: Dict[IRIREF, PredDirection],
                   shape_expr: ShExJ.shapeExpr, inner_cntxt: Context) -> None:
        # Only Shape instances that actually carry a triple expression are of interest
        if not isinstance(shape_expr, ShExJ.Shape) or shape_expr.expression is None:
            return
        inner_cntxt.visit_triple_expressions(shape_expr.expression,
                                             note_direction, found)

    # TODO: follow_inner_shapes as True probably goes too far, but we definitely need to cross shape/triplecons
    collected = {}
    cntxt.visit_shapes(expression,
                       scan_shape,
                       collected,
                       follow_inner_shapes=False)
    return collected
Beispiel #2
0
 def test_example_2(self):
     """Visiting shapes starting at the first shape of the shex_2 schema reports S1 and then S2."""
     schema, _ = setup_test(shex_2, None)
     cntxt = Context(None, schema)
     # visit_shape is handed this list as the visitor argument; the assertion checks what it collected
     shapes_visited = []
     cntxt.visit_shapes(schema.shapes[0], visit_shape, shapes_visited)
     self.assertEqual(["http://schema.example/S1", "http://schema.example/S2"], shapes_visited)
def nodeSatisfiesDataType(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.3 Datatype Constraints <http://shex.io/shex-semantics/#datatype>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n is a Literal with the datatype v and, if v is in
    the set of SPARQL operand data types[sparql11-query], an XML schema string with a value of the lexical form of
    n can be cast to the target type v per XPath Functions 3.1 section 19 Casting[xpath-functions]. Only datatypes
    supported by SPARQL MUST be tested but ShEx extensions MAY add support for other datatypes.

    :param cntxt: evaluation context; receives the failure reason when the test fails
    :param n: node to test
    :param nc: node constraint whose ``datatype`` facet is evaluated
    :param c: debug context; when ``c.debug`` is set the datatype is printed
    :return: True if there is no datatype facet or n satisfies it
    """
    if nc.datatype is None:
        return True
    # Messages are built with f-strings: nc.datatype is not guaranteed to be a plain str (the comparison
    # below has to wrap it in str()), so "+" concatenation could raise TypeError
    if c.debug:
        print(f" Datatype: {nc.datatype}")
    if not isinstance(n, Literal):
        cntxt.fail_reason = f"Datatype constraint {nc.datatype} does not match " \
                            f"{type(n).__name__} {cntxt.n3_mapper.n3(n)}"
        cntxt.dump_bnode(n)
        return False
    actual_datatype = _datatype(n)
    if actual_datatype == str(nc.datatype) or \
            (is_sparql_operand_datatype(nc.datatype) and can_cast_to(n, nc.datatype)):
        return True
    cntxt.fail_reason = f"Datatype mismatch - expected: {nc.datatype} actual: {actual_datatype}"
    return False
def _fail_triples(cntxt: Context, T: RDFGraph) -> None:
    """ Report a sample of the triples in T through ``cntxt.fail_reason``.

    Nothing is reported for an empty T.  Otherwise a "Triples:" header is recorded, followed by at most the
    first five triples in sorted order and an ellipsis line when triples were left out.  Each line is
    delivered by a separate assignment to ``cntxt.fail_reason`` (presumably accumulated by the context --
    confirm against the Context class).

    :param cntxt: evaluation context that receives the lines and supplies the n3 mapper
    :param T: triples to report
    """
    max_listed = 5
    tlist = sorted(T)
    if not tlist:
        return
    cntxt.fail_reason = "Triples:"
    # Only the first few triples are listed; the ellipsis below tells the reader that the list was cut
    for t in tlist[:max_listed]:
        cntxt.fail_reason = f"      {cntxt.n3_mapper.n3(t)}"
    if len(tlist) > max_listed:
        cntxt.fail_reason = "      ...   "
def matchesTripleExprLabel(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, c: DebugContext) -> bool:
    """ Resolve a triple expression label and match T against the labeled expression.

    :param cntxt: evaluation context used to look the label up; receives the failure reason
    :param T: triples to match
    :param expr: label of the triple expression
    :param c: debug context; when ``c.debug`` is set the label is printed
    :return: result of matchesCardinality on the resolved expression, False if the label does not resolve
    """
    if c.debug:
        print(f" {expr}")
    resolved = cntxt.tripleExprFor(expr)
    if not resolved:
        cntxt.fail_reason = f"{expr}: Labeled triple expression not found"
        return False
    return matchesCardinality(cntxt, T, resolved)
def matchesTripleExprRef(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, _: DebugContext) -> bool:
    """
    expr is a tripleExprRef and satisfies(value, tripleExprWithId(tripleExprRef), G, m).
    The tripleExprWithId function is defined in Triple Expression Reference Requirement below.

    :param cntxt: evaluation context used to resolve the reference; receives the failure reason
    :param T: triples to match
    :param expr: label of the referenced triple expression
    :param _: debug context (unused)
    :return: True if every triple in T matches the referenced expression, False if the reference
             cannot be resolved or some triple does not match
    """
    # Keep the label in ``expr`` so that the failure message can still name the missing reference
    referenced = cntxt.tripleExprFor(expr)
    if referenced is None:
        cntxt.fail_reason = f"{expr}: Reference not found"
        return False
    return all(matchesTripleConstraint(cntxt, t, referenced) for t in T)
def nodeSatisfiesStringFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema String Facet Constraints <http://shex.io/shex-semantics/#xs-string>`_

    String facet constraints apply to the lexical form of the RDF Literals and IRIs and blank node
    identifiers (see note below regarding access to blank node identifiers).

    :param cntxt: evaluation context; receives the reason of the first facet that fails
    :param n: node to test
    :param nc: node constraint carrying the ``length``, ``minlength``, ``maxlength``, ``pattern`` and
               ``flags`` facets
    :param _c: debug context (unused)
    :return: True if no string facet is present or all present facets are satisfied
    """
    if nc.length is None and nc.minlength is None and nc.maxlength is None and nc.pattern is None:
        return True

    # lex is the string form of the node: per the specification the lexical form of a literal, the IRI
    # string of an IRI or the identifier of a blank node
    lex = str(n)
    # Python counts unicode codepoints, which is the length the facets are compared against
    lex_len = len(lex)

    # TODO: Figure out whether we need to connect this to the lxml exslt functions
    # TODO: Map flags if not
    # The facets are checked in a fixed order and the first violation is the one that gets reported
    if nc.length is not None and lex_len != nc.length:
        cntxt.fail_reason = f"String length mismatch - expected: {nc.length} actual: {lex_len}"
        return False
    if nc.minlength is not None and lex_len < nc.minlength:
        cntxt.fail_reason = f"String length violation - minimum: {nc.minlength} actual: {lex_len}"
        return False
    if nc.maxlength is not None and lex_len > nc.maxlength:
        cntxt.fail_reason = f"String length violation - maximum: {nc.maxlength} actual: {lex_len}"
        return False
    if nc.pattern is not None and not pattern_match(nc.pattern, nc.flags, lex):
        cntxt.fail_reason = f"Pattern match failure - pattern: {nc.pattern} flags:{nc.flags} string: {lex}"
        return False
    return True
def matchesCardinality(cntxt: Context,
                       T: RDFGraph,
                       expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext,
                       extras: Optional[Set[URIRef]] = None) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; receives failure reasons
    :param T: triples to match
    :param expr: triple expression whose ``min`` / ``max`` bound the match
    :param c: debug context; when ``c.debug`` is set the cardinality being tested is printed
    :param extras: predicates whose triples are left out of the upper bound check of a triple constraint
    :return: True if T matches expr within its cardinality
    """
    # TODO: Cardinality defaults into spec
    lower = 1 if expr.min is None else expr.min
    upper = 1 if expr.max is None else expr.max
    n_triples = len(T)

    cardinality_text = f"{{{lower},{'*' if upper == -1 else upper}}}"
    if c.debug and (lower != 0 or n_triples != 0):
        print(f"{cardinality_text} matching {n_triples} triples")

    # An optional expression is trivially matched by the empty set
    if lower == 0 and n_triples == 0:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Anything other than a triple constraint: some partition of T must match group by group
        for partition in _partitions(T, lower, upper):
            if all(matchesExpr(cntxt, part, expr) for part in partition):
                return True
        if lower != 1 or upper != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {n_triples} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    if n_triples < lower:
        if n_triples > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {n_triples} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f"   No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Don't include extras in the cardinality check: must_match is the set of things NOT consumed in extra
    must_match = RDFGraph([t for t in T if t.p not in extras]) if extras else T
    if 0 <= upper < len(must_match):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f"   {n_triples} triples exceeds max {cardinality_text}"
        return False
    if len(must_match):
        return all(matchesTripleConstraint(cntxt, t, expr) for t in must_match)
    # Every triple was covered by extras: a single matching triple is enough
    return any(matchesTripleConstraint(cntxt, t, expr) for t in T)
Beispiel #9
0
def predicates_in_tripleexpr(expression: ShExJ.tripleExpr,
                             cntxt: Context) -> Set[IRIREF]:
    """ Return the predicates of all triple constraints visited inside a triple expression.

    :param expression: triple expression to scan
    :param cntxt: evaluation context that supplies the triple expression visitor
    :return: set of predicates found
    """

    def collect(seen: Set[IRIREF], candidate: ShExJ.tripleExpr,
                _: Context) -> None:
        # Only triple constraints carry a predicate; other expression types are ignored
        if isinstance(candidate, ShExJ.TripleConstraint):
            seen.add(candidate.predicate)

    found = set()
    cntxt.visit_triple_expressions(expression, collect, found)
    return found
def nodeSatisfiesNumericFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema Numeric Facet Constraints <http://shex.io/shex-semantics/#xs-numeric>`_

    Numeric facet constraints apply to the numeric value of RDF Literals with datatypes listed in SPARQL 1.1
    Operand Data Types[sparql11-query]. Numeric constraints on non-numeric values fail. totaldigits and
    fractiondigits constraints on values not derived from xsd:decimal fail.

    :param cntxt: evaluation context; receives the reason of the first facet that fails
    :param n: node to test
    :param nc: node constraint carrying the ``mininclusive``, ``minexclusive``, ``maxinclusive``,
               ``maxexclusive``, ``totaldigits`` and ``fractiondigits`` facets
    :param _c: debug context (unused)
    :return: True if no numeric facet is present or all present facets are satisfied
    """
    facets = (nc.mininclusive, nc.minexclusive, nc.maxinclusive, nc.maxexclusive,
              nc.totaldigits, nc.fractiondigits)
    if all(facet is None for facet in facets):
        return True

    if not is_numeric(n):
        cntxt.fail_reason = f"Numeric test on non-number: {n}"
        return False
    v = n.value
    if not isinstance(v, numbers.Number):
        cntxt.fail_reason = f"Numeric test on non-number: {v}"
        return False

    # Each facet is written as "not (passing comparison)" so that a value which compares false both ways
    # (e.g. NaN) is reported against the facet it fails.  Messages use f-strings because the operands are
    # numbers, which cannot be concatenated to a str.
    if nc.mininclusive is not None and not (v >= nc.mininclusive):
        cntxt.fail_reason = f"Numeric value violation - minimum inclusive: {nc.mininclusive} actual: {v}"
        return False
    if nc.minexclusive is not None and not (v > nc.minexclusive):
        cntxt.fail_reason = f"Numeric value violation - minimum exclusive: {nc.minexclusive} actual: {v}"
        return False
    if nc.maxinclusive is not None and not (v <= nc.maxinclusive):
        cntxt.fail_reason = f"Numeric value violation - maximum inclusive: {nc.maxinclusive} actual: {v}"
        return False
    if nc.maxexclusive is not None and not (v < nc.maxexclusive):
        cntxt.fail_reason = f"Numeric value violation - maximum exclusive: {nc.maxexclusive} actual: {v}"
        return False
    if nc.totaldigits is not None:
        n_total = total_digits(n)
        # A digit count of None fails the facet
        if n_total is None or not (n_total <= nc.totaldigits):
            cntxt.fail_reason = f"Numeric value violation - max total digits: {nc.totaldigits} value: {v}"
            return False
    if nc.fractiondigits is not None:
        n_fraction = fraction_digits(n)
        if n_fraction is None or not (n_fraction <= nc.fractiondigits):
            cntxt.fail_reason = f"Numeric value violation - max fractional digits: {nc.fractiondigits} value: {v}"
            return False
    return True
def satisfiesExternal(cntxt: Context, n: Node, se: ShExJ.ShapeExternal,
                      c: DebugContext) -> bool:
    """ Se is a ShapeExternal and implementation-specific mechanisms not defined in this specification
    indicate success.

    :param cntxt: evaluation context used to resolve the external shape; receives the failure reason
    :param n: node to test
    :param se: external shape reference; its ``id`` is what gets resolved
    :param c: debug context; when ``c.debug`` is set the id is printed
    :return: result of satisfies on the resolved shape, False if the id does not resolve
    """
    if c.debug:
        print(f"id: {se.id}")
    resolved = cntxt.external_shape_for(se.id)
    if not resolved:
        cntxt.fail_reason = f"{se.id}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, resolved)
Beispiel #12
0
def triple_constraints_in_expression(
        expression: ShExJ.shapeExpr,
        cntxt: Context) -> List[ShExJ.TripleConstraint]:
    """ Return the triple constraints visited inside an expression, in visiting order.

    :param expression: expression to scan
    :param cntxt: evaluation context that supplies the triple expression visitor
    :return: list of the TripleConstraint instances found
    """

    def collect(found: List[ShExJ.TripleConstraint],
                candidate: ShExJ.TripleConstraint, _: Context) -> None:
        # Other kinds of triple expression are passed to the visitor as well; skip them
        if not isinstance(candidate, ShExJ.TripleConstraint):
            return
        found.append(candidate)

    constraints = []
    cntxt.visit_triple_expressions(expression, collect, constraints)
    return constraints
def isValid(cntxt: Context, m: FixedShapeMap) -> Tuple[bool, List[str]]:
    """`5.2 Validation Definition <http://shex.io/shex-semantics/#validation>`_

    The expression isValid(G, m) indicates that for every nodeSelector/shapeLabel pair (n, s) in m, s has a
        corresponding shape expression se and satisfies(n, se, G, m). satisfies is defined below for each form
        of shape expression

    Evaluation stops at the first pair that fails.

    :param cntxt: evaluation context - includes graph and schema
    :param m: list of NodeShape pairs to test
    :return: Success/failure indicator and, if fail, a list of failure reasons
    """
    if not cntxt.is_valid:
        return False, cntxt.error_list

    for nodeshapepair in m:
        n = nodeshapepair.nodeSelector
        if not isinstance_(n, Node):
            return False, [f"{n}: Triple patterns are not implemented"]

        # The focus must have at least one outgoing or incoming arc.  The third test is because the spec
        # asserts that completely empty graphs pass in certain circumstances
        graph = cntxt.graph
        focus_usable = (next(graph.predicate_objects(n), None)
                        or next(graph.subject_predicates(n), None)
                        or not next(graph.triples((None, None, None)), None))
        if not focus_usable:
            return False, [f"Focus: {n} not in graph"]

        label = nodeshapepair.shapeLabel
        use_start = label is None or label is START
        s = cntxt.shapeExprFor(START if use_start else label)
        # The parse node is installed before the shape is checked, so it is in place when the reasons
        # are processed below
        cntxt.current_node = ParseNode(satisfies, s, n, cntxt)
        if not s:
            if use_start:
                cntxt.fail_reason = "START node is not specified or is invalid"
            else:
                cntxt.fail_reason = f"Shape: {label} not found in Schema"
            return False, cntxt.process_reasons()

        if not satisfies(cntxt, n, s):
            cntxt.current_node.result = False
            return False, cntxt.process_reasons()
        cntxt.current_node.result = True
    return True, []
Beispiel #14
0
 def wrapper(cntxt: Context, T: RDFGraph, expr: JSGObject) -> bool:
     """ Run the wrapped function ``f`` on (T, expr) inside its own parse node.

     A new ParseNode becomes the current node for the duration of the call and is attached to the node that
     was current on entry; the debug context is stepped with splus()/sminus() around the call and, when
     tracing is on, an entry line and an exit line are printed.

     :param cntxt: evaluation context
     :param T: triples handed to ``f``
     :param expr: expression handed to ``f``
     :return: whatever ``f`` returns
     """
     caller_node = cntxt.current_node
     cntxt.current_node = ParseNode(f, expr, T, cntxt)
     caller_node.nodes.append(cntxt.current_node)

     dbg = cntxt.debug_context
     dbg.splus()
     if dbg.debug:
         dbg.print(dbg.i(0, f'--> {f.__name__} {dbg.d()}'), not newline)
     outcome = f(cntxt, T, expr, dbg)
     if dbg.debug:
         dbg.print(dbg.i(0, f'<-- {f.__name__} {dbg.d()} {outcome}'))
     dbg.sminus()

     # Record the outcome on this call's node, then make the caller's node current again
     cntxt.current_node.result = outcome
     cntxt.current_node = caller_node
     return outcome
Beispiel #15
0
def evaluate(g: Graph,
             schema: Union[str, ShExJ.Schema],
             focus: Optional[Union[str, URIRef, IRIREF]],
             start: Optional[Union[str, URIRef, IRIREF, START,
                                   START_TYPE]] = None,
             debug_trace: bool = False) -> Tuple[bool, Optional[str]]:
    """ Evaluate focus node `focus` in graph `g` against shape `start` in ShEx schema `schema`

    :param g: Graph containing RDF
    :param schema: ShEx Schema -- if str, it will be parsed
    :param focus: focus node in g.  Anything that is not already a URIRef is converted with
                  ``URIRef(str(focus))``.
                  NOTE(review): None is converted the same way rather than selecting every subject -- confirm
                  whether callers rely on that
    :param start: Starting shape.  If omitted, the Schema start shape is used
    :param debug_trace: Turn on debug tracing
    :return: (success indicator, text).  The text is a fixed message when the schema cannot be parsed or no
             starting shape is available; otherwise it is the failure reasons joined by newlines, which is
             the empty string when validation reports no reasons
    """
    if isinstance(schema, str):
        schema = SchemaLoader().loads(schema)
    if schema is None:
        return False, "Error parsing schema"

    focus_node = focus if isinstance(focus, URIRef) else URIRef(str(focus))

    # Fall back on the start shape declared by the schema, if it declares one
    if start is None and schema.start:
        start = str(schema.start)
    if start is None:
        return False, "No starting shape"
    # The START markers are passed through untouched; every other value becomes an IRIREF
    if not (isinstance(start, IRIREF) or start is START or start is START_TYPE):
        start = IRIREF(str(start))

    cntxt = Context(g, schema)
    cntxt.debug_context.debug = debug_trace
    shape_map = FixedShapeMap()
    shape_map.add(ShapeAssociation(focus_node, start))
    passed, reasons = isValid(cntxt, shape_map)
    return passed, '\n'.join(reasons)
def matchesTripleConstraint(cntxt: Context, t: RDFTriple,
                            expr: ShExJ.TripleConstraint,
                            c: DebugContext) -> bool:
    """
    expr is a TripleConstraint and:

    * t is a triple
    * t's predicate equals expr's predicate.
      Let value be t's subject if inverse is true, else t's object.
    * if inverse is true, t is in arcsIn, else t is in arcsOut.

    :param cntxt: evaluation context; receives the failure reason on a predicate mismatch
    :param t: triple to test
    :param expr: triple constraint to test against
    :param c: debug context; when ``c.debug`` is set the triple and the constraint are printed
    :return: True if the predicate matches and the value satisfies ``expr.valueExpr`` (when there is one)
    """
    # Imported here rather than at module level -- presumably to avoid a circular import; confirm
    from pyshex.shape_expressions_language.p5_3_shape_expressions import satisfies

    # Messages are built with f-strings: neither the triple nor the constraint predicate is guaranteed to be
    # a plain str, so "+" concatenation could raise TypeError
    if c.debug:
        print(c.i(1, f" triple: {t}"))
        print(c.i(1, '', expr._as_json_dumps().split('\n')))

    if not uriref_matches_iriref(t.p, expr.predicate):
        cntxt.fail_reason = f"Predicate mismatch: {t.p} ≠ {expr.predicate}"
        return False

    value = t.s if expr.inverse else t.o
    return expr.valueExpr is None or satisfies(cntxt, value, expr.valueExpr)
def matchesCardinality(cntxt: Context, T: RDFGraph,
                       expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; receives failure reasons
    :param T: triples to match
    :param expr: triple expression whose ``min`` / ``max`` bound the match
    :param c: debug context; when ``c.debug`` is set the cardinality being tested is printed
    :return: True if T matches expr within its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = expr.min if expr.min is not None else 1
    max_ = expr.max if expr.max is not None else 1
    n_triples = len(T)

    # Rendered as "{min,max}" with "*" standing for an unbounded maximum.  The conditional sits inside the
    # replacement field so that it only selects the upper bound and not the whole text
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"
    if c.debug and (min_ != 0 or n_triples != 0):
        print(f"{cardinality_text} matching {n_triples} triples")

    # An optional expression is trivially matched by the empty set
    if min_ == 0 and n_triples == 0:
        return True

    if isinstance(expr, ShExJ.TripleConstraint):
        if n_triples < min_:
            if n_triples > 0:
                _fail_triples(cntxt, T)
                cntxt.fail_reason = f"   {n_triples} triples less than {cardinality_text}"
            else:
                cntxt.fail_reason = \
                    f"   No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
            return False
        if 0 <= max_ < n_triples:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f"   {n_triples} triples exceeds max {cardinality_text}"
            return False
        return all(matchesTripleConstraint(cntxt, t, expr) for t in T)

    # Anything other than a triple constraint: some partition of T must match group by group
    for partition in _partitions(T, min_, max_):
        if all(matchesExpr(cntxt, part, expr) for part in partition):
            return True
    if min_ != 1 or max_ != 1:
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f"   {n_triples} triples cannot be partitioned into {cardinality_text} passing groups"
    return False
Beispiel #18
0
 def extern_shape_for(self, ref: ShExJ.IRIREF) -> Optional[ShExJ.Shape]:
     """ Look ``ref`` up in the external schemas listed in ``self.externs``.

     The externs are loaded one at a time through the owner's schema loader, in order; the first loaded
     schema whose id map contains ``ref`` supplies the result.

     :param ref: identifier to resolve
     :return: the entry found for ``ref``, or None if no extern that loads contains it
     """
     for extern_ref in self.externs:
         loaded = self.owner.schema_loader.load(extern_ref)
         # Externs that fail to load are skipped rather than treated as an error
         if not loaded:
             continue
         id_map = Context(None, loaded).schema_id_map
         if ref in id_map:
             return id_map[ref]
     return None
def satisfiesShapeExprRef(cntxt: Context, n: Node, se: ShExJ.shapeExprLabel,
                          c: DebugContext) -> bool:
    """ Se is a shapeExprRef and there exists in the schema a shape expression se2 with that id
    and satisfies(n, se2, G, m).

    :param cntxt: evaluation context; its schema is searched and it receives the failure reason
    :param n: node to test
    :param se: id of the referenced shape expression
    :param c: debug context; when ``c.debug`` is set the id is printed
    :return: result of satisfies on the first shape whose id equals se, False if there is none
    """
    if c.debug:
        print(f"id: {se}")
    referenced = next((candidate for candidate in cntxt.schema.shapes if candidate.id == se), None)
    if referenced is None:
        cntxt.fail_reason = f"{se}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, referenced)
Beispiel #20
0
 def wrapper(cntxt: Context, n: Node, expr: JSGObject) -> bool:
     """ Run the wrapped function ``f`` on (n, expr) inside its own parse node.

     A new ParseNode becomes the current node for the duration of the call and is attached to the node that
     was current on entry; the debug context is stepped with splus()/sminus() around the call.  When tracing
     is on and ``skip_trace(expr)`` is false, an entry line and an exit line naming the node are printed.

     :param cntxt: evaluation context
     :param n: node handed to ``f``
     :param expr: expression handed to ``f``
     :return: whatever ``f`` returns
     """
     parent_parse_node = cntxt.current_node
     cntxt.current_node = ParseNode(f, expr, n, cntxt)
     parent_parse_node.nodes.append(cntxt.current_node)
     c = cntxt.debug_context
     c.splus()
     # Trace lines are f-strings so that non-str parts (the boolean result in particular) are formatted
     # instead of raising TypeError on "+"
     if c.debug and not skip_trace(expr):
         c.print(c.i(0, f'--> {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}'), not newline)
     rval = f(cntxt, n, expr, c)
     if c.debug and not skip_trace(expr):
         c.print(c.i(0, f'<-- {f.__name__} {c.d()} node: {cntxt.n3_mapper.n3(n)}:{rval}'))
     c.sminus()
     # Record the outcome on this call's node, then make the caller's node current again
     cntxt.current_node.set_result(rval)
     cntxt.current_node = parent_parse_node
     return rval
def nodeSatisfiesValues(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 Values Constraint <http://shex.io/shex-semantics/#values>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v.

    :param cntxt: evaluation context; receives the failure reason
    :param n: node to test
    :param nc: node constraint whose ``values`` facet is evaluated
    :param _c: debug context (unused)
    :return: True if there is no values facet or n matches at least one of its entries
    """
    if nc.values is None:
        return True
    for vsv in nc.values:
        if _nodeSatisfiesValue(cntxt, n, vsv):
            return True
    # Only the first 60 characters of the serialized constraint are quoted in the message
    node_text = cntxt.n3_mapper.n3(n)
    constraint_text = as_json(cntxt.type_last(nc), indent=None)[:60]
    cntxt.fail_reason = "Node: " + node_text + " not in value set:\n\t" + constraint_text + "..."
    return False
Beispiel #22
0
def nodeSatisfiesNodeKind(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint,
                          c: DebugContext) -> bool:
    """ `5.4.2 Node Kind Constraints <http://shex.io/shex-semantics/#nodeKind>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if:

        * v = "iri" and n is an IRI.
        * v = "bnode" and n is a blank node.
        * v = "literal" and n is a Literal.
        * v = "nonliteral" and n is an IRI or blank node.

    :param cntxt: evaluation context; receives the failure reason
    :param n: node to test
    :param nc: node constraint whose ``nodeKind`` facet is evaluated
    :param c: debug context; when ``c.debug`` is set the requested kind is printed
    :return: True if there is no nodeKind facet or n is of the requested kind
    """
    kind = nc.nodeKind
    if c.debug and kind is not None:
        print(f" Kind: {kind}")
    if kind is None:
        return True
    # One guard per node kind; a kind outside the four listed above matches nothing
    if kind == 'iri' and isinstance(n, URIRef):
        return True
    if kind == 'bnode' and isinstance(n, BNode):
        return True
    if kind == 'literal' and isinstance(n, Literal):
        return True
    if kind == 'nonliteral' and isinstance(n, (URIRef, BNode)):
        return True
    cntxt.fail_reason = f"Node kind mismatch have: {type(n).__name__} expected: {kind}"
    return False
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape,
                   c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    Processing steps:

    * the triples that involve `n` and a predicate mentioned in `S` are collected as "matchables"
    * if `S` is closed, any outgoing arc of `n` that is not a matchable fails the shape
    * the matchables are tested against the shape's expression; if that fails and `S` declares
      extra predicates, subsets of the triples carrying those predicates are tried in turn

    :param cntxt: Evaluation context
    :param n: focus node
    :param S: Shape to be satisfied
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """

    # Recursion detection.  If start_evaluating returns a boolean value, this is the assumed result of the shape
    # evaluation.  If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)

    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency.  If you are interested in
        #       getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Gather every triple that uses one of the shape's predicates in the direction(s) it is referenced
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(
                    g.triples((n if direction.is_fwd else None,
                               iriref_to_uriref(predicate),
                               n if direction.is_rev else None)))

        if c.debug:
            print(
                c.i(1, "predicates:",
                    sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(
                c.i(1, "matchables:",
                    sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph(
                [t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:"
                # Triples are not strings -- convert explicitly, "\t" + t raises TypeError
                cntxt.fail_reason = '\n'.join("\t" + str(t) for t in non_matchables)
                if c.debug:
                    # The count must be converted as well: int + str raises TypeError
                    print(
                        c.i(
                            0, "<--- Satisfies shape " + c.d() + " FAIL - ",
                            str(len(non_matchables)) +
                            " non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # NOTE(review): this early exit leaves (n, S.id) on cntxt.evaluate_stack and never calls
                # cntxt.done_evaluating -- confirm whether that is intentional
                return False

        # Evaluate the actual expression.  Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e)
                          for e in S.extra} if S.extra is not None else set()
                if len(extras):
                    # Triples with an EXTRA predicate may be left out of the match; everything else must stay
                    permutable_matchables = RDFGraph(
                        [t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph([
                        t for t in matchables if t not in permutable_matchables
                    ])
                    if c.debug:
                        print(
                            c.i(1,
                                "Complete match failed -- evaluating extras",
                                list(extras)))
                    for matched, _ in partition_2(permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                # rslt is still None when nothing matched -- normalize it to False
                rslt = rslt or False
        else:
            rslt = True  # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            # NOTE(review): called without the debug context `c` -- presumably a wrapper that is not visible
            # here supplies it; confirm
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt
Beispiel #24
0
    def eval_entry(self, entry_name: str) -> bool:
        """Run the manifest entry registered under ``entry_name`` and record the outcome.

        Entries that precede the first one whose name starts with ``START_AFTER`` are counted and
        ignored.  An entry is skipped when it carries one of the ``skip_traits`` or is listed in
        ``self.expected_failures``; ``TEST_SKIPS_ONLY`` inverts which group is actually validated.
        The outcome is recorded through ``self.skip``, ``self.pass_`` or ``self.fail``.

        Note that every path through the loop body returns, so only the first entry stored under
        the name is processed.

        :param entry_name: key into ``self.mfst.entries``
        :return: True when the entry was ignored, skipped or produced the accepted result; False
                 when the schema or focus could not be resolved or validation gave the wrong result
        """
        def print_traits(entry) -> None:
            # Evaluated lazily so the traits are only joined when the line is actually printed
            print(f"\t TRAITS: ({','.join(entry.traits)})")

        for me in self.mfst.entries[entry_name]:  # There can be more than one entry per name...
            # Determine the start point
            if not self.started:
                if not me.name.startswith(START_AFTER):
                    self.start_skipped += 1
                    return True
                self.started = True
                if VERBOSE:
                    print(f"STARTED - Skipped {self.start_skipped} entries")

            # Determine whether this entry should be skipped, and the reason it is tallied under
            should_skip = True
            skipped_traits = list(me.traits.intersection(skip_traits))
            if skipped_traits:
                if VERBOSE:
                    print(f"Skipping {me.name} ({', '.join(self.URIname(t) for t in me.traits)}) - Skipped trait")
                reason_key = str(skipped_traits[0]).replace(str(SHT), 'sht:')
            elif me.name in self.expected_failures:
                if VERBOSE:
                    print(f"Skipping {me.name} ({', '.join(self.URIname(t) for t in me.traits)})"
                          f" - {self.expected_failures[me.name]}")
                reason_key = self.expected_failures[me.name]
            else:
                should_skip = False

            if should_skip:
                seen_so_far = self.skip_reasons[reason_key] if reason_key in self.skip_reasons else 0
                self.skip_reasons[reason_key] = seen_so_far + 1
                self.skip(me.name)

            # Normally skipped entries stop here; with TEST_SKIPS_ONLY the non-skipped ones do instead
            if should_skip != bool(TEST_SKIPS_ONLY):
                return True

            # Validate the entry
            if VERBOSE:
                shex_uri = self.mfst.schema_loader.location_rewrite(me.schema_uri)
                if self.mfst.data_redirector:
                    data_uri = self.mfst.data_redirector.uri_for(me.data_uri)
                else:
                    data_uri = me.data_uri
                print(f"Testing {me.name} ({'P' if me.should_pass else 'F'}): {shex_uri} - {data_uri}")

            g = me.data_graph()
            s = me.shex_schema()
            if g is None and me.data_uri:
                print("\t ERROR: Unable to load data file")
                print_traits(me)
                self.skip(me.name)
                return True
            if not s:
                print(f"\t ERROR: Unable to load schema {me.schema_uri}")
                print_traits(me)
                self.nskipped += 1
                self.skip(me.name)
                return False

            cntxt = Context(g, s, me.extern_shape_for, base_namespace=BASE_FILE_LOC)
            cntxt.debug_context.debug = DEBUG
            focus = self.mfst.data_uri(me.focus)
            if not focus:
                print("\t***** FAIL *****")
                print(f"\tFocus: {me.focus} not in schema")
                print_traits(me)
                self.fail(me.name)
                return False

            target_shape = ShExJ.IRIREF(me.shape) if me.shape else START
            map_ = FixedShapeMap()
            map_.add(ShapeAssociation(focus, target_shape))

            #################################
            #  Actual validation occurs here
            #################################
            validation = isValid(cntxt, map_)

            # NOTE(review): an entry that is not expected to pass is accepted whatever the validation result
            test_result = validation[0] or not me.should_pass
            reasons = validation[1]

            # Analyze the result
            if test_result:
                self.pass_(me.name)
                return test_result

            if VERBOSE:
                print("\t**** FAIL *****")
                print_traits(me)
                for reason in reasons:
                    print(f"\t{reason}")
            else:
                print(f"Failed {me.name} ({'P' if me.should_pass else 'F'}): {me.schema_uri} - {me.data_uri}")
                print_traits(me)
            self.fail(me.name)
            return test_result
Beispiel #25
0
 def triple_expr_finder(predicates: Dict[IRIREF, PredDirection],
                        expr: ShExJ.shapeExpr, cntxt_: Context) -> None:
     """Shape visitor: run ``predicate_finder`` over the triple expressions of a Shape.

     Expressions that are not a ``ShExJ.Shape``, and Shapes without an expression, are ignored.

     :param predicates: accumulator handed through to ``predicate_finder``
     :param expr: shape expression being visited
     :param cntxt_: context whose ``visit_triple_expressions`` performs the traversal
     """
     if not isinstance(expr, ShExJ.Shape):
         return
     triple_expr = expr.expression
     if triple_expr is None:
         return
     cntxt_.visit_triple_expressions(triple_expr, predicate_finder, predicates)
Beispiel #26
0
def setup_context(shex_str: str, rdf_str: Optional[str]) -> Context:
    """Build an evaluation ``Context`` from a ShEx schema string and optional RDF.

    ``setup_test`` supplies the schema and the graph.  When it yields no graph, a new
    ``Graph`` is created and ``rdf_header`` is parsed into it instead.

    :param shex_str: ShEx schema text, passed to ``setup_test``
    :param rdf_str: RDF text or None, passed to ``setup_test``
    :return: Context over the resulting graph and schema
    """
    schema, graph = setup_test(shex_str, rdf_str)
    if graph is not None:
        return Context(graph, schema)

    # No graph came back -- fall back to one loaded from rdf_header
    fallback_graph = Graph()
    fallback_graph.parse(rdf_header)
    return Context(fallback_graph, schema)
Beispiel #27
0
    def evaluate(
        self,
        rdf: Optional[Union[str, Graph]] = None,
        shex: Optional[Union[str, ShExJ.Schema]] = None,
        focus: Optional[URIPARM] = None,
        start: STARTPARM = None,
        rdf_format: Optional[str] = None,
        debug: Optional[bool] = None,
        debug_slurps: Optional[bool] = None,
        over_slurp: Optional[bool] = None,
        output_sink: Optional[Callable[[EvaluationResult], bool]] = None
    ) -> List[EvaluationResult]:
        """Validate every focus node against every start shape and report the outcomes.

        When any of ``rdf``, ``shex``, ``focus`` or ``start`` is supplied, a temporary
        ``ShExEvaluator`` is built from those overrides, falling back to this instance's settings for
        whatever is omitted; otherwise this instance itself is evaluated.  ``rdf_format`` and
        ``output_sink`` are only consulted when such a temporary evaluator is built.

        Each outcome is handed to the evaluator's output sink.  If the evaluator has no sink, one
        is installed that appends the outcomes to ``self.eval_result``.  ``self.nnodes`` and
        ``self.nerrors`` are reset and updated as a side effect.

        :param rdf: RDF override for the temporary evaluator (default: ``self.g``)
        :param shex: schema override for the temporary evaluator (default: ``self._schema``)
        :param focus: focus override for the temporary evaluator (default: ``self.focus``)
        :param start: start override (default: ``self.start`` if set, else ``START``)
        :param rdf_format: RDF format override (default: ``self.rdf_format``)
        :param debug: debug flag for the evaluation context (default: ``self.debug``)
        :param debug_slurps: slurp tracing flag for the evaluation context (default: ``self.debug_slurps``)
        :param over_slurp: over-slurp flag for the evaluation context (default: ``self.over_slurp``)
        :param output_sink: receiver of each EvaluationResult; a false return value stops processing
        :return: ``self.eval_result`` -- the collected results when the default sink is in use
        """
        if rdf is not None or shex is not None or focus is not None or start is not None:
            evaluator = ShExEvaluator(
                rdf=rdf if rdf is not None else self.g,
                schema=shex if shex is not None else self._schema,
                focus=focus if focus is not None else self.focus,
                start=start
                if start is not None else self.start if self.start else START,
                rdf_format=rdf_format
                if rdf_format is not None else self.rdf_format,
                output_sink=output_sink
                if output_sink is not None else self.output_sink)
        else:
            evaluator = self

        self.eval_result = []
        if evaluator.output_sink is None:

            def sink(e: EvaluationResult) -> bool:
                self.eval_result.append(e)
                return True

            evaluator.output_sink = sink

        processing = True
        self.nerrors = 0
        self.nnodes = 0
        if START in evaluator.start and evaluator._schema.start is None:
            self.nerrors += 1
            evaluator.output_sink(
                EvaluationResult(False, None, None,
                                 'START node is not specified'))
            return self.eval_result

        # Experimental -- xfer all ShEx namespaces to g
        if self.pfx and evaluator.g is not None:
            self.pfx.add_bindings(evaluator.g)

        cntxt = Context(evaluator.g, evaluator._schema)
        cntxt.debug_context.debug = debug if debug is not None else self.debug
        cntxt.debug_context.trace_slurps = debug_slurps if debug_slurps is not None else self.debug_slurps
        # Honor the over_slurp argument (it was previously ignored in favor of self.over_slurp)
        cntxt.over_slurp = over_slurp if over_slurp is not None else self.over_slurp

        for focus_node in evaluator.foci:
            self.nnodes += 1
            # Resolve the start entries into the concrete shapes to test this focus node against
            start_list: List[Union[URIRef, START]] = []
            for start_entry in evaluator.start:
                if start_entry is START:
                    start_list.append(evaluator._schema.start)
                elif isinstance(start_entry, START_TYPE):
                    # The start shapes are the objects of start_predicate on the focus node
                    start_list += list(
                        evaluator.g.objects(focus_node,
                                            start_entry.start_predicate))
                else:
                    start_list.append(start_entry)
            if start_list:
                for start_node in start_list:
                    map_ = FixedShapeMap()
                    map_.add(ShapeAssociation(focus_node, start_node))
                    cntxt.reset()
                    success, fail_reasons = isValid(cntxt, map_)
                    if not success:
                        self.nerrors += 1
                    if not evaluator.output_sink(
                            EvaluationResult(
                                success, focus_node, start_node,
                                '\n'.join(fail_reasons)
                                if not success else '')):
                        # The sink asked to stop -- abandon the remaining shapes and focus nodes
                        processing = False
                        break
            else:
                self.nerrors += 1
                evaluator.output_sink(
                    EvaluationResult(False, focus_node, None,
                                     "No start node located"))
            if not processing:
                break
        return self.eval_result
Beispiel #28
0
def triple_expr_finder(predicates: List[URIRef], expr: ShExJ.shapeExpr,
                       cntxt: Context) -> None:
    """Shape visitor: run ``predicate_finder`` over the triple expressions of a Shape.

    Expressions that are not a ``ShExJ.Shape``, and Shapes without an expression, are ignored.

    :param predicates: accumulator handed through to ``predicate_finder``
    :param expr: shape expression being visited
    :param cntxt: context whose ``visit_triple_expressions`` performs the traversal
    """
    if not isinstance(expr, ShExJ.Shape):
        return
    triple_expr = expr.expression
    if triple_expr is None:
        return
    cntxt.visit_triple_expressions(triple_expr, predicate_finder, predicates)