def nodeSatisfiesDataType(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.3 Datatype Constraints <http://shex.io/shex-semantics/#datatype>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n is a Literal with the datatype v and, if v is
    in the set of SPARQL operand data types, an XML schema string with a value of the lexical form of n can be
    cast to the target type v per XPath Functions 3.1 section 19 Casting. Only datatypes supported by SPARQL
    MUST be tested but ShEx extensions MAY add support for other datatypes.

    :param cntxt: evaluation context; receives ``fail_reason`` when the test fails
    :param n: node being tested
    :param nc: node constraint; only ``nc.datatype`` is consulted here
    :param c: debug context; ``c.debug`` enables a trace print
    :return: True if there is no datatype constraint or ``n`` satisfies it
    """
    expected = nc.datatype
    if expected is None:
        # No datatype constraint -- nothing to check
        return True

    if c.debug:
        print(" Datatype: " + expected)

    # Only literals carry a datatype
    if not isinstance(n, Literal):
        cntxt.fail_reason = "Datatype constraint " + expected + " does not match " + \
                            type(n).__name__ + " " + cntxt.n3_mapper.n3(n)
        cntxt.dump_bnode(n)
        return False

    actual_datatype = _datatype(n)
    if actual_datatype == str(expected):
        return True
    # Not an exact match -- SPARQL operand datatypes also pass when the literal can be cast
    if is_sparql_operand_datatype(expected) and can_cast_to(n, expected):
        return True

    cntxt.fail_reason = "Datatype mismatch - expected: " + expected + " actual: " + actual_datatype
    return False
def _fail_triples(cntxt: Context, T: RDFGraph) -> None:
    """ Report a bounded, sorted sample of the triples in ``T`` through ``cntxt.fail_reason``.

    Nothing is reported when ``T`` is empty. Otherwise a "Triples:" header is assigned, followed by the N3
    rendering of at most the first five triples in sorted order, followed by an ellipsis marker when more
    triples exist than were listed.

    NOTE(review): ``fail_reason`` is assigned several times in a row; presumably the Context accumulates
    each assignment rather than overwriting the previous one -- confirm against Context.

    :param cntxt: evaluation context that receives the failure text and supplies the N3 mapper
    :param T: triples to report
    """
    max_listed = 5
    ordered = sorted(T)
    if not ordered:
        return

    cntxt.fail_reason = "Triples:"
    # The ellipsis below promises truncation, so the listing itself has to be bounded
    for t in ordered[:max_listed]:
        cntxt.fail_reason = " " + cntxt.n3_mapper.n3(t)
    if len(ordered) > max_listed:
        cntxt.fail_reason = " ... "
def nodeSatisfiesStringFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema String Facet Constraints <http://shex.io/shex-semantics/#xs-string>`_

    String facet constraints apply to the lexical form of the RDF Literals and IRIs and blank node
    identifiers. The lexical form used here is ``str(n)`` and its length is ``len(str(n))``.

    The facets are checked in the order length, minlength, maxlength, pattern; the first one that is
    violated is recorded in ``cntxt.fail_reason``.

    :param cntxt: evaluation context; receives ``fail_reason`` when a facet is violated
    :param n: node being tested
    :param nc: node constraint carrying ``length``, ``minlength``, ``maxlength``, ``pattern`` and ``flags``
    :param _c: debug context (unused)
    :return: True if no string facet is present or all present facets are satisfied
    """
    if nc.length is None and nc.minlength is None and nc.maxlength is None and nc.pattern is None:
        return True

    lex = str(n)
    lex_len = len(lex)

    # TODO: Figure out whether we need to connect this to the lxml exslt functions
    # TODO: Map flags if not
    # Messages are built with f-strings: the facet values and the length are not strings, and flags may be None
    if nc.length is not None and lex_len != nc.length:
        cntxt.fail_reason = f"String length mismatch - expected: {nc.length} actual: {lex_len}"
        return False
    if nc.minlength is not None and lex_len < nc.minlength:
        cntxt.fail_reason = f"String length violation - minimum: {nc.minlength} actual: {lex_len}"
        return False
    if nc.maxlength is not None and lex_len > nc.maxlength:
        cntxt.fail_reason = f"String length violation - maximum: {nc.maxlength} actual: {lex_len}"
        return False
    # The pattern is evaluated last and only once
    if nc.pattern is not None and not pattern_match(nc.pattern, nc.flags, lex):
        cntxt.fail_reason = f"Pattern match failure - pattern: {nc.pattern} flags:{nc.flags} string: {lex}"
        return False
    return True
def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext, extras: Optional[Set[URIRef]] = None) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; receives ``fail_reason`` on failure
    :param T: candidate triples
    :param expr: triple expression whose ``min``/``max`` give the cardinality (both default to 1)
    :param c: debug context; ``c.debug`` enables a trace print
    :param extras: predicates whose triples are left out of the maximum-cardinality count
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = 1 if expr.min is None else expr.min
    max_ = 1 if expr.max is None else expr.max
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"

    if c.debug and (min_ != 0 or len(T) != 0):
        print(f"{cardinality_text} matching {len(T)} triples")
    if min_ == 0 and len(T) == 0:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Composite expression: succeed on the first partition whose parts all match
        for partition in _partitions(T, min_, max_):
            if all(matchesExpr(cntxt, part, expr) for part in partition):
                return True
        if min_ != 1 or max_ != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    # Triple constraint: too few triples
    if len(T) < min_:
        if len(T) > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {len(T)} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Don't include extras in the cardinality check
    must_match = RDFGraph([t for t in T if t.p not in extras]) if extras else T

    if 0 <= max_ < len(must_match):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f" {len(T)} triples exceeds max {cardinality_text}"
        return False
    if len(must_match):
        return all(matchesTripleConstraint(cntxt, t, expr) for t in must_match)
    # Everything was consumed by extras -- at least one triple still has to match
    return any(matchesTripleConstraint(cntxt, t, expr) for t in T)
def nodeSatisfiesNumericFacet(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 XML Schema Numeric Facet Constraints <http://shex.io/shex-semantics/#xs-numeric>`_

    Numeric facet constraints apply to the numeric value of RDF Literals with datatypes listed in SPARQL 1.1
    Operand Data Types. Numeric constraints on non-numeric values fail. totaldigits and fractiondigits
    constraints on values not derived from xsd:decimal fail.

    Facets are checked in the order mininclusive, minexclusive, maxinclusive, maxexclusive, totaldigits,
    fractiondigits; the first violated facet is recorded in ``cntxt.fail_reason``.

    :param cntxt: evaluation context; receives ``fail_reason`` when a facet is violated
    :param n: node being tested; its ``value`` attribute is used as the numeric value
    :param nc: node constraint carrying the numeric facets
    :param _c: debug context (unused)
    :return: True if no numeric facet is present or all present facets are satisfied
    """
    if nc.mininclusive is None and nc.minexclusive is None and nc.maxinclusive is None \
            and nc.maxexclusive is None and nc.totaldigits is None and nc.fractiondigits is None:
        return True

    # Messages are built with f-strings because the facet values and v are numbers, not strings
    if not is_numeric(n):
        cntxt.fail_reason = f"Numeric test on non-number: {n}"
        return False
    v = n.value
    if not isinstance(v, numbers.Number):
        cntxt.fail_reason = f"Numeric test on non-number: {v}"
        return False

    # Range tests are written as "not (passing comparison)" so that a value for which every comparison
    # is False (e.g. NaN) is reported against the first range facet present
    if nc.mininclusive is not None and not v >= nc.mininclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum inclusive: {nc.mininclusive} actual: {v}"
        return False
    if nc.minexclusive is not None and not v > nc.minexclusive:
        cntxt.fail_reason = f"Numeric value violation - minimum exclusive: {nc.minexclusive} actual: {v}"
        return False
    if nc.maxinclusive is not None and not v <= nc.maxinclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum inclusive: {nc.maxinclusive} actual: {v}"
        return False
    if nc.maxexclusive is not None and not v < nc.maxexclusive:
        cntxt.fail_reason = f"Numeric value violation - maximum exclusive: {nc.maxexclusive} actual: {v}"
        return False

    if nc.totaldigits is not None:
        n_total = total_digits(n)
        if n_total is None or n_total > nc.totaldigits:
            cntxt.fail_reason = f"Numeric value violation - max total digits: {nc.totaldigits} value: {v}"
            return False
    if nc.fractiondigits is not None:
        # Compare the *fraction* digit count against the fractiondigits facet
        n_fraction = fraction_digits(n)
        if n_fraction is None or n_fraction > nc.fractiondigits:
            cntxt.fail_reason = f"Numeric value violation - max fractional digits: {nc.fractiondigits} value: {v}"
            return False
    return True
def matchesTripleConstraint(cntxt: Context, t: RDFTriple, expr: ShExJ.TripleConstraint, c: DebugContext) -> bool:
    """ expr is a TripleConstraint and:

    * t is a triple
    * t's predicate equals expr's predicate. Let value be t's subject if inverse is true, else t's object.
    * if inverse is true, t is in arcsIn, else t is in arcsOut.

    :param cntxt: evaluation context; receives ``fail_reason`` on a predicate mismatch
    :param t: triple to test
    :param expr: triple constraint supplying ``predicate``, ``inverse`` and ``valueExpr``
    :param c: debug context; ``c.debug`` enables trace prints
    :return: True if the predicate matches and the value satisfies ``expr.valueExpr`` (when one is present)
    """
    # Function-scope import, presumably to avoid a circular import between the two modules -- TODO confirm
    from pyshex.shape_expressions_language.p5_3_shape_expressions import satisfies

    if c.debug:
        # t is a triple, not a string, so it has to be formatted rather than concatenated
        print(c.i(1, f" triple: {t}"))
        print(c.i(1, '', expr._as_json_dumps().split('\n')))

    if not uriref_matches_iriref(t.p, expr.predicate):
        cntxt.fail_reason = f"Predicate mismatch: {t.p} ≠ {expr.predicate}"
        return False

    value = t.s if expr.inverse else t.o
    return expr.valueExpr is None or satisfies(cntxt, value, expr.valueExpr)
def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    NOTE(review): an earlier definition of ``matchesCardinality`` that also accepts ``extras`` is visible in
    this source; if both live in the same module this later definition is the one bound to the name --
    confirm which is intended.

    :param cntxt: evaluation context; receives ``fail_reason`` on failure
    :param T: candidate triples
    :param expr: triple expression whose ``min``/``max`` give the cardinality (both default to 1)
    :param c: debug context; ``c.debug`` enables a trace print
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = expr.min if expr.min is not None else 1
    max_ = expr.max if expr.max is not None else 1
    # Rendered as "{min,max}" with "*" for an unbounded maximum. Built in a single f-string so the
    # conditional applies only to the max part and the counts never need manual str() conversion.
    cardinality_text = f"{{{min_},{'*' if max_ == -1 else max_}}}"

    if c.debug and (min_ != 0 or len(T) != 0):
        print(f"{cardinality_text} matching {len(T)} triples")
    if min_ == 0 and len(T) == 0:
        return True

    if isinstance(expr, ShExJ.TripleConstraint):
        if len(T) < min_:
            if len(T) > 0:
                _fail_triples(cntxt, T)
                cntxt.fail_reason = f" {len(T)} triples less than {cardinality_text}"
            else:
                cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
            return False
        if 0 <= max_ < len(T):
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {len(T)} triples exceeds max {cardinality_text}"
            return False
        return all(matchesTripleConstraint(cntxt, t, expr) for t in T)

    # Composite expression: succeed on the first partition whose parts all match
    for partition in _partitions(T, min_, max_):
        if all(matchesExpr(cntxt, part, expr) for part in partition):
            return True
    if min_ != 1 or max_ != 1:
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f" {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
    return False
def matchesTripleExprLabel(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, c: DebugContext) -> bool:
    """ Resolve a triple expression label through the context and evaluate the referenced expression.

    :param cntxt: evaluation context; resolves the label and receives ``fail_reason`` if it is unknown
    :param T: candidate triples
    :param expr: label of the triple expression to evaluate
    :param c: debug context; ``c.debug`` enables a trace print
    :return: result of ``matchesCardinality`` on the resolved expression, or False if the label is unresolved
    """
    if c.debug:
        print(f" {expr}")

    te = cntxt.tripleExprFor(expr)
    if not te:
        cntxt.fail_reason = f"{expr}: Labeled triple expression not found"
        return False
    return matchesCardinality(cntxt, T, te)
def matchesTripleExprRef(cntxt: Context, T: RDFGraph, expr: ShExJ.tripleExprLabel, _: DebugContext) -> bool:
    """ expr is a tripleExprRef and satisfies(value, tripleExprWithId(tripleExprRef), G, m).

    The tripleExprWithId function is defined in Triple Expression Reference Requirement.

    :param cntxt: evaluation context; resolves the reference and receives ``fail_reason`` if it is unknown
    :param T: candidate triples
    :param expr: label of the referenced triple expression
    :param _: debug context (unused)
    :return: True if the reference resolves and every triple in T matches the referenced expression
    """
    # Keep the label separate from the resolved expression so it is still available for the error message
    referenced = cntxt.tripleExprFor(expr)
    if referenced is None:
        cntxt.fail_reason = f"{expr}: Reference not found"
        return False
    return all(matchesTripleConstraint(cntxt, t, referenced) for t in T)
def satisfiesShapeExprRef(cntxt: Context, n: Node, se: ShExJ.shapeExprLabel, c: DebugContext) -> bool:
    """ Se is a shapeExprRef and there exists in the schema a shape expression se2 with that id
    and satisfies(n, se2, G, m).

    :param cntxt: evaluation context; ``cntxt.schema.shapes`` is searched for the label
    :param n: node being tested
    :param se: label of the referenced shape expression
    :param c: debug context; ``c.debug`` enables a trace print
    :return: result of ``satisfies`` on the first shape whose id equals ``se``, or False if there is none
    """
    if c.debug:
        print(f"id: {se}")

    # First shape in schema order with a matching id, if any
    referenced = next((shape for shape in cntxt.schema.shapes if shape.id == se), None)
    if referenced is None:
        cntxt.fail_reason = f"{se}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, referenced)
def satisfiesExternal(cntxt: Context, n: Node, se: ShExJ.ShapeExternal, c: DebugContext) -> bool:
    """ Se is a ShapeExternal and implementation-specific mechanisms not defined in this specification
    indicate success.

    :param cntxt: evaluation context; ``external_shape_for`` resolves the external shape
    :param n: node being tested
    :param se: external shape reference; only ``se.id`` is used
    :param c: debug context; ``c.debug`` enables a trace print
    :return: result of ``satisfies`` on the resolved shape, or False if nothing was resolved
    """
    if c.debug:
        print(f"id: {se.id}")

    extern_shape = cntxt.external_shape_for(se.id)
    if not extern_shape:
        cntxt.fail_reason = f"{se.id}: Shape is not in Schema"
        return False
    return satisfies(cntxt, n, extern_shape)
def isValid(cntxt: Context, m: FixedShapeMap) -> Tuple[bool, List[str]]:
    """`5.2 Validation Definition <http://shex.io/shex-semantics/#validation>`_

    The expression isValid(G, m) indicates that for every nodeSelector/shapeLabel pair (n, s) in m, s has a
    corresponding shape expression se and satisfies(n, se, G, m). satisfies is defined below for each form
    of shape expression

    :param cntxt: evaluation context - includes graph and schema
    :param m: list of NodeShape pairs to test
    :return: Success/failure indicator and, if fail, a list of failure reasons
    """
    if not cntxt.is_valid:
        return False, cntxt.error_list

    parse_nodes = []
    for pair in m:
        n = pair.nodeSelector
        if not isinstance_(n, Node):
            return False, [f"{n}: Triple patterns are not implemented"]

        # The focus must appear as a subject or an object. The third test is because the spec asserts
        # that completely empty graphs pass in certain circumstances. Evaluated lazily, left to right.
        focus_ok = (next(cntxt.graph.predicate_objects(pair.nodeSelector), None) or
                    next(cntxt.graph.subject_predicates(pair.nodeSelector), None) or
                    not next(cntxt.graph.triples((None, None, None)), None))
        if not focus_ok:
            return False, [f"Focus: {pair.nodeSelector} not in graph"]

        wants_start = pair.shapeLabel is None or pair.shapeLabel is START
        s = cntxt.shapeExprFor(START if wants_start else pair.shapeLabel)
        cntxt.current_node = ParseNode(satisfies, s, n, cntxt)
        if not s:
            if pair.shapeLabel is START or pair.shapeLabel is None:
                cntxt.fail_reason = "START node is not specified or is invalid"
            else:
                cntxt.fail_reason = f"Shape: {pair.shapeLabel} not found in Schema"
            return False, cntxt.process_reasons()

        parse_nodes.append(cntxt.current_node)
        # current_node is re-read after the call, exactly as the result assignment requires
        if satisfies(cntxt, n, s):
            cntxt.current_node.result = True
        else:
            cntxt.current_node.result = False
            return False, cntxt.process_reasons()
    return True, []
def nodeSatisfiesValues(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, _c: DebugContext) -> bool:
    """ `5.4.5 Values Constraint <http://shex.io/shex-semantics/#values>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if n matches some valueSetValue vsv in v.

    :param cntxt: evaluation context; receives ``fail_reason`` when no value matches
    :param n: node being tested
    :param nc: node constraint; only ``nc.values`` is consulted for matching
    :param _c: debug context (unused)
    :return: True if there is no value set or ``n`` matches at least one entry in it
    """
    if nc.values is None:
        return True

    # Stop at the first value set entry that accepts the node
    for vsv in nc.values:
        if _nodeSatisfiesValue(cntxt, n, vsv):
            return True

    # The JSON rendering of the constraint is cut to 60 characters to keep the message short
    constraint_text = as_json(cntxt.type_last(nc), indent=None)[:60]
    cntxt.fail_reason = "Node: " + cntxt.n3_mapper.n3(n) + " not in value set:\n\t" + constraint_text + "..."
    return False
def nodeSatisfiesNodeKind(cntxt: Context, n: Node, nc: ShExJ.NodeConstraint, c: DebugContext) -> bool:
    """ `5.4.2 Node Kind Constraints <http://shex.io/shex-semantics/#nodeKind>`_

    For a node n and constraint value v, nodeSatisfies(n, v) if:

    * v = "iri" and n is an IRI.
    * v = "bnode" and n is a blank node.
    * v = "literal" and n is a Literal.
    * v = "nonliteral" and n is an IRI or blank node.

    :param cntxt: evaluation context; receives ``fail_reason`` on a mismatch
    :param n: node being tested
    :param nc: node constraint; only ``nc.nodeKind`` is consulted
    :param c: debug context; ``c.debug`` enables a trace print
    :return: True if there is no node kind constraint or ``n`` is of the required kind
    """
    if c.debug and nc.nodeKind is not None:
        print(f" Kind: {nc.nodeKind}")
    if nc.nodeKind is None:
        return True

    # Each node kind keyword paired with the node class(es) that satisfy it
    kind_classes = (
        ('iri', URIRef),
        ('bnode', BNode),
        ('literal', Literal),
        ('nonliteral', (URIRef, BNode)),
    )
    for kind, classes in kind_classes:
        if nc.nodeKind == kind and isinstance(n, classes):
            return True

    cntxt.fail_reason = f"Node kind mismatch have: {type(n).__name__} expected: {nc.nodeKind}"
    return False
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape, c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    :param n: focus node
    :param S: Shape to be satisfied
    :param cntxt: Evaluation context
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """
    # Recursion detection. If start_evaluating returns a boolean value, this is the assumed result of the
    # shape evaluation. If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)
    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency. If you are interested in
        # getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Collect every triple on n, in the direction(s) the expression uses, for each predicate it mentions
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(
                    g.triples((n if direction.is_fwd else None,
                               iriref_to_uriref(predicate),
                               n if direction.is_rev else None)))

        if c.debug:
            print(c.i(1, "predicates:", sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(c.i(1, "matchables:", sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph([t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:"
                # Triples are not strings -- format each one instead of concatenating
                cntxt.fail_reason = '\n'.join(f"\t{t}" for t in non_matchables)
                if c.debug:
                    # One formatted message; the count is an int and must not be concatenated to a str
                    print(c.i(0, f"<--- Satisfies shape {c.d()} FAIL - "
                                 f"{len(non_matchables)} non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # NOTE(review): this early exit leaves (n, S.id) on cntxt.evaluate_stack and skips
                # done_evaluating -- confirm that is intended
                return False

        # Evaluate the actual expression. Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e) for e in S.extra} if S.extra is not None else set()
                if len(extras):
                    permutable_matchables = RDFGraph([t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph(
                        [t for t in matchables if t not in permutable_matchables])
                    if c.debug:
                        print(c.i(1, "Complete match failed -- evaluating extras", list(extras)))
                    # Retry with each subset of the EXTRA triples added to the non-EXTRA triples
                    for matched, remainder in partition_2(permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                # Still None here means nothing matched
                rslt = rslt or False
        else:
            rslt = True  # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and
        # try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            # NOTE(review): called without the debug context; presumably a wrapper not visible here
            # supplies it -- confirm
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt