def partition_map(partition: List[List[int]]) -> Tuple[RDFGraph, ...]:
    """ Convert a partition expressed as lists of indices into a tuple of RDFGraphs

    Indices below ``t_list_len`` select the corresponding entry of ``t_list``; indices at or
    beyond it select nothing.  A part consisting of exactly one such out-of-range index becomes
    an ``RDFGraph`` built with no arguments.

    :param partition: list of parts, each part a list of indices
    :return: one RDFGraph per part, in the same order
    """
    def graph_for(indices: List[int]) -> RDFGraph:
        # A lone out-of-range index stands for an empty part
        if len(indices) == 1 and indices[0] >= t_list_len:
            return RDFGraph()
        return RDFGraph([t_list[idx] for idx in indices if idx < t_list_len])

    return tuple(graph_for(indices) for indices in partition)
def test_rdf_graph(self):
    """ RDFGraph collapses equal triples and renders a parsed graph as the expected string """
    def count_triple():
        # A fresh, equal-valued tuple on every call so de-duplication is by value, not identity
        return EX.issue1, EX.count, Literal(17)

    single = RDFGraph([count_triple()])
    self.assertEqual(1, len(single))

    # The same triple twice is still one element
    doubled = RDFGraph([count_triple(), count_triple()])
    self.assertEqual(1, len(doubled))

    # A plain tuple and an RDFTriple with the same content are one element
    mixed = RDFGraph([count_triple(), RDFTriple(count_triple())])
    self.assertEqual(1, len(mixed))

    _, parsed = setup_test(None, rdf_1)
    rendered = str(RDFGraph(parsed))
    self.assertEqual(rdf_out, rendered)
def test_large_partition(self):
    """ partition_t must stay lazy: only the requested partitions are ever produced """
    # The reason for this test is to be certain that we get generators all the way through.  This test
    # will take forever if, somewhere in the process, we actually realize the whole partition
    source = Graph()
    source.parse(data=rdf_header, format="turtle")
    for idx in range(25):
        source.add((EX['s' + str(idx)], RDF.type, EX.thing))
    rdfg = RDFGraph(source)

    into_twenty = partition_t(rdfg, 20)
    # Skip to the 100th element in the partition
    for _ in range(100):
        next(into_twenty)

    expected = [
        {
            'http://schema.example/s0',
            'http://schema.example/s1',
            'http://schema.example/s10',
            'http://schema.example/s11',
            'http://schema.example/s12',
            'http://schema.example/s13'
        },
        {'http://schema.example/s14'},
        {'http://schema.example/s15'},
        {'http://schema.example/s16'},
        {'http://schema.example/s17'},
        {'http://schema.example/s18'},
        {'http://schema.example/s19'},
        {'http://schema.example/s2'},
        {'http://schema.example/s20'},
        {'http://schema.example/s21'},
        {'http://schema.example/s22'},
        {'http://schema.example/s23'},
        {'http://schema.example/s24'},
        {'http://schema.example/s3'},
        {'http://schema.example/s4'},
        {'http://schema.example/s9'},
        {'http://schema.example/s5'},
        {'http://schema.example/s8'},
        {'http://schema.example/s6'},
        {'http://schema.example/s7'},
    ]
    # Reduce every part to the set of its triples' first components (the subjects), as strings
    actual = [{str(list(triple)[0]) for triple in part} for part in next(into_twenty)]
    self.assertEqual(expected, actual)

    # Both of these are expected to yield exactly one partition
    into_one = partition_t(rdfg, 1)
    self.assertEqual(1, sum(1 for _ in into_one))
    into_twenty_five = partition_t(rdfg, 25)
    self.assertEqual(1, sum(1 for _ in into_twenty_five))
def __init__(self, cntxt: Context, T: RDFGraph, expr: ShExJ.EachOf) -> None:
    """ Build the lookup tables needed to evaluate expr against T

    For each tripleExpr in ``expr.expressions`` determine the set of applicable predicates and
    their corresponding triples.  Evaluation then distinguishes these cases:

    * Case 1: predicate occurs in exactly one expression and expression references exactly one predicate --
      evaluate and return false if fail
    * Case 2: predicate occurs two or more expressions and all expressions reference exactly one predicate --
      permute predicate over expressions until a passing condition is found
    * Case 3: expression references two or more predicates and all referenced predicates occur only once --
      evaluate with set of all predicates and return false if fail
    * Case 4: predicate occurs in two or more expressions and at least one of the referenced expressions
      (NOTE(review): the original comment is cut off at this point)

    :param cntxt: evaluation context
    :param T: List of triples to evaluate
    :param expr: expression to evaluate against
    """
    # tripleExpr = Union["EachOf", "OneOf", "TripleConstraint", tripleExprLabel]

    # Member expressions, addressed by position
    self.expressions: List[ShExJ.tripleExpr] = []
    # predicate -> positions of the expressions that reference it
    self.predicate_to_expression_nums: Dict[IRIREF, List[int]] = {}
    # position -> predicates referenced by that expression
    self.expression_num_predicates: List[Set[IRIREF]] = []
    # predicate -> the triples of T carrying that predicate
    self.predicate_graph: Dict[IRIREF, RDFGraph] = {}

    for position, triple_expr in enumerate(expr.expressions):
        self.expressions.append(triple_expr)
        referenced = predicates_in_tripleexpr(triple_expr, cntxt)
        self.expression_num_predicates.append(referenced)
        for pred in referenced:
            if pred in self.predicate_to_expression_nums:
                self.predicate_to_expression_nums[pred].append(position)
            else:
                self.predicate_to_expression_nums[pred] = [position]
            # The triples for a predicate are collected once, on first sight
            if pred not in self.predicate_graph:
                wanted = str(pred)
                self.predicate_graph[pred] = RDFGraph([t for t in T if str(t.p) == wanted])
def matchesCardinality(cntxt: Context, T: RDFGraph, expr: Union[ShExJ.tripleExpr, ShExJ.tripleExprLabel],
                       c: DebugContext, extras: Optional[Set[URIRef]] = None) -> bool:
    """ Evaluate cardinality expression

    expr has a cardinality of min and/or max not equal to 1, where a max of -1 is treated as unbounded, and
    T can be partitioned into k subsets T1, T2,…Tk such that min ≤ k ≤ max and for each Tn,
    matches(Tn, expr, m) by the remaining rules in this list.

    :param cntxt: evaluation context; ``fail_reason`` is set on the failure paths
    :param T: triples to be matched
    :param expr: expression carrying the ``min`` / ``max`` cardinality
    :param c: debug context; when ``c.debug`` is set a trace line is printed
    :param extras: predicates whose triples are left out of the maximum check
    :return: True if T satisfies expr under its cardinality
    """
    # TODO: Cardinality defaults into spec
    min_ = 1 if expr.min is None else expr.min
    max_ = 1 if expr.max is None else expr.max
    upper_text = '*' if max_ == -1 else max_
    cardinality_text = f"{{{min_},{upper_text}}}"

    if c.debug and (min_ != 0 or len(T) != 0):
        print(f"{cardinality_text} matching {len(T)} triples")

    # Nothing required and nothing supplied
    if min_ == 0 and len(T) == 0:
        return True

    if not isinstance(expr, ShExJ.TripleConstraint):
        # Composite expression: look for one partition of T in which every part matches
        for partition in _partitions(T, min_, max_):
            if all(matchesExpr(cntxt, part, expr) for part in partition):
                return True
        if min_ != 1 or max_ != 1:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {len(T)} triples cannot be partitioned into {cardinality_text} passing groups"
        return False

    # --- TripleConstraint from here on ---
    if len(T) < min_:
        if len(T) > 0:
            _fail_triples(cntxt, T)
            cntxt.fail_reason = f" {len(T)} triples less than {cardinality_text}"
        else:
            cntxt.fail_reason = f" No matching triples found for predicate {cntxt.n3_mapper.n3(expr.predicate)}"
        return False

    # Don't include extras in the cardinality check
    if extras:
        # The set of things NOT consumed in extra
        must_match = RDFGraph([t for t in T if t.p not in extras])
    else:
        must_match = T

    # A negative max (-1) is unbounded, so only a non-negative max can be exceeded
    if 0 <= max_ < len(must_match):
        _fail_triples(cntxt, T)
        cntxt.fail_reason = f" {len(T)} triples exceeds max {cardinality_text}"
        return False

    if len(must_match):
        # Every triple that has to match must satisfy the constraint
        return all(matchesTripleConstraint(cntxt, t, expr) for t in must_match)
    # Nothing has to match: one satisfying triple anywhere in T is enough
    return any(matchesTripleConstraint(cntxt, t, expr) for t in T)
def evaluate(self, cntxt: Context) -> bool:
    """ Evaluate the member expressions against the triples indexed by the constructor

    Reads ``self.expressions``, ``self.predicate_to_expression_nums``,
    ``self.expression_num_predicates`` and ``self.predicate_graph``.

    :param cntxt: evaluation context
    :return: False as soon as one of the cases below fails, otherwise True
    """
    # NOTE(review): imported inside the function -- presumably to avoid a circular import; confirm
    from pyshex.shape_expressions_language.p5_5_shapes_and_triple_expressions import matches

    def some_partition_matches(graph, e_nums) -> bool:
        """ True if graph can be split into len(e_nums) parts that pairwise match the expressions """
        return any(
            all(matches(cntxt, t, self.expressions[e_num]) for t, e_num in zip(partition, e_nums))
            for partition in partition_t(graph, len(e_nums)))

    for p, expr_nums in self.predicate_to_expression_nums.items():
        if all(len(self.expression_num_predicates[expr_num]) == 1 for expr_num in expr_nums):
            if len(expr_nums) == 1:
                # Case 1: unique predicate/expression combo
                if not matches(cntxt, self.predicate_graph[p], self.expressions[expr_nums[0]]):
                    return False
            elif not some_partition_matches(self.predicate_graph[p], expr_nums):
                # Case 2: several expressions match exactly one predicate -- split the triples
                return False

    for expr_num, predicates in enumerate(self.expression_num_predicates):
        if len(predicates) > 1:
            # Case 3: Expression matches multiple predicates but each predicate referenced only once
            #         Build a composite graph of all triples and evaluate it
            target = RDFGraph()
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) == 1:
                    target.update(self.predicate_graph[p])
            if target and not matches(cntxt, target, self.expressions[expr_num]):
                return False

            # Predicates shared with other expressions: gather everything reachable from the
            # shared predicate and look for a split that satisfies all expressions involved.
            # The original initialised its success flag to True here, so a failed search was
            # silently ignored; a failed search now fails the evaluation.
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) > 1:
                    # Distinct names so the set being iterated is not rebound mid-loop
                    closure_predicates, closure_expressions = self._predicate_closure(p)
                    target = RDFGraph()
                    for predicate in closure_predicates:
                        target.update(self.predicate_graph[predicate])
                    if not some_partition_matches(target, closure_expressions):
                        return False
    return True
def valid_remainder(cntxt: Context, n: Node, matchables: RDFGraph, S: ShExJ.Shape) -> bool:
    """ Decide whether the triples left unmatched by the shape's expression are acceptable

    Let **outs** be the arcsOut in remainder: `outs = remainder ∩ arcsOut(G, n)`.

    Let **matchables** be the triples in outs whose predicate appears in a TripleConstraint in `expression`.
    If `expression` is absent, matchables = Ø (the empty set).

    * There is no triple in **matchables** which matches a TripleConstraint in expression
    * There is no triple in **matchables** whose predicate does not appear in extra.
    * closed is false or unmatchables is empty

    :param cntxt: evaluation context
    :param n: focus node
    :param matchables: non-matched triples
    :param S: Shape being evaluated
    :return: True if remainder is valid
    """
    # TODO: Update this and satisfies to address the new algorithm
    # Let **outs** be the arcsOut in remainder: `outs = remainder ∩ arcsOut(G, n)`.
    outs = arcsOut(cntxt.graph, n).intersection(matchables)

    # Predicates that appear in a TripleConstraint in `expression`
    predicates = predicates_in_expression(S, cntxt)

    # Let **matchables** be the triples in outs whose predicate appears in predicates. If
    # `expression` is absent, matchables = Ø (the empty set).
    matchables = RDFGraph(t for t in outs if str(t.p) in predicates)

    # There is no triple in **matchables** which matches a TripleConstraint in expression
    if matchables and S.expression is not None:
        tes = triple_constraints_in_expression(S.expression, cntxt)
        if any(matchesTripleConstraint(cntxt, m, te) for m in matchables for te in tes):
            return False

    # There is no triple in **matchables** whose predicate does not appear in extra.
    # (An empty *set* when no extras are declared -- the original fell back to an empty dict.)
    extras = {iriref_to_uriref(e) for e in S.extra} if S.extra is not None else set()
    if any(t.p not in extras for t in matchables):
        return False

    # closed is false or unmatchables is empty.
    return not S.closed.val or not bool(outs - matchables)
def test_partition_t(self):
    """ partition_t over graphs of zero, one and two triples split two ways """
    shoe_size = RDFTriple((EX.Alice, EX.shoeSize, Literal(30, datatype=XSD.integer)))
    teacher = RDFTriple((EX.Alice, RDF.type, EX.Teacher))
    # The remaining triples are built but not used by the assertions below
    person = RDFTriple((EX.Alice, RDF.type, EX.Person))
    hat_owner = RDFTriple((EX.SomeHat, EX.owner, EX.Alice))
    moon = RDFTriple((EX.TheMoon, EX.madeOf, EX.GreenCheese))

    backing = Graph()

    # Empty graph: a single partition made of two empty graphs
    empty = RDFGraph(backing)
    expected_empty = [(RDFGraph(), RDFGraph())]
    self.assertEqual(expected_empty, [*partition_t(empty, 2)])

    # One triple: it lands either on the left or on the right
    backing.add(shoe_size)
    one = RDFGraph(backing)
    expected_one = [(one, empty), (empty, one)]
    self.assertEqual(expected_one, [*partition_t(one, 2)])

    # Two triples: the two one/one splits followed by the two all/none splits
    backing.add(teacher)
    two = RDFGraph(backing)
    expected_two = [
        (one, RDFGraph((teacher, ))),
        (RDFGraph((teacher, )), one),
        (two, empty),
        (empty, two),
    ]
    self.assertEqual(expected_two, [*partition_t(two, 2)])
def test_partition_2(self):
    """ partition_2 yields 2**len(graph) partitions and agrees with partition_t(graph, 2) """
    def graph_of(turtle_body):
        # Expand the snippet with gen_rdf, parse it as Turtle and wrap the result
        parsed = Graph()
        parsed.parse(data=gen_rdf(turtle_body), format="turtle")
        return RDFGraph(parsed)

    # Len(partition) == 2**len(graph)
    empty = RDFGraph(Graph())
    # partition_2 is a generator - you can only do it once
    from_p2 = list(partition_2(empty))
    self.assertEqual(1, len(from_p2))
    self.assertEqual([(RDFGraph(), RDFGraph())], from_p2)
    from_pt = list(partition_t(empty, 2))
    self.assertEqual(from_p2, from_pt)

    one_triple = graph_of("""<Alice> ex:shoeSize "30"^^xsd:integer .""")
    from_p2 = list(partition_2(one_triple))
    self.assertEqual(2, len(from_p2))
    from_pt = list(partition_t(one_triple, 2))
    self.assertEqual(from_p2, from_pt)

    # Two elements give 4 partitions ((e1, e2), ()), ((e1), (e2)), ((e2), (e1)), ((), (e1, e2))
    size_cases = (
        (4, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher ."""),
        (8, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher . <Alice> a ex:Person ."""),
        (16, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher . <Alice> a ex:Person . <Alice> a ex:Fool ."""),
    )
    for expected_count, turtle_body in size_cases:
        produced = list(partition_2(graph_of(turtle_body)))
        self.assertEqual(expected_count, len(produced))
def arcsIn(G: Graph, n: Node) -> RDFGraph:
    """ arcsIn(G, n) is the set of triples in a graph G with object n.

    :param G: graph to search
    :param n: node to look for in the object position
    :return: the matching triples wrapped in an RDFGraph
    """
    object_pattern = (None, None, n)
    incoming = G.triples(object_pattern)
    return RDFGraph(incoming)
def arcsOut(G: Graph, n: Node) -> RDFGraph:
    """ arcsOut(G, n) is the set of triples in a graph G with subject n.

    :param G: graph to search
    :param n: node to look for in the subject position
    :return: the matching triples wrapped in an RDFGraph
    """
    subject_pattern = (n, None, None)
    outgoing = G.triples(subject_pattern)
    return RDFGraph(outgoing)
def satisfiesShape(cntxt: Context, n: Node, S: ShExJ.Shape, c: DebugContext) -> bool:
    """ `5.5.2 Semantics <http://shex.io/shex-semantics/#triple-expressions-semantics>`_

    For a node `n`, shape `S`, graph `G`, and shapeMap `m`, `satisfies(n, S, G, m)` if and only if:

    * `neigh(G, n)` can be partitioned into two sets matched and remainder such that
      `matches(matched, expression, m)`. If expression is absent, remainder = `neigh(G, n)`.

    :param n: focus node
    :param S: Shape to be satisfied
    :param cntxt: Evaluation context
    :param c: Debug context
    :return: true iff `satisfies(n, S, cntxt)`
    """
    # Recursion detection.  If start_evaluating returns a boolean value, this is the assumed result of the shape
    # evaluation.  If it returns None, then an initial evaluation is needed
    rslt = cntxt.start_evaluating(n, S)
    if rslt is None:
        cntxt.evaluate_stack.append((n, S.id))
        predicates = directed_predicates_in_expression(S, cntxt)
        matchables = RDFGraph()

        # Note: The code below does an "over-slurp" for the sake of expediency.  If you are interested in
        #       getting EXACTLY the needed triples, set cntxt.over_slurp to false
        if isinstance(cntxt.graph, SlurpyGraph) and cntxt.over_slurp:
            with slurper(cntxt, n, S) as g:
                _ = g.triples((n, None, None))

        # Collect the triples on n for every predicate the expression mentions, honouring direction
        for predicate, direction in predicates.items():
            with slurper(cntxt, n, S) as g:
                matchables.add_triples(
                    g.triples((n if direction.is_fwd else None,
                               iriref_to_uriref(predicate),
                               n if direction.is_rev else None)))

        if c.debug:
            print(c.i(1, "predicates:", sorted(cntxt.n3_mapper.n3(p) for p in predicates.keys())))
            print(c.i(1, "matchables:", sorted(cntxt.n3_mapper.n3(m) for m in matchables)))
            print()

        if S.closed:
            # TODO: Is this working correctly on reverse items?
            non_matchables = RDFGraph([t for t in arcsOut(cntxt.graph, n) if t not in matchables])
            if len(non_matchables):
                # Header and triple list are combined into one message (the header used to be
                # overwritten), and each triple is formatted rather than concatenated to a str,
                # which raised TypeError
                cntxt.fail_reason = "Unmatched triples in CLOSED shape:\n" + \
                                    '\n'.join(f"\t{t}" for t in non_matchables)
                if c.debug:
                    # The count is formatted, not added to a str (int + str raised TypeError)
                    print(c.i(0,
                              f"<--- Satisfies shape {c.d()} FAIL - ",
                              f"{len(non_matchables)} non-matching triples on a closed shape"))
                    print(c.i(1, "", list(non_matchables)))
                    print()
                # NOTE(review): this early exit skips cntxt.done_evaluating() and leaves the entry
                # pushed on cntxt.evaluate_stack above -- confirm against Context that this is intended
                return False

        # Evaluate the actual expression.  Start assuming everything matches...
        if S.expression:
            if matches(cntxt, matchables, S.expression):
                rslt = True
            else:
                extras = {iriref_to_uriref(e) for e in S.extra} if S.extra is not None else set()
                if len(extras):
                    # Only triples whose predicate is listed in EXTRA may be left out of the match
                    permutable_matchables = RDFGraph([t for t in matchables if t.p in extras])
                    non_permutable_matchables = RDFGraph(
                        [t for t in matchables if t not in permutable_matchables])
                    if c.debug:
                        print(c.i(1, "Complete match failed -- evaluating extras", list(extras)))
                    for matched, _ in partition_2(permutable_matchables):
                        permutation = non_permutable_matchables.union(matched)
                        if matches(cntxt, permutation, S.expression):
                            rslt = True
                            break
                # rslt is still None when nothing matched -- normalise to False
                rslt = rslt or False
        else:
            rslt = True         # Empty shape

        # If an assumption was made and the result doesn't match the assumption, switch directions and try again
        done, consistent = cntxt.done_evaluating(n, S, rslt)
        if not done:
            # NOTE(review): called without `c`; presumably a decorator outside this view supplies
            # the DebugContext -- confirm before changing this call
            rslt = satisfiesShape(cntxt, n, S)
        rslt = rslt and consistent

        cntxt.evaluate_stack.pop()
    return rslt