def test_large_partition(self):
    """Check that ``partition_t`` stays lazy on a larger graph.

    The partitions have to come from generators all the way through: if any
    step realized the complete set of partitions, this test would effectively
    never finish.
    """
    graph = Graph()
    graph.parse(data=rdf_header, format="turtle")
    for index in range(25):
        graph.add((EX['s' + str(index)], RDF.type, EX.thing))
    rdf_graph = RDFGraph(graph)

    twenty_way = partition_t(rdf_graph, 20)
    # Consume the first 100 partitions; the assertion below inspects the one
    # that follows them.
    for _ in range(100):
        next(twenty_way)

    expected_subjects = [
        {
            'http://schema.example/s0',
            'http://schema.example/s1',
            'http://schema.example/s10',
            'http://schema.example/s11',
            'http://schema.example/s12',
            'http://schema.example/s13',
        },
        {'http://schema.example/s14'},
        {'http://schema.example/s15'},
        {'http://schema.example/s16'},
        {'http://schema.example/s17'},
        {'http://schema.example/s18'},
        {'http://schema.example/s19'},
        {'http://schema.example/s2'},
        {'http://schema.example/s20'},
        {'http://schema.example/s21'},
        {'http://schema.example/s22'},
        {'http://schema.example/s23'},
        {'http://schema.example/s24'},
        {'http://schema.example/s3'},
        {'http://schema.example/s4'},
        {'http://schema.example/s9'},
        {'http://schema.example/s5'},
        {'http://schema.example/s8'},
        {'http://schema.example/s6'},
        {'http://schema.example/s7'},
    ]
    # Reduce every part to the set of subject URIs (first element of each
    # triple) so it can be compared against plain strings.
    actual_subjects = [{str(list(triple)[0]) for triple in part} for part in next(twenty_way)]
    self.assertEqual(expected_subjects, actual_subjects)

    # A 1-way split and a 25-way split are each expected to produce exactly
    # one partition.
    one_way = partition_t(rdf_graph, 1)
    self.assertEqual(1, sum(1 for _ in one_way))
    full_way = partition_t(rdf_graph, 25)
    self.assertEqual(1, sum(1 for _ in full_way))
def _partitions(T: RDFGraph, min_: Optional[int], max_: Optional[int]) -> List[List[RDFGraph]]:
    """Lazily produce candidate partitions of ``T``.

    Note that this is a generator function: partitions are yielded one at a
    time rather than returned as a realized list.

    :param T: graph to be split
    :param min_: smallest number of parts to try; values below 1 are raised to 1
    :param max_: largest number of parts to try. ``1`` yields ``[T]`` only;
        ``-1`` uses ``max(len(T), min_)`` as the upper bound
    :return: each partition produced by ``partition_t`` for every part count
        in the selected range, in increasing part-count order
    """
    if max_ == 1:
        # Single bucket: the whole graph is the only possible partition.
        yield [T]
        return

    fewest_parts = max(min_, 1)
    most_parts = max(len(T), min_) if max_ == -1 else max_
    for part_count in range(fewest_parts, most_parts + 1):
        yield from partition_t(T, part_count)
def evaluate(self, cntxt: Context) -> bool:
    """Decide whether the collected triples satisfy every expression.

    The check proceeds case by case and stops at the first failure:

    * Case 1 -- a predicate referenced by exactly one expression, which in
      turn references only that predicate: the predicate's triples must
      match the expression directly.
    * Case 2 -- a predicate referenced by several expressions that each
      reference only that predicate: some partition of the predicate's
      triples must match the expressions pairwise.
    * Case 3 -- an expression referencing several predicates: the triples of
      those predicates that are referenced by this expression alone are
      merged and, when non-empty, must match the expression.
    * Case 4 -- a predicate of a multi-predicate expression that is also
      referenced by other expressions: the triples of every predicate
      returned by ``_predicate_closure`` are merged, and some partition of
      them must match the expressions returned with it pairwise.

    :param cntxt: evaluation context, passed through to ``matches``
    :return: ``True`` if no case fails, ``False`` as soon as one does
    """
    # NOTE(review): imported locally, presumably to avoid a circular import
    # -- confirm before hoisting to module level.
    from pyshex.shape_expressions_language.p5_5_shapes_and_triple_expressions import matches

    def some_partition_matches(graph, expr_nums) -> bool:
        """Return True if any partition of ``graph`` matches ``expr_nums`` pairwise."""
        for partition in partition_t(graph, len(expr_nums)):
            if all(matches(cntxt, part, self.expressions[e_num])
                   for part, e_num in zip(partition, expr_nums)):
                return True
        return False

    for p, expr_nums in self.predicate_to_expression_nums.items():
        if all(len(self.expression_num_predicates[expr_num]) == 1 for expr_num in expr_nums):
            if len(expr_nums) == 1:
                # Case 1: unique predicate/expression combo
                if not matches(cntxt, self.predicate_graph[p], self.expressions[expr_nums[0]]):
                    return False
            elif not some_partition_matches(self.predicate_graph[p], expr_nums):
                # Case 2: several expressions match exactly one predicate -- split the triples
                return False

    for expr_num in range(len(self.expression_num_predicates)):
        predicates = self.expression_num_predicates[expr_num]
        if len(predicates) > 1:
            # Case 3: Expression matches multiple predicates but each predicate referenced only once
            #         Build a composite graph of all triples and evaluate it
            target = RDFGraph()
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) == 1:
                    target.update(self.predicate_graph[p])
            if target and not matches(cntxt, target, self.expressions[expr_num]):
                return False

            # Case 4: predicate shared with other expressions -- evaluate the
            # whole closure of related predicates/expressions together.
            for p in predicates:
                if len(self.predicate_to_expression_nums[p]) > 1:
                    closure_predicates, closure_expr_nums = self._predicate_closure(p)
                    target = RDFGraph()
                    for predicate in closure_predicates:
                        target.update(self.predicate_graph[predicate])
                    # The search must start from "no match found"; starting
                    # from success would make this failure path unreachable.
                    if not some_partition_matches(target, closure_expr_nums):
                        return False
    return True
def test_partition_t(self):
    """Verify the exact 2-way partitions of graphs with zero, one and two triples."""
    shoe_size = RDFTriple((EX.Alice, EX.shoeSize, Literal(30, datatype=XSD.integer)))
    teacher = RDFTriple((EX.Alice, RDF.type, EX.Teacher))
    # The three triples below are built but not used by any assertion here.
    person = RDFTriple((EX.Alice, RDF.type, EX.Person))
    hat_owner = RDFTriple((EX.SomeHat, EX.owner, EX.Alice))
    moon = RDFTriple((EX.TheMoon, EX.madeOf, EX.GreenCheese))

    graph = Graph()

    # Empty graph: the only split is two empty graphs.
    empty = RDFGraph(graph)
    expected_empty = [(RDFGraph(), RDFGraph())]
    self.assertEqual(expected_empty, list(partition_t(empty, 2)))

    # One triple: it lands either in the first part or in the second.
    graph.add(shoe_size)
    single = RDFGraph(graph)
    expected_single = [(single, empty), (empty, single)]
    self.assertEqual(expected_single, list(partition_t(single, 2)))

    # Two triples: one each (both orders), or both together on either side.
    graph.add(teacher)
    pair = RDFGraph(graph)
    expected_pair = [
        (single, RDFGraph((teacher, ))),
        (RDFGraph((teacher, )), single),
        (pair, empty),
        (empty, pair),
    ]
    self.assertEqual(expected_pair, list(partition_t(pair, 2)))
def test_partition_2(self):
    """Check ``partition_2`` sizes and its agreement with ``partition_t(…, 2)``.

    The number of partitions is expected to be ``2 ** len(graph)``.
    """
    # Empty graph: a single partition made of two empty graphs.
    empty_rdf = RDFGraph(Graph())
    # partition_2 is a generator - it can only be consumed once, so keep a list
    from_partition_2 = list(partition_2(empty_rdf))
    self.assertEqual(1, len(from_partition_2))
    self.assertEqual([(RDFGraph(), RDFGraph())], from_partition_2)
    from_partition_t = list(partition_t(empty_rdf, 2))
    self.assertEqual(from_partition_2, from_partition_t)

    # One triple: two partitions, identical to what partition_t produces.
    turtle = gen_rdf("""<Alice> ex:shoeSize "30"^^xsd:integer .""")
    one_triple = Graph()
    one_triple.parse(data=turtle, format="turtle")
    one_triple_rdf = RDFGraph(one_triple)
    from_partition_2 = list(partition_2(one_triple_rdf))
    self.assertEqual(2, len(from_partition_2))
    from_partition_t = list(partition_t(one_triple_rdf, 2))
    self.assertEqual(from_partition_2, from_partition_t)

    # Larger graphs: only the partition count is checked.  Two elements give
    # 4 partitions ((e1, e2), ()), ((e1), (e2)), ((e2), (e1)), ((), (e1, e2))
    sized_cases = (
        (4, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher ."""),
        (8, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher . <Alice> a ex:Person ."""),
        (16, """<Alice> ex:shoeSize "30"^^xsd:integer . <Alice> a ex:Teacher . <Alice> a ex:Person . <Alice> a ex:Fool ."""),
    )
    for expected_count, source in sized_cases:
        turtle = gen_rdf(source)
        graph = Graph()
        graph.parse(data=turtle, format="turtle")
        partitions = list(partition_2(RDFGraph(graph)))
        self.assertEqual(expected_count, len(partitions))