def __repr__(self):
    """Return a readable summary of the store.

    For a store that has no ``engine`` attribute the fixed string
    ``"<Partitioned unopened SQL N3 Store>"`` is returned without touching
    the database.  Otherwise one COUNT query is run over the four
    partitions (type, quoted, asserted, literal) and the counts, together
    with the number of contexts, are interpolated into the summary.

    If counting the contexts or formatting the summary fails, the short
    form ``"<Partitioned SQL N3 Store>"`` is returned instead.
    """
    # Check for an engine first so an unopened store never builds a query.
    if not hasattr(self, "engine"):
        return "<Partitioned unopened SQL N3 Store>"

    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    selects = [
        (expression.alias(asserted_type_table, "typetable"),
         None, ASSERTED_TYPE_PARTITION),
        (expression.alias(quoted_table, "quoted"),
         None, QUOTED_PARTITION),
        (expression.alias(asserted_table, "asserted"),
         None, ASSERTED_NON_TYPE_PARTITION),
        (expression.alias(literal_table, "literal"),
         None, ASSERTED_LITERAL_PARTITION),
    ]
    q = union_select(selects, distinct=False, select_type=COUNT_SELECT)

    with self.engine.connect() as connection:
        res = connection.execute(q)
        rt = res.fetchall()

    # NOTE(review): the unpacking assumes exactly one count row per select,
    # returned in the order the selects are listed above -- confirm against
    # union_select.
    typeLen, quotedLen, assertedLen, literalLen = [
        rtTuple[0] for rtTuple in rt]

    try:
        # Adjacent literals are joined at compile time, so ``%`` formats the
        # whole template.  The previous ``"a" + "b" % args`` form applied
        # ``%`` to the last fragment only (``%`` binds tighter than ``+``),
        # which always raised and fell through to the short form below.
        return (
            "<Partitioned SQL N3 Store: %s "
            "contexts, %s classification assertions, "
            "%s quoted statements, %s property/value "
            "assertions, and %s other assertions>"
        ) % (
            len(list(self.contexts())),
            typeLen, quotedLen, literalLen, assertedLen)
    except Exception:
        # Best-effort: a repr should not fail just because the detailed
        # summary could not be produced.
        return "<Partitioned SQL N3 Store>"
def __len__(self, context=None):
    """Return the number of statements in the store.

    :param context: optional context used to build a context clause for
        every partition.  When it is given, the type, quoted, asserted and
        literal partitions are all counted and the union is requested with
        ``distinct=True``.  When it is ``None``, the quoted partition is
        left out and the union is requested with ``distinct=False``.
    :returns: the sum of the per-partition counts returned by the query
        (``0`` if the query returns no rows).
    """
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    typetable = expression.alias(asserted_type_table, "typetable")
    quoted = expression.alias(quoted_table, "quoted")
    asserted = expression.alias(asserted_table, "asserted")
    literal = expression.alias(literal_table, "literal")

    quotedContext = self.build_context_clause(context, quoted)
    assertedContext = self.build_context_clause(context, asserted)
    typeContext = self.build_context_clause(context, typetable)
    literalContext = self.build_context_clause(context, literal)

    if context is not None:
        selects = [
            (typetable, typeContext, ASSERTED_TYPE_PARTITION),
            (quoted, quotedContext, QUOTED_PARTITION),
            (asserted, assertedContext, ASSERTED_NON_TYPE_PARTITION),
            (literal, literalContext, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=True, select_type=COUNT_SELECT)
    else:
        # Without a context the quoted partition is not counted.
        selects = [
            (typetable, typeContext, ASSERTED_TYPE_PARTITION),
            (asserted, assertedContext, ASSERTED_NON_TYPE_PARTITION),
            (literal, literalContext, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=False, select_type=COUNT_SELECT)

    with self.engine.connect() as connection:
        res = connection.execute(q)
        rt = res.fetchall()
        # sum() replaces the hand-rolled reduce(): same total, and an empty
        # result yields 0 instead of raising TypeError.
        return sum(rtTuple[0] for rtTuple in rt)
def _do_triples_select(self, selects, context):
    """Run a distinct triple select and yield each triple with its contexts.

    :param selects: select components handed unchanged to ``union_select``.
    :param context: passed through to ``extract_triple`` for every row.
    :returns: a generator of ``((s, p, o), contexts)`` pairs, where
        ``contexts`` is itself a generator over the graph objects built
        from the rows that produced that triple.
    """
    query = union_select(
        selects, distinct=True, select_type=TRIPLE_SELECT_NO_ORDER)

    with self.engine.connect() as connection:
        # TODO: False but it may have limitations on text column. Check
        # NOTE: SQLite does not support ORDER BY terms that aren't
        # integers, so the entire result set must be iterated in order
        # to be able to return a generator of contexts
        rows = connection.execute(query).fetchall()

    # Group the graphs by triple so each triple is yielded exactly once.
    graphs_by_triple = {}
    for row in rows:
        _row_id, s, p, o, (graphKlass, idKlass, graphId) = extract_triple(
            row, self, context)
        graph = graphKlass(self, idKlass(graphId))
        graphs_by_triple.setdefault((s, p, o), []).append(graph)

    for spo, graphs in graphs_by_triple.items():
        yield spo, (c for c in graphs)
def contexts(self, triple=None):
    """Yield the contexts in the store as ``URIRef`` objects.

    :param triple: optional ``(subject, predicate, object)`` pattern.  When
        ``None``, contexts are selected from all four partitions without
        any clause.  Otherwise the partitions to query are chosen from the
        pattern:

        * predicate equal to ``RDF.type``: the type partition only;
        * predicate empty, or a ``REGEXTerm`` matching ``RDF.type``: the
          type partition plus the literal/asserted partitions allowed by
          the object;
        * any other predicate: only the literal/asserted partitions allowed
          by the object.

        The quoted partition is always added when a pattern is given.
    :returns: a generator of ``URIRef``, one per row of the distinct
        context select.

    This method was previously defined twice with identical logic; the
    second definition silently replaced the first, so a single definition
    is kept.
    """
    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    typetable = expression.alias(asserted_type_table, "typetable")
    quoted = expression.alias(quoted_table, "quoted")
    asserted = expression.alias(asserted_table, "asserted")
    literal = expression.alias(literal_table, "literal")

    if triple is None:
        selects = [
            (typetable, None, ASSERTED_TYPE_PARTITION),
            (quoted, None, QUOTED_PARTITION),
            (asserted, None, ASSERTED_NON_TYPE_PARTITION),
            (literal, None, ASSERTED_LITERAL_PARTITION),
        ]
    else:
        subject, predicate, obj = triple
        selects = []

        if predicate == RDF.type:
            use_type_partition = True
            use_statement_partitions = False
        else:
            # An empty predicate, or a regex that matches rdf:type, may
            # also hit the type partition.
            use_type_partition = bool(
                (isinstance(predicate, REGEXTerm)
                 and predicate.compiledExpr.match(RDF.type))
                or not predicate)
            use_statement_partitions = True

        # Order matters for the union: type, literal, asserted, quoted.
        if use_type_partition:
            clause = self.build_clause(
                typetable, subject, RDF.type, obj, Any, True)
            selects.append((typetable, clause, ASSERTED_TYPE_PARTITION))

        if use_statement_partitions:
            if (not self.STRONGLY_TYPED_TERMS
                    or isinstance(obj, Literal)
                    or not obj
                    or (self.STRONGLY_TYPED_TERMS
                        and isinstance(obj, REGEXTerm))):
                clause = self.build_clause(
                    literal, subject, predicate, obj)
                selects.append(
                    (literal, clause, ASSERTED_LITERAL_PARTITION))

            if (not isinstance(obj, Literal)
                    and not (isinstance(obj, REGEXTerm)
                             and self.STRONGLY_TYPED_TERMS)) \
                    or not obj:
                clause = self.build_clause(
                    asserted, subject, predicate, obj)
                selects.append(
                    (asserted, clause, ASSERTED_NON_TYPE_PARTITION))

        clause = self.build_clause(quoted, subject, predicate, obj)
        selects.append((quoted, clause, QUOTED_PARTITION))

    q = union_select(selects, distinct=True, select_type=CONTEXT_SELECT)

    with self.engine.connect() as connection:
        res = connection.execute(q)
        rt = res.fetchall()

    for rtTuple in rt:
        yield URIRef(rtTuple[0])
def triples(self, triple, context=None):
    """
    Generate every triple matching the pattern, paired with its contexts.

    The pattern members can be any objects that compare against nodes in
    the store (for example a ``REGEXTerm``).

    Partitions that may be queried:

    quoted table:                <id>_quoted_statements
    asserted rdf:type table:     <id>_type_statements
    asserted non rdf:type table: <id>_asserted_statements

    triple columns:
        subject, predicate, object, context, termComb,
        objLanguage, objDatatype
    class membership columns:
        member, klass, context, termComb

    FIXME: These union all selects *may* be further optimized by joins

    :param triple: ``(subject, predicate, object)`` pattern.
    :param context: optional context; the quoted partition is only queried
        when it is not ``None``.
    :returns: a generator of ``((s, p, o), contexts)`` pairs, ``contexts``
        being a generator over the graph objects built from the rows that
        produced that triple.
    """
    subject, predicate, obj = triple

    quoted_table = self.tables["quoted_statements"]
    asserted_table = self.tables["asserted_statements"]
    asserted_type_table = self.tables["type_statements"]
    literal_table = self.tables["literal_statements"]

    selects = []

    def add_select(table, alias_name, partition, clause_predicate, *extra):
        # Alias the table, build its clause and register the select.
        aliased = expression.alias(table, alias_name)
        where = self.build_clause(
            aliased, subject, clause_predicate, obj, context, *extra)
        selects.append((aliased, where, partition))

    def add_type_select():
        add_select(asserted_type_table, "typetable",
                   ASSERTED_TYPE_PARTITION, RDF.type, True)

    if predicate == RDF.type:
        # Only the asserted rdf:type partition can hold a match here.
        add_type_select()
    else:
        # An empty predicate, or a regex that matches rdf:type, must also
        # look in the rdf:type partition (appended after the others).
        may_match_type = (
            isinstance(predicate, REGEXTerm)
            and predicate.compiledExpr.match(RDF.type)
        ) or not predicate

        if not self.STRONGLY_TYPED_TERMS \
                or isinstance(obj, Literal) \
                or not obj \
                or (self.STRONGLY_TYPED_TERMS
                    and isinstance(obj, REGEXTerm)):
            add_select(literal_table, "literal",
                       ASSERTED_LITERAL_PARTITION, predicate)

        if (not isinstance(obj, Literal)
                and not (isinstance(obj, REGEXTerm)
                         and self.STRONGLY_TYPED_TERMS)) \
                or not obj:
            add_select(asserted_table, "asserted",
                       ASSERTED_NON_TYPE_PARTITION, predicate)

        if may_match_type:
            add_type_select()

    if context is not None:
        add_select(quoted_table, "quoted", QUOTED_PARTITION, predicate)

    query = union_select(selects, select_type=TRIPLE_SELECT_NO_ORDER)

    with self.engine.connect() as connection:
        # TODO: False but it may have limitations on text column. Check
        # NOTE: SQLite does not support ORDER BY terms that aren't
        # integers, so the entire result set must be iterated in order
        # to be able to return a generator of contexts
        rows = connection.execute(query).fetchall()

    # Group the graphs by triple so each triple is yielded exactly once.
    graphs_by_triple = {}
    for row in rows:
        _row_id, s, p, o, (graphKlass, idKlass, graphId) = extract_triple(
            row, self, context)
        graph = graphKlass(self, idKlass(graphId))
        graphs_by_triple.setdefault((s, p, o), []).append(graph)

    for spo, graphs in graphs_by_triple.items():
        yield spo, (c for c in graphs)