Exemple #1
0
    def __repr__(self):
        """Return a human-readable summary of the store.

        A store without an ``engine`` attribute is reported as unopened.
        Otherwise one count query is run over the four statement partitions
        and the summary lists the number of contexts together with the
        per-partition counts.  If counting the contexts or formatting the
        summary fails, a short generic label is returned instead so that
        ``repr()`` stays best-effort.
        """
        # Nothing to query on an unopened store, so answer before building
        # any SQL.
        if not hasattr(self, "engine"):
            return "<Partitioned unopened SQL N3 Store>"

        tables = self.tables
        selects = [
            (expression.alias(tables["type_statements"], "typetable"),
             None, ASSERTED_TYPE_PARTITION),
            (expression.alias(tables["quoted_statements"], "quoted"),
             None, QUOTED_PARTITION),
            (expression.alias(tables["asserted_statements"], "asserted"),
             None, ASSERTED_NON_TYPE_PARTITION),
            (expression.alias(tables["literal_statements"], "literal"),
             None, ASSERTED_LITERAL_PARTITION),
        ]
        q = union_select(selects, distinct=False, select_type=COUNT_SELECT)

        with self.engine.connect() as connection:
            rows = connection.execute(q).fetchall()

        # The unpacking assumes one count row per partition, returned in the
        # same order as ``selects`` above.
        type_count, quoted_count, asserted_count, literal_count = [
            row[0] for row in rows]

        try:
            # Adjacent string literals are joined at compile time, so ``%``
            # formats the whole template.  Joining the pieces with ``+``
            # would apply ``%`` to the last fragment only and raise
            # TypeError, which the handler below would silently swallow.
            return (
                "<Partitioned SQL N3 Store: %s "
                "contexts, %s classification assertions, "
                "%s quoted statements, %s property/value "
                "assertions, and %s other assertions>" % (
                    sum(1 for _ in self.contexts()),
                    type_count, quoted_count, literal_count, asserted_count))
        except Exception:
            return "<Partitioned SQL N3 Store>"
Exemple #2
0
    def __len__(self, context=None):
        """Return the number of statements in the store.

        :param context: optional context restricting the count.  When it is
            given, the quoted partition is counted as well and the union is
            requested with ``distinct=True``.  When it is ``None`` only the
            type, non-type and literal partitions are counted, with
            ``distinct=False``.
        :returns: the sum of the per-partition counts returned by the query
            (``0`` if the query returns no rows).
        """
        tables = self.tables
        typetable = expression.alias(tables["type_statements"], "typetable")
        asserted = expression.alias(tables["asserted_statements"], "asserted")
        literal = expression.alias(tables["literal_statements"], "literal")

        selects = [
            (typetable,
             self.build_context_clause(context, typetable),
             ASSERTED_TYPE_PARTITION),
            (asserted,
             self.build_context_clause(context, asserted),
             ASSERTED_NON_TYPE_PARTITION),
            (literal,
             self.build_context_clause(context, literal),
             ASSERTED_LITERAL_PARTITION),
        ]

        scoped = context is not None
        if scoped:
            # Quoted statements are only counted for an explicit context.
            # They are inserted right after the type partition to keep the
            # original select order.
            quoted = expression.alias(tables["quoted_statements"], "quoted")
            selects.insert(
                1,
                (quoted,
                 self.build_context_clause(context, quoted),
                 QUOTED_PARTITION))

        # NOTE(review): with ``distinct=True`` a plain SQL UNION would merge
        # partitions that happen to have equal counts -- confirm that
        # union_select does not de-duplicate COUNT_SELECT rows.
        q = union_select(selects, distinct=scoped, select_type=COUNT_SELECT)

        with self.engine.connect() as connection:
            rows = connection.execute(q).fetchall()
        return sum(row[0] for row in rows)
Exemple #3
0
    def _do_triples_select(self, selects, context):
        """Run the union of ``selects`` and yield triples with their contexts.

        Each yielded item is ``((s, p, o), contexts)`` where ``contexts`` is
        a generator over the graph objects built from every result row that
        carries that triple.
        """
        query = union_select(selects,
                             select_type=TRIPLE_SELECT_NO_ORDER,
                             distinct=True)
        with self.engine.connect() as connection:
            # The query is unordered (original note: SQLite does not support
            # ORDER BY terms that aren't integers -- TODO: verify, it may
            # only be a limitation on text columns), so the whole result set
            # is fetched before contexts can be grouped per triple.
            rows = connection.execute(query).fetchall()

        graphs_by_triple = {}
        for row in rows:
            _, s, p, o, (graph_cls, id_cls, graph_id) = extract_triple(
                row, self, context)
            graphs_by_triple.setdefault((s, p, o), []).append(
                graph_cls(self, id_cls(graph_id)))

        for spo, graphs in graphs_by_triple.items():
            yield spo, (g for g in graphs)
Exemple #4
0
    def contexts(self, triple=None):
        """Yield a ``URIRef`` for the first column of every context row.

        :param triple: optional ``(subject, predicate, object)`` pattern.
            When given, only the partitions relevant to the pattern are
            queried, each with a clause built from the pattern, and the
            quoted partition is always included.  When ``None``, all four
            partitions are queried without any clause.
        """
        quoted_src, asserted_src, type_src, literal_src = [
            self.tables[key] for key in (
                "quoted_statements", "asserted_statements",
                "type_statements", "literal_statements")]

        typetable = expression.alias(type_src, "typetable")
        quoted = expression.alias(quoted_src, "quoted")
        asserted = expression.alias(asserted_src, "asserted")
        literal = expression.alias(literal_src, "literal")

        if triple is None:
            selects = [
                (typetable, None, ASSERTED_TYPE_PARTITION),
                (quoted, None, QUOTED_PARTITION),
                (asserted, None, ASSERTED_NON_TYPE_PARTITION),
                (literal, None, ASSERTED_LITERAL_PARTITION),
            ]
        else:
            subject, predicate, obj = triple
            is_rdf_type = predicate == RDF.type
            selects = []

            # The rdf:type partition is consulted for an exact rdf:type
            # predicate, for a regex predicate matching rdf:type, and for an
            # unbound predicate.
            if is_rdf_type \
                    or (isinstance(predicate, REGEXTerm)
                        and predicate.compiledExpr.match(RDF.type)) \
                    or not predicate:
                type_clause = self.build_clause(
                    typetable, subject, RDF.type, obj, Any, True)
                selects.append(
                    (typetable, type_clause, ASSERTED_TYPE_PARTITION))

            # Every predicate other than exactly rdf:type may also hit the
            # literal and the asserted non-type partitions, depending on the
            # kind of object term.
            if not is_rdf_type:
                strongly_typed = self.STRONGLY_TYPED_TERMS
                if (not strongly_typed
                        or isinstance(obj, (Literal, REGEXTerm))
                        or not obj):
                    literal_clause = self.build_clause(
                        literal, subject, predicate, obj)
                    selects.append(
                        (literal, literal_clause,
                         ASSERTED_LITERAL_PARTITION))
                if not (isinstance(obj, Literal)
                        or (strongly_typed
                            and isinstance(obj, REGEXTerm))) \
                        or not obj:
                    asserted_clause = self.build_clause(
                        asserted, subject, predicate, obj)
                    selects.append(
                        (asserted, asserted_clause,
                         ASSERTED_NON_TYPE_PARTITION))

            # The quoted partition is queried for every pattern.
            quoted_clause = self.build_clause(quoted, subject, predicate, obj)
            selects.append((quoted, quoted_clause, QUOTED_PARTITION))

        q = union_select(selects,
                         distinct=True,
                         select_type=CONTEXT_SELECT)

        with self.engine.connect() as connection:
            rows = connection.execute(q).fetchall()
        identifiers = [row[0] for row in rows]
        for identifier in identifiers:
            yield URIRef(identifier)
Exemple #5
0
    def contexts(self, triple=None):
        """Yield the contexts found by the query, each wrapped in ``URIRef``.

        Without ``triple`` all four partitions are queried unfiltered.  With
        a ``(subject, predicate, object)`` pattern the partitions are chosen
        from the predicate and the kind of object term, each filtered by a
        clause built from the pattern; the quoted partition is always added.
        """
        quoted_table = self.tables["quoted_statements"]
        asserted_table = self.tables["asserted_statements"]
        type_table = self.tables["type_statements"]
        literal_table = self.tables["literal_statements"]

        typetable = expression.alias(type_table, "typetable")
        quoted = expression.alias(quoted_table, "quoted")
        asserted = expression.alias(asserted_table, "asserted")
        literal = expression.alias(literal_table, "literal")

        if triple is None:
            selects = [
                (alias, None, partition)
                for alias, partition in (
                    (typetable, ASSERTED_TYPE_PARTITION),
                    (quoted, QUOTED_PARTITION),
                    (asserted, ASSERTED_NON_TYPE_PARTITION),
                    (literal, ASSERTED_LITERAL_PARTITION),
                )
            ]
        else:
            subject, predicate, obj = triple

            def pattern_select(alias, partition):
                # Select entry filtered by the pattern exactly as given.
                clause = self.build_clause(alias, subject, predicate, obj)
                return (alias, clause, partition)

            def type_select():
                # Select entry for the rdf:type partition.
                clause = self.build_clause(
                    typetable, subject, RDF.type, obj, Any, True)
                return (typetable, clause, ASSERTED_TYPE_PARTITION)

            if predicate == RDF.type:
                # Exact rdf:type predicate: only the type partition applies.
                selects = [type_select()]
            else:
                # A regex predicate matching rdf:type, or an unbound
                # predicate, may also hit the type partition.
                if (isinstance(predicate, REGEXTerm)
                        and predicate.compiledExpr.match(RDF.type)) \
                        or not predicate:
                    selects = [type_select()]
                else:
                    selects = []

                typed_terms = self.STRONGLY_TYPED_TERMS
                # Literal partition: term typing is off, or the object is a
                # literal, a regex term, or unbound.
                if not typed_terms \
                        or isinstance(obj, Literal) \
                        or isinstance(obj, REGEXTerm) \
                        or not obj:
                    selects.append(
                        pattern_select(literal, ASSERTED_LITERAL_PARTITION))
                # Asserted non-type partition: the object is not a literal
                # (nor a regex term under strong typing), or is unbound.
                if not (isinstance(obj, Literal)
                        or (typed_terms and isinstance(obj, REGEXTerm))) \
                        or not obj:
                    selects.append(
                        pattern_select(asserted, ASSERTED_NON_TYPE_PARTITION))

            selects.append(pattern_select(quoted, QUOTED_PARTITION))

        q = union_select(selects, distinct=True, select_type=CONTEXT_SELECT)

        with self.engine.connect() as connection:
            fetched = connection.execute(q).fetchall()
        first_columns = [row[0] for row in fetched]
        for value in first_columns:
            yield URIRef(value)
Exemple #6
0
    def triples(self, triple, context=None):
        """
        Generate every triple matching the pattern, with its contexts.

        The pattern terms can be any objects that may be compared against
        nodes in the store (for example regular-expression terms).  Each
        yielded item is ``((s, p, o), contexts)`` where ``contexts`` is a
        generator over the graphs in which the triple was found.

        Tables involved:
            quoted table:                <id>_quoted_statements
            asserted rdf:type table:     <id>_type_statements
            asserted non rdf:type table: <id>_asserted_statements

        Triple columns:
            subject, predicate, object, context, termComb, objLanguage,
            objDatatype
        Class membership columns:
            member, klass, context, termComb

        FIXME: These union all selects *may* be further optimized by joins

        """
        subject, predicate, obj = triple

        quoted_src = self.tables["quoted_statements"]
        asserted_src = self.tables["asserted_statements"]
        type_src = self.tables["type_statements"]
        literal_src = self.tables["literal_statements"]

        def type_select():
            # rdf:type partition, always matched against RDF.type itself.
            alias = expression.alias(type_src, "typetable")
            where = self.build_clause(
                alias, subject, RDF.type, obj, context, True)
            return (alias, where, ASSERTED_TYPE_PARTITION)

        def pattern_select(src, alias_name, partition):
            # Any other partition, matched against the pattern as given.
            alias = expression.alias(src, alias_name)
            where = self.build_clause(alias, subject, predicate, obj, context)
            return (alias, where, partition)

        if predicate == RDF.type:
            # Exact rdf:type predicate: only the type partition (plus the
            # quoted table below when a context is given).
            selects = [type_select()]
        else:
            # A regex predicate matching rdf:type, or an unbound predicate,
            # additionally pulls in the type partition.
            also_type = (isinstance(predicate, REGEXTerm)
                         and predicate.compiledExpr.match(RDF.type)) \
                or not predicate
            typed_terms = self.STRONGLY_TYPED_TERMS
            selects = []

            # Literal partition: term typing is off, or the object is a
            # literal, a regex term, or unbound.
            if not typed_terms \
                    or isinstance(obj, (Literal, REGEXTerm)) \
                    or not obj:
                selects.append(pattern_select(
                    literal_src, "literal", ASSERTED_LITERAL_PARTITION))

            # Asserted non-type partition: the object is not a literal (nor
            # a regex term under strong typing), or is unbound.
            if not (isinstance(obj, Literal)
                    or (typed_terms and isinstance(obj, REGEXTerm))) \
                    or not obj:
                selects.append(pattern_select(
                    asserted_src, "asserted", ASSERTED_NON_TYPE_PARTITION))

            if also_type:
                selects.append(type_select())

        if context is not None:
            # Quoted statements are only searched for an explicit context.
            selects.append(
                pattern_select(quoted_src, "quoted", QUOTED_PARTITION))

        q = union_select(selects, select_type=TRIPLE_SELECT_NO_ORDER)
        with self.engine.connect() as connection:
            # The query is unordered (original note: SQLite does not support
            # ORDER BY terms that aren't integers -- TODO: verify, it may
            # only be a limitation on text columns), so the full result set
            # is fetched before contexts can be grouped per triple.
            rows = connection.execute(q).fetchall()

        graphs_by_triple = {}
        for row in rows:
            _, s, p, o, (graph_cls, id_cls, graph_id) = extract_triple(
                row, self, context)
            graphs_by_triple.setdefault((s, p, o), []).append(
                graph_cls(self, id_cls(graph_id)))

        for spo, graphs in graphs_by_triple.items():
            yield spo, (g for g in graphs)