Ejemplo n.º 1
0
    def addToSchema(self, stmts):
        """Update schema state and inferred rdf:type statements for `stmts`.

        Calls self._beginTxn() and then, for every statement in `stmts`:

        * records the "rdf1" entailment for the predicate and, when the
          object is a resource, the "rdfs4b" entailment for the object;
        * applies the "rdfs2"/"rdfs3" rules using the domains and ranges
          recorded for the predicate and its known superproperties;
        * dispatches on the predicate: members of self.subPropPreds update
          the sub/superproperty maps, members of self.subClassPreds update
          the sub/supertype maps, members of self.typePreds are passed to
          self._addTypeStatement(), and anything else is registered as a
          property and checked for being a domain or range declaration.

        Inferred type statements are reference counted in self.inferences,
        keyed by (resource, type): the type statement is only added the
        first time a key is seen, afterwards just the count is incremented.

        After the loop the subtype/subproperty maps are replaced by their
        transitive closure if they changed, and published to self.subtypes /
        self.subproperties when self.autocommit is set.

        stmts: iterable of statements exposing subject, predicate, object
               and objectType attributes.
        """
        self._beginTxn()

        propsChanged = False
        typesChanged = False

        #you can declare subproperties to rdf:type, rdfs:subClassOf, rdfs:subPropertyOf
        #but it will only take effect in the next call to addToSchema
        #also they can not be removed consistently
        #thus they should be declared in the initial schemas
        for stmt in stmts:
            #handle "rdf1" entailment rule in the RDF Semantics spec
            self._addEntailment(Statement(stmt.predicate, RDF_MS_BASE+u'type',
                                RDF_MS_BASE+u'Property',OBJECT_TYPE_RESOURCE))

            #"rdfs4b" entailment rule (rdfs4a isn't necessary to make explicit, we just make this
            #one explicit to guarantee that the object is the subject of at least one statement)
            if stmt.objectType == OBJECT_TYPE_RESOURCE:
                self.entailments.addStatement( Statement(stmt.object,
                RDF_MS_BASE+u'type',
                RDF_SCHEMA_BASE+u'Resource',OBJECT_TYPE_RESOURCE))

            #"rdfs2" and "rdfs3" entailment rules: infer types from domain and range of predicate
            for predicate in self.currentSuperProperties.get(stmt.predicate,[stmt.predicate]):
                domains = self.domains.get(predicate, [])
                for domain in domains:
                    key = (stmt.subject, domain)
                    if key not in self.inferences:
                        typeStmt = Statement(stmt.subject, RDF_MS_BASE+u'type',
                        domain, OBJECT_TYPE_RESOURCE)
                        self._addTypeStatement(typeStmt)
                        self.inferences[key] = 1
                    else:
                        self.inferences[key] += 1

                #range inference only makes sense when the object is a resource
                if stmt.objectType != OBJECT_TYPE_RESOURCE:
                    continue
                ranges = self.ranges.get(predicate, [])
                for rangeType in ranges:
                    key = (stmt.object, rangeType)
                    if key not in self.inferences:
                        typeStmt = Statement(stmt.object, RDF_MS_BASE+u'type', rangeType, OBJECT_TYPE_RESOURCE)
                        self._addTypeStatement(typeStmt)
                        self.inferences[key] = 1
                    else:
                        self.inferences[key] += 1
                    #todo: we could do a consistency check here to make sure the object type conforms

            #the subclass and subproperty rules ("rdfs5" - "rdfs11") are handled dynamically
            #except for "rdfs8": uuu rdf:type rdfs:Class -> uuu rdfs:subClassOf rdfs:Resource
            #which isn't needed
            if stmt.predicate in self.subPropPreds:
                self.currentSubProperties.setdefault(stmt.object, [stmt.object]).append(stmt.subject)
                #add this subproperty if this is the only reference to it so far
                self.currentSubProperties.setdefault(stmt.subject, [stmt.subject])

                self.currentSuperProperties.setdefault(stmt.subject, [stmt.subject]).append(stmt.object)
                #add this superproperty if this is the only reference to it so far
                self.currentSuperProperties.setdefault(stmt.object, [stmt.object])

                #if any superproperties are subject of any domain or range statements
                #deduce new type statements from existing statements
                #NOTE(review): this scans statements whose predicate is the
                #superproperty itself rather than the newly declared
                #subproperty (stmt.subject) -- confirm that is intended
                for predicate in self.currentSuperProperties[stmt.object]:
                    domaintypes = self.domains.get(predicate,[])
                    rangetypes = self.ranges.get(predicate,[])
                    if rangetypes or domaintypes:
                        for targetStmt in self.model.getStatements(predicate=predicate):
                            for domainType in domaintypes:
                                key = (targetStmt.subject, domainType)
                                refCount = self.inferences.get(key, 0)
                                if not refCount:
                                    typeStmt = Statement(targetStmt.subject,
                                            RDF_MS_BASE+u'type', domainType,
                                                         OBJECT_TYPE_RESOURCE)
                                    self._addTypeStatement(typeStmt)
                                    self.inferences[key] = 1
                                else:
                                    self.inferences[key] += 1

                            #check the statement being typed, not the
                            #subPropertyOf statement: range types must not be
                            #inferred for literal objects
                            if targetStmt.objectType != OBJECT_TYPE_RESOURCE:
                                continue
                            for rangeType in rangetypes:
                                key = (targetStmt.object, rangeType)
                                refCount = self.inferences.get(key, 0)
                                if not refCount:
                                    typeStmt = Statement(targetStmt.object,
                                            RDF_MS_BASE+u'type', rangeType,
                                                    OBJECT_TYPE_RESOURCE)
                                    self._addTypeStatement(typeStmt)
                                    self.inferences[key] = 1
                                else:
                                    self.inferences[key] += 1

                propsChanged = True
            elif stmt.predicate in self.subClassPreds:
                self.currentSubTypes.setdefault(stmt.object, [stmt.object]).append(stmt.subject)
                #add this subclass if this is the only reference to it so far
                self.currentSubTypes.setdefault(stmt.subject, [stmt.subject])

                self.currentSuperTypes.setdefault(stmt.subject, [stmt.subject]).append(stmt.object)
                #add this superclass if this is the only reference to it so far
                self.currentSuperTypes.setdefault(stmt.object, [stmt.object])

                typesChanged = True
            elif stmt.predicate in self.typePreds:
                self._addTypeStatement(stmt, addStmt=False)
            else:
                self.currentSubProperties.setdefault(stmt.predicate, [stmt.predicate])
                self.currentSuperProperties.setdefault(stmt.predicate, [stmt.predicate])

                #if we're adding a domain or range statement infer type for resources that already have statements
                #with that predicate or subproperty
                if self.isCompatibleProperty(stmt.predicate, RDF_SCHEMA_BASE+u'domain'):
                    self.domains.setdefault(stmt.subject, []).append(stmt.object)

                    #the property may not have been registered yet, in that
                    #case treat it as its own only subproperty
                    for predicate in self.currentSubProperties.get(stmt.subject, [stmt.subject]):
                        #query each subproperty in turn (not just stmt.subject)
                        for targetStmt in self.model.getStatements(predicate=predicate):
                            key = (targetStmt.subject, stmt.object)
                            if key not in self.inferences:
                                typeStmt = Statement(targetStmt.subject,
                                    RDF_MS_BASE+u'type', stmt.object,
                                    OBJECT_TYPE_RESOURCE)
                                self._addTypeStatement(typeStmt)
                                self.inferences[key] = 1
                            else:
                                self.inferences[key] += 1

                if self.isCompatibleProperty(stmt.predicate, RDF_SCHEMA_BASE+u'range'):
                    self.ranges.setdefault(stmt.subject, []).append(stmt.object)

                    for predicate in self.currentSubProperties.get(stmt.subject, [stmt.subject]):
                        #only statements with resource objects can be typed
                        for targetStmt in self.model.getStatements(
                            predicate=predicate, objecttype=OBJECT_TYPE_RESOURCE):
                            key = (targetStmt.object, stmt.object)
                            if key not in self.inferences:
                                typeStmt = Statement(targetStmt.object,
                                    RDF_MS_BASE+u'type', stmt.object,
                                    OBJECT_TYPE_RESOURCE)
                                self._addTypeStatement(typeStmt)
                                self.inferences[key] = 1
                            else:
                                self.inferences[key] += 1

        if typesChanged:
            self.currentSubTypes = getTransitiveClosure(self.currentSubTypes)
            #XXX if self.saveSubtypes:
            #    self.saveSubtypes()   XXX
            if self.autocommit:
                self.subtypes = self.currentSubTypes

        if propsChanged:
            self.currentSubProperties = getTransitiveClosure(self.currentSubProperties)
            if self.autocommit:
                self.subproperties = self.currentSubProperties

            #just in case a subproperty of any of these were added
            self.subClassPreds = self.currentSubProperties[self.SUBCLASSOF]
            self.subPropPreds  = self.currentSubProperties[self.SUBPROPOF]
            self.typePreds     = self.currentSubProperties[RDF_MS_BASE+u'type']
Ejemplo n.º 2
0
    def _findJoinPreds(self, root, joins):
        """Classify the filter predicates of `joins` and merge aliased joins.

        Works in four passes:

        1. Walk each join's filters. Eq predicates whose two sides are both
           simple join references are either recorded as aliases (no
           property on either side) and detached from the filter, or queued
           as simple join candidates. Filters with any other predicate are
           queued as complex join candidates.
        2. Merge joins that are aliases of each other into the first one in
           `joins`, and rename Label/Join/Project references under `root`
           and the surviving joins accordingly.
        3. Resolve the simple join candidates against the renamed joins.
        4. Inspect the remaining filters for references to other joins.

        root:  op that is scanned (together with the surviving joins) when
               applying renames.
        joins: list of join ops, assumed to be in document order.

        Returns a tuple (joins, simpleJoins, complexJoins): the joins that
        were not merged away; a list of simple join descriptions, either
        (leftname, leftprop, rightname, rightprop, filter, maybe) tuples or
        whatever self._makeHalfSimpleJoin() returned; and a list of
        (filter, set of referenced join names, maybe) tuples.

        Raises QueryException for "maybe" on an aliasing join, for MAYBE on a
        predicate that turns out not to be a join condition, and for filters
        that only refer to a different join (not implemented).
        """
        joinsByName = {}
        simpleJoinCandidates = []
        complexJoinCandidates = []
        aliases = {}
        for join in joins:
            # for each filter
            # if Eq and both sides are simple references (?label or id or ?ref.id)
            # add to aliases and remove filter from join (we'll join together later)
            aliases.setdefault(join.name, [])
            joinsByName[join.name] = join
            # descend into this join but not into nested resource set ops
            for filter in join.depthfirst(descendPredicate=lambda op: op is join or not isinstance(op, ResourceSetOp)):
                if not isinstance(filter, Filter):
                    continue
                for arg in filter.args:
                    if isinstance(arg, Eq):
                        leftname, leftprop = getNameIfSimpleJoinRef(arg.left, join.name, filter)
                        rightname, rightprop = getNameIfSimpleJoinRef(arg.right, join.name, filter)
                        # if both sides are either a project or label
                        if leftname and rightname:
                            # its an alias
                            if not leftprop and not rightprop:
                                # None or 0 (SUBJECT)
                                # its an alias
                                if arg.maybe:
                                    raise QueryException("maybe on an aliasing join not allowed", arg)
                                if leftname != rightname:
                                    aliases.setdefault(leftname, []).append(rightname)
                                    aliases.setdefault(rightname, []).append(leftname)
                                # remove this predicate from the filter
                                arg.parent = None
                                continue
                            else:
                                # expressions like ?foo = ?bar.prop or ?a.prop = ?b.prop
                                candidate = (join.name, arg, leftname, leftprop, rightname, rightprop)
                                simpleJoinCandidates.append(candidate)
                                continue
                        # elif leftname or rightname:
                        # XXX support cases where one side is a complex expression
                    # NOTE(review): this runs once per remaining predicate, so
                    # a filter with several of them is queued more than once
                    # -- confirm that is intended
                    complexJoinCandidates.append(filter)
                if not filter.args:
                    # we must have removed all its predicates so
                    # remove the filter and its join condition
                    join.removeArg(filter.parent)

        # combine together joins that are just aliases of each other
        # assumes join list is in doc order so that nested joins gets subsumed by outermost join
        aliases = getTransitiveClosure(aliases)
        renamed = {}
        removed = []
        for j in joins:
            for name in aliases[j.name]:
                if name == j.name:
                    continue
                renamed[name] = j.name
                ja = joinsByName.get(name)
                if not ja:
                    continue
                # move the aliased join's children into j and detach it
                for child in ja.args:
                    j.appendArg(child)
                ja.name = j.name
                self.prepareJoinMove(ja)
                ja.parent = None
                removed.append(ja)
        joins = [j for j in joins if j not in removed]

        import itertools

        # apply the renames to every reference under root and the remaining joins
        for join in itertools.chain((root,), joins):
            for child in join.depthfirst(descendPredicate=lambda op: op is join or not isinstance(op, ResourceSetOp)):
                if isinstance(child, (Label, Join)):
                    if child.name in renamed:
                        child.name = renamed[child.name]
                elif isinstance(child, Project) and child.varref:
                    if child.varref in renamed:
                        child.varref = renamed[child.varref]

        assert len(set([j.name for j in joins])) == len(joins), "join names not unique"

        # now that we figured out all the aliases, we can look for join predicates
        simpleJoins = []
        for joinname, pred, leftname, leftprop, rightname, rightprop in simpleJoinCandidates:
            leftname = renamed.get(leftname, leftname)
            rightname = renamed.get(rightname, rightname)
            filter = pred.parent
            if leftname == rightname:
                # both references point to same join, so not a join predicate after all
                if pred.maybe:
                    raise QueryException("MAYBE can not be used on a filter that is not a join condition", pred)
                if leftprop == rightprop:
                    # identity (a=a), so remove predicate
                    # XXX add user warning
                    pred.parent = None
                    if not filter.args:
                        filter.parent.parent = None
                else:
                    # expression operates on more than one project, need to
                    # execute after both Projects have been retrieved
                    filter.complexPredicates = True
            else:
                simpleJoins.append((leftname, leftprop, rightname, rightprop, filter, pred.maybe))
                # pred is just a Projects so its already handled by another
                # filter predicate (see makeJoinExpr() when skipRoot = True)
                # , so remove this one
                if not pred.siblings:
                    filter.parent.parent = None
                else:
                    pred.parent = None

        # xxx handle case like { ?bar ?foo.prop = func() or func(?foo) }
        # aren't these complex (cross) joins but rather misplaced filters
        # that belong in the ?foo join?

        # check if the remaining filter predicates are complex joins
        complexJoins = []
        projectPreds = []

        for filter in complexJoinCandidates:
            if not filter.parent:
                # filter was detached by one of the earlier passes
                continue
            join = filter.parent.parent
            # if the filter predicates have reference to another join, its a complex join
            # and while we're at it, collect Project predicates for maybe analysis and fixup
            for pred in filter.args:
                # join name -> ops inside this predicate that reference it
                joinrefs = {}
                for label in pred.depthfirst():
                    # check if the filter has reference to a different join
                    if isinstance(label, Label):
                        joinrefs.setdefault(label.name, []).append(label)
                        if label.name == join.name:
                            # to reduce the number of equivalent ops
                            # replace this with Project(SUBJECT)
                            newchild = Project(SUBJECT, label.name, maybe=label.maybe)
                            label.parent.replaceArg(label, newchild)
                    elif isinstance(label, Project):
                        if not label.varref:
                            label.varref = join.name
                        joinrefs.setdefault(label.varref, []).append(label)

                        propertyname = None
                        if isinstance(label.name, int):
                            if label.name == PROPERTY:
                                propertyname = filter.labelFromPosition(OBJECT)
                        else:
                            propertyname = label.name
                        if propertyname:
                            projectPreds.append((label.varref or join.name, propertyname, label.maybe, pred))

                if len(joinrefs) == 2 and isinstance(pred, Eq):
                    (leftname, leftops), (rightname, rightops) = joinrefs.items()
                    simple = self._makeHalfSimpleJoin(pred, join.name, leftname, leftops, rightname)
                    if not simple:  # try reverse order
                        simple = self._makeHalfSimpleJoin(pred, join.name, rightname, rightops, leftname)

                    if simple:
                        simpleJoins.append(simple)
                    else:
                        complexJoins.append((filter, set([leftname, rightname]), pred.maybe))
                elif len(joinrefs) > 1:
                    complexJoins.append((filter, set(joinrefs.keys()), pred.maybe))
                else:
                    if pred.maybe and not isinstance(pred, Project):
                        raise QueryException("MAYBE can not be used on a filter that is not a join condition", pred)
                    # if there's only 1 joinref but it not referencing the join
                    # that its part of, move the filter to the join that is referencing
                    # next(iter(...)) instead of the Python 2 only iter(...).next()
                    if len(joinrefs) == 1 and next(iter(joinrefs)) != join.name:
                        # XXX implement this
                        raise QueryException("Filters that refer to a different filter set are not yet implemented.")

        _fixMaybeFilters(projectPreds)

        return joins, simpleJoins, complexJoins
Ejemplo n.º 3
0
            if stmt.predicate in self.subClassPreds:
                try: 
                    self.currentSubTypes[stmt.object].remove(stmt.subject)
                    self.currentSuperTypes[stmt.subject].remove(stmt.object)
                except KeyError, ValueError:
                    pass#todo warn if not found                
                typesChanged = True            

        if typesChanged:
            newsubtypes = {}
            for k, v in self.currentSuperTypes.items():
                for supertype in v:
                    newsubtypes.setdefault(supertype, []).append(k)

            self.currentSubTypes = getTransitiveClosure(newsubtypes)
            if self.autocommit:
                self.subtypes = self.currentSubTypes
            
        if propsChanged:
            newsubprops = {}
            for k, v in self.currentSuperProperties.items():
                for superprop in v:
                    newsubprops.setdefault(superprop, []).append(k)
            
            self.currentSubProperties = getTransitiveClosure(newsubprops)
            if self.autocommit:
                self.subproperties = self.currentSubProperties

            #just in case a subproperty of any of these were removed
            self.subClassPreds = self.currentSubProperties[self.SUBCLASSOF]