Exemple #1
0
 def unindex_doc(self, docid):
     """Unregister the document represented by docid from indexes of
     this catalog."""
     _Catalog.unindex_doc(self, docid)
     try:
         self.objectids.remove(docid)
     except KeyError:
         pass
Exemple #2
0
class GraphDB(Persistent):
    def __init__(self):

        self._init()

        self.node_catalog = Catalog()
        self.edge_catalog = Catalog()

    def _init(self):
        self.nodes = IOBTree()
        self.edges = IOBTree()
        self.edgedata = IOBTree()

        self.outgoing = IOBTree()
        self.incoming = IOBTree()

        self.typeids = PObject()

        self._nodeid = Length(0)
        self._edgeid = Length(0)
        self._typeid = Length(0)

    def nodeid(self):
        self._nodeid.change(1)
        return self._nodeid.value

    def edgeid(self):
        self._edgeid.change(1)
        return self._edgeid.value

    def typeid(self, name):
        if not hasattr(self.typeids, name):
            self._typeid.change(1)
            setattr(self.typeids, name, self._typeid.value)
            self.revtypes[self._typeid.value] = name
        return getattr(self.typeids, name)

    @property
    def revtypes(self):
        if (not hasattr(self, '_v_revtypes')) or (not bool(self._v_revtypes)):
            dir(self.typeids)
            dir(self.typeids)
            self._v_revtypes = dict([
                (v, k) for k, v in list(self.typeids.__dict__.items())
            ])
        return self._v_revtypes

    def getType(self, typeid):
        if type(typeid) != int:
            #lets assume an edge
            typeid = typeid[2]
        return self.revtypes[typeid]

    def addNode(self, **kwargs):
        if '_id' not in kwargs:
            _id = self.nodeid()
        else:
            _id = kwargs.pop('_id')
        self.nodes[_id] = kwargs
        ln = self.lightNode(_id, kwargs)
        self.node_catalog.index_doc(_id, ln)
        return ln

    def lightNode(self, _id, node=None):
        "{'_id':nodeid, ...other attributes...}"
        if node == None:
            node = self.nodes[_id]
        out = dict(node)
        out['_id'] = _id
        return out

    def addEdge(self, start, end, edgetype, **kwargs):
        _id = self.edgeid()

        if type(edgetype) != int:
            edgetype = self.typeid(edgetype)

        if type(start) == dict:
            start = start['_id']
        if type(end) == dict:
            end = end['_id']

        edge = [start, end, edgetype]
        self.edges[_id] = edge

        if kwargs:
            self.edgedata[_id] = kwargs
            le = self.lightEdge(_id, edge)
            self.edge_catalog.index_doc(_id, le)

        le = self.lightEdge(_id, edge)
        # edgeid:nodeid
        data = self.outgoing.setdefault(edgetype,
                                        IOBTree()).setdefault(start, {})
        data[_id] = end
        self.outgoing[edgetype][start] = data

        data = self.incoming.setdefault(edgetype,
                                        IOBTree()).setdefault(end, {})
        data[_id] = start
        self.incoming[edgetype][end] = data

        return le

    def lightEdge(self, _id, edge=None):
        '[sourceid targetid typeid kwargs edgeid]'
        if edge == None:
            edge = self.edges[_id]
        out = list(edge)
        out.append(self.edgedata.get(_id, {}))
        out.append(_id)
        return out

    def delEdge(self, edge):
        if type(edge) == int:
            edge = self.lightEdge(edge)

        start, end, edgetype, props, edgeid = edge

        data = self.outgoing[edgetype][start]
        del (data[edgeid])
        self.outgoing[edgetype][start] = data

        data = self.incoming[edgetype][end]
        del (data[edgeid])
        self.incoming[edgetype][end] = data

        del (self.edges[edgeid])
        if edgeid in self.edgedata:
            self.edge_catalog.unindex_doc(edgeid)
            #del(self.edges[edgeid])

    def delNode(self, node):
        if type(node) == int:
            node = self.lightNode(node)
        nodeid = node['_id']

        for edgetype in list(self.outgoing.keys()):
            if len(self.outgoing[edgetype].get(nodeid, {})) > 0:
                raise StillConnected('outgoing',
                                     self.outgoing[edgetype][nodeid])

        for edgetype in list(self.incoming.keys()):
            if len(self.incoming[edgetype].get(nodeid, {})) > 0:
                raise StillConnected('incoming',
                                     self.incoming[edgetype][nodeid])

        #all good, lets delete
        for edgetype in list(self.outgoing.keys()):
            if nodeid in self.outgoing[edgetype]:
                del (self.outgoing[edgetype][nodeid])

        for edgetype in list(self.incoming.keys()):
            if nodeid in self.incoming[edgetype]:
                del (self.incoming[edgetype][nodeid])

        self.node_catalog.unindex_doc(nodeid)
        del (self.nodes[nodeid])

    def updateNode(self, lightnode):
        nodeid = lightnode['_id']
        data = dict(lightnode)
        self.nodes[nodeid] = data
        self.node_catalog.reindex_doc(nodeid, lightnode)

    def updateEdge(self, lightedge):
        edgeid = lightedge[4]
        edge = list(lightedge[:4])
        data = lightedge[3]
        self.edges[edgeid] = edge
        if data:
            self.edgedata[edgeid] = data
            self.edge_catalog.reindex_doc(edgeid, lightedge)
        elif edgeid in self.edgedata:
            del (self.edgedata[edgeid])
            self.edge_catalog.unindex_doc(edgeid)

    def kwQuery(self, **kwargs):
        kwitems = list(kwargs.items())
        key, value = kwitems[0]
        query = rc_query.Eq(key, value)
        for k, v in kwitems[1:]:
            query = query & rc_query.Eq(k, v)
        return query

    def queryNode(self, **kwargs):
        result = self.node_catalog.query(self.kwQuery(**kwargs))
        return [self.lightNode(i) for i in result[1]]

    def queryEdge(self, **kwargs):
        result = self.edge_catalog.query(self.kwQuery(**kwargs))
        return [self.lightEdge(i) for i in result[1]]

    def prepareTypes(self, types=None):
        if types is None:
            return types
        else:
            if type(types) not in (list, tuple):
                types = [types]
            out = []
            for t in types:
                if type(t) == str:
                    t = self.typeid(t)
                out.append(t)
            return out


################## Higher Level API, functionality > speed ###################

    def getAllEdges(self, nodeids, directions=None, types=None, returnIds=0):

        if not islisttype(nodeids):
            nodeids = [nodeids]
        if directions == None:
            directions = ['i', 'o']
        elif type(directions) not in (list, tuple):
            directions = [directions]

        types = self.prepareTypes(types)

        tmp = []
        for n in nodeids:
            if type(n) != int:
                n = n['_id']
            tmp.append(n)
        nodeids = tmp

        out = EdgeDict()

        for direction in directions:
            if direction.startswith('i'):
                d = 'incoming'
            elif direction.startswith('o'):
                d = 'outgoing'
            else:
                raise 'unknown'

            result = []
            container = getattr(self, d)

            for edgetype in list(container.keys()):
                if types != None and edgetype not in types:
                    continue
                for n in nodeids:
                    edges = container[edgetype].get(n, {})
                    if returnIds:
                        result.extend(list(edges.keys()))
                    else:
                        for key in list(edges.keys()):
                            result.append(self.edge(key))
            out[direction] = result
        if len(directions) == 1:
            return result
        else:
            return out

    # XXX work in progress
    def getEdges(self, start, end, edgetype):
        #import ipdb; ipdb.set_trace()
        if type(edgetype) != int:
            edgetype = self.typeid(edgetype)

        if type(start) != int:
            start = start['_id']
        if type(end) != int:
            end = end['_id']

        out = []
        targets = self.outgoing.get(edgetype, {}).get(start, {})
        for edgeid, nodeid in list(targets.items()):
            if nodeid == end:
                out.append(self.lightEdge(edgeid))
        return out

    # XXX work in progress
    def addUniqueEdge(self, start, end, edgetype, **kwargs):
        if not self.getEdges(start, end, edgetype):
            return self.addEdge(start, end, edgetype, **kwargs)

    def clean(self):
        #import ipdb; ipdb.set_trace()
        for k in list(self.edges.keys()):
            self.delEdge(k)

        for k in list(self.nodes.keys()):
            self.delNode(k)

        self._init()

    def render(self, filename='graphagus', label='name', source=False):
        from graphviz import Digraph

        dot = Digraph('Graphagus dump', format='svg')

        for k in list(self.nodes.keys()):
            n = self.lightNode(k)
            dot.node(str(k), n[label])

        for k in list(self.edges.keys()):
            e = self.lightEdge(k)
            dot.edge(str(e[0]), str(e[1]), self.getType(e))
        if source:
            return dot.source
        else:
            dot.render(filename, cleanup=True)

    def edge(self, lightEdge):
        if type(lightEdge) == int:
            lightEdge = self.lightEdge(lightEdge)
        return Edge(self, lightEdge)

    def node(self, lightNode):
        if type(lightNode) == int:
            lightNode = self.lightNode(lightNode)
        return Node(self, lightNode)
Exemple #3
0
class Table(ZopeDict):
    """
    A table is a combination of a ZopeDict and a RepozeCatalog
    with integer keys and object values
    """

    #TODO write my own IOBTree/Length combination instead of using ZopeDict?
    def __init__(self, parent, indices=None):
        ZopeDict.__init__(self)
        self.__parent__ = parent
        parent[self.__name__] = self
        self._cat = Catalog()
        if indices is None:
            self._currentIndex = ""
        else:
            self._currentIndex = indices[0]
            for index in indices:
                self._cat[index] = CatalogFieldIndex(index)

    def getCurrentIndex(self):
        # FIXME current index should be part of the user preferences
        return self._cat[self._currentIndex]

    def delete(self, names):
        print "TODO delete"
        pass

    def getFromIndex(self, index, target):
        # TODO return a generator instead of a list
        retval = []
        result = self._cat.query(Eq(index, target))
        for key in result[1]:
            user = self.get(key)
            if user:
                retval.append(user)
        return retval

    def maxKey(self):
        return self._data.maxKey() if len(self) else 0

    # NB I have to cast key to an int for traversal to work
    # FIXME: it seems like this is the wrong place for this maybe it
    # is a sign I should give up on traversal altogether?
    def __getitem__(self, key):
        return self._data[int(key)]

    def setdefault(self, key, failobj=None):
        return ZopeDict.setdefault(self, int(key), failobj)

    def has_key(self, key):
        return ZopeDict.has_key(self, int(key))

    def get(self, key, failobj=None):
        return ZopeDict.get(self, int(key), failobj)

    def __contains__(self, key):
        return ZopeDict.__contains__(self, int(key))

    # zc.dict interface
    # Addition is done with __setitem__, overriding it will control addition.
    def __setitem__(self, key, value):
        #self._cat.index_doc(int(key), value)
        ZopeDict.__setitem__(self, int(key), value)

    #TODO find a way to do efficient automatic re-indexing
    # can't see that I can do better than Plone though
    # http://developer.plone.org/searching_and_indexing/indexing.html#when-indexing-happens-and-how-to-reindex-manually
    def reindex(self, value):
        self._cat.index_doc(value.key, value)

    # Removal is done with either pop or clear, overriding these methods will
    # control removal.
    def pop(self, key, *args):
        retval = ZopeDict.pop(self, int(key), *args)
        self._cat.unindex_doc(int(key))
        return retval

    def clear(self):
        ZopeDict.clear(self)
        self._cat.clear()
Exemple #4
0
class Table(ZopeDict):
    """
    A table is a combination of a ZopeDict and a RepozeCatalog
    with integer keys and object values
    """
    #TODO write my own IOBTree/Length combination instead of using ZopeDict?
    def __init__(self, parent, indices=None):
        ZopeDict.__init__(self)
        self.__parent__  = parent
        parent[self.__name__] = self
        self._cat = Catalog()
        if indices is None:
            self._currentIndex = ""
        else:
            self._currentIndex = indices[0]
            for index in indices:
                self._cat[index] = CatalogFieldIndex(index)

    def getCurrentIndex(self):
        # FIXME current index should be part of the user preferences
        return self._cat[self._currentIndex]

    def delete(self, names):
        print "TODO delete"
        pass

    def getFromIndex(self, index, target):
        # TODO return a generator instead of a list
        retval = []
        result = self._cat.query(Eq(index, target))
        for key in result[1]:
            user = self.get(key)
            if user:
                retval.append(user)
        return retval

    def maxKey(self):
        return self._data.maxKey() if len(self) else 0


    # NB I have to cast key to an int for traversal to work
    # FIXME: it seems like this is the wrong place for this maybe it 
    # is a sign I should give up on traversal altogether?
    def __getitem__(self, key):
        return self._data[int(key)]

    def setdefault(self, key, failobj=None):
        return ZopeDict.setdefault(self, int(key), failobj)

    def has_key(self, key):
        return ZopeDict.has_key(self, int(key))

    def get(self, key, failobj=None):
        return ZopeDict.get(self, int(key), failobj)

    def __contains__(self, key):
        return ZopeDict.__contains__(self, int(key))
    
    # zc.dict interface
    # Addition is done with __setitem__, overriding it will control addition.
    def __setitem__(self, key, value):
        #self._cat.index_doc(int(key), value)
        ZopeDict.__setitem__(self, int(key), value)

    #TODO find a way to do efficient automatic re-indexing
    # can't see that I can do better than Plone though
    # http://developer.plone.org/searching_and_indexing/indexing.html#when-indexing-happens-and-how-to-reindex-manually
    def reindex(self, value):
        self._cat.index_doc(value.key, value)

    # Removal is done with either pop or clear, overriding these methods will
    # control removal.
    def pop(self, key, *args):
        retval = ZopeDict.pop(self, int(key), *args)
        self._cat.unindex_doc(int(key))
        return retval

    def clear(self):
        ZopeDict.clear(self)
        self._cat.clear()
Exemple #5
0
class GraphDB(Persistent):

    def __init__(self):
        
        self._init()
        
        self.node_catalog= Catalog()
        self.edge_catalog = Catalog()

    def _init(self):
        self.nodes = IOBTree()
        self.edges = IOBTree()
        self.edgedata = IOBTree()

        self.outgoing = IOBTree()
        self.incoming = IOBTree()

        self.typeids = PObject()

        self._nodeid = Length(0)
        self._edgeid = Length(0)
        self._typeid = Length(0)


    def nodeid(self):
        self._nodeid.change(1)
        return self._nodeid.value

    def edgeid(self):
        self._edgeid.change(1)
        return self._edgeid.value

    def typeid(self,name):
        if not hasattr(self.typeids,name):            
            self._typeid.change(1)
            setattr(self.typeids,name,self._typeid.value)
            self.revtypes[self._typeid.value]=name
        return getattr(self.typeids,name)

    @property
    def revtypes(self):
        if not hasattr(self,'_v_revtypes'):
            dir(self.typeids)
            dir(self.typeids)
            self._v_revtypes = dict([(v,k) for k,v in self.typeids.__dict__.items()])
        return self._v_revtypes

    def getType(self,typeid):
        if type(typeid) != int:
            #lets assume an edge
            typeid = typeid[2]
        return self.revtypes[typeid]


    def addNode(self,**kwargs):
        _id = self.nodeid()
        self.nodes[_id]=kwargs
        ln =  self.lightNode(_id,kwargs)
        self.node_catalog.index_doc(_id,ln)
        return ln
    
    def lightNode(self,_id,node=None):
        "{'id':nodeid, ...other attributes...}"
        if node==None:
            node = self.nodes[_id]
        out = dict(node)
        out['_id'] = _id
        return out

    def addEdge(self,start,end,edgetype,**kwargs):
        _id = self.edgeid()
    
        if type(edgetype) != int:
            edgetype = self.typeid(edgetype)

        if type(start) == dict:
            start = start['_id']
        if type(end) == dict:
            end = end['_id']

        edge = [start,end,edgetype]
        self.edges[_id]=edge
        
        if kwargs:
            self.edgedata[_id]=kwargs
            le =  self.lightEdge(_id,edge)
            self.edge_catalog.index_doc(_id,le)
       
        le =  self.lightEdge(_id,edge)
        # edgeid:nodeid
        data = self.outgoing.setdefault(edgetype,IOBTree()).setdefault(start,{})
        data[_id]=end
        self.outgoing[edgetype][start]=data

        data = self.incoming.setdefault(edgetype,IOBTree()).setdefault(end,{})
        data[_id]=start
        self.incoming[edgetype][end]=data

        return le

    def lightEdge(self,_id,edge=None):
        '[sourceid targetid typeid kwargs edgeid]'
        if edge==None:
            edge = self.edges[_id]
        out = list(edge)
        out.append(self.edgedata.get(_id,{}))
        out.append(_id)
        return out

    def delEdge(self,edge):
        if type(edge)==int:
            edge=self.lightEdge(edge)

        start,end,edgetype,props,edgeid = edge

        data = self.outgoing[edgetype][start]
        del(data[edgeid])                
        self.outgoing[edgetype][start]=data
        
        data = self.incoming[edgetype][end]
        del(data[edgeid])                
        self.incoming[edgetype][end]=data

        del(self.edges[edgeid])
        if self.edgedata.has_key(edgeid):
            self.edge_catalog.unindex_doc(edgeid)
            #del(self.edges[edgeid])


    def delNode(self,node):
        if type(node)==int:
            node=self.lightNode(node)
        nodeid = node['_id']
       
        for edgetype in self.outgoing.keys():
            if len(self.outgoing[edgetype].get(nodeid,{}))>0:
                raise StillConnected('outgoing',self.outgoing[edgetype][nodeid])

        for edgetype in self.incoming.keys():
            if len(self.incoming[edgetype].get(nodeid,{}))>0:
                raise StillConnected('incoming',self.incoming[edgetype][nodeid])

        #all good, lets delete
        for edgetype in self.outgoing.keys():
            if self.outgoing[edgetype].has_key(nodeid):
                del(self.outgoing[edgetype][nodeid])
        
        for edgetype in self.incoming.keys():
            if self.incoming[edgetype].has_key(nodeid):
                del(self.incoming[edgetype][nodeid])

        self.node_catalog.unindex_doc(nodeid)                
        del(self.nodes[nodeid])

    def updateNode(self,lightnode):
        nodeid = lightnode['_id']
        data = dict(lightnode)
        self.nodes[nodeid]=data
        self.node_catalog.reindex_doc(nodeid,lightnode)

    def updateEdge(self,lightedge):
        edgeid = lightedge[4]
        edge = list(lightedge[:4])
        data = lightedge[3]
        self.edges[edgeid]=edge
        if data:
            self.edgedata[edgeid]=data
            self.edge_catalog.reindex_doc(edgeid,lightedge)            
        elif self.edgedata.has_key(edgeid):
            del(self.edgedata[edgeid])
            self.edge_catalog.unindex_doc(edgeid)

    def kwQuery(self,**kwargs):
        kwitems = kwargs.items()
        key,value = kwitems[0]
        query = rc_query.Eq(key,value) 
        for k,v in kwitems[1:]:
            query = query & rc_query.Eq(k,v)
        return query 

    def queryNode(self,**kwargs):
        result = self.node_catalog.query(self.kwQuery(**kwargs))
        return [self.lightNode(i) for i in result[1]]
        
    def queryEdge(self,**kwargs):
        result = self.edge_catalog.query(self.kwQuery(**kwargs))
        return [self.lightEdge(i) for i in result[1]]
    
################## Higher Level API, functionality > speed ###################

    def getAllEdges(self,nodeids,directions=None,types=None):
        

        if type(nodeids) not in (list,tuple):
            nodeids = [nodeids]
        if directions == None:
            directions = ['i','o']
        elif type(directions) not in (list,tuple):
            directions = [directions]

        if types != None:
            if type(types) not in (list,tuple):
                types = [types]
            tmp = []
            for t in types:
                if type(t)==str:
                    t = self.typeid(t)
                tmp.append(t)
            types = tmp
            
        tmp = []
        for n in nodeids:
            if type(n) != int:
                n = n['_id']
            tmp.append(n)
        nodeids = tmp

        out = EdgeDict()

        for direction in directions:
            if direction.startswith('i'):
                d = 'incoming'
            elif direction.startswith('o'):
                d = 'outgoing'
            else:
                raise 'unknown'

            result = []
            container = getattr(self,d)
            
            for edgetype in container.keys():
                if types !=None and edgetype not in types:
                    continue
                for n in nodeids:
                    edges = container[edgetype].get(n,{})
                    for key in edges.keys():
                        result.append(self.edge(key))
            out[direction] = result
        if len(directions) == 1:
            return result
        else:
            return out

    # XXX work in progress
    def getEdges(self,start,end,edgetype):
        #import ipdb; ipdb.set_trace()
        if type(edgetype) != int:
            edgetype = self.typeid(edgetype)

        if type(start) != int:
            start = start['_id']
        if type(end) != int:
            end = end['_id']

        out = []
        targets = self.outgoing.get(edgetype,{}).get(start,{})
        for edgeid,nodeid in targets.items():
            if nodeid==end:
                out.append(self.lightEdge(edgeid))
        return out

    # XXX work in progress
    def addUniqueEdge(self,start,end,edgetype,**kwargs):
        if not self.getEdges(start,end,edgetype):
            return self.addEdge(start,end,edgetype,**kwargs)

    def clean(self):
        #import ipdb; ipdb.set_trace()
        for k in list(self.edges.keys()):
            self.delEdge(k)

        for k in list(self.nodes.keys()):
            self.delNode(k)

        self._init()
    
    def render(self,filename='graphagus',source=False):
        from graphviz import Digraph
        
        dot = Digraph('Graphagus dump',format='svg')
        
        for k in self.nodes.keys():
            n = self.lightNode(k)
            dot.node(str(k),n['name'])
        
        for k in self.edges.keys():
            e = self.lightEdge(k)
            dot.edge(str(e[0]),
                     str(e[1]),
                     self.getType(e))
        if source:
            return dot.source
        else:
            dot.render(filename,cleanup=True)

    def edge(self,lightEdge):
        if type(lightEdge) == int:
            lightEdge = self.lightEdge(lightEdge)
        return Edge(self,lightEdge)

    def node(self,lightNode):
        if type(lightNode) == int:
            lightNode = self.lightNode(lightNode)
        return Node(self,lightNode)
Exemple #6
0
class Catalog(object):

    def __init__(self, home, name, docid_attr='docid'):
        self.indexes = Indexes()
        self.home = home
        self.name = name
        self.docid_attr = docid_attr
        self.document_map = DocumentMap()

    def index_doc(self, doc):
        return self.indexes.index_doc(self._get_docid(doc), doc)

    def reindex_doc(self, doc):
        return self.indexes.reindex_doc(self._get_docid(doc), doc)

    def unindex_doc(self, doc_or_docid):
        if type(doc_or_docid) in (int, long):
            docid = doc_or_docid
        else:
            docid = getattr(doc_or_docid, self.docid_attr, None)
            if docid is None:
                return
        self.document_map.remove_docid(docid)
        return self.indexes.unindex_doc(docid)

    def query(self, queryobject, sort_index=None, limit=None, sort_type=None,
              reverse=False, names=None):
        count, docids = self.indexes.query(
            queryobject,
            sort_index=sort_index,
            limit=limit,
            sort_type=sort_type,
            reverse=reverse,
            names=names)
        return count, docids, self.resolver()

    def _get_docid(self, doc):
        docid_attr = self.docid_attr
        path = resource_path(doc)
        document_map = self.document_map
        docid = getattr(doc, docid_attr, None)
        if docid is None:
            docid = document_map.add(path)
            setattr(doc, docid_attr, docid)
        else:
            old_path = document_map.address_for_docid(docid)
            if old_path != path:
                document_map.remove_address(old_path)
                document_map.add(path, docid)
        return docid

    def resolver(self):
        root = self.home['content']
        document_map = self.document_map
        def resolve(docid):
            path = document_map.address_for_docid(docid)
            return find_resource(root, path)
        return resolve

    def add_index(self, name, index):
        """
        Add an index to an existing catalog.
        """
        log.info('Adding index: %s' % name)
        self.indexes[name] = index
        resolver = self.resolver()
        for docid in self.document_map.docid_to_address.keys():
            doc = resolver(docid)
            log.info('Calculating index for %s' % resource_path(doc))
            index.index_doc(docid, doc)