def _update(self, documentId, val, oldval, threshold):
    """Reconcile the index for *documentId* after its value changed.

    Entries present in *val* but missing from *oldval* are indexed;
    entries present only in *oldval* are unindexed.  Returns a 1-tuple
    containing the number of newly indexed entries.
    """
    added = difference(val, oldval)
    removed = difference(oldval, val)
    if added:
        self._indexValue(documentId, added, threshold)
    if removed:
        self._unindexValue(documentId, removed)
    self._updateOldval(oldval, val, added, removed)
    # Callers expect a 1-tuple, not a bare int.
    return (len(added),)
Ejemplo n.º 2
0
    def index_doc(self, docid, seq):
        """Index the keyword sequence *seq* under document id *docid*.

        *seq* must be a list/tuple of strings (a bare string raises
        TypeError); an empty sequence is a no-op.  When the document was
        indexed before, only the keywords that actually changed are
        written back.
        """
        if isinstance(seq, StringTypes):
            raise TypeError('seq argument must be a list/tuple of strings')

        if not seq:
            return

        if self.normalize:
            seq = [w.lower() for w in seq]

        old_kw = self._rev_index.get(docid, None)
        new_kw = OOSet(seq)

        if old_kw is None:
            # First time we see this document.
            self._insert_forward(docid, new_kw)
            self._insert_reverse(docid, new_kw)
            self._num_docs.change(1)
        else:
            # Determine added and removed keywords.
            kw_added = difference(new_kw, old_kw)
            kw_removed = difference(old_kw, new_kw)

            # Removed keywords are dropped from the forward index.  BUG
            # FIX: the original left emptied docid sets behind, so stale
            # keyword keys accumulated in _fwd_index; delete them here.
            for word in kw_removed:
                wordset = self._fwd_index[word]
                wordset.remove(docid)
                if not wordset:
                    del self._fwd_index[word]

            # Now update reverse and forward indexes.
            self._insert_forward(docid, kw_added)
            self._insert_reverse(docid, new_kw)
Ejemplo n.º 3
0
 def _update(self, documentId, val, oldval, threshold):
     """Reconcile index entries for *documentId* after a value change.

     Entries in *val* missing from *oldval* are indexed; entries present
     only in *oldval* are unindexed.  Returns a 1-tuple holding the
     number of added entries.
     """
     add = difference(val, oldval)
     rem = difference(oldval, val)
     if add: self._indexValue(documentId, add, threshold)
     if rem: self._unindexValue(documentId, rem)
     self._updateOldval(oldval, val, add, rem)
     # NOTE: the trailing comma is deliberate -- callers get (len(add),).
     return len(add),
Ejemplo n.º 4
0
    def test_None_is_smallest(self):
        """None sorts below every other key, across buckets and set ops."""
        t = self._makeOne()
        for i in range(999):  # enough keys to force multiple buckets
            t[i] = i * i
        t[None] = -1
        for i in range(-99, 0):  # negative keys also span multiple buckets
            t[i] = i * i
        self.assertEqual(list(t), [None] + list(range(-99, 999)))
        self.assertEqual(list(t.values()),
                         [-1] + [i * i for i in range(-99, 999)])
        self.assertEqual(t[2], 4)
        self.assertEqual(t[-2], 4)
        self.assertEqual(t[None], -1)
        t[None] = -2
        self.assertEqual(t[None], -2)
        t2 = t.__class__(t)
        del t[None]
        self.assertEqual(list(t), list(range(-99, 999)))

        # The set-operation checks below are skipped for the pure-Python
        # ('Py'-named) flavour of the class under test.
        if 'Py' in self.__class__.__name__:
            return
        from BTrees.OOBTree import difference, union, intersection
        self.assertEqual(list(difference(t2, t).items()), [(None, -2)])
        self.assertEqual(list(union(t, t2)), list(t2))
        self.assertEqual(list(intersection(t, t2)), list(t))
Ejemplo n.º 5
0
    def test_None_is_smallest(self):
        """None sorts below every other key, across buckets and set ops."""
        t = self._makeOne()
        for i in range(999): # enough keys to force multiple buckets
            t[i] = i*i
        t[None] = -1
        for i in range(-99,0): # negative keys also span multiple buckets
            t[i] = i*i
        self.assertEqual(list(t), [None] + list(range(-99, 999)))
        self.assertEqual(list(t.values()),
                         [-1] + [i*i for i in range(-99, 999)])
        self.assertEqual(t[2], 4)
        self.assertEqual(t[-2], 4)
        self.assertEqual(t[None], -1)
        t[None] = -2
        self.assertEqual(t[None], -2)
        t2 = t.__class__(t)
        del t[None]
        self.assertEqual(list(t), list(range(-99, 999)))

        # The set-operation checks below are skipped for the pure-Python
        # ('Py'-named) flavour of the class under test.
        if 'Py' in self.__class__.__name__:
            return
        from BTrees.OOBTree import difference, union, intersection
        self.assertEqual(list(difference(t2, t).items()), [(None, -2)])
        self.assertEqual(list(union(t, t2)), list(t2))
        self.assertEqual(list(intersection(t, t2)), list(t))
Ejemplo n.º 6
0
    def read(self):
        """Return messages added and removed from folder.

        Two sets of message objects are returned.  The first set is
        messages that were added to the folder since the last read.
        The second set is the messages that were removed from the
        folder since the last read.

        The code assumes messages are added and removed but not edited.
        """
        # BUG FIX: the original opened the mbox file inline and never
        # closed it; close the handle once the scan is finished.
        f = open(self.path, "rb")
        try:
            mbox = mailbox.UnixMailbox(f, factory)
            self._stat()
            cur = OOSet()  # message-ids currently present in the mailbox
            new = OOSet()  # message objects not seen on the previous read
            while 1:
                msg = mbox.next()
                if msg is None:
                    break
                msgid = msg["message-id"]
                cur.insert(msgid)
                if not self.messages.has_key(msgid):
                    self.messages[msgid] = msg
                    new.insert(msg)
        finally:
            f.close()

        # Anything cached in self.messages but absent from the mailbox
        # was removed since the last read; forget it.
        removed = difference(self.messages, cur)
        for msgid in removed.keys():
            del self.messages[msgid]

        # XXX perhaps just return the OOBTree for removed?
        return new, OOSet(removed.values())
Ejemplo n.º 7
0
 def __init__(self, name, searcher, parent, other_tags, tag_path):
     """Set up a tag node under *parent*, tracking its sibling tags."""
     encoded = name.encode('utf-8')
     self.name = name
     self.__name__ = encoded
     self.__parent__ = parent
     self.searcher = searcher
     self.tag_path = tag_path + [name]
     # Sibling tags: the given set minus this tag's own (encoded) name.
     self.other_tags = difference(other_tags, OOTreeSet([encoded]))
Ejemplo n.º 8
0
    def read(self):
        """Return messages added and removed from folder.

        Two sets of message objects are returned.  The first set is
        messages that were added to the folder since the last read.
        The second set is the messages that were removed from the
        folder since the last read.

        The code assumes messages are added and removed but not edited.
        """
        # NOTE(review): the file handle opened here is never explicitly
        # closed -- relies on garbage collection; confirm acceptable.
        mbox = mailbox.UnixMailbox(open(self.path, "rb"), factory)
        self._stat()
        cur = OOSet()  # message-ids currently present in the mailbox
        new = OOSet()  # message objects not seen on the previous read
        while 1:
            msg = mbox.next()
            if msg is None:
                break
            msgid = msg["message-id"]
            cur.insert(msgid)
            if not self.messages.has_key(msgid):
                self.messages[msgid] = msg
                new.insert(msg)

        # Anything cached in self.messages but absent from the mailbox
        # was removed since the last read; forget it.
        removed = difference(self.messages, cur)
        for msgid in removed.keys():
            del self.messages[msgid]

        # XXX perhaps just return the OOBTree for removed?
        return new, OOSet(removed.values())
Ejemplo n.º 9
0
    def set_oids(self, oids):
        """Sets the list of OIDs to scan.

        Gathers source information about new OIDs and discards
        source information for OIDs no longer in use.
        """
        new_sources = {}  # { oid -> sourcedict }
        self.lock.acquire()
        try:
            # Drop OIDs that are no longer in the requested set.
            removed = difference(self.current, oids)
            for oid in removed.keys():
                del self.current[oid]
            # Pick up OIDs we have not tracked before.
            added = difference(oids, self.current)
            for oid in added.keys():
                if self.future.has_key(oid):
                    # Source info for this OID was provided earlier.
                    sources, atime = self.future[oid]
                    del self.future[oid]
                    self.current[oid] = sources
                else:
                    # No cached info; resolve below, outside the lock.
                    new_sources[oid] = None
        finally:
            self.lock.release()
        if new_sources:
            # Load source info the slow way.  Deliberately done without
            # holding self.lock, since it may be expensive.
            if self.storage is not None:
                LOG('Ape', DEBUG,
                    'Getting sources for %d oids.' % len(new_sources))
                new_sources = self.storage.get_all_sources(new_sources.keys())
            else:
                LOG(
                    'Ape', DEBUG, "Can't get sources for %d oids. "
                    "Assuming no sources!" % len(new_sources))
                # This will cause the scanner to miss changes, but
                # since no storage is known, there is little we can
                # do.
                for oid in new_sources.keys():
                    new_sources[oid] = {}
            self.lock.acquire()
            try:
                for oid, sources in new_sources.items():
                    if not self.current.has_key(oid):
                        self.current[oid] = sources
                    # else something else added the source info
                    # while self.lock was released.
            finally:
                self.lock.release()
    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index *obj* under integer id *documentId*.

        The keywords are obtained via ``_get_object_keywords`` (using
        *attr*, the attribute name of interest); when the document was
        indexed before, only the keywords that actually changed are
        written back.  Returns 1 on success, 0 when the keyword value
        could not be iterated.
        """
        newKeywords = self._get_object_keywords(obj, attr)
        oldKeywords = self._unindex.get(documentId, None)

        if oldKeywords is None:
            # Brand-new document: insert every keyword directly.
            try:
                for keyword in newKeywords:
                    self.insertForwardIndexEntry(keyword, documentId)
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
            except TypeError:
                # Keyword value was not iterable.
                return 0
            return 1

        # Existing entry: work out which keywords actually changed.
        if type(oldKeywords) is not OOSet:
            oldKeywords = OOSet(oldKeywords)
        newKeywords = OOSet(newKeywords)
        dropped = difference(oldKeywords, newKeywords)
        added = difference(newKeywords, oldKeywords)

        if dropped or added:
            # Refresh the reverse index first ...
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
            else:
                del self._unindex[documentId]
            # ... then reconcile the forward index.
            if dropped:
                self.unindex_objectKeywords(documentId, dropped)
            for keyword in added:
                self.insertForwardIndexEntry(keyword, documentId)
        return 1
Ejemplo n.º 11
0
    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index an object 'obj' with integer id 'i'

        Ideally, we've been passed a sequence of some sort that we
        can iterate over. If however, we haven't, we should do something
        useful with the results. In the case of a string, this means
        indexing the entire string as a keyword."""

        # First we need to see if there's anything interesting to look at
        # self.id is the name of the index, which is also the name of the
        # attribute we're interested in.  If the attribute is callable,
        # we'll do so.

        newKeywords = self._get_object_keywords(obj, attr)

        # _unindex maps documentId -> previously indexed keywords.
        oldKeywords = self._unindex.get(documentId, None)

        if oldKeywords is None:
            # we've got a new document, let's not futz around.
            try:
                for kw in newKeywords:
                    self.insertForwardIndexEntry(kw, documentId)
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
            except TypeError:
                # newKeywords was not iterable; signal failure with 0.
                return 0
        else:
            # we have an existing entry for this document, and we need
            # to figure out if any of the keywords have actually changed
            if type(oldKeywords) is not OOSet:
                oldKeywords = OOSet(oldKeywords)
            newKeywords = OOSet(newKeywords)
            # fdiff: keywords to unindex; rdiff: keywords to add.
            fdiff = difference(oldKeywords, newKeywords)
            rdiff = difference(newKeywords, oldKeywords)
            if fdiff or rdiff:
                # if we've got forward or reverse changes
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
                else:
                    # No keywords left at all: drop the reverse entry.
                    del self._unindex[documentId]
                if fdiff:
                    self.unindex_objectKeywords(documentId, fdiff)
                if rdiff:
                    for kw in rdiff:
                        self.insertForwardIndexEntry(kw, documentId)
        return 1
Ejemplo n.º 12
0
    def set_oids(self, oids):
        """Sets the list of OIDs to scan.

        Gathers source information about new OIDs and discards
        source information for OIDs no longer in use.
        """
        pending = {}  # { oid -> sourcedict }
        self.lock.acquire()
        try:
            # Forget OIDs no longer in the requested set.
            for oid in difference(self.current, oids).keys():
                del self.current[oid]
            # Register OIDs we have not tracked before.
            for oid in difference(oids, self.current).keys():
                if self.future.has_key(oid):
                    # Source info for this OID was provided earlier.
                    sources, atime = self.future[oid]
                    del self.future[oid]
                    self.current[oid] = sources
                else:
                    pending[oid] = None
        finally:
            self.lock.release()

        if not pending:
            return

        # Load source info the slow way (outside the lock).
        if self.storage is not None:
            LOG('Ape', DEBUG, 'Getting sources for %d oids.'
                % len(pending))
            pending = self.storage.get_all_sources(pending.keys())
        else:
            LOG('Ape', DEBUG, "Can't get sources for %d oids. "
                "Assuming no sources!" % len(pending))
            # This will cause the scanner to miss changes, but
            # since no storage is known, there is little we can do.
            for oid in pending.keys():
                pending[oid] = {}

        self.lock.acquire()
        try:
            for oid, sources in pending.items():
                if not self.current.has_key(oid):
                    self.current[oid] = sources
                # else something else added the source info
                # while self.lock was released.
        finally:
            self.lock.release()
Ejemplo n.º 13
0
 def __init__(self, inputs, defaults):
     """Initialise from given inputs and defaults, which are name -> value
     collections.  The job inputs are their union, with values in
     inputs having precedence over defaults.
     """
     self.status = Job_status.INVALID
     merged = OOBTree()
     merged.update(inputs)
     # Only pull in defaults whose names the inputs did not supply.
     merged.update(difference(defaults, merged))
     self.inputs = merged
     self.results = OOBTree()
     self.error = None
     self.workdir = None
    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index the permuted keywords of *obj* under *documentId*.

        Returns 1 on success, 0 when the keyword value could not be
        iterated.
        """

        # get permuted keywords
        newKeywords = self._get_permuted_keywords(obj)

        # _unindex maps documentId -> previously indexed keywords.
        oldKeywords = self._unindex.get(documentId, None)

        if oldKeywords is None:
            # we've got a new document, let's not futz around.
            try:
                for kw in newKeywords:
                    self.insertForwardIndexEntry(kw, documentId)
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
            except TypeError:
                # newKeywords was not iterable; signal failure with 0.
                return 0
        else:
            # we have an existing entry for this document, and we need
            # to figure out if any of the keywords have actually changed
            if type(oldKeywords) is not OOSet:
                oldKeywords = OOSet(oldKeywords)
            newKeywords = OOSet(newKeywords)
            # fdiff: keywords to unindex; rdiff: keywords to add.
            fdiff = difference(oldKeywords, newKeywords)
            rdiff = difference(newKeywords, oldKeywords)
            if fdiff or rdiff:
                # if we've got forward or reverse changes
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
                else:
                    # No keywords left at all: drop the reverse entry.
                    del self._unindex[documentId]
                if fdiff:
                    self.unindex_objectKeywords(documentId, fdiff)
                if rdiff:
                    for kw in rdiff:
                        self.insertForwardIndexEntry(kw, documentId)
        return 1
Ejemplo n.º 15
0
    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index the permuted keywords of *obj* under *documentId*.

        Returns 1 on success, 0 when the keyword value could not be
        iterated.  ``threshold`` and ``attr`` are accepted for interface
        compatibility; this implementation does not use them.
        """
        newKeywords = self._get_permuted_keywords(obj)
        oldKeywords = self._unindex.get(documentId, None)

        if oldKeywords is None:
            # First time this document is indexed.
            try:
                for keyword in newKeywords:
                    self.insertForwardIndexEntry(keyword, documentId)
                if newKeywords:
                    self._unindex[documentId] = list(newKeywords)
            except TypeError:
                # Keyword value was not iterable.
                return 0
            return 1

        # Existing entry: only touch the keywords that changed.
        if type(oldKeywords) is not OOSet:
            oldKeywords = OOSet(oldKeywords)
        newKeywords = OOSet(newKeywords)
        dropped = difference(oldKeywords, newKeywords)
        added = difference(newKeywords, oldKeywords)

        if dropped or added:
            # Update the reverse index first, then the forward index.
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
            else:
                del self._unindex[documentId]
            if dropped:
                self.unindex_objectKeywords(documentId, dropped)
            for keyword in added:
                self.insertForwardIndexEntry(keyword, documentId)
        return 1
Ejemplo n.º 16
0
 def __xor__(self, other):
     """Return the symmetric difference of the two query sets.

     BUG FIX: the original computed only ``difference(self, other)``,
     i.e. an asymmetric difference, while the ``^`` operator is
     conventionally the symmetric difference -- elements unique to
     *other* must be included as well.
     """
     left = OOTreeSet(self)
     right = OOTreeSet(other)
     result = difference(left, right)
     result.update(difference(right, left))
     return QuerySet(result)
Ejemplo n.º 17
0
 def difference(self, *args):
     """Delegate to the C-implemented ``BTrees.OOBTree.difference``."""
     from BTrees.OOBTree import difference as _difference
     return _difference(*args)
Ejemplo n.º 18
0
 def difference(self, *args):
     """Delegate to the C-implemented ``BTrees.OLBTree.difference``."""
     from BTrees.OLBTree import difference as _difference
     return _difference(*args)