def index_doc(self, docid, seq):
    if isinstance(seq, StringTypes):
        raise TypeError('seq argument must be a list/tuple of strings')
    if not seq:
        return
    if self.normalize:
        seq = [w.lower() for w in seq]
    old_kw = self._rev_index.get(docid, None)
    new_kw = OOSet(seq)
    if old_kw is None:
        self._insert_forward(docid, new_kw)
        self._insert_reverse(docid, new_kw)
        self._num_docs.change(1)
    else:
        # determine added and removed keywords
        kw_added = difference(new_kw, old_kw)
        kw_removed = difference(old_kw, new_kw)
        # removed keywords are removed from the forward index
        for word in kw_removed:
            self._fwd_index[word].remove(docid)
        # now update reverse and forward indexes
        self._insert_forward(docid, kw_added)
        self._insert_reverse(docid, new_kw)
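# A minimal standalone sketch (data invented here) of the added/removed
# diffing pattern used above: BTrees.OOBTree.difference(a, b) returns the
# members of `a` that are not in `b`, so the two one-sided differences
# yield exactly the keywords to index and to unindex.
from BTrees.OOBTree import OOSet, difference

old_kw = OOSet(['ape', 'bear', 'cat'])
new_kw = OOSet(['bear', 'cat', 'dog'])

kw_added = difference(new_kw, old_kw)    # members only in new_kw
kw_removed = difference(old_kw, new_kw)  # members only in old_kw

assert list(kw_added) == ['dog']
assert list(kw_removed) == ['ape']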
def _update(self, documentId, val, oldval, threshold):
    add = difference(val, oldval)
    rem = difference(oldval, val)
    if add:
        self._indexValue(documentId, add, threshold)
    if rem:
        self._unindexValue(documentId, rem)
    self._updateOldval(oldval, val, add, rem)
    return len(add),
def test_None_is_smallest(self):
    t = self._makeOne()
    for i in range(999):  # Make sure we have multiple buckets
        t[i] = i * i
    t[None] = -1
    for i in range(-99, 0):  # Make sure we have multiple buckets
        t[i] = i * i
    self.assertEqual(list(t), [None] + list(range(-99, 999)))
    self.assertEqual(list(t.values()),
                     [-1] + [i * i for i in range(-99, 999)])
    self.assertEqual(t[2], 4)
    self.assertEqual(t[-2], 4)
    self.assertEqual(t[None], -1)
    t[None] = -2
    self.assertEqual(t[None], -2)
    t2 = t.__class__(t)
    del t[None]
    self.assertEqual(list(t), list(range(-99, 999)))
    if 'Py' in self.__class__.__name__:
        return
    from BTrees.OOBTree import difference, union, intersection
    self.assertEqual(list(difference(t2, t).items()), [(None, -2)])
    self.assertEqual(list(union(t, t2)), list(t2))
    self.assertEqual(list(intersection(t, t2)), list(t))
def read(self):
    """Return messages added and removed from folder.

    Two sets of message objects are returned.  The first set is
    messages that were added to the folder since the last read.
    The second set is the messages that were removed from the
    folder since the last read.

    The code assumes messages are added and removed but not edited.
    """
    mbox = mailbox.UnixMailbox(open(self.path, "rb"), factory)
    self._stat()
    cur = OOSet()
    new = OOSet()
    while 1:
        msg = mbox.next()
        if msg is None:
            break
        msgid = msg["message-id"]
        cur.insert(msgid)
        if not self.messages.has_key(msgid):
            self.messages[msgid] = msg
            new.insert(msg)
    removed = difference(self.messages, cur)
    for msgid in removed.keys():
        del self.messages[msgid]
    # XXX perhaps just return the OOBTree for removed?
    return new, OOSet(removed.values())
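# A minimal sketch (data invented here) of the mixed difference used above.
# When the first operand is a mapping (OOBTree) and the second a set
# (OOSet), BTrees' difference returns a mapping of the entries whose keys
# are absent from the set, with the values taken from the first operand.
# Here that recovers the removed messages together with their bodies.
from BTrees.OOBTree import OOBTree, OOSet, difference

messages = OOBTree({'<a@x>': 'msg-a', '<b@x>': 'msg-b'})
cur = OOSet(['<b@x>'])  # message ids still present in the folder

removed = difference(messages, cur)
assert list(removed.items()) == [('<a@x>', 'msg-a')]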
def __init__(self, name, searcher, parent, other_tags, tag_path):
    self.name = name
    self.__name__ = name.encode('utf-8')
    self.__parent__ = parent
    self.searcher = searcher
    self.other_tags = difference(other_tags, OOTreeSet([self.__name__]))
    self.tag_path = tag_path + [name]
def set_oids(self, oids):
    """Sets the list of OIDs to scan.

    Gathers source information about new OIDs and discards source
    information for OIDs no longer in use.
    """
    new_sources = {}  # { oid -> sourcedict }
    self.lock.acquire()
    try:
        removed = difference(self.current, oids)
        for oid in removed.keys():
            del self.current[oid]
        added = difference(oids, self.current)
        for oid in added.keys():
            if self.future.has_key(oid):
                # Source info for this OID was provided earlier.
                sources, atime = self.future[oid]
                del self.future[oid]
                self.current[oid] = sources
            else:
                new_sources[oid] = None
    finally:
        self.lock.release()
    if new_sources:
        # Load source info the slow way.
        if self.storage is not None:
            LOG('Ape', DEBUG,
                'Getting sources for %d oids.' % len(new_sources))
            new_sources = self.storage.get_all_sources(new_sources.keys())
        else:
            LOG('Ape', DEBUG,
                "Can't get sources for %d oids. "
                "Assuming no sources!" % len(new_sources))
            # This will cause the scanner to miss changes, but since
            # no storage is known, there is little we can do.
            for oid in new_sources.keys():
                new_sources[oid] = {}
    self.lock.acquire()
    try:
        for oid, sources in new_sources.items():
            if not self.current.has_key(oid):
                self.current[oid] = sources
            # else something else added the source info
            # while self.lock was released.
    finally:
        self.lock.release()
def _index_object(self, documentId, obj, threshold=None, attr=''):
    """Index an object 'obj' with integer id 'documentId'.

    Ideally, we've been passed a sequence of some sort that we can
    iterate over.  If, however, we haven't, we should do something
    useful with the results.  In the case of a string, this means
    indexing the entire string as a keyword.
    """
    # First we need to see if there's anything interesting to look at.
    # self.id is the name of the index, which is also the name of the
    # attribute we're interested in.  If the attribute is callable,
    # we'll call it.
    newKeywords = self._get_object_keywords(obj, attr)
    oldKeywords = self._unindex.get(documentId, None)
    if oldKeywords is None:
        # we've got a new document, let's not futz around.
        try:
            for kw in newKeywords:
                self.insertForwardIndexEntry(kw, documentId)
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
        except TypeError:
            return 0
    else:
        # we have an existing entry for this document, and we need
        # to figure out if any of the keywords have actually changed
        if type(oldKeywords) is not OOSet:
            oldKeywords = OOSet(oldKeywords)
        newKeywords = OOSet(newKeywords)
        fdiff = difference(oldKeywords, newKeywords)
        rdiff = difference(newKeywords, oldKeywords)
        if fdiff or rdiff:
            # if we've got forward or reverse changes
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
            else:
                del self._unindex[documentId]
            if fdiff:
                self.unindex_objectKeywords(documentId, fdiff)
            if rdiff:
                for kw in rdiff:
                    self.insertForwardIndexEntry(kw, documentId)
    return 1
def __init__(s, inputs, defaults):
    """Initialise from given inputs and defaults, which are
    name -> value collections.  The job inputs are their union,
    with values in inputs having precedence over defaults.
    """
    s.status = Job_status.INVALID
    s.inputs = OOBTree()
    s.inputs.update(inputs)
    s.inputs.update(difference(defaults, s.inputs))
    s.results = OOBTree()
    s.error = s.workdir = None
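# A minimal sketch (values invented here) of the defaults-filling idiom
# above: difference(defaults, inputs) keeps only the entries whose keys
# are not already present, with values taken from the first operand, so
# updating with it never overwrites a caller-supplied input.
from BTrees.OOBTree import OOBTree, difference

inputs = OOBTree({'depth': 5})
defaults = OOBTree({'depth': 1, 'width': 2})

merged = OOBTree()
merged.update(inputs)
merged.update(difference(defaults, merged))
assert dict(merged) == {'depth': 5, 'width': 2}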
def _index_object(self, documentId, obj, threshold=None, attr=''):
    # get permuted keywords
    newKeywords = self._get_permuted_keywords(obj)
    oldKeywords = self._unindex.get(documentId, None)
    if oldKeywords is None:
        # we've got a new document, let's not futz around.
        try:
            for kw in newKeywords:
                self.insertForwardIndexEntry(kw, documentId)
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
        except TypeError:
            return 0
    else:
        # we have an existing entry for this document, and we need
        # to figure out if any of the keywords have actually changed
        if type(oldKeywords) is not OOSet:
            oldKeywords = OOSet(oldKeywords)
        newKeywords = OOSet(newKeywords)
        fdiff = difference(oldKeywords, newKeywords)
        rdiff = difference(newKeywords, oldKeywords)
        if fdiff or rdiff:
            # if we've got forward or reverse changes
            if newKeywords:
                self._unindex[documentId] = list(newKeywords)
            else:
                del self._unindex[documentId]
            if fdiff:
                self.unindex_objectKeywords(documentId, fdiff)
            if rdiff:
                for kw in rdiff:
                    self.insertForwardIndexEntry(kw, documentId)
    return 1
def __xor__(self, other):
    return QuerySet(difference(OOTreeSet(self), OOTreeSet(other)))
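# Note that __xor__ conventionally denotes symmetric difference, while the
# method above computes only the one-sided difference self - other. A
# hedged sketch of a true symmetric difference over BTrees sets (the
# function name is chosen here for illustration, not part of the original
# class): union the two one-sided differences.
from BTrees.OOBTree import OOTreeSet, difference, union

def symmetric_difference(a, b):
    # Members of exactly one of the two sets.
    a, b = OOTreeSet(a), OOTreeSet(b)
    return union(difference(a, b), difference(b, a))

assert list(symmetric_difference([1, 2], [2, 3])) == [1, 3]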
def difference(self, *args):
    from BTrees.OOBTree import difference
    return difference(*args)

def difference(self, *args):
    from BTrees.OLBTree import difference
    return difference(*args)