def __init__(self, lexicon):
    self._lexicon = lexicon

    # wid -> {docid -> weight}; t -> D -> w(D, t)
    # Different indexers have different notions of term weight, but we
    # expect each indexer to use ._wordinfo to map wids to its notion
    # of a docid-to-weight map.
    # There are two kinds of OOV words: wid 0 is explicitly OOV,
    # and it's possible that the lexicon will return a non-zero wid
    # for a word we don't currently know about. For example, if we
    # unindex the last doc containing a particular word, that wid
    # remains in the lexicon, but is no longer in our _wordinfo map;
    # lexicons can also be shared across indices, and some other index
    # may introduce a lexicon word we've never seen.
    # A word is in-vocabulary for this index if and only if
    # _wordinfo.has_key(wid). Note that wid 0 must not be a key.
    self._wordinfo = IOBTree()

    # docid -> weight
    # Different indexers have different notions of doc weight, but we
    # expect each indexer to use ._docweight to map docids to its
    # notion of what a doc weight is.
    self._docweight = IIBTree()

    # docid -> WidCode'd list of wids
    # Used for un-indexing, and for phrase search.
    self._docwords = IOBTree()

    # Use a BTree length for efficient length computation w/o conflicts
    self.length = Length()
    self.document_count = Length()
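# A small illustration (standalone, not part of the original code) of why a
# Length object is used for the counters above: a plain integer attribute on
# a Persistent object raises ConflictError when two transactions increment it
# concurrently, whereas Length implements ZODB conflict resolution by summing
# the concurrent deltas (committed + new - old).
from BTrees.Length import Length

counter = Length(10)
counter.change(1)
assert counter() == 11

# ZODB calls _p_resolveConflict(old_state, committed_state, new_state) on a
# write conflict; for Length the pickled state is just the integer value.
merged = counter._p_resolveConflict(10, 11, 13)  # +1 and +3, both from 10
assert merged == 14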
def clear(self):
    # wid -> {docid -> weight}; t -> D -> w(D, t)
    self._wordinfo = trees.family32.IO.BTree()
    # XXX
    # Scalability of this Zope approach is pretty bad (especially with many
    # documents). What is needed instead:
    #   _wordinfo = BTree of (word, weight, docid) - a TreeSet could be used
    #   instead. Searching a keyword then becomes
    #   _wordinfo.keys((word_start, None, None)), already sorted by weight
    #   (which just has to be multiplied by idf); this works for both search
    #   and glob_search. However, when searching multiple keywords, we need
    #   an efficient (logarithmic) algorithm for incremental, weighted set
    #   intersection. Even without efficient intersection, it is faster and
    #   more secure anyway.
    # XXX

    # docid -> weight
    self._docweight = self.family.IF.BTree()

    # docid -> WidCode'd list of wids (~1/4 of document size)
    # Used for un-indexing, and for phrase search.
    self._docwords = self.family.IO.BTree()

    # Use a BTree length for efficient length computation w/o conflicts
    self.wordCount = Length()
    self.documentCount = Length()
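# A minimal sketch (hypothetical names, not part of the original code) of the
# alternative layout proposed in the XXX comment above: a single OOBTree keyed
# by (word, -weight, docid), so a range scan over one word's key range yields
# its postings already ordered best-weight-first.
from BTrees.OOBTree import OOBTree

def build_postings(doc_weights):
    """doc_weights: {docid: {word: positive integer weight}}"""
    postings = OOBTree()
    for docid, weights in doc_weights.items():
        for word, weight in weights.items():
            # Negate the weight so ascending key order means descending weight.
            postings[(word, -weight, docid)] = None
    return postings

def postings_for(postings, word):
    # (word,) sorts before every (word, -weight, docid) key; (word, 1) sorts
    # after them all, because positive weights guarantee -weight < 1.
    for _, neg_weight, docid in postings.keys(min=(word,), max=(word, 1)):
        yield docid, -neg_weight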
class Lexicon(_Lexicon):

    family = trees.family32
    # In comparison with the standard Lexicon, use bigger buckets.

    def __init__(self, *pipeline):
        self._wids = self.family.OI.BTree()
        self._words = self.family.IO.BTree()
        self.wordCount = Length()
        self._pipeline = pipeline

    def sourceToWordIds(self, text):
        if text is None:
            text = ''
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            # Make sure wordCount is overridden with a BTrees.Length.Length
            self.wordCount = Length(self.wordCount())
        # Strategically unload the length value so that we get the most
        # recent value written to the database to minimize conflicting wids.
        # Because the length is independent, this will load the most
        # recent value stored, regardless of whether MVCC is enabled.
        self.wordCount._p_deactivate()
        parallel_traversal(self._wids, last)
        return list(map(self._getWordIdCreate, last))
def _BTreeContainer__len(self):
    l = Length()
    ol = len(self.__data)
    if ol > 0:
        l.change(ol)
    self._p_changed = True
    return l
class DateBookingList(Persistent):
    """Simple set of booking objects with a count attribute."""

    def __init__(self):
        self._bookings = OOBTree.OOTreeSet()
        self._count = Length(0)

    def addBooking(self, booking):
        self._bookings.insert(booking)
        self._count.change(1)

    def removeBooking(self, booking):
        self._bookings.remove(booking)
        self._count.change(-1)

    def getCount(self):
        return self._count()

    def iterbookings(self):
        """Iterator over the bookings."""
        return self._bookings.__iter__()

    def getBookingsPerConf(self):
        """Returns a dictionary where the keys are Conference objects and
        the values are the number of Vidyo bookings of that conference.
        """
        result = {}
        for b in self._bookings:
            result[b.getConference()] = result.setdefault(b.getConference(), 0) + 1
        return result
def _QuestionRecord__len(self):
    l = Length()
    ol = len(self._tree)
    if ol > 0:
        l.change(ol)
    self._p_changed = True
    return l
def _Folder__len(self):
    l = Length()
    ol = len(self.__data)
    if ol > 0:
        l.change(ol)
    self._p_changed = True
    return l
def _PersitentOOBTree__len(self):
    l = Length()
    ol = len(self._data)
    if ol > 0:
        l.change(ol)
    self._p_changed = True
    return l
def __init__(self):
    """Setup our data structures."""
    self._anon_ratings = IOBTree()
    self._ratings = OOBTree()
    self._sessions = OOBTree()
    self._length = Length()
    self._anon_length = Length()
def _get_next_number(self, prefix):
    last = getattr(self, '_autoname_last_' + prefix, None)
    if last is None:
        last = Length()
        setattr(self, '_autoname_last_' + prefix, last)
    number = last.value
    last.change(1)
    return number
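# A hedged usage sketch of _get_next_number above (the class and calls are
# hypothetical): each prefix lazily gets its own Length counter, so
# transactions that autoname objects under different prefixes never write
# to the same persistent counter.
class AutoNamer(object):
    _get_next_number = _get_next_number  # reuse the function defined above

namer = AutoNamer()
assert namer._get_next_number('invoice') == 0  # creates _autoname_last_invoice
assert namer._get_next_number('invoice') == 1
assert namer._get_next_number('report') == 0   # independent counter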
class CanopyLexicon(Lexicon):  # pragma: no cover
    def sourceToWordIds(self, last):
        if last is None:
            last = []
        if not isinstance(self.wordCount, Length):
            self.wordCount = Length(self.wordCount())
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))
def clear(self):
    '''Clear the index.'''
    l = self.__len__
    if isinstance(l, Length):
        l.set(0)
    else:
        self.__len__ = Length()
    try:
        self.numObjects.set(0)
    except AttributeError:
        self.numObjects = Length()
    if self.ReverseOrder:
        self._reverseOrder = OOTreeSet()
    self._setup()
def clear(self):
    self._length = Length()
    self._index = OOBTree()
    self._unindex = IOBTree()
    if self._counter is None:
        self._counter = Length()
    else:
        self._increment_counter()
def clear(self):
    # ._wordinfo = BTree(wid -> (TreeSet((weight, docid)),
    #                            BTree(docid -> weight), Length))
    self._wordinfo = self.family.IO.BTree()
    # ._docwords = BTree(docid -> widcode)
    # Used for document unindexing, but no phrase search.
    self._docwords = self.family.IO.BTree()
    self.wordCount = Length()
    self.documentCount = Length()
class MailDataStorage(PersistentItem):
    interface.implements(IMailDataStorage)

    def __init__(self, **kw):
        self.count = Length(0)
        super(MailDataStorage, self).__init__(**kw)

    def append(self, form, record, request):
        mail = getMultiAdapter((form, request), IMailTemplate)
        mail.send((self.emailto,), record=record, storage=self)
        self.count.change(1)
def _init(self):
    self.nodes = IOBTree()
    self.edges = IOBTree()
    self.edgedata = IOBTree()
    self.outgoing = IOBTree()
    self.incoming = IOBTree()
    self.typeids = PObject()
    self._nodeid = Length(0)
    self._edgeid = Length(0)
    self._typeid = Length(0)
class CanopyLexicon(Lexicon):  # pragma: no cover
    def __init__(self, stop_words):
        super(CanopyLexicon, self).__init__()
        self._pipeline = [CustomStopWordRemover(stop_words)]

    def sourceToWordIds(self, last):
        if last is None:
            last = []
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            self.wordCount = Length(self.wordCount())
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))
class MessageService(Persistent, Location):
    interface.implements(IMessageService)

    def __init__(self, storage):
        self.__parent__ = storage
        self.index = OIBTree()
        self.unread = Length(0)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(self.index.values())

    def __contains__(self, key):
        return self.__parent__.getMessage(key) is not None

    def get(self, msgId, default=None):
        msg = self.__parent__.getMessage(msgId)
        if msg is not None:
            if msg.__date__ in self.index:
                return msg
        return default

    def append(self, message):
        message.__parent__ = self
        if self.__parent__.readStatus(message):
            self.unread.change(1)
        self.index[message.__date__] = message.__id__

    def remove(self, message):
        id = message.__date__
        if id in self.index:
            del self.index[id]
            if self.__parent__.readStatus(message) and self.unread() > 0:
                self.unread.change(-1)

    def create(self, **data):
        raise NotImplementedError('create')
class MessageQueues(persistent.dict.PersistentDict):
    interface.implements(interfaces.IMessageQueues)

    def __init__(self, *args, **kwargs):
        super(MessageQueues, self).__init__(*args, **kwargs)
        for status in interfaces.MESSAGE_STATES:
            self[status] = queue.CompositeQueue()
        self._messages_sent = Length()

    @property
    def messages_sent(self):
        return self._messages_sent()

    def dispatch(self):
        try:
            lock = zc.lockfile.LockFile(LOCKFILE_NAME)
        except zc.lockfile.LockError:
            logger.info("Dispatching is locked by another process.")
            return (0, 0)
        try:
            return self._dispatch()
        finally:
            lock.close()

    def _dispatch(self):
        sent = 0
        failed = 0
        for name in 'new', 'retry':
            queue = self[name]
            while True:
                try:
                    message = queue.pull()
                except IndexError:
                    break
                else:
                    status, message = dispatch(message)
                    if status == 'sent':
                        sent += 1
                    else:
                        failed += 1
        self._messages_sent.change(sent)
        return sent, failed

    def clear(self, queue_names=('error', 'sent')):
        for name in queue_names:
            self[name] = self[name].__class__()
def _migrateStorage(self):
    # We're going to use an LOBTree for storage. We need to consider the
    # possibility that self is from an older version that uses the native
    # Archetypes storage or the former IOBTree (<= 1.6.0b2) in the
    # SavedFormInput field.
    updated = base_hasattr(self, '_inputStorage') and \
              base_hasattr(self, '_inputItems') and \
              base_hasattr(self, '_length')
    if not updated:
        try:
            saved_input = self.getSavedFormInput()
        except AttributeError:
            saved_input = []
        self._inputStorage = SavedDataBTree()
        i = 0
        self._inputItems = 0
        self._length = Length()
        if len(saved_input):
            for row in saved_input:
                self._inputStorage[i] = row
                i += 1
            self.SavedFormInput = []
            self._inputItems = i
            self._length.set(i)
def clear(self):
    """Empty the lexicon."""
    self.length = Length()
    self._wid_length_based = False
    self._wids = OIBTree()   # word -> wid
    self._words = IOBTree()  # wid -> word
def add(self, name, other, send_events=True):
    """See IFolder."""
    if not isinstance(name, basestring):
        raise TypeError("Name must be a string rather than a %s" %
                        name.__class__.__name__)
    if not name:
        raise TypeError("Name must not be empty")
    name = unicodify(name)
    if name in self.data:
        raise KeyError('An object named %s already exists' % name)
    if send_events:
        objectEventNotify(ObjectWillBeAddedEvent(other, self, name))
    other.__parent__ = self
    other.__name__ = name
    # Backwards compatibility: add a Length _num_objects to folders that
    # have none.
    if self._num_objects is None:
        self._num_objects = Length(len(self.data))
    self.data[name] = other
    self._num_objects.change(1)
    if self._order is not None:
        self._order += (name,)
    if send_events:
        objectEventNotify(ObjectAddedEvent(other, self, name))
def remove(self, name, send_events=True):
    """See IFolder."""
    name = unicodify(name)
    other = self.data[name]
    if send_events:
        objectEventNotify(ObjectWillBeRemovedEvent(other, self, name))
    if hasattr(other, '__parent__'):
        del other.__parent__
    if hasattr(other, '__name__'):
        del other.__name__
    # Backwards compatibility: add a Length _num_objects to folders that
    # have none.
    if self._num_objects is None:
        self._num_objects = Length(len(self.data))
    del self.data[name]
    self._num_objects.change(-1)
    if self._order is not None:
        self._order = tuple([x for x in self._order if x != name])
    if send_events:
        objectEventNotify(ObjectRemovedEvent(other, self, name))
    return other
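# A short self-contained illustration (hypothetical class, not from the
# original code) of the lazy-upgrade idiom used in add()/remove() above:
# folders pickled before _num_objects existed get a Length seeded from
# len(self.data) on first write, after which __len__ can read the counter
# without waking every bucket of a large BTree.
from BTrees.Length import Length
from BTrees.OOBTree import OOBTree

class CountedFolder(object):
    _num_objects = None  # instances pickled before the counter existed

    def __init__(self):
        self.data = OOBTree()

    def __setitem__(self, name, obj):
        if self._num_objects is None:
            # Lazy upgrade: seed the counter from the current BTree size.
            self._num_objects = Length(len(self.data))
        self.data[name] = obj
        self._num_objects.change(1)

    def __len__(self):
        if self._num_objects is None:
            return len(self.data)  # old instance, not yet upgraded
        # O(1) and conflict-free, unlike len() over all BTree buckets.
        return self._num_objects()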
def _store_data(self, appstruct: dict):
    """Store data appstruct.

    The `comments_count` value is converted from int to
    :class:`BTrees.Length.Length` to support ZODB conflict resolution.
    """
    if self._count_field_name in appstruct:  # pragma: no branch
        data = getattr(self.context, self._annotation_key, {})
        if self._count_field_name not in data:
            counter = Length(0)
        else:
            counter = data[self._count_field_name]
        count = appstruct[self._count_field_name]
        counter.set(count)
        appstruct[self._count_field_name] = counter
    super()._store_data(appstruct)
def sourceToWordIds(self, last):
    if last is None:
        last = []
    if not isinstance(self.wordCount, Length):
        self.wordCount = Length(self.wordCount())
    self.wordCount._p_deactivate()
    return list(map(self._getWordIdCreate, last))
def _mass_add_wordinfo(self, wid2weight, docid):
    dicttype = type({})
    # self._wordinfo - IOBTree of wid -> docid-to-weight trees
    get_doc2score = self._wordinfo.get
    new_word_count = 0
    # Fill up the cache for performance over the network.
    wids = wid2weight.keys()
    parallel_traversal(self._wordinfo, wids)
    parallel_traversal(map(get_doc2score, wids), [docid] * len(wids))
    for wid, weight in wid2weight.items():
        doc2score = get_doc2score(wid)
        if doc2score is None:
            doc2score = {}
            new_word_count += 1
        elif (isinstance(doc2score, dicttype) and
              len(doc2score) == self.DICT_CUTOFF):
            doc2score = self.family.IF.BTree(doc2score)
        doc2score[docid] = weight
        self._wordinfo[wid] = doc2score  # not redundant: persistency!
    try:
        self.wordCount.change(new_word_count)
    except AttributeError:
        # Upgrade wordCount to a Length object.
        self.wordCount = Length(len(self._wordinfo))
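# A minimal sketch (hypothetical names; the cutoff value is an assumption)
# of the DICT_CUTOFF escalation used in _mass_add_wordinfo above: a word's
# postings start as a plain dict, which pickles as one record, and are
# promoted to an IF BTree once they reach DICT_CUTOFF entries, so later
# updates conflict-resolve per bucket instead of rewriting one big dict.
import BTrees

DICT_CUTOFF = 10  # assumed value, for illustration only

def add_posting(wordinfo, wid, docid, weight, family=BTrees.family32):
    doc2score = wordinfo.get(wid)
    if doc2score is None:
        doc2score = {}
    elif isinstance(doc2score, dict) and len(doc2score) == DICT_CUTOFF:
        # Promote: BTree updates touch only the affected bucket.
        doc2score = family.IF.BTree(doc2score)
    doc2score[docid] = weight
    # Reassign even when doc2score is the same dict: plain dicts are not
    # persistent, so the parent BTree must be told the value changed.
    wordinfo[wid] = doc2score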
def _change_doc_len(self, delta):
    # Change the total doc length used for scoring.
    try:
        self._totaldoclen.change(delta)
    except AttributeError:
        # Opportunistically upgrade the _totaldoclen attribute to a
        # Length object.
        self._totaldoclen = Length(long(self._totaldoclen + delta))
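# A runnable sketch (synthetic class) of the opportunistic upgrade above:
# instances written by older code hold _totaldoclen as a plain number, which
# has no .change() method, so the AttributeError path replaces it with a
# Length seeded with the updated total. Readers must tolerate both forms,
# which is why the scoring code elsewhere calls self._totaldoclen() and
# falls back on TypeError when the attribute is still a bare number.
from BTrees.Length import Length

class DocLenHolder(object):
    _totaldoclen = 100  # simulate an instance from before the Length era

    def _change_doc_len(self, delta):
        try:
            self._totaldoclen.change(delta)
        except AttributeError:
            self._totaldoclen = Length(int(self._totaldoclen + delta))

h = DocLenHolder()
h._change_doc_len(5)          # the upgrade happens here
assert h._totaldoclen() == 105
h._change_doc_len(5)          # subsequent calls take the fast path
assert h._totaldoclen() == 110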
def clear(self):
    """Initialize forward and reverse mappings."""
    # The forward index maps indexed values to a sequence of docids.
    self._fwd_index = self.family.OO.BTree()
    # The reverse index maps a docid to its index value.
    self._rev_index = self.family.IO.BTree()
    self._num_docs = Length(0)
class PollRecord(BTreeContainer):
    implements(IPollRecord, IContentContainer)

    voteCount = None
    firstVote = None
    lastVote = None

    def __init__(self, *kv, **kw):
        super(PollRecord, self).__init__(*kv, **kw)
        self._results = OOBTree()
        self.voteCount = Length()

    def add(self, record):
        polling = getUtility(IPolling)
        for key, value in record.choices.items():
            item = self._results.get(key)
            if item is None:
                item = QuestionRecord()
                notify(ObjectCreatedEvent(item))
                self._results[key] = item
            for id in value:
                self.voteCount.change(1)
                polling.voteCount.change(1)
                item.voteCount.change(1)
                if item.firstVote is None:
                    item.firstVote = record
                item.lastVote = record
                answer = item.get(id)
                if answer:
                    answer.change(1)
                else:
                    item[id] = Length(1)
        if self.firstVote is None:
            self.firstVote = record
        self.lastVote = record
        self._p_changed = 1

    def getResults(self):
        res = {}
        for question, answers in self._results.items():
            res[question] = {}
            size = float(answers.voteCount.value)
            for answer, votes in answers.items():
                res[question][answer] = (votes.value, votes.value / size)
        return res, self
def __init__(self, principalId):
    self.index = OIBTree()
    self.messages = IOBTree()
    self.services = OOBTree()
    self.readstatus = IITreeSet()
    self.principalId = principalId
    self._next = Length(1)
def clear(self):
    self._length = Length()
    self._index = OOBTree()
    self._unindex = IOBTree()
class UUIDIndex(UnIndex):
    """Index for uuid fields with a unique value per key.

    The internal structure is:

    self._index = {datum: documentId}
    self._unindex = {documentId: datum}

    For each datum only one documentId can exist.
    """

    meta_type = "UUIDIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    )

    query_options = ["query", "range"]

    manage = manage_main = DTMLFile('dtml/manageUUIDIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        self._length = Length()
        self._index = OIBTree()
        self._unindex = IOBTree()

    def numObjects(self):
        """Return the number of indexed objects.

        Since we have a 1:1 mapping from documents to values,
        we can reuse the stored length.
        """
        return self.indexSize()

    def uniqueValues(self, name=None, withLengths=0):
        """Return the unique values for ``name``.

        If ``withLengths`` is true, returns a sequence of tuples of
        (value, length).
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return []
        if not withLengths:
            return tuple(self._index.keys())
        # We know the length for each value is one.
        return [(k, 1) for k in self._index.keys()]

    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.
        """
        if entry is None:
            return
        old_docid = self._index.get(entry, _marker)
        if old_docid is _marker:
            self._index[entry] = documentId
            self._length.change(1)
        elif old_docid != documentId:
            logger.exception("A different document with value '%s' already "
                             "exists in the index." % entry)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        old_docid = self._index.get(entry, _marker)
        if old_docid is not _marker:
            del self._index[entry]
            self._length.change(-1)

    def _get_object_datum(self, obj, attr):
        # For a uuid it never makes sense to acquire a parent value via
        # Acquisition.
        has_attr = getattr(aq_base(obj), attr, _marker)
        if has_attr is _marker:
            return _marker
        return super(UUIDIndex, self)._get_object_datum(obj, attr)
class OkapiIndex(BaseIndex):

    # BM25 free parameters.
    K1 = 1.2
    B = 0.75
    assert K1 >= 0.0
    assert 0.0 <= B <= 1.0

    def __init__(self, lexicon):
        BaseIndex.__init__(self, lexicon)

        # ._wordinfo for Okapi is
        # wid -> {docid -> frequency}; t -> D -> f(D, t)

        # ._docweight for Okapi is
        # docid -> # of words in the doc
        # This is just len(self._docwords[docid]), but _docwords is stored
        # in compressed form, so uncompressing it just to count the list
        # length would be ridiculously expensive.

        # sum(self._docweight.values()), the total # of words in all docs.
        # This is a long for "better safe than sorry" reasons. It isn't
        # used often enough that speed should matter.
        # Use a BTrees.Length.Length object to avoid concurrent write
        # conflicts.
        self._totaldoclen = Length(0)

    def index_doc(self, docid, text):
        count = BaseIndex.index_doc(self, docid, text)
        self._change_doc_len(count)
        return count

    def _reindex_doc(self, docid, text):
        self._change_doc_len(-self._docweight[docid])
        return BaseIndex._reindex_doc(self, docid, text)

    def unindex_doc(self, docid):
        self._change_doc_len(-self._docweight[docid])
        BaseIndex.unindex_doc(self, docid)

    def _change_doc_len(self, delta):
        # Change the total doc length used for scoring.
        if delta == 0:
            return
        try:
            self._totaldoclen.change(delta)
        except AttributeError:
            # Opportunistically upgrade the _totaldoclen attribute to a
            # Length object.
            self._totaldoclen = Length(int(self._totaldoclen + delta))

    def _search_wids(self, wids):
        # The workhorse. Return a list of (IIBucket, weight) pairs, one
        # pair for each wid t in wids. The IIBucket, times the weight,
        # maps D to TF(D,t) * IDF(t) for every docid D containing t.
        #
        # As currently written, the weights are always 1, and the IIBucket
        # maps D to TF(D,t)*IDF(t) directly, where the product is computed
        # as a float but stored as a scaled_int.
        #
        # Caution: _search_wids hardcodes the scaled_int function.
        if not wids:
            return []
        N = float(self.document_count())  # total # of docs
        try:
            doclen = self._totaldoclen()
        except TypeError:
            # _totaldoclen has not yet been upgraded
            doclen = self._totaldoclen
        meandoclen = doclen / N
        K1 = self.K1
        B = self.B
        K1_plus1 = K1 + 1.0
        B_from1 = 1.0 - B

        #                           f(D, t) * (k1 + 1)
        #   TF(D, t) =  -------------------------------------------
        #               f(D, t) + k1 * ((1-b) + b*len(D)/E(len(D)))

        L = []
        docid2len = self._docweight
        for t in wids:
            d2f = self._wordinfo[t]  # map {docid -> f(docid, t)}
            idf = inverse_doc_frequency(len(d2f), N)  # an unscaled float
            result = IIBucket()
            # Inner score loop; was implemented in C before.
            idf *= 1024.0  # float out part of the scaled_int computation
            for docid, f in d2f.items():
                lenweight = B_from1 + B * docid2len[docid] / meandoclen
                tf = f * K1_plus1 / (f + K1 * lenweight)
                result[docid] = int(tf * idf + 0.5)
            L.append((result, 1))
        return L

    # Note about the above: the result is tf * idf. tf is small -- it
    # can't be larger than k1+1 = 2.2. idf is formally unbounded, but
    # is less than 14 for a term that appears in only 1 of a million
    # documents. So the product is probably less than 32, or 5 bits
    # before the radix point. If we did the scaled-int business on
    # both of them, we'd be up to 25 bits. Add 64 of those and we'd
    # be in overflow territory. That's pretty unlikely, so we *could*
    # just store scaled_int(tf) in result[docid], and use scaled_int(idf)
    # as an invariant weight across the whole result. But besides
    # skating near the edge, it's not a speed cure, since the computation
    # of tf would still be done at Python speed, and it's a lot more
    # work than just multiplying by idf.
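# A worked example (made-up numbers) of the inner scoring loop above: with
# K1 = 1.2, B = 0.75, a raw frequency f(D, t) = 3 in a document of exactly
# average length (len(D) / E(len(D)) == 1, so lenweight == 1.0), and an
# assumed idf of 2.0:
K1, B = 1.2, 0.75
f, lenweight, idf = 3.0, 1.0, 2.0
tf = f * (K1 + 1.0) / (f + K1 * lenweight)  # 6.6 / 4.2 ~= 1.5714
scaled = int(tf * idf * 1024.0 + 0.5)       # the scaled_int stored per docid
assert scaled == 3218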
def query_weight(self, terms):
    # Get the wids.
    wids = []
    for term in terms:
        termwids = self._lexicon.termToWordIds(term)
        wids.extend(termwids)
    # The max score for term t is the maximum value of
    #     TF(D, t) * IDF(Q, t)
    # We can compute IDF directly, and as noted in the comments below
    # TF(D, t) is bounded above by 1+K1.
    N = float(len(self._docweight))
    tfmax = 1.0 + self.K1
    sum = 0
    for t in self._remove_oov_wids(wids):
        idf = inverse_doc_frequency(len(self._wordinfo[t]), N)
        sum += scaled_int(idf * tfmax)
    return sum

def _get_frequencies(self, wids):
    d = {}
    dget = d.get
    for wid in wids:
        d[wid] = dget(wid, 0) + 1
    return d, len(wids)
def testUpgradeIdToolDicts(self):
    # With old erp5_core, we have no generators, no IdTool_* zsql methods,
    # and we have a dictionary stored on the id tool.
    id_tool = self.portal.portal_ids
    # Rebuild a persistent mapping like it already existed in early 2010.
    # First, the persistent mapping of generateNewLengthIdList.
    id_tool.dict_length_ids = PersistentMapping()
    id_tool.dict_length_ids['foo'] = Length(5)
    id_tool.dict_length_ids['bar'] = Length(5)
    id_tool.IdTool_zSetLastId(id_group='foo', last_id=5)
    id_tool.IdTool_zSetLastId(id_group='bar', last_id=10)
    # Then the persistent mapping of generateNewId.
    id_tool.dict_ids = PersistentMapping()
    id_tool.dict_ids['foo'] = 3
    # It was unfortunately possible to define something else than strings.
    id_tool.dict_ids[('bar', 'baz')] = 2
    # Delete portal type info and new generators.
    id_tool.manage_delObjects(ids=list(id_tool.objectIds()))
    id_tool.__class__.getTypeInfo = lambda self: None
    # Test with compatibility.
    self.tic()
    id_list = id_tool.generateNewLengthIdList(id_group='foo', store=1)
    self.assertEqual(id_list, [5])
    self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
    # Now, restore and make sure we can still generate ids.
    del id_tool.__class__.getTypeInfo
    bt = self.portal.portal_templates.getInstalledBusinessTemplate(
        'erp5_core', strict=True)
    for path, obj in bt._path_item._objects.iteritems():
        path, obj_id = path.rsplit('/', 1)
        if path == 'portal_ids':
            id_tool._setObject(obj_id, obj._getCopy(bt))
    self.tic()
    id_list = id_tool.generateNewLengthIdList(id_group='foo')
    # It is known that with the current upgrade there is a hole.
    self.assertEqual(id_list, [7])
    new_id = id_tool.generateNewId(id_group='foo')
    self.assertEqual(new_id, 4)
    new_id = id_tool.generateNewId(id_group=('bar', 'baz'))
    self.assertEqual(new_id, 3)
    # Make sure that the old code is not used any more, so the dict on
    # the id tool should not change; checking dict_length_ids.
    self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
    id_list = id_tool.generateNewLengthIdList(id_group='bar')
    self.assertEqual(id_list, [11])
    generator_list = [x for x in id_tool.objectValues()
                      if x.getReference() == 'mysql_non_continuous_increasing']
    self.assertEqual(len(generator_list), 1)
    generator = generator_list[0]
    self.assertEqual(generator.last_max_id_dict['foo'].value, 7)
    self.assertEqual(generator.last_max_id_dict['bar'].value, 11)
    # Make sure that the old code is not used any more, so the dict on
    # the id tool should not change; checking dict_ids.
    self.assertEqual(id_tool.dict_ids['foo'], 3)
    generator_list = [x for x in id_tool.objectValues()
                      if x.getReference() == 'zodb_continuous_increasing']
    self.assertEqual(len(generator_list), 1)
    generator = generator_list[0]
    self.assertEqual(generator.last_id_dict['foo'], 4)
    self.assertEqual(generator.last_id_dict["('bar', 'baz')"], 3)
class Lexicon(Persistent):

    def __init__(self, *pipeline):
        self._wids = OIBTree()   # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV --
        # out of vocabulary). This can happen, e.g., if a query contains
        # a word we never saw before, and that isn't a known stopword (or
        # otherwise filtered out). Returning a special wid value for OOV
        # words is a way to let clients know when an OOV word appears.
        self.wordCount = Length()
        self._pipeline = pipeline

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        # Overridden per instance.
        return len(self._wids)

    def words(self):
        return self._wids.keys()

    def wids(self):
        return self._words.keys()

    def items(self):
        return self._wids.items()

    def sourceToWordIds(self, text):
        if text is None:
            text = ''
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            # Make sure wordCount is overridden with a BTrees.Length.Length
            self.wordCount = Length(self.wordCount())
        # Strategically unload the length value so that we get the most
        # recent value written to the database to minimize conflicting wids.
        # Because the length is independent, this will load the most
        # recent value stored, regardless of whether MVCC is enabled.
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        wids = []
        for word in last:
            wids.append(self._wids.get(word, 0))
        return wids

    def parseTerms(self, text):
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        return "*" in word or "?" in word

    def get_word(self, wid):
        return self._words[wid]

    def get_wid(self, word):
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these.
        prefix = ""
        while pattern and pattern[0] not in "*?":
            prefix += pattern[0]
            pattern = pattern[1:]
        if not pattern:
            # There were no globbing characters in the pattern.
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            else:
                return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This is too inefficient, so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)
        pat = prefix
        for c in pattern:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)
        keys = self._wids.keys(prefix)  # keys starting at prefix
        wids = []
        for key in keys:
            if not key.startswith(prefix):
                break
            if prog.match(key):
                wids.append(self._wids[key])
        return wids

    def _getWordIdCreate(self, word):
        wid = self._wids.get(word)
        if wid is None:
            wid = self._new_wid()
            self._wids[word] = wid
            self._words[wid] = word
        return wid

    def _new_wid(self):
        count = self.wordCount
        count.change(1)
        while count() in self._words:  # just to be safe
            count.change(1)
        return count()
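# A small demonstration (standalone; assumes the module-level _text2list
# helper used by the class above is available) of wid allocation via the
# Length counter in _new_wid():
lex = Lexicon()                        # empty pipeline
wid_a = lex._getWordIdCreate('alpha')  # 1: the Length goes 0 -> 1
wid_b = lex._getWordIdCreate('beta')   # 2
assert (wid_a, wid_b) == (1, 2)
assert lex.get_word(wid_a) == 'alpha'
assert lex.termToWordIds('gamma') == [0]  # unknown word -> OOV wid 0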
class Folder(Persistent):
    """ A folder implementation which acts much like a Python dictionary.

    Keys must be Unicode strings; values must be arbitrary Python objects.
    """
    family = BTrees.family64

    __name__ = None
    __parent__ = None
    __services__ = ()

    # Default uses ordering of underlying BTree.
    _order = None

    def _get_order(self):
        if self._order is not None:
            return list(self._order)
        return self.data.keys()

    def _set_order(self, value):
        # XXX: should we test against self.data.keys()?
        self._order = tuple([unicode(x) for x in value])

    def _del_order(self):
        del self._order

    order = property(_get_order, _set_order, _del_order)

    def __init__(self, data=None, family=None):
        """ Constructor.  Data may be an initial dictionary mapping object
        name to object. """
        if family is not None:
            self.family = family
        if data is None:
            data = {}
        self.data = self.family.OO.BTree(data)
        self._num_objects = Length(len(data))

    def find_service(self, service_name):
        """ Return a service named by ``service_name`` in this folder *or
        any parent service folder* or ``None`` if no such service exists.
        A shortcut for :func:`substanced.service.find_service`."""
        return find_service(self, service_name)

    def find_services(self, service_name):
        """ Return a sequence of service objects named by ``service_name``
        in this folder's lineage or an empty sequence if no such service
        exists.  A shortcut for :func:`substanced.service.find_services`."""
        return find_services(self, service_name)

    def add_service(self, name, obj, registry=None, **kw):
        """ Add a service to this folder named ``name``."""
        if registry is None:
            registry = get_current_registry()
        kw['registry'] = registry
        self.add(name, obj, **kw)
        if not name in self.__services__:
            self.__services__ = self.__services__ + (name,)

    def keys(self):
        """ Return an iterable sequence of object names present in the
        folder.  Respect ``order``, if set. """
        return self.order

    def __iter__(self):
        """ An alias for ``keys``. """
        return iter(self.order)

    def values(self):
        """ Return an iterable sequence of the values present in the folder.
        Respect ``order``, if set. """
        if self._order is not None:
            return [self.data[name] for name in self.order]
        return self.data.values()

    def items(self):
        """ Return an iterable sequence of (name, value) pairs in the
        folder.  Respect ``order``, if set. """
        if self._order is not None:
            return [(name, self.data[name]) for name in self.order]
        return self.data.items()

    def __len__(self):
        """ Return the number of objects in the folder. """
        return self._num_objects()

    def __nonzero__(self):
        """ Return ``True`` unconditionally. """
        return True

    def __repr__(self):
        klass = self.__class__
        classname = '%s.%s' % (klass.__module__, klass.__name__)
        return '<%s object %r at %#x>' % (classname, self.__name__, id(self))

    def __getitem__(self, name):
        """ Return the object named ``name`` added to this folder or raise
        ``KeyError`` if no such object exists.  ``name`` must be a Unicode
        object or directly decodeable to Unicode using the system default
        encoding. """
        name = unicode(name)
        return self.data[name]

    def get(self, name, default=None):
        """ Return the object named by ``name`` or the default.  ``name``
        must be a Unicode object or a bytestring object.  If ``name`` is a
        bytestring object, it must be decodable using the system default
        encoding. """
        name = unicode(name)
        return self.data.get(name, default)

    def __contains__(self, name):
        """ Does the container contain an object named by ``name``?

        ``name`` must be a Unicode object or a bytestring object.  If
        ``name`` is a bytestring object, it must be decodable using the
        system default encoding. """
        name = unicode(name)
        return name in self.data

    def __setitem__(self, name, other):
        """ Set object ``other`` into this folder under the name ``name``.
        ``name`` must be a Unicode object or a bytestring object.

        If ``name`` is a bytestring object, it must be decodable using the
        system default encoding.

        ``name`` cannot be the empty string.

        When ``other`` is seated into this folder, it will also be decorated
        with a ``__parent__`` attribute (a reference to the folder into
        which it is being seated) and a ``__name__`` attribute (the name
        passed in to this function).  It must not already have a
        ``__parent__`` attribute before being seated into the folder, or an
        exception will be raised.

        If a value already exists in the folder under the name ``name``,
        raise :exc:`KeyError`.

        When this method is called, the object will be added to the
        objectmap, a :class:`substanced.event.ObjectWillBeAdded` event will
        be emitted before the object obtains a ``__name__`` or
        ``__parent__`` value, then a :class:`substanced.event.ObjectAdded`
        will be emitted after the object obtains a ``__name__`` and
        ``__parent__`` value.
        """
        return self.add(name, other)

    def validate_name(self, name, reserved_names=()):
        """ Validate the ``name`` passed to ensure that it's addable to the
        folder.  Returns the name decoded to Unicode if it passes all
        addable checks.  It's not addable if:

        - the name is not decodeable to Unicode.

        - the name starts with ``@@`` (conflicts with explicit view names).

        - the name has slashes in it (WSGI limitation).

        - the name is empty.

        If any of these conditions are true, raise a :exc:`ValueError`.  If
        the name passed is in the list of ``reserved_names``, raise a
        :exc:`ValueError`.
        """
        if not isinstance(name, basestring):
            raise ValueError("Name must be a string rather than a %s" %
                             name.__class__.__name__)
        if not name:
            raise ValueError("Name must not be empty")
        try:
            name = unicode(name)
        except UnicodeDecodeError:
            raise ValueError('Name "%s" not decodeable to unicode' % name)
        if name in reserved_names:
            raise ValueError('%s is a reserved name' % name)
        if name.startswith('@@'):
            raise ValueError('Names which start with "@@" are not allowed')
        if '/' in name:
            raise ValueError('Names which contain a slash ("/") are not '
                             'allowed')
        return name

    def check_name(self, name, reserved_names=()):
        """ Perform all the validation checks implied by
        :meth:`~substanced.folder.Folder.validate_name` against the ``name``
        supplied but also fail with a
        :class:`~substanced.folder.FolderKeyError` if an object with the
        name ``name`` already exists in the folder."""
        name = self.validate_name(name, reserved_names=reserved_names)
        if name in self.data:
            raise FolderKeyError('An object named %s already exists' % name)
        return name

    def add(self, name, other, send_events=True, reserved_names=(),
            duplicating=False, moving=False, registry=None):
        """ Same as ``__setitem__``.

        If ``send_events`` is False, suppress the sending of folder events.
        Don't allow names in the ``reserved_names`` sequence to be added.
        If ``duplicating`` is True, oids will be replaced in the objectmap.

        This method returns the name used to place the subobject in the
        folder (a derivation of ``name``, usually the result of
        ``self.check_name(name)``).
        """
        if registry is None:
            registry = get_current_registry()
        name = self.check_name(name, reserved_names)
        if getattr(other, '__parent__', None):
            raise ValueError(
                'obj %s added to folder %s already has a __parent__ '
                'attribute, please remove it completely from its existing '
                'parent (%s) before trying to readd it to this one' % (
                    other, self, self.__parent__)
                )
        objectmap = find_objectmap(self)
        if objectmap is not None:
            basepath = resource_path_tuple(self)
            for node in postorder(other):
                node_path = node_path_tuple(node)
                path_tuple = basepath + (name,) + node_path[1:]
                # The below gives the node an objectid; if the will-be-added
                # event is the result of a duplication, replace the oid of
                # the node with a new one.
                objectmap.add(node, path_tuple, replace_oid=duplicating)
        if send_events:
            event = ObjectWillBeAdded(
                other, self, name, duplicating=duplicating, moving=moving
                )
            self._notify(event, registry)
        other.__parent__ = self
        other.__name__ = name
        self.data[name] = other
        self._num_objects.change(1)
        if self._order is not None:
            self._order += (name,)
        if send_events:
            event = ObjectAdded(
                other, self, name, duplicating=duplicating, moving=moving
                )
            self._notify(event, registry)
        return name

    def pop(self, name, default=marker, registry=None):
        """ Remove the item stored under ``name`` and return it.

        If ``name`` doesn't exist in the folder, and ``default`` **is not**
        passed, raise a :exc:`KeyError`.

        If ``name`` doesn't exist in the folder, and ``default`` **is**
        passed, return ``default``.

        When the object stored under ``name`` is removed from this folder,
        remove its ``__parent__`` and ``__name__`` values.

        When this method is called, emit an
        :class:`substanced.event.ObjectWillBeRemoved` event before the
        object loses its ``__name__`` or ``__parent__`` values.  Emit an
        :class:`substanced.event.ObjectRemoved` after the object loses its
        ``__name__`` and ``__parent__`` values.
        """
        if registry is None:
            registry = get_current_registry()
        try:
            result = self.remove(name, registry=registry)
        except KeyError:
            if default is marker:
                raise
            return default
        return result

    def _notify(self, event, registry=None):
        if registry is None:
            registry = get_current_registry()
        registry.subscribers((event, event.object, self), None)

    def __delitem__(self, name):
        """ Remove the object from this folder stored under ``name``.

        ``name`` must be a Unicode object or a bytestring object.  If
        ``name`` is a bytestring object, it must be decodable using the
        system default encoding.

        If no object is stored in the folder under ``name``, raise a
        :exc:`KeyError`.

        When the object stored under ``name`` is removed from this folder,
        remove its ``__parent__`` and ``__name__`` values.

        When this method is called, the removed object will be removed from
        the objectmap, a :class:`substanced.event.ObjectWillBeRemoved` event
        will be emitted before the object loses its ``__name__`` or
        ``__parent__`` values and a
        :class:`substanced.event.ObjectRemoved` will be emitted after the
        object loses its ``__name__`` and ``__parent__`` values.
        """
        return self.remove(name)

    def remove(self, name, send_events=True, moving=False, registry=None):
        """ Same thing as ``__delitem__``.

        If ``send_events`` is false, suppress the sending of folder events.
        If ``moving`` is True, the events sent will indicate that a move is
        in process.
        """
        name = unicode(name)
        other = self.data[name]
        oid = oid_of(other, None)
        if registry is None:
            registry = get_current_registry()
        if send_events:
            event = ObjectWillBeRemoved(other, self, name, moving=moving)
            self._notify(event, registry)
        if hasattr(other, '__parent__'):
            del other.__parent__
        if hasattr(other, '__name__'):
            del other.__name__
        del self.data[name]
        self._num_objects.change(-1)
        if name in self.__services__:
            self.__services__ = filter(lambda x: x != name,
                                       self.__services__)
        if self._order is not None:
            self._order = tuple([x for x in self._order if x != name])
        objectmap = find_objectmap(self)
        removed_oids = set([oid])
        if objectmap is not None and oid is not None:
            removed_oids = objectmap.remove(oid, references=not moving)
        if send_events:
            event = ObjectRemoved(other, self, name, removed_oids,
                                  moving=moving)
            self._notify(event, registry)
        return other

    def copy(self, name, other, newname=None, registry=None):
        """ Copy a subobject named ``name`` from this folder to the folder
        represented by ``other``.  If ``newname`` is not none, it is used as
        the target object name; otherwise the existing subobject name is
        used. """
        if newname is None:
            newname = name
        if registry is None:
            registry = get_current_registry()
        with tempfile.TemporaryFile() as f:
            obj = self.get(name)
            obj._p_jar.exportFile(obj._p_oid, f)
            f.seek(0)
            new_obj = obj._p_jar.importFile(f)
            del new_obj.__parent__
            obj = other.add(newname, new_obj, duplicating=True,
                            registry=registry)
            return obj

    def move(self, name, other, newname=None, registry=None):
        """ Move a subobject named ``name`` from this folder to the folder
        represented by ``other``.  If ``newname`` is not none, it is used as
        the target object name; otherwise the existing subobject name is
        used.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events as well as the Added and WillBeAdded events
        sent will indicate that the object is moving.
        """
        is_service = False
        if newname is None:
            newname = name
        if name in self.__services__:
            is_service = True
        if registry is None:
            registry = get_current_registry()
        ob = self.remove(name, moving=True, registry=registry)
        other.add(newname, ob, moving=True, registry=registry)
        if is_service:
            other.__services__ = other.__services__ + (name,)
        return ob

    def rename(self, oldname, newname, registry=None):
        """ Rename a subobject from ``oldname`` to ``newname``.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events sent will indicate that the object is
        moving.
        """
        if registry is None:
            registry = get_current_registry()
        return self.move(oldname, self, newname, registry=registry)

    def replace(self, name, newobject, registry=None):
        """ Replace an existing object named ``name`` in this folder with a
        new object ``newobject``.  If there isn't an object named ``name``
        in this folder, an exception will *not* be raised; instead, the new
        object will just be added.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events will be sent for the old object, and the
        WillBeAdded and Added events will be sent for the new object.
        """
        if registry is None:
            registry = get_current_registry()
        if name in self:
            self.remove(name)
        self.add(name, newobject, registry=registry)
class HBTreeFolder2Base(Persistent):
    """Base for BTree-based folders.

    BUG: Due to wrong design, we can't store 2 objects <A> and <A>-<B>
    where <A> does not contain '-'. We detect conflicts at the root level
    using 'type(ob) is OOBTree'.
    """

    security = ClassSecurityInfo()

    manage_options = (
        ({'label': 'Contents', 'action': 'manage_main'},)
        + Folder.manage_options[1:]
    )

    security.declareProtected(view_management_screens, 'manage_main')
    manage_main = DTMLFile('contents', globals())

    _htree = None     # OOBTree: { id -> object }
    _count = None     # A BTrees.Length
    _v_nextid = 0     # The integer component of the next generated ID
    title = ''

    def __init__(self, id=None):
        if id is not None:
            self.id = id
        self._initBTrees()

    def _initBTrees(self):
        self._htree = OOBTree()
        self._count = Length()

    def _populateFromFolder(self, source):
        """Fill this folder with the contents of another folder."""
        for name, value in source.objectItems():
            self._setOb(name, aq_base(value))

    security.declareProtected(view_management_screens, 'manage_fixCount')
    def manage_fixCount(self, dry_run=0):
        """Calls self._fixCount() and reports the result as text."""
        old, new = self._fixCount(dry_run)
        path = '/'.join(self.getPhysicalPath())
        if old == new:
            return ("No count mismatch detected in HBTreeFolder2 at %s."
                    % path)
        else:
            return ("Fixed count mismatch in HBTreeFolder2 at %s. "
                    "Count was %d; corrected to %d" % (path, old, new))

    def _fixCount(self, dry_run=0):
        """Checks if the value of self._count disagrees with the content
        of the htree. If so, corrects self._count. Returns the old and new
        count values. If old == new, no correction was performed.
        """
        old = self._count()
        new = sum(1 for x in self._htree_iteritems())
        if old != new and not dry_run:
            self._count.set(new)
        return old, new

    def hashId(self, id):
        return id.split(H_SEPARATOR)

    def _htree_get(self, id):
        id_list = self.hashId(id)
        if len(id_list) == 1:
            ob = self._htree[id]
            if type(ob) is OOBTree:
                raise KeyError
        else:
            ob = self._htree[id_list.pop(0)]
            if type(ob) is not OOBTree:
                raise KeyError
            id_list[-1] = id
            for sub_id in id_list:
                ob = ob[sub_id]
        return ob

    def _getOb(self, id, default=_marker):
        """Return the named object from the folder."""
        try:
            return self._htree_get(id).__of__(self)
        except KeyError:
            if default is _marker:
                raise KeyError(id)
            return default

    def __getitem__(self, id):
        try:
            return self._htree_get(id).__of__(self)
        except KeyError:
            raise KeyError(id)

    def _setOb(self, id, object):
        """Store the named object in the folder."""
        if type(object) is OOBTree:
            raise ValueError('HBTreeFolder2 can not store OOBTree objects')
        htree = self._htree
        for sub_id in self.hashId(id)[:-1]:
            try:
                htree = htree[sub_id]
            except KeyError:
                htree[sub_id] = htree = OOBTree()
                continue
            if type(htree) is not OOBTree:
                assert self._htree[sub_id] is htree, (htree, id)
                raise KeyError('There is already an item whose id is %r'
                               % sub_id)
        if id in htree:
            raise KeyError('There is already an item named %r.' % id)
        htree[id] = object
        self._count.change(1)

    def _delOb(self, id):
        """Remove the named object from the folder."""
        htree = self._htree
        h = []
        for sub_id in self.hashId(id)[:-1]:
            h.append((htree, sub_id))
            htree = htree.get(sub_id)
            if type(htree) is not OOBTree:
                raise KeyError(id)
        if type(htree[id]) is OOBTree:
            raise KeyError(id)
        del htree[id]
        self._count.change(-1)
        while h and not htree:
            htree, sub_id = h.pop()
            del htree[sub_id]

    security.declareProtected(view_management_screens,
                              'getBatchObjectListing')
    def getBatchObjectListing(self, REQUEST=None):
        """Return a structure for a page template to show the list of
        objects.
        """
        if REQUEST is None:
            REQUEST = {}
        pref_rows = int(REQUEST.get('dtpref_rows', 20))
        b_start = int(REQUEST.get('b_start', 1))
        b_count = int(REQUEST.get('b_count', 1000))
        b_end = b_start + b_count - 1
        url = self.absolute_url() + '/manage_main'
        count = self.objectCount()
        if b_end < count:
            next_url = url + '?b_start=%d' % (b_start + b_count)
        else:
            b_end = count
            next_url = ''
        if b_start > 1:
            prev_url = url + '?b_start=%d' % max(b_start - b_count, 1)
        else:
            prev_url = ''
        formatted = [listtext0 % pref_rows]
        for optID in islice(self.objectIds(), b_start - 1, b_end):
            optID = escape(optID)
            formatted.append(listtext1 % (escape(optID, quote=1), optID))
        formatted.append(listtext2)
        return {'b_start': b_start,
                'b_end': b_end,
                'prev_batch_url': prev_url,
                'next_batch_url': next_url,
                'formatted_list': ''.join(formatted)}

    security.declareProtected(view_management_screens,
                              'manage_object_workspace')
    def manage_object_workspace(self, ids=(), REQUEST=None):
        '''Redirects to the workspace of the first object in the list.'''
        if ids and REQUEST is not None:
            REQUEST.RESPONSE.redirect(
                '%s/%s/manage_workspace' % (self.absolute_url(),
                                            quote(ids[0])))
        else:
            return self.manage_main(self, REQUEST)

    security.declareProtected(access_contents_information, 'tpValues')
    def tpValues(self):
        """Ensures the items don't show up in the left pane."""
        return ()

    security.declareProtected(access_contents_information, 'objectCount')
    def objectCount(self):
        """Returns the number of items in the folder."""
        return self._count()

    security.declareProtected(access_contents_information, 'has_key')
    def has_key(self, id):
        """Indicates whether the folder has an item by ID."""
        try:
            self._htree_get(id)
        except KeyError:
            return 0
        return 1

    # Work around the performance regression introduced in Zope 2.12.23.
    # Otherwise, we use the superclass' __contains__ implementation, which
    # uses objectIds, which is inefficient in HBTreeFolder2 for looking up
    # a single key.
    __contains__ = has_key

    def _htree_iteritems(self, min=None):
        # BUG: Due to bad design of HBTreeFolder2, buckets other than the
        # root one must not contain both buckets & leafs. Otherwise, this
        # method fails.
        h = self._htree
        recurse_stack = []
        try:
            for sub_id in self.hashId(min) if min else ('',):
                if recurse_stack:
                    next(i)
                    if type(h) is not OOBTree:
                        break
                    id += H_SEPARATOR + sub_id
                    if type(next(six.itervalues(h))) is not OOBTree:
                        sub_id = id
                else:
                    id = sub_id
                i = h.iteritems(sub_id)
                recurse_stack.append(i)
                h = h[sub_id]
        except (KeyError, StopIteration):
            pass
        while recurse_stack:
            i = recurse_stack.pop()
            try:
                while 1:
                    id, h = next(i)
                    if type(h) is OOBTree:
                        recurse_stack.append(i)
                        i = six.iteritems(h)
                    else:
                        yield id, h
            except StopIteration:
                pass

    security.declareProtected(access_contents_information, 'getTreeIdList')
    def getTreeIdList(self, htree=None):
        """Return the list of all tree ids."""
        r = []
        s = [(None, six.iteritems(self._htree))]
        while s:
            base_id, items = s.pop()
            if base_id:
                for k, v in items:
                    if type(v) is not OOBTree:
                        r.append(base_id)
                        # As an optimization, and because _htree_iteritems
                        # does not support mixed buckets except at the root,
                        # we consider that this one only contains leafs.
                        break
                    s.append((base_id + H_SEPARATOR + k, six.iteritems(v)))
            else:
                for k, v in items:
                    if type(v) is not OOBTree:
                        r.append(base_id)
                        for k, v in items:
                            if type(v) is OOBTree:
                                s.append((k, six.iteritems(v)))
                        break
                    s.append((k, six.iteritems(v)))
        r.sort()
        return r

    security.declareProtected(access_contents_information, 'objectValues')
    def objectValues(self, base_id=_marker):
        return HBTreeObjectValues(self, base_id)

    security.declareProtected(access_contents_information, 'objectIds')
    def objectIds(self, base_id=_marker):
        return HBTreeObjectIds(self, base_id)

    security.declareProtected(access_contents_information, 'objectItems')
    def objectItems(self, base_id=_marker):
        # Returns a list of (id, subobject) tuples of the current object.
        return HBTreeObjectItems(self, base_id)

    # superValues() looks for the _objects attribute, but the implementation
    # would be inefficient, so superValues() support is disabled.
    _objects = ()

    security.declareProtected(access_contents_information, 'objectIds_d')
    def objectIds_d(self, t=None):
        return dict.fromkeys(self.objectIds(t), 1)

    def _checkId(self, id, allow_dup=0):
        if not allow_dup and id in self:
            raise BadRequestException(
                'The id %r is invalid--it is already in use.' % id)

    def _setObject(self, id, object, roles=None, user=None, set_owner=1):
        v = self._checkId(id)
        if v is not None:
            id = v

        # If an object by the given id already exists, remove it.
        if id in self:
            self._delObject(id)

        self._setOb(id, object)
        object = self._getOb(id)

        if set_owner:
            object.manage_fixupOwnershipAfterAdd()

            # Try to give the user the local role "Owner", but only if
            # no local roles have been set on the object yet.
            if hasattr(object, '__ac_local_roles__'):
                if object.__ac_local_roles__ is None:
                    user = getSecurityManager().getUser()
                    if user is not None:
                        userid = user.getId()
                        if userid is not None:
                            object.manage_setLocalRoles(userid, ['Owner'])

        object.manage_afterAdd(object, self)
        return id

    def _delObject(self, id, dp=1):
        object = self._getOb(id)
        try:
            object.manage_beforeDelete(object, self)
        except BeforeDeleteException:
            raise
        except ConflictError:
            raise
        except Exception:
            LOG('Zope', ERROR, 'manage_beforeDelete() threw', error=True)
        self._delOb(id)

    # Aliases for mapping-like access.
    __len__ = objectCount
    keys = objectIds
    values = objectValues
    items = objectItems

    # Backward compatibility.
    hasObject = has_key

    security.declareProtected(access_contents_information, 'get')
    def get(self, name, default=None):
        try:
            return self._htree_get(name).__of__(self)
        except KeyError:
            return default

    # Utility for generating unique IDs.
    security.declareProtected(access_contents_information, 'generateId')
    def generateId(self, prefix='item', suffix='', rand_ceiling=999999999):
        """Returns an ID not used yet by this folder.

        The ID is unlikely to collide with other threads and clients.
        The IDs are sequential to optimize access to objects that are
        likely to have some relation.
        """
        tree = self._htree
        n = self._v_nextid
        attempt = 0
        while 1:
            if n % 4000 != 0 and n <= rand_ceiling:
                id = '%s%d%s' % (prefix, n, suffix)
                if id not in tree:
                    break
            n = randint(1, rand_ceiling)
            attempt = attempt + 1
            if attempt > MAX_UNIQUEID_ATTEMPTS:
                # Prevent denial of service.
                raise ExhaustedUniqueIdsError
        self._v_nextid = n + 1
        return id

    def __getattr__(self, name):
        # Boo hoo hoo! Zope 2 prefers implicit acquisition over traversal
        # to subitems, and __bobo_traverse__ hooks don't work with
        # restrictedTraverse() unless __getattr__() is also present.
        # Oh well.
        try:
            return self._htree_get(name)
        except KeyError:
            raise AttributeError(name)
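# An illustration (synthetic ids; H_SEPARATOR assumed to be '-') of the
# hashing scheme HBTreeFolder2Base uses above: hashId() splits the id on the
# separator, the intermediate parts become nested OOBTree buckets, and the
# leaf is stored under the *full* id (see _setOb and _htree_get).
H_SEPARATOR = '-'

def hashId(id):
    return id.split(H_SEPARATOR)

assert hashId('2024-07-doc1') == ['2024', '07', 'doc1']
# _setOb('2024-07-doc1', ob) therefore stores:
#     _htree['2024']['07']['2024-07-doc1'] = ob
# which is why the class docstring warns that 'A' and 'A-B' cannot coexist:
# 'A' would have to be both a leaf and a bucket at the root level.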
def __init__(self):
    """Initialize self."""
    self.__changed_backrefs_counter__ = Length()
    """Counter that should increment if backreferences are changed."""
class CatalogTool(PloneBaseTool, BaseTool):
    """Plone's catalog tool"""

    meta_type = 'Plone Catalog Tool'
    security = ClassSecurityInfo()
    toolicon = 'skins/plone_images/book_icon.png'
    _counter = None

    manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals())

    manage_options = (
        {'action': 'manage_main', 'label': 'Contents'},
        {'action': 'manage_catalogView', 'label': 'Catalog'},
        {'action': 'manage_catalogIndexes', 'label': 'Indexes'},
        {'action': 'manage_catalogSchema', 'label': 'Metadata'},
        {'action': 'manage_catalogAdvanced', 'label': 'Advanced'},
        {'action': 'manage_catalogReport', 'label': 'Query Report'},
        {'action': 'manage_catalogPlan', 'label': 'Query Plan'},
        {'action': 'manage_propertiesForm', 'label': 'Properties'},
    )

    def __init__(self):
        ZCatalog.__init__(self, self.getId())

    def _removeIndex(self, index):
        # Safe removal of an index.
        try:
            self.manage_delIndex(index)
        except:
            pass

    def _listAllowedRolesAndUsers(self, user):
        # Make sure the list includes the user's groups.
        result = user.getRoles()
        if 'Anonymous' in result:
            # The anonymous user has no further roles.
            return ['Anonymous']
        result = list(result)
        if hasattr(aq_base(user), 'getGroups'):
            groups = ['user:%s' % x for x in user.getGroups()]
            if groups:
                result = result + groups
        # Order the arguments from small to large sets.
        result.insert(0, 'user:%s' % user.getId())
        result.append('Anonymous')
        return result

    @security.private
    def indexObject(self, object, idxs=None):
        # Add object to catalog.
        # The optional idxs argument is a list of specific indexes
        # to populate (all of them by default).
        if idxs is None:
            idxs = []
        self.reindexObject(object, idxs)

    @security.protected(ManageZCatalogEntries)
    def catalog_object(self, object, uid=None, idxs=None,
                       update_metadata=1, pghandler=None):
        if idxs is None:
            idxs = []
        self._increment_counter()
        w = object
        if not IIndexableObject.providedBy(object):
            # This is the CMF 2.2 compatible approach, which should be
            # used going forward.
            wrapper = queryMultiAdapter((object, self), IIndexableObject)
            if wrapper is not None:
                w = wrapper
        ZCatalog.catalog_object(self, w, uid, idxs, update_metadata,
                                pghandler=pghandler)

    @security.protected(ManageZCatalogEntries)
    def uncatalog_object(self, *args, **kwargs):
        self._increment_counter()
        return BaseTool.uncatalog_object(self, *args, **kwargs)

    def _increment_counter(self):
        if self._counter is None:
            self._counter = Length()
        self._counter.change(1)

    @security.private
    def getCounter(self):
        processQueue()
        return self._counter is not None and self._counter() or 0

    @security.private
    def allow_inactive(self, query_kw):
        """Check if the user is allowed to see inactive content.

        First, check if the user is allowed to see inactive content
        site-wide.  Second, if there is a 'path' key in the query, check
        if the user is allowed to see inactive content for these paths.
        Conservative check: as soon as one path is disallowed, return
        False.  If a path cannot be traversed, ignore it.
        """
        allow_inactive = _checkPermission(AccessInactivePortalContent, self)
        if allow_inactive:
            return True

        paths = query_kw.get('path', False)
        if not paths:
            return False

        if isinstance(paths, dict):
            # Like: {'path': {'depth': 0, 'query': ['/Plone/events/']}}
            # Or: {'path': {'depth': 0, 'query': '/Plone/events/'}}
            paths = paths.get('query', [])

        if isinstance(paths, six.string_types):
            paths = [paths]

        objs = []
        site = getSite()
        for path in list(paths):
            if six.PY2:
                path = path.encode('utf-8')  # paths must not be unicode
            try:
                site_path = '/'.join(site.getPhysicalPath())
                parts = path[len(site_path) + 1:].split('/')
                parent = site.unrestrictedTraverse('/'.join(parts[:-1]))
                objs.append(parent.restrictedTraverse(parts[-1]))
            except (KeyError, AttributeError, Unauthorized):
                # When no object is found don't raise an error.
                pass

        if not objs:
            return False

        allow = True
        for ob in objs:
            allow = allow and \
                _checkPermission(AccessInactivePortalContent, ob)
        return allow

    @security.protected(SearchZCatalog)
    def searchResults(self, query=None, **kw):
        # Calls ZCatalog.searchResults with extra arguments that
        # limit the results to what the user is allowed to see.
        #
        # This version uses the 'effectiveRange' DateRangeIndex.
        #
        # It also accepts a keyword argument show_inactive to disable
        # effectiveRange checking entirely even for those without portal
        # wide AccessInactivePortalContent permission.

        # Make sure any pending index tasks have been processed.
        processQueue()

        kw = kw.copy()
        show_inactive = kw.get('show_inactive', False)
        if isinstance(query, dict) and not show_inactive:
            show_inactive = 'show_inactive' in query

        user = _getAuthenticatedUser(self)
        kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not show_inactive and not self.allow_inactive(kw):
            kw['effectiveRange'] = DateTime()

        # Filter out invalid sort_on indexes.
        sort_on = kw.get('sort_on') or []
        if isinstance(sort_on, six.string_types):
            sort_on = [sort_on]
        valid_indexes = self.indexes()
        try:
            sort_on = [idx for idx in sort_on if idx in valid_indexes]
        except TypeError:
            # sort_on is not iterable.
            sort_on = []
        if not sort_on:
            kw.pop('sort_on', None)
        else:
            kw['sort_on'] = sort_on

        return ZCatalog.searchResults(self, query, **kw)

    __call__ = searchResults

    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
        # Wrap search() the same way that searchResults() is.

        # Make sure any pending index tasks have been processed.
        processQueue()

        user = _getAuthenticatedUser(self)
        query['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not self.allow_inactive(query):
            query['effectiveRange'] = DateTime()

        return super(CatalogTool, self).search(
            query, sort_index, reverse, limit, merge)

    @security.protected(ManageZCatalogEntries)
    def clearFindAndRebuild(self):
        # Empties the catalog, then finds all contentish objects (i.e.
        # objects with an indexObject method), and reindexes them.
        # This may take a long time.
        idxs = list(self.indexes())

        def indexObject(obj, path):
            if (base_hasattr(obj, 'reindexObject') and
                    safe_callable(obj.reindexObject)):
                try:
                    self.reindexObject(obj, idxs=idxs)
                    # Index conversations from plone.app.discussion.
                    annotations = IAnnotations(obj)
                    if DISCUSSION_ANNOTATION_KEY in annotations:
                        conversation = annotations[DISCUSSION_ANNOTATION_KEY]
                        conversation = conversation.__of__(obj)
                        for comment in conversation.getComments():
                            try:
                                self.indexObject(comment, idxs=idxs)
                            except StopIteration:  # pragma: no cover
                                pass
                except TypeError:
                    # Catalogs have 'indexObject' as well, but they
                    # take different args, and will fail.
                    pass

        self.manage_catalogClear()
        portal = aq_parent(aq_inner(self))
        portal.ZopeFindAndApply(
            portal,
            search_sub=True,
            apply_func=indexObject
        )

    @security.protected(ManageZCatalogEntries)
    def manage_catalogRebuild(self, RESPONSE=None, URL1=None):
        """Clears the catalog and indexes all objects with an 'indexObject'
        method.  This may take a long time.
        """
        elapse = time.time()
        c_elapse = process_time()

        self.clearFindAndRebuild()

        elapse = time.time() - elapse
        c_elapse = process_time() - c_elapse

        msg = ('Catalog Rebuilt\n'
               'Total time: %s\n'
               'Total CPU time: %s' % (repr(elapse), repr(c_elapse)))
        logger.info(msg)

        if RESPONSE is not None:
            RESPONSE.redirect(
                URL1 + '/manage_catalogAdvanced?manage_tabs_message=' +
                urllib.parse.quote(msg))
def testUpgradeIdToolDicts(self): # With old erp5_core, we have no generators, no IdTool_* zsql methods, # and we have a dictionary stored on id tool id_tool = self.getPortal().portal_ids # Rebuild a persistent mapping like the one that existed in early 2010 # First persistent mapping of generateNewLengthIdList id_tool.dict_length_ids = PersistentMapping() id_tool.dict_length_ids['foo'] = Length(5) id_tool.dict_length_ids['bar'] = Length(5) id_tool.IdTool_zSetLastId(id_group='foo', last_id=5) id_tool.IdTool_zSetLastId(id_group='bar', last_id=10) # Then persistent mapping of generateNewId id_tool.dict_ids = PersistentMapping() id_tool.dict_ids['foo'] = 3 # it was unfortunately possible to define something other # than strings id_tool.dict_ids[('bar','baz')] = 2 # Delete new zsql methods which are used by new code skin_folder = self.getPortal().portal_skins.erp5_core custom_skin_folder = self.getPortal().portal_skins.custom script_id_list = [x for x in skin_folder.objectIds() if x.startswith('IdTool')] self.assertTrue(len(script_id_list)>0) cp_data = skin_folder.manage_cutObjects(ids=script_id_list) custom_skin_folder.manage_pasteObjects(cp_data) # Set old revision for erp5_core bt, because the id tool decides which code # to run depending on this revision template_tool = self.getPortal().portal_templates erp5_core_bt_list = [x for x in template_tool.objectValues() if x.getTitle()=='erp5_core'] self.assertEqual(len(erp5_core_bt_list), 1) erp5_core_bt = erp5_core_bt_list[0] erp5_core_bt.setRevision(1561) # Delete all new generators generator_id_list = [x for x in id_tool.objectIds()] id_tool.manage_delObjects(ids=generator_id_list) id_list = id_tool.generateNewLengthIdList(id_group='foo', store=1) self.assertEqual(id_list, [5]) self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6) # Now, reinstall erp5_core, and make sure we still have the possibility # to continue generating ids cp_data = template_tool.manage_copyObjects(ids=(erp5_core_bt.getId(),)) new_id = template_tool.manage_pasteObjects(cp_data)[0]['new_id'] new_bt = template_tool[new_id] self.tic() self.commit() new_bt.install(force=1) erp5_core_bt.setRevision(1562) cp_data = custom_skin_folder.manage_cutObjects(ids=script_id_list) skin_folder.manage_pasteObjects(cp_data) id_list = id_tool.generateNewLengthIdList(id_group='foo') # it is known that with the current upgrade there is a hole (id 6 is skipped) self.assertEqual(id_list, [7]) new_id = id_tool.generateNewId(id_group='foo') self.assertEqual(new_id, 4) new_id = id_tool.generateNewId(id_group=('bar','baz')) self.assertEqual(new_id, 3) # Make sure that the old code is not used any more, so the dict on # the id tool should not change, checking dict_length_ids self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6) id_list = id_tool.generateNewLengthIdList(id_group='bar') self.assertEqual(id_list, [11]) generator_list = [x for x in id_tool.objectValues() if x.getReference()=='mysql_non_continuous_increasing'] self.assertEqual(len(generator_list), 1) generator = generator_list[0] self.assertEqual(generator.last_max_id_dict['foo'].value, 7) self.assertEqual(generator.last_max_id_dict['bar'].value, 11) # Make sure that the old code is not used any more, so the dict on # the id tool should not change, checking dict_ids self.assertEqual(id_tool.dict_ids['foo'], 3) generator_list = [x for x in id_tool.objectValues() if x.getReference()=='zodb_continuous_increasing'] self.assertEqual(len(generator_list), 1) generator = generator_list[0] self.assertEqual(generator.last_id_dict['foo'], 4)
self.assertEqual(generator.last_id_dict["('bar', 'baz')"], 3)
def clear(self): """ Clears all the information stored """ self._tree = LOBTree.LOBTree() self._count = Length(0)
def __init__(self): self._tree = LOBTree.LOBTree() self._count = Length(0)
class EventEndDateIndex(Persistent): """ List of bookings ordered by their event's ending date """ def __init__(self): self._tree = LOBTree.LOBTree() self._count = Length(0) ## private class methods ## @classmethod def _dateToKey(cls, date): if date: return datetimeToUnixTimeInt(date) else: return None @classmethod def _keyToDate(cls, key): if key: return unixTimeToDatetime(key) else: return None @classmethod def _bookingToKey(cls, booking): return cls._dateToKey(booking.getConference().getAdjustedEndDate(tz = 'UTC')) ## public instance methods ## def clear(self): """ Clears all the information stored """ self._tree = LOBTree.LOBTree() self._count = Length(0) def getCount(self): """ Returns the number of bookings (not keys) stored """ return self._count() #to get the value of a Length object, one has to "call" the object def indexBooking(self, booking): """ Stores a booking in the index """ key = EventEndDateIndex._bookingToKey(booking) if key not in self._tree: self._tree[key] = DateBookingList() self._tree[key].addBooking(booking) self._count.change(1) def unindexBooking(self, booking): """ Removes a booking from the index """ key = EventEndDateIndex._bookingToKey(booking) try: self._tree[key].removeBooking(booking) if self._tree[key].getCount() == 0: del self._tree[key] self._count.change(-1) except KeyError: Logger.get('Vidyo').warning("Could not unindex booking: (confId=%s, id=%s) from Vidyo's GlobalData. Tried with key: %s." % (booking.getConference().getId(), booking.getId(), str(key))) def moveBooking(self, booking, oldDate): """ Changes the position of a booking in the index """ oldKey = EventEndDateIndex._dateToKey(oldDate) newKey = EventEndDateIndex._bookingToKey(booking) try: self._tree[oldKey].removeBooking(booking) if self._tree[oldKey].getCount() == 0: del self._tree[oldKey] if newKey not in self._tree: self._tree[newKey] = DateBookingList() self._tree[newKey].addBooking(booking) except KeyError: Logger.get('Vidyo').warning("Could not move booking: (confId=%s, id=%s) from Vidyo's GlobalData. Tried moving from key: %s to key: %s." % (booking.getConference().getId(), booking.getId(), str(oldKey), str(newKey))) def iterbookings(self, minDate = None, maxDate = None): """ Will return an iterator over Vidyo bookings attached to conferences whose end date is between minDate and maxDate """ minKey = EventEndDateIndex._dateToKey(minDate) maxKey = EventEndDateIndex._dateToKey(maxDate) for bookingList in self._tree.itervalues(min = minKey, max = maxKey): for b in bookingList.iterbookings(): yield b def deleteKeys(self, minDate = None, maxDate = None): """ Deletes all the entries whose key (event end date) is between minDate and maxDate, unindexing all the bookings stored under them """ minKey = EventEndDateIndex._dateToKey(minDate) maxKey = EventEndDateIndex._dateToKey(maxDate) for key in list(self._tree.keys(min = minKey, max = maxKey)): #we want a copy because we are going to modify self._deleteKey(key) def _deleteKey(self, key): Logger.get("Vidyo").info("Vidyo EventEndDateIndex: deleting key %s (%s)" % (str(key), str(EventEndDateIndex._keyToDate(key)) + " (UTC)")) self._count.change(-self._tree[key].getCount()) del self._tree[key] def initialize(self, dbi=None): """ Cleans the indexes, and then indexes all the vidyo bookings from all the conferences WARNING: obviously, this can potentially take a while """ i = 0 self.clear() for conf in ConferenceHolder().getList(): csbm = conf.getCSBookingManager() for booking in csbm.getBookingList(): if booking.getType() == "Vidyo" and booking.isCreated(): self.indexBooking(booking) i += 1 if dbi and i % 100 == 0: dbi.commit()
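# Hedged usage sketch for EventEndDateIndex above. 'booking' and
# 'end_of_may' are hypothetical stand-ins: a real Indico Vidyo booking
# (anything answering getConference() with a UTC end date) and a UTC
# datetime. Illustrative only, not the original API surface.
index = EventEndDateIndex()
index.indexBooking(booking)                  # filed under its event's end date
ending_soon = list(index.iterbookings(maxDate=end_of_may))
index.unindexBooking(booking)
assert index.getCount() == 0                 # Length-backed, conflict-friendly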
def _initBTrees(self): self._htree = OOBTree() self._count = Length()
def _reset(self): # this counter keeps the number of elements self._elem_counter = Length(0) self._container = IOBTree()
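# A hedged sketch (not from the source) of why these snippets keep a
# Length counter instead of calling len() on the BTree:
# BTrees.Length.Length merges concurrent increments via
# _p_resolveConflict, so two transactions that both call change()
# commit cleanly where a plain integer attribute would raise a
# ConflictError.
from BTrees.Length import Length

counter = Length(0)
counter.change(1)       # e.g. an insert in one transaction
counter.change(-1)      # e.g. a removal in another transaction
assert counter() == 0   # read the value by calling the object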
def clear(self): self._depth = 0 self._index = self.family.OO.BTree() self._unindex = self.family.IO.BTree() self._length = Length(0)
class PersistentWaitingQueue(Persistent): """ A waiting queue, implemented using a map structure (BTree...) It is persistent, but very vulnerable to conflicts: sets are used as containers, and two different sets can end up assigned to the same timestamp, which is guaranteed to result in a conflict. Because of that, commits of objects like these have to be carefully synchronized. See `indico.modules.scheduler.controllers` for more info (particularly the way we use the 'spool'). """ def __init__(self): super(PersistentWaitingQueue, self).__init__() self._reset() def _reset(self): # this counter keeps the number of elements self._elem_counter = Length(0) self._container = IOBTree() def _gc_bin(self, t): """ 'garbage-collect' bins """ if len(self._container[t]) == 0: del self._container[t] def _check_gc_consistency(self): """ Check that there are no empty bins """ for t in self._container: if len(self._container[t]) == 0: return False return True def enqueue(self, t, obj): """ Add an element to the queue """ if t not in self._container: self._container[t] = OOTreeSet() if obj in self._container[t]: raise DuplicateElementException(obj) self._container[t].add(obj) self._elem_counter.change(1) def dequeue(self, t, obj): """ Remove an element from the queue """ self._container[t].remove(obj) self._gc_bin(t) self._elem_counter.change(-1) def _next_timestamp(self): """ Return the next 'priority' to be served """ i = iter(self._container) try: t = i.next() return t except StopIteration: return None def peek(self): """ Return the next element """ t = self._next_timestamp() if t: # just to be sure assert (len(self._container[t]) != 0) # find the next element i = iter(self._container[t]) # store it elem = i.next() # return the element return t, elem else: return None def pop(self): """ Remove and return the next set of elements to be processed """ pair = self.peek() if pair: self.dequeue(*pair) # return the element return pair else: return None def nbins(self): """ Return the number of 'bins' (map entries) currently used """ # get 'real' len() return len(self._container) def __len__(self): return self._elem_counter() def __getitem__(self, param): return self._container.__getitem__(param) def __iter__(self): # tree iterator for tstamp in iter(self._container): cur_set = self._container[tstamp] try: # set iterator for elem in cur_set: yield tstamp, elem except StopIteration: pass
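# Hedged usage sketch for PersistentWaitingQueue above (Python 2, to
# match the class); integer timestamps and string payloads stand in for
# real scheduler data.
q = PersistentWaitingQueue()
q.enqueue(100, 'task-a')
q.enqueue(100, 'task-b')
q.enqueue(200, 'task-c')
assert len(q) == 3 and q.nbins() == 2    # two timestamp bins
t, elem = q.pop()                        # the earliest bin is served first
assert t == 100
assert q._check_gc_consistency()         # no empty bins left behind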
class CachingCatalog(Catalog): implements(ICatalog) os = os # for unit tests generation = None # b/c def __init__(self): super(CachingCatalog, self).__init__() self.generation = Length(0) def clear(self): self.invalidate() super(CachingCatalog, self).clear() def index_doc(self, *arg, **kw): self.invalidate() super(CachingCatalog, self).index_doc(*arg, **kw) def unindex_doc(self, *arg, **kw): self.invalidate() super(CachingCatalog, self).unindex_doc(*arg, **kw) def reindex_doc(self, *arg, **kw): self.invalidate() super(CachingCatalog, self).reindex_doc(*arg, **kw) def __setitem__(self, *arg, **kw): self.invalidate() super(CachingCatalog, self).__setitem__(*arg, **kw) @MetricMod('CS.%s') @metricmethod def search(self, *arg, **kw): use_cache = True if 'use_cache' in kw: use_cache = kw.pop('use_cache') if 'NO_CATALOG_CACHE' in self.os.environ: use_cache = False if 'tags' in kw: # The tags index changes without invalidating the catalog, # so don't cache any query involving the tags index. use_cache = False if not use_cache: return self._search(*arg, **kw) cache = queryUtility(ICatalogSearchCache) if cache is None: return self._search(*arg, **kw) key = cPickle.dumps((arg, kw)) generation = self.generation if generation is None: generation = Length(0) genval = generation.value if (genval == 0) or (genval > cache.generation): # an update in another process requires that the local cache be # invalidated cache.clear() cache.generation = genval if cache.get(key) is None: num, docids = self._search(*arg, **kw) # We don't cache large result sets because the time it takes to # unroll the result set turns out to be far more time than it # takes to run the search. In a particular instance using OSI's # catalog a search that took 0.015s but returned 35,295 # results took over 50s to unroll the result set for caching, # significantly slowing search performance. if num > LARGE_RESULT_SET: return num, docids # we need to unroll here; a btree-based structure may have # a reference to its connection docids = list(docids) cache[key] = (num, docids) return cache.get(key) @metricmethod def _search(self, *arg, **kw): start = time.time() res = super(CachingCatalog, self).search(*arg, **kw) duration = time.time() - start notify(CatalogQueryEvent(self, kw, duration, res)) return res def invalidate(self): # Increment the generation; this tells *another process* that # its catalog cache needs to be cleared generation = self.generation if generation is None: generation = self.generation = Length(0) if generation.value >= sys.maxint: # don't keep growing the generation integer; wrap at sys.maxint self.generation.set(0) else: self.generation.change(1) # Clear the cache for *this process* cache = queryUtility(ICatalogSearchCache) if cache is not None: cache.clear() cache.generation = self.generation.value
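# Minimal sketch (assumptions, not the original API) of the generation
# protocol used above: a persistent Length is bumped on every write,
# and each process compares it against the generation stamped on its
# process-local cache before trusting cached results.
from BTrees.Length import Length

class LocalCache(dict):
    generation = 0

shared = Length(0)            # stands in for CachingCatalog.generation
cache = LocalCache()

def cached_search(key, run_query):
    genval = shared.value
    if genval == 0 or genval > cache.generation:
        cache.clear()         # a writer, possibly another process, moved on
        cache.generation = genval
    if key not in cache:
        cache[key] = run_query()
    return cache[key]

cached_search('q1', lambda: (1, [42]))   # miss: runs the query
shared.change(1)                         # any write bumps the generation
cached_search('q1', lambda: (1, [43]))   # cache was cleared, runs again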
def generateNewIdList(self, id_group=None, id_count=1, default=None, store=_marker, id_generator=None): """ Generate a list of next ids in the sequence of ids of a particular group """ if id_group in (None, 'None'): raise ValueError, '%s is not a valid id_group' % (repr(id_group), ) # for compatibility with sql data, must not use id_group as a list if not isinstance(id_group, str): id_group = repr(id_group) warnings.warn( 'id_group must be a string, other types ' 'are deprecated.', DeprecationWarning) if id_generator is None: id_generator = 'uid' if store is not _marker: warnings.warn("Use of 'store' argument is deprecated.", DeprecationWarning) try: # use _getLatestGeneratorValue here so that the technical level # does not have to call the method directly last_generator = self._getLatestGeneratorValue(id_generator) new_id_list = last_generator.generateNewIdList(id_group=id_group, id_count=id_count, default=default) except (KeyError, ValueError): template_tool = getattr(self, 'portal_templates', None) revision = template_tool.getInstalledBusinessTemplateRevision( 'erp5_core') # XXX backward compatibility if int(revision) > 1561: LOG('generateNewIdList', ERROR, 'while generating id') raise else: # Compatibility code below, in case the last version of erp5_core # is not installed yet warnings.warn( "You are using an old version of erp5_core to generate " "ids.\nPlease update erp5_core business template to " "use new id generators", DeprecationWarning) new_id = None if default is None: default = 1 # XXX This is temporary; a new API will be implemented soon # and this code will change portal = self.getPortalObject() query = getattr(portal, 'IdTool_zGenerateId', None) commit = getattr(portal, 'IdTool_zCommit', None) if query is None or commit is None: portal_catalog = getattr(self, 'portal_catalog').getSQLCatalog() query = getattr(portal_catalog, 'z_portal_ids_generate_id') commit = getattr(portal_catalog, 'z_portal_ids_commit') if None in (query, commit): raise AttributeError, 'Error while generating Id: ' \ 'IdTool_zGenerateId and/or IdTool_zCommit could not ' \ 'be found.' try: result = query(id_group=id_group, id_count=id_count, default=default) finally: commit() new_id = result[0]['LAST_INSERT_ID()'] if store: if getattr(aq_base(self), 'dict_length_ids', None) is None: # Length objects are stored in a persistent mapping: there is one # Length object per id_group. self.dict_length_ids = PersistentMapping() if self.dict_length_ids.get(id_group) is None: self.dict_length_ids[id_group] = Length(new_id) self.dict_length_ids[id_group].set(new_id) new_id_list = range(new_id - id_count, new_id) return new_id_list
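# Hedged sketch (not ERP5 code) of the Length-per-group storage pattern
# used above: one Length object per id_group inside a PersistentMapping,
# so concurrent id allocations merge instead of conflicting. Numbering
# starts at 0 here purely for simplicity.
from BTrees.Length import Length
from persistent.mapping import PersistentMapping

dict_length_ids = PersistentMapping()

def new_length_ids(id_group, id_count=1):
    length = dict_length_ids.get(id_group)
    if length is None:
        length = dict_length_ids[id_group] = Length(0)
    length.change(id_count)       # conflict-resolving increment
    new_id = length()
    return list(range(new_id - id_count, new_id))

assert new_length_ids('foo') == [0]
assert new_length_ids('foo', id_count=2) == [1, 2]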
def _increment_counter(self): if self._counter is None: self._counter = Length() self._counter.change(1)
def __init__(self, id_=None): self._Folder__data = OOBTree() self.__len = Length() super(Folder, self).__init__()
class CatalogPathIndex(CatalogIndex): """Index for model paths (tokens separated by '/' characters) A path index stores all path components of the physical path of an object. Internal datastructure: - a physical path of an object is split into its components - every component is kept as a key of an OOBTree in self._index - the value is a mapping 'level of the path component' to 'all docids with this path component on this level' Query types supported: - Eq - NotEq """ useOperator = 'or' family = BTrees.family32 def __init__(self, discriminator): if not callable(discriminator): if not isinstance(discriminator, six.string_types): raise ValueError('discriminator value must be callable or a ' 'string') self.discriminator = discriminator self._not_indexed = self.family.IF.Set() self.clear() def clear(self): self._depth = 0 self._index = self.family.OO.BTree() self._unindex = self.family.IO.BTree() self._length = Length(0) def insertEntry(self, comp, id, level): """Insert an entry. comp is a path component id is the docid level is the level of the component inside the path """ if comp not in self._index: self._index[comp] = self.family.IO.BTree() if level not in self._index[comp]: self._index[comp][level] = self.family.IF.TreeSet() self._index[comp][level].insert(id) if level > self._depth: self._depth = level def index_doc(self, docid, object): if callable(self.discriminator): value = self.discriminator(object, _marker) else: value = getattr(object, self.discriminator, _marker) if value is _marker: # unindex the previous value self.unindex_doc(docid) # Store docid in set of unindexed docids self._not_indexed.add(docid) return None if isinstance(value, Persistent): raise ValueError('Catalog cannot index persistent object %s' % value) if docid in self._not_indexed: # Remove from set of unindexed docs if it was in there. self._not_indexed.remove(docid) path = value if isinstance(path, (list, tuple)): path = '/' + '/'.join(path[1:]) comps = [_f for _f in path.split('/') if _f] if docid not in self._unindex: self._length.change(1) for i in range(len(comps)): self.insertEntry(comps[i], docid, i) self._unindex[docid] = path return 1 def unindex_doc(self, docid): _not_indexed = self._not_indexed if docid in _not_indexed: _not_indexed.remove(docid) if docid not in self._unindex: return comps = self._unindex[docid].split('/') for level in range(len(comps[1:])): comp = comps[level+1] try: self._index[comp][level].remove(docid) if not self._index[comp][level]: del self._index[comp][level] if not self._index[comp]: del self._index[comp] except KeyError: pass self._length.change(-1) del self._unindex[docid] def _indexed(self): return list(self._unindex.keys()) def search(self, path, default_level=0): """ path is either a string representing a relative URL or a part of a relative URL or a tuple (path,level).
level >= 0 starts searching at the given level; level < 0 searches the path components at any level """ if isinstance(path, six.string_types): level = default_level else: level = int(path[1]) path = path[0] comps = [_f for _f in path.split('/') if _f] if len(comps) == 0: return self.family.IF.Set(list(self._unindex.keys())) results = None if level >= 0: for i, comp in enumerate(comps): if comp not in self._index: return self.family.IF.Set() if level+i not in self._index[comp]: return self.family.IF.Set() results = self.family.IF.intersection( results, self._index[comp][level+i]) else: for level in range(self._depth + 1): ids = None for i, comp in enumerate(comps): try: ids = self.family.IF.intersection( ids, self._index[comp][level+i]) except KeyError: break else: results = self.family.IF.union(results, ids) return results def numObjects(self): """ return the number of indexed objects """ return len(self._unindex) def getEntryForObject(self, docid): """ Takes a document ID and returns all the information we have on that specific object. """ return self._unindex.get(docid) def apply(self, query): """ Search the index; query may be a string, a sequence of paths, or a mapping with 'query', 'level' and 'operator' keys """ level = 0 operator = self.useOperator if isinstance(query, six.string_types): paths = [query] elif isinstance(query, (tuple, list)): paths = query else: paths = query.get('query', []) if isinstance(paths, six.string_types): paths = [ paths ] level = query.get('level', 0) operator = query.get('operator', self.useOperator).lower() sets = [] for path in paths: sets.append(self.search(path, level)) if operator == 'or': rs = self.family.IF.multiunion(sets) else: rs = None sets.sort(key=len) for set in sets: rs = self.family.IF.intersection(rs, set) if not rs: break if rs: return rs else: return self.family.IF.Set() applyEq = apply
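# Hedged usage sketch for CatalogPathIndex above; 'Doc' and its 'path'
# attribute are hypothetical stand-ins for indexed models.
class Doc(object):
    def __init__(self, path):
        self.path = path

idx = CatalogPathIndex('path')
idx.index_doc(1, Doc('/site/events/party'))
idx.index_doc(2, Doc('/site/news/party'))

assert list(idx.search('site/events')) == [1]     # 'site' at level 0 and
                                                  # 'events' at level 1
assert list(idx.search(('party', -1))) == [1, 2]  # 'party' at any level
assert list(idx.apply({'query': '/site'})) == [1, 2]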
def __init__(self): super(CachingCatalog, self).__init__() self.generation = Length(0)
class UnIndex(SimpleItem): """Simple forward and reverse index. """ implements(ILimitedResultIndex, IUniqueValueIndex, ISortIndex) def __init__(self, id, ignore_ex=None, call_methods=None, extra=None, caller=None): """Create an unindex UnIndexes are indexes that contain two index components, the forward index (like plain index objects) and an inverted index. The inverted index is so that objects can be unindexed even when the old value of the object is not known. e.g. self._index = {datum:[documentId1, documentId2]} self._unindex = {documentId:datum} The arguments are: 'id' -- the name of the item attribute to index. This is either an attribute name or a record key. 'ignore_ex' -- should be set to true if you want the index to ignore exceptions raised while indexing instead of propagating them. 'call_methods' -- should be set to true if you want the index to call the attribute 'id' (note: 'id' should be callable!) You will also need to pass in an object in the index and unindex methods for this to work. 'extra' -- a mapping object that keeps additional index-related parameters - subitem 'indexed_attrs' can be a string with comma-separated attribute names or a list 'caller' -- reference to the calling object (usually a (Z)Catalog instance) """ def _get(o, k, default): """ return a value for a given key of a dict/record 'o' """ if isinstance(o, dict): return o.get(k, default) else: return getattr(o, k, default) self.id = id self.ignore_ex = ignore_ex # currently unimplemented self.call_methods = call_methods self.operators = ('or', 'and') self.useOperator = 'or' # allow index to index multiple attributes ia = _get(extra, 'indexed_attrs', id) if isinstance(ia, str): self.indexed_attrs = ia.split(',') else: self.indexed_attrs = list(ia) self.indexed_attrs = [ attr.strip() for attr in self.indexed_attrs if attr ] if not self.indexed_attrs: self.indexed_attrs = [id] self.clear() def __len__(self): return self._length() def getId(self): return self.id def clear(self): self._length = Length() self._index = OOBTree() self._unindex = IOBTree() def __nonzero__(self): return not not self._unindex def histogram(self): """Return a mapping which provides a histogram of the number of elements found at each point in the index. """ histogram = {} for item in self._index.items(): if isinstance(item, int): entry = 1 # "set" length is 1 else: key, value = item entry = len(value) histogram[entry] = histogram.get(entry, 0) + 1 return histogram def referencedObjects(self): """Generate a list of IDs for which we have referenced objects.""" return self._unindex.keys() def getEntryForObject(self, documentId, default=_marker): """Takes a document ID and returns all the information we have on that specific object. """ if default is _marker: return self._unindex.get(documentId) else: return self._unindex.get(documentId, default) def removeForwardIndexEntry(self, entry, documentId): """Take the entry provided and remove any reference to documentId in its entry in the index.
""" indexRow = self._index.get(entry, _marker) if indexRow is not _marker: try: indexRow.remove(documentId) if not indexRow: del self._index[entry] self._length.change(-1) except ConflictError: raise except AttributeError: # index row is an int try: del self._index[entry] except KeyError: # XXX swallow KeyError because it was probably # removed and then _length AttributeError raised pass if isinstance(self.__len__, Length): self._length = self.__len__ del self.__len__ self._length.change(-1) except: LOG.error( '%s: unindex_object could not remove ' 'documentId %s from index %s. This ' 'should not happen.' % (self.__class__.__name__, str(documentId), str(self.id)), exc_info=sys.exc_info()) else: LOG.error('%s: unindex_object tried to retrieve set %s ' 'from index %s but couldn\'t. This ' 'should not happen.' % (self.__class__.__name__, repr(entry), str(self.id))) def insertForwardIndexEntry(self, entry, documentId): """Take the entry provided and put it in the correct place in the forward index. This will also deal with creating the entire row if necessary. """ indexRow = self._index.get(entry, _marker) # Make sure there's actually a row there already. If not, create # a set and stuff it in first. if indexRow is _marker: # We always use a set to avoid getting conflict errors on # multiple threads adding a new row at the same time self._index[entry] = IITreeSet((documentId, )) self._length.change(1) else: try: indexRow.insert(documentId) except AttributeError: # Inline migration: index row with one element was an int at # first (before Zope 2.13). indexRow = IITreeSet((indexRow, documentId)) self._index[entry] = indexRow def index_object(self, documentId, obj, threshold=None): """ wrapper to handle indexing of multiple attributes """ fields = self.getIndexSourceNames() res = 0 for attr in fields: res += self._index_object(documentId, obj, threshold, attr) return res > 0 def _index_object(self, documentId, obj, threshold=None, attr=''): """ index and object 'obj' with integer id 'documentId'""" returnStatus = 0 # First we need to see if there's anything interesting to look at datum = self._get_object_datum(obj, attr) # We don't want to do anything that we don't have to here, so we'll # check to see if the new and existing information is the same. oldDatum = self._unindex.get(documentId, _marker) if datum != oldDatum: if oldDatum is not _marker: self.removeForwardIndexEntry(oldDatum, documentId) if datum is _marker: try: del self._unindex[documentId] except ConflictError: raise except: LOG.error( 'Should not happen: oldDatum was there, now its not,' 'for document with id %s' % documentId) if datum is not _marker: self.insertForwardIndexEntry(datum, documentId) self._unindex[documentId] = datum returnStatus = 1 return returnStatus def _get_object_datum(self, obj, attr): # self.id is the name of the index, which is also the name of the # attribute we're interested in. If the attribute is callable, # we'll do so. 
try: datum = getattr(obj, attr) if safe_callable(datum): datum = datum() except (AttributeError, TypeError): datum = _marker return datum def numObjects(self): """Return the number of indexed objects.""" return len(self._unindex) def indexSize(self): """Return the size of the index in terms of distinct values.""" return len(self) def unindex_object(self, documentId): """ Unindex the object with integer id 'documentId' and don't raise an exception if we fail """ unindexRecord = self._unindex.get(documentId, _marker) if unindexRecord is _marker: return None self.removeForwardIndexEntry(unindexRecord, documentId) try: del self._unindex[documentId] except ConflictError: raise except: LOG.debug('Attempt to unindex nonexistent document' ' with id %s' % documentId, exc_info=True) def _apply_index(self, request, resultset=None): """Apply the index to query parameters given in the request arg. The request argument should be a mapping object. If the request does not have a key which matches the "id" of the index instance, then None is returned. If the request *does* have a key which matches the "id" of the index instance, one of a few things can happen: - if the value is a blank string, None is returned (in order to support requests from web forms where you can't tell a blank string from empty). - if the value is a nonblank string, turn the value into a single-element sequence, and proceed. - if the value is a sequence, return a union search. - If the value is a dict and contains a key of the form '<index>_operator' this overrides the default method ('or') to combine search results. Valid values are "or" and "and". If None is not returned as a result of the abovementioned constraints, two objects are returned. The first object is a ResultSet containing the record numbers of the matching records. The second object is a tuple containing the names of all data fields used. 
FAQ answer: to search a Field Index for documents that have a blank string as their value, wrap the request value up in a tuple ala: request = {'id':('',)} """ record = parseIndexRequest(request, self.id, self.query_options) if record.keys is None: return None index = self._index r = None opr = None # experimental code for specifying the operator operator = record.get('operator', self.useOperator) if operator not in self.operators: raise RuntimeError("operator not valid: %s" % escape(operator)) # Range parameter range_parm = record.get('range', None) if range_parm: opr = "range" opr_args = [] if range_parm.find("min") > -1: opr_args.append("min") if range_parm.find("max") > -1: opr_args.append("max") if record.get('usage', None): # see if any usage params are sent to field opr = record.usage.lower().split(':') opr, opr_args = opr[0], opr[1:] if opr == "range": # range search if 'min' in opr_args: lo = min(record.keys) else: lo = None if 'max' in opr_args: hi = max(record.keys) else: hi = None if hi: setlist = index.values(lo, hi) else: setlist = index.values(lo) # If we only use one key, intersect and return immediately if len(setlist) == 1: result = setlist[0] if isinstance(result, int): result = IISet((result, )) return result, (self.id, ) if operator == 'or': tmp = [] for s in setlist: if isinstance(s, int): s = IISet((s, )) tmp.append(s) r = multiunion(tmp) else: # For intersection, sort with smallest data set first tmp = [] for s in setlist: if isinstance(s, int): s = IISet((s, )) tmp.append(s) if len(tmp) > 2: setlist = sorted(tmp, key=len) else: setlist = tmp r = resultset for s in setlist: # the result is bound by the resultset r = intersection(r, s) else: # not a range search # Filter duplicates setlist = [] for k in record.keys: s = index.get(k, None) # If None, try to bail early if s is None: if operator == 'or': # If union, we can't possibly get a bigger result continue # If intersection, we can't possibly get a smaller result return IISet(), (self.id, ) elif isinstance(s, int): s = IISet((s, )) setlist.append(s) # If we only use one key return immediately if len(setlist) == 1: result = setlist[0] if isinstance(result, int): result = IISet((result, )) return result, (self.id, ) if operator == 'or': # If we already get a small result set passed in, intersecting # the various indexes with it and doing the union later is # faster than creating a multiunion first.
if resultset is not None and len(resultset) < 200: smalllist = [] for s in setlist: smalllist.append(intersection(resultset, s)) r = multiunion(smalllist) else: r = multiunion(setlist) else: # For intersection, sort with smallest data set first if len(setlist) > 2: setlist = sorted(setlist, key=len) r = resultset for s in setlist: r = intersection(r, s) if isinstance(r, int): r = IISet((r, )) if r is None: return IISet(), (self.id, ) else: return r, (self.id, ) def hasUniqueValuesFor(self, name): """has unique values for column name""" if name == self.id: return 1 else: return 0 def getIndexSourceNames(self): """ return sequence of indexed attributes """ # BBB: older indexes didn't have 'indexed_attrs' return getattr(self, 'indexed_attrs', [self.id]) def uniqueValues(self, name=None, withLengths=0): """returns the unique values for name if withLengths is true, returns a sequence of tuples of (value, length) """ if name is None: name = self.id elif name != self.id: return [] if not withLengths: return tuple(self._index.keys()) else: rl = [] for i in self._index.keys(): set = self._index[i] if isinstance(set, int): l = 1 else: l = len(set) rl.append((i, l)) return tuple(rl) def keyForDocument(self, id): # This method is superseded by documentToKeyMap return self._unindex[id] def documentToKeyMap(self): return self._unindex def items(self): items = [] for k, v in self._index.items(): if isinstance(v, int): v = IISet((v, )) items.append((k, v)) return items
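# Hedged usage sketch for UnIndex above; 'Doc' is a hypothetical content
# object whose 'author' attribute is indexed.
class Doc(object):
    def __init__(self, author):
        self.author = author

idx = UnIndex('author')
idx.index_object(1, Doc('alice'))
idx.index_object(2, Doc('bob'))
idx.index_object(3, Doc('alice'))
assert idx.getEntryForObject(1) == 'alice'   # reverse index: docid -> datum
assert idx.uniqueValues() == ('alice', 'bob')
assert idx.numObjects() == 3                 # indexed documents
assert len(idx) == 2                         # distinct forward-index rows
idx.unindex_object(3)                        # uses the reverse index, so the
assert idx.numObjects() == 2                 # old value need not be known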
class SIndex(Index): _fwd_class = None _fwd_set_class = None def __init__(self, adapter): self._adapter = adapter self._fwd_index = self._fwd_class() self._num_objs = Length(0) def _gc_entry(self, v): """ 'Garbage collect' empty set entries """ if len(self._fwd_index[v]) == 0: del self._fwd_index[v] def index_obj(self, obj): values = self._adapter(obj) if not isinstance(values, list): values = [values] for value in values: vset = self._fwd_index.get(value, self._fwd_set_class()) if obj in vset: raise InconsistentIndexException("%r already in fwd[%r]" % (obj, value)) else: vset.add(obj) self._fwd_index[value] = vset self._num_objs.change(1) def _unindex_obj_from_key(self, key, obj): if key in self._fwd_index: vset = self._fwd_index[key] if obj in vset: vset.remove(obj) self._fwd_index[key] = vset self._gc_entry(key) else: raise InconsistentIndexException("'%s' not in fwd[%s]" % (obj, key)) else: raise InconsistentIndexException("'%s' not in fwd index" % key) def unindex_obj(self, obj): """ Slightly dumber than the one in DIndex, takes the indexation value (key) instead of looking it up in the reverse index """ keys = self._adapter(obj) if not isinstance(keys, list): keys = [keys] for k in keys: self._unindex_obj_from_key(k, obj) self._num_objs.change(-1) def values(self, *args): return list(self.itervalues(*args)) def itervalues(self, *args): for s in self._fwd_index.itervalues(*args): for t in s: yield t def iteritems(self, *args): for ts, s in self._fwd_index.iteritems(*args): for t in s: yield ts, t def minKey(self): return self._fwd_index.minKey() def maxKey(self): return self._fwd_index.maxKey() def __iter__(self): return iter(self._fwd_index) def __len__(self): return self._num_objs() def __getitem__(self, item): return self._fwd_index[item] def get(self, item, default=None): return self._fwd_index.get(item, default) def clear(self): """ Initialize index """ # The forward index maps indexed values to a sequence of docids self._fwd_index = self._fwd_class() self._num_objs = Length(0)
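# Hedged usage sketch for SIndex above; a concrete subclass must supply
# the forward BTree and set classes (OOBTree/OOTreeSet assumed here),
# and the adapter extracts the indexed key(s) from an object.
from BTrees.OOBTree import OOBTree, OOTreeSet

class TitleIndex(SIndex):
    _fwd_class = OOBTree
    _fwd_set_class = OOTreeSet

idx = TitleIndex(lambda rec: rec[1])      # index records by their title
idx.index_obj((1, 'alpha'))
idx.index_obj((2, 'beta'))
idx.index_obj((3, 'alpha'))
assert len(idx) == 3                      # Length-backed object count
assert sorted(idx['alpha']) == [(1, 'alpha'), (3, 'alpha')]
idx.unindex_obj((2, 'beta'))              # re-derives the key via the adapter
assert idx.values('alpha', 'beta') == [(1, 'alpha'), (3, 'alpha')]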
class CatalogTool(PloneBaseTool, BaseTool): """Plone's catalog tool""" implements(IPloneCatalogTool) meta_type = 'Plone Catalog Tool' security = ClassSecurityInfo() toolicon = 'skins/plone_images/book_icon.png' _counter = None manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals()) manage_options = ( { 'action': 'manage_main', 'label': 'Contents' }, { 'action': 'manage_catalogView', 'label': 'Catalog' }, { 'action': 'manage_catalogIndexes', 'label': 'Indexes' }, { 'action': 'manage_catalogSchema', 'label': 'Metadata' }, { 'action': 'manage_catalogAdvanced', 'label': 'Advanced' }, { 'action': 'manage_catalogReport', 'label': 'Query Report' }, { 'action': 'manage_catalogPlan', 'label': 'Query Plan' }, { 'action': 'manage_propertiesForm', 'label': 'Properties' }, ) def __init__(self): ZCatalog.__init__(self, self.getId()) def _removeIndex(self, index): """Safe removal of an index. """ try: self.manage_delIndex(index) except: pass def _listAllowedRolesAndUsers(self, user): """Makes sure the list includes the user's groups. """ result = user.getRoles() if 'Anonymous' in result: # The anonymous user has no further roles return ['Anonymous'] result = list(result) if hasattr(aq_base(user), 'getGroups'): groups = ['user:%s' % x for x in user.getGroups()] if groups: result = result + groups result.append('Anonymous') result.append('user:%s' % user.getId()) return result security.declarePrivate('indexObject') def indexObject(self, object, idxs=[]): """Add object to catalog. The optional idxs argument is a list of specific indexes to populate (all of them by default). """ self.reindexObject(object, idxs) security.declareProtected(ManageZCatalogEntries, 'catalog_object') def catalog_object(self, object, uid=None, idxs=[], update_metadata=1, pghandler=None): self._increment_counter() w = object if not IIndexableObject.providedBy(object): # This is the CMF 2.2 compatible approach, which should be used # going forward wrapper = queryMultiAdapter((object, self), IIndexableObject) if wrapper is not None: w = wrapper ZCatalog.catalog_object(self, w, uid, idxs, update_metadata, pghandler=pghandler) security.declareProtected(ManageZCatalogEntries, 'uncatalog_object') def uncatalog_object(self, *args, **kwargs): self._increment_counter() return BaseTool.uncatalog_object(self, *args, **kwargs) def _increment_counter(self): if self._counter is None: self._counter = Length() self._counter.change(1) security.declarePrivate('getCounter') def getCounter(self): return self._counter is not None and self._counter() or 0 security.declareProtected(SearchZCatalog, 'searchResults') def searchResults(self, REQUEST=None, **kw): """Calls ZCatalog.searchResults with extra arguments that limit the results to what the user is allowed to see. This version uses the 'effectiveRange' DateRangeIndex. It also accepts a keyword argument show_inactive to disable effectiveRange checking entirely even for those without portal wide AccessInactivePortalContent permission.
""" kw = kw.copy() show_inactive = kw.get('show_inactive', False) if isinstance(REQUEST, dict) and not show_inactive: show_inactive = 'show_inactive' in REQUEST user = _getAuthenticatedUser(self) kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user) if not show_inactive and not _checkPermission( AccessInactivePortalContent, self): kw['effectiveRange'] = DateTime() return ZCatalog.searchResults(self, REQUEST, **kw) __call__ = searchResults security.declareProtected(ManageZCatalogEntries, 'clearFindAndRebuild') def clearFindAndRebuild(self): """Empties catalog, then finds all contentish objects (i.e. objects with an indexObject method), and reindexes them. This may take a long time. """ def indexObject(obj, path): if (base_hasattr(obj, 'indexObject') and safe_callable(obj.indexObject)): try: obj.indexObject() except TypeError: # Catalogs have 'indexObject' as well, but they # take different args, and will fail pass self.manage_catalogClear() portal = aq_parent(aq_inner(self)) portal.ZopeFindAndApply(portal, search_sub=True, apply_func=indexObject) security.declareProtected(ManageZCatalogEntries, 'manage_catalogRebuild') def manage_catalogRebuild(self, RESPONSE=None, URL1=None): """Clears the catalog and indexes all objects with an 'indexObject' method. This may take a long time. """ elapse = time.time() c_elapse = time.clock() self.clearFindAndRebuild() elapse = time.time() - elapse c_elapse = time.clock() - c_elapse if RESPONSE is not None: RESPONSE.redirect(URL1 + '/manage_catalogAdvanced?manage_tabs_message=' + urllib.quote('Catalog Rebuilt\n' 'Total time: %s\n' 'Total CPU time: %s' % ( ` elapse `, ` c_elapse `)))
def __init__(self, adapter): self._adapter = adapter self._fwd_index = self._fwd_class() self._num_objs = Length(0)
class CatalogTool(PloneBaseTool, BaseTool): """Plone's catalog tool""" meta_type = 'Plone Catalog Tool' security = ClassSecurityInfo() toolicon = 'skins/plone_images/book_icon.png' _counter = None manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals()) manage_options = ( { 'action': 'manage_main', 'label': 'Contents' }, { 'action': 'manage_catalogView', 'label': 'Catalog' }, { 'action': 'manage_catalogIndexes', 'label': 'Indexes' }, { 'action': 'manage_catalogSchema', 'label': 'Metadata' }, { 'action': 'manage_catalogAdvanced', 'label': 'Advanced' }, { 'action': 'manage_catalogReport', 'label': 'Query Report' }, { 'action': 'manage_catalogPlan', 'label': 'Query Plan' }, { 'action': 'manage_propertiesForm', 'label': 'Properties' }, ) def __init__(self): ZCatalog.__init__(self, self.getId()) def _removeIndex(self, index): # Safe removal of an index. try: self.manage_delIndex(index) except: pass def _listAllowedRolesAndUsers(self, user): # Makes sure the list includes the user's groups. result = user.getRoles() if 'Anonymous' in result: # The anonymous user has no further roles return ['Anonymous'] result = list(result) if hasattr(aq_base(user), 'getGroups'): groups = ['user:%s' % x for x in user.getGroups()] if groups: result = result + groups # Order the arguments from small to large sets result.insert(0, 'user:%s' % user.getId()) result.append('Anonymous') return result @security.private def indexObject(self, object, idxs=None): # Add object to catalog. # The optional idxs argument is a list of specific indexes # to populate (all of them by default). if idxs is None: idxs = [] self.reindexObject(object, idxs) @security.protected(ManageZCatalogEntries) def catalog_object(self, object, uid=None, idxs=None, update_metadata=1, pghandler=None): if idxs is None: idxs = [] self._increment_counter() w = object if not IIndexableObject.providedBy(object): # This is the CMF 2.2 compatible approach, which should be used # going forward wrapper = queryMultiAdapter((object, self), IIndexableObject) if wrapper is not None: w = wrapper ZCatalog.catalog_object(self, w, uid, idxs, update_metadata, pghandler=pghandler) @security.protected(ManageZCatalogEntries) def uncatalog_object(self, *args, **kwargs): self._increment_counter() return BaseTool.uncatalog_object(self, *args, **kwargs) def _increment_counter(self): if self._counter is None: self._counter = Length() self._counter.change(1) @security.private def getCounter(self): return self._counter is not None and self._counter() or 0 @security.protected(SearchZCatalog) def searchResults(self, REQUEST=None, **kw): # Calls ZCatalog.searchResults with extra arguments that # limit the results to what the user is allowed to see. # # This version uses the 'effectiveRange' DateRangeIndex. # # It also accepts a keyword argument show_inactive to disable # effectiveRange checking entirely even for those without portal # wide AccessInactivePortalContent permission. 
kw = kw.copy() show_inactive = kw.get('show_inactive', False) if isinstance(REQUEST, dict) and not show_inactive: show_inactive = 'show_inactive' in REQUEST user = _getAuthenticatedUser(self) kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user) if not show_inactive \ and not _checkPermission(AccessInactivePortalContent, self): kw['effectiveRange'] = DateTime() return ZCatalog.searchResults(self, REQUEST, **kw) __call__ = searchResults def search(self, *args, **kw): # Wrap search() the same way that searchResults() is query = {} if args: query = args[0] elif 'query_request' in kw: query = kw.get('query_request') kw['query_request'] = query.copy() user = _getAuthenticatedUser(self) query['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user) if not _checkPermission(AccessInactivePortalContent, self): query['effectiveRange'] = DateTime() kw['query_request'] = query return super(CatalogTool, self).search(**kw) @security.protected(ManageZCatalogEntries) def clearFindAndRebuild(self): # Empties catalog, then finds all contentish objects (i.e. objects # with an indexObject method), and reindexes them. # This may take a long time. def indexObject(obj, path): if (base_hasattr(obj, 'indexObject') and safe_callable(obj.indexObject)): try: obj.indexObject() # index conversions from plone.app.discussion annotations = IAnnotations(obj) catalog = getToolByName(obj, "portal_catalog") if DISCUSSION_ANNOTATION_KEY in annotations: conversation = annotations[DISCUSSION_ANNOTATION_KEY] conversation = conversation.__of__(obj) for comment in conversation.getComments(): try: if catalog: catalog.indexObject(comment) except StopIteration: # pragma: no cover pass except TypeError: # Catalogs have 'indexObject' as well, but they # take different args, and will fail pass self.manage_catalogClear() portal = aq_parent(aq_inner(self)) portal.ZopeFindAndApply(portal, search_sub=True, apply_func=indexObject) @security.protected(ManageZCatalogEntries) def manage_catalogRebuild(self, RESPONSE=None, URL1=None): """Clears the catalog and indexes all objects with an 'indexObject' method. This may take a long time. """ elapse = time.time() c_elapse = time.clock() self.clearFindAndRebuild() elapse = time.time() - elapse c_elapse = time.clock() - c_elapse msg = ('Catalog Rebuilt\n' 'Total time: %s\n' 'Total CPU time: %s' % (repr(elapse), repr(c_elapse))) logger.info(msg) if RESPONSE is not None: RESPONSE.redirect(URL1 + '/manage_catalogAdvanced?manage_tabs_message=' + urllib.quote(msg))
def __init__(self): self._bookings = OOBTree.OOTreeSet() self._count = Length(0)
class GranularIndex(CatalogFieldIndex): """Indexes integer values using multiple granularity levels. The multiple levels of granularity make it possible to query large ranges without loading many IFTreeSets from the forward index. """ implements( ICatalogIndex, IStatistics, ) def __init__(self, discriminator, levels=(1000, )): """Create an index. levels is a sequence of integer coarseness levels. The default is (1000,). """ self._levels = tuple(levels) super(GranularIndex, self).__init__(discriminator) def clear(self): """Initialize all mappings.""" # The forward index maps an indexed value to IFSet(docids) self._fwd_index = self.family.IO.BTree() # The reverse index maps a docid to its index value self._rev_index = self.family.II.BTree() self._num_docs = Length(0) # self._granular_indexes: [(level, BTree(value -> IFSet([docid])))] self._granular_indexes = [(level, self.family.IO.BTree()) for level in self._levels] def index_doc(self, docid, obj): if callable(self.discriminator): value = self.discriminator(obj, _marker) else: value = getattr(obj, self.discriminator, _marker) if value is _marker: # unindex the previous value self.unindex_doc(docid) return if not isinstance(value, int): raise ValueError( 'GranularIndex cannot index non-integer value %s' % value) rev_index = self._rev_index if docid in rev_index: if docid in self._fwd_index.get(value, ()): # There's no need to index the doc; it's already up to date. return # unindex doc if present self.unindex_doc(docid) # Insert into forward index. set = self._fwd_index.get(value) if set is None: set = self.family.IF.TreeSet() self._fwd_index[value] = set set.insert(docid) # increment doc count self._num_docs.change(1) # Insert into reverse index. rev_index[docid] = value for level, ndx in self._granular_indexes: v = value // level set = ndx.get(v) if set is None: set = self.family.IF.TreeSet() ndx[v] = set set.insert(docid) def unindex_doc(self, docid): rev_index = self._rev_index value = rev_index.get(docid) if value is None: return # not in index del rev_index[docid] self._num_docs.change(-1) ndx = self._fwd_index try: set = ndx[value] set.remove(docid) if not set: del ndx[value] except KeyError: pass for level, ndx in self._granular_indexes: v = value // level try: set = ndx[v] set.remove(docid) if not set: del ndx[v] except KeyError: pass def search(self, queries, operator='or'): sets = [] for query in queries: if isinstance(query, Range): query = query.as_tuple() else: query = (query, query) set = self.family.IF.multiunion(self.docids_in_range(*query)) sets.append(set) result = None if len(sets) == 1: result = sets[0] elif operator == 'and': sets.sort(key=len) # intersect the smallest set first for set in sets: result = self.family.IF.intersection(set, result) else: result = self.family.IF.multiunion(sets) return result def docids_in_range(self, min, max): """List the docids for an integer range, inclusive on both ends. min or max can be None, making them unbounded. Returns an iterable of IFSets. """ for level, ndx in sorted(self._granular_indexes, reverse=True): # Try to fill the range using coarse buckets first. # Use only buckets that completely fill the range. # For example, if start is 2 and level is 10, then we can't # use bucket 0; only buckets 1 and greater are useful. # Similarly, if end is 18 and level is 10, then we can't use # bucket 1; only buckets 0 and less are useful. if min is not None: a = (min + level - 1) // level else: a = None if max is not None: b = (max - level + 1) // level else: b = None # a and b are now coarse bucket values (or None).
if a is None or b is None or a <= b: sets = [] if a is not None and min < a * level: # include the gap before sets.extend(self.docids_in_range(min, a * level - 1)) sets.extend(ndx.values(a, b)) if b is not None and (b + 1) * level - 1 < max: # include the gap after sets.extend(self.docids_in_range((b + 1) * level, max)) return sets return self._fwd_index.values(min, max)
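# Worked sketch of the bucket arithmetic in docids_in_range above,
# assuming the default coarseness level of 1000 and the inclusive
# query range [1500, 3502].
level = 1000
lo, hi = 1500, 3502
a = (lo + level - 1) // level   # 2: first bucket fully inside the range
b = (hi - level + 1) // level   # 2: last bucket fully inside the range
assert (a, b) == (2, 2)
# Bucket 2 covers values [2000, 2999]; the leftover gaps [1500, 1999]
# and [3000, 3502] are filled recursively from finer levels, ending at
# the exact per-value forward index.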