def _combine_union(self, values, object):
    if not values:
        return
    set = None
    for v in values:
        sv = self._standardizeValue_(v, object)
        if not sv:
            continue
        if set is None:
            set = IITreeSet(sv)
        else:
            set.update(sv)
    return set

def clear(self):
    """ Start over fresh. """
    self._always = IITreeSet()
    self._since_only = IOBTree()
    self._until_only = IOBTree()
    self._since = IOBTree()
    self._until = IOBTree()
    self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
    self._length = Length()

def _insert_migrate(self, tree, key, value):
    treeset = tree.get(key, None)
    if treeset is None:
        tree[key] = value
    else:
        if isinstance(treeset, int):
            tree[key] = IITreeSet((treeset, value))
        elif isinstance(treeset, IISet):
            tree[key] = IITreeSet(treeset)
            tree[key].insert(value)
        else:
            treeset.insert(value)

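# For context, a standalone sketch (invented keys and docids; only the
# BTrees package is assumed) of the three storage shapes this helper
# migrates between: bare int, flat IISet, and IITreeSet.
from BTrees.IIBTree import IISet, IITreeSet
from BTrees.IOBTree import IOBTree

tree = IOBTree()
tree[1] = 42                   # oldest shape: a single docid as a bare int
tree[2] = IISet((7, 9))        # older shape: a flat IISet
tree[3] = IITreeSet((7, 9))    # current shape: an IITreeSet

# An int row grows into a treeset holding both ids ...
tree[1] = IITreeSet((tree[1], 99))
# ... and an IISet row is copied into a treeset before the new id is added.
tree[2] = IITreeSet(tree[2])
tree[2].insert(99)

assert list(tree[1]) == [42, 99]
assert list(tree[2]) == [7, 9, 99]
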
def getPositions(self, docid, wordid):
    """Return a sequence of positions of occurrences of wordid within
    a document given by its docid.
    """
    encoded_wid = encode((wordid, ))
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions

def test_empty(self):
    bigsize = BIGSETSIZE
    smallsize = 0
    small = IISet(xrange(smallsize))
    large = IITreeSet(xrange(bigsize))
    self.timing(small, large, 'Intersection empty set + large treeset')
    self.timing(large, small, 'Intersection large treeset + empty set')
    small = IITreeSet(xrange(smallsize))
    large = IISet(xrange(bigsize))
    self.timing(small, large, 'Intersection empty tree set + large set')
    self.timing(large, small, 'Intersection large set + empty tree set')

def clear(self):
    """Start over fresh."""
    self._always = IITreeSet()
    self._since_only = IOBTree()
    self._until_only = IOBTree()
    self._since = IOBTree()
    self._until = IOBTree()
    self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
    self._length = Length()
    if self._counter is None:
        self._counter = Length()
    else:
        self._increment_counter()

def test_empty(self):
    bigsize = BIGSETSIZE
    smallsize = 0
    small = IISet(xrange(smallsize))
    large = IITreeSet(xrange(bigsize))
    print '\nIntersection empty set + large treeset'
    self.timing(small, large)
    small = IITreeSet(xrange(smallsize))
    large = IISet(xrange(bigsize))
    print '\nIntersection empty tree set + large set'
    self.timing(small, large)

def getPositions(self, docid, wordid):
    """Return a sequence of positions of occurrences of wordid within
    a document given by its docid.
    """
    encoded_wid = encode((wordid,))
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions

def _insertForwardIndexEntry(self, since, until, documentId):
    """Insert 'documentId' into the appropriate set based on 'datum'."""
    if since is None and until is None:
        self._always.insert(documentId)
    elif since is None:
        set = self._until_only.get(until, None)
        if set is None:
            self._until_only[until] = documentId
        else:
            if isinstance(set, int):
                set = self._until_only[until] = IITreeSet((set, documentId))
            else:
                if isinstance(set, IISet):
                    # migrate a legacy flat IISet to an IITreeSet first;
                    # an IISet cannot be passed as an element of a new set
                    set = self._until_only[until] = IITreeSet(set)
                set.insert(documentId)
    elif until is None:
        set = self._since_only.get(since, None)
        if set is None:
            self._since_only[since] = documentId
        else:
            if isinstance(set, int):
                set = self._since_only[since] = IITreeSet((set, documentId))
            else:
                if isinstance(set, IISet):
                    set = self._since_only[since] = IITreeSet(set)
                set.insert(documentId)
    else:
        set = self._since.get(since, None)
        if set is None:
            self._since[since] = documentId
        else:
            if isinstance(set, int):
                set = self._since[since] = IITreeSet((set, documentId))
            else:
                if isinstance(set, IISet):
                    set = self._since[since] = IITreeSet(set)
                set.insert(documentId)

        set = self._until.get(until, None)
        if set is None:
            self._until[until] = documentId
        else:
            if isinstance(set, int):
                set = self._until[until] = IITreeSet((set, documentId))
            else:
                if isinstance(set, IISet):
                    set = self._until[until] = IITreeSet(set)
                set.insert(documentId)

def _finalize_index(self, result, start, end, used_fields):
    filtered_result = IITreeSet()
    # used_recurrence = False

    for documentId in result:
        recurrence = self._uid2recurrence.get(documentId)
        if recurrence is None:
            # This event isn't recurring, so it's a match:
            filtered_result.add(documentId)
            continue

        # used_recurrence = True
        match = False

        # This is a possible place where optimizations can be done if
        # necessary. For example, for periods where the start and end
        # date is the same, we can first check if the start time and
        # end time of the date fall in between the start and end times
        # of the period, so as to avoid expansion. But most likely this
        # will have a very small impact on speed, so I skip this until
        # it actually becomes a problem.

        if start is not None:
            event_start = datetime(*self._uid2start[documentId][:6])
        else:
            event_start = None
        if end is not None:
            event_duration = self._uid2duration[documentId]
            event_end = event_start + event_duration
        else:
            event_end = None

        for occurrence in recurrence._iter():
            utc_occurrence = datetime(*occurrence.utctimetuple()[:6])
            if event_start is not None and utc_occurrence < event_start:
                # XXX we should add a counter and break after 10000
                # occurrences.
                continue
            if event_end is not None and utc_occurrence > event_end:
                break

            # The start of this occurrence falls between the start and
            # end dates of the query:
            match = True
            break

        if match:
            filtered_result.add(documentId)

    # if used_recurrence:
    used_fields += (self.recurrence_attr, )

    return filtered_result, used_fields

def test_large(self):
    bigsize = BIGSETSIZE / 10
    small = IITreeSet(xrange(bigsize))
    large = IITreeSet(xrange(bigsize))
    print '\nIntersection Large tree sets'
    self.timing(small, large)
    small = IISet(xrange(bigsize))
    large = IISet(xrange(bigsize))
    print '\nIntersection Large sets'
    self.timing(small, large)
    small = set(xrange(bigsize))
    large = set(xrange(bigsize))
    self.pytiming(small, large)

def insertForwardIndexEntry(self, entry, documentId):
    """Take the entry provided and put it in the correct place
    in the forward index.

    This will also deal with creating the entire row if necessary.
    """
    indexRow = self._index.get(entry, _marker)

    # Make sure there's actually a row there already. If not, create
    # an IntSet and stuff it in first.
    if indexRow is _marker:
        self._index[entry] = documentId
        # XXX _length needs to be migrated to Length object
        try:
            self._length.change(1)
        except AttributeError:
            if isinstance(self.__len__, BTrees.Length.Length):
                self._length = self.__len__
                del self.__len__
            self._length.change(1)
    else:
        try:
            indexRow.insert(documentId)
        except AttributeError:
            # index row is an int
            indexRow = IITreeSet((indexRow, documentId))
            self._index[entry] = indexRow

def test_None(self):
    bigsize = BIGSETSIZE
    large = IITreeSet(xrange(bigsize))
    print '\nIntersection large, None'
    self.timing(large, None)
    print '\nIntersection None, large'
    self.timing(None, large)

def _apply_index(self, request, cid=''):
    '''see 'PluggableIndex'.

    What is *cid* for???
    '''
    __traceback_info__ = self.id
    record = parseIndexRequest(request, self.id, self.query_options)
    terms = record.keys
    if not terms:
        return
    __traceback_info__ = self.id, terms
    if len(terms) == 1:
        if isinstance(terms[0], StringTypes):
            terms = self._getLexicon().termToWordIds(terms[0])
            if not terms:
                return None, self.id
    r = self._search(IITreeSet(terms), intersection, record)
    if record.get('phrase'):
        phrase = self._val2UnindexVal(terms)
        filter = lambda did, idx=self._unindex: phrase in idx[did]
        if record.get('isearch'):
            # maybe, we want to do something different when
            # 'dm.incrementalsearch' is not available.
            # On the other hand, 'isearch' should not be called for then.
            from dm.incrementalsearch import IFilter_int, IAnd_int
            r = IAnd_int(r, IFilter_int(filter))
            r.complete()
        else:
            r = IISet(did for did in r.keys() if filter(did))
    return r, self.id

def test_small(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IITreeSet(xrange(smallsize))
    large = IITreeSet(xrange(smallsize))
    print '\nIntersection small tree sets'
    self.timing(small, large)
    small = IISet(xrange(smallsize))
    large = IISet(xrange(smallsize))
    print '\nIntersection small sets'
    self.timing(small, large)
    small = set(xrange(bigsize))
    large = set(xrange(bigsize))
    self.pytiming(small, large)

def __init__(self, set):
    '''query returning *set*.

    *set* must be an 'IISet' or 'IITreeSet' of catalog record ids.
    '''
    if not isinstance(set, (IISet, IITreeSet)):
        set = IITreeSet(set)
    self._set = set

def group(self, seq):
    sortIndex = self._sortIndex
    sortReverse = self._sortReverse
    ns = len(seq)
    ni = len(sortIndex)
    if ns >= 0.1 * ni:
        # result large compared to index -- sort via index
        handled = IISet()
        hn = 0
        _load = getattr(sortIndex, '_load', None)
        if _load is None:
            # not an optimized index
            items = sortIndex.items()
            _load = lambda (x1, x2): x2
            if sortReverse:
                items.reverse()
        elif sortReverse:
            gRO = getattr(sortIndex, 'getReverseOrder', None)
            items = gRO and gRO()
            if items is None:
                items = list(sortIndex._index.keys())
                items.reverse()
        else:
            items = sortIndex._index.keys()
        for i in items:
            ids = intersection(seq, _load(i))
            if ids:
                handled.update(ids)
                hn += len(ids)
                yield i, ids
        if hn != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        m = OOBTree()
        keyFor = getattr(sortIndex, 'keyForDocument', None)
        # work around "nogopip" bug: it defines "keyForDocument" as an integer
        if not callable(keyFor):
            # this will fail, when the index neither defines a reasonable
            # "keyForDocument" nor "documentToKeyMap". In this case,
            # the index cannot be used for sorting.
            keyFor = lambda doc, map=sortIndex.documentToKeyMap(): map[doc]
        noValue = IITreeSet()
        for doc in seq.keys():
            try:
                k = keyFor(doc)
            except KeyError:
                noValue.insert(doc)
                continue
            l = m.get(k)
            if l is None:
                l = m[k] = IITreeSet()
            l.insert(doc)
        items = m.items()
        if sortReverse:
            items = list(items)
            items.reverse()
        for i in items:
            yield i
        if noValue:
            yield None, noValue

def __init__(self, principalId):
    self.index = OIBTree()
    self.messages = IOBTree()
    self.services = OOBTree()
    self.readstatus = IITreeSet()
    self.principalId = principalId
    self._next = Length(1)

def _insertAux(self, index, term, docId):
    '''index *docId* under *term*.'''
    dl = index.get(term)
    if dl is None:
        index[term] = docId
        return
    if isinstance(dl, int):
        dl = index[term] = IITreeSet((dl, ))
    dl.insert(docId)

class FilteredSetBase(Persistent):
    # A pre-calculated result list based on an expression.

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        self.ids = IITreeSet()

    def index_object(self, documentId, obj):
        raise NotImplementedError('index_object not defined')

    def unindex_object(self, documentId):
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        # Get the expression.
        return self.expr

    def getIds(self):
        # Get the IDs of all objects for which the expression is True.
        return self.ids

    def getType(self):
        return self.meta_type

    def setExpression(self, expr):
        # Set the expression.
        self.expr = expr

    def __repr__(self):
        return '{0}: ({1}) {2}'.format(self.id, self.expr, list(self.ids))

    __str__ = __repr__

def clear(self):
    """Start over fresh."""
    self._always = IITreeSet()
    self._since_only = IOBTree()
    self._until_only = IOBTree()
    self._since = IOBTree()
    self._until = IOBTree()
    self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
    self._length = Length()

class FilteredSetBase(Persistent):
    # A pre-calculated result list based on an expression.

    implements(IFilteredSet)

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        self.ids = IITreeSet()

    def index_object(self, documentId, obj):
        raise RuntimeError, 'index_object not defined'

    def unindex_object(self, documentId):
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        # Get the expression.
        return self.expr

    def getIds(self):
        # Get the IDs of all objects for which the expression is True.
        return self.ids

    def getType(self):
        return self.meta_type

    def setExpression(self, expr):
        # Set the expression.
        self.expr = expr

    def __repr__(self):
        return '%s: (%s) %s' % (self.id, self.expr, map(None, self.ids))

    __str__ = __repr__

def index_object(self, docid, obj, threshold=100):
    """ hook for (Z)Catalog """
    # PathIndex first checks for an attribute matching its id and
    # falls back to getPhysicalPath only when failing to get one.
    # If self.indexed_attrs is not None, its value overrides this behavior.
    attrs = self.indexed_attrs
    index = attrs is None and self.id or attrs[0]

    path = getattr(obj, index, None)
    if path is not None:
        if safe_callable(path):
            path = path()
        if not isinstance(path, (str, tuple)):
            raise TypeError('path value must be string or tuple '
                            'of strings: (%r, %s)' % (index, repr(path)))
    else:
        try:
            path = obj.getPhysicalPath()
        except AttributeError:
            return 0

    if isinstance(path, (list, tuple)):
        path = '/' + '/'.join(path[1:])
    comps = [p for p in path.split('/') if p]

    # Make sure we reindex properly when the path changes.
    old_path = self._unindex.get(docid, _marker)
    if old_path is not _marker:
        if old_path != path:
            self.unindex_object(docid, _old=old_path)
            # unindex reduces length, we need to counter that
            self._length.change(1)
    else:
        # We only get a new entry if the value wasn't there before.
        # If it already existed the length is unchanged.
        self._length.change(1)

    for i, comp in enumerate(comps):
        self.insertEntry(comp, docid, i)

    # Add terminator
    self.insertEntry(None, docid, len(comps) - 1)

    # Add full-path indexes, to optimize certain edge cases
    parent_path = '/' + '/'.join(comps[:-1])
    parents = self._index_parents.get(parent_path, _marker)
    if parents is _marker:
        self._index_parents[parent_path] = parents = IITreeSet()
    parents.insert(docid)
    self._index_items[path] = docid

    self._unindex[docid] = path
    return 1

def _insert(self, term, docId, _isInstance=isinstance, _IntType=int):
    '''index *docId* under *term*.'''
    i, k = self._findDocList(term, 1)[-1]
    dl = i.get(k)
    if dl is None:
        i[k] = docId
        self.__len__.change(1)
        return
    if _isInstance(dl, _IntType):
        dl = i[k] = IITreeSet((dl, ))
    dl.insert(docId)

def clear(self):
    self._index = IITreeSet()
    self._index_length = BTrees.Length.Length()
    self._index_value = 1
    self._unindex = IIBTree()
    self._length = BTrees.Length.Length()
    if self._counter is None:
        self._counter = BTrees.Length.Length()
    else:
        self._increment_counter()

def _insert(self, term, docId, _isInstance=isinstance, _IntType=IntType):
    '''index *docId* under *term*.'''
    index = self._index
    dl = index.get(term)
    if dl is None:
        index[term] = docId
        self.__len__.change(1)
        if self.ReverseOrder:
            self._reverseOrder.insert(reverseOrder(term))
        return
    if _isInstance(dl, _IntType):
        dl = index[term] = IITreeSet((dl, ))
    dl.insert(docId)

def test_even_dist(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IISet(xrange(0, bigsize, bigsize / smallsize))
    large = IITreeSet(xrange(bigsize))
    print '\nDifference Small set even distribution + large treeset'
    self.timing(small, large)
    small = IISet(xrange(0, bigsize, bigsize / smallsize))
    large = IISet(xrange(bigsize))
    print '\nDifference Small set even distribution + large set'
    self.timing(small, large)

def optimize_rangeindex_int_iiset(index):
    # migrate internal int and IISet to IITreeSet
    logger.info('Converting to IITreeSet for index `%s`.' % index.getId())
    for name in ('_since', '_since_only', '_until', '_until_only'):
        tree = getattr(index, name, None)
        if tree is not None:
            logger.info('Converting tree `%s`.' % name)
            i = 0
            for k, v in tree.items():
                if isinstance(v, IISet):
                    tree[k] = IITreeSet(v)
                    i += 1
                elif isinstance(v, int):
                    tree[k] = IITreeSet((v, ))
                    i += 1
                if i and i % 10000 == 0:
                    transaction.savepoint(optimistic=True)
                    logger.info('Processed %s items.' % i)
            transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')

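# The savepoint-per-10000-items idiom above generalizes to any large ZODB
# migration; a hedged sketch (the `convert` callable and batch size are
# illustrative, not part of the original code):
import transaction

def convert_in_batches(items, convert, batch_size=10000):
    for i, item in enumerate(items, 1):
        convert(item)
        if i % batch_size == 0:
            # flush pending changes so memory stays bounded
            transaction.savepoint(optimistic=True)
    transaction.savepoint(optimistic=True)
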
def insertEntry(self, comp, id, level):
    """ See IPathIndex
    """
    if not self._index.has_key(comp):
        self._index[comp] = IOBTree()
    if not self._index[comp].has_key(level):
        self._index[comp][level] = IITreeSet()

    self._index[comp][level].insert(id)
    if level > self._depth:
        self._depth = level

def test_heavy_end(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IISet(xrange(bigsize - smallsize, bigsize))
    large = IITreeSet(xrange(bigsize))
    print '\nDifference Small set high values + large treeset'
    self.timing(small, large)
    small = IISet(xrange(bigsize - smallsize, bigsize))
    large = IISet(xrange(bigsize))
    print '\nDifference Small set high values + large set'
    self.timing(small, large)

def insert(self, params):
    record = self.tuplify(params)

    # Determine the primary key.
    primary_key = []
    for position, column in self.col_info:
        if column.primary:
            if record[position] is None:
                raise ValueError, (
                    "No value provided for primary key column %s"
                    % repr(column.name))
            primary_key.append(record[position])
    if primary_key:
        primary_key = tuple(primary_key)
        if self.primary_index.has_key(primary_key):
            raise DuplicateError(
                "Primary key %s in use" % repr(primary_key))

    # Add a record.
    rid = self.next_rid
    self.next_rid += 1  # XXX Hotspot!
    record = (rid, ) + record[1:]
    self.data[rid] = record
    if primary_key:
        self.primary_index[primary_key] = rid

    # Add to indexes.
    for position, column in self.col_info:
        name = column.name
        value = record[position]
        if value is not None:
            if self.indexes.has_key(name):
                set = self.indexes[name].get(value)
                if set is None:
                    set = IITreeSet()
                    self.indexes[name][value] = set
                set.insert(rid)

    # Return the number of rows inserted.
    return 1

def group(self, seq):
    sortIndex = self._sortIndex
    sortReverse = self._sortReverse
    ns = len(seq)
    ni = len(sortIndex)
    if ns >= 0.1 * ni:
        # result large compared to index -- sort via index
        handled = IISet()
        hn = 0
        _load = getattr(sortIndex, '_load', None)
        if _load is None:
            # not an optimized index
            items = sortIndex.items()
            _load = lambda (x1, x2): x2
            if sortReverse:
                items.reverse()
        elif sortReverse:
            gRO = getattr(sortIndex, 'getReverseOrder', None)
            items = gRO and gRO()
            if items is None:
                items = list(sortIndex._index.keys())
                items.reverse()
        else:
            items = sortIndex._index.keys()
        for i in items:
            ids = intersection(seq, _load(i))
            if ids:
                handled.update(ids)
                hn += len(ids)
                yield i, ids
        if hn != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        keyFor = sortIndex.keyForDocument
        m = OOBTree()
        noValue = IITreeSet()
        for doc in seq.keys():
            try:
                k = keyFor(doc)
            except KeyError:
                noValue.insert(doc)
                continue
            l = m.get(k)
            if l is None:
                l = m[k] = IITreeSet()
            l.insert(doc)
        items = m.items()
        if sortReverse:
            items = list(items)
            items.reverse()
        for i in items:
            yield i
        if noValue:
            yield None, noValue

def test_even_dist(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IISet(xrange(0, bigsize, bigsize / smallsize))
    large = IITreeSet(xrange(smallsize))
    print '\nIntersection small set even distribution + small treeset'
    self.timing(small, large)
    small = IISet(xrange(0, bigsize, bigsize / smallsize))
    large = IITreeSet(xrange(bigsize))
    print '\nIntersection small set even distribution + large treeset'
    self.timing(small, large)
    small = IISet(xrange(0, bigsize, bigsize / smallsize))
    large = IISet(xrange(bigsize))
    print '\nIntersection small set even distribution + large set'
    self.timing(small, large)
    small = set(xrange(0, bigsize, bigsize / smallsize))
    large = set(xrange(bigsize))
    self.pytiming(small, large)

def test_heavy_start(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IISet(xrange(smallsize))
    large = IITreeSet(xrange(smallsize))
    print '\nIntersection small set low values + small treeset'
    self.timing(small, large)
    small = IISet(xrange(smallsize))
    large = IITreeSet(xrange(bigsize))
    print '\nIntersection small set low values + large treeset'
    self.timing(small, large)
    small = IISet(xrange(smallsize))
    large = IISet(xrange(bigsize))
    print '\nIntersection small set low values + large set'
    self.timing(small, large)
    small = set(xrange(smallsize))
    large = set(xrange(bigsize))
    self.pytiming(small, large)

def insertEntry(self, comp, id, level):
    """ See IPathIndex
    """
    tree = self._index.get(comp, None)
    if tree is None:
        self._index[comp] = tree = IOBTree()
    tree2 = tree.get(level, None)
    if tree2 is None:
        tree[level] = tree2 = IITreeSet()
    tree2.insert(id)
    if level > self._depth:
        self._depth = level

def insertForwardIndexEntry(self, entry, documentId):
    """Take the entry provided and put it in the correct place
    in the forward index.

    This will also deal with creating the entire row if necessary.
    """
    indexRow = self._index.get(entry, _marker)

    # Make sure there's actually a row there already. If not, create
    # a set and stuff it in first.
    if indexRow is _marker:
        # We always use a set to avoid getting conflict errors on
        # multiple threads adding a new row at the same time
        self._index[entry] = IITreeSet((documentId, ))
        self._length.change(1)
    else:
        try:
            indexRow.insert(documentId)
        except AttributeError:
            # Inline migration: index row with one element was an int at
            # first (before Zope 2.13).
            indexRow = IITreeSet((indexRow, documentId))
            self._index[entry] = indexRow

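# The except-AttributeError branch is the whole migration story: a legacy
# row holding a bare int has no .insert() method. A tiny self-contained
# illustration with invented docids:
from BTrees.IIBTree import IITreeSet

indexRow = 42                  # pre-Zope-2.13 single-document row
try:
    indexRow.insert(7)
except AttributeError:         # bare int: upgrade to a treeset
    indexRow = IITreeSet((indexRow, 7))
assert list(indexRow) == [7, 42]
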
def _reindex_doc(self, docid, text):
    # Touch as few docid->w(docid, score) maps in ._wordinfo as possible.
    old_wids = self.get_words(docid)
    new_wids = self._lexicon.sourceToWordIds(text)
    if old_wids == new_wids:
        return len(new_wids)

    old_wid2w, old_docw = self._get_frequencies(old_wids)
    new_wid2w, new_docw = self._get_frequencies(new_wids)

    old_widset = IITreeSet(old_wid2w.keys())
    new_widset = IITreeSet(new_wid2w.keys())

    in_both_widset = intersection(old_widset, new_widset)
    only_old_widset = difference(old_widset, in_both_widset)
    only_new_widset = difference(new_widset, in_both_widset)
    del old_widset, new_widset

    for wid in only_old_widset.keys():
        self._del_wordinfo(wid, docid)

    for wid in only_new_widset.keys():
        self._add_wordinfo(wid, new_wid2w[wid], docid)

    for wid in in_both_widset.keys():
        # For the Okapi indexer, the "if" will trigger only for words
        # whose counts have changed.  For the cosine indexer, the "if"
        # may trigger for every wid, since W(d) probably changed and
        # W(d) is divided into every score.
        newscore = new_wid2w[wid]
        if old_wid2w[wid] != newscore:
            self._add_wordinfo(wid, newscore, docid)

    self._docweight[docid] = new_docw
    self._docwords[docid] = WidCode.encode(new_wids)
    return len(new_wids)

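# The set algebra above partitions word ids into removed/kept/added groups;
# a small worked example with made-up wids:
from BTrees.IIBTree import IITreeSet, intersection, difference

old_widset = IITreeSet((1, 2, 3))   # wids of the old document text
new_widset = IITreeSet((2, 3, 4))   # wids of the new document text
in_both = intersection(old_widset, new_widset)
only_old = difference(old_widset, in_both)   # delete these from the index
only_new = difference(new_widset, in_both)   # add these to the index
assert list(only_old) == [1]
assert list(only_new) == [4]
assert list(in_both) == [2, 3]
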
def test_heavy_end(self):
    bigsize = BIGSETSIZE
    smallsize = SMALLSETSIZE
    small = IISet(xrange(bigsize - smallsize, bigsize))
    large = IITreeSet(xrange(smallsize))
    self.timing(small, large,
                'Intersection small set high values + small treeset')
    self.timing(large, small,
                'Intersection small treeset + small set high values')
    small = IISet(xrange(bigsize - smallsize, bigsize))
    large = IITreeSet(xrange(bigsize))
    self.timing(small, large,
                'Intersection small set high values + large treeset')
    self.timing(large, small,
                'Intersection large treeset + small set high values')
    small = IISet(xrange(bigsize - smallsize, bigsize))
    large = IISet(xrange(bigsize))
    self.timing(small, large,
                'Intersection small set high values + large set')
    self.timing(large, small,
                'Intersection large set + small set high values')

def convert_to_booleanindex(catalog, index):
    if isinstance(index, BooleanIndex):
        return
    logger.info('Converting index `%s` to BooleanIndex.' % index.getId())
    index.__class__ = BooleanIndex
    index._p_changed = True
    catalog._catalog._p_changed = True

    # convert _unindex from IOBTree to IIBTree
    sets = {0: IITreeSet(), 1: IITreeSet()}
    old_unindex = index._unindex
    index._unindex = _unindex = IIBTree()
    for k, v in old_unindex.items():
        # docid to value (True, False)
        value = int(bool(v))
        _unindex[k] = value
        sets[value].add(k)
    del old_unindex

    # convert _index from OOBTree to IITreeSet and set lengths
    false_length = len(sets[0])
    true_length = len(sets[1])
    index._length = Length(false_length + true_length)
    # we put the smaller set into the index
    if false_length < true_length:
        index._index_value = 0
        index._index_length = Length(false_length)
        index._index = sets[0]
        del sets[1]
    else:
        index._index_value = 1
        index._index_length = Length(true_length)
        index._index = sets[1]
        del sets[0]

    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')

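# For intuition, a self-contained sketch (invented docids; only the BTrees
# package is assumed) of the "store the smaller truth value" layout built
# above: membership in _index means the indexed value, everything else is
# derived as the complement.
from BTrees.IIBTree import IIBTree, IITreeSet, difference

_unindex = IIBTree({1: 1, 2: 0, 3: 0, 4: 0})   # docid -> 0/1
_index_value = 1                               # True is the minority value
_index = IITreeSet(r for r, v in _unindex.items() if v == _index_value)

true_rids = _index                                           # materialized
false_rids = difference(IITreeSet(_unindex.keys()), _index)  # derived
assert list(true_rids) == [1]
assert list(false_rids) == [2, 3, 4]
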
def index_object(self, documentId, obj, threshold=None):
    """Index an object.

    - ``documentId`` is the integer ID of the document.
    - ``obj`` is the object to be indexed.
    - ``threshold`` is the number of words to process between committing
      subtransactions. If None, subtransactions are disabled.

    For each name in ``getIndexSourceNames``, try to get the named
    attribute from ``obj``.

    - If the object does not have the attribute, do not add it to the
      index for that name.
    - If the attribute is a callable, call it to get the value. If
      calling it raises an AttributeError, do not add it to the index
      for that name.
    """
    # Clear the data structures before indexing the object. This ensures
    # we don't leave any stale data behind when an object gets reindexed.
    self.unindex_object(documentId)

    ### 1. Get the values.
    start = self._getattr(self.start_attr, obj)
    end = self._getattr(self.end_attr, obj)
    if start is None:
        # Ignore calls if the obj does not have the start field.
        return False
    if end is None:
        # Singular event
        end = start

    recurrence = self._getattr(self.recurrence_attr, obj)
    if not recurrence:
        rule = None
    elif isinstance(recurrence, basestring):
        # XXX trap and log errors
        rule = rrule.rrulestr(recurrence, dtstart=start)
    elif isinstance(recurrence, rrule.rrulebase):
        rule = recurrence
    else:
        # XXX Log error
        rule = None

    # Strip out times from the recurrence:
    if rule is not None:
        sync_timezone(rule, start.tzinfo)

    ### 2. Make them into what should be indexed.
    # XXX Naive events are not comparable to timezoned events, so we
    # convert everything to utctimetuple(). This means naive events are
    # assumed to be GMT, but we can live with that at the moment.
    start_value = start.utctimetuple()
    end_value = end.utctimetuple()

    # The end value should be the end of the recurrence, if any:
    if rule is not None:
        if is_open_ended(rule):
            # This recurrence is open ended
            end_value = None
        else:
            duration = end - start
            allrecs = [x for x in rule._iter()]
            if allrecs:
                last = allrecs[-1] + duration
            else:
                # Real data may have invalid recurrence rules, which end
                # before the start, for example. Then we end up here.
                last = end
            end_value = last.utctimetuple()

    ### 3. Store everything in the indexes:
    row = self._start2uid.get(start_value, None)
    if row is None:
        row = IITreeSet((documentId,))
        self._start2uid[start_value] = row
    else:
        row.insert(documentId)

    row = self._end2uid.get(end_value, None)
    if row is None:
        row = IITreeSet((documentId,))
        self._end2uid[end_value] = row
    else:
        row.insert(documentId)

    self._uid2start[documentId] = start_value
    self._uid2recurrence[documentId] = rule
    self._uid2end[documentId] = end_value
    self._uid2duration[documentId] = end - start

    return True

def remove(self, obj):
    return IITreeSet.remove(self, self._get_id(obj))

class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in
    CMF.

    Any object may return None for either the start or the end date: for
    the start date, this should be the logical equivalent of "since the
    beginning of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None
      for the end date);

    - Objects which match until a given time (i.e., they returned None
      for the start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'}, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None, floor_value=None,
                 ceiling_value=None):
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')
    def getFloorValue(self):
        """ """
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')
    def getCeilingValue(self):
        """ """
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """ """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum
        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in the
        form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            return

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_%s_%s_%s' % (self.__class__.__name__, self.id,
                             self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_%s' % (tid, )
        else:
            rid = '_inverse_%s' % (tid, )

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only,
                                 self._always])
            if cache is not None:
                cache[cachekey] = result
            return result
        else:
            # Compute the inverse and subtract from resultset
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result
            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be an integer fitting in the 32-bit range
            raise OverflowError('%s is not within the range of dates allowed '
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value

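# A compact sketch of the four-bucket range-query algebra used by
# query_index above, with one invented docid per bucket (term units are
# arbitrary "minutes"; only the BTrees package is assumed):
from BTrees.IIBTree import IITreeSet, intersection, multiunion
from BTrees.IOBTree import IOBTree

always = IITreeSet((1, ))                      # no start, no end
since_only = IOBTree({5: IITreeSet((2, ))})    # starts at 5, never ends
until_only = IOBTree({15: IITreeSet((3, ))})   # no start, ends at 15
since = IOBTree({8: IITreeSet((4, ))})         # bounded: starts at 8 ...
until = IOBTree({12: IITreeSet((4, ))})        # ... and ends at 12

term = 10
bounded = intersection(multiunion(until.values(term)),        # until >= term
                       multiunion(since.values(None, term)))  # since <= term
result = multiunion([bounded,
                     multiunion(until_only.values(term)),
                     multiunion(since_only.values(None, term)),
                     always])
assert list(result) == [1, 2, 3, 4]
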
def _wrapLookup(r):
    if not isinstance(r, (IISet, IITreeSet)):
        r = IITreeSet(r.keys())
    return r

def clear(self):
    self.ids = IITreeSet()

def readstatus(self):
    self.readstatus = IITreeSet()
    return self.readstatus

class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in
    CMF.

    Any object may return None for either the start or the end date: for
    the start date, this should be the logical equivalent of "since the
    beginning of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None
      for the end date);

    - Objects which match until a given time (i.e., they returned None
      for the start date);

    - Objects which match only during a specific interval.
    """
    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ("query", )

    manage_options = ({"label": "Properties",
                       "action": "manage_indexProperties"}, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None, floor_value=None,
                 ceiling_value=None):
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, "floor_value", None)
            ceiling_value = getattr(extra, "ceiling_value", None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, "getSinceField")
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, "getUntilField")
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, "getFloorValue")
    def getFloorValue(self):
        """ """
        return self.floor_value

    security.declareProtected(view, "getCeilingValue")
    def getCeilingValue(self):
        """ """
        return self.ceiling_value

    manage_indexProperties = DTMLFile("manageDateRangeIndex", _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, "manage_edit")
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """ """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST["RESPONSE"].redirect("%s/manage_main"
                                     "?manage_tabs_message=Updated"
                                     % REQUEST.get("URL2"))

    security.declarePrivate("_edit")
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, "clear")
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum
        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a list of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in the
        form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            t1 = self._since
            t2 = self._since_only
        else:
            t1 = self._until
            t2 = self._until_only

        result = []

        if not withLengths:
            result.extend(t1.keys())
            result.extend(t2.keys())
        else:
            for key in t1.keys():
                set = t1[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
            for key in t2.keys():
                set = t2[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))

        return tuple(result)

    def _cache_key(self, catalog):
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), "getCounter", None)
        if counter is not None:
            return "%s_%s" % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        REQUEST = aq_get(self, "REQUEST", None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                tid = isinstance(term, int) and term / 10 or "None"
                if resultset is None:
                    cachekey = "_daterangeindex_%s_%s" % (iid, tid)
                else:
                    cachekey = "_daterangeindex_inverse_%s_%s" % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached,
                                    (self._since_field, self._until_field))
                        else:
                            return (difference(resultset, cached),
                                    (self._since_field, self._until_field))

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))

            # Total result is bound by resultset
            if REQUEST is None:
                until = intersection(resultset, until)

            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only,
                                 self._always])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (result, (self._since_field, self._until_field))
        else:
            # Compute the inverse and subtract from resultset
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (difference(resultset, result),
                    (self._since_field, self._until_field))

    def _insert_migrate(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be an integer fitting in the 32-bit range
            raise OverflowError("%s is not within the range of dates allowed "
                                "by a DateRangeIndex" % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value

class MessageStorage(Persistent, Location):
    interface.implements(IMessageStorage)

    notify = True
    principalId = None

    def __init__(self, principalId):
        self.index = OIBTree()
        self.messages = IOBTree()
        self.services = OOBTree()
        self.readstatus = IITreeSet()
        self.principalId = principalId
        self._next = Length(1)

    @Lazy
    def readstatus(self):
        self.readstatus = IITreeSet()
        return self.readstatus

    @property
    def principal(self):
        try:
            return getUtility(IAuthentication).getPrincipal(self.principalId)
        except:
            return None

    @property
    def unread(self):
        unread = 0
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            unread = unread + service.unread()
        return unread

    def getMessage(self, messageId):
        return self.messages.get(messageId)

    def getServiceIds(self):
        return list(self.services.keys())

    def getService(self, serviceId):
        service = self.services.get(serviceId)
        if not IMessageService.providedBy(service):
            factory = getUtility(IMessageServiceFactory, serviceId)
            service = factory(self)
            self.services[serviceId] = service
        return service

    def create(self, serviceId, **data):
        """ create and append message to storage """
        id = self._next()
        self._next.change(1)

        service = self.getService(serviceId)
        msg = service.create(**data)

        date = datetime.now(ITZInfo(self.principal, pytz.utc))
        while date in self.index:
            date = date + timedelta

        msg.__id__ = id
        msg.__date__ = date

        self.index[date] = id
        self.messages[id] = msg
        self.readstatus.insert(id)

        service.append(msg)
        event.notify(MessageCreatedEvent(msg, self))
        return id

    def remove(self, messageId):
        message = self.messages.get(messageId)
        if message is None:
            return
        else:
            self.clearReadStatus(message)
            del self.index[message.__date__]
            del self.messages[message.__id__]

        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            service.remove(message)

        event.notify(MessageRemovedEvent(message, self))

    def readStatus(self, message):
        return message.__id__ in self.readstatus

    def clearReadStatus(self, message):
        if message.__id__ not in self.readstatus:
            return

        idx = message.__date__
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            if idx in service.index and service.unread() > 0:
                service.unread.change(-1)

        self.readstatus.remove(message.__id__)

class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId: [True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index

       False doesn't have actual entries in _index.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. In the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done.
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is the index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? then switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s. This should not happen.' % (
                        self.__class__.__name__,
                        str(documentId), str(self.id)))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? then switch the indexed value
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
        return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """index an object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId,
                                             check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, '
                                  'now its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail.
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)
        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s' % documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    return difference(resultset, index)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # return a list of value to int set of rid tuples
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items

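# The invert threshold above is plain arithmetic; a toy check of both
# trigger conditions with invented counts:
length, index_length = 10, 7      # 10 docs total, 7 carry the indexed value
assert index_length >= length * 0.6              # insert path: invert
assert not (index_length <= (length - 1) * 0.6)  # remove path: no invert
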
def clear(self):
    self._index = IITreeSet()
    self._index_length = BTrees.Length.Length()
    self._index_value = 1
    self._unindex = IIBTree()
    self._length = BTrees.Length.Length()

class DateRangeIndex(UnIndex):
    """Index a date range, such as the canonical "effective-expiration"
    range in the CMF.

    Any object may return None for either the start or the end date: for
    the start date, this should be the logical equivalent of "since the
    beginning of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None
      for the end date);

    - Objects which match until a given time (i.e., they returned None
      for the start date);

    - Objects which match only during a specific interval.
    """
    __implements__ = (PluggableIndex.PluggableIndexInterface, )

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'}, )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None):
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected(VIEW_PERMISSION, 'getSinceField')
    def getSinceField(self):
        """ """
        return self._since_field

    security.declareProtected(VIEW_PERMISSION, 'getUntilField')
    def getUntilField(self):
        """ """
        return self._until_field

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(INDEX_MGMT_PERMISSION, 'manage_edit')
    def manage_edit(self, since_field, until_field, REQUEST):
        """ """
        self._edit(since_field, until_field)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field

    security.declareProtected(INDEX_MGMT_PERMISSION, 'clear')
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a list of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in the
        form '(value, length)'.
        """
        if not name in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            t1 = self._since
            t2 = self._since_only
        else:
            t1 = self._until
            t2 = self._until_only

        result = []
        IntType = type(0)

        if not withLengths:
            result.extend(t1.keys())
            result.extend(t2.keys())
        else:
            for key in t1.keys():
                set = t1[key]
                if type(set) is IntType:
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
            for key in t2.keys():
                set = t2[key]
                if type(set) is IntType:
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))

        return tuple(result)

    def _apply_index(self, request, cid=''):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        If the request contains a parameter with the name of the column
        + "_usage", sniff for information on how to handle applying the
        index.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.
        """
        record = parseIndexRequest(request, self.getId())
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        #until_only = IISet()
        #map(until_only.update, self._until_only.values(term))
        # XXX use multi-union
        until_only = multiunion(self._until_only.values(term))

        #since_only = IISet()
        #map(since_only.update, self._since_only.values(None, term))
        # XXX use multi-union
        since_only = multiunion(self._since_only.values(None, term))

        #until = IISet()
        #map(until.update, self._until.values(term))
        # XXX use multi-union
        until = multiunion(self._until.values(term))

        #since = IISet()
        #map(since.update, self._since.values(None, term))
        # XXX use multi-union
        since = multiunion(self._since.values(None, term))

        bounded = intersection(until, since)

        #   Merge from smallest to largest.
        #result = union(self._always, until_only)
        result = union(bounded, until_only)
        result = union(result, since_only)
        #result = union(result, bounded)
        result = union(result, self._always)

        return result, (self._since_field, self._until_field)

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected(VIEW_PERMISSION, 'numObjects')
    def numObjects(self):
        """ """
        return len(self._unindex)

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            set = self._until_only.get(until, None)
            if set is None:
                set = self._until_only[until] = IISet()  # XXX: Store an int?
            set.insert(documentId)
        elif until is None:
            set = self._since_only.get(since, None)
            if set is None:
                set = self._since_only[since] = IISet()  # XXX: Store an int?
            set.insert(documentId)
        else:
            set = self._since.get(since, None)
            if set is None:
                set = self._since[since] = IISet()  # XXX: Store an int?
            set.insert(documentId)

            set = self._until.get(until, None)
            if set is None:
                set = self._until[until] = IISet()  # XXX: Store an int?
            set.insert(documentId)

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            set = self._until_only.get(until, None)
            if set is not None:
                set.remove(documentId)
                if not set:
                    del self._until_only[until]
        elif until is None:
            set = self._since_only.get(since, None)
            if set is not None:
                set.remove(documentId)
                if not set:
                    del self._since_only[since]
        else:
            set = self._since.get(since, None)
            if set is not None:
                set.remove(documentId)
                if not set:
                    del self._since[since]

            set = self._until.get(until, None)
            if set is not None:
                set.remove(documentId)
                if not set:
                    del self._until[until]

    def _convertDateTime(self, value):
        if value is None:
            return value
        if type(value) == type(''):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        if isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        return int(value)

def __contains__(self, obj):
    return IITreeSet.__contains__(self, self._get_id(obj))

def __iter__(self):
    for item in IITreeSet.__iter__(self):
        yield self._get_object(item)