def group(self, seq):
    """Yield ``(sort value, document ids)`` groups of *seq* in sort order.

    Two strategies are used, selected by comparing ``len(seq)`` with the
    length of the sort index:

    * *seq* has at least 10% of the index length: walk the index in
      (possibly reversed) order and intersect each entry with *seq*;
    * otherwise: look up the key of every document in *seq* and collect
      the documents per key in an ``OOBTree``.

    Documents without a value in the index are yielded last as
    ``(None, ids)``.
    """
    sortIndex = self._sortIndex
    sortReverse = self._sortReverse
    ns = len(seq)
    ni = len(sortIndex)
    if ns >= 0.1 * ni:
        # result large compared to index -- sort via index
        handled = IISet()
        hn = 0
        _load = getattr(sortIndex, '_load', None)
        if _load is None:
            # not an optimized index: iterate over (key, document ids)
            # pairs.  Copy into a list so that reversing neither fails on
            # a non-list result nor mutates a list owned by the index.
            items = list(sortIndex.items())
            # explicit indexing replaces the Python-2-only tuple parameter
            _key = lambda item: item[0]
            _load = lambda item: item[1]
            if sortReverse:
                items.reverse()
        else:
            # optimized index: iterate over keys, "_load" fetches the ids
            _key = lambda key: key
            if sortReverse:
                gRO = getattr(sortIndex, 'getReverseOrder', None)
                items = gRO and gRO()
                if items is None:
                    items = list(sortIndex._index.keys())
                    items.reverse()
            else:
                items = sortIndex._index.keys()
        for i in items:
            ids = intersection(seq, _load(i))
            if ids:
                handled.update(ids)
                hn += len(ids)
                # always yield the key itself as sort value, never the
                # whole (key, document ids) item of a non-optimized index
                yield _key(i), ids
        if hn != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        m = OOBTree()
        keyFor = getattr(sortIndex, 'keyForDocument', None)
        # work around "nogopip" bug: it defines "keyForDocument" as an integer
        if not callable(keyFor):
            # this will fail, when the index neither defines a reasonable
            # "keyForDocument" nor "documentToKeyMap". In this case,
            # the index cannot be used for sorting.
            docToKey = sortIndex.documentToKeyMap()
            keyFor = lambda doc: docToKey[doc]
        noValue = IITreeSet()
        for doc in seq.keys():
            try:
                k = keyFor(doc)
            except KeyError:
                noValue.insert(doc)
                continue
            docs = m.get(k)
            if docs is None:
                docs = m[k] = IITreeSet()
            docs.insert(doc)
        items = m.items()
        if sortReverse:
            items = list(items)
            items.reverse()
        for i in items:
            yield i
        if noValue:
            yield None, noValue
def getPositions(self, docid, wordid):
    """Return the positions at which *wordid* occurs in document *docid*.

    The word-id sequence stored for the document in ``self._doc2wid`` is
    decoded and scanned; the result is an ``IITreeSet`` holding the
    zero-based position of every entry equal to *wordid*.
    """
    # The encoded form of the word id was computed here but never used;
    # the comparison below works on decoded word ids.
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions
def getPositions(self, docid, wordid):
    """Return the positions at which *wordid* occurs in document *docid*.

    The word-id sequence stored for the document in ``self._doc2wid`` is
    decoded and scanned; the result is an ``IITreeSet`` holding the
    zero-based position of every entry equal to *wordid*.
    """
    # No encoded copy of the word id is needed: matching is done against
    # the decoded word ids of the document.
    encoded_document = self._doc2wid[docid].get()
    positions = IITreeSet()
    for pos, wid in enumerate(decode(encoded_document)):
        if wid == wordid:
            positions.insert(pos)
    return positions
def group(self, seq):
    """Yield ``(sort value, document ids)`` groups of *seq* in sort order.

    Two strategies are used, selected by comparing ``len(seq)`` with the
    length of the sort index:

    * *seq* has at least 10% of the index length: walk the index in
      (possibly reversed) order and intersect each entry with *seq*;
    * otherwise: look up the key of every document in *seq* and collect
      the documents per key in an ``OOBTree``.

    Documents without a value in the index are yielded last as
    ``(None, ids)``.
    """
    sortIndex = self._sortIndex
    sortReverse = self._sortReverse
    ns = len(seq)
    ni = len(sortIndex)
    if ns >= 0.1 * ni:
        # result large compared to index -- sort via index
        handled = IISet()
        hn = 0
        _load = getattr(sortIndex, '_load', None)
        if _load is None:
            # not an optimized index: iterate over (key, document ids)
            # pairs.  Copy into a list so that reversing neither fails on
            # a non-list result nor mutates a list owned by the index.
            items = list(sortIndex.items())
            # explicit indexing replaces the Python-2-only tuple parameter
            _key = lambda item: item[0]
            _load = lambda item: item[1]
            if sortReverse:
                items.reverse()
        else:
            # optimized index: iterate over keys, "_load" fetches the ids
            _key = lambda key: key
            if sortReverse:
                gRO = getattr(sortIndex, 'getReverseOrder', None)
                items = gRO and gRO()
                if items is None:
                    items = list(sortIndex._index.keys())
                    items.reverse()
            else:
                items = sortIndex._index.keys()
        for i in items:
            ids = intersection(seq, _load(i))
            if ids:
                handled.update(ids)
                hn += len(ids)
                # always yield the key itself as sort value, never the
                # whole (key, document ids) item of a non-optimized index
                yield _key(i), ids
        if hn != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        m = OOBTree()
        keyFor = getattr(sortIndex, 'keyForDocument', None)
        # work around "nogopip" bug: it defines "keyForDocument" as an integer
        if not callable(keyFor):
            # this will fail, when the index neither defines a reasonable
            # "keyForDocument" nor "documentToKeyMap". In this case,
            # the index cannot be used for sorting.
            docToKey = sortIndex.documentToKeyMap()
            keyFor = lambda doc: docToKey[doc]
        noValue = IITreeSet()
        for doc in seq.keys():
            try:
                k = keyFor(doc)
            except KeyError:
                noValue.insert(doc)
                continue
            docs = m.get(k)
            if docs is None:
                docs = m[k] = IITreeSet()
            docs.insert(doc)
        items = m.items()
        if sortReverse:
            items = list(items)
            items.reverse()
        for i in items:
            yield i
        if noValue:
            yield None, noValue
def insert(self, params):
    """Insert one record built from *params*; return the number of rows (1).

    *params* is converted to a record tuple by ``self.tuplify``.  The
    values of all columns flagged ``primary`` form the primary key.  The
    record is stored under a freshly allocated record id, registered in
    the primary index and added to every column index that exists for
    one of its non-``None`` values.

    Raises ``ValueError`` when a primary key column has no value and
    ``DuplicateError`` when the primary key is already in use.
    """
    record = self.tuplify(params)

    # Determine the primary key.
    primary_key = []
    for position, column in self.col_info:
        if column.primary:
            if record[position] is None:
                raise ValueError(
                    "No value provided for primary key column %s"
                    % repr(column.name))
            primary_key.append(record[position])
    if primary_key:
        primary_key = tuple(primary_key)
        if primary_key in self.primary_index:
            raise DuplicateError("Primary key %s in use" % repr(primary_key))

    # Add a record.
    rid = self.next_rid
    self.next_rid += 1  # XXX Hotspot!
    # the first slot of the stored record always carries the record id
    record = (rid, ) + record[1:]
    self.data[rid] = record
    if primary_key:
        self.primary_index[primary_key] = rid

    # Add to indexes.
    for position, column in self.col_info:
        name = column.name
        value = record[position]
        if value is not None and name in self.indexes:
            index = self.indexes[name]
            rids = index.get(value)
            if rids is None:
                rids = IITreeSet()
                index[value] = rids
            rids.insert(rid)

    # Return the number of rows inserted.
    return 1
def group(self, seq):
    """Yield ``(sort value, document ids)`` groups of *seq* in sort order.

    Two strategies are used, selected by comparing ``len(seq)`` with the
    length of the sort index:

    * *seq* has at least 10% of the index length: walk the index in
      (possibly reversed) order and intersect each entry with *seq*;
    * otherwise: look up the key of every document in *seq* and collect
      the documents per key in an ``OOBTree``.

    Documents without a value in the index are yielded last as
    ``(None, ids)``.
    """
    sortIndex = self._sortIndex
    sortReverse = self._sortReverse
    ns = len(seq)
    ni = len(sortIndex)
    if ns >= 0.1 * ni:
        # result large compared to index -- sort via index
        handled = IISet()
        hn = 0
        _load = getattr(sortIndex, '_load', None)
        if _load is None:
            # not an optimized index: iterate over (key, document ids)
            # pairs.  Copy into a list so that reversing neither fails on
            # a non-list result nor mutates a list owned by the index.
            items = list(sortIndex.items())
            # explicit indexing replaces the Python-2-only tuple parameter
            _key = lambda item: item[0]
            _load = lambda item: item[1]
            if sortReverse:
                items.reverse()
        else:
            # optimized index: iterate over keys, "_load" fetches the ids
            _key = lambda key: key
            if sortReverse:
                gRO = getattr(sortIndex, 'getReverseOrder', None)
                items = gRO and gRO()
                if items is None:
                    items = list(sortIndex._index.keys())
                    items.reverse()
            else:
                items = sortIndex._index.keys()
        for i in items:
            ids = intersection(seq, _load(i))
            if ids:
                handled.update(ids)
                hn += len(ids)
                # always yield the key itself as sort value, never the
                # whole (key, document ids) item of a non-optimized index
                yield _key(i), ids
        if hn != len(seq):
            yield None, difference(seq, handled)
    else:
        # result relatively small -- sort via result
        m = OOBTree()
        keyFor = getattr(sortIndex, 'keyForDocument', None)
        if not callable(keyFor):
            # "keyForDocument" is missing or not usable as a function:
            # fall back to the document-to-key mapping.  This fails when
            # the index offers neither; such an index cannot be used for
            # sorting.
            docToKey = sortIndex.documentToKeyMap()
            keyFor = lambda doc: docToKey[doc]
        noValue = IITreeSet()
        for doc in seq.keys():
            try:
                k = keyFor(doc)
            except KeyError:
                noValue.insert(doc)
                continue
            docs = m.get(k)
            if docs is None:
                docs = m[k] = IITreeSet()
            docs.insert(doc)
        items = m.items()
        if sortReverse:
            items = list(items)
            items.reverse()
        for i in items:
            yield i
        if noValue:
            yield None, noValue
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value shared by all documents in _index

       Only documents whose value equals self._index_value have entries
       in _index; documents with the other value appear in _unindex only.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset all data structures and create or bump the counter."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Switch the indexed value and rebuild _index from _unindex."""
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                # arguments are passed separately so the message is only
                # formatted when the record is actually emitted
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s.  This should not happen.',
                    self.__class__.__name__, documentId, self.id)
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            # NOTE(review): the comment says "larger" while the test
            # below is "<=" -- confirm which one is intended.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the document ids matching the first key of *record*.

        Only the first entry of ``record.keys`` is evaluated; an empty
        ``IISet`` is returned when there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    # restrict to documents known to this index, so that
                    # ids in resultset which were never indexed are not
                    # reported as carrying the non-indexed value
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value shared by all documents in _index

       Only documents whose value equals self._index_value have entries
       in _index; documents with the other value appear in _unindex only.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    # None marks an instance that has not been migrated yet, see
    # _inline_migration
    _index_length = None

    def clear(self):
        """Reset all data structures to their empty state."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Switch the indexed value and rebuild _index from _unindex."""
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def _inline_migration(self):
        """Create the length counters for an instance lacking them."""
        self._length = BTrees.Length.Length(len(self._unindex.keys()))
        self._index_length = BTrees.Length.Length(len(self._index))
        if self._index_length.value > (self._length.value / 2):
            self._index_value = 1
            self._invert_index()
        else:
            # set an instance variable
            self._index_value = 1

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        # when we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                # arguments are passed separately so the message is only
                # formatted when the record is actually emitted
                LOG.exception(
                    '%s: unindex_object could not remove '
                    'documentId %s from index %s.  This '
                    'should not happen.',
                    self.__class__.__name__, documentId, self.id)
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            # NOTE(review): the comment says "larger" while the test
            # below is "<=" -- confirm which one is intended.
            if self._index_length.value <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId, exc_info=True)

    def _apply_index(self, request, resultset=None):
        """Apply the index to *request*.

        Returns ``None`` when the request holds no query for this index,
        otherwise a tuple ``(document ids, (index id,))``.  Only the
        first query key is evaluated.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return (intersection(index, resultset), (self.id, ))
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return (union(difference(self._unindex, index), IISet([])),
                            (self.id, ))
                else:
                    # restrict to documents known to this index, so that
                    # ids in resultset which were never indexed are not
                    # reported as carrying the non-indexed value
                    return (intersection(difference(resultset, index),
                                         self._unindex),
                            (self.id, ))
        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the indexed value

       The document ids in self._index have self._index_value as their value.
       Since there are only two possible values (True/False), the index only
       stores a forward index for the less common value. It starts off with
       the opposite of value of the first document and later checks and
       inverts itself, if more than 60% of all documents now have the indexed
       value. It does the inversion at 60% to avoid inverting itself
       constantly for an index that has a roughly equal 50/50 split.
    """

    meta_type = 'BooleanIndex'

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ['query']

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset all data structures and create or bump the counter."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Switch the indexed value and rebuild _index from _unindex."""
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%(context)s: unindex_object could not '
                    'remove documentId %(doc_id)s from '
                    'index %(index)r.  This should not '
                    'happen.',
                    dict(context=self.__class__.__name__,
                         doc_id=documentId,
                         index=self.id))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            # NOTE(review): the comment says "larger" while the test
            # below is "<=" -- confirm which one is intended.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error(
                            'Should not happen: oldDatum was there, now '
                            'its not, for document with id %s',
                            documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the document ids matching the first key of *record*.

        Only the first entry of ``record.keys`` is evaluated; an empty
        ``IISet`` is returned when there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (value, set of rids) tuples for both values."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of tuples of
        (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return

        indexed = bool(self._index_value)
        unique_values = (indexed, not indexed)
        if not withLengths:
            for key in unique_values:
                yield key
        else:
            # both sizes are the same for every key: compute them once
            # instead of once per iteration
            ilen = len(self._index)
            ulen = len(self._unindex)
            for key in unique_values:
                if key is indexed:
                    yield (key, ilen)
                else:
                    yield (key, ulen - ilen)
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    __implements__ = UnIndex.__implements__
    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'},
                      )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None):
        """Set up an empty index; field names given in *extra* win."""
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected(VIEW_PERMISSION, 'getSinceField')

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(VIEW_PERMISSION, 'getUntilField')

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(INDEX_MGMT_PERMISSION, 'manage_edit')

    def manage_edit(self, since_field, until_field, REQUEST):
        """Update the field names and redirect to the management page.
        """
        self._edit(since_field, until_field)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')

    def _edit(self, since_field, until_field):
        """
            Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field

    security.declareProtected(INDEX_MGMT_PERMISSION, 'clear')

    def clear(self):
        """
            Start over fresh.
        """
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = BTrees.Length.Length()

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject(self, documentId, default=None):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold

            Return 1 when the index was changed, 0 otherwise.
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)

        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '( value, length )'.
        """
        if name not in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            trees = (self._since, self._since_only)
        else:
            trees = (self._until, self._until_only)

        result = []
        for tree in trees:
            if not withLengths:
                result.extend(tree.keys())
                continue
            for key in tree.keys():
                docs = tree[key]
                # a plain int value counts as a single document
                if isinstance(docs, int):
                    length = 1
                else:
                    length = len(docs)
                result.append((key, length))

        return tuple(result)

    def _apply_index(self, request, cid=''):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parameters, then
            return None.

            Otherwise return two objects.  The first object is a set
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of the two
            indexed fields.

            Only the first query key is evaluated.
        """
        record = parseIndexRequest(request, self.getId())
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        until_only = multiunion(self._until_only.values(term))
        since_only = multiunion(self._since_only.values(None, term))
        until = multiunion(self._until.values(term))
        since = multiunion(self._since.values(None, term))

        bounded = intersection(until, since)

        #   Merge from smallest to largest.
        result = union(bounded, until_only)
        result = union(result, since_only)
        result = union(result, self._always)

        return result, (self._since_field, self._until_field)

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected(VIEW_PERMISSION, 'numObjects')

    def numObjects(self):
        """Return the number of entries in the reverse index."""
        return len(self._unindex)

    def indexSize(self):
        """Return ``len(self)``."""
        return len(self)

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry(self, since, until, documentId):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)

        elif since is None:
            docs = self._until_only.get(until, None)
            if docs is None:
                docs = self._until_only[until] = IISet()  # XXX: Store an int?
            docs.insert(documentId)

        elif until is None:
            docs = self._since_only.get(since, None)
            if docs is None:
                docs = self._since_only[since] = IISet()  # XXX: Store an int?
            docs.insert(documentId)

        else:
            # a bounded range is registered under both of its end points
            docs = self._since.get(since, None)
            if docs is None:
                docs = self._since[since] = IISet()  # XXX: Store an int?
            docs.insert(documentId)

            docs = self._until.get(until, None)
            if docs is None:
                docs = self._until[until] = IISet()  # XXX: Store an int?
            docs.insert(documentId)

    def _removeForwardIndexEntry(self, since, until, documentId):
        """
            Remove 'documentId' from the appropriate set based on
            'datum'.  Sets left empty are deleted from their tree.
        """
        if since is None and until is None:
            self._always.remove(documentId)

        elif since is None:
            docs = self._until_only.get(until, None)
            if docs is not None:
                docs.remove(documentId)
                if not docs:
                    del self._until_only[until]

        elif until is None:
            docs = self._since_only.get(since, None)
            if docs is not None:
                docs.remove(documentId)
                if not docs:
                    del self._since_only[since]

        else:
            docs = self._since.get(since, None)
            if docs is not None:
                docs.remove(documentId)
                if not docs:
                    del self._since[since]

            docs = self._until.get(until, None)
            if docs is not None:
                docs.remove(documentId)
                if not docs:
                    del self._until[until]

    def _convertDateTime(self, value):
        """Convert *value* to an int of minutes; None is passed through.

        Strings are parsed with ``DateTime``; ``DateTime`` values are
        flattened from milliseconds to minutes.  Raises ``OverflowError``
        when the result does not fit into a plain int.
        """
        if value is None:
            return value
        if isinstance(value, str):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        if isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        result = int(value)
        if isinstance(result, long):  # this won't work (Python 2.3)
            # the two literals are concatenated: keep the separating space
            raise OverflowError('%s is not within the range of dates allowed '
                                'by a DateRangeIndex' % value)
        return result
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'}, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None,
                 floor_value=None, ceiling_value=None):
        """Set the index id, configure the range fields and start empty.

        When 'extra' is truthy its 'since_field' / 'until_field' attributes
        (and optional 'floor_value' / 'ceiling_value') override the keyword
        arguments.  'caller' is accepted but not used here.
        """
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)
        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')
    def getFloorValue(self):
        """"""
        # Docstring deliberately left empty, exactly as before.
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')
    def getCeilingValue(self):
        """"""
        # Docstring deliberately left empty, exactly as before.
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """Update the index configuration and redirect to 'manage_main'.
        """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.

        'floor_value' and 'ceiling_value' are only stored (as ints) when
        given; otherwise the current values stay in effect.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')
    def clear(self):
        """Start over fresh.
        """
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document

        - 'obj' is the object to be indexed

        - ignore threshold

        Return 1 when the stored (since, until) datum changed, else 0.
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a list of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.

        An empty list is returned when 'name' is neither of the two
        configured fields; otherwise the result is a tuple.
        """
        if name not in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            trees = (self._since, self._since_only)
        else:
            trees = (self._until, self._until_only)

        result = []
        for tree in trees:
            if not withLengths:
                result.extend(tree.keys())
                continue
            for key in tree.keys():
                entry = tree[key]
                # A bare int stands for a single document id.
                if isinstance(entry, int):
                    length = 1
                else:
                    length = len(entry)
                result.append((key, length))

        return tuple(result)

    def _cache_key(self, catalog):
        """Return the REQUEST key under which results for 'catalog' are cached.

        The key is the catalog id, suffixed with the value of the catalog's
        'getCounter' when it provides one.
        """
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), 'getCounter', None)
        if counter is not None:
            return '%s_%s' % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.

        When 'resultset' is given, the non-matching documents are computed
        and subtracted from it instead of building the full match set.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        fields = (self._since_field, self._until_field)

        # 'cache' stays None unless both a REQUEST and a catalog are found.
        cache = cachekey = None
        REQUEST = aq_get(self, 'REQUEST', None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                # Terms are bucketed in steps of ten for the cache key.  A
                # conditional expression is used so that a bucket of 0 is
                # not mistaken for "no term".
                if isinstance(term, int):
                    tid = term // 10
                else:
                    tid = 'None'
                if resultset is None:
                    cachekey = '_daterangeindex_%s_%s' % (iid, tid)
                else:
                    cachekey = '_daterangeindex_inverse_%s_%s' % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, fields)
                        return (difference(resultset, cached), fields)

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if cache is not None:
                cache[cachekey] = result

            return (result, fields)

        # Compute the inverse and subtract from res.
        # NOTE(review): _convertDateTime returns None for dates outside
        # [floor_value, ceiling_value]; 'term - 1' would then raise
        # TypeError here -- confirm the intended handling.
        until_only = multiunion(self._until_only.values(None, term - 1))
        since_only = multiunion(self._since_only.values(term + 1))
        until = multiunion(self._until.values(None, term - 1))
        since = multiunion(self._since.values(term + 1))

        result = multiunion([until_only, since_only, until, since])
        if cache is not None:
            cache[cachekey] = result

        return (difference(resultset, result), fields)

    def _insert_migrate(self, tree, key, value):
        """Add document id 'value' under 'key' in 'tree'.

        A first entry is stored as the bare int; existing int or IISet
        entries are converted to an IITreeSet when a value is added.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = value
        elif isinstance(treeset, int):
            tree[key] = IITreeSet((treeset, value))
        elif isinstance(treeset, IISet):
            tree[key] = IITreeSet(treeset)
            tree[key].insert(value)
        else:
            treeset.insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove document id 'value' from 'key' in 'tree'.

        The key itself is deleted when its entry is a bare int or when the
        set becomes empty.  A missing key is ignored.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            return
        if isinstance(treeset, int):
            del tree[key]
        else:
            treeset.remove(value)
            if not treeset:
                del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Convert 'value' to an int number of minutes, or None.

        None is passed through.  Strings, datetime and DateTime values are
        flattened to minutes via DateTime.millis(); anything else is handed
        to int().  Values beyond +/-MAX32 raise OverflowError; values outside
        [floor_value, ceiling_value] are returned as None.
        """
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError('%s is not within the range of dates allowed '
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
class MessageStorage(Persistent, Location):
    """Persistent store of messages for one principal.

    Messages are kept by integer id, indexed by their creation date, and
    handed to per-service objects that track their own unread counters.
    """
    interface.implements(IMessageStorage)

    notify = True
    principalId = None

    def __init__(self, principalId):
        self.index = OIBTree()         # message date -> message id
        self.messages = IOBTree()      # message id -> message
        self.services = OOBTree()      # service id -> service object
        # ids are inserted by create() and removed by clearReadStatus()
        self.readstatus = IITreeSet()
        self.principalId = principalId
        self._next = Length(1)         # next message id to hand out

    @Lazy
    def readstatus(self):
        # Used when the instance has no stored 'readstatus' attribute
        # (presumably instances created before __init__ set it -- confirm).
        self.readstatus = IITreeSet()
        return self.readstatus

    @property
    def principal(self):
        """The principal for 'principalId', or None when lookup fails."""
        try:
            return getUtility(IAuthentication).getPrincipal(self.principalId)
        except Exception:
            # Best-effort lookup: any ordinary failure means "no principal".
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            return None

    @property
    def unread(self):
        """Sum of the unread counters of all services."""
        total = 0
        for serviceId in self.services.keys():
            total = total + self.getService(serviceId).unread()
        return total

    def getMessage(self, messageId):
        """Return the message stored under 'messageId', or None."""
        return self.messages.get(messageId)

    def getServiceIds(self):
        """Return the ids of all known services as a list."""
        return list(self.services.keys())

    def getService(self, serviceId):
        """Return the service for 'serviceId', creating it when needed.

        When nothing providing IMessageService is stored under the id, the
        IMessageServiceFactory utility named 'serviceId' builds one, which
        is then stored.
        """
        service = self.services.get(serviceId)
        if not IMessageService.providedBy(service):
            factory = getUtility(IMessageServiceFactory, serviceId)
            service = factory(self)
            self.services[serviceId] = service
        return service

    def create(self, serviceId, **data):
        """ create and append message to storage """
        msg_id = self._next()
        self._next.change(1)

        service = self.getService(serviceId)
        msg = service.create(**data)

        date = datetime.now(ITZInfo(self.principal, pytz.utc))
        # Dates are index keys, so nudge the timestamp until it is unused.
        # NOTE(review): 'timedelta' is added to a datetime here, so it must
        # be bound to a timedelta *instance* at module level; if it is the
        # datetime.timedelta class this raises TypeError -- confirm.
        while date in self.index:
            date = date + timedelta

        msg.__id__ = msg_id
        msg.__date__ = date

        self.index[date] = msg_id
        self.messages[msg_id] = msg
        self.readstatus.insert(msg_id)

        service.append(msg)
        event.notify(MessageCreatedEvent(msg, self))
        return msg_id

    def remove(self, messageId):
        """Remove the message with 'messageId'; unknown ids are ignored."""
        message = self.messages.get(messageId)
        if message is None:
            return

        self.clearReadStatus(message)

        del self.index[message.__date__]
        del self.messages[message.__id__]

        for serviceId in self.services.keys():
            self.getService(serviceId).remove(message)

        event.notify(MessageRemovedEvent(message, self))

    def readStatus(self, message):
        """Return whether the message's id is in 'readstatus'."""
        return message.__id__ in self.readstatus

    def clearReadStatus(self, message):
        """Drop the message from 'readstatus' and adjust unread counters.

        Every service whose index contains the message date and whose
        unread counter is positive gets that counter decremented by one.
        """
        if message.__id__ not in self.readstatus:
            return

        idx = message.__date__
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            if idx in service.index and service.unread() > 0:
                service.unread.change(-1)

        self.readstatus.remove(message.__id__)
class DateRangeIndex(UnIndex):
    """
        Index a date range, such as the canonical "effective-expiration"
        range in the CMF.  Any object may return None for either the
        start or the end date:  for the start date, this should be
        the logical equivalent of "since the beginning of time";  for the
        end date, "until the end of time".

        Therefore, divide the space of indexed objects into four containers:

        - Objects which always match (i.e., they returned None for both);

        - Objects which match after a given time (i.e., they returned None
          for the end date);

        - Objects which match until a given time (i.e., they returned None
          for the start date);

        - Objects which match only during a specific interval.
    """

    __implements__ = ( PluggableIndex.PluggableIndexInterface, )

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options= ( { 'label'     : 'Properties'
                      , 'action'    : 'manage_indexProperties'
                      }
                    ,
                    )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None):
        """Set the index id, configure the range fields and start empty.

        When 'extra' is truthy its 'since_field' / 'until_field' attributes
        override the keyword arguments.  'caller' is accepted but not used
        here.
        """
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected( VIEW_PERMISSION , 'getSinceField' )
    def getSinceField( self ):
        """
            Return the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected( VIEW_PERMISSION , 'getUntilField' )
    def getUntilField( self ):
        """
            Return the name of the attribute indexed as end date.
        """
        return self._until_field

    manage_indexProperties = DTMLFile( 'manageDateRangeIndex', _dtmldir )

    security.declareProtected( INDEX_MGMT_PERMISSION , 'manage_edit' )
    def manage_edit( self, since_field, until_field, REQUEST ):
        """
            Update the range fields and redirect to 'manage_main'.
        """
        self._edit( since_field, until_field )
        REQUEST[ 'RESPONSE' ].redirect( '%s/manage_main'
                                        '?manage_tabs_message=Updated'
                                      % REQUEST.get('URL2')
                                      )

    security.declarePrivate( '_edit' )
    def _edit( self, since_field, until_field ):
        """
            Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field

    security.declareProtected( INDEX_MGMT_PERMISSION , 'clear' )
    def clear( self ):
        """
            Start over fresh.
        """
        self._always        = IITreeSet()
        self._since_only    = IOBTree()
        self._until_only    = IOBTree()
        self._since         = IOBTree()
        self._until         = IOBTree()
        self._unindex       = IOBTree() # 'datum' will be a tuple of date ints

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject( self, documentId, default=None ):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get( documentId, default )

    def index_object( self, documentId, obj, threshold=None ):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold

            Return 1 when the stored (since, until) datum changed, else 0.
        """
        if self._since_field is None:
            return 0

        since = getattr( obj, self._since_field, None )
        if callable( since ):
            since = since()
        since = self._convertDateTime( since )

        until = getattr( obj, self._until_field, None )
        if callable( until ):
            until = until()
        until = self._convertDateTime( until )

        datum = ( since, until )

        old_datum = self._unindex.get( documentId, None )
        if datum == old_datum: # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry( old_since, old_until, documentId )

        self._insertForwardIndexEntry( since, until, documentId )
        self._unindex[ documentId ] = datum

        return 1

    def unindex_object( self, documentId ):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get( documentId, None )

        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry( since, until, documentId )
        del self._unindex[ documentId ]

    def uniqueValues( self, name=None, withLengths=0 ):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '( value, length )'.

            An empty list is returned when 'name' is neither of the two
            configured fields; otherwise the result is a tuple.
        """
        if name not in ( self._since_field, self._until_field ):
            return []

        if name == self._since_field:
            trees = ( self._since, self._since_only )
        else:
            trees = ( self._until, self._until_only )

        result = []

        # The flag tested here is the 'withLengths' parameter; the former
        # spelling 'withValues' was an undefined name.
        for tree in trees:
            if not withLengths:
                result.extend( tree.keys() )
                continue
            for key in tree.keys():
                docids = tree[ key ]
                # A bare int stands for a single document id.
                if isinstance( docids, int ):
                    length = 1
                else:
                    length = len( docids )
                result.append( ( key, length ) )

        return tuple( result )

    def _apply_index( self, request, cid='' ):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parameters, then
            return None.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.

            'cid' is accepted but not used here.
        """
        record = parseIndexRequest( request, self.getId() )
        if record.keys is None:
            return None

        term = self._convertDateTime( record.keys[0] )

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        until_only = multiunion( self._until_only.values( term ) )
        since_only = multiunion( self._since_only.values( None, term ) )
        until = multiunion( self._until.values( term ) )
        since = multiunion( self._since.values( None, term ) )

        bounded = intersection( until, since )

        #   Merge from smallest to largest.
        result = union( bounded, until_only )
        result = union( result, since_only )
        result = union( result, self._always )

        return result, ( self._since_field, self._until_field )

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected( VIEW_PERMISSION , 'numObjects' )
    def numObjects( self ):
        """
            Return the number of indexed documents.
        """
        return len( self._unindex )

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry( self, since, until, documentId ):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.
        """
        if since is None and until is None:
            self._always.insert( documentId )
            return

        if since is None:
            targets = ( ( self._until_only, until ), )
        elif until is None:
            targets = ( ( self._since_only, since ), )
        else:
            targets = ( ( self._since, since ), ( self._until, until ) )

        for tree, key in targets:
            docids = tree.get( key, None )
            if docids is None:
                docids = tree[ key ] = IISet()  # XXX: Store an int?
            docids.insert( documentId )

    def _removeForwardIndexEntry( self, since, until, documentId ):
        """
            Remove 'documentId' from the appropriate set based on
            'datum'.
        """
        if since is None and until is None:
            self._always.remove( documentId )
            return

        if since is None:
            targets = ( ( self._until_only, until ), )
        elif until is None:
            targets = ( ( self._since_only, since ), )
        else:
            targets = ( ( self._since, since ), ( self._until, until ) )

        for tree, key in targets:
            docids = tree.get( key, None )
            if docids is not None:
                docids.remove( documentId )
                # Drop keys whose set became empty.
                if not docids:
                    del tree[ key ]

    def _convertDateTime( self, value ):
        """
            Convert 'value' to an int number of minutes.

            None is passed through.  Strings are parsed with DateTime;
            DateTime values are flattened to minutes via millis();
            anything else is handed to int().
        """
        if value is None:
            return value
        if isinstance( value, str ):
            value = DateTime( value )
        if isinstance( value, DateTime ):
            value = value.millis() / 1000 / 60 # flatten to minutes
        return int( value )
class BooleanIndex(UnIndex):
    """Index for boolean values.

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       Only documents whose value is true are listed in _index; a false
       value is recorded in _unindex alone.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        # Reset the length counter, the forward set and the reverse mapping.
        self._length = BTrees.Length.Length()
        self._index = IITreeSet()
        self._unindex = IIBTree()

    def insertForwardIndexEntry(self, entry, documentId):
        """Record 'documentId' in the tree set when 'entry' is true.
        """
        if not entry:
            return
        self._index.insert(documentId)
        self._length.change(1)

    def removeForwardIndexEntry(self, entry, documentId):
        """Drop 'documentId' from the tree set when 'entry' is true.

        Failures other than ConflictError are logged, not raised.
        """
        if not entry:
            return
        try:
            self._index.remove(documentId)
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            details = (self.__class__.__name__,
                       str(documentId),
                       str(self.id))
            LOG.exception(
                '%s: unindex_object could not remove documentId %s '
                'from index %s. This should not happen.' % details)

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index 'obj' under the integer id 'documentId'.

        Return 1 when the stored value changed, 0 otherwise.
        """
        # Fetch the value to index; _marker means there is none.
        datum = self._get_object_datum(obj, attr)

        # Normalize to 0/1 so it can be compared with the stored int.
        if datum is not _marker:
            datum = int(bool(datum))

        # Nothing to do when the stored value already matches.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum == oldDatum:
            return 0

        had_old = oldDatum is not _marker
        has_new = datum is not _marker

        if had_old:
            self.removeForwardIndexEntry(oldDatum, documentId)
            if not has_new:
                try:
                    del self._unindex[documentId]
                except ConflictError:
                    raise
                except Exception:
                    LOG.error('Should not happen: oldDatum was there, now '
                              'its not, for document with id %s' %
                              documentId)

        if has_new:
            if datum:
                self.insertForwardIndexEntry(datum, documentId)
            self._unindex[documentId] = datum

        return 1

    def _apply_index(self, request, resultset=None):
        """Answer a query for this index; None when it is not addressed.

        Only the first query key is evaluated.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        true_ids = self._index

        for key in record.keys:
            if key:
                # True: restrict to the documents listed in the index.
                return (intersection(true_ids, resultset), (self.id, ))
            # False: everything known that is not listed in the index.
            if resultset is None:
                known_false = difference(self._unindex, true_ids)
                return (union(known_false, IISet([])), (self.id, ))
            return (difference(resultset, true_ids), (self.id, ))

        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # One (stored value, IISet((documentId, ))) pair per _unindex entry.
        return [
            (flag, IISet((docid, )) if isinstance(docid, int) else docid)
            for docid, flag in self._unindex.items()
        ]
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'}, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None,
                 floor_value=None, ceiling_value=None):
        """Set the index id, configure the range fields and start empty.

        When 'extra' is truthy its 'since_field' / 'until_field' attributes
        (and optional 'floor_value' / 'ceiling_value') override the keyword
        arguments.  'caller' is accepted but not used here.
        """
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)
        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')
    def getFloorValue(self):
        """Get the lowest value (in minutes) this index stores.
        """
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')
    def getCeilingValue(self):
        """Get the highest value (in minutes) this index stores.
        """
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """Update the index configuration and redirect to 'manage_main'.
        """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.

        'floor_value' and 'ceiling_value' are only stored (as ints) when
        given; otherwise the current values stay in effect.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        # Create the change counter on first use, otherwise bump it.
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document

        - 'obj' is the object to be indexed

        - ignore threshold

        Return 1 when the stored (since, until) datum changed, else 0.
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.

        This is a generator; it yields nothing when 'name' is neither of
        the two configured fields.
        """
        if name not in (self._since_field, self._until_field):
            # A bare return ends the generator.  Raising StopIteration
            # inside a generator body is turned into RuntimeError by
            # PEP 479.
            return

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    # A bare int stands for a single document id.
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        # Build the (index identifier, record identifier) pair under which
        # a query result is cached for the current request.
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_%s_%s_%s' % (self.__class__.__name__,
                             self.id, self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_%s' % (tid, )
        else:
            rid = '_inverse_%s' % (tid, )

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        """Apply the index to the query parameters in 'request'.

        Return None when the request does not address this index;
        otherwise a tuple of the query_index() result and the names of
        the two fields used.
        """
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        # Return the document ids whose range contains the query term.
        # With a 'resultset', the non-matching ids are computed (and
        # cached) and subtracted from it instead.
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only,
                                 self._always])
            if cache is not None:
                cache[cachekey] = result

            return result
        else:
            # Compute the inverse and subtract from res.
            # NOTE(review): _convertDateTime returns None for dates outside
            # [floor_value, ceiling_value]; 'term - 1' would then raise
            # TypeError here -- confirm the intended handling.
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        """Add document id 'value' to the IITreeSet under 'key' in 'tree'.

        Entries that are a bare int or another set type are replaced by an
        IITreeSet holding the old content plus 'value'.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        elif isinstance(treeset, IITreeSet):
            treeset.insert(value)
        elif isinstance(treeset, int):
            tree[key] = IITreeSet((treeset, value))
        else:
            tree[key] = IITreeSet(treeset)
            tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove document id 'value' from 'key' in 'tree'.

        The key itself is deleted when its entry is a bare int or when the
        set becomes empty.  A missing key is ignored.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            return
        if isinstance(treeset, int):
            del tree[key]
        else:
            treeset.remove(value)
            if not treeset:
                del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Convert 'value' to an int number of minutes, or None.

        None is passed through.  Strings, datetime and DateTime values are
        flattened to minutes via DateTime.millis(); anything else is handed
        to int().  Values beyond +/-MAX32 raise OverflowError; values outside
        [floor_value, ceiling_value] are returned as None.
        """
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError('%s is not within the range of dates allowed '
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
def update(self, filter, changes):
    """Apply ``changes`` to every record selected by ``filter``.

    Both arguments are converted with ``self.tuplify``.  A ``None`` entry
    in the converted ``changes`` tuple means "leave this column as it is".
    When ``self._select_rids`` returns ``None`` every record in
    ``self.data`` is updated.

    The record data, the primary key index and the per-column indexes in
    ``self.indexes`` are all brought up to date.

    Returns the number of selected records (0 when nothing is selected).

    Raises ``DuplicateError`` when the update would give a record a
    primary key that is already in use, either by a record that keeps
    its key or by another record changed in the same call.  The check
    runs before anything is modified.
    """
    rids = self._select_rids(self.tuplify(filter))
    if rids is None:
        rids = self.data.keys()
    elif not rids:
        # Nothing needs to be updated.
        return 0
    count = len(rids)

    record = self.tuplify(changes)
    # Positions of the primary key columns; invariant for the whole call.
    primary_positions = [
        position for position, column in self.col_info if column.primary]

    # Identify changes.
    old_data = {}    # rid -> old tuple
    new_data = {}    # rid -> new tuple
    old_to_new = {}  # old primary key -> new primary key
    new_to_rid = {}  # new primary key -> rid

    for rid in rids:
        old_r = self.data[rid]
        old_data[rid] = old_r
        new_r = list(old_r)
        for position, column in self.col_info:
            if record[position] is not None:
                new_r[position] = record[position]
        new_data[rid] = tuple(new_r)

        # NOTE: an update is allowed to change the primary key.  It might
        # be better to prevent primary key columns from being changed by
        # an update() call.
        opk = tuple([old_r[position] for position in primary_positions])
        npk = tuple([new_r[position] for position in primary_positions])
        if opk != npk:
            if npk in new_to_rid:
                # Two records of this very update would end up sharing a
                # primary key; only one of them could stay in the index.
                raise DuplicateError("Primary key %s in use" % repr(npk))
            old_to_new[opk] = npk
            new_to_rid[npk] = rid

    # Look for primary key conflicts.  A primary key conflict can
    # occur when changing a record to a different primary key and
    # the new primary key is already in use (and is not itself being
    # vacated by this update).
    for pk in old_to_new.values():
        if pk in self.primary_index and pk not in old_to_new:
            raise DuplicateError("Primary key %s in use" % repr(pk))

    # Update the data.
    self.data.update(new_data)

    # Remove old primary key indexes and insert new primary key indexes.
    for pk in old_to_new.keys():
        del self.primary_index[pk]
    self.primary_index.update(new_to_rid)

    # Update indexes.
    for rid, old_r in old_data.items():
        new_r = new_data[rid]
        for position, column in self.col_info:
            index = self.indexes.get(column.name)
            if index is None:
                continue
            old_value = old_r[position]
            # Compare against the value the record really has now.  The
            # raw change tuple holds None for untouched columns, which
            # must not be mistaken for "value removed".
            new_value = new_r[position]
            if new_value != old_value:
                if old_value is not None and old_value in index:
                    # Remove an index entry.
                    rid_set = index[old_value]
                    rid_set.remove(rid)
                    if not rid_set:
                        del index[old_value]
                if new_value is not None:
                    # Add an index entry.
                    rid_set = index.get(new_value)
                    if rid_set is None:
                        rid_set = IITreeSet()
                        index[new_value] = rid_set
                    rid_set.insert(rid)

    # Return the number of rows affected.
    return count
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for
      the end date);

    - Objects which match until a given time (i.e., they returned None for
      the start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = 'DateRangeIndex'
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'}, )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600
    # precision of indexed time interval in minutes
    precision_value = 1

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None,
                 floor_value=None, ceiling_value=None, precision_value=None):
        """Set up the index.

        When ``extra`` is given (and truthy) its ``since_field`` and
        ``until_field`` attributes, and its optional ``floor_value``,
        ``ceiling_value`` and ``precision_value`` attributes, replace the
        corresponding arguments.  ``caller`` is accepted but not used here.
        """
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)
            precision_value = getattr(extra, 'precision_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value,
                   precision_value)
        self.clear()

    @security.protected(view)
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    @security.protected(view)
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    @security.protected(view)
    def getFloorValue(self):
        """Get the lowest value (in minutes) kept by the index.
        """
        return self.floor_value

    @security.protected(view)
    def getCeilingValue(self):
        """Get the highest value (in minutes) kept by the index.
        """
        return self.ceiling_value

    @security.protected(view)
    def getPrecisionValue(self):
        """Get the precision (in minutes) of indexed values.
        """
        return self.precision_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    @security.protected(manage_zcatalog_indexes)
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, precision_value, REQUEST):
        """Store the submitted index settings and redirect to manage_main.
        """
        self._edit(since_field, until_field, floor_value, ceiling_value,
                   precision_value)
        REQUEST['RESPONSE'].redirect('{0}/manage_main'
                                     '?manage_tabs_message=Updated'.format(
                                         REQUEST.get('URL2')))

    @security.private
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None, precision_value=None):
        """Update the fields used to compute the range.

        ``floor_value``, ``ceiling_value`` and ``precision_value`` are only
        stored (converted with ``int``) when they are neither None nor the
        empty string; otherwise the current value is kept.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value not in (None, ''):
            self.floor_value = int(floor_value)
        if ceiling_value not in (None, ''):
            self.ceiling_value = int(ceiling_value)
        if precision_value not in (None, ''):
            self.precision_value = int(precision_value)

    @security.protected(manage_zcatalog_indexes)
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        # The change counter is created on first use and bumped on every
        # later clear.
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document

        - 'obj' is the object to be indexed

        - ignore threshold

        Return 1 when the index was changed and 0 when no since field is
        configured or the (since, until) pair is the one already stored.
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.

        Unknown document ids are ignored.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.

        This is a generator; it yields nothing when 'name' is neither the
        since field nor the until field.
        """
        if name not in (self._since_field, self._until_field):
            return

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    # A bare int entry counts as a single document.
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        """Build the request cache key for 'record'.

        The key is a tuple of an index identifier (class name, index id and
        current counter) and a record identifier derived from the converted
        query term.  Queries made with a 'resultset' get a separate
        "inverse" identifier.
        """
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_{0}_{1}_{2}'.format(self.__class__.__name__,
                                    self.id, self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_{0}'.format(tid)
        else:
            rid = '_inverse_{0}'.format(tid)

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        """Query the index with the parameters found in 'request'.

        Return None when the request holds no keys for this index,
        otherwise a tuple of the query result and the names of the two
        indexed fields.
        """
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        """Return the documents whose range contains the first query key.

        Without 'resultset' the matching set is computed directly.  With a
        'resultset' the non-matching documents are computed instead and
        subtracted from it.  The computed set (matches or inverse) is
        stored in the request cache when one is available.
        """
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if cache is not None:
                cache[cachekey] = result

            return result
        else:
            # Compute the inverse and subtract from res
            if term is None:
                # The term could not be converted (None or outside the
                # floor/ceiling range).  The branch above then queries
                # open-ended ranges and matches everything, so nothing is
                # excluded here either.  "term - 1" would raise TypeError.
                result = multiunion([])
            else:
                until_only = multiunion(
                    self._until_only.values(None, term - 1))
                since_only = multiunion(self._since_only.values(term + 1))
                until = multiunion(self._until.values(None, term - 1))
                since = multiunion(self._since.values(term + 1))

                result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        """Add 'value' to the set stored under 'key' in 'tree'.

        Entries that are not yet an IITreeSet are replaced by one.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                # presumably a bare document id stored by an older
                # version of the index -- verify before relying on it
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove 'value' from the set stored under 'key' in 'tree'.

        The key itself is deleted when its set becomes empty or when the
        entry is a bare int.  A missing key is ignored.
        """
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Convert 'value' with datetime_to_minutes.

        Return None when the conversion yields None or a value outside
        the floor_value..ceiling_value range.
        """
        value = datetime_to_minutes(value, self.precision_value)
        if value is None:
            return None
        if (value > self.ceiling_value or value < self.floor_value):
            # handle values outside our specified range
            return None
        return value
class DateRangeIndex(UnIndex):
    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for
      the end date);

    - Objects which match until a given time (i.e., they returned None for
      the start date);

    - Objects which match only during a specific interval.
    """

    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ("query",)

    manage_options = (
        {"label": "Properties", "action": "manage_indexProperties"},)

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None, caller=None,
                 extra=None, floor_value=None, ceiling_value=None):
        """Set up the index.

        When ``extra`` is given (and truthy) its ``since_field`` and
        ``until_field`` attributes, and its optional ``floor_value`` and
        ``ceiling_value`` attributes, replace the corresponding arguments.
        ``caller`` is accepted but not used here.
        """
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, "floor_value", None)
            ceiling_value = getattr(extra, "ceiling_value", None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, "getSinceField")

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, "getUntilField")

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, "getFloorValue")

    def getFloorValue(self):
        """Get the lowest value (in minutes) kept by the index.
        """
        return self.floor_value

    security.declareProtected(view, "getCeilingValue")

    def getCeilingValue(self):
        """Get the highest value (in minutes) kept by the index.
        """
        return self.ceiling_value

    manage_indexProperties = DTMLFile("manageDateRangeIndex", _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, "manage_edit")

    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """Store the submitted index settings and redirect to manage_main.
        """
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST["RESPONSE"].redirect(
            "%s/manage_main" "?manage_tabs_message=Updated"
            % REQUEST.get("URL2"))

    security.declarePrivate("_edit")

    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.

        ``floor_value`` and ``ceiling_value`` are only stored (converted
        with ``int``) when they are not None.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, "clear")

    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:

        - 'documentId' is the integer ID of the document

        - 'obj' is the object to be indexed

        - ignore threshold

        Return 1 when the index was changed and 0 when no since field is
        configured or the (since, until) pair is the one already stored.
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.

        Unknown document ids are ignored.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return the unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.

        The result is a tuple, except that an empty list is returned when
        'name' is neither the since field nor the until field.
        """
        if name not in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            trees = (self._since, self._since_only)
        else:
            trees = (self._until, self._until_only)

        result = []
        for tree in trees:
            if not withLengths:
                result.extend(tree.keys())
                continue
            for key in tree.keys():
                entry = tree[key]
                # A bare int entry counts as a single document.
                if isinstance(entry, int):
                    length = 1
                else:
                    length = len(entry)
                result.append((key, length))

        return tuple(result)

    def _cache_key(self, catalog):
        """Return the REQUEST key under which this catalog's cache lives.

        The key is the catalog id, suffixed with the value of the
        catalog's ``getCounter`` when it has one.
        """
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), "getCounter", None)
        if counter is not None:
            return "%s_%s" % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        fields = (self._since_field, self._until_field)
        term = self._convertDateTime(record.keys[0])

        # Results are cached on the REQUEST, per catalog, when both can be
        # reached through acquisition.
        cache = None
        cachekey = None
        REQUEST = aq_get(self, "REQUEST", None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                # Cache entries are shared by all terms of the same
                # ten-unit bucket.  A conditional expression is needed
                # here: "cond and x or y" would turn bucket 0 into "None".
                if isinstance(term, int):
                    tid = term // 10
                else:
                    tid = "None"
                if resultset is None:
                    cachekey = "_daterangeindex_%s_%s" % (iid, tid)
                else:
                    cachekey = "_daterangeindex_inverse_%s_%s" % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, fields)
                        else:
                            return (difference(resultset, cached), fields)

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion(
                [bounded, until_only, since_only, self._always])
            if cache is not None:
                cache[cachekey] = result

            return (result, fields)
        else:
            # Compute the inverse and subtract from res
            if term is None:
                # The term could not be converted (None or outside the
                # floor/ceiling range).  The branch above then queries
                # open-ended ranges and matches everything, so nothing is
                # excluded here either.  "term - 1" would raise TypeError.
                result = multiunion([])
            else:
                until_only = multiunion(
                    self._until_only.values(None, term - 1))
                since_only = multiunion(self._since_only.values(term + 1))
                until = multiunion(self._until.values(None, term - 1))
                since = multiunion(self._since.values(term + 1))

                result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return (difference(resultset, result), fields)

    def _insert_migrate(self, tree, key, value):
        """Add 'value' to the set stored under 'key' in 'tree'.

        Entries that are not yet an IITreeSet are replaced by one.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value,))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                # presumably a bare document id stored by an older
                # version of the index -- verify before relying on it
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove 'value' from the set stored under 'key' in 'tree'.

        The key itself is deleted when its set becomes empty or when the
        entry is a bare int.  A missing key is ignored.
        """
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Convert 'value' to an int number of minutes.

        None is passed through.  Strings, datetime and DateTime instances
        are flattened to minutes; any other value is used as given.

        Return None for results outside floor_value..ceiling_value.

        Raise OverflowError for results outside -MAX32..MAX32.
        """
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError(
                "%s is not within the range of dates allowed "
                "by a DateRangeIndex" % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
def index_object(self, documentId, obj, threshold=None):
    """Index the start, end and recurrence of ``obj`` under ``documentId``.

    - ``documentId`` is the ID under which the values are stored.
    - ``obj`` is the object to be indexed; its values are read through
      ``self._getattr`` using ``start_attr``, ``end_attr`` and
      ``recurrence_attr``.
    - ``threshold`` is accepted but not used by this method.

    Any existing entry for ``documentId`` is removed first.  Returns
    False (nothing indexed) when the object has no start value and True
    otherwise.  An object without an end value is treated as a singular
    event ending at its start.
    """
    def _remember(mapping, key):
        # Add documentId to the row stored under key, creating the row
        # when the key is new.
        uids = mapping.get(key, None)
        if uids is None:
            mapping[key] = IITreeSet((documentId,))
        else:
            uids.insert(documentId)

    # Drop whatever is stored for this document so that reindexing
    # cannot leave stale data behind.
    self.unindex_object(documentId)

    ### 1. Get the values.
    start = self._getattr(self.start_attr, obj)
    end = self._getattr(self.end_attr, obj)
    if start is None:
        # Without a start value there is nothing to index.
        return False
    if end is None:
        # Singular event
        end = start

    recurrence = self._getattr(self.recurrence_attr, obj)
    rule = None
    if recurrence:
        if isinstance(recurrence, basestring):
            # XXX trap and log errors
            rule = rrule.rrulestr(recurrence, dtstart=start)
        elif isinstance(recurrence, rrule.rrulebase):
            rule = recurrence
        # XXX anything else should be logged as an error

    if rule is not None:
        # Strip out times from the recurrence:
        sync_timezone(rule, start.tzinfo)

    ### 2. Make them into what should be indexed.
    # XXX Naive events are not comparable to timezoned events, so
    # everything is converted with utctimetuple().  This means naive events
    # are assumed to be GMT, but we can live with that at the moment.
    start_value = start.utctimetuple()
    end_value = end.utctimetuple()
    duration = end - start

    # The end value should be the end of the recurrence, if any:
    if rule is not None:
        if is_open_ended(rule):
            # This recurrence is open ended
            end_value = None
        else:
            occurrences = list(rule._iter())
            # Real data may have invalid recurrence rules, which end
            # before the start for example.  Those yield no occurrences
            # and fall back to the plain end.
            last = occurrences[-1] + duration if occurrences else end
            end_value = last.utctimetuple()

    ### 3. Store everything in the indexes:
    _remember(self._start2uid, start_value)
    _remember(self._end2uid, end_value)

    self._uid2start[documentId] = start_value
    self._uid2recurrence[documentId] = rule
    self._uid2end[documentId] = end_value
    self._uid2duration[documentId] = duration

    return True