Ejemplo n.º 1
0
 def _combine_union(self, values, object):
   """Return an 'IITreeSet' union of the standardized *values*, or None.

   Each value is run through '_standardizeValue_' (with *object* passed
   along); falsy standardized values are skipped.  Returns None when
   *values* is empty or nothing standardizes to a truthy value.
   """
   if not values: return
   # Renamed from 'set' to avoid shadowing the builtin.  The 'object'
   # parameter name is kept for interface compatibility with callers.
   result = None
   for v in values:
     sv = self._standardizeValue_(v, object)
     if not sv: continue
     if result is None: result = IITreeSet(sv)
     else: result.update(sv)
   return result
Ejemplo n.º 2
0
 def clear( self ):
     """Throw away all existing index state and start from scratch."""
     self._always = IITreeSet()
     self._since_only = IOBTree()
     self._until_only = IOBTree()
     self._since = IOBTree()
     self._until = IOBTree()
     # Each _unindex value will be a tuple of date ints.
     self._unindex = IOBTree()
     self._length = Length()
Ejemplo n.º 3
0
 def _insert_migrate(self, tree, key, value):
     """Insert *value* under *key* in *tree*, migrating legacy entries.

     A key may historically map to a bare int (one doc) or an 'IISet';
     either form is promoted to an 'IITreeSet' on the next insert.
     """
     current = tree.get(key, None)
     if current is None:
         # First document for this key: store the bare value.
         tree[key] = value
     elif isinstance(current, int):
         # Legacy single int -> tree set holding both values.
         tree[key] = IITreeSet((current, value))
     elif isinstance(current, IISet):
         # Legacy IISet -> promote, then add the new value.
         tree[key] = IITreeSet(current)
         tree[key].insert(value)
     else:
         current.insert(value)
Ejemplo n.º 4
0
    def getPositions(self, docid, wordid):
        """ return a sequence of positions of occurrences of wordid within
            a document given by its docid.
        """
        # Removed the unused 'encoded_wid = encode((wordid, ))' computation:
        # its result was never read.
        encoded_document = self._doc2wid[docid].get()

        # Walk the decoded word-id stream and record every offset at which
        # *wordid* appears.
        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
Ejemplo n.º 5
0
    def test_empty(self):
        """Benchmark intersecting an empty set with a large one, both ways."""
        big, tiny = BIGSETSIZE, 0

        flat_empty = IISet(xrange(tiny))
        tree_large = IITreeSet(xrange(big))
        self.timing(flat_empty, tree_large,
                    'Intersection empty set + large treeset')
        self.timing(tree_large, flat_empty,
                    'Intersection large treeset + empty set')

        tree_empty = IITreeSet(xrange(tiny))
        flat_large = IISet(xrange(big))
        self.timing(tree_empty, flat_large,
                    'Intersection empty tree set + large set')
        self.timing(flat_large, tree_empty,
                    'Intersection large set + empty tree set')
Ejemplo n.º 6
0
 def clear(self):
     """Reset every internal structure; create or bump the counter."""
     self._always = IITreeSet()
     self._since_only = IOBTree()
     self._until_only = IOBTree()
     self._since = IOBTree()
     self._until = IOBTree()
     # Each _unindex value will be a tuple of date ints.
     self._unindex = IOBTree()
     self._length = Length()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = Length()
Ejemplo n.º 7
0
    def test_empty(self):
        """Benchmark intersecting an empty set with a large one."""
        big, tiny = BIGSETSIZE, 0

        flat_empty = IISet(xrange(tiny))
        tree_large = IITreeSet(xrange(big))
        print('\nIntersection empty set + large treeset')
        self.timing(flat_empty, tree_large)

        tree_empty = IITreeSet(xrange(tiny))
        flat_large = IISet(xrange(big))
        print('\nIntersection empty tree set + large set')
        self.timing(tree_empty, flat_large)
Ejemplo n.º 8
0
    def getPositions(self, docid, wordid):
        """ return a sequence of positions of occurrences of wordid within
            a document given by its docid.
        """
        # Removed the unused 'encoded_wid = encode((wordid,))' computation:
        # its result was never read.
        encoded_document = self._doc2wid[docid].get()

        # Collect every offset in the decoded word-id stream that matches
        # *wordid*.
        positions = IITreeSet()
        for pos, wid in enumerate(decode(encoded_document)):
            if wid == wordid:
                positions.insert(pos)
        return positions
Ejemplo n.º 9
0
    def _insertForwardIndexEntry( self, since, until, documentId ):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.

            The same store-or-migrate logic was previously copy/pasted
            four times; it is factored into a local helper here.
        """
        def _store(tree, key):
            # Inline migration: a key maps to a bare int while it holds a
            # single document, and is promoted to an IITreeSet (also from
            # a legacy IISet) once a second document arrives.
            existing = tree.get( key, None )
            if existing is None:
                tree[ key ] = documentId
            elif isinstance(existing, (int, IISet)):
                tree[ key ] = IITreeSet((existing, documentId))
            else:
                existing.insert( documentId )

        if since is None and until is None:
            # Open-ended on both sides: always matches.
            self._always.insert( documentId )
        elif since is None:
            _store(self._until_only, until)
        elif until is None:
            _store(self._since_only, since)
        else:
            _store(self._since, since)
            _store(self._until, until)
Ejemplo n.º 10
0
    def _finalize_index(self, result, start, end, used_fields):
        """Filter *result* (doc ids) to events occurring within the query
        range, expanding recurrence rules where present.

        Non-recurring documents always match.  Returns the pair
        ``(filtered_result, used_fields)`` where ``filtered_result`` is an
        IITreeSet and ``used_fields`` is extended with the recurrence attr.
        """
        filtered_result = IITreeSet()
        # used_recurrence = False

        for documentId in result:
            recurrence = self._uid2recurrence.get(documentId)
            if recurrence is None:
                # This event isn't recurring, so it's a match:
                filtered_result.add(documentId)
                continue

            # used_recurrence = True
            match = False
            # This is a possible place where optimizations can be done if
            # necessary. For example, for periods where the start and end
            # date is the same, we can first check if the start time and
            # and time of the date falls inbetween the start and end times
            # of the period, so to avoid expansion. But most likely this
            # will have a very small impact on speed, so I skip this until
            # it actually becomes a problem.

            if start is not None:
                event_start = datetime(*self._uid2start[documentId][:6])
            else:
                event_start = None
            if end is not None:
                event_duration = self._uid2duration[documentId]
                # NOTE(review): when start is None but end is not,
                # event_start is None here and this addition raises
                # TypeError — confirm callers never pass that combination.
                event_end = event_start + event_duration
            else:
                event_end = None

            for occurrence in recurrence._iter():
                utc_occurrence = datetime(*occurrence.utctimetuple()[:6])
                if event_start is not None and utc_occurrence < event_start:
                    # XXX we should add a counter and break after 10000 occurrences.
                    continue
                if event_end is not None and utc_occurrence > event_end:
                    break

                # The start of this occurrence starts between the start and end date of
                # the query:
                match = True
                break

            if match:
                filtered_result.add(documentId)
            # if used_recurrence:
            # NOTE(review): this appends recurrence_attr once per recurring
            # document (duplicates accumulate); the commented-out
            # used_recurrence flag suggests it was meant to run at most once.
            used_fields += (self.recurrence_attr,)
        return filtered_result, used_fields
Ejemplo n.º 11
0
    def test_large(self):
        """Benchmark intersecting two identical large sets of each kind."""
        size = BIGSETSIZE / 10

        tree_a = IITreeSet(xrange(size))
        tree_b = IITreeSet(xrange(size))
        print('\nIntersection Large tree sets')
        self.timing(tree_a, tree_b)

        flat_a = IISet(xrange(size))
        flat_b = IISet(xrange(size))
        print('\nIntersection Large sets')
        self.timing(flat_a, flat_b)

        py_a = set(xrange(size))
        py_b = set(xrange(size))
        self.pytiming(py_a, py_b)
Ejemplo n.º 12
0
    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.

        This will also deal with creating the entire row if necessary.
        """
        indexRow = self._index.get(entry, _marker)

        # Make sure there's actually a row there already.  If not, create
        # an IntSet and stuff it in first.
        if indexRow is _marker:
            # A brand-new entry stores the single document id as a bare int;
            # it is promoted to a tree set on the second insert (below).
            self._index[entry] = documentId
            # XXX _length needs to be migrated to Length object
            try:
                self._length.change(1)
            except AttributeError:
                # Inline migration: presumably an older schema stored the
                # Length object under __len__ — adopt it as _length and
                # drop the attribute (TODO confirm against old releases).
                if isinstance(self.__len__, BTrees.Length.Length):
                    self._length = self.__len__
                    del self.__len__
                self._length.change(1)
        else:
            try: indexRow.insert(documentId)
            except AttributeError:
                # index row is an int
                indexRow=IITreeSet((indexRow, documentId))
                self._index[entry] = indexRow
Ejemplo n.º 13
0
 def test_None(self):
     """Benchmark intersections where one operand is None."""
     big_tree = IITreeSet(xrange(BIGSETSIZE))
     print('\nIntersection large, None')
     self.timing(big_tree, None)
     print('\nIntersection None, large')
     self.timing(None, big_tree)
Ejemplo n.º 14
0
    def _apply_index(self, request, cid=''):
        '''see 'PluggableIndex': apply this index to the query in *request*.

        *cid* is accepted for interface compatibility; it is not used in
        this method.  Returns None when the request has no terms for this
        index, otherwise the pair ``(result, self.id)``.
        '''
        __traceback_info__ = self.id

        record = parseIndexRequest(request, self.id, self.query_options)
        terms = record.keys
        if not terms: return

        __traceback_info__ = self.id, terms

        if len(terms) == 1:
            if isinstance(terms[0], StringTypes):
                # A single string query is translated to word ids via the
                # lexicon first.
                terms = self._getLexicon().termToWordIds(terms[0])
                if not terms: return None, self.id

        r = self._search(IITreeSet(terms), intersection, record)

        if record.get('phrase'):
            # Narrow the word-level result to documents whose stored
            # unindex value contains the exact phrase.
            phrase = self._val2UnindexVal(terms)
            filter = lambda did, idx=self._unindex: phrase in idx[did]
            if record.get('isearch'):
                # maybe, we want to do something different when 'dm.incrementalsearch'
                #  is not available.
                #  On the other hand, 'isearch' should not be called for then.
                from dm.incrementalsearch import IFilter_int, IAnd_int
                r = IAnd_int(r, IFilter_int(filter))
                r.complete()
            else:
                r = IISet((did for did in r.keys() if filter(did)))

        return r, self.id
Ejemplo n.º 15
0
    def test_small(self):
        """Benchmark intersecting two identical small sets of each kind.

        Fix: the plain-Python ``set`` case now uses ``smallsize`` like the
        other cases in this method; it previously built BIGSETSIZE-element
        sets, which does not measure the "small set" case at all.
        """
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE
        small = IITreeSet(xrange(smallsize))
        large = IITreeSet(xrange(smallsize))
        print('\nIntersection small tree sets')
        self.timing(small, large)

        small = IISet(xrange(smallsize))
        large = IISet(xrange(smallsize))
        print('\nIntersection small sets')
        self.timing(small, large)

        small = set(xrange(smallsize))
        large = set(xrange(smallsize))
        self.pytiming(small, large)
Ejemplo n.º 16
0
    def __init__(self, set):
        '''query returning *set*.

    *set* must be an 'IISet' or 'IITreeSet' of catalog record ids;
    any other iterable is converted to an 'IITreeSet'.
    '''
        if isinstance(set, (IISet, IITreeSet)):
            self._set = set
        else:
            self._set = IITreeSet(set)
Ejemplo n.º 17
0
 def group(self, seq):
   """Generate '(sort key, doc ids)' pairs for *seq* in sort order.

   Documents without a sort value are yielded last under the key None.
   Picks one of two strategies depending on the size of *seq* relative
   to the sort index.
   """
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index
       items = sortIndex.items()

       # Python-2-only tuple-parameter lambda: maps an item to its value.
       _load = lambda (x1, x2): x2
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       # Documents of seq that carry this index entry.
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     # Anything never matched has no sort value.
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     m = OOBTree()
     keyFor = getattr(sortIndex, 'keyForDocument', None)
     # work around "nogopip" bug: it defines "keyForDocument" as an integer
     if not callable(keyFor):
       # this will fail, when the index neither defines a reasonable
       # "keyForDocument" nor "documentToKeyMap". In this case,
       # the index cannot be used for sorting.
       keyFor = lambda doc, map=sortIndex.documentToKeyMap(): map[doc]
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Ejemplo n.º 18
0
    def __init__(self, principalId):
        """Create empty message storage for the given principal."""
        self.principalId = principalId
        self.index = OIBTree()
        self.messages = IOBTree()
        self.services = OOBTree()
        self.readstatus = IITreeSet()
        # Counter handing out message ids, starting at 1.
        self._next = Length(1)
Ejemplo n.º 19
0
 def _insertAux(self, index, term, docId):
     '''index *docId* under *term*.'''
     existing = index.get(term)
     if existing is None:
         # First document for this term is stored as a bare int.
         index[term] = docId
         return
     if isinstance(existing, int):
         # Promote the legacy single-int entry to a tree set.
         existing = index[term] = IITreeSet((existing, ))
     existing.insert(docId)
Ejemplo n.º 20
0
class FilteredSetBase(Persistent):
    # A pre-calculated result list based on an expression.

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        # Forget every previously indexed document id.
        self.ids = IITreeSet()

    def index_object(self, documentId, obj):
        # Subclasses decide how an object is evaluated against the expression.
        raise NotImplementedError('index_object not defined')

    def unindex_object(self, documentId):
        # Removing an id that was never indexed is not an error.
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        # Get the expression.
        return self.expr

    def getIds(self):
        # Get the IDs of all objects for which the expression is True.
        return self.ids

    def getType(self):
        return self.meta_type

    def setExpression(self, expr):
        # Set the expression.
        self.expr = expr

    def __repr__(self):
        # BUG FIX: the previous ``list(map(None, self.ids))`` is a Python 2
        # idiom that raises TypeError on Python 3 (this class otherwise uses
        # Python 3 constructs such as str.format and NotImplementedError);
        # ``list(self.ids)`` produces the same list of ids.
        return '{0}: ({1}) {2}'.format(
            self.id, self.expr,
            list(self.ids)
        )

    __str__ = __repr__
 def clear(self):
     """Discard all indexed data and re-create empty structures."""
     self._always = IITreeSet()
     self._since_only = IOBTree()
     self._until_only = IOBTree()
     self._since = IOBTree()
     self._until = IOBTree()
     self._unindex = IOBTree()  # each 'datum' value is a tuple of date ints
     self._length = Length()
Ejemplo n.º 22
0
class FilteredSetBase(Persistent):
    """Base for pre-calculated result sets driven by an expression."""

    implements(IFilteredSet)

    def __init__(self, id, expr):
        self.id = id
        self.expr = expr
        self.clear()

    def clear(self):
        # Forget every previously indexed document id.
        self.ids = IITreeSet()

    def index_object(self, documentId, obj):
        # Subclasses decide how an object is evaluated for membership.
        raise RuntimeError('index_object not defined')

    def unindex_object(self, documentId):
        # Removing an id that was never indexed is not an error.
        try:
            self.ids.remove(documentId)
        except KeyError:
            pass

    def getId(self):
        return self.id

    def getExpression(self):
        # The raw expression string.
        return self.expr

    def getIds(self):
        # IDs of all objects for which the expression is True.
        return self.ids

    def getType(self):
        return self.meta_type

    def setExpression(self, expr):
        # Replace the expression.
        self.expr = expr

    def __repr__(self):
        return '%s: (%s) %s' % (self.id, self.expr, list(self.ids))

    __str__ = __repr__
Ejemplo n.º 23
0
    def index_object(self, docid, obj, threshold=100):
        """ hook for (Z)Catalog

        Index *obj* under *docid*.  Returns 1 on success, 0 when no path
        can be determined for *obj*.
        NOTE(review): ``threshold`` is accepted but unused in this method.
        """

        # PathIndex first checks for an attribute matching its id and
        # falls back to getPhysicalPath only when failing to get one.
        # If self.indexed_attrs is not None, it's value overrides this behavior

        attrs = self.indexed_attrs
        index = attrs is None and self.id or attrs[0]

        path = getattr(obj, index, None)
        if path is not None:
            if safe_callable(path):
                path = path()

            if not isinstance(path, (str, tuple)):
                raise TypeError('path value must be string or tuple '
                                'of strings: (%r, %s)' % (index, repr(path)))
        else:
            try:
                path = obj.getPhysicalPath()
            except AttributeError:
                # Object has neither the attribute nor a physical path:
                # nothing to index.
                return 0

        if isinstance(path, (list, tuple)):
            # Physical paths start with an empty root component; drop it.
            path = '/' + '/'.join(path[1:])
        comps = [p for p in path.split('/') if p]

        # Make sure we reindex properly when path change
        old_path = self._unindex.get(docid, _marker)
        if old_path is not _marker:
            if old_path != path:
                self.unindex_object(docid, _old=old_path)
                # unindex reduces length, we need to counter that
                self._length.change(1)
        else:
            # We only get a new entry if the value wasn't there before.
            # If it already existed the length is unchanged
            self._length.change(1)

        # Index each path component at its depth.
        for i, comp in enumerate(comps):
            self.insertEntry(comp, docid, i)

        # Add terminator
        self.insertEntry(None, docid, len(comps) - 1)

        # Add full-path indexes, to optimize certain edge cases
        parent_path = '/' + '/'.join(comps[:-1])
        parents = self._index_parents.get(parent_path, _marker)
        if parents is _marker:
            self._index_parents[parent_path] = parents = IITreeSet()
        parents.insert(docid)
        self._index_items[path] = docid

        self._unindex[docid] = path
        return 1
Ejemplo n.º 24
0
 def _insert(self, term, docId, _isInstance=isinstance, _IntType=int):
     '''index *docId* under *term*.'''
     tree, key = self._findDocList(term, 1)[-1]
     entry = tree.get(key)
     if entry is None:
         # First document for this key: store a bare int and count it.
         tree[key] = docId
         self.__len__.change(1)
         return
     if _isInstance(entry, _IntType):
         # Promote the legacy single-int entry to a tree set.
         entry = tree[key] = IITreeSet((entry, ))
     entry.insert(docId)
Ejemplo n.º 25
0
 def clear(self):
     """Reset the boolean index, keeping a counter across clears."""
     self._index = IITreeSet()
     self._index_length = BTrees.Length.Length()
     self._index_value = 1
     self._unindex = IIBTree()
     self._length = BTrees.Length.Length()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = BTrees.Length.Length()
Ejemplo n.º 26
0
 def _insert(self, term, docId, _isInstance=isinstance, _IntType=IntType):
     '''index *docId* under *term*.'''
     index = self._index
     entry = index.get(term)
     if entry is None:
         # New term: store the single doc id as a bare int.
         index[term] = docId
         self.__len__.change(1)
         if self.ReverseOrder:
             self._reverseOrder.insert(reverseOrder(term))
         return
     if _isInstance(entry, _IntType):
         # Promote the legacy bare int to a tree set.
         entry = index[term] = IITreeSet((entry, ))
     entry.insert(docId)
Ejemplo n.º 27
0
    def test_even_dist(self):
        """Benchmark difference with an evenly distributed small set."""
        step = BIGSETSIZE / SMALLSETSIZE

        sparse = IISet(xrange(0, BIGSETSIZE, step))
        big_tree = IITreeSet(xrange(BIGSETSIZE))
        print('\nDifference Small set even distribution + large treeset')
        self.timing(sparse, big_tree)

        sparse = IISet(xrange(0, BIGSETSIZE, step))
        big_flat = IISet(xrange(BIGSETSIZE))
        print('\nDifference Small set even distribution + large set')
        self.timing(sparse, big_flat)
Ejemplo n.º 28
0
def optimize_rangeindex_int_iiset(index):
    """Migrate legacy int and IISet values in a range index to IITreeSet.

    Walks the four internal range trees, rewriting every legacy entry and
    creating a savepoint each 10000 conversions to bound memory use.
    """
    logger.info('Converting to IITreeSet for index `%s`.' % index.getId())
    for name in ('_since', '_since_only', '_until', '_until_only'):
        tree = getattr(index, name, None)
        if tree is None:
            continue
        logger.info('Converting tree `%s`.' % name)
        converted = 0
        for key, value in tree.items():
            if isinstance(value, IISet):
                tree[key] = IITreeSet(value)
                converted += 1
            elif isinstance(value, int):
                tree[key] = IITreeSet((value, ))
                converted += 1
            if converted and converted % 10000 == 0:
                transaction.savepoint(optimistic=True)
                logger.info('Processed %s items.' % converted)

    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
Ejemplo n.º 29
0
    def insertEntry(self, comp, id, level):
        """ See IPathIndex

        Index document *id* under path component *comp* at depth *level*.
        """
        # Replaced the deprecated ``has_key`` checks (removed in Python 3)
        # and the repeated ``self._index[comp][level]`` lookups with single
        # get-or-create lookups.
        tree = self._index.get(comp)
        if tree is None:
            tree = self._index[comp] = IOBTree()

        treeset = tree.get(level)
        if treeset is None:
            treeset = tree[level] = IITreeSet()

        treeset.insert(id)
        # Track the deepest level ever indexed.
        if level > self._depth:
            self._depth = level
Ejemplo n.º 30
0
    def test_heavy_end(self):
        """Benchmark difference with a small set of high values."""
        lo = BIGSETSIZE - SMALLSETSIZE

        high_small = IISet(xrange(lo, BIGSETSIZE))
        big_tree = IITreeSet(xrange(BIGSETSIZE))
        print('\nDifference Small set high values + large treeset')
        self.timing(high_small, big_tree)

        high_small = IISet(xrange(lo, BIGSETSIZE))
        big_flat = IISet(xrange(BIGSETSIZE))
        print('\nDifference Small set high values + large set')
        self.timing(high_small, big_flat)
Ejemplo n.º 31
0
    def insert(self, params):
        """Insert one record built from *params* and return the row count (1).

        Raises ValueError when a primary key column has no value and
        DuplicateError when the primary key is already in use.

        Modernized: uses the parenthesized ``raise E(msg)`` form and the
        ``in`` operator instead of the Python-2-only ``raise E, msg`` and
        deprecated ``has_key`` — identical behavior on Python 2.
        """
        record = self.tuplify(params)

        # Determine the primary key.
        primary_key = []
        for position, column in self.col_info:
            if column.primary:
                if record[position] is None:
                    raise ValueError(
                        "No value provided for primary key column %s" %
                        repr(column.name))
                primary_key.append(record[position])
        if primary_key:
            primary_key = tuple(primary_key)
            if primary_key in self.primary_index:
                raise DuplicateError("Primary key %s in use" %
                                     repr(primary_key))

        # Add a record.
        rid = self.next_rid
        self.next_rid += 1  # XXX Hotspot!
        # The first column is replaced by the freshly assigned record id.
        record = (rid, ) + record[1:]
        self.data[rid] = record
        if primary_key:
            self.primary_index[primary_key] = rid

        # Add to indexes.
        for position, column in self.col_info:
            name = column.name
            value = record[position]
            if value is not None:
                if name in self.indexes:
                    set = self.indexes[name].get(value)
                    if set is None:
                        set = IITreeSet()
                        self.indexes[name][value] = set
                    set.insert(rid)

        # Return the number of rows inserted.
        return 1
Ejemplo n.º 32
0
 def group(self, seq):
   """Generate '(sort key, doc ids)' pairs for *seq* in sort order.

   Documents without a sort value are yielded last under the key None.
   Picks one of two strategies depending on the size of *seq* relative
   to the sort index.
   """
   sortIndex = self._sortIndex; sortReverse = self._sortReverse
   ns = len(seq); ni = len(sortIndex)
   if ns >= 0.1 * ni:
     # result large compared to index -- sort via index
     handled = IISet(); hn = 0
     _load = getattr(sortIndex, '_load', None)
     if _load is None:
       # not an optimized index
       items = sortIndex.items()

       # Python-2-only tuple-parameter lambda: maps an item to its value.
       _load = lambda (x1, x2): x2
       if sortReverse: items.reverse()
     elif sortReverse:
       gRO = getattr(sortIndex, 'getReverseOrder', None)
       items = gRO and gRO()
       if items is None:
         items = list(sortIndex._index.keys()); items.reverse()
     else: items = sortIndex._index.keys()
     for i in items:
       # Documents of seq that carry this index entry.
       ids = intersection(seq, _load(i))
       if ids:
         handled.update(ids); hn += len(ids)
         yield i, ids
     # Anything never matched has no sort value.
     if hn != len(seq): yield None, difference(seq, handled)
   else:
     # result relatively small -- sort via result
     keyFor = sortIndex.keyForDocument; m = OOBTree()
     noValue = IITreeSet()
     for doc in seq.keys():
       try: k = keyFor(doc)
       except KeyError: noValue.insert(doc); continue
       l = m.get(k)
       if l is None: l = m[k] = IITreeSet()
       l.insert(doc)
     items = m.items()
     if sortReverse: items = list(items); items.reverse()
     for i in items: yield i
     if noValue: yield None, noValue
Ejemplo n.º 33
0
 def clear(self):
     """Wipe all index state; create or advance the generation counter."""
     self._always = IITreeSet()
     self._since_only = IOBTree()
     self._until_only = IOBTree()
     self._since = IOBTree()
     self._until = IOBTree()
     self._unindex = IOBTree()  # each 'datum' value is a tuple of date ints
     self._length = Length()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = Length()
Ejemplo n.º 34
0
    def test_even_dist(self):
        """Benchmark intersection with an evenly distributed small set."""
        step = BIGSETSIZE / SMALLSETSIZE

        sparse = IISet(xrange(0, BIGSETSIZE, step))
        small_tree = IITreeSet(xrange(SMALLSETSIZE))
        print('\nIntersection small set even distribution + small treeset')
        self.timing(sparse, small_tree)

        sparse = IISet(xrange(0, BIGSETSIZE, step))
        big_tree = IITreeSet(xrange(BIGSETSIZE))
        print('\nIntersection small set even distribution + large treeset')
        self.timing(sparse, big_tree)

        sparse = IISet(xrange(0, BIGSETSIZE, step))
        big_flat = IISet(xrange(BIGSETSIZE))
        print('\nIntersection small set even distribution + large set')
        self.timing(sparse, big_flat)

        py_sparse = set(xrange(0, BIGSETSIZE, step))
        py_big = set(xrange(BIGSETSIZE))
        self.pytiming(py_sparse, py_big)
Ejemplo n.º 35
0
    def test_heavy_start(self):
        """Benchmark intersection with a small set of low values."""
        lo_small = IISet(xrange(SMALLSETSIZE))
        small_tree = IITreeSet(xrange(SMALLSETSIZE))
        print('\nIntersection small set low values + small treeset')
        self.timing(lo_small, small_tree)

        lo_small = IISet(xrange(SMALLSETSIZE))
        big_tree = IITreeSet(xrange(BIGSETSIZE))
        print('\nIntersection small set low values + large treeset')
        self.timing(lo_small, big_tree)

        lo_small = IISet(xrange(SMALLSETSIZE))
        big_flat = IISet(xrange(BIGSETSIZE))
        print('\nIntersection small set low values + large set')
        self.timing(lo_small, big_flat)

        py_small = set(xrange(SMALLSETSIZE))
        py_big = set(xrange(BIGSETSIZE))
        self.pytiming(py_small, py_big)
Ejemplo n.º 36
0
    def insertEntry(self, comp, id, level):
        """ See IPathIndex
        """
        by_level = self._index.get(comp, None)
        if by_level is None:
            self._index[comp] = by_level = IOBTree()

        docids = by_level.get(level, None)
        if docids is None:
            by_level[level] = docids = IITreeSet()

        docids.insert(id)
        # Track the deepest level ever indexed.
        if level > self._depth:
            self._depth = level
Ejemplo n.º 37
0
    def insertForwardIndexEntry(self, entry, documentId):
        """Record *documentId* under *entry* in the forward index,
        creating the whole row when it does not exist yet.
        """
        row = self._index.get(entry, _marker)
        if row is _marker:
            # New rows are always tree sets so that concurrent threads
            # adding the same entry do not conflict on a bare int.
            self._index[entry] = IITreeSet((documentId, ))
            self._length.change(1)
            return
        try:
            row.insert(documentId)
        except AttributeError:
            # Inline migration: index row with one element was an int at
            # first (before Zope 2.13).
            self._index[entry] = IITreeSet((row, documentId))
Ejemplo n.º 38
0
    def _reindex_doc(self, docid, text):
        """Re-index document *docid* against new *text*.

        Compares old vs. new word ids and updates only the per-word info
        maps that actually changed.  Returns the number of new word ids.
        """
        # Touch as few docid->w(docid, score) maps in ._wordinfo as possible.
        old_wids = self.get_words(docid)
        new_wids = self._lexicon.sourceToWordIds(text)
        if old_wids == new_wids:
            # Text unchanged at the word-id level: nothing to update.
            return len(new_wids)

        old_wid2w, old_docw = self._get_frequencies(old_wids)
        new_wid2w, new_docw = self._get_frequencies(new_wids)

        old_widset = IITreeSet(old_wid2w.keys())
        new_widset = IITreeSet(new_wid2w.keys())

        # Partition the wids into removed / added / retained.
        in_both_widset = intersection(old_widset, new_widset)
        only_old_widset = difference(old_widset, in_both_widset)
        only_new_widset = difference(new_widset, in_both_widset)
        del old_widset, new_widset

        for wid in only_old_widset.keys():
            self._del_wordinfo(wid, docid)

        for wid in only_new_widset.keys():
            self._add_wordinfo(wid, new_wid2w[wid], docid)

        for wid in in_both_widset.keys():
            # For the Okapi indexer, the "if" will trigger only for words
            # whose counts have changed.  For the cosine indexer, the "if"
            # may trigger for every wid, since W(d) probably changed and
            # W(d) is divided into every score.
            newscore = new_wid2w[wid]
            if old_wid2w[wid] != newscore:
                self._add_wordinfo(wid, newscore, docid)

        self._docweight[docid] = new_docw
        self._docwords[docid] = WidCode.encode(new_wids)
        return len(new_wids)
Ejemplo n.º 39
0
    def test_heavy_end(self):
        """Benchmark intersections with a small set of high values.

        Fix: the last label carried a stray leading newline, unlike every
        other label passed to ``timing`` in this method.
        """
        bigsize = BIGSETSIZE
        smallsize = SMALLSETSIZE

        small = IISet(xrange(bigsize - smallsize, bigsize))
        large = IITreeSet(xrange(smallsize))
        self.timing(small, large,
                    'Intersection small set high values + small treeset')
        self.timing(large, small,
                    'Intersection small treeset + small set high values')

        small = IISet(xrange(bigsize - smallsize, bigsize))
        large = IITreeSet(xrange(bigsize))
        self.timing(small, large,
                    'Intersection small set high values + large treeset')
        self.timing(large, small,
                    'Intersection large treeset + small set high values')

        small = IISet(xrange(bigsize - smallsize, bigsize))
        large = IISet(xrange(bigsize))
        self.timing(small, large,
                    'Intersection small set high values + large set')
        self.timing(large, small,
                    'Intersection large set + small set high values')
Ejemplo n.º 40
0
def convert_to_booleanindex(catalog, index):
    """Convert ``index`` in place to a BooleanIndex.

    No-op when the index already is a BooleanIndex.  Rewrites the class of
    the persistent object, rebuilds ``_unindex`` as an IIBTree of 0/1
    values, and stores only the smaller of the two docid sets as the
    forward index (the BooleanIndex convention).
    """
    if isinstance(index, BooleanIndex):
        return
    logger.info('Converting index `%s` to BooleanIndex.' % index.getId())
    index.__class__ = BooleanIndex
    # Mark both the index and the catalog as changed so the class swap is
    # persisted.
    index._p_changed = True
    catalog._catalog._p_changed = True

    # Rebuild _unindex (docid -> 0/1), collecting docids per value on the way.
    buckets = {0: IITreeSet(), 1: IITreeSet()}
    old_unindex = index._unindex
    index._unindex = _unindex = IIBTree()
    for docid, raw in old_unindex.items():
        flag = int(bool(raw))
        _unindex[docid] = flag
        buckets[flag].add(docid)
    del old_unindex

    false_length = len(buckets[0])
    true_length = len(buckets[1])
    index._length = Length(false_length + true_length)
    # We put the smaller set into the forward index.
    keep = 0 if false_length < true_length else 1
    index._index_value = keep
    index._index_length = Length(len(buckets[keep]))
    index._index = buckets[keep]
    del buckets[1 - keep]
    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
Ejemplo n.º 41
0
    def index_object(self, documentId, obj, threshold=None):
        """Index an object.

        - ``documentId`` is the integer ID of the document.

        - ``obj`` is the object to be indexed.

        - ``threshold`` is the number of words to process between committing
          subtransactions.  If None, subtransactions are disabled.

        For each name in ``getIndexSourceNames``, try to get the named
        attribute from ``obj``.

        - If the object does not have the attribute, do not add it to the
          index for that name.

        - If the attribute is a callable, call it to get the value.  If
          calling it raises an AttributeError, do not add it to the index.
          for that name.

        Returns True when the object was indexed, False when it was skipped
        (no start value).
        """
        # Clear the data structures before indexing the object. This will ensure
        # we don't leave any stale data behind when an object gets reindexed.
        self.unindex_object(documentId)

        ### 1. Get the values.
        start = self._getattr(self.start_attr, obj)
        end = self._getattr(self.end_attr, obj)
        if start is None:
            # Ignore calls if the obj does not have the start field.
            return False

        if end is None:
            # Singular event: treat it as a zero-length interval.
            end = start

        # Normalize the recurrence attribute to an rrule object (or None).
        recurrence = self._getattr(self.recurrence_attr, obj)
        if not recurrence:
            rule = None
        elif isinstance(recurrence, basestring):
            # XXX trap and log errors -- a malformed recurrence string will
            # currently raise out of rrulestr().
            rule = rrule.rrulestr(recurrence, dtstart=start)
        elif isinstance(recurrence, rrule.rrulebase):
            rule = recurrence
        else:
            # XXX Log error -- an unrecognized recurrence type is silently
            # treated as "no recurrence".
            rule = None

        # Strip out times from the recurrence:
        if rule is not None:
            sync_timezone(rule, start.tzinfo)

        ### 2. Make them into what should be indexed.
        # XXX Naive events are not comparable to timezoned events, so we convert
        # everything to utctimetuple(). This means naive events are assumed to
        # be GMT, but we can live with that at the moment.
        start_value = start.utctimetuple()
        end_value = end.utctimetuple()

        # The end value should be the end of the recurrence, if any:
        if rule is not None:
            if is_open_ended(rule):
                # This recurrence is open ended; None sorts as "no end".
                end_value = None
            else:
                duration = end - start
                # NOTE(review): rule._iter() is a private dateutil API --
                # materializing every occurrence can be expensive for long
                # recurrences; confirm this is acceptable for real data.
                allrecs = [x for x in rule._iter()]
                if allrecs:
                    last = allrecs[-1] + duration
                else:
                    # Real data may have invalid recurrence rules,
                    # which end before the start for example.
                    # Then we end up here.
                    last = end
                end_value = last.utctimetuple()

        ### 3. Store everything in the indexes:
        # Forward indexes map a date value to the set of document ids,
        # migrating from a single id to an IITreeSet on the second insert.
        row = self._start2uid.get(start_value, None)
        if row is None:
            row = IITreeSet((documentId,))
            self._start2uid[start_value] = row
        else:
            row.insert(documentId)

        row = self._end2uid.get(end_value, None)
        if row is None:
            row = IITreeSet((documentId,))
            self._end2uid[end_value] = row
        else:
            row.insert(documentId)

        # Reverse indexes used by unindex_object and duration queries.
        self._uid2start[documentId] = start_value
        self._uid2recurrence[documentId] = rule
        self._uid2end[documentId] = end_value
        self._uid2duration[documentId] = end - start

        return True
Ejemplo n.º 42
0
 def remove(self, obj):
     """Remove *obj* from the set, keyed by its id."""
     oid = self._get_id(obj)
     return IITreeSet.remove(self, oid)
Ejemplo n.º 43
0
class DateRangeIndex(UnIndex):

    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ('query', )

    manage_options = ({'label': 'Properties',
                       'action': 'manage_indexProperties'},
                      )

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(self, id, since_field=None, until_field=None,
                 caller=None, extra=None,
                 floor_value=None, ceiling_value=None):
        # ``extra`` (a record object, as passed by ZCatalog) overrides the
        # individual keyword arguments.
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, 'floor_value', None)
            ceiling_value = getattr(extra, 'ceiling_value', None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, 'getSinceField')
    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, 'getUntilField')
    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, 'getFloorValue')
    def getFloorValue(self):
        """Return the lowest date value (in minutes) this index accepts."""
        return self.floor_value

    security.declareProtected(view, 'getCeilingValue')
    def getCeilingValue(self):
        """Return the highest date value (in minutes) this index accepts."""
        return self.ceiling_value

    manage_indexProperties = DTMLFile('manageDateRangeIndex', _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, 'manage_edit')
    def manage_edit(self, since_field, until_field, floor_value,
                    ceiling_value, REQUEST):
        """Update the index configuration from the ZMI."""
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST['RESPONSE'].redirect('%s/manage_main'
                                     '?manage_tabs_message=Updated'
                                     % REQUEST.get('URL2'))

    security.declarePrivate('_edit')
    def _edit(self, since_field, until_field, floor_value=None,
              ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, 'clear')
    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()
        if self._counter is None:
            self._counter = Length()
        else:
            self._increment_counter()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:
        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        self._increment_counter()

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """

        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        self._increment_counter()

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a sequence of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            # BUGFIX: a bare ``raise StopIteration`` inside a generator is
            # turned into a RuntimeError under PEP 479 (Python 3.7+); a
            # plain ``return`` is the correct way to end the generator.
            return

        if name == self._since_field:
            sets = (self._since, self._since_only)
        else:
            sets = (self._until, self._until_only)

        if not withLengths:
            for s in sets:
                for key in s.keys():
                    yield key
        else:
            for s in sets:
                for key, value in s.items():
                    # A plain int is a not-yet-migrated single docid entry.
                    if isinstance(value, int):
                        yield (key, 1)
                    else:
                        yield (key, len(value))

    def getRequestCacheKey(self, record, resultset=None):
        """Return a (index id, record id) pair identifying this query in
        the per-request cache.  The index part includes the change counter
        so stale cache entries are never reused.
        """
        term = self._convertDateTime(record.keys[0])
        tid = str(term)

        # unique index identifier
        iid = '_%s_%s_%s' % (self.__class__.__name__,
                             self.id, self.getCounter())
        # record identifier
        if resultset is None:
            rid = '_%s' % (tid, )
        else:
            rid = '_inverse_%s' % (tid, )

        return (iid, rid)

    def _apply_index(self, request, resultset=None):
        record = IndexQuery(request, self.id, self.query_options,
                            self.operators, self.useOperator)
        if record.keys is None:
            return None
        return (self.query_index(record, resultset=resultset),
                (self._since_field, self._until_field))

    def query_index(self, record, resultset=None):
        cache = self.getRequestCache()
        if cache is not None:
            cachekey = self.getRequestCacheKey(record, resultset)
            cached = cache.get(cachekey, None)
            if cached is not None:
                if resultset is None:
                    return cached
                else:
                    return difference(resultset, cached)

        term = self._convertDateTime(record.keys[0])
        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))
            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only,
                                 self._always])
            if cache is not None:
                cache[cachekey] = result

            return result
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if cache is not None:
                cache[cachekey] = result

            return difference(resultset, result)

    def _insert_migrate(self, tree, key, value):
        """Insert ``value`` into ``tree[key]``, migrating legacy entries
        (a bare int or an IISet) to an IITreeSet on the way.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value, ))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove ``value`` from ``tree[key]``, dropping the key when the
        entry is a legacy bare int or becomes empty.
        """
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Normalize ``value`` to an int minute count, or None when outside
        the configured floor/ceiling range (meaning "unbounded").
        """
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            # BUGFIX: the implicit string concatenation was missing a space
            # ("allowedby").
            raise OverflowError('%s is not within the range of dates allowed '
                                'by a DateRangeIndex' % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
Ejemplo n.º 44
0
def _wrapLookup(r):
    """Coerce *r* to a set type; BTree mappings contribute their keys."""
    if isinstance(r, (IISet, IITreeSet)):
        return r
    return IITreeSet(r.keys())
Ejemplo n.º 45
0
 def clear(self):
     """Reset the stored ids to an empty tree set."""
     self.ids = IITreeSet()
Ejemplo n.º 46
0
 def readstatus(self):
     # Self-replacing lazy initializer: assigning to ``self.readstatus``
     # shadows this method on the instance, so later accesses hit the tree
     # set directly.
     # NOTE(review): presumably decorated with ``@Lazy`` where used (cf. the
     # identical pattern in MessageStorage) -- confirm; as a plain method,
     # each call would rebind the attribute and discard prior contents.
     self.readstatus = IITreeSet()
     return self.readstatus
Ejemplo n.º 47
0
class DateRangeIndex(UnIndex):

    """Index for date ranges, such as the "effective-expiration" range in CMF.

    Any object may return None for either the start or the end date: for the
    start date, this should be the logical equivalent of "since the beginning
    of time"; for the end date, "until the end of time".

    Therefore, divide the space of indexed objects into four containers:

    - Objects which always match (i.e., they returned None for both);

    - Objects which match after a given time (i.e., they returned None for the
      end date);

    - Objects which match until a given time (i.e., they returned None for the
      start date);

    - Objects which match only during a specific interval.
    """

    implements(IDateRangeIndex)

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"
    query_options = ("query",)

    manage_options = ({"label": "Properties", "action": "manage_indexProperties"},)

    since_field = until_field = None

    # int(DateTime('1000/1/1 0:00 GMT-12').millis() / 1000 / 60)
    floor_value = -510162480
    # int(DateTime('2499/12/31 0:00 GMT+12').millis() / 1000 / 60)
    ceiling_value = 278751600

    def __init__(
        self, id, since_field=None, until_field=None, caller=None, extra=None, floor_value=None, ceiling_value=None
    ):
        # ``extra`` (a record object, as passed by ZCatalog) overrides the
        # individual keyword arguments.
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field
            floor_value = getattr(extra, "floor_value", None)
            ceiling_value = getattr(extra, "ceiling_value", None)

        self._setId(id)
        self._edit(since_field, until_field, floor_value, ceiling_value)
        self.clear()

    security.declareProtected(view, "getSinceField")

    def getSinceField(self):
        """Get the name of the attribute indexed as start date.
        """
        return self._since_field

    security.declareProtected(view, "getUntilField")

    def getUntilField(self):
        """Get the name of the attribute indexed as end date.
        """
        return self._until_field

    security.declareProtected(view, "getFloorValue")

    def getFloorValue(self):
        """Return the lowest date value (in minutes) this index accepts."""
        return self.floor_value

    security.declareProtected(view, "getCeilingValue")

    def getCeilingValue(self):
        """Return the highest date value (in minutes) this index accepts."""
        return self.ceiling_value

    manage_indexProperties = DTMLFile("manageDateRangeIndex", _dtmldir)

    security.declareProtected(manage_zcatalog_indexes, "manage_edit")

    def manage_edit(self, since_field, until_field, floor_value, ceiling_value, REQUEST):
        """Update the index configuration from the ZMI."""
        self._edit(since_field, until_field, floor_value, ceiling_value)
        REQUEST["RESPONSE"].redirect("%s/manage_main" "?manage_tabs_message=Updated" % REQUEST.get("URL2"))

    security.declarePrivate("_edit")

    def _edit(self, since_field, until_field, floor_value=None, ceiling_value=None):
        """Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field
        if floor_value is not None:
            self.floor_value = int(floor_value)
        if ceiling_value is not None:
            self.ceiling_value = int(ceiling_value)

    security.declareProtected(manage_zcatalog_indexes, "clear")

    def clear(self):
        """Start over fresh."""
        self._always = IITreeSet()
        self._since_only = IOBTree()
        self._until_only = IOBTree()
        self._since = IOBTree()
        self._until = IOBTree()
        self._unindex = IOBTree()  # 'datum' will be a tuple of date ints
        self._length = Length()

    def getEntryForObject(self, documentId, default=None):
        """Get all information contained for the specific object
        identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get(documentId, default)

    def index_object(self, documentId, obj, threshold=None):
        """Index an object:
        - 'documentId' is the integer ID of the document
        - 'obj' is the object to be indexed
        - ignore threshold
        """
        if self._since_field is None:
            return 0

        since = getattr(obj, self._since_field, None)
        if safe_callable(since):
            since = since()
        since = self._convertDateTime(since)

        until = getattr(obj, self._until_field, None)
        if safe_callable(until):
            until = until()
        until = self._convertDateTime(until)

        datum = (since, until)

        old_datum = self._unindex.get(documentId, None)
        if datum == old_datum:  # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry(old_since, old_until, documentId)

        self._insertForwardIndexEntry(since, until, documentId)
        self._unindex[documentId] = datum

        return 1

    def unindex_object(self, documentId):
        """Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get(documentId, None)
        if datum is None:
            return

        since, until = datum
        self._removeForwardIndexEntry(since, until, documentId)
        del self._unindex[documentId]

    def uniqueValues(self, name=None, withLengths=0):
        """Return a list of unique values for 'name'.

        If 'withLengths' is true, return a sequence of tuples, in
        the form '(value, length)'.
        """
        if name not in (self._since_field, self._until_field):
            return []

        if name == self._since_field:
            t1 = self._since
            t2 = self._since_only
        else:
            t1 = self._until
            t2 = self._until_only

        result = []
        if not withLengths:
            result.extend(t1.keys())
            result.extend(t2.keys())
        else:
            for key in t1.keys():
                set = t1[key]
                # A plain int is a not-yet-migrated single docid entry.
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
            for key in t2.keys():
                set = t2[key]
                if isinstance(set, int):
                    length = 1
                else:
                    length = len(set)
                result.append((key, length))
        return tuple(result)

    def _cache_key(self, catalog):
        """Return a per-catalog cache key, including the catalog's change
        counter when available so stale entries are never reused.
        """
        cid = catalog.getId()
        counter = getattr(aq_base(catalog), "getCounter", None)
        if counter is not None:
            return "%s_%s" % (cid, counter())
        return cid

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in 'request', which
        should be a mapping object.

        If the request does not contain the needed parameters, then
        return None.

        Otherwise return two objects.  The first object is a ResultSet
        containing the record numbers of the matching records.  The
        second object is a tuple containing the names of all data fields
        used.
        """
        iid = self.id
        record = parseIndexRequest(request, iid, self.query_options)
        if record.keys is None:
            return None

        term = self._convertDateTime(record.keys[0])
        REQUEST = aq_get(self, "REQUEST", None)
        if REQUEST is not None:
            catalog = aq_parent(aq_parent(aq_inner(self)))
            if catalog is not None:
                key = self._cache_key(catalog)
                cache = REQUEST.get(key, None)
                # BUGFIX: the old ``isinstance(term, int) and term / 10 or
                # "None"`` fell through to "None" whenever ``term / 10``
                # was 0 (terms 0..9), colliding with the unbounded-query
                # cache key.
                tid = term / 10 if isinstance(term, int) else "None"
                if resultset is None:
                    cachekey = "_daterangeindex_%s_%s" % (iid, tid)
                else:
                    cachekey = "_daterangeindex_inverse_%s_%s" % (iid, tid)
                if cache is None:
                    cache = REQUEST[key] = RequestCache()
                else:
                    cached = cache.get(cachekey, None)
                    if cached is not None:
                        if resultset is None:
                            return (cached, (self._since_field, self._until_field))
                        else:
                            return (difference(resultset, cached), (self._since_field, self._until_field))

        if resultset is None:
            # Aggregate sets for each bucket separately, to avoid
            # large-small union penalties.
            until_only = multiunion(self._until_only.values(term))
            since_only = multiunion(self._since_only.values(None, term))
            until = multiunion(self._until.values(term))

            # NOTE(review): ``resultset`` is None on this branch, and
            # BTrees' intersection(None, x) returns x, so this is a no-op;
            # kept as-is for behaviour compatibility.
            if REQUEST is None:
                until = intersection(resultset, until)

            since = multiunion(self._since.values(None, term))
            bounded = intersection(until, since)

            # Merge from smallest to largest.
            result = multiunion([bounded, until_only, since_only, self._always])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (result, (self._since_field, self._until_field))
        else:
            # Compute the inverse and subtract from res
            until_only = multiunion(self._until_only.values(None, term - 1))
            since_only = multiunion(self._since_only.values(term + 1))
            until = multiunion(self._until.values(None, term - 1))
            since = multiunion(self._since.values(term + 1))

            result = multiunion([since, since_only, until_only, until])
            if REQUEST is not None and catalog is not None:
                cache[cachekey] = result

            return (difference(resultset, result), (self._since_field, self._until_field))

    def _insert_migrate(self, tree, key, value):
        """Insert ``value`` into ``tree[key]``, migrating legacy entries
        (a bare int or an IISet) to an IITreeSet on the way.
        """
        treeset = tree.get(key, None)
        if treeset is None:
            tree[key] = IITreeSet((value,))
        else:
            if isinstance(treeset, IITreeSet):
                treeset.insert(value)
            elif isinstance(treeset, int):
                tree[key] = IITreeSet((treeset, value))
            else:
                tree[key] = IITreeSet(treeset)
                tree[key].insert(value)

    def _insertForwardIndexEntry(self, since, until, documentId):
        """Insert 'documentId' into the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.insert(documentId)
        elif since is None:
            self._insert_migrate(self._until_only, until, documentId)
        elif until is None:
            self._insert_migrate(self._since_only, since, documentId)
        else:
            self._insert_migrate(self._since, since, documentId)
            self._insert_migrate(self._until, until, documentId)

    def _remove_delete(self, tree, key, value):
        """Remove ``value`` from ``tree[key]``, dropping the key when the
        entry is a legacy bare int or becomes empty.
        """
        treeset = tree.get(key, None)
        if treeset is not None:
            if isinstance(treeset, int):
                del tree[key]
            else:
                treeset.remove(value)
                if not treeset:
                    del tree[key]

    def _removeForwardIndexEntry(self, since, until, documentId):
        """Remove 'documentId' from the appropriate set based on 'datum'.
        """
        if since is None and until is None:
            self._always.remove(documentId)
        elif since is None:
            self._remove_delete(self._until_only, until, documentId)
        elif until is None:
            self._remove_delete(self._since_only, since, documentId)
        else:
            self._remove_delete(self._since, since, documentId)
            self._remove_delete(self._until, until, documentId)

    def _convertDateTime(self, value):
        """Normalize ``value`` to an int minute count, or None when outside
        the configured floor/ceiling range (meaning "unbounded").
        """
        if value is None:
            return value
        if isinstance(value, (str, datetime)):
            dt_obj = DateTime(value)
            value = dt_obj.millis() / 1000 / 60  # flatten to minutes
        elif isinstance(value, DateTime):
            value = value.millis() / 1000 / 60  # flatten to minutes
        if value > MAX32 or value < -MAX32:
            # t_val must be integer fitting in the 32bit range
            # BUGFIX: the implicit string concatenation was missing a space
            # ("allowedby").
            raise OverflowError("%s is not within the range of dates allowed "
                                "by a DateRangeIndex" % value)
        value = int(value)
        # handle values outside our specified range
        if value > self.ceiling_value:
            return None
        elif value < self.floor_value:
            return None
        return value
Ejemplo n.º 48
0
class MessageStorage(Persistent, Location):
    """Per-principal persistent message store.

    Messages are keyed both by integer id (``messages``) and by creation
    date (``index``); per-service containers live in ``services``.
    """

    interface.implements(IMessageStorage)

    notify = True
    principalId = None

    def __init__(self, principalId):
        self.index = OIBTree()        # creation date -> message id
        self.messages = IOBTree()     # message id -> message
        self.services = OOBTree()     # service id -> service
        self.readstatus = IITreeSet()  # message ids tracked for read status
        self.principalId = principalId

        self._next = Length(1)  # next message id

    @Lazy
    def readstatus(self):
        # Lazy migration for instances persisted before ``readstatus``
        # existed; assigning shadows this property on the instance.
        self.readstatus = IITreeSet()
        return self.readstatus

    @property
    def principal(self):
        try:
            return getUtility(IAuthentication).getPrincipal(self.principalId)
        except Exception:
            # BUGFIX: was a bare ``except:``; keep the best-effort None
            # result but let SystemExit/KeyboardInterrupt propagate.
            return None

    @property
    def unread(self):
        """Total unread count across all services."""
        unread = 0
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            unread = unread + service.unread()
        return unread

    def getMessage(self, messageId):
        """Return the message with ``messageId``, or None."""
        return self.messages.get(messageId)

    def getServiceIds(self):
        """Return the ids of all known services."""
        return list(self.services.keys())

    def getService(self, serviceId):
        """Return (creating on demand) the service for ``serviceId``."""
        service = self.services.get(serviceId)

        if not IMessageService.providedBy(service):
            factory = getUtility(IMessageServiceFactory, serviceId)
            service = factory(self)
            self.services[serviceId] = service

        return service

    def create(self, serviceId, **data):
        """ create and append message to storage """
        id = self._next()
        self._next.change(1)

        service = self.getService(serviceId)

        msg = service.create(**data)
        date = datetime.now(ITZInfo(self.principal, pytz.utc))

        # Bump the timestamp minimally until it is unique within the index.
        # BUGFIX: the original added the ``timedelta`` *class* itself
        # (``date + timedelta``), which raises TypeError on any collision.
        while date in self.index:
            date = date + timedelta(microseconds=1)

        msg.__id__ = id
        msg.__date__ = date

        self.index[date] = id
        self.messages[id] = msg
        self.readstatus.insert(id)

        service.append(msg)

        event.notify(MessageCreatedEvent(msg, self))
        return id

    def remove(self, messageId):
        """Remove the message and all references to it; no-op if absent."""
        message = self.messages.get(messageId)

        if message is None:
            return
        else:
            self.clearReadStatus(message)

            del self.index[message.__date__]
            del self.messages[message.__id__]

            for serviceId in self.services.keys():
                service = self.getService(serviceId)
                service.remove(message)

            event.notify(MessageRemovedEvent(message, self))

    def readStatus(self, message):
        """Return whether ``message`` is still tracked in ``readstatus``."""
        return message.__id__ in self.readstatus

    def clearReadStatus(self, message):
        """Drop ``message`` from ``readstatus`` and decrement the unread
        counter of every service whose index contains it.
        """
        if message.__id__ not in self.readstatus:
            return

        idx = message.__date__
        for serviceId in self.services.keys():
            service = self.getService(serviceId)
            if idx in service.index and service.unread() > 0:
                service.unread.change(-1)

        self.readstatus.remove(message.__id__)
Ejemplo n.º 49
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index

       False doesn't have actual entries in _index.

       Space optimization: only the *rarer* of the two values is kept in
       _index; _index_value records which value that is.  Whenever the
       stored value would cover >= 60% of all documents, the index is
       inverted to track the other value (see _invert_index).
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    # Class-level defaults; clear() replaces them with per-instance state.
    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset the index to an empty state, preserving/creating _counter."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        # The non-indexed value's count is derived: total minus indexed.
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Switch _index to track the opposite boolean value.

        Rebuilds _index from _unindex so it contains the rids matching
        the *new* _index_value, and rebuilds _index_length accordingly.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) larger than 60%
        # of the total length? than switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        With check=True, a removal of a non-indexed value may also
        trigger an invert when the indexed side drops to <= 60% of the
        remaining total.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s. This should not happen.' % (
                        self.__class__.__name__,
                        str(documentId),
                        str(self.id)))
        elif check:
            # is the index (after removing the current entry) larger than
            # 60% of the total length? than switch the indexed value
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s' %
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s' % documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        """Answer a query for True or False rids.

        A query for the indexed value is an intersection with _index; a
        query for the other value is computed by difference, since those
        rids are not stored explicitly.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    return difference(resultset, index)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        # return a list of value to int set of rid tuples
        # NOTE(review): the non-indexed side is rebuilt from _unindex;
        # relies on IISet.add (alias of insert in BTrees >= 4) -- confirm
        # the deployed BTrees version provides it.
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Ejemplo n.º 50
0
 def clear(self):
     """Reset the boolean index to a pristine, empty state.

     Recreates the forward index (treeset of rids), the reverse index
     (rid -> value), both length counters, and restores the default
     indexed value of 1.
     """
     self._index_value = 1
     self._index = IITreeSet()
     self._unindex = IIBTree()
     self._index_length = BTrees.Length.Length()
     self._length = BTrees.Length.Length()
Ejemplo n.º 51
0
class DateRangeIndex(UnIndex):
    """
        Index a date range, such as the canonical "effective-expiration"
        range in the CMF.  Any object may return None for either the
        start or the end date:  for the start date, this should be
        the logical equivalent of "since the beginning of time";  for the
        end date, "until the end of time".

        Therefore, divide the space of indexed objects into four containers:

        - Objects which always match ( i.e., they returned None for both );

        - Objects which match after a given time ( i.e., they returned None
          for the end date );

        - Objects which match until a given time ( i.e., they returned None
          for the start date );

        - Objects which match only during a specific interval.
    """
    __implements__ = ( PluggableIndex.PluggableIndexInterface, )

    security = ClassSecurityInfo()

    meta_type = "DateRangeIndex"

    manage_options= ( { 'label'     : 'Properties'
                      , 'action'    : 'manage_indexProperties'
                      }
                    ,
                    )

    query_options = ['query']

    since_field = until_field = None

    def __init__(self, id, since_field=None, until_field=None,
            caller=None, extra=None):

        # 'extra' (a record-like object) wins over the positional fields.
        if extra:
            since_field = extra.since_field
            until_field = extra.until_field

        self._setId(id)
        self._edit(since_field, until_field)
        self.clear()

    security.declareProtected( VIEW_PERMISSION
                             , 'getSinceField'
                             )
    def getSinceField( self ):
        """
            Return the name of the attribute providing the range start.
        """
        return self._since_field

    security.declareProtected( VIEW_PERMISSION
                             , 'getUntilField'
                             )
    def getUntilField( self ):
        """
            Return the name of the attribute providing the range end.
        """
        return self._until_field

    manage_indexProperties = DTMLFile( 'manageDateRangeIndex', _dtmldir )

    security.declareProtected( INDEX_MGMT_PERMISSION
                             , 'manage_edit'
                             )
    def manage_edit( self, since_field, until_field, REQUEST ):
        """
            TTW: update the since/until field names, then redirect.
        """
        self._edit( since_field, until_field )
        REQUEST[ 'RESPONSE' ].redirect( '%s/manage_main'
                                        '?manage_tabs_message=Updated'
                                      % REQUEST.get('URL2')
                                      )

    security.declarePrivate( '_edit' )
    def _edit( self, since_field, until_field ):
        """
            Update the fields used to compute the range.
        """
        self._since_field = since_field
        self._until_field = until_field


    security.declareProtected( INDEX_MGMT_PERMISSION
                             , 'clear'
                             )
    def clear( self ):
        """
            Start over fresh.
        """
        self._always        = IITreeSet()
        self._since_only    = IOBTree()
        self._until_only    = IOBTree()
        self._since         = IOBTree()
        self._until         = IOBTree()
        self._unindex       = IOBTree() # 'datum' will be a tuple of date ints

    #
    #   PluggableIndexInterface implementation (XXX inherit assertions?)
    #
    def getEntryForObject( self, documentId, default=None ):
        """
            Get all information contained for the specific object
            identified by 'documentId'.  Return 'default' if not found.
        """
        return self._unindex.get( documentId, default )

    def index_object( self, documentId, obj, threshold=None ):
        """
            Index an object:

             - 'documentId' is the integer ID of the document

             - 'obj' is the object to be indexed

             - ignore threshold

            Returns 1 if the index was changed, 0 otherwise.
        """
        if self._since_field is None:
            return 0

        since = getattr( obj, self._since_field, None )
        if callable( since ):
            since = since()
        since = self._convertDateTime( since )

        until = getattr( obj, self._until_field, None )
        if callable( until ):
            until = until()
        until = self._convertDateTime( until )

        datum = ( since, until )

        old_datum = self._unindex.get( documentId, None )
        if datum == old_datum: # No change?  bail out!
            return 0

        if old_datum is not None:
            old_since, old_until = old_datum
            self._removeForwardIndexEntry( old_since, old_until, documentId )

        self._insertForwardIndexEntry( since, until, documentId )
        self._unindex[ documentId ] = datum

        return 1

    def unindex_object( self, documentId ):
        """
            Remove the object corresponding to 'documentId' from the index.
        """
        datum = self._unindex.get( documentId, None )

        if datum is None:
            return

        since, until = datum

        self._removeForwardIndexEntry( since, until, documentId )
        del self._unindex[ documentId ]

    def uniqueValues( self, name=None, withLengths=0 ):
        """
            Return a list of unique values for 'name'.

            If 'withLengths' is true, return a sequence of tuples, in
            the form '( value, length )'.
        """
        if not name in ( self._since_field, self._until_field ):
            return []

        if name == self._since_field:

            t1 = self._since
            t2 = self._since_only

        else:

            t1 = self._until
            t2 = self._until_only

        result = []
        IntType = type( 0 )

        # BUGFIX: the original tested the undefined name 'withValues',
        # raising NameError on every call; the parameter is 'withLengths'.
        if not withLengths:

            result.extend( t1.keys() )
            result.extend( t2.keys() )

        else:

            # Entries may be stored as a bare int (single rid) or a set.
            for key in t1.keys():
                set = t1[ key ]
                if type( set ) is IntType:
                    length = 1
                else:
                    length = len( set )
                result.append( ( key, length) )

            for key in t2.keys():
                set = t2[ key ]
                if type( set ) is IntType:
                    length = 1
                else:
                    length = len( set )
                result.append( ( key, length) )

        return tuple( result )

    def _apply_index( self, request, cid='' ):
        """
            Apply the index to query parameters given in 'request', which
            should be a mapping object.

            If the request does not contain the needed parametrs, then
            return None.

            If the request contains a parameter with the name of the
            column + "_usage", snif for information on how to handle
            applying the index.

            Otherwise return two objects.  The first object is a ResultSet
            containing the record numbers of the matching records.  The
            second object is a tuple containing the names of all data fields
            used.
        """
        record = parseIndexRequest( request, self.getId() )
        if record.keys is None:
            return None

        term        = self._convertDateTime( record.keys[0] )

        #
        #   Aggregate sets for each bucket separately, to avoid
        #   large-small union penalties.
        #
        # until_only: objects with no start, ending at or after 'term'.
        until_only = multiunion( self._until_only.values( term ) )

        # since_only: objects with no end, starting at or before 'term'.
        since_only = multiunion( self._since_only.values( None, term ) )

        # Bounded objects: end >= term AND start <= term.
        until = multiunion( self._until.values( term ) )
        since = multiunion( self._since.values( None, term ) )

        bounded     = intersection( until, since )

        #   Merge from smallest to largest.
        result      = union( bounded, until_only )
        result      = union( result, since_only )
        result      = union( result, self._always )

        return result, ( self._since_field, self._until_field )

    #
    #   ZCatalog needs this, although it isn't (yet) part of the interface.
    #
    security.declareProtected( VIEW_PERMISSION , 'numObjects' )
    def numObjects( self ):
        """
            Return the number of indexed objects.
        """
        return len( self._unindex )

    #
    #   Helper functions.
    #
    def _insertForwardIndexEntry( self, since, until, documentId ):
        """
            Insert 'documentId' into the appropriate set based on
            'datum'.
        """
        if since is None and until is None:

            self._always.insert( documentId )

        elif since is None:

            set = self._until_only.get( until, None )
            if set is None:
                set = self._until_only[ until ] = IISet()  # XXX: Store an int?
            set.insert( documentId )

        elif until is None:

            set = self._since_only.get( since, None )
            if set is None:
                set = self._since_only[ since ] = IISet()  # XXX: Store an int?
            set.insert( documentId )

        else:

            set = self._since.get( since, None )
            if set is None:
                set = self._since[ since ] = IISet()   # XXX: Store an int?
            set.insert( documentId )

            set = self._until.get( until, None )
            if set is None:
                set = self._until[ until ] = IISet() # XXX: Store an int?
            set.insert( documentId )

    def _removeForwardIndexEntry( self, since, until, documentId ):
        """
            Remove 'documentId' from the appropriate set based on
            'datum'.  Empty sets are pruned from their trees.
        """
        if since is None and until is None:

            self._always.remove( documentId )

        elif since is None:

            set = self._until_only.get( until, None )
            if set is not None:

                set.remove( documentId )

                if not set:
                    del self._until_only[ until ]

        elif until is None:

            set = self._since_only.get( since, None )
            if set is not None:

                set.remove( documentId )

                if not set:
                    del self._since_only[ since ]

        else:

            set = self._since.get( since, None )
            if set is not None:
                set.remove( documentId )

                if not set:
                    del self._since[ since ]

            set = self._until.get( until, None )
            if set is not None:
                set.remove( documentId )

                if not set:
                    del self._until[ until ]

    def _convertDateTime( self, value ):
        """
            Normalize 'value' (None, string, DateTime or number) to an
            int count of minutes since the epoch; None passes through.
        """
        if value is None:
            return value
        if type( value ) == type( '' ):
            dt_obj = DateTime( value )
            value = dt_obj.millis() / 1000 / 60 # flatten to minutes
        if isinstance( value, DateTime ):
            value = value.millis() / 1000 / 60 # flatten to minutes
        # NOTE(review): '/' assumes Python 2 integer division here;
        # on Python 3 this yields a float before the int() truncation.
        return int( value )
Ejemplo n.º 52
0
 def __contains__(self, obj):
     """Membership test: True when ``obj``'s id is stored in the treeset."""
     key = self._get_id(obj)
     return IITreeSet.__contains__(self, key)
Ejemplo n.º 53
0
 def __iter__(self):
     # Walk the stored integer ids (in treeset key order) and resolve
     # each back to its object via _get_object.
     for item in IITreeSet.__iter__(self):
         yield self._get_object(item)