예제 #1
0
    def __init__(self, lexicon):
        # Bind the index to its lexicon and create the (empty) storage.
        self._lexicon = lexicon

        # Counters are BTree Length objects so that lengths can be computed
        # efficiently and without write conflicts.
        self.document_count = Length()
        self.length = Length()

        # docid -> WidCode'd list of wids.
        # Needed both for un-indexing and for phrase search.
        self._docwords = IOBTree()

        # docid -> weight.
        # What a "doc weight" means is up to the concrete indexer; the only
        # expectation is that ._docweight maps docids to that notion.
        self._docweight = IIBTree()

        # wid -> {docid -> weight}; t -> D -> w(D, t)
        # What a "term weight" means is likewise up to the concrete indexer;
        # the only expectation is that ._wordinfo maps each wid to that
        # indexer's docid-to-weight map.
        #
        # Out-of-vocabulary (OOV) words come in two flavours:
        #   * wid 0 is explicitly OOV;
        #   * the lexicon may hand back a non-zero wid for a word this index
        #     does not currently know.  Unindexing the last doc containing a
        #     word leaves its wid in the lexicon but removes it from
        #     _wordinfo, and a lexicon shared across indices may contain
        #     words introduced by some other index.
        # A word is in-vocabulary for this index if and only if
        # _wordinfo.has_key(wid).  Note that wid 0 must not be a key.
        self._wordinfo = IOBTree()
예제 #2
0
파일: text.py 프로젝트: barkinet/zerodb
    def clear(self):
        # Reset the index by replacing every tree and counter with a fresh,
        # empty one.

        # BTree Length objects: efficient length computation w/o conflicts.
        self.documentCount = Length()
        self.wordCount = Length()

        # docid -> WidCode'd list of wids (~1/4 of document size).
        # Needed for un-indexing and for phrase search.
        self._docwords = self.family.IO.BTree()

        # docid -> weight
        self._docweight = self.family.IF.BTree()

        # wid -> {docid -> weight}; t -> D -> w(D, t)
        self._wordinfo = trees.family32.IO.BTree()
        # XXX
        # This Zope approach scales badly, especially with many documents.
        # What is needed instead:
        #   _wordinfo = BTree of (word, weight, docid); a TreeSet could be
        #   used as well.
        #   Searching a keyword then becomes
        #   _wordinfo.keys((word_start, None, None)), already sorted by
        #   weight (it only has to be multiplied by idf).  That works for
        #   both search and glob_search.
        # For multi-keyword searches an efficient (logarithmic) algorithm
        # for incremental, weighted set intersection still has to be found.
        # Even without an efficient intersection it is faster and more
        # secure anyway.
        # XXX
예제 #3
0
파일: text.py 프로젝트: barkinet/zerodb
class Lexicon(_Lexicon):
    # In comparison with standard Lexicon, use bigger buckets
    family = trees.family32

    def __init__(self, *pipeline):
        # ``pipeline`` elements are applied, in order, to every text handled
        # by sourceToWordIds().
        self._pipeline = pipeline
        self.wordCount = Length()
        self._words = self.family.IO.BTree()
        self._wids = self.family.OI.BTree()

    def sourceToWordIds(self, text):
        # Turn ``text`` (None counts as '') into a list of word ids, going
        # through _getWordIdCreate for every word that leaves the pipeline.
        words = _text2list('' if text is None else text)
        for stage in self._pipeline:
            words = stage.process(words)

        # Make sure wordCount is overridden with a BTrees.Length.Length
        if not isinstance(self.wordCount, Length):
            self.wordCount = Length(self.wordCount())

        # Strategically unload the length value so that the most recent
        # value written to the database is used, which minimizes conflicting
        # wids.  Because the length is independent, this loads the most
        # recent value stored, regardless of whether MVCC is enabled.
        self.wordCount._p_deactivate()

        parallel_traversal(self._wids, words)
        to_wid = self._getWordIdCreate
        return [to_wid(word) for word in words]
예제 #4
0
 def _BTreeContainer__len(self):
     # Build a Length counter seeded with the current size of the
     # underlying data, and flag the object as changed.
     counter = Length()
     current_size = len(self.__data)
     if current_size > 0:
         counter.change(current_size)
     self._p_changed = True
     return counter
예제 #5
0
파일: indexes.py 프로젝트: arturodr/indico
class DateBookingList(Persistent):
    """ Simple set of booking objects with a count attribute.
    """

    def __init__(self):
        self._bookings = OOBTree.OOTreeSet()
        self._count = Length(0)

    def addBooking(self, booking):
        """ Adds a booking to the set.

            The counter is only incremented when the booking was not already
            present (TreeSet.insert() returns a false value in that case),
            so that the count keeps matching the size of the set.
        """
        if self._bookings.insert(booking):
            self._count.change(1)

    def removeBooking(self, booking):
        """ Removes a booking from the set and decrements the counter.

            If remove() raises because the booking is not in the set, the
            counter is left untouched.
        """
        self._bookings.remove(booking)
        self._count.change(-1)

    def getCount(self):
        """ Returns the current value of the counter.
        """
        return self._count()

    def iterbookings(self):
        """ Iterator over the bookings
        """
        return iter(self._bookings)

    def getBookingsPerConf(self):
        """ Returns a dictionary where the keys are Conference objects
            and the values are the number of Vidyo bookings of that conference.
        """
        result = {}
        for b in self._bookings:
            # Look the conference up once per booking.
            conf = b.getConference()
            result[conf] = result.get(conf, 0) + 1
        return result
예제 #6
0
 def _QuestionRecord__len(self):
     # Build a Length counter seeded with the current size of the tree,
     # and flag the object as changed.
     counter = Length()
     current_size = len(self._tree)
     if current_size > 0:
         counter.change(current_size)
     self._p_changed = True
     return counter
예제 #7
0
 def _Folder__len(self):
     # Build a Length counter seeded with the current size of the
     # underlying data, and flag the object as changed.
     counter = Length()
     current_size = len(self.__data)
     if current_size > 0:
         counter.change(current_size)
     self._p_changed = True
     return counter
예제 #8
0
 def _PersitentOOBTree__len(self):
     # Build a Length counter seeded with the current size of the
     # underlying data, and flag the object as changed.
     counter = Length()
     current_size = len(self._data)
     if current_size > 0:
         counter.change(current_size)
     self._p_changed = True
     return counter
예제 #9
0
 def __init__(self):
     """Create the empty containers and counters used by this object."""
     # Two independent counters.
     self._anon_length = Length()
     self._length = Length()
     # Empty trees for sessions, ratings and anonymous ratings.
     self._sessions = OOBTree()
     self._ratings = OOBTree()
     self._anon_ratings = IOBTree()
예제 #10
0
파일: pool.py 프로젝트: liqd/adhocracy3
 def _get_next_number(self, prefix):
     # Return the current value of the per-prefix counter and advance it by
     # one.  The counter lives in the attribute '_autoname_last_<prefix>'
     # and is created on first use.
     attr_name = '_autoname_last_' + prefix
     counter = getattr(self, attr_name, None)
     if counter is None:
         counter = Length()
         setattr(self, attr_name, counter)
     current = counter.value
     counter.change(1)
     return current
예제 #11
0
class CanopyLexicon(Lexicon):  # pragma: no cover
    def sourceToWordIds(self, last):
        # ``last`` is an iterable of words (None counts as empty); every
        # word is mapped through _getWordIdCreate.
        words = [] if last is None else last

        # Replace a non-Length wordCount by a Length holding its value.
        if not isinstance(self.wordCount, Length):
            self.wordCount = Length(self.wordCount())
        self.wordCount._p_deactivate()

        to_wid = self._getWordIdCreate
        return [to_wid(word) for word in words]
 def clear(self):
   '''clear the index.'''
   # Reset the length counter, or install one if the current ``__len__``
   # attribute is not a Length yet.
   length = self.__len__
   if isinstance(length, Length):
     length.set(0)
   else:
     self.__len__ = Length()

   # Same for the object counter: an AttributeError means there is no
   # usable counter, so a new one is created.
   try:
     self.numObjects.set(0)
   except AttributeError:
     self.numObjects = Length()

   if self.ReverseOrder:
     self._reverseOrder = OOTreeSet()
   self._setup()
예제 #13
0
    def clear(self):
        # Replace the index structures with fresh, empty ones.
        self._unindex = IOBTree()
        self._index = OOBTree()
        self._length = Length()

        # Keep an existing counter (and bump it); create one only when
        # there is none yet.
        if self._counter is not None:
            self._increment_counter()
        else:
            self._counter = Length()
예제 #14
0
    def clear(self):
        # Reset the index to an empty state.
        self.documentCount = Length()
        self.wordCount = Length()

        # ._docwords = BTree(docid -> widcode)
        # Used for document unindexing, but not for phrase search.
        self._docwords = self.family.IO.BTree()

        # ._wordinfo = BTree(wids -> (TreeSet((weight, docid)), BTree(docid -> weight), Length))
        self._wordinfo = self.family.IO.BTree()
예제 #15
0
class MailDataStorage(PersistentItem):
    interface.implements(IMailDataStorage)

    def __init__(self, **kw):
        # The counter is created before the base class handles ``kw``.
        self.count = Length(0)
        super(MailDataStorage, self).__init__(**kw)

    def append(self, form, record, request):
        # Send ``record`` through the IMailTemplate adapter of
        # (form, request) to ``self.emailto``, then count it.
        template = getMultiAdapter((form, request), IMailTemplate)
        recipients = (self.emailto,)
        template.send(recipients, record=record, storage=self)
        self.count.change(1)
예제 #16
0
파일: core.py 프로젝트: RyanWarm/graphagus
    def _init(self):
        # Create the empty storage of the graph.

        # Id counters, all starting at 0.
        self._typeid = Length(0)
        self._edgeid = Length(0)
        self._nodeid = Length(0)

        self.typeids = PObject()

        # Adjacency trees.
        self.incoming = IOBTree()
        self.outgoing = IOBTree()

        # Node, edge and edge-data trees.
        self.edgedata = IOBTree()
        self.edges = IOBTree()
        self.nodes = IOBTree()
예제 #17
0
파일: index.py 프로젝트: DATAQC/dedupe
class CanopyLexicon(Lexicon):  # pragma : no cover
    def __init__(self, stop_words):
        # The pipeline consists of a single stop-word remover.
        super(CanopyLexicon, self).__init__()
        self._pipeline = [CustomStopWordRemover(stop_words)]

    def sourceToWordIds(self, last):
        # ``last`` is an iterable of words (None counts as empty); it is
        # run through the pipeline and every remaining word is mapped
        # through _getWordIdCreate.
        words = [] if last is None else last
        for stage in self._pipeline:
            words = stage.process(words)

        # Replace a non-Length wordCount by a Length holding its value.
        if not isinstance(self.wordCount, Length):
            self.wordCount = Length(self.wordCount())
        self.wordCount._p_deactivate()

        to_wid = self._getWordIdCreate
        return [to_wid(word) for word in words]
예제 #18
0
class MessageService(Persistent, Location):
    interface.implements(IMessageService)

    def __init__(self, storage):
        # ``storage`` becomes the parent; messages themselves are fetched
        # from it, this object only keeps an index and an unread counter.
        self.__parent__ = storage

        # message.__date__ -> message.__id__
        self.index = OIBTree()
        self.unread = Length(0)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(self.index.values())

    def __contains__(self, key):
        # Membership is decided by the parent storage, not by the index.
        return self.__parent__.getMessage(key) is not None

    def get(self, msgId, default=None):
        # Return the message only if the storage knows it AND its date is
        # indexed here; otherwise ``default``.
        msg = self.__parent__.getMessage(msgId)
        if msg is not None and msg.__date__ in self.index:
            return msg
        return default

    def append(self, message):
        message.__parent__ = self

        # NOTE(review): the unread counter is bumped when readStatus() is
        # truthy -- confirm the polarity of readStatus() against the storage.
        if self.__parent__.readStatus(message):
            self.unread.change(1)

        self.index[message.__date__] = message.__id__

    def remove(self, message):
        key = message.__date__

        if key in self.index:
            del self.index[key]

            # Never let the unread counter drop below zero.
            if self.__parent__.readStatus(message) and self.unread() > 0:
                self.unread.change(-1)

    def create(self, **data):
        # NotImplementedError is the exception class; ``NotImplemented`` is
        # a non-callable constant, so calling it raised TypeError instead.
        raise NotImplementedError('create')
예제 #19
0
class MessageQueues(persistent.dict.PersistentDict):
    interface.implements(interfaces.IMessageQueues)

    def __init__(self, *args, **kwargs):
        super(MessageQueues, self).__init__(*args, **kwargs)
        # One composite queue per message state.
        for state in interfaces.MESSAGE_STATES:
            self[state] = queue.CompositeQueue()
        self._messages_sent = Length()

    @property
    def messages_sent(self):
        return self._messages_sent()

    def dispatch(self):
        # Dispatch under a lock file; when the lock is already taken, do
        # nothing and report (0, 0).
        try:
            lock = zc.lockfile.LockFile(LOCKFILE_NAME)
        except zc.lockfile.LockError:
            logger.info("Dispatching is locked by another process.")
            return (0, 0)

        try:
            outcome = self._dispatch()
        finally:
            lock.close()
        return outcome

    def _dispatch(self):
        # Drain the 'new' and 'retry' queues, returning (sent, failed).
        sent = 0
        failed = 0

        for name in ('new', 'retry'):
            pending = self[name]
            while True:
                try:
                    message = pending.pull()
                except IndexError:
                    # Queue exhausted.
                    break
                status, message = dispatch(message)
                if status == 'sent':
                    sent += 1
                else:
                    failed += 1

        self._messages_sent.change(sent)
        return sent, failed

    def clear(self, queue_names=('error', 'sent')):
        # Replace each named queue by a new instance of its own class.
        for name in queue_names:
            queue_class = self[name].__class__
            self[name] = queue_class()
    def _migrateStorage(self):
        # An LOBTree is used for storage.  ``self`` may come from an older
        # version that keeps its data in the native Archetypes storage, or
        # in the former IOBTree (<= 1.6.0b2), in the SavedFormInput field.
        required = ('_inputStorage', '_inputItems', '_length')
        if all(base_hasattr(self, attr) for attr in required):
            # Already migrated.
            return

        try:
            saved_input = self.getSavedFormInput()
        except AttributeError:
            saved_input = []

        self._inputStorage = SavedDataBTree()
        self._inputItems = 0
        self._length = Length()

        if not len(saved_input):
            return

        # Copy the old rows under consecutive integer keys, then empty the
        # old field and record how many rows were moved.
        migrated = 0
        for row in saved_input:
            self._inputStorage[migrated] = row
            migrated += 1
        self.SavedFormInput = []
        self._inputItems = migrated
        self._length.set(migrated)
예제 #21
0
 def clear(self):
     """Reset the lexicon to an empty state."""
     self._words = IOBTree()  # wid -> word
     self._wids = OIBTree()  # word -> wid
     self._wid_length_based = False
     self.length = Length()
예제 #22
0
    def add(self, name, other, send_events=True):
        """See IFolder."""
        # Validate the name before touching anything.
        if not isinstance(name, basestring):
            type_name = name.__class__.__name__
            raise TypeError("Name must be a string rather than a %s" %
                            type_name)
        if not name:
            raise TypeError("Name must not be empty")

        name = unicodify(name)
        if name in self.data:
            raise KeyError('An object named %s already exists' % name)

        if send_events:
            objectEventNotify(ObjectWillBeAddedEvent(other, self, name))

        # Hook the new object into the containment hierarchy.
        other.__parent__ = self
        other.__name__ = name

        # Backwards compatibility: folders without a Length _num_objects
        # get one, initialised from the current data size.
        if self._num_objects is None:
            self._num_objects = Length(len(self.data))

        self.data[name] = other
        self._num_objects.change(1)

        # Only folders with an explicit order track the new name.
        if self._order is not None:
            self._order += (name,)

        if send_events:
            objectEventNotify(ObjectAddedEvent(other, self, name))
예제 #23
0
    def remove(self, name, send_events=True):
        """See IFolder."""
        name = unicodify(name)
        removed = self.data[name]

        if send_events:
            objectEventNotify(ObjectWillBeRemovedEvent(removed, self, name))

        # Detach the object from the containment hierarchy.
        if hasattr(removed, '__parent__'):
            del removed.__parent__
        if hasattr(removed, '__name__'):
            del removed.__name__

        # Backwards compatibility: folders without a Length _num_objects
        # get one, initialised from the current data size.
        if self._num_objects is None:
            self._num_objects = Length(len(self.data))

        del self.data[name]
        self._num_objects.change(-1)

        # Drop the name from the explicit order, if there is one.
        if self._order is not None:
            self._order = tuple(
                entry for entry in self._order if entry != name)

        if send_events:
            objectEventNotify(ObjectRemovedEvent(removed, self, name))

        return removed
예제 #24
0
    def _store_data(self, appstruct: dict):
        """Store data appstruct.

        The `comments_count` value is converted from int to
        :class:`BTrees.Length.Length`, to support ZODB conflict resolution.
        """
        field = self._count_field_name
        if field in appstruct:  # pragma: no branch
            stored = getattr(self.context, self._annotation_key, {})
            # Reuse the stored counter when there is one.
            counter = stored[field] if field in stored else Length(0)
            counter.set(appstruct[field])
            appstruct[field] = counter
        super()._store_data(appstruct)
예제 #25
0
 def sourceToWordIds(self, last):
     # ``last`` is an iterable of words (None counts as empty); every word
     # is mapped through _getWordIdCreate.
     words = [] if last is None else last

     # Replace a non-Length wordCount by a Length holding its value.
     if not isinstance(self.wordCount, Length):
         self.wordCount = Length(self.wordCount())
     self.wordCount._p_deactivate()

     to_wid = self._getWordIdCreate
     return [to_wid(word) for word in words]
예제 #26
0
파일: text.py 프로젝트: barkinet/zerodb
    def _mass_add_wordinfo(self, wid2weight, docid):
        # Record ``weight`` for ``docid`` under every wid of ``wid2weight``
        # (a wid -> weight mapping) and grow wordCount by the number of wids
        # that had no entry in self._wordinfo before.
        # self._wordinfo - IOBTree of docid -> weight trees
        get_doc2score = self._wordinfo.get
        new_word_count = 0

        # Fill up cache for performance over the network
        wids = wid2weight.keys()
        parallel_traversal(self._wordinfo, wids)
        parallel_traversal(map(get_doc2score, wids), [docid] * len(wids))

        for wid, weight in wid2weight.items():
            doc2score = get_doc2score(wid)
            if doc2score is None:
                # First document for this wid: start with a plain dict.
                doc2score = {}
                new_word_count += 1
            elif (isinstance(doc2score, dict) and
                  len(doc2score) == self.DICT_CUTOFF):
                # The dict has reached the cutoff size: switch to a BTree.
                doc2score = self.family.IF.BTree(doc2score)
            doc2score[docid] = weight
            self._wordinfo[wid] = doc2score  # not redundant:  Persistency!
        try:
            self.wordCount.change(new_word_count)
        except AttributeError:
            # upgrade wordCount to Length object
            self.wordCount = Length(len(self._wordinfo))
예제 #27
0
 def _change_doc_len(self, delta):
     # Change total doc length used for scoring
     try:
         self._totaldoclen.change(delta)
     except AttributeError:
         # _totaldoclen is still a plain number (it has no .change()):
         # opportunistically upgrade it to a Length object.  int() is used
         # instead of the Python-2-only long(); on Python 2 int() promotes
         # to long by itself when the value requires it.
         self._totaldoclen = Length(int(self._totaldoclen + delta))
예제 #28
0
 def clear(self):
     """Reset the forward and reverse mappings and the doc counter."""
     self._num_docs = Length(0)
     # Reverse index: docid -> its index value.
     self._rev_index = self.family.IO.BTree()
     # Forward index: indexed value -> sequence of docids.
     self._fwd_index = self.family.OO.BTree()
예제 #29
0
class PollRecord(BTreeContainer):
    implements(IPollRecord, IContentContainer)

    # Class-level defaults; instances overwrite them as votes come in.
    voteCount = None
    firstVote = None
    lastVote = None

    def __init__(self, *kv, **kw):
        super(PollRecord, self).__init__(*kv, **kw)
        self._results = OOBTree()
        self.voteCount = Length()

    def add(self, record):
        # Count every answer in ``record.choices`` on this record, on the
        # IPolling utility and on the per-question record.
        polling = getUtility(IPolling)
        for question_key, answer_ids in record.choices.items():
            question = self._results.get(question_key)
            if question is None:
                question = QuestionRecord()
                notify(ObjectCreatedEvent(question))
                self._results[question_key] = question

            for answer_id in answer_ids:
                self.voteCount.change(1)
                polling.voteCount.change(1)
                question.voteCount.change(1)

                if question.firstVote is None:
                    question.firstVote = record
                question.lastVote = record

                tally = question.get(answer_id)
                if not tally:
                    question[answer_id] = Length(1)
                else:
                    tally.change(1)

        if self.firstVote is None:
            self.firstVote = record
        self.lastVote = record
        self._p_changed = 1

    def getResults(self):
        # Returns ({question: {answer: (votes, votes / question total)}}, self).
        res = {}
        for question, answers in self._results.items():
            per_answer = res[question] = {}
            size = float(answers.voteCount.value)
            for answer, votes in answers.items():
                count = votes.value
                per_answer[answer] = (count, count / size)
        return res, self
예제 #30
0
    def __init__(self, principalId):
        # Create the empty storage for the given principal.
        self.principalId = principalId

        # This counter starts at 1.
        self._next = Length(1)

        self.readstatus = IITreeSet()
        self.services = OOBTree()
        self.messages = IOBTree()
        self.index = OIBTree()
예제 #31
0
 def clear(self):
     # Replace the index structures with fresh, empty ones.
     self._unindex = IOBTree()
     self._index = OOBTree()
     self._length = Length()
예제 #32
0
class UUIDIndex(UnIndex):
    """Index for uuid fields with an unique value per key.

    The internal structure is:

    self._index = {datum:documentId}
    self._unindex = {documentId:datum}

    For each datum only one documentId can exist.
    """

    meta_type = "UUIDIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query", "range"]

    manage = manage_main = DTMLFile('dtml/manageUUIDIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    def clear(self):
        # Replace the index structures with fresh, empty ones.
        self._length = Length()
        self._index = OIBTree()
        self._unindex = IOBTree()

    def numObjects(self):
        """Return the number of indexed objects. Since we have a 1:1 mapping
        from documents to values, we can reuse the stored length.
        """
        return self.indexSize()

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            # Asked about a different index: nothing to report.
            return []

        if not withLengths:
            return tuple(self._index.keys())
        # We know the length for each value is one
        return [(k, 1) for k in self._index.keys()]

    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.
        """
        if entry is None:
            return

        old_docid = self._index.get(entry, _marker)
        if old_docid is _marker:
            self._index[entry] = documentId
            self._length.change(1)
        elif old_docid != documentId:
            # No exception is being handled here, so log with error();
            # exception() would append a meaningless empty traceback.
            logger.error("A different document with value '%s' already "
                         "exists in the index.'", entry)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        # The entry is removed whatever docid it maps to; ``documentId``
        # is not consulted.
        old_docid = self._index.get(entry, _marker)
        if old_docid is not _marker:
            del self._index[entry]
            self._length.change(-1)

    def _get_object_datum(self, obj, attr):
        # for a uuid it never makes sense to acquire a parent value via
        # Acquisition
        own_value = getattr(aq_base(obj), attr, _marker)
        if own_value is _marker:
            return _marker
        return super(UUIDIndex, self)._get_object_datum(obj, attr)
예제 #33
0
class OkapiIndex(BaseIndex):

    # BM25 free parameters.
    K1 = 1.2
    B = 0.75
    assert K1 >= 0.0
    assert 0.0 <= B <= 1.0

    def __init__(self, lexicon):
        BaseIndex.__init__(self, lexicon)

        # For Okapi, ._wordinfo is
        #     wid -> {docid -> frequency}; t -> D -> f(D, t)
        #
        # and ._docweight is
        #     docid -> # of words in the doc
        # That is just len(self._docwords[docid]), but _docwords is stored
        # in compressed form, and uncompressing it only to count the list
        # length would be ridiculously expensive.

        # sum(self._docweight.values()): the total # of words in all docs.
        # It is not used often enough for speed to matter.  A
        # BTree.Length.Length object avoids concurrent write conflicts.
        self._totaldoclen = Length(0)

    def index_doc(self, docid, text):
        added = BaseIndex.index_doc(self, docid, text)
        self._change_doc_len(added)
        return added

    def _reindex_doc(self, docid, text):
        # Take the old length out of the total before reindexing.
        old_len = self._docweight[docid]
        self._change_doc_len(-old_len)
        return BaseIndex._reindex_doc(self, docid, text)

    def unindex_doc(self, docid):
        old_len = self._docweight[docid]
        self._change_doc_len(-old_len)
        BaseIndex.unindex_doc(self, docid)

    def _change_doc_len(self, delta):
        # Adjust the total doc length used for scoring; a zero delta is a
        # no-op.
        if delta != 0:
            try:
                self._totaldoclen.change(delta)
            except AttributeError:
                # Opportunistically upgrade the _totaldoclen attribute to
                # a Length object
                self._totaldoclen = Length(int(self._totaldoclen + delta))

    def _search_wids(self, wids):
        # The workhorse. Return a list of (IIBucket, weight) pairs, one pair
        # for each wid t in wids. The IIBucket, times the weight, maps D to
        # TF(D,t) * IDF(t) for every docid D containing t.
        # As currently written, the weights are always 1, and the IIBucket
        # maps D to TF(D,t)*IDF(t) directly, where the product is computed
        # as a float but stored as a scaled_int.
        # Cautions: _search_wids hardcodes the scaled_int function.
        #
        # Note about the result being tf * idf:  tf is small -- it can't be
        # larger than k1+1 = 2.2.  idf is formally unbounded, but is less
        # than 14 for a term that appears in only 1 of a million documents.
        # So the product is probably less than 32, or 5 bits before the
        # radix point.  If we did the scaled-int business on both of them,
        # we'd be up to 25 bits.  Add 64 of those and we'd be in overflow
        # territory.  That's pretty unlikely, so we *could* just store
        # scaled_int(tf) in result[docid], and use scaled_int(idf) as an
        # invariant weight across the whole result.  But besides skating
        # near the edge, it's not a speed cure, since the computation of tf
        # would still be done at Python speed, and it's a lot more work
        # than just multiplying by idf.

        if not wids:
            return []

        doc_total = float(self.document_count())  # total # of docs
        try:
            total_len = self._totaldoclen()
        except TypeError:
            # _totaldoclen has not yet been upgraded
            total_len = self._totaldoclen
        meandoclen = total_len / doc_total

        K1 = self.K1
        B = self.B
        K1_plus1 = K1 + 1.0
        B_from1 = 1.0 - B

        #                           f(D, t) * (k1 + 1)
        #   TF(D, t) =  -------------------------------------------
        #               f(D, t) + k1 * ((1-b) + b*len(D)/E(len(D)))

        docid2len = self._docweight
        pairs = []
        for wid in wids:
            doc2freq = self._wordinfo[wid]  # map {docid -> f(docid, t)}
            # An unscaled float ...
            idf = inverse_doc_frequency(len(doc2freq), doc_total)
            # ... with part of the scaled_int computation floated out.
            idf *= 1024.0

            # inner score loop, was implemented in C before
            bucket = IIBucket()
            for docid, f in doc2freq.items():
                lenweight = B_from1 + B * docid2len[docid] / meandoclen
                tf = f * K1_plus1 / (f + K1 * lenweight)
                bucket[docid] = int(tf * idf + 0.5)

            pairs.append((bucket, 1))
        return pairs

    def query_weight(self, terms):
        # Collect the wids of all terms.
        wids = []
        for term in terms:
            wids.extend(self._lexicon.termToWordIds(term))

        # The max score for term t is the maximum value of
        #     TF(D, t) * IDF(Q, t)
        # IDF can be computed directly, and TF(D, t) is bounded above by
        # 1+K1.
        doc_total = float(len(self._docweight))
        tfmax = 1.0 + self.K1
        total = 0
        for wid in self._remove_oov_wids(wids):
            idf = inverse_doc_frequency(len(self._wordinfo[wid]), doc_total)
            total += scaled_int(idf * tfmax)
        return total

    def _get_frequencies(self, wids):
        # Returns ({wid: number of occurrences in wids}, len(wids)).
        counts = {}
        for wid in wids:
            counts[wid] = counts.get(wid, 0) + 1
        return counts, len(wids)
예제 #34
0
 def testUpgradeIdToolDicts(self):
   # Checks id generation on an id tool that still carries the old
   # dictionaries: first through the compatibility code (generators
   # deleted), then again once the generators have been restored.
   #
   # With old erp5_core, we have no generators, no IdTool_* zsql methods,
   # and we have a dictionary stored on id tool
   id_tool = self.portal.portal_ids
   # Rebuild a persistent mapping like it already existed in beginning 2010
   # First persistent mapping of generateNewLengthIdList
   id_tool.dict_length_ids = PersistentMapping()
   id_tool.dict_length_ids['foo'] = Length(5)
   id_tool.dict_length_ids['bar'] = Length(5)
   id_tool.IdTool_zSetLastId(id_group='foo', last_id=5)
   id_tool.IdTool_zSetLastId(id_group='bar', last_id=10)
   # Then persistent mapping of generateNewId
   id_tool.dict_ids = PersistentMapping()
   id_tool.dict_ids['foo'] = 3
   # it was unfortunately possible to define something else
   # than strings
   id_tool.dict_ids[('bar','baz')] = 2
   # Delete portal type info and new generators
   id_tool.manage_delObjects(ids=list(id_tool.objectIds()))
   # Patched on the class; undone below with ``del``.
   id_tool.__class__.getTypeInfo = lambda self: None
   # Test with compatibility
   self.tic()
   id_list = id_tool.generateNewLengthIdList(id_group='foo', store=1)
   self.assertEqual(id_list, [5])
   self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
   # Now, restore and make sure we can still generate ids
   del id_tool.__class__.getTypeInfo
   bt = self.portal.portal_templates.getInstalledBusinessTemplate('erp5_core',
                                                                 strict=True)
   # Copy back every object that the business template holds directly
   # under 'portal_ids'.
   for path, obj in bt._path_item._objects.iteritems():
     path, obj_id = path.rsplit('/', 1)
     if path == 'portal_ids':
       id_tool._setObject(obj_id, obj._getCopy(bt))
   self.tic()
   id_list = id_tool.generateNewLengthIdList(id_group='foo')
   # it is known that with current upgrade there is a hole
   self.assertEqual(id_list, [7])
   new_id = id_tool.generateNewId(id_group='foo')
   self.assertEqual(new_id, 4)
   new_id = id_tool.generateNewId(id_group=('bar','baz'))
   self.assertEqual(new_id, 3)
   # Make sure that the old code is not used any more, so the dict on
   # id tool should not change, checking for length_dict
   self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
   id_list = id_tool.generateNewLengthIdList(id_group='bar')
   self.assertEqual(id_list, [11])
   generator_list = [x for x in id_tool.objectValues()
                     if x.getReference()=='mysql_non_continuous_increasing']
   self.assertEqual(len(generator_list), 1)
   generator = generator_list[0]
   self.assertEqual(generator.last_max_id_dict['foo'].value, 7)
   self.assertEqual(generator.last_max_id_dict['bar'].value, 11)
   # Make sure that the old code is not used any more, so the dict on
   # id tool should not change, checking for dict
   self.assertEqual(id_tool.dict_ids['foo'], 3)
   generator_list = [x for x in id_tool.objectValues()
                     if x.getReference()=='zodb_continuous_increasing']
   self.assertEqual(len(generator_list), 1)
   generator = generator_list[0]
   self.assertEqual(generator.last_id_dict['foo'], 4)
   # The tuple id group is looked up under its string representation.
   self.assertEqual(generator.last_id_dict["('bar', 'baz')"], 3)
예제 #35
0
class Lexicon(Persistent):
    # Persistent two-way mapping between words and integer word ids (wids).
    #
    # Text is split with _text2list() and then passed through every pipeline
    # element given to the constructor.  Each element must provide
    # ``process(list)``; it may also provide ``processGlob(list)``, which
    # parseTerms() prefers when present.
    #
    # NOTE: documentation is deliberately written as comments; no docstrings
    # are added to methods that did not already have one.

    def __init__(self, *pipeline):
        self._wids = OIBTree()  # word -> wid
        self._words = IOBTree()  # wid -> word
        # wid 0 is reserved for words that aren't in the lexicon (OOV -- out
        # of vocabulary).  This can happen, e.g., if a query contains a word
        # we never saw before, and that isn't a known stopword (or otherwise
        # filtered out).  Returning a special wid value for OOV words is a
        # way to let clients know when an OOV word appears.
        #
        # This instance attribute shadows the wordCount() method defined
        # below; calling ``lexicon.wordCount()`` then calls the Length object.
        self.wordCount = Length()
        self._pipeline = pipeline

    def wordCount(self):
        """Return the number of unique terms in the lexicon."""
        # overridden per instance: only reachable on instances that have no
        # ``wordCount`` attribute of their own (see sourceToWordIds, which
        # replaces it by a Length on first use).
        return len(self._wids)

    def words(self):
        # All known words, in BTree key order.
        return self._wids.keys()

    def wids(self):
        # All assigned word ids, in BTree key order.
        return self._words.keys()

    def items(self):
        # (word, wid) pairs.
        return self._wids.items()

    def sourceToWordIds(self, text):
        # Index-time entry point: run ``text`` through the pipeline and
        # return one wid per resulting word, creating wids for new words.
        # ``None`` is treated as the empty string.
        if text is None:
            text = ''
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        if not isinstance(self.wordCount, Length):
            # Make sure wordCount is overridden with a BTrees.Length.Length
            self.wordCount = Length(self.wordCount())
        # Strategically unload the length value so that we get the most
        # recent value written to the database to minimize conflicting wids
        # Because length is independent, this will load the most
        # recent value stored, regardless of whether MVCC is enabled
        self.wordCount._p_deactivate()
        return list(map(self._getWordIdCreate, last))

    def termToWordIds(self, text):
        # Query-time entry point: like sourceToWordIds() but never creates
        # wids; unknown words map to the reserved OOV wid 0.
        last = _text2list(text)
        for element in self._pipeline:
            last = element.process(last)
        return [self._wids.get(word, 0) for word in last]

    def parseTerms(self, text):
        # Run ``text`` through the pipeline and return the resulting words
        # (not wids).  Elements offering ``processGlob`` are asked to use it
        # instead of ``process``.
        last = _text2list(text)
        for element in self._pipeline:
            process = getattr(element, "processGlob", element.process)
            last = process(last)
        return last

    def isGlob(self, word):
        # True when ``word`` contains one of the glob characters * or ?.
        return "*" in word or "?" in word

    def get_word(self, wid):
        # Raises KeyError for an unknown wid.
        return self._words[wid]

    def get_wid(self, word):
        # Returns the OOV wid 0 for an unknown word.
        return self._wids.get(word, 0)

    def globToWordIds(self, pattern):
        # Implement * and ? just as in the shell, except the pattern
        # must not start with either of these.
        #
        # Returns the list of wids of all known words matching ``pattern``;
        # raises QueryError if the pattern starts with a glob character.

        # Split the pattern into its literal prefix and the remainder that
        # starts at the first glob character (empty if there is none).
        glob_start = len(pattern)
        for index, char in enumerate(pattern):
            if char in "*?":
                glob_start = index
                break
        prefix = pattern[:glob_start]
        globbed = pattern[glob_start:]

        if not globbed:
            # There were no globbing characters in the pattern
            wid = self._wids.get(prefix, 0)
            if wid:
                return [wid]
            return []
        if not prefix:
            # The pattern starts with a globbing character.
            # This is too inefficient (it would scan the whole lexicon),
            # so we raise an exception.
            raise QueryError(
                "pattern %r shouldn't start with glob character" % pattern)

        # The prefix is literal text and must be escaped like every other
        # literal character; otherwise regex metacharacters in it (".", "+",
        # "(", ...) change the meaning of the pattern or fail to compile.
        pat = re.escape(prefix)
        for c in globbed:
            if c == "*":
                pat += ".*"
            elif c == "?":
                pat += "."
            else:
                pat += re.escape(c)
        pat += "$"
        prog = re.compile(pat)

        wids = []
        # Range search: iteration starts at the first key >= prefix, and
        # can stop as soon as a key no longer shares the prefix.
        for key, wid in self._wids.items(prefix):
            if not key.startswith(prefix):
                break
            if prog.match(key):
                wids.append(wid)
        return wids

    def _getWordIdCreate(self, word):
        # Return the wid for ``word``, registering the word in both
        # mappings under a fresh wid if it is not known yet.
        wid = self._wids.get(word)
        if wid is None:
            wid = self._new_wid()
            self._wids[word] = wid
            self._words[wid] = word
        return wid

    def _new_wid(self):
        # Bump the counter and use its new value as the wid; since the
        # counter starts at Length()'s default, wid 0 is never handed out
        # as long as the counter is not negative.
        count = self.wordCount
        count.change(1)
        while count() in self._words:
            # just to be safe
            count.change(1)
        return count()
예제 #36
0
class Folder(Persistent):
    """ A folder implementation which acts much like a Python dictionary.

    Keys must be Unicode strings; values must be arbitrary Python objects.
    """
    family = BTrees.family64

    __name__ = None
    __parent__ = None
    # Names of the contained objects that were registered as services.
    __services__ = ()

    # Default uses ordering of underlying BTree.
    _order = None

    def _get_order(self):
        """ Return the explicit order as a list if one is set, otherwise
        the keys of the underlying BTree. """
        if self._order is not None:
            return list(self._order)
        return self.data.keys()

    def _set_order(self, value):
        """ Store ``value`` (an iterable of names) as the explicit order;
        each name is converted to Unicode. """
        # XXX:  should we test against self.data.keys()?
        self._order = tuple([unicode(x) for x in value])

    def _del_order(self):
        """ Drop the explicit order, falling back to the class default. """
        del self._order

    order = property(_get_order, _set_order, _del_order)

    def __init__(self, data=None, family=None):
        """ Constructor.  Data may be an initial dictionary mapping object
        name to object.  ``family`` optionally overrides the BTrees family
        used for the underlying storage. """
        if family is not None:
            self.family = family
        if data is None:
            data = {}
        self.data = self.family.OO.BTree(data)
        # Separate counter so that len() does not have to walk the BTree.
        self._num_objects = Length(len(data))

    def find_service(self, service_name):
        """ Return a service named by ``service_name`` in this folder *or any
        parent service folder* or ``None`` if no such service exists.  A
        shortcut for :func:`substanced.service.find_service`."""
        return find_service(self, service_name)

    def find_services(self, service_name):
        """ Returns a sequence of service objects named by ``service_name``
        in this folder's lineage or an empty sequence if no such service
        exists.  A shortcut for :func:`substanced.service.find_services`"""
        return find_services(self, service_name)

    def add_service(self, name, obj, registry=None, **kw):
        """ Add a service to this folder named ``name``.  Extra keyword
        arguments are passed through to :meth:`add`."""
        if registry is None:
            registry = get_current_registry()
        kw['registry'] = registry
        self.add(name, obj, **kw)
        if name not in self.__services__:
            self.__services__ = self.__services__ + (name,)

    def keys(self):
        """ Return an iterable sequence of object names present in the folder.

        Respect ``order``, if set.
        """
        return self.order

    def __iter__(self):
        """ An alias for ``keys``
        """
        return iter(self.order)

    def values(self):
        """ Return an iterable sequence of the values present in the folder.

        Respect ``order``, if set.
        """
        if self._order is not None:
            return [self.data[name] for name in self.order]
        return self.data.values()

    def items(self):
        """ Return an iterable sequence of (name, value) pairs in the folder.

        Respect ``order``, if set.
        """
        if self._order is not None:
            return [(name, self.data[name]) for name in self.order]
        return self.data.items()

    def __len__(self):
        """ Return the number of objects in the folder.
        """
        return self._num_objects()

    def __nonzero__(self):
        """ Return ``True`` unconditionally.
        """
        return True

    def __repr__(self):
        klass = self.__class__
        classname = '%s.%s' % (klass.__module__, klass.__name__)
        return '<%s object %r at %#x>' % (classname,
                                          self.__name__,
                                          id(self))

    def __getitem__(self, name):
        """ Return the object named ``name`` added to this folder or raise
        ``KeyError`` if no such object exists.  ``name`` must be a Unicode
        object or directly decodeable to Unicode using the system default
        encoding.
        """
        name = unicode(name)
        return self.data[name]

    def get(self, name, default=None):
        """ Return the object named by ``name`` or the default.

        ``name`` must be a Unicode object or a bytestring object.

        If ``name`` is a bytestring object, it must be decodable using the
        system default encoding.
        """
        name = unicode(name)
        return self.data.get(name, default)

    def __contains__(self, name):
        """ Does the container contain an object named by name?

        ``name`` must be a Unicode object or a bytestring object.

        If ``name`` is a bytestring object, it must be decodable using the
        system default encoding.
        """
        name = unicode(name)
        return name in self.data

    def __setitem__(self, name, other):
        """ Set object ``other`` into this folder under the name ``name``.

        ``name`` must be a Unicode object or a bytestring object.

        If ``name`` is a bytestring object, it must be decodable using the
        system default encoding.

        ``name`` cannot be the empty string.

        When ``other`` is seated into this folder, it will also be decorated
        with a ``__parent__`` attribute (a reference to the folder into which
        it is being seated) and ``__name__`` attribute (the name passed in to
        this function).  It must not already have a ``__parent__`` attribute
        before being seated into the folder, or an exception will be raised.

        If a value already exists in the folder under the name ``name``,
        raise :class:`~substanced.folder.FolderKeyError` (see
        :meth:`check_name`).

        When this method is called, the object will be added to the objectmap,
        an :class:`substanced.event.ObjectWillBeAdded` event will be emitted
        before the object obtains a ``__name__`` or ``__parent__`` value, then
        a :class:`substanced.event.ObjectAdded` will be emitted after the
        object obtains a ``__name__`` and ``__parent__`` value.
        """
        return self.add(name, other)

    def validate_name(self, name, reserved_names=()):
        """
        Validate the ``name`` passed to ensure that it's addable to the folder.
        Returns the name decoded to Unicode if it passes all addable checks.
        It's not addable if:

        - the name is not a string, or is not decodeable to Unicode.

        - the name starts with ``@@`` (conflicts with explicit view names).

        - the name has slashes in it (WSGI limitation).

        - the name is empty.

        If any of these conditions are true, raise a :exc:`ValueError`.  If
        the name passed is in the list of ``reserved_names``, raise a
        :exc:`ValueError`.
        """
        if not isinstance(name, basestring):
            raise ValueError("Name must be a string rather than a %s" %
                             name.__class__.__name__)
        if not name:
            raise ValueError("Name must not be empty")

        try:
            name = unicode(name)
        except UnicodeDecodeError:
            raise ValueError('Name "%s" not decodeable to unicode' % name)

        if name in reserved_names:
            raise ValueError('%s is a reserved name' % name)

        if name.startswith('@@'):
            raise ValueError('Names which start with "@@" are not allowed')

        if '/' in name:
            raise ValueError('Names which contain a slash ("/") are not '
                             'allowed')

        return name

    def check_name(self, name, reserved_names=()):
        """ Perform all the validation checks implied by
        :meth:`~substanced.folder.Folder.validate_name` against the ``name``
        supplied but also fail with a
        :class:`~substanced.folder.FolderKeyError` if an object with the name
        ``name`` already exists in the folder.  Returns the validated name."""

        name = self.validate_name(name, reserved_names=reserved_names)

        if name in self.data:
            raise FolderKeyError('An object named %s already exists' % name)

        return name

    def add(self, name, other, send_events=True, reserved_names=(),
            duplicating=False, moving=False, registry=None):
        """ Same as ``__setitem__``.

        If ``send_events`` is False, suppress the sending of folder events.
        Don't allow names in the ``reserved_names`` sequence to be
        added. If ``duplicating`` is True, oids will be replaced in
        objectmap.

        This method returns the name used to place the subobject in the
        folder (a derivation of ``name``, usually the result of
        ``self.check_name(name)``).

        Raises :exc:`ValueError` if ``other`` already has a truthy
        ``__parent__``.
        """
        if registry is None:
            registry = get_current_registry()

        name = self.check_name(name, reserved_names)

        existing_parent = getattr(other, '__parent__', None)
        if existing_parent:
            # Report the parent the object is still attached to, not the
            # parent of this folder.
            raise ValueError(
                'obj %s added to folder %s already has a __parent__ attribute, '
                'please remove it completely from its existing parent (%s) '
                'before trying to readd it to this one' % (
                    other, self, existing_parent)
                )

        objectmap = find_objectmap(self)

        if objectmap is not None:

            basepath = resource_path_tuple(self)

            for node in postorder(other):
                node_path = node_path_tuple(node)
                path_tuple = basepath + (name,) + node_path[1:]
                # the below gives node an objectid; if the will-be-added event
                # is the result of a duplication, replace the oid of the node
                # with a new one
                objectmap.add(node, path_tuple, replace_oid=duplicating)

        if send_events:
            event = ObjectWillBeAdded(
                other, self, name, duplicating=duplicating, moving=moving
                )
            self._notify(event, registry)

        other.__parent__ = self
        other.__name__ = name

        self.data[name] = other
        self._num_objects.change(1)

        if self._order is not None:
            # Keep an explicit ordering in sync: new names go last.
            self._order += (name,)

        if send_events:
            event = ObjectAdded(
                other, self, name, duplicating=duplicating, moving=moving
                )
            self._notify(event, registry)

        return name

    def pop(self, name, default=marker, registry=None):
        """ Remove the item stored in the folder under ``name`` and return it.

        If ``name`` doesn't exist in the folder, and ``default`` **is not**
        passed, raise a :exc:`KeyError`.

        If ``name`` doesn't exist in the folder, and ``default`` **is**
        passed, return ``default``.

        When the object stored under ``name`` is removed from this folder,
        remove its ``__parent__`` and ``__name__`` values.

        When this method is called, emit an
        :class:`substanced.event.ObjectWillBeRemoved` event before the
        object loses its ``__name__`` or ``__parent__`` values.  Emit an
        :class:`substanced.event.ObjectRemoved` after the object loses its
        ``__name__`` and ``__parent__`` value,
        """
        if registry is None:
            registry = get_current_registry()
        try:
            result = self.remove(name, registry=registry)
        except KeyError:
            if default is marker:
                raise
            return default
        return result

    def _notify(self, event, registry=None):
        """ Dispatch ``event`` to the registry's subscribers for
        ``(event, event.object, self)``. """
        if registry is None:
            registry = get_current_registry()
        registry.subscribers((event, event.object, self), None)

    def __delitem__(self, name):
        """ Remove the object from this folder stored under ``name``.

        ``name`` must be a Unicode object or a bytestring object.

        If ``name`` is a bytestring object, it must be decodable using the
        system default encoding.

        If no object is stored in the folder under ``name``, raise a
        :exc:`KeyError`.

        When the object stored under ``name`` is removed from this folder,
        remove its ``__parent__`` and ``__name__`` values.

        When this method is called, the removed object will be removed from the
        objectmap, a :class:`substanced.event.ObjectWillBeRemoved` event will
        be emitted before the object loses its ``__name__`` or ``__parent__``
        values and a :class:`substanced.event.ObjectRemoved` will be emitted
        after the object loses its ``__name__`` and ``__parent__`` value,
        """
        return self.remove(name)

    def remove(self, name, send_events=True, moving=False, registry=None):
        """ Same thing as ``__delitem__``.

        If ``send_events`` is false, suppress the sending of folder events.
        If ``moving`` is True, the events sent will indicate that a move is
        in process.

        Returns the removed object.
        """
        name = unicode(name)
        other = self.data[name]
        oid = oid_of(other, None)

        if registry is None:
            registry = get_current_registry()

        if send_events:
            event = ObjectWillBeRemoved(other, self, name, moving=moving)
            self._notify(event, registry)

        if hasattr(other, '__parent__'):
            del other.__parent__

        if hasattr(other, '__name__'):
            del other.__name__

        del self.data[name]
        self._num_objects.change(-1)

        if name in self.__services__:
            # Build the tuple explicitly: ``filter()`` only returns a tuple
            # on Python 2, and the rest of the class relies on
            # ``__services__`` being a tuple (membership tests, ``+``).
            self.__services__ = tuple(
                x for x in self.__services__ if x != name)

        if self._order is not None:
            self._order = tuple([x for x in self._order if x != name])

        objectmap = find_objectmap(self)

        removed_oids = set([oid])

        if objectmap is not None and oid is not None:
            removed_oids = objectmap.remove(oid, references=not moving)

        if send_events:
            event = ObjectRemoved(other, self, name, removed_oids,
                                  moving=moving)
            self._notify(event, registry)

        return other

    def copy(self, name, other, newname=None, registry=None):
        """
        Copy a subobject named ``name`` from this folder to the folder
        represented by ``other``.  If ``newname`` is not none, it is used as
        the target object name; otherwise the existing subobject name is
        used.

        Returns whatever ``other.add`` returns (for a :class:`Folder`, the
        name the copy was stored under).
        """
        if newname is None:
            newname = name

        if registry is None:
            registry = get_current_registry()

        with tempfile.TemporaryFile() as f:
            # NOTE(review): ``get`` returns None for an unknown name, so a
            # missing object surfaces as AttributeError on the next line.
            obj = self.get(name)
            # Round-trip through a ZODB export/import to get a deep copy.
            obj._p_jar.exportFile(obj._p_oid, f)
            f.seek(0)
            new_obj = obj._p_jar.importFile(f)
            # The imported copy still carries its __parent__; ``add``
            # refuses objects that are already parented.
            del new_obj.__parent__
            obj = other.add(newname, new_obj, duplicating=True,
                            registry=registry)
            return obj

    def move(self, name, other, newname=None, registry=None):
        """
        Move a subobject named ``name`` from this folder to the folder
        represented by ``other``.  If ``newname`` is not none, it is used as
        the target object name; otherwise the existing subobject name is
        used.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events as well as the Added and WillBeAdded events
        sent will indicate that the object is moving.

        If the object was registered as a service in this folder, it is
        registered as a service in ``other`` under its new name.  Returns the
        moved object.
        """
        if newname is None:
            newname = name
        # Must be captured before ``remove``, which unregisters the service.
        is_service = name in self.__services__
        if registry is None:
            registry = get_current_registry()
        ob = self.remove(name, moving=True, registry=registry)
        other.add(newname, ob, moving=True, registry=registry)
        if is_service and newname not in other.__services__:
            # Register under the name the object now lives at; using the old
            # name would leave a dangling service entry after a rename.
            other.__services__ = other.__services__ + (newname,)
        return ob

    def rename(self, oldname, newname, registry=None):
        """
        Rename a subobject from oldname to newname.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events sent will indicate that the object is
        moving.
        """
        if registry is None:
            registry = get_current_registry()
        return self.move(oldname, self, newname, registry=registry)

    def replace(self, name, newobject, registry=None):
        """ Replace an existing object named ``name`` in this folder with a
        new object ``newobject``.  If there isn't an object named ``name`` in
        this folder, an exception will *not* be raised; instead, the new
        object will just be added.

        This operation is done in terms of a remove and an add.  The Removed
        and WillBeRemoved events will be sent for the old object, and the
        WillBeAdded and Add events will be sent for the new object.
        """
        if registry is None:
            registry = get_current_registry()
        if name in self:
            # Use the same registry for the removal events as for the add.
            self.remove(name, registry=registry)
        self.add(name, newobject, registry=registry)
예제 #37
0
class HBTreeFolder2Base(Persistent):
    """Base for BTree-based folders.

    Objects are stored in a hierarchy of OOBTrees: an id is split on
    H_SEPARATOR and every component but the last selects a nested bucket.

    BUG: Due to wrong design, we can't store 2 objects <A> and <A>-<B>
         where <A> does not contain '-'. We detect conflicts at the
         root level using 'type(ob) is OOBTree'
    """

    security = ClassSecurityInfo()

    manage_options = (({
        'label': 'Contents',
        'action': 'manage_main',
    }, ) + Folder.manage_options[1:])

    security.declareProtected(view_management_screens, 'manage_main')
    manage_main = DTMLFile('contents', globals())

    _htree = None  # OOBTree: { id -> object }
    _count = None  # A BTrees.Length
    _v_nextid = 0  # The integer component of the next generated ID
    title = ''

    def __init__(self, id=None):
        if id is not None:
            self.id = id
        self._initBTrees()

    def _initBTrees(self):
        self._htree = OOBTree()
        self._count = Length()

    def _populateFromFolder(self, source):
        """Fill this folder with the contents of another folder.
        """
        for name, value in source.objectItems():
            self._setOb(name, aq_base(value))

    security.declareProtected(view_management_screens, 'manage_fixCount')

    def manage_fixCount(self, dry_run=0):
        """Calls self._fixCount() and reports the result as text.
        """
        old, new = self._fixCount(dry_run)
        path = '/'.join(self.getPhysicalPath())
        if old == new:
            return "No count mismatch detected in HBTreeFolder2 at %s." % path
        else:
            return ("Fixed count mismatch in HBTreeFolder2 at %s. "
                    "Count was %d; corrected to %d" % (path, old, new))

    def _fixCount(self, dry_run=0):
        """Checks if the value of self._count disagrees with the content of
        the htree. If so, corrects self._count. Returns the old and new count
        values. If old==new, no correction was performed.
        """
        old = self._count()
        new = sum(1 for x in self._htree_iteritems())
        if old != new and not dry_run:
            self._count.set(new)
        return old, new

    def hashId(self, id):
        # Split an id into its hierarchy components.
        return id.split(H_SEPARATOR)

    def _htree_get(self, id):
        # Return the raw (unwrapped) object stored under ``id``; raise
        # KeyError(id) if there is none.  Intermediate OOBTree buckets are
        # never returned as objects.
        id_list = self.hashId(id)
        if len(id_list) == 1:
            ob = self._htree[id]
            if type(ob) is OOBTree:
                raise KeyError(id)
        else:
            ob = self._htree[id_list.pop(0)]
            if type(ob) is not OOBTree:
                raise KeyError(id)
            # Leaves are keyed by their full id, not by the last component.
            id_list[-1] = id
            for sub_id in id_list:
                ob = ob[sub_id]
        return ob

    def _getOb(self, id, default=_marker):
        """Return the named object from the folder
        """
        try:
            return self._htree_get(id).__of__(self)
        except KeyError:
            if default is _marker:
                raise KeyError(id)
        return default

    def __getitem__(self, id):
        try:
            return self._htree_get(id).__of__(self)
        except KeyError:
            raise KeyError(id)

    def _setOb(self, id, object):
        """Store the named object in the folder.
        """
        if type(object) is OOBTree:
            raise ValueError('HBTreeFolder2 can not store OOBTree objects')
        htree = self._htree
        # Walk (and create on demand) one bucket per leading id component.
        for sub_id in self.hashId(id)[:-1]:
            try:
                htree = htree[sub_id]
            except KeyError:
                htree[sub_id] = htree = OOBTree()
                continue
            if type(htree) is not OOBTree:
                assert self._htree[sub_id] is htree, (htree, id)
                raise KeyError('There is already an item whose id is %r' %
                               sub_id)
        if id in htree:
            raise KeyError('There is already an item named %r.' % id)
        htree[id] = object
        self._count.change(1)

    def _delOb(self, id):
        """Remove the named object from the folder.
        """
        htree = self._htree
        # (parent bucket, key) pairs of the buckets traversed, so that
        # buckets left empty by the deletion can be pruned afterwards.
        h = []
        for sub_id in self.hashId(id)[:-1]:
            h.append((htree, sub_id))
            htree = htree.get(sub_id)
            if type(htree) is not OOBTree:
                raise KeyError(id)
        if type(htree[id]) is OOBTree:
            raise KeyError(id)
        del htree[id]
        self._count.change(-1)
        while h and not htree:
            htree, sub_id = h.pop()
            del htree[sub_id]

    security.declareProtected(view_management_screens, 'getBatchObjectListing')

    def getBatchObjectListing(self, REQUEST=None):
        """Return a structure for a page template to show the list of objects.
        """
        if REQUEST is None:
            REQUEST = {}
        pref_rows = int(REQUEST.get('dtpref_rows', 20))
        b_start = int(REQUEST.get('b_start', 1))
        b_count = int(REQUEST.get('b_count', 1000))
        b_end = b_start + b_count - 1
        url = self.absolute_url() + '/manage_main'
        count = self.objectCount()

        if b_end < count:
            next_url = url + '?b_start=%d' % (b_start + b_count)
        else:
            b_end = count
            next_url = ''

        if b_start > 1:
            prev_url = url + '?b_start=%d' % max(b_start - b_count, 1)
        else:
            prev_url = ''

        formatted = [listtext0 % pref_rows]
        for optID in islice(self.objectIds(), b_start - 1, b_end):
            # Escape the raw id once for each slot; escaping the already
            # escaped text again would double-encode the first slot.
            formatted.append(
                listtext1 % (escape(optID, quote=1), escape(optID)))
        formatted.append(listtext2)
        return {
            'b_start': b_start,
            'b_end': b_end,
            'prev_batch_url': prev_url,
            'next_batch_url': next_url,
            'formatted_list': ''.join(formatted)
        }

    security.declareProtected(view_management_screens,
                              'manage_object_workspace')

    def manage_object_workspace(self, ids=(), REQUEST=None):
        '''Redirects to the workspace of the first object in
        the list.'''
        if ids and REQUEST is not None:
            REQUEST.RESPONSE.redirect('%s/%s/manage_workspace' %
                                      (self.absolute_url(), quote(ids[0])))
        else:
            return self.manage_main(self, REQUEST)

    security.declareProtected(access_contents_information, 'tpValues')

    def tpValues(self):
        """Ensures the items don't show up in the left pane.
        """
        return ()

    security.declareProtected(access_contents_information, 'objectCount')

    def objectCount(self):
        """Returns the number of items in the folder."""
        return self._count()

    security.declareProtected(access_contents_information, 'has_key')

    def has_key(self, id):
        """Indicates whether the folder has an item by ID.
        """
        try:
            self._htree_get(id)
        except KeyError:
            return 0
        return 1

    # Work around for the performance regression introduced in Zope 2.12.23.
    # Otherwise, we use superclass' __contains__ implementation, which uses
    # objectIds, which is inefficient in HBTreeFolder2 to lookup a single key.
    __contains__ = has_key

    def _htree_iteritems(self, min=None):
        # Generator over the (id, object) leaves of the tree, optionally
        # starting at ``min``.
        # BUG: Due to bad design of HBTreeFolder2, buckets other than the root
        #      one must not contain both buckets & leafs. Otherwise, this method
        #      fails.
        h = self._htree
        recurse_stack = []
        # First pass: push one iterator per level reached while descending
        # along the components of ``min`` (or from the start of the root when
        # ``min`` is not given).  Running off the tree (KeyError) or
        # exhausting an iterator simply ends the descent.
        try:
            for sub_id in self.hashId(min) if min else ('', ):
                if recurse_stack:
                    next(i)
                    if type(h) is not OOBTree:
                        break
                    id += H_SEPARATOR + sub_id
                    if type(next(six.itervalues(h))) is not OOBTree:
                        sub_id = id
                else:
                    id = sub_id
                i = h.iteritems(sub_id)
                recurse_stack.append(i)
                h = h[sub_id]
        except (KeyError, StopIteration):
            pass
        # Second pass: resume the saved iterators innermost first, descending
        # into every OOBTree value and yielding everything else.
        while recurse_stack:
            i = recurse_stack.pop()
            try:
                while 1:
                    id, h = next(i)
                    if type(h) is OOBTree:
                        recurse_stack.append(i)
                        i = six.iteritems(h)
                    else:
                        yield id, h
            except StopIteration:
                pass

    security.declareProtected(access_contents_information, 'getTreeIdList')

    def getTreeIdList(self, htree=None):
        """ Return list of all tree ids
        """
        # NOTE: ``htree`` is unused; it is kept for signature compatibility.
        r = []
        s = [(None, six.iteritems(self._htree))]
        while s:
            base_id, items = s.pop()
            if base_id:
                for k, v in items:
                    if type(v) is not OOBTree:
                        r.append(base_id)
                        # As an optimization, and because _htree_iteritems does not
                        # support mixed buckets except at the root, we consider that
                        # this one only contains leafs.
                        break
                    s.append((base_id + H_SEPARATOR + k, six.iteritems(v)))
            else:
                for k, v in items:
                    if type(v) is not OOBTree:
                        r.append(base_id)
                        # The root may mix leafs and buckets: keep consuming
                        # the same iterator, queueing only the buckets.
                        for k, v in items:
                            if type(v) is OOBTree:
                                s.append((k, six.iteritems(v)))
                        break
                    s.append((k, six.iteritems(v)))
        # The root tree id may be None, which cannot be ordered against
        # strings on Python 3; sort it first explicitly, as Python 2 did
        # implicitly.
        r.sort(key=lambda tree_id: (tree_id is not None, tree_id))
        return r

    security.declareProtected(access_contents_information, 'objectValues')

    def objectValues(self, base_id=_marker):
        return HBTreeObjectValues(self, base_id)

    security.declareProtected(access_contents_information, 'objectIds')

    def objectIds(self, base_id=_marker):
        return HBTreeObjectIds(self, base_id)

    security.declareProtected(access_contents_information, 'objectItems')

    def objectItems(self, base_id=_marker):
        # Returns a list of (id, subobject) tuples of the current object.
        return HBTreeObjectItems(self, base_id)

    # superValues() looks for the _objects attribute, but the implementation
    # would be inefficient, so superValues() support is disabled.
    _objects = ()

    security.declareProtected(access_contents_information, 'objectIds_d')

    def objectIds_d(self, t=None):
        return dict.fromkeys(self.objectIds(t), 1)

    def _checkId(self, id, allow_dup=0):
        # Raise BadRequestException if ``id`` is already used and duplicates
        # are not allowed.  Returns None.
        if not allow_dup and id in self:
            raise BadRequestException(
                'The id %r is invalid--it is already in use.' % id)

    def _setObject(self, id, object, roles=None, user=None, set_owner=1):
        # Validate ``id``, store ``object`` under it, fix up ownership and
        # fire manage_afterAdd.  Returns the id actually used.
        v = self._checkId(id)
        if v is not None:
            id = v

        # If an object by the given id already exists, remove it.
        if id in self:
            self._delObject(id)

        self._setOb(id, object)
        object = self._getOb(id)

        if set_owner:
            object.manage_fixupOwnershipAfterAdd()

            # Try to give user the local role "Owner", but only if
            # no local roles have been set on the object yet.
            if hasattr(object, '__ac_local_roles__'):
                if object.__ac_local_roles__ is None:
                    user = getSecurityManager().getUser()
                    if user is not None:
                        userid = user.getId()
                        if userid is not None:
                            object.manage_setLocalRoles(userid, ['Owner'])

        object.manage_afterAdd(object, self)
        return id

    def _delObject(self, id, dp=1):
        # Fire manage_beforeDelete, then remove the object.  Only
        # BeforeDeleteException and ConflictError abort the deletion; any
        # other exception raised by the hook is logged and ignored.
        object = self._getOb(id)
        try:
            object.manage_beforeDelete(object, self)
        except (BeforeDeleteException, ConflictError):
            raise
        except Exception:
            LOG('Zope', ERROR, 'manage_beforeDelete() threw', error=True)
        self._delOb(id)

    # Aliases for mapping-like access.
    __len__ = objectCount
    keys = objectIds
    values = objectValues
    items = objectItems

    # backward compatibility
    hasObject = has_key

    security.declareProtected(access_contents_information, 'get')

    def get(self, name, default=None):
        try:
            return self._htree_get(name).__of__(self)
        except KeyError:
            return default

    # Utility for generating unique IDs.

    security.declareProtected(access_contents_information, 'generateId')

    def generateId(self, prefix='item', suffix='', rand_ceiling=999999999):
        """Returns an ID not used yet by this folder.

        The ID is unlikely to collide with other threads and clients.
        The IDs are sequential to optimize access to objects
        that are likely to have some relation.
        """
        tree = self._htree
        n = self._v_nextid
        attempt = 0
        while 1:
            # A candidate that is a multiple of 4000 or above the ceiling is
            # never used; a new random starting point is drawn instead.
            if n % 4000 != 0 and n <= rand_ceiling:
                id = '%s%d%s' % (prefix, n, suffix)
                if id not in tree:
                    break
            n = randint(1, rand_ceiling)
            attempt += 1
            if attempt > MAX_UNIQUEID_ATTEMPTS:
                # Prevent denial of service
                raise ExhaustedUniqueIdsError
        self._v_nextid = n + 1
        return id

    def __getattr__(self, name):
        # Boo hoo hoo!  Zope 2 prefers implicit acquisition over traversal
        # to subitems, and __bobo_traverse__ hooks don't work with
        # restrictedTraverse() unless __getattr__() is also present.
        # Oh well.
        try:
            return self._htree_get(name)
        except KeyError:
            raise AttributeError(name)
예제 #38
0
 def __init__(self):
     """Initialize self with a fresh backreference change counter."""
     # A Length object is used as the counter; it starts at its default.
     self.__changed_backrefs_counter__ = Length()
     """Counter that should increment if backreferences are changed."""
예제 #39
0
class CatalogTool(PloneBaseTool, BaseTool):
    """Plone's catalog tool.

    Wraps the ZCatalog machinery so that searches are restricted to what
    the current user is allowed to see, and so that every catalog /
    uncatalog operation bumps a modification counter.
    """

    meta_type = 'Plone Catalog Tool'
    security = ClassSecurityInfo()
    toolicon = 'skins/plone_images/book_icon.png'
    # Modification counter; stays None until the first change and is then
    # replaced by a Length instance (see _increment_counter).
    _counter = None

    manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals())

    manage_options = (
        {'action': 'manage_main', 'label': 'Contents'},
        {'action': 'manage_catalogView', 'label': 'Catalog'},
        {'action': 'manage_catalogIndexes', 'label': 'Indexes'},
        {'action': 'manage_catalogSchema', 'label': 'Metadata'},
        {'action': 'manage_catalogAdvanced', 'label': 'Advanced'},
        {'action': 'manage_catalogReport', 'label': 'Query Report'},
        {'action': 'manage_catalogPlan', 'label': 'Query Plan'},
        {'action': 'manage_propertiesForm', 'label': 'Properties'},
    )

    def __init__(self):
        ZCatalog.__init__(self, self.getId())

    def _removeIndex(self, index):
        # Safe removal of an index: best effort, any error raised by
        # manage_delIndex() is ignored.
        try:
            self.manage_delIndex(index)
        except Exception:
            # Deliberately swallowed.  Not a bare ``except`` so that
            # KeyboardInterrupt and SystemExit still propagate.
            pass

    def _listAllowedRolesAndUsers(self, user):
        # Makes sure the list includes the user's groups.
        result = user.getRoles()
        if 'Anonymous' in result:
            # The anonymous user has no further roles
            return ['Anonymous']
        result = list(result)
        if hasattr(aq_base(user), 'getGroups'):
            groups = ['user:%s' % x for x in user.getGroups()]
            if groups:
                result = result + groups
        # Order the arguments from small to large sets
        result.insert(0, 'user:%s' % user.getId())
        result.append('Anonymous')
        return result

    @security.private
    def indexObject(self, object, idxs=None):
        # Add object to catalog.
        # The optional idxs argument is a list of specific indexes
        # to populate (all of them by default).
        if idxs is None:
            idxs = []
        self.reindexObject(object, idxs)

    @security.protected(ManageZCatalogEntries)
    def catalog_object(self, object, uid=None, idxs=None,
                       update_metadata=1, pghandler=None):
        # Catalog ``object`` and bump the modification counter.  Objects
        # that do not already provide IIndexableObject are wrapped with
        # the (object, catalog) multi-adapter when one is registered.
        if idxs is None:
            idxs = []
        self._increment_counter()

        w = object
        if not IIndexableObject.providedBy(object):
            # This is the CMF 2.2 compatible approach, which should be used
            # going forward
            wrapper = queryMultiAdapter((object, self), IIndexableObject)
            if wrapper is not None:
                w = wrapper

        ZCatalog.catalog_object(self, w, uid, idxs,
                                update_metadata, pghandler=pghandler)

    @security.protected(ManageZCatalogEntries)
    def uncatalog_object(self, *args, **kwargs):
        # Remove an object from the catalog and bump the counter.
        self._increment_counter()
        return BaseTool.uncatalog_object(self, *args, **kwargs)

    def _increment_counter(self):
        # The counter is created lazily on first use.
        if self._counter is None:
            self._counter = Length()
        self._counter.change(1)

    @security.private
    def getCounter(self):
        # Return the current modification counter value (0 when nothing
        # has been counted yet).  Pending index tasks are processed first
        # because they may change the counter.
        processQueue()
        counter = self._counter
        return counter() if counter is not None else 0

    @security.private
    def allow_inactive(self, query_kw):
        """Check, if the user is allowed to see inactive content.
        First, check if the user is allowed to see inactive content site-wide.
        Second, if there is a 'path' key in the query, check if the user is
        allowed to see inactive content for these paths.
        Conservative check: as soon as one path is disallowed, return False.
        If a path cannot be traversed, ignore it.
        """
        if _checkPermission(AccessInactivePortalContent, self):
            return True

        paths = query_kw.get('path', False)
        if not paths:
            return False

        if isinstance(paths, dict):
            # Like: {'path': {'depth': 0, 'query': ['/Plone/events/']}}
            # Or: {'path': {'depth': 0, 'query': '/Plone/events/'}}
            paths = paths.get('query', [])

        if isinstance(paths, six.string_types):
            paths = [paths]

        objs = []
        site = getSite()
        for path in list(paths):
            if six.PY2:
                path = path.encode('utf-8')  # paths must not be unicode
            try:
                # Strip the site's own physical path so the remainder can
                # be traversed relative to the site.
                site_path = '/'.join(site.getPhysicalPath())
                parts = path[len(site_path) + 1:].split('/')
                parent = site.unrestrictedTraverse('/'.join(parts[:-1]))
                objs.append(parent.restrictedTraverse(parts[-1]))
            except (KeyError, AttributeError, Unauthorized):
                # When no object is found don't raise an error
                pass

        if not objs:
            return False

        # Every traversable path must grant the permission; like the
        # and-accumulation it replaces, all() stops at the first refusal.
        return all(
            _checkPermission(AccessInactivePortalContent, ob) for ob in objs)

    @security.protected(SearchZCatalog)
    def searchResults(self, query=None, **kw):
        # Calls ZCatalog.searchResults with extra arguments that
        # limit the results to what the user is allowed to see.
        #
        # This version uses the 'effectiveRange' DateRangeIndex.
        #
        # It also accepts a keyword argument show_inactive to disable
        # effectiveRange checking entirely even for those without portal
        # wide AccessInactivePortalContent permission.

        # Make sure any pending index tasks have been processed
        processQueue()

        # Work on a copy so the extra filters never leak into the caller's
        # keyword dictionary.
        kw = kw.copy()
        show_inactive = kw.get('show_inactive', False)
        if isinstance(query, dict) and not show_inactive:
            # The mere presence of the key in ``query`` enables it.
            show_inactive = 'show_inactive' in query

        user = _getAuthenticatedUser(self)
        kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not show_inactive and not self.allow_inactive(kw):
            kw['effectiveRange'] = DateTime()

        # filter out invalid sort_on indexes
        sort_on = kw.get('sort_on') or []
        if isinstance(sort_on, six.string_types):
            sort_on = [sort_on]
        valid_indexes = self.indexes()
        try:
            sort_on = [idx for idx in sort_on if idx in valid_indexes]
        except TypeError:
            # sort_on is not iterable
            sort_on = []
        if not sort_on:
            kw.pop('sort_on', None)
        else:
            kw['sort_on'] = sort_on

        return ZCatalog.searchResults(self, query, **kw)

    __call__ = searchResults

    def search(self, query,
               sort_index=None, reverse=0, limit=None, merge=1):
        # Wrap search() the same way that searchResults() is.
        # NOTE: unlike searchResults(), the security filters are written
        # into the ``query`` mapping supplied by the caller.

        # Make sure any pending index tasks have been processed
        processQueue()

        user = _getAuthenticatedUser(self)
        query['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not self.allow_inactive(query):
            query['effectiveRange'] = DateTime()

        return super(CatalogTool, self).search(
            query, sort_index, reverse, limit, merge)

    @security.protected(ManageZCatalogEntries)
    def clearFindAndRebuild(self):
        # Empties catalog, then finds all contentish objects (i.e. objects
        # with a callable reindexObject attribute), and reindexes them.
        # This may take a long time.
        idxs = list(self.indexes())

        def indexObject(obj, path):
            if (base_hasattr(obj, 'reindexObject') and
                    safe_callable(obj.reindexObject)):
                try:
                    self.reindexObject(obj, idxs=idxs)
                    # index conversions from plone.app.discussion
                    annotations = IAnnotations(obj)
                    if DISCUSSION_ANNOTATION_KEY in annotations:
                        conversation = annotations[DISCUSSION_ANNOTATION_KEY]
                        conversation = conversation.__of__(obj)
                        for comment in conversation.getComments():
                            try:
                                self.indexObject(comment, idxs=idxs)
                            except StopIteration:  # pragma: no cover
                                pass
                except TypeError:
                    # Catalogs have 'indexObject' as well, but they
                    # take different args, and will fail
                    pass
        self.manage_catalogClear()
        portal = aq_parent(aq_inner(self))
        portal.ZopeFindAndApply(
            portal,
            search_sub=True,
            apply_func=indexObject
        )

    @security.protected(ManageZCatalogEntries)
    def manage_catalogRebuild(self, RESPONSE=None, URL1=None):
        """Clears the catalog and indexes all objects with an 'indexObject'
        method. This may take a long time.
        """
        # Measure both wall-clock and CPU time of the rebuild.
        elapse = time.time()
        c_elapse = process_time()

        self.clearFindAndRebuild()

        elapse = time.time() - elapse
        c_elapse = process_time() - c_elapse

        msg = ('Catalog Rebuilt\n'
               'Total time: %s\n'
               'Total CPU time: %s' % (repr(elapse), repr(c_elapse)))
        logger.info(msg)

        # When called through the web, go back to the "Advanced" tab and
        # show the timing report there.
        if RESPONSE is not None:
            RESPONSE.redirect(
                URL1 + '/manage_catalogAdvanced?manage_tabs_message=' +
                urllib.parse.quote(msg))
예제 #40
0
 def testUpgradeIdToolDicts(self):
   """Check that id generation keeps working across an erp5_core upgrade.

   The test first recreates the legacy state (dictionaries stored on the id
   tool, no generators, old erp5_core revision), generates an id with the
   old code path, then reinstalls erp5_core and checks that the new
   generators continue the sequences while the legacy dictionaries are
   left untouched.
   """
   # With old erp5_core, we have no generators, no IdTool_* zsql methods,
   # and we have a dictionary stored on id tool
   id_tool = self.getPortal().portal_ids
   # Rebuild a persistent mapping like it already existed in beginning 2010
   # First persistent mapping of generateNewLengthIdList
   id_tool.dict_length_ids = PersistentMapping()
   id_tool.dict_length_ids['foo'] = Length(5)
   id_tool.dict_length_ids['bar'] = Length(5)
   id_tool.IdTool_zSetLastId(id_group='foo', last_id=5)
   id_tool.IdTool_zSetLastId(id_group='bar', last_id=10)
   # Then persistent mapping of generateNewId
   id_tool.dict_ids = PersistentMapping()
   id_tool.dict_ids['foo'] = 3
   # it was unfortunately possible to define something else
   # than strings
   id_tool.dict_ids[('bar','baz')] = 2
   # Delete new zsql methods which are used by new code
   # (they are moved to the custom skin folder and moved back later)
   skin_folder = self.getPortal().portal_skins.erp5_core
   custom_skin_folder = self.getPortal().portal_skins.custom
   script_id_list = [x for x in skin_folder.objectIds() 
                     if x.startswith('IdTool')]
   self.assertTrue(len(script_id_list)>0)
   cp_data = skin_folder.manage_cutObjects(ids=script_id_list)
   custom_skin_folder.manage_pasteObjects(cp_data)
   # Set old revision for erp5_core bt, because the id tool decides which
   # code to run depending on this revision
   template_tool = self.getPortal().portal_templates
   erp5_core_bt_list = [x for x in template_tool.objectValues()
                        if x.getTitle()=='erp5_core']
   self.assertEqual(len(erp5_core_bt_list), 1)
   erp5_core_bt = erp5_core_bt_list[0]
   erp5_core_bt.setRevision(1561)
   # Delete all new generators
   generator_id_list = [x for x in id_tool.objectIds()]
   id_tool.manage_delObjects(ids=generator_id_list)
   # Generate one id through the legacy code path
   id_list = id_tool.generateNewLengthIdList(id_group='foo', store=1)
   self.assertEqual(id_list, [5])
   self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
   # Now, reinstall erp5_core, and make sure we still have the possibility
   # to continue generating ids
   cp_data = template_tool.manage_copyObjects(ids=(erp5_core_bt.getId(),))
   new_id = template_tool.manage_pasteObjects(cp_data)[0]['new_id']
   new_bt = template_tool[new_id]
   self.tic()
   self.commit()
   new_bt.install(force=1)
   erp5_core_bt.setRevision(1562)
   # Put the IdTool_* zsql methods back where the new code expects them
   cp_data = custom_skin_folder.manage_cutObjects(ids=script_id_list)
   skin_folder.manage_pasteObjects(cp_data)
   id_list = id_tool.generateNewLengthIdList(id_group='foo')
   # it is known that with current upgrade there is a hole
   # (id 6 is skipped in the sequence)
   self.assertEqual(id_list, [7])
   new_id = id_tool.generateNewId(id_group='foo')
   self.assertEqual(new_id, 4)
   new_id = id_tool.generateNewId(id_group=('bar','baz'))
   self.assertEqual(new_id, 3)
   # Make sure that the old code is not used any more, so the dict on
   # id tool should not change, checking for length_dict
   self.assertEqual(int(id_tool.dict_length_ids['foo'].value), 6)
   id_list = id_tool.generateNewLengthIdList(id_group='bar')
   self.assertEqual(id_list, [11])
   generator_list = [x for x in id_tool.objectValues()
                     if x.getReference()=='mysql_non_continuous_increasing']
   self.assertEqual(len(generator_list), 1)
   generator = generator_list[0]
   self.assertEqual(generator.last_max_id_dict['foo'].value, 7)
   self.assertEqual(generator.last_max_id_dict['bar'].value, 11)
   # Make sure that the old code is not used any more, so the dict on
   # id tool should not change, checking for dict
   self.assertEqual(id_tool.dict_ids['foo'], 3)
   generator_list = [x for x in id_tool.objectValues()
                     if x.getReference()=='zodb_continuous_increasing']
   self.assertEqual(len(generator_list), 1)
   generator = generator_list[0]
   self.assertEqual(generator.last_id_dict['foo'], 4)
   # Non-string id groups are stored under their repr()
   self.assertEqual(generator.last_id_dict["('bar', 'baz')"], 3)
예제 #41
0
파일: indexes.py 프로젝트: sylvestre/indico
 def clear(self):
     """ Drops everything stored: installs a fresh, empty tree and a
         counter reset to zero
     """
     self._tree, self._count = LOBTree.LOBTree(), Length(0)
예제 #42
0
파일: indexes.py 프로젝트: sylvestre/indico
 def __init__(self):
     """ Starts with an empty tree and a zeroed element counter
     """
     self._tree, self._count = LOBTree.LOBTree(), Length(0)
예제 #43
0
파일: indexes.py 프로젝트: sylvestre/indico
class EventEndDateIndex(Persistent):
    """ List of bookings ordered by their event's ending date

        Bookings are grouped in a tree whose keys are the integer unix
        timestamp of the event's end date (UTC) and whose values are
        DateBookingList objects. A separate Length counter tracks the total
        number of bookings.
    """

    def __init__(self):
        self._tree = LOBTree.LOBTree()
        self._count = Length(0)

    ## private class methods ##
    @classmethod
    def _dateToKey(cls, date):
        """ Converts a date into a tree key; a false value gives None
        """
        if date:
            return datetimeToUnixTimeInt(date)
        return None

    @classmethod
    def _keyToDate(cls, key):
        """ Converts a tree key back into a datetime; None gives None
        """
        # Compare against None explicitly: key 0 (the unix epoch) is a
        # valid timestamp and must not be reported as "no key".
        if key is None:
            return None
        return unixTimeToDatetime(key)

    @classmethod
    def _bookingToKey(cls, booking):
        """ Returns the key for a booking: the adjusted end date (UTC) of
            its conference
        """
        return cls._dateToKey(booking.getConference().getAdjustedEndDate(tz = 'UTC'))

    ## public instance methods ##
    def clear(self):
        """ Clears all the information stored
        """
        self._tree = LOBTree.LOBTree()
        self._count = Length(0)

    def getCount(self):
        """ Returns the number of bookings (not keys) stored
        """
        return self._count() #to get the value of a Length object, one has to "call" the object

    def indexBooking(self, booking):
        """ Stores a booking in the index
        """
        key = EventEndDateIndex._bookingToKey(booking)
        if key not in self._tree:
            self._tree[key] = DateBookingList()
        self._tree[key].addBooking(booking)
        self._count.change(1)

    def unindexBooking(self, booking):
        """ Removes a booking from the index

            A KeyError during removal is logged as a warning and otherwise
            ignored; the counter is only decremented on success.
        """
        key = EventEndDateIndex._bookingToKey(booking)
        try:
            bookings = self._tree[key]
            bookings.removeBooking(booking)
            # Do not keep empty booking lists around
            if bookings.getCount() == 0:
                del self._tree[key]
            self._count.change(-1)
        except KeyError:
            Logger.get('Vidyo').warning("Could not unindex booking: (confId=%s, id=%s) from Vidyo's GlobalData. Tried with key: %s." %
                                            (booking.getConference().getId(), booking.getId(), str(key)))

    def moveBooking(self, booking, oldDate):
        """ Changes the position of a booking in the index

            oldDate is the end date under which the booking is currently
            stored; the new position comes from the booking's conference.
            A KeyError is logged as a warning and otherwise ignored. The
            total count is left unchanged.
        """
        oldKey = EventEndDateIndex._dateToKey(oldDate)
        newKey = EventEndDateIndex._bookingToKey(booking)
        try:
            oldBookings = self._tree[oldKey]
            oldBookings.removeBooking(booking)
            if oldBookings.getCount() == 0:
                del self._tree[oldKey]
            if newKey not in self._tree:
                self._tree[newKey] = DateBookingList()
            self._tree[newKey].addBooking(booking)
        except KeyError:
            Logger.get('Vidyo').warning("Could not move booking: (confId=%s, id=%s) from Vidyo's GlobalData. Tried moving from key: %s to key: %s." %
                                            (booking.getConference().getId(), booking.getId(), str(oldKey), str(newKey)))

    def iterbookings(self, minDate = None, maxDate = None):
        """ Will return an iterator over Vidyo bookings attached to conferences whose
            end date is between minDate and maxDate
        """
        minKey = EventEndDateIndex._dateToKey(minDate)
        maxKey = EventEndDateIndex._dateToKey(maxDate)
        for bookingList in self._tree.itervalues(min = minKey, max = maxKey):
            for b in bookingList.iterbookings():
                yield b

    def deleteKeys(self, minDate = None, maxDate = None):
        """ Deletes every key that the tree returns for the range
            minDate..maxDate, together with the bookings stored under it
        """
        minKey = EventEndDateIndex._dateToKey(minDate)
        maxKey = EventEndDateIndex._dateToKey(maxDate)
        for key in list(self._tree.keys(min = minKey, max = maxKey)): #we want a copy because we are going to modify
            self._deleteKey(key)

    def _deleteKey(self, key):
        """ Logs and removes one key, subtracting its bookings from the count
        """
        Logger.get("Vidyo").info("Vidyo EventEndDateIndex: deleting key %s (%s)" % (str(key), str(EventEndDateIndex._keyToDate(key)) + " (UTC)"))
        self._count.change(-self._tree[key].getCount())
        del self._tree[key]

    def initialize(self, dbi=None):
        """ Cleans the indexes, and then indexes all the vidyo bookings from all the conferences
            WARNING: obviously, this can potentially take a while

            If dbi is given, dbi.commit() is called after every 100
            conferences processed.
        """
        self.clear()
        for i, conf in enumerate(ConferenceHolder().getList(), 1):
            csbm = conf.getCSBookingManager()
            for booking in csbm.getBookingList():
                if booking.getType() == "Vidyo" and booking.isCreated():
                    self.indexBooking(booking)
            if dbi and i % 100 == 0:
                dbi.commit()
예제 #44
0
 def _initBTrees(self):
     # Fresh storage: an empty OOBTree for the items and a Length object
     # for the item count.
     self._htree, self._count = OOBTree(), Length()
예제 #45
0
파일: queue.py 프로젝트: marcosmolla/indico
 def _reset(self):
     """
     (Re)create the empty container and its element counter
     """
     self._container = IOBTree()
     # number of elements currently held, starting at zero
     self._elem_counter = Length(0)
예제 #46
0
 def clear(self):
     """Reset the index to its empty state."""
     family = self.family
     self._depth = 0
     self._index = family.OO.BTree()
     self._unindex = family.IO.BTree()
     self._length = Length(0)
예제 #47
0
파일: queue.py 프로젝트: marcosmolla/indico
class PersistentWaitingQueue(Persistent):
    """
    A Waiting queue, implemented using a map structure (BTree...)
    It is persistent, but very vulnerable to conflicts. This is due to the
    fact that sets are used as container, and there can happen a situation
    where two different sets are assigned to the same timestamp. This will
    for sure result in conflict.

    That said, the commits of objects like these have to be carefully
    synchronized. See `indico.modules.scheduler.controllers` for more info
    (particularly the way we use the 'spool').
    """
    def __init__(self):
        super(PersistentWaitingQueue, self).__init__()
        self._reset()

    def _reset(self):
        # this counter keeps the number of elements
        self._elem_counter = Length(0)
        # timestamp -> set of elements ("bin")
        self._container = IOBTree()

    def _gc_bin(self, t):
        """
        'garbage-collect' bins
        """
        if len(self._container[t]) == 0:
            del self._container[t]

    def _check_gc_consistency(self):
        """
        'check that there are no empty bins'
        """
        return all(len(self._container[t]) != 0 for t in self._container)

    def enqueue(self, t, obj):
        """
        Add an element to the queue

        Raises DuplicateElementException if `obj` is already stored under
        timestamp `t`.
        """

        if t not in self._container:
            self._container[t] = OOTreeSet()

        if obj in self._container[t]:
            raise DuplicateElementException(obj)

        self._container[t].add(obj)
        self._elem_counter.change(1)

    def dequeue(self, t, obj):
        """
        Remove an element from the queue
        """
        self._container[t].remove(obj)
        self._gc_bin(t)
        self._elem_counter.change(-1)

    def _next_timestamp(self):
        """
        Return the next 'priority' to be served

        This is the first key of the container, or None when it is empty.
        """
        # builtin next() works on Python 2.6+ and 3, unlike iterator.next()
        return next(iter(self._container), None)

    def peek(self):
        """
        Return the next element

        The result is a (timestamp, element) pair, or None when the queue
        is empty.
        """
        t = self._next_timestamp()
        # Compare against None explicitly: 0 is a legitimate timestamp and
        # must not be mistaken for "queue empty".
        if t is None:
            return None

        next_bin = self._container[t]
        # just to be sure
        assert (len(next_bin) != 0)

        # first element of the bin
        return t, next(iter(next_bin))

    def pop(self):
        """
        Remove and return the next element to be processed

        Returns the same (timestamp, element) pair as peek(), or None.
        """
        pair = self.peek()
        if pair is None:
            return None

        self.dequeue(*pair)
        return pair

    def nbins(self):
        """
        Return the number of 'bins' (map entries) currently used
        """
        # get 'real' len()
        return len(self._container)

    def __len__(self):
        return self._elem_counter()

    def __getitem__(self, param):
        return self._container.__getitem__(param)

    def __iter__(self):
        """
        Iterate over (timestamp, element) pairs, bin after bin
        """
        # tree iterator
        for tstamp in iter(self._container):
            # set iterator
            for elem in self._container[tstamp]:
                yield tstamp, elem
예제 #48
0
파일: catalog.py 프로젝트: araymund/karl
class CachingCatalog(Catalog):
    """Catalog whose search results are cached per process.

    Every mutating operation calls invalidate(), which bumps a persistent
    generation counter and clears the local cache.  search() compares that
    counter with the generation recorded on the cache to notice changes
    made by other processes.
    """
    implements(ICatalog)

    os = os  # for unit tests
    generation = None  # b/c

    def __init__(self):
        super(CachingCatalog, self).__init__()
        self.generation = Length(0)

    # Each mutator below invalidates the cache, then delegates to Catalog.

    def clear(self):
        self.invalidate()
        super(CachingCatalog, self).clear()

    def index_doc(self, *arg, **kw):
        self.invalidate()
        super(CachingCatalog, self).index_doc(*arg, **kw)

    def unindex_doc(self, *arg, **kw):
        self.invalidate()
        super(CachingCatalog, self).unindex_doc(*arg, **kw)

    def reindex_doc(self, *arg, **kw):
        self.invalidate()
        super(CachingCatalog, self).reindex_doc(*arg, **kw)

    def __setitem__(self, *arg, **kw):
        self.invalidate()
        super(CachingCatalog, self).__setitem__(*arg, **kw)

    @MetricMod('CS.%s')
    @metricmethod
    def search(self, *arg, **kw):
        """Run a catalog search, serving it from the cache when allowed.

        The ``use_cache`` keyword (default True) is consumed here and not
        passed on.  Caching is also skipped when NO_CATALOG_CACHE is in the
        environment, when the query involves ``tags``, or when no
        ICatalogSearchCache utility is registered.
        """
        use_cache = kw.pop('use_cache', True)

        # The tags index changes without invalidating the catalog,
        # so don't cache any query involving the tags index.
        if 'NO_CATALOG_CACHE' in self.os.environ or 'tags' in kw:
            use_cache = False

        cache = queryUtility(ICatalogSearchCache) if use_cache else None
        if cache is None:
            return self._search(*arg, **kw)

        key = cPickle.dumps((arg, kw))

        current = self.generation
        if current is None:
            current = Length(0)
        genval = current.value

        if (genval == 0) or (genval > cache.generation):
            # an update in another process requires that the local cache be
            # invalidated
            cache.clear()
            cache.generation = genval

        if cache.get(key) is None:
            num, docids = self._search(*arg, **kw)

            # We don't cache large result sets because the time it takes to
            # unroll the result set turns out to be far more time than it
            # takes to run the search. In a particular instance using OSI's
            # catalog a search that took 0.015s but returned nearly 35,295
            # results took over 50s to unroll the result set for caching,
            # significantly slowing search performance.
            if num > LARGE_RESULT_SET:
                return num, docids

            # we need to unroll here; a btree-based structure may have
            # a reference to its connection
            cache[key] = (num, list(docids))

        return cache.get(key)

    @metricmethod
    def _search(self, *arg, **kw):
        """Run the uncached search and emit a CatalogQueryEvent with its
        duration."""
        started = time.time()
        result = super(CachingCatalog, self).search(*arg, **kw)
        elapsed = time.time() - started
        notify(CatalogQueryEvent(self, kw, elapsed, result))
        return result

    def invalidate(self):
        """Bump the generation counter and clear this process's cache."""
        # Increment the generation; this tells *another process* that
        # its catalog cache needs to be cleared
        counter = self.generation

        if counter is None:
            counter = Length(0)
            self.generation = counter

        if counter.value < sys.maxint:
            counter.change(1)
        else:
            # don't keep growing the generation integer; wrap at sys.maxint
            counter.set(0)

        # Clear the cache for *this process*
        cache = queryUtility(ICatalogSearchCache)
        if cache is not None:
            cache.clear()
            cache.generation = self.generation.value
예제 #49
0
파일: IdTool.py 프로젝트: poses/erp5
    def generateNewIdList(self,
                          id_group=None,
                          id_count=1,
                          default=None,
                          store=_marker,
                          id_generator=None):
        """
        Generate a list of next ids in the sequence of ids of a particular
        group.

        id_group -- name of the sequence; must not be None or 'None'.
                    Non-string values are deprecated and replaced by their
                    repr().
        id_count -- number of ids requested.
        default -- forwarded to the generator as ``default``.
        store -- deprecated; only used by the compatibility code path.
        id_generator -- generator to use, 'uid' when not given.

        Raises ValueError for an invalid id_group.  KeyError / ValueError
        from the generator are re-raised unless the installed erp5_core
        revision is 1561 or older, in which case the legacy zsql based
        implementation is used instead.
        """
        if id_group in (None, 'None'):
            raise ValueError('%s is not a valid id_group' % (repr(id_group), ))
        # for compatibilty with sql data, must not use id_group as a list
        if not isinstance(id_group, str):
            id_group = repr(id_group)
            warnings.warn(
                'id_group must be a string, other types '
                'are deprecated.', DeprecationWarning)
        if id_generator is None:
            id_generator = 'uid'
        if store is not _marker:
            warnings.warn("Use of 'store' argument is deprecated.",
                          DeprecationWarning)
        try:
            #use _getLatestGeneratorValue here for that the technical level
            #must not call the method
            last_generator = self._getLatestGeneratorValue(id_generator)
            new_id_list = last_generator.generateNewIdList(id_group=id_group,
                                                           id_count=id_count,
                                                           default=default)
        except (KeyError, ValueError):
            template_tool = getattr(self, 'portal_templates', None)
            revision = template_tool.getInstalledBusinessTemplateRevision(
                'erp5_core')
            # XXX backward compatiblity
            if int(revision) > 1561:
                LOG('generateNewIdList', ERROR, 'while generating id')
                raise
            else:
                # Compatibility code below, in case the last version of erp5_core
                # is not installed yet
                warnings.warn(
                    "You are using an old version of erp5_core to generate "
                    "ids.\nPlease update erp5_core business template to "
                    "use new id generators", DeprecationWarning)
                new_id = None
                if default is None:
                    default = 1
                # XXX It's temporary, a New API will be implemented soon
                #     the code will be change
                portal = self.getPortalObject()
                query = getattr(portal, 'IdTool_zGenerateId', None)
                commit = getattr(portal, 'IdTool_zCommit', None)

                if query is None or commit is None:
                    # Fall back to the methods of the SQL catalog
                    portal_catalog = getattr(self,
                                             'portal_catalog').getSQLCatalog()
                    query = getattr(portal_catalog, 'z_portal_ids_generate_id')
                    commit = getattr(portal_catalog, 'z_portal_ids_commit')
                if None in (query, commit):
                    raise AttributeError(
                      'Error while generating Id: '
                      'idTool_zGenerateId and/or idTool_zCommit could not '
                      'be found.')
                try:
                    result = query(id_group=id_group,
                                   id_count=id_count,
                                   default=default)
                finally:
                    # commit() runs even when the query raised
                    commit()
                new_id = result[0]['LAST_INSERT_ID()']
                if store:
                    if getattr(aq_base(self), 'dict_length_ids', None) is None:
                        # Length objects are stored in a persistent mapping: there is one
                        # Length object per id_group.
                        self.dict_length_ids = PersistentMapping()
                    if self.dict_length_ids.get(id_group) is None:
                        self.dict_length_ids[id_group] = Length(new_id)
                    self.dict_length_ids[id_group].set(new_id)
                # list() keeps the return value a real list on every
                # Python version
                new_id_list = list(range(new_id - id_count, new_id))
        return new_id_list
예제 #50
0
 def _increment_counter(self):
     # Bump the modification counter by one, creating the Length object
     # the first time it is needed.
     counter = self._counter
     if counter is None:
         counter = self._counter = Length()
     counter.change(1)
예제 #51
0
 def __init__(self, id_=None):
     """Create the folder's storage, then run the base initialiser.

     ``id_`` is accepted but not used in this method.
     """
     # Item storage, written under the explicit ``_Folder__data`` name.
     self._Folder__data = OOBTree()
     # Item counter.  NOTE(review): inside a class named ``Folder`` this
     # double-underscore attribute is name-mangled to ``_Folder__len``.
     self.__len = Length()
     super(Folder, self).__init__()
예제 #52
0
class CatalogPathIndex(CatalogIndex):

    """Index for model paths (tokens separated by '/' characters)

    A path index stores all path components of the physical path of an object.

    Internal datastructure:

    - a physical path of an object is split into its components

    - every component is kept as a key of an OOBTree in self._index

    - the value is a mapping 'level of the path component' to
      'all docids with this path component on this level'

    - self._unindex maps every indexed docid to the path it was indexed
      under, so its entries can be located again when unindexing


    Query types supported:

    - Eq

    - NotEq

    """
    useOperator = 'or'

    family = BTrees.family32

    def __init__(self, discriminator):
        """Create an empty index.

        discriminator is either a callable, invoked as
        ``discriminator(object, default)``, or the name of the attribute
        to read from each indexed object.  Any other value raises
        ValueError.
        """
        if not callable(discriminator):
            if not isinstance(discriminator, six.string_types):
                raise ValueError('discriminator value must be callable or a '
                                 'string')
        self.discriminator = discriminator
        self._not_indexed = self.family.IF.Set()
        self.clear()

    def clear(self):
        """Drop all indexed paths and reset the depth and length counters."""
        self._depth = 0
        self._index = self.family.OO.BTree()
        self._unindex = self.family.IO.BTree()
        self._length = Length(0)

    @staticmethod
    def _split_path(path):
        """Return the non-empty '/'-separated components of a path string."""
        return [comp for comp in path.split('/') if comp]

    def insertEntry(self, comp, id, level):
        """Insert an entry.

           comp is a path component
           id is the docid
           level is the level of the component inside the path
        """

        if comp not in self._index:
            self._index[comp] = self.family.IO.BTree()

        if level not in self._index[comp]:
            self._index[comp][level] = self.family.IF.TreeSet()

        self._index[comp][level].insert(id)
        if level > self._depth:
            # Remember the deepest level seen; negative-level searches
            # iterate over every level up to it.
            self._depth = level

    def index_doc(self, docid, object):
        """Index the path of ``object`` under ``docid``.

        Returns None when the discriminator yields no value (the docid is
        then recorded in ``_not_indexed``), otherwise 1.  Raises
        ValueError when the value is a Persistent object.
        """
        if callable(self.discriminator):
            value = self.discriminator(object, _marker)
        else:
            value = getattr(object, self.discriminator, _marker)

        if value is _marker:
            # unindex the previous value
            self.unindex_doc(docid)

            # Store docid in set of unindexed docids
            self._not_indexed.add(docid)

            return None

        if isinstance(value, Persistent):
            raise ValueError('Catalog cannot index persistent object %s' %
                             value)

        if docid in self._not_indexed:
            # Remove from set of unindexed docs if it was in there.
            self._not_indexed.remove(docid)

        path = value

        if isinstance(path, (list, tuple)):
            # A sequence path is joined back into a string; its first
            # element is dropped.
            path = '/'+ '/'.join(path[1:])

        comps = self._split_path(path)

        if docid not in self._unindex:
            self._length.change(1)

        for level, comp in enumerate(comps):
            self.insertEntry(comp, docid, level)

        self._unindex[docid] = path
        return 1

    def unindex_doc(self, docid):
        """Remove every entry stored for ``docid``; unknown docids are ignored."""
        _not_indexed = self._not_indexed
        if docid in _not_indexed:
            _not_indexed.remove(docid)

        if docid not in self._unindex:
            return

        # Split exactly the way index_doc did, so that relative paths and
        # paths with empty segments ('a/b', '/a//b') map to the same
        # (component, level) pairs that were inserted.
        comps = self._split_path(self._unindex[docid])

        for level, comp in enumerate(comps):
            try:
                self._index[comp][level].remove(docid)

                if not self._index[comp][level]:
                    del self._index[comp][level]

                if not self._index[comp]:
                    del self._index[comp]
            except KeyError:
                # Entry already gone; nothing left to clean up here.
                pass

        self._length.change(-1)
        del self._unindex[docid]

    def _indexed(self):
        """Return a list of all indexed docids."""
        return list(self._unindex.keys())

    def search(self, path, default_level=0):
        """
        path is either a string representing a
        relative URL or a part of a relative URL or
        a tuple (path,level).

        level >= 0  starts searching at the given level
        level <  0  matches the components starting at any level

        Returns a set of docids; the set is empty when nothing matches.
        A path without components matches every indexed docid.
        """
        if isinstance(path, six.string_types):
            level = default_level
        else:
            level = int(path[1])
            path = path[0]

        comps = self._split_path(path)

        if not comps:
            return self.family.IF.Set(list(self._unindex.keys()))

        results = None
        if level >= 0:
            for i, comp in enumerate(comps):
                if comp not in self._index:
                    return self.family.IF.Set()
                if level+i not in self._index[comp]:
                    return self.family.IF.Set()
                results = self.family.IF.intersection(
                    results, self._index[comp][level+i])

        else:
            for start in range(self._depth + 1):
                ids = None
                for i, comp in enumerate(comps):
                    try:
                        ids = self.family.IF.intersection(
                            ids, self._index[comp][start+i])
                    except KeyError:
                        break
                else:
                    results = self.family.IF.union(results, ids)

        if results is None:
            # No starting level matched: return an empty set, like the
            # non-negative branch does, rather than None.
            return self.family.IF.Set()
        return results

    def numObjects(self):
        """ return the number of indexed documents """
        return len(self._unindex)

    def getEntryForObject(self, docid):
        """ Takes a document ID and returns all the information
            we have on that specific object.
        """
        return self._unindex.get(docid)

    def apply(self, query):
        """Run a query and return the set of matching docids.

        query is a path string, a list/tuple of paths, or a mapping with
        the keys 'query' (a path or sequence of paths), 'level' (default
        0) and 'operator' ('or' or 'and', default ``useOperator``).
        """
        level = 0
        operator = self.useOperator

        if isinstance(query, six.string_types):
            paths = [query]
        elif isinstance(query, (tuple, list)):
            paths = query
        else:
            paths = query.get('query', [])
            if isinstance(paths, six.string_types):
                paths = [ paths ]
            level = query.get('level', 0)
            operator = query.get('operator', self.useOperator).lower()

        sets = [self.search(path, level) for path in paths]

        if operator == 'or':
            rs = self.family.IF.multiunion(sets)

        else:
            rs = None
            # Intersect the smallest sets first so the running result
            # shrinks as early as possible.
            sets.sort(key=len)
            for docids in sets:
                rs = self.family.IF.intersection(rs, docids)
                if not rs:
                    break

        if rs:
            return rs
        else:
            return self.family.IF.Set()

    applyEq = apply
예제 #53
0
파일: catalog.py 프로젝트: araymund/karl
 def __init__(self):
     # Initialise the base catalog, then start the generation counter
     # at zero.
     super(CachingCatalog, self).__init__()
     generation = Length(0)
     self.generation = generation
예제 #54
0
class UnIndex(SimpleItem):
    """Simple forward and reverse index.

    ``_index`` maps each indexed value to the set of document ids that
    carry it, ``_unindex`` maps each document id back to its value and
    ``_length`` counts the distinct values held in ``_index``.
    """
    implements(ILimitedResultIndex, IUniqueValueIndex, ISortIndex)

    def __init__(self,
                 id,
                 ignore_ex=None,
                 call_methods=None,
                 extra=None,
                 caller=None):
        """Create an unindex

        UnIndexes are indexes that contain two index components, the
        forward index (like plain index objects) and an inverted
        index.  The inverted index is so that objects can be unindexed
        even when the old value of the object is not known.

        e.g.

        self._index = {datum:[documentId1, documentId2]}
        self._unindex = {documentId:datum}

        The arguments are:

          'id' -- the name of the item attribute to index.  This is
          either an attribute name or a record key.

          'ignore_ex' -- should be set to true if you want the index
          to ignore exceptions raised while indexing instead of
          propagating them.

          'call_methods' -- should be set to true if you want the index
          to call the attribute 'id' (note: 'id' should be callable!)
          You will also need to pass in an object in the index and
          unindex methods for this to work.

          'extra' -- a mapping object that keeps additional
          index-related parameters - subitem 'indexed_attrs'
          can be string with comma separated attribute names or
          a list

          'caller' -- reference to the calling object (usually
          a (Z)Catalog instance
        """
        def _get(o, k, default):
            """ return a value for a given key of a dict/record 'o' """
            if isinstance(o, dict):
                return o.get(k, default)
            else:
                return getattr(o, k, default)

        self.id = id
        self.ignore_ex = ignore_ex  # currently unimplemented
        self.call_methods = call_methods

        self.operators = ('or', 'and')
        self.useOperator = 'or'

        # allow index to index multiple attributes
        ia = _get(extra, 'indexed_attrs', id)
        if isinstance(ia, str):
            self.indexed_attrs = ia.split(',')
        else:
            self.indexed_attrs = list(ia)
        self.indexed_attrs = [
            attr.strip() for attr in self.indexed_attrs if attr
        ]
        if not self.indexed_attrs:
            self.indexed_attrs = [id]

        self.clear()

    def __len__(self):
        # Number of distinct values, read from the Length counter.
        return self._length()

    def getId(self):
        return self.id

    def clear(self):
        # Reset the counter and both mappings to empty.
        self._length = Length()
        self._index = OOBTree()
        self._unindex = IOBTree()

    def __nonzero__(self):
        # Truthiness follows the reverse index, not __len__.
        return bool(self._unindex)

    # Python 3 spelling of the truth-value hook.
    __bool__ = __nonzero__

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        for key, value in self._index.items():
            if isinstance(value, int):
                # Legacy rows stored a single docid as a bare int.
                entry = 1  # "set" length is 1
            else:
                entry = len(value)
            histogram[entry] = histogram.get(entry, 0) + 1

        return histogram

    def referencedObjects(self):
        """Generate a list of IDs for which we have referenced objects."""
        return self._unindex.keys()

    def getEntryForObject(self, documentId, default=_marker):
        """Takes a document ID and returns all the information we have
        on that specific object.
        """
        if default is _marker:
            return self._unindex.get(documentId)
        else:
            return self._unindex.get(documentId, default)

    def removeForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.
        """
        indexRow = self._index.get(entry, _marker)
        if indexRow is not _marker:
            try:
                indexRow.remove(documentId)
                if not indexRow:
                    del self._index[entry]
                    self._length.change(-1)

            except ConflictError:
                raise

            except AttributeError:
                # index row is an int
                try:
                    del self._index[entry]
                except KeyError:
                    # XXX swallow KeyError because it was probably
                    # removed and then _length AttributeError raised
                    pass
                if isinstance(self.__len__, Length):
                    # Inline migration: the counter used to be stored
                    # under the name __len__.
                    self._length = self.__len__
                    del self.__len__
                self._length.change(-1)

            except Exception:
                # Best effort: log and carry on, but let SystemExit and
                # KeyboardInterrupt propagate.
                LOG.error(
                    '%s: unindex_object could not remove '
                    'documentId %s from index %s.  This '
                    'should not happen.' %
                    (self.__class__.__name__, str(documentId), str(self.id)),
                    exc_info=sys.exc_info())
        else:
            LOG.error('%s: unindex_object tried to retrieve set %s '
                      'from index %s but couldn\'t.  This '
                      'should not happen.' %
                      (self.__class__.__name__, repr(entry), str(self.id)))

    def insertForwardIndexEntry(self, entry, documentId):
        """Take the entry provided and put it in the correct place
        in the forward index.

        This will also deal with creating the entire row if necessary.
        """
        indexRow = self._index.get(entry, _marker)

        # Make sure there's actually a row there already. If not, create
        # a set and stuff it in first.
        if indexRow is _marker:
            # We always use a set to avoid getting conflict errors on
            # multiple threads adding a new row at the same time
            self._index[entry] = IITreeSet((documentId, ))
            self._length.change(1)
        else:
            try:
                indexRow.insert(documentId)
            except AttributeError:
                # Inline migration: index row with one element was an int at
                # first (before Zope 2.13).
                indexRow = IITreeSet((indexRow, documentId))
                self._index[entry] = indexRow

    def index_object(self, documentId, obj, threshold=None):
        """ wrapper to handle indexing of multiple attributes """

        fields = self.getIndexSourceNames()

        res = 0
        for attr in fields:
            res += self._index_object(documentId, obj, threshold, attr)

        return res > 0

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """ index and object 'obj' with integer id 'documentId'"""
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                self.removeForwardIndexEntry(oldDatum, documentId)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error(
                            'Should not happen: oldDatum was there, now its not,'
                            'for document with id %s' % documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)
                self._unindex[documentId] = datum

            returnStatus = 1

        return returnStatus

    def _get_object_datum(self, obj, attr):
        # self.id is the name of the index, which is also the name of the
        # attribute we're interested in.  If the attribute is callable,
        # we'll do so.
        try:
            datum = getattr(obj, attr)
            if safe_callable(datum):
                datum = datum()
        except (AttributeError, TypeError):
            datum = _marker
        return datum

    def numObjects(self):
        """Return the number of indexed objects."""
        return len(self._unindex)

    def indexSize(self):
        """Return the size of the index in terms of distinct values."""
        return len(self)

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s' % documentId,
                      exc_info=True)

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the request arg.

        The request argument should be a mapping object.

        If the request does not have a key which matches the "id" of
        the index instance, then None is returned.

        If the request *does* have a key which matches the "id" of
        the index instance, one of a few things can happen:

          - if the value is a blank string, None is returned (in
            order to support requests from web forms where
            you can't tell a blank string from empty).

          - if the value is a nonblank string, turn the value into
            a single-element sequence, and proceed.

          - if the value is a sequence, return a union search.

          - If the value is a dict and contains a key of the form
            '<index>_operator' this overrides the default method
            ('or') to combine search results. Valid values are "or"
            and "and".

        If None is not returned as a result of the abovementioned
        constraints, two objects are returned.  The first object is a
        ResultSet containing the record numbers of the matching
        records.  The second object is a tuple containing the names of
        all data fields used.

        FAQ answer:  to search a Field Index for documents that
        have a blank string as their value, wrap the request value
        up in a tuple ala: request = {'id':('',)}
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        r = None
        opr = None

        # experimental code for specifying the operator
        operator = record.get('operator', self.useOperator)
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % escape(operator))

        # Range parameter
        range_parm = record.get('range', None)
        if range_parm:
            opr = "range"
            opr_args = []
            if range_parm.find("min") > -1:
                opr_args.append("min")
            if range_parm.find("max") > -1:
                opr_args.append("max")

        if record.get('usage', None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr == "range":  # range search
            lo = min(record.keys) if 'min' in opr_args else None
            hi = max(record.keys) if 'max' in opr_args else None
            # A bound of None means "unbounded".  Always pass hi so that
            # a falsy upper bound (0, '') is still honoured.
            setlist = index.values(lo, hi)

            # If we only use one key, intersect and return immediately
            if len(setlist) == 1:
                result = setlist[0]
                if isinstance(result, int):
                    result = IISet((result, ))
                return result, (self.id, )

            # Normalise legacy int rows to one-element sets.
            tmp = []
            for s in setlist:
                if isinstance(s, int):
                    s = IISet((s, ))
                tmp.append(s)

            if operator == 'or':
                r = multiunion(tmp)
            else:
                # For intersection, sort with smallest data set first
                if len(tmp) > 2:
                    setlist = sorted(tmp, key=len)
                else:
                    setlist = tmp
                r = resultset
                for s in setlist:
                    # the result is bound by the resultset
                    r = intersection(r, s)

        else:  # not a range search
            # Collect the row of every requested key
            setlist = []
            for k in record.keys:
                s = index.get(k, None)
                # If None, try to bail early
                if s is None:
                    if operator == 'or':
                        # If union, we can't possibly get a bigger result
                        continue
                    # If intersection, we can't possibly get a smaller result
                    return IISet(), (self.id, )
                elif isinstance(s, int):
                    s = IISet((s, ))
                setlist.append(s)

            # If we only use one key return immediately
            if len(setlist) == 1:
                return setlist[0], (self.id, )

            if operator == 'or':
                # If we already get a small result set passed in, intersecting
                # the various indexes with it and doing the union later is
                # faster than creating a multiunion first.
                if resultset is not None and len(resultset) < 200:
                    smalllist = []
                    for s in setlist:
                        smalllist.append(intersection(resultset, s))
                    r = multiunion(smalllist)
                else:
                    r = multiunion(setlist)
            else:
                # For intersection, sort with smallest data set first
                if len(setlist) > 2:
                    setlist = sorted(setlist, key=len)
                r = resultset
                for s in setlist:
                    r = intersection(r, s)

        if isinstance(r, int):
            r = IISet((r, ))
        if r is None:
            return IISet(), (self.id, )
        else:
            return r, (self.id, )

    def hasUniqueValuesFor(self, name):
        """has unique values for column name"""
        if name == self.id:
            return 1
        else:
            return 0

    def getIndexSourceNames(self):
        """ return sequence of indexed attributes """
        # BBB:  older indexes didn't have 'indexed_attrs'
        return getattr(self, 'indexed_attrs', [self.id])

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return []

        if not withLengths:
            return tuple(self._index.keys())

        rl = []
        for value, row in self._index.items():
            if isinstance(row, int):
                # Legacy single-docid row.
                count = 1
            else:
                count = len(row)
            rl.append((value, count))
        return tuple(rl)

    def keyForDocument(self, id):
        # This method is superseded by documentToKeyMap
        return self._unindex[id]

    def documentToKeyMap(self):
        return self._unindex

    def items(self):
        # Return (value, docid-set) pairs, wrapping legacy int rows in a
        # one-element IISet.
        items = []
        for k, v in self._index.items():
            if isinstance(v, int):
                v = IISet((v, ))
            items.append((k, v))
        return items
예제 #55
0
파일: base.py 프로젝트: stomanin/indico
class SIndex(Index):
    """Forward-only index: maps each key produced by the adapter to a set
    of objects.

    Subclasses supply ``_fwd_class`` (the mapping type) and
    ``_fwd_set_class`` (the per-key set type).
    """

    _fwd_class = None
    _fwd_set_class = None

    def __init__(self, adapter):
        """Store ``adapter`` (called on each object to obtain its key or
        list of keys) and start with an empty index."""
        self._adapter = adapter
        self._fwd_index = self._fwd_class()
        self._num_objs = Length(0)

    def _gc_entry(self, v):
        """
        'Garbage collect' empty set entries
        """
        if len(self._fwd_index[v]) == 0:
            del self._fwd_index[v]

    def index_obj(self, obj):
        """Add ``obj`` under every key the adapter returns for it.

        Raises InconsistentIndexException when ``obj`` is already stored
        under one of those keys.
        """

        values = self._adapter(obj)

        if not isinstance(values, list):
            values = [values]

        for value in values:
            vset = self._fwd_index.get(value, self._fwd_set_class())
            if obj in vset:
                raise InconsistentIndexException("%r already in fwd[%r]" % (obj, value))
            else:
                vset.add(obj)
            # Re-assign so the mapping records the (possibly new) set.
            self._fwd_index[value] = vset
        self._num_objs.change(1)


    def _unindex_obj_from_key(self, key, obj):
        """Remove ``obj`` from the set stored under ``key``.

        Raises InconsistentIndexException when the key is unknown or the
        object is not stored under it.
        """
        if key in self._fwd_index:
            vset = self._fwd_index[key]
            if obj in vset:
                vset.remove(obj)
                self._fwd_index[key] = vset
                self._gc_entry(key)
            else:
                # Format the message here; passing the tuple as a second
                # argument left the placeholders unexpanded.
                raise InconsistentIndexException("'%s' not in fwd[%s]" %
                                                 (obj, key))
        else:
            raise InconsistentIndexException("'%s' not in fwd index" % key)

    def unindex_obj(self, obj):
        """
        Slightly dumber than the one in DIndex, takes the indexation value (key)
        instead of looking it up in the reverse index
        """
        keys = self._adapter(obj)
        if not isinstance(keys, list):
            keys = [keys]
        for k in keys:
            self._unindex_obj_from_key(k, obj)
        self._num_objs.change(-1)

    def values(self, *args):
        """Return a list of all indexed objects; ``args`` are passed on to
        the forward index's ``itervalues``."""
        return list(self.itervalues(*args))

    def itervalues(self, *args):
        """Yield every object of every key set, in forward-index order."""
        for s in self._fwd_index.itervalues(*args):
            for t in s:
                yield t

    def iteritems(self, *args):
        """Yield ``(key, object)`` pairs, one per stored object."""
        for ts, s in self._fwd_index.iteritems(*args):
            for t in s:
                yield ts, t

    def minKey(self):
        """Return the forward index's smallest key."""
        return self._fwd_index.minKey()

    def maxKey(self):
        """Return the forward index's largest key."""
        return self._fwd_index.maxKey()

    def __iter__(self):
        return iter(self._fwd_index)

    def __len__(self):
        # Number of indexed objects, as tracked by the Length counter.
        return self._num_objs()

    def __getitem__(self, item):
        return self._fwd_index[item]

    def get(self, item, default=None):
        """Return the set stored under ``item``, or ``default``."""
        return self._fwd_index.get(item, default)

    def clear(self):
        """
        Initialize index
        """

        # The forward index maps indexed values to a sequence of docids
        self._fwd_index = self._fwd_class()
        self._num_objs = Length(0)
예제 #56
0
class CatalogTool(PloneBaseTool, BaseTool):
    """Plone's catalog tool"""

    implements(IPloneCatalogTool)

    meta_type = 'Plone Catalog Tool'
    security = ClassSecurityInfo()
    toolicon = 'skins/plone_images/book_icon.png'
    # Created lazily by _increment_counter on the first (un)catalog call.
    _counter = None

    manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals())

    manage_options = (
        {
            'action': 'manage_main',
            'label': 'Contents'
        },
        {
            'action': 'manage_catalogView',
            'label': 'Catalog'
        },
        {
            'action': 'manage_catalogIndexes',
            'label': 'Indexes'
        },
        {
            'action': 'manage_catalogSchema',
            'label': 'Metadata'
        },
        {
            'action': 'manage_catalogAdvanced',
            'label': 'Advanced'
        },
        {
            'action': 'manage_catalogReport',
            'label': 'Query Report'
        },
        {
            'action': 'manage_catalogPlan',
            'label': 'Query Plan'
        },
        {
            'action': 'manage_propertiesForm',
            'label': 'Properties'
        },
    )

    def __init__(self):
        ZCatalog.__init__(self, self.getId())

    def _removeIndex(self, index):
        """Safe removal of an index.
        """
        try:
            self.manage_delIndex(index)
        except Exception:
            # Deliberately best-effort: a failed removal is ignored, but
            # SystemExit / KeyboardInterrupt are no longer swallowed.
            pass

    def _listAllowedRolesAndUsers(self, user):
        """Makes sure the list includes the user's groups.
        """
        result = user.getRoles()
        if 'Anonymous' in result:
            # The anonymous user has no further roles
            return ['Anonymous']
        result = list(result)
        if hasattr(aq_base(user), 'getGroups'):
            groups = ['user:%s' % x for x in user.getGroups()]
            if groups:
                result = result + groups
        result.append('Anonymous')
        result.append('user:%s' % user.getId())
        return result

    security.declarePrivate('indexObject')

    def indexObject(self, object, idxs=None):
        """Add object to catalog.

        The optional idxs argument is a list of specific indexes
        to populate (all of them by default).
        """
        if idxs is None:
            # Fresh list per call instead of a shared mutable default.
            idxs = []
        self.reindexObject(object, idxs)

    security.declareProtected(ManageZCatalogEntries, 'catalog_object')

    def catalog_object(self,
                       object,
                       uid=None,
                       idxs=None,
                       update_metadata=1,
                       pghandler=None):
        # Bump the change counter, wrap the object in its
        # IIndexableObject adapter when one is registered, then delegate
        # to ZCatalog.
        if idxs is None:
            # Fresh list per call instead of a shared mutable default.
            idxs = []
        self._increment_counter()

        w = object
        if not IIndexableObject.providedBy(object):
            # This is the CMF 2.2 compatible approach, which should be used
            # going forward
            wrapper = queryMultiAdapter((object, self), IIndexableObject)
            if wrapper is not None:
                w = wrapper

        ZCatalog.catalog_object(self,
                                w,
                                uid,
                                idxs,
                                update_metadata,
                                pghandler=pghandler)

    # The declaration must name the method that follows it; it previously
    # repeated 'catalog_object'.
    security.declareProtected(ManageZCatalogEntries, 'uncatalog_object')

    def uncatalog_object(self, *args, **kwargs):
        self._increment_counter()
        return BaseTool.uncatalog_object(self, *args, **kwargs)

    def _increment_counter(self):
        # Create the Length counter on first use, then add one.
        if self._counter is None:
            self._counter = Length()
        self._counter.change(1)

    security.declarePrivate('getCounter')

    def getCounter(self):
        # Current counter value, or 0 when nothing was ever (un)cataloged.
        if self._counter is None:
            return 0
        return self._counter() or 0

    security.declareProtected(SearchZCatalog, 'searchResults')

    def searchResults(self, REQUEST=None, **kw):
        """Calls ZCatalog.searchResults with extra arguments that
        limit the results to what the user is allowed to see.

        This version uses the 'effectiveRange' DateRangeIndex.

        It also accepts a keyword argument show_inactive to disable
        effectiveRange checking entirely even for those without portal
        wide AccessInactivePortalContent permission.
        """
        kw = kw.copy()
        show_inactive = kw.get('show_inactive', False)
        if isinstance(REQUEST, dict) and not show_inactive:
            show_inactive = 'show_inactive' in REQUEST

        user = _getAuthenticatedUser(self)
        kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not show_inactive and not _checkPermission(
                AccessInactivePortalContent, self):

            kw['effectiveRange'] = DateTime()

        return ZCatalog.searchResults(self, REQUEST, **kw)

    __call__ = searchResults

    security.declareProtected(ManageZCatalogEntries, 'clearFindAndRebuild')

    def clearFindAndRebuild(self):
        """Empties catalog, then finds all contentish objects (i.e. objects
           with an indexObject method), and reindexes them.
           This may take a long time.
        """
        def indexObject(obj, path):
            if (base_hasattr(obj, 'indexObject')
                    and safe_callable(obj.indexObject)):
                try:
                    obj.indexObject()
                except TypeError:
                    # Catalogs have 'indexObject' as well, but they
                    # take different args, and will fail
                    pass

        self.manage_catalogClear()
        portal = aq_parent(aq_inner(self))
        portal.ZopeFindAndApply(portal,
                                search_sub=True,
                                apply_func=indexObject)

    security.declareProtected(ManageZCatalogEntries, 'manage_catalogRebuild')

    def manage_catalogRebuild(self, RESPONSE=None, URL1=None):
        """Clears the catalog and indexes all objects with an 'indexObject'
        method. This may take a long time.
        """
        # NOTE(review): time.clock() and urllib.quote are Python 2 APIs;
        # they are kept as-is because the rest of this class targets
        # Python 2.
        elapse = time.time()
        c_elapse = time.clock()

        self.clearFindAndRebuild()

        elapse = time.time() - elapse
        c_elapse = time.clock() - c_elapse

        if RESPONSE is not None:
            RESPONSE.redirect(URL1 +
                              '/manage_catalogAdvanced?manage_tabs_message=' +
                              urllib.quote('Catalog Rebuilt\n'
                                           'Total time: %s\n'
                                           'Total CPU time: %s' %
                                           (repr(elapse), repr(c_elapse))))
예제 #57
0
파일: base.py 프로젝트: stomanin/indico
 def __init__(self, adapter):
     # Keep the adapter, build an empty forward index from the
     # class-level factory and start the object counter at zero.
     self._adapter = adapter
     forward_index = self._fwd_class()
     self._fwd_index = forward_index
     object_count = Length(0)
     self._num_objs = object_count
예제 #58
0
class CatalogTool(PloneBaseTool, BaseTool):
    """Plone's catalog tool"""

    # NOTE(review): several methods below describe themselves with comments
    # rather than docstrings; presumably this is deliberate so that Zope does
    # not treat them as publishable -- do not convert them to docstrings
    # without confirming.

    meta_type = 'Plone Catalog Tool'
    security = ClassSecurityInfo()
    toolicon = 'skins/plone_images/book_icon.png'
    # Created lazily by _increment_counter(); None until the first
    # catalog/uncatalog operation.
    _counter = None

    manage_catalogAdvanced = DTMLFile('www/catalogAdvanced', globals())

    manage_options = (
        {
            'action': 'manage_main',
            'label': 'Contents'
        },
        {
            'action': 'manage_catalogView',
            'label': 'Catalog'
        },
        {
            'action': 'manage_catalogIndexes',
            'label': 'Indexes'
        },
        {
            'action': 'manage_catalogSchema',
            'label': 'Metadata'
        },
        {
            'action': 'manage_catalogAdvanced',
            'label': 'Advanced'
        },
        {
            'action': 'manage_catalogReport',
            'label': 'Query Report'
        },
        {
            'action': 'manage_catalogPlan',
            'label': 'Query Plan'
        },
        {
            'action': 'manage_propertiesForm',
            'label': 'Properties'
        },
    )

    def __init__(self):
        ZCatalog.__init__(self, self.getId())

    def _removeIndex(self, index):
        # Safe removal of an index: failures are deliberately ignored.
        try:
            self.manage_delIndex(index)
        except Exception:
            # Best effort only.  'Exception' instead of a bare 'except' so
            # that KeyboardInterrupt and SystemExit still propagate.
            pass

    def _listAllowedRolesAndUsers(self, user):
        # Makes sure the list includes the user's groups.
        result = user.getRoles()
        if 'Anonymous' in result:
            # The anonymous user has no further roles
            return ['Anonymous']
        result = list(result)
        if hasattr(aq_base(user), 'getGroups'):
            groups = ['user:%s' % x for x in user.getGroups()]
            if groups:
                result = result + groups
        # Order the arguments from small to large sets
        result.insert(0, 'user:%s' % user.getId())
        result.append('Anonymous')
        return result

    @security.private
    def indexObject(self, object, idxs=None):
        # Add object to catalog.
        # The optional idxs argument is a list of specific indexes
        # to populate (all of them by default).
        if idxs is None:
            idxs = []
        self.reindexObject(object, idxs)

    @security.protected(ManageZCatalogEntries)
    def catalog_object(self,
                       object,
                       uid=None,
                       idxs=None,
                       update_metadata=1,
                       pghandler=None):
        # Catalogs the object (wrapped as IIndexableObject when an adapter
        # is available) and bumps the modification counter.
        if idxs is None:
            idxs = []
        self._increment_counter()

        w = object
        if not IIndexableObject.providedBy(object):
            # This is the CMF 2.2 compatible approach, which should be used
            # going forward
            wrapper = queryMultiAdapter((object, self), IIndexableObject)
            if wrapper is not None:
                w = wrapper

        ZCatalog.catalog_object(self,
                                w,
                                uid,
                                idxs,
                                update_metadata,
                                pghandler=pghandler)

    @security.protected(ManageZCatalogEntries)
    def uncatalog_object(self, *args, **kwargs):
        # Bumps the modification counter, then delegates to BaseTool.
        self._increment_counter()
        return BaseTool.uncatalog_object(self, *args, **kwargs)

    def _increment_counter(self):
        # Create the counter on first use, then add one to it.
        if self._counter is None:
            self._counter = Length()
        self._counter.change(1)

    @security.private
    def getCounter(self):
        # Current counter value, or 0 when nothing has been counted yet.
        if self._counter is None:
            return 0
        return self._counter() or 0

    @security.protected(SearchZCatalog)
    def searchResults(self, REQUEST=None, **kw):
        # Calls ZCatalog.searchResults with extra arguments that
        # limit the results to what the user is allowed to see.
        #
        # This version uses the 'effectiveRange' DateRangeIndex.
        #
        # It also accepts a keyword argument show_inactive to disable
        # effectiveRange checking entirely even for those without portal
        # wide AccessInactivePortalContent permission.

        kw = kw.copy()
        show_inactive = kw.get('show_inactive', False)
        if isinstance(REQUEST, dict) and not show_inactive:
            show_inactive = 'show_inactive' in REQUEST

        user = _getAuthenticatedUser(self)
        kw['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not show_inactive \
           and not _checkPermission(AccessInactivePortalContent, self):
            kw['effectiveRange'] = DateTime()

        return ZCatalog.searchResults(self, REQUEST, **kw)

    __call__ = searchResults

    def search(self, *args, **kw):
        # Wrap search() the same way that searchResults() is
        #
        # The query is taken from the first positional argument or, failing
        # that, from the 'query_request' keyword.
        # NOTE(review): positional arguments after the first are not
        # forwarded to the base class -- confirm this is intended.
        if args:
            query = args[0]
        elif 'query_request' in kw:
            query = kw['query_request']
        else:
            query = {}

        # Work on a copy so the security restrictions added below do not
        # leak into the mapping owned by the caller.
        query = query.copy()

        user = _getAuthenticatedUser(self)
        query['allowedRolesAndUsers'] = self._listAllowedRolesAndUsers(user)

        if not _checkPermission(AccessInactivePortalContent, self):
            query['effectiveRange'] = DateTime()

        kw['query_request'] = query

        return super(CatalogTool, self).search(**kw)

    @security.protected(ManageZCatalogEntries)
    def clearFindAndRebuild(self):
        # Empties catalog, then finds all contentish objects (i.e. objects
        # with an indexObject method), and reindexes them.
        # This may take a long time.

        def indexObject(obj, path):
            if (base_hasattr(obj, 'indexObject')
                    and safe_callable(obj.indexObject)):
                try:
                    obj.indexObject()

                    # index conversions from plone.app.discussion
                    annotations = IAnnotations(obj)
                    catalog = getToolByName(obj, "portal_catalog")
                    if DISCUSSION_ANNOTATION_KEY in annotations:
                        conversation = annotations[DISCUSSION_ANNOTATION_KEY]
                        conversation = conversation.__of__(obj)
                        for comment in conversation.getComments():
                            try:
                                if catalog:
                                    catalog.indexObject(comment)
                            except StopIteration:  # pragma: no cover
                                pass

                except TypeError:
                    # Catalogs have 'indexObject' as well, but they
                    # take different args, and will fail
                    pass

        self.manage_catalogClear()
        portal = aq_parent(aq_inner(self))
        portal.ZopeFindAndApply(portal,
                                search_sub=True,
                                apply_func=indexObject)

    @security.protected(ManageZCatalogEntries)
    def manage_catalogRebuild(self, RESPONSE=None, URL1=None):
        """Clears the catalog and indexes all objects with an 'indexObject'
        method. This may take a long time.
        """
        # time.clock() was removed in Python 3.8; use process_time() where
        # the interpreter offers it and fall back to clock() otherwise.
        cpu_clock = getattr(time, 'process_time', None) or time.clock

        elapse = time.time()
        c_elapse = cpu_clock()

        self.clearFindAndRebuild()

        elapse = time.time() - elapse
        c_elapse = cpu_clock() - c_elapse

        msg = ('Catalog Rebuilt\n'
               'Total time: %s\n'
               'Total CPU time: %s' % (repr(elapse), repr(c_elapse)))
        logger.info(msg)

        if RESPONSE is not None:
            # urllib.quote lives in urllib.parse on Python 3.
            quote = getattr(urllib, 'quote', None)
            if quote is None:
                from urllib.parse import quote
            RESPONSE.redirect(URL1 +
                              '/manage_catalogAdvanced?manage_tabs_message=' +
                              quote(msg))
예제 #59
0
파일: indexes.py 프로젝트: sylvestre/indico
 def __init__(self):
     """Create an empty booking set and a counter starting at zero."""
     # Container for the bookings (an OOTreeSet).
     self._bookings = OOBTree.OOTreeSet()
     # NOTE(review): presumably tracks the size of _bookings without
     # having to call len() on the tree set -- confirm against the callers.
     self._count = Length(0)
예제 #60
0
파일: catalog.py 프로젝트: araymund/karl
class GranularIndex(CatalogFieldIndex):
    """Indexes integer values using multiple granularity levels.

    The multiple levels of granularity make it possible to query large
    ranges without loading many IFTreeSets from the forward index.
    """
    implements(
        ICatalogIndex,
        IStatistics,
    )

    def __init__(self, discriminator, levels=(1000, )):
        """Create an index.

        levels is a sequence of integer coarseness levels.
        The default is (1000,).
        """
        # _levels is assigned before the base constructor runs.
        # NOTE(review): presumably the base constructor calls clear(),
        # which reads self._levels -- confirm before reordering.
        self._levels = tuple(levels)
        super(GranularIndex, self).__init__(discriminator)

    def clear(self):
        """Initialize all mappings."""
        # The forward index maps an indexed value to IFSet(docids)
        self._fwd_index = self.family.IO.BTree()
        # The reverse index maps a docid to its index value
        self._rev_index = self.family.II.BTree()
        self._num_docs = Length(0)
        # self._granular_indexes: [(level, BTree(value -> IFSet([docid])))]
        self._granular_indexes = [(level, self.family.IO.BTree())
                                  for level in self._levels]

    def index_doc(self, docid, obj):
        """Index (or re-index) obj under docid.

        The value comes from the discriminator: it is called with
        (obj, _marker) when callable, otherwise it names an attribute of
        obj.  If no value is found the document is unindexed instead.

        Raises ValueError when the value is not an int.
        """
        if callable(self.discriminator):
            value = self.discriminator(obj, _marker)
        else:
            value = getattr(obj, self.discriminator, _marker)

        if value is _marker:
            # unindex the previous value
            self.unindex_doc(docid)
            return

        if not isinstance(value, int):
            # Wrap the value in a tuple so that tuple values do not break
            # the string formatting itself.
            raise ValueError(
                'GranularIndex cannot index non-integer value %s' % (value,))

        rev_index = self._rev_index
        if docid in rev_index:
            if docid in self._fwd_index.get(value, ()):
                # There's no need to index the doc; it's already up to date.
                return
            # unindex doc if present
            self.unindex_doc(docid)

        # Insert into forward index.
        docids = self._fwd_index.get(value)
        if docids is None:
            docids = self.family.IF.TreeSet()
            self._fwd_index[value] = docids
        docids.insert(docid)

        # increment doc count
        self._num_docs.change(1)

        # Insert into reverse index.
        rev_index[docid] = value

        # Insert into every granular index under the coarse bucket number.
        for level, ndx in self._granular_indexes:
            bucket = value // level
            docids = ndx.get(bucket)
            if docids is None:
                docids = self.family.IF.TreeSet()
                ndx[bucket] = docids
            docids.insert(docid)

    def unindex_doc(self, docid):
        """Remove docid from all mappings; do nothing if it is not indexed."""
        rev_index = self._rev_index
        value = rev_index.get(docid)
        if value is None:
            return  # not in index

        del rev_index[docid]

        self._num_docs.change(-1)

        ndx = self._fwd_index
        try:
            docids = ndx[value]
            docids.remove(docid)
            if not docids:
                # Drop the entry once its last docid is gone.
                del ndx[value]
        except KeyError:
            pass

        for level, ndx in self._granular_indexes:
            bucket = value // level
            try:
                docids = ndx[bucket]
                docids.remove(docid)
                if not docids:
                    del ndx[bucket]
            except KeyError:
                pass

    def search(self, queries, operator='or'):
        """Return the docids matching the given queries.

        Each query is either a Range (converted with as_tuple()) or a
        single value, which is treated as the range (value, value).  The
        per-query results are intersected when operator is 'and' and
        unioned otherwise.  Returns None when operator is 'and' and queries
        is empty.
        """
        sets = []
        for query in queries:
            if isinstance(query, Range):
                query = query.as_tuple()
            else:
                query = (query, query)

            matched = self.family.IF.multiunion(self.docids_in_range(*query))
            sets.append(matched)

        result = None

        if len(sets) == 1:
            result = sets[0]
        elif operator == 'and':
            # Intersect the smallest sets first.  Sorting by size (rather
            # than by the sets' default comparison) is well defined and does
            # not change the outcome of the intersection.
            sets.sort(key=len)
            for matched in sets:
                result = self.family.IF.intersection(matched, result)
        else:
            result = self.family.IF.multiunion(sets)

        return result

    def docids_in_range(self, min, max):
        """List the docids for an integer range, inclusive on both ends.

        min or max can be None, making them unbounded.

        Returns an iterable of IFSets.
        """
        # Walk the granular indexes from coarsest to finest.  Sorting on the
        # level alone avoids comparing the BTrees when two levels are equal.
        by_level = sorted(self._granular_indexes,
                          key=lambda entry: entry[0],
                          reverse=True)
        for level, ndx in by_level:
            # Try to fill the range using coarse buckets first.
            # Use only buckets that completely fill the range.
            # For example, if start is 2 and level is 10, then we can't
            # use bucket 0; only buckets 1 and greater are useful.
            # Similarly, if end is 18 and level is 10, then we can't use
            # bucket 1; only buckets 0 and less are useful.
            if min is not None:
                a = (min + level - 1) // level
            else:
                a = None
            if max is not None:
                b = (max - level + 1) // level
            else:
                b = None
            # a and b are now coarse bucket values (or None).
            if a is None or b is None or a <= b:
                sets = []
                if a is not None and min < a * level:
                    # include the gap before
                    sets.extend(self.docids_in_range(min, a * level - 1))
                sets.extend(ndx.values(a, b))
                if b is not None and (b + 1) * level - 1 < max:
                    # include the gap after
                    sets.extend(self.docids_in_range((b + 1) * level, max))
                return sets

        # No granular level has a bucket that fits inside the range; read
        # the forward index directly.
        return self._fwd_index.values(min, max)