Example #1
0
class TagTimeKey(TimeKey):
    """Time key computed from the timestamp a tag reports for an object.

    ``minutes`` lists the minute marks that delimit time slots; it is stored
    as the keys of an IIBTree so neighbouring marks can be looked up with
    ``maxKey``/``minKey``.  A key of -1 disables rounding of the minute.
    """

    def __init__(self, tag, minutes=each10minutes, name='tagtime'):
        # Only the keys matter; every value is the constant 1.
        self.minutes = IIBTree([(mark, 1) for mark in minutes])

        self.tag = tag
        self.name = name

    def __call__(self, objectId, instance, *arg, **kw):
        """Return ``{'<name>:<tag name>': timestamp}`` for ``instance``.

        Returns an empty tuple when the tag yields no (falsy) value for
        ``instance.context``.  Otherwise the timestamp is the end of the
        tag's time slot if that end is already in the past, else the start
        of the slot.
        """
        stamp = self.tag.query(instance.context)
        if not stamp:
            return ()

        parts = time.localtime(stamp)
        upto_hour = parts[:4]  # year, month, day, hour
        marks = self.minutes

        minute = parts[4]
        if not marks.has_key(-1) and not marks.has_key(minute):
            # Round down to the closest configured minute mark.
            minute = marks.maxKey(minute)

        try:
            following = marks.minKey(minute + 1)
            slot_end = time.mktime(upto_hour + (following, 0, 0, 0, 0))
        except ValueError:
            # No later mark within this hour: the slot ends at the next hour.
            slot_end = time.mktime(upto_hour + (0, 0, 0, 0, 0)) + 3600

        key = '%s:%s' % (self.name, self.tag.name)
        if slot_end < time.time():
            return {key: slot_end}

        return {key: time.mktime(upto_hour + (minute, 0, 0, 0, 0))}
Example #2
0
    def testPairs(self):
        # Exercise weighted union and intersection over every ordered pair
        # of trees/buckets, with several weight pairs and both argument
        # orders.
        t1 = IIBTree([(1, 10), (3, 30), (7, 70)])
        t2 = IIBTree([(3, 30), (5, 50), (7, 7), (9, 90)])
        allkeys = [1, 3, 5, 7, 9]
        b1 = IIBucket(t1)
        b2 = IIBucket(t2)
        mappings = (t1, t2, b1, b2)
        weightings = ((0, 0), (1, 10), (10, 1), (2, 3))
        for x in mappings:
            for key in x.keys():
                self.assertEqual(key in allkeys, 1)
            for y in mappings:
                for w1, w2 in weightings:
                    both_orders = (((x, w1), (y, w2)), ((y, w2), (x, w1)))

                    # Union: keys found in either input; an absent key
                    # contributes a value of 0.
                    expected = sorted(
                        [(key, x.get(key, 0) * w1 + y.get(key, 0) * w2)
                         for key in allkeys
                         if x.has_key(key) or y.has_key(key)])
                    for first, second in both_orders:
                        got = mass_weightedUnion([first, second])
                        self.assertEqual(expected, list(got.items()))

                    # Intersection: only keys found in both inputs.
                    expected = sorted(
                        [(key, x[key] * w1 + y[key] * w2)
                         for key in allkeys
                         if x.has_key(key) and y.has_key(key)])
                    for first, second in both_orders:
                        got = mass_weightedIntersection([first, second])
                        self.assertEqual(expected, list(got.items()))
Example #3
0
 def convertScores(scores,
                   type=type,
                   TupleType=TupleType,
                   IIBTree=IIBTree):
     """Return ``scores`` unchanged if it is a tuple or an IIBTree.

     Any other input is passed to the ``IIBTree`` constructor and the new
     tree is returned.  The extra keyword parameters only bind globals as
     locals; callers are not expected to pass them.
     """
     # Compare against the IIBTree class itself.  Comparing against a fresh
     # ``IIBTree()`` instance is never an identity match, which made every
     # IIBTree input get copied into a new tree.
     if type(scores) is not TupleType and type(scores) is not IIBTree:
         scores = IIBTree(scores)
     return scores
Example #4
0
 def __init__(self, id=None, **kwargs):
     """Initialize the base class, then create the empty ticket indexes."""
     super(TracListing, self).__init__(id, **kwargs)
     # Score index: int (ticket#) -> int (sum/score).
     self._scores = IIBTree()
     # Reward-ratio index: int (ticket#) -> float (ratio).
     self._reward = IOBTree()
     # Parent-to-child index: int parent ticket id -> PersistentList of
     # int child ticket ids.
     self._children = IOBTree()
Example #5
0
 def clear(self):
     """Reset the index to its empty state.

     The forward index, the reverse index and the length are replaced by
     fresh empty objects.  The counter is created on first use and
     incremented on every later reset.
     """
     self._length = Length()
     self._unindex = IIBTree()
     self._index = IOBTree()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = Length()
Example #6
0
 def clear(self):
     """Reset the index to its empty state.

     All containers and lengths are replaced by fresh empty objects and the
     indexed value goes back to 1.  The counter is created on first use and
     incremented on every later reset.
     """
     new_length = BTrees.Length.Length
     self._index = IITreeSet()
     self._index_length = new_length()
     self._index_value = 1
     self._unindex = IIBTree()
     self._length = new_length()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = new_length()
Example #7
0
class VerificationCodeUtility(Persistent):
    """Persistent registry mapping integer verification codes to order ids."""

    implements(IVerificationCodeUtility)

    def __init__(self):
        # int verification code -> int order id
        self._verification_codes = IIBTree()

    def generate(self, order):
        """Create, register and return a new verification code for ``order``.

        One initial draw plus up to ``RETRIES`` further draws are made from
        the inclusive range ``LOWER``..``UPPER``; the first unused code is
        stored and returned as a string.

        Raises ``Exception`` when every draw collides with a stored code.
        """
        # NOTE(review): ``random`` is not a cryptographically strong source;
        # confirm that predictable codes are acceptable for this use.
        #
        # Every draw, including the last one, is checked before giving up.
        # Previously a unique code found on the final draw was discarded and
        # the exception was raised anyway.
        for _attempt in range(RETRIES + 1):
            verification_code = random.randint(LOWER, UPPER)
            if self.is_unique(verification_code):
                break
        else:
            raise Exception('Could not find unique verification code.')

        self.add(verification_code, order)
        return str(verification_code)

    def is_unique(self, code):
        """Return True when ``code`` has not been registered yet."""
        # Membership on the tree itself is a keyed lookup; testing against
        # ``.keys()`` walks the keys one by one.
        return code not in self._verification_codes

    def add(self, verification_code, order):
        """Register ``verification_code`` for the id of ``order``.

        The id returned by ``order.getId()`` is converted with ``int()``
        when it is not already an int.
        """
        order_id = order.getId()
        if not isinstance(order_id, IntType):
            order_id = int(order_id)

        self._verification_codes[verification_code] = order_id
Example #8
0
    def __init__(self, lexicon):
        """Create an empty index that resolves words through ``lexicon``."""
        self._lexicon = lexicon

        # Term weights: wid -> {docid -> weight}, i.e. t -> D -> w(D, t).
        # Each indexer defines its own notion of a term weight, but every
        # indexer is expected to keep its docid-to-weight maps in
        # ._wordinfo, keyed by wid.
        #
        # Out-of-vocabulary (OOV) words come in two kinds.  Wid 0 is OOV by
        # definition.  In addition, the lexicon may hand back a non-zero wid
        # for a word this index does not currently know: unindexing the last
        # document containing a word leaves its wid in the lexicon but not
        # in _wordinfo, and a lexicon shared between indices can contain
        # words introduced by another index.
        #
        # A word is in-vocabulary for this index if and only if
        # _wordinfo.has_key(wid).  Wid 0 must never be a key.
        self._wordinfo = IOBTree()

        # Document weights: docid -> weight.  As with term weights, each
        # indexer decides what a document weight means but stores it in
        # ._docweight.
        self._docweight = IIBTree()

        # Document words: docid -> WidCode'd list of wids.  Needed for
        # un-indexing and for phrase search.
        self._docwords = IOBTree()

        # BTree Length objects give efficient length computation without
        # conflicts.
        self.length = Length()
        self.document_count = Length()
Example #9
0
 def search_phrase(self, phrase):
     """Return an IIBTree of docid -> weight for documents matching ``phrase``.

     A document matches when it is in the weighted intersection of the
     per-word results and its stored word list contains the encoded wid
     sequence of the phrase.  An empty result is returned as soon as any
     word of the phrase is out of vocabulary.
     """
     wids = self._lexicon.termToWordIds(phrase)
     if len(self._remove_oov_wids(wids)) != len(wids):
         # At least one wid was OOV, so nothing can possibly match.
         return IIBTree()
     hits = mass_weightedIntersection(self._search_wids(wids))
     if not hits:
         return hits
     encoded = WidCode.encode(wids)
     matches = IIBTree()
     for docid, weight in hits.items():
         if self._docwords[docid].find(encoded) < 0:
             # All words occur in the document, but not as this sequence.
             continue
         matches[docid] = weight
     return matches
Example #10
0
 def testIdentity(self):
     # A single mapping combined with weight 1 must come back with the same
     # items, for both a tree and a bucket and for both operations.
     pairs = [(1, 2)]
     for mapping in (IIBTree(pairs), IIBucket(pairs)):
         for combine in (mass_weightedUnion, mass_weightedIntersection):
             merged = combine([(mapping, 1)])
             self.assertEqual(len(merged), 1)
             self.assertEqual(list(merged.items()), list(mapping.items()))
Example #11
0
 def __init__(self, id=None, **kwargs):
     """Initialize the base class, then create the empty ticket indexes."""
     super(TracListing, self).__init__(id, **kwargs)
     # Score index: int (ticket#) -> int (sum/score).
     self._scores = IIBTree()
     # Reward-ratio index: int (ticket#) -> float (ratio).
     self._reward = IOBTree()
     # Parent-to-child index: int parent ticket id -> PersistentList of
     # int child ticket ids.
     self._children = IOBTree()
Example #12
0
    def testMany(self):
        # Feed N weighted IIBTrees, in random order, to the mass union and
        # mass intersection and compare with results computed by hand.
        import random
        N = 15  # number of IIBTrees to feed in
        commonkey = N * 1000
        seen = {commonkey: 1}
        L = []
        for i in range(N):
            t = IIBTree()
            t[commonkey] = i
            # Keys i .. N-1 get the values N*i, N*i + 1, ...
            for key in range(i, N):
                seen[key] = 1
                t[key] = N * i + (key - i)
            L.append((t, i + 1))
        random.shuffle(L)
        allkeys = sorted(seen.keys())

        # Test the union: every key, summing over the trees that hold it.
        expected = [
            (key, sum([t[key] * w for t, w in L if t.has_key(key)]))
            for key in allkeys]
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection: only keys held by every tree.
        expected = []
        for key in allkeys:
            holders = [(t, w) for t, w in L if t.has_key(key)]
            if len(holders) == len(L):
                expected.append((key, sum([t[key] * w for t, w in holders])))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
Example #13
0
    def testMany(self):
        # Build N weighted IIBTrees, shuffle them, and check the mass union
        # and mass intersection against independently computed results.
        import random
        N = 15  # number of IIBTrees to feed in
        commonkey = N * 1000
        key_set = {commonkey: 1}
        L = []
        for i in range(N):
            tree = IIBTree()
            tree[commonkey] = i
            for offset in range(N - i):
                key_set[i + offset] = 1
                tree[i + offset] = N * i + offset
            L.append((tree, i + 1))
        random.shuffle(L)
        allkeys = sorted(key_set.keys())

        # Test the union.
        expected = []
        for key in allkeys:
            weighted = [t[key] * w for t, w in L if t.has_key(key)]
            expected.append((key, sum(weighted)))
        got = mass_weightedUnion(L)
        self.assertEqual(expected, list(got.items()))

        # Test the intersection.
        expected = []
        for key in allkeys:
            missing = [t for t, w in L if not t.has_key(key)]
            if missing:
                # Some tree lacks the key, so it is not in the intersection.
                continue
            expected.append((key, sum([t[key] * w for t, w in L])))
        got = mass_weightedIntersection(L)
        self.assertEqual(expected, list(got.items()))
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        If queries are currently blocked, or the request does not contain
        the needed parameters, then None is returned.

        If the request contains a parameter with the name of the
        column and this parameter is either a Record or a class
        instance then it is assumed that the parameters of this index
        are passed as attribute (Note: this is the recommended way to
        pass parameters since Zope 2.4)

        Otherwise two objects are returned.  The first object is an
        IIBTree mapping the record number ('rid') of each matching record
        to ``int(10000 * score)``.  The second object is a tuple containing
        the names of all data fields used (here only this index's id).
        """
        if query_blocker.blocked:
            return
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        template_params = {
            'keys': record.keys,
        }
        query_body = self._apply_template(template_params)
        logger.info(query_body)
        es_kwargs = dict(
            index=index_name(),
            body=query_body,
            size=BATCH_SIZE,
            scroll='1m',
            _source_include=['rid'],
        )
        es = get_query_client()
        result = es.search(**es_kwargs)

        def score(hit):
            # Scale the float relevance score to an int for the IIBTree.
            return int(10000 * float(hit['_score']))

        # initial return value, other batches to be applied
        retval = IIBTree()
        for hit in result['hits']['hits']:
            retval[hit['_source']['rid']] = score(hit)

        total = result['hits']['total']
        if total > BATCH_SIZE:
            sid = result['_scroll_id']
            counter = BATCH_SIZE
            while counter < total:
                result = es.scroll(scroll_id=sid, scroll='1m')
                # The scroll id may change between requests; always continue
                # with the most recently received one.
                sid = result.get('_scroll_id', sid)
                for hit in result['hits']['hits']:
                    retval[hit['_source']['rid']] = score(hit)
                counter += BATCH_SIZE
        return retval, (self.id,)
 def clear(self):
     """Reset the index to its empty state.

     The forward index, the reverse index and the length are replaced by
     fresh empty objects.  The counter is created on first use and
     incremented on every later reset.
     """
     self._length = Length()
     self._unindex = IIBTree()
     self._index = IOBTree()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = Length()
 def clear(self):
     """Reset the index to its empty state.

     All containers and lengths are replaced by fresh empty objects and the
     indexed value goes back to 1.  The counter is created on first use and
     incremented on every later reset.
     """
     new_length = BTrees.Length.Length
     self._index = IITreeSet()
     self._index_length = new_length()
     self._index_value = 1
     self._unindex = IIBTree()
     self._length = new_length()
     if self._counter is not None:
         self._increment_counter()
     else:
         self._counter = new_length()
Example #17
0
 def __init__(self, name, root):
     """Record ``name`` and ``root`` and create the empty version indexes."""
     self.name = name
     self.root = root
     self.date_created = time.time()
     # m_order: newest-first mapping of int -> version id.
     self.m_order = IOBTree()
     # m_date: packed date (int number of minutes since the epoch) ->
     # lookup key in m_order.  It is kept apart from m_order because date
     # lookups only have minute precision, and several versions can be
     # added within the same minute.
     self.m_date = IIBTree()
Example #18
0
 def testScalarMultiply(self):
     # Combining a single mapping with weight `factor` must keep its keys
     # and multiply every value by `factor`.
     tree = IIBTree([(1, 2), (2, 3), (3, 4)])
     allkeys = [1, 2, 3]
     bucket = IIBucket(tree)
     combiners = (mass_weightedUnion, mass_weightedIntersection)
     for mapping in (tree, bucket):
         self.assertEqual(list(mapping.keys()), allkeys)
         for combine in combiners:
             for factor in (0, 1, 5, 10):
                 scaled = combine([(mapping, factor)])
                 self.assertEqual(allkeys, list(scaled.keys()))
                 for key in mapping.keys():
                     self.assertEqual(mapping[key] * factor, scaled[key])
Example #19
0
 def test_walk_w_normal_btree(self):
     # walk() on a walker built around a populated IIBTree is expected to
     # raise NotImplementedError.  The locals that were assigned here but
     # never read have been dropped.
     from BTrees.IIBTree import IIBTree
     obj = IIBTree()
     for i in range(1000):
         obj[i] = i
     walker = self._makeOne(obj)
     self.assertRaises(NotImplementedError, walker.walk)
Example #20
0
 def _mass_add_wordinfo(self, wid2weight, docid):
     """Record ``docid`` with its weight under every wid of ``wid2weight``.

     A wid not yet in ``_wordinfo`` starts with a plain dict and counts as
     one new word.  A dict that already holds exactly ``DICT_CUTOFF``
     entries is converted to an IIBTree before the new entry is added.
     ``self.length`` is changed once, by the number of new words.
     """
     wordinfo = self._wordinfo
     added_words = 0
     for wid, weight in wid2weight.items():
         scores = wordinfo.get(wid)
         if scores is None:
             scores = {}
             added_words += 1
         elif isinstance(scores, dict) and len(scores) == self.DICT_CUTOFF:
             scores = IIBTree(scores)
         scores[docid] = weight
         wordinfo[wid] = scores  # not redundant:  Persistency!
     self.length.change(added_words)
Example #21
0
def optimize_dateindex(index):
    """Migrate ``index._unindex`` to an IIBTree, unless it already is one.

    Entries are copied one at a time.  An optimistic savepoint is taken
    whenever the zero-based position reaches a non-zero multiple of 10000,
    and once more after the last entry.
    """
    # migrate _unindex from OIBTree to IIBTree
    old_unindex = index._unindex
    if isinstance(old_unindex, IIBTree):
        return
    index._unindex = _unindex = IIBTree()
    # Values are passed as logging arguments so the message is only
    # formatted when the record is actually emitted.
    logger.info('Converting to IIBTree for index `%s`.', index.getId())
    for pos, (k, v) in enumerate(old_unindex.items()):
        _unindex[k] = v
        if pos and pos % 10000 == 0:
            transaction.savepoint(optimistic=True)
            logger.info('Processed %s items.', pos)

    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
Example #22
0
    def insertDocument(self, docid, widlist):
        """Store ``widlist`` for ``docid`` and record per-word frequencies.

        After delegating to ``Storage.insertDocument``, a new IIBTree that
        maps each wid to its number of occurrences in ``widlist`` is stored
        under ``self._frequencies[docid]``.
        """
        Storage.insertDocument(self, docid, widlist)

        occurences = {}  # wid -> #(occurences)
        for wid in widlist:
            occurences[wid] = occurences.get(wid, 0) + 1

        self._frequencies[docid] = IIBTree()
        tree = self._frequencies[docid]
        tree.update(occurences)
Example #23
0
    def insertForwardIndexEntry(self, entry, documentId, score=1):
        """Uses the information provided to update the indexes.

        The basic logic for choice of data structure is based on
        the number of entries as follows:

            1      tuple ``(documentId, score)``
            2-3    dictionary
            4+     IIBTree

        Nothing is written when ``documentId`` is already stored for
        ``entry`` with the same ``score``.
        """

        index = self._index
        indexRow = index.get(entry, None)

        if indexRow is None:
            # We don't have any information at this point, so we'll
            # put our first entry in, and use a tuple to save space
            index[entry] = (documentId, score)
            return

        if type(indexRow) is TupleType:
            # Tuples are only used for rows which have only a single entry.
            if indexRow[0] != documentId:
                # A second document: promote the row to a dictionary.
                index[entry] = {
                    indexRow[0]: indexRow[1],
                    documentId: score,
                }
            elif indexRow[1] != score:
                # Same document with a changed score.
                index[entry] = (documentId, score)
            return

        # A unique sentinel marks "not present", so that no real score
        # (including -1) can be mistaken for an already stored entry.
        missing = object()
        if indexRow.get(documentId, missing) == score:
            return

        # score changed (or new entry)
        indexRow[documentId] = score
        if type(indexRow) is DictType:
            if len(indexRow) > 3:
                # Big enough to give it's own database record
                indexRow = IIBTree(indexRow)
            # A dictionary row has to be stored again under its entry.
            index[entry] = indexRow
Example #24
0
def optimize_dateindex(index):
    """Replace ``index._unindex`` by an IIBTree holding the same items.

    Does nothing when ``_unindex`` is already an IIBTree.  Items are copied
    one at a time; an optimistic savepoint is taken whenever the zero-based
    position reaches a non-zero multiple of 10000, and once more at the end.
    """
    source = index._unindex
    if isinstance(source, IIBTree):
        return
    target = IIBTree()
    index._unindex = target
    logger.info('Converting to IIBTree for index `%s`.', index.getId())
    for pos, item in enumerate(source.items()):
        key, value = item
        target[key] = value
        # Note: flake8 erroneously complains about module formatter.
        if pos > 0 and pos % 10000 == 0:  # noqa S001
            transaction.savepoint(optimistic=True)
            logger.info('Processed %s items.', pos)

    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
Example #25
0
 def insert(self, idx, results, relnames=None, treePrefix=None):
     """Count, per path and depth, the brains in ``results`` into ``idx``.

     For each brain the first three '/'-separated components of its path
     are dropped.  The remaining path and each of its ancestors then get a
     counter incremented in ``idx[path]``: depth 0 for the path itself,
     depth 1 for its parent, and so on.  When ``treePrefix`` is given and
     the brain's own path does not start with it, the first path indexed
     for the brain's rid that does start with it is used instead.
     ``relnames`` is not used here.
     """
     unindex = None
     for brain in results:
         # Look the path index up through the first brain, then reuse it.
         if not unindex:
             unindex = brain.global_catalog._catalog.indexes[
                 'path']._unindex
         path = brain.getPath()
         if treePrefix and not path.startswith(treePrefix):
             indexed_paths = unindex[brain.getRID()]
             path = next(
                 (p for p in indexed_paths if p.startswith(treePrefix)),
                 path)
         path = path.split('/', 3)[-1]
         levels = path.count('/') + 1
         for depth in xrange(levels):
             counts = idx.setdefault(path, IIBTree())
             counts[depth] = counts.get(depth, 0) + 1
             # Step up to the parent path for the next depth.
             path = path.rsplit('/', 1)[0]
Example #26
0
class database:
    """Key/value store: values live in a list, keys map to list positions."""

    def __init__(self):
        self.data = []
        self.dict = IIBTree()  # key -> position of its value in self.data
        self.index = -1  # index of the last element in the list

    def insert(self, key, value):
        """Store ``value`` under ``key``, overwriting any existing value."""
        slot = self.dict.get(key)
        if slot is not None:
            self.data[slot] = value
            return
        self.data.append(value)
        self.index += 1
        self.dict[key] = self.index

    def search(self, key):
        """Return the value stored under ``key``, or "NOT PRESENT"."""
        slot = self.dict.get(key)
        if slot is None:
            return "NOT PRESENT"
        return self.data[slot]

    def delete(self, key):
        """Forget ``key``; raises KeyError when it is not stored.

        Only the key is removed: the value stays in ``self.data``.
        """
        del self.dict[key]
Example #27
0
 def __init__(self, datafs, writable=0, trans=0, pack=0):
     """Open the database file ``datafs`` and load or create its indexes.

     The storage is opened read-only unless ``writable`` is true.  The root
     entries "index", "docpaths", "doctimes" and "watchfolders" are reused
     when present and created empty otherwise.  A reverse path -> docid
     map and the highest docid are derived from "docpaths", and three
     summary lines are printed.
     """
     self.trans_limit = trans
     self.pack_limit = pack
     self.trans_count = 0
     self.pack_count = 0
     self.stopdict = get_stopdict()
     self.mh = mhlib.MH()
     self.filestorage = FileStorage(datafs, read_only=(not writable))
     self.database = DB(self.filestorage)
     self.connection = self.database.open()
     self.root = self.connection.root()

     def load_or_create(key, factory):
         # Return root[key]; when absent, store factory() under it first.
         try:
             return self.root[key]
         except KeyError:
             created = self.root[key] = factory()
             return created

     self.index = load_or_create("index", TextIndex)
     self.docpaths = load_or_create("docpaths", IOBTree)
     self.doctimes = load_or_create("doctimes", IIBTree)
     self.watchfolders = load_or_create("watchfolders", dict)

     # Reverse mapping, rebuilt on every start: path -> docid.
     self.path2docid = OIBTree()
     for docid, path in self.docpaths.items():
         self.path2docid[path] = docid

     try:
         self.maxdocid = max(self.docpaths.keys())
     except ValueError:
         # max() of an empty sequence: no documents stored yet.
         self.maxdocid = 0

     print("%s Document ids" % (len(self.docpaths),))
     print("%s Pathnames" % (len(self.path2docid),))
     print("%s Words" % (self.index.lexicon.length(),))
Example #28
0
    def _add_wordinfo(self, wid, f, docid):
        """Record score ``f`` of ``docid`` under word id ``wid``.

        A wid not yet in ``_wordinfo`` starts with a plain dict and bumps
        ``self.length`` by one.  A dict that already holds exactly
        ``DICT_CUTOFF`` entries is converted to an IIBTree first.
        """
        # Why a dict for small maps and an IIBTree for large ones:
        #
        # The pickle of a dict is smaller than the pickle of an IIBTree,
        # substantially so for small mappings, so a dictionary is used
        # until the mapping reaches DICT_CUTOFF elements.
        #
        # The cutoff follows from how Python dictionaries are implemented:
        # the hashtable always has 2**N slots and is resized whenever it is
        # 2/3s full.  A pickled dict with 10 elts is half the size of an
        # IIBTree with 10 elts, and 10 happens to be 2/3s of 2**4, so 10 is
        # the cutoff for now.
        #
        # Pickle size is not the only consideration.  An IIBTree has the
        # smaller in-memory representation: the pickle of a 500-elt dict is
        # 92% of the size of the same IIBTree, but the dict uses more space
        # when it is live in memory.  An IIBTree stores two C arrays of
        # ints, one for the keys and one for the values, and holds up to
        # 120 key-value pairs in a single bucket.
        scores = self._wordinfo.get(wid)
        if scores is None:
            scores = {}
            self.length.change(1)
        # This runs on every update.  The type is checked first: something
        # that is not a dict cannot need conversion, and that avoids an
        # expensive len(IIBTree).
        elif isinstance(scores, dict) and len(scores) == self.DICT_CUTOFF:
            scores = IIBTree(scores)
        scores[docid] = f
        self._wordinfo[wid] = scores  # not redundant:  Persistency!
Example #29
0
def convert_to_booleanindex(catalog, index):
    """Turn ``index`` into a BooleanIndex in place.

    Does nothing when ``index`` already is a BooleanIndex.  Otherwise its
    class is switched, its ``_unindex`` is rebuilt as an IIBTree of
    docid -> 0/1, and the smaller of the two docid sets (false or true)
    becomes ``_index``; on a tie the true set is used.  An optimistic
    savepoint is taken at the end.
    """
    if isinstance(index, BooleanIndex):
        return
    # The id is passed as a logging argument so the message is only
    # formatted when the record is actually emitted.
    logger.info('Converting index `%s` to BooleanIndex.', index.getId())
    index.__class__ = BooleanIndex
    index._p_changed = True
    catalog._catalog._p_changed = True

    # convert _unindex from IOBTree to IIBTree
    sets = {0: IITreeSet(), 1: IITreeSet()}
    old_unindex = index._unindex
    index._unindex = _unindex = IIBTree()
    for k, v in old_unindex.items():
        # docid to value (True, False)
        value = int(bool(v))
        _unindex[k] = value
        sets[value].add(k)
    del old_unindex

    # convert _index from OOBTree to IITreeSet and set lengths
    false_length = len(sets[0])
    true_length = len(sets[1])
    index._length = Length(false_length + true_length)
    # we put the smaller set into the index
    if false_length < true_length:
        indexed_value, indexed_length = 0, false_length
    else:
        indexed_value, indexed_length = 1, true_length
    index._index_value = indexed_value
    index._index_length = Length(indexed_length)
    index._index = sets[indexed_value]
    # Drop the reference to the set that is not kept.
    del sets[1 - indexed_value]
    transaction.savepoint(optimistic=True)
    logger.info('Finished conversion.')
Example #30
0
class TimeKey(object):
    """Key built from the current local time, rounded to configured marks.

    ``hours`` and ``minutes`` list the allowed hour and minute marks; both
    are stored as the keys of an IIBTree.  A key of -1 in either tree
    disables rounding for that field.
    """
    interface.implements(ITimeKey)

    def __init__(self, hours=eachHour, minutes=each0minute, name='time'):
        # Only the keys matter; every value is the constant 1.
        self.hours = IIBTree([(mark, 1) for mark in hours])
        self.minutes = IIBTree([(mark, 1) for mark in minutes])
        self.name = name

    def __call__(self, *args, **kw):
        """Return ``{name: timestamp}`` for the rounded current time.

        Minute and hour are each replaced by the largest configured mark
        not above them, unless the value itself (or -1) is configured.
        """
        now = time.localtime()
        minute_marks = self.minutes
        hour_marks = self.hours

        minute = now[4]
        if not minute_marks.has_key(-1) and not minute_marks.has_key(minute):
            minute = minute_marks.maxKey(minute)

        hour = now[3]
        if not hour_marks.has_key(-1) and not hour_marks.has_key(hour):
            hour = hour_marks.maxKey(hour)

        rounded = now[:3] + (hour, minute, 0, 0, 0, 0)
        return {self.name: time.mktime(rounded)}
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        If queries are currently blocked, or the request does not contain
        the needed parameters, then None is returned.

        If the request contains a parameter with the name of the
        column and this parameter is either a Record or a class
        instance then it is assumed that the parameters of this index
        are passed as attribute (Note: this is the recommended way to
        pass parameters since Zope 2.4)

        Otherwise two objects are returned.  The first object is an
        IIBTree mapping the record number ('rid') of each matching record
        to ``int(10000 * score)``.  The second object is a tuple containing
        the names of all data fields used (here only this index's id).

        As a side effect the highlight fragments of all hits are stored in
        the request annotations under HIGHLIGHT_KEY, keyed by hit id.
        """
        config = get_configuration()
        # Fall back to 20 when the configuration has no request_timeout.
        timeout = getattr(config, 'request_timeout', 20)
        search_fields = getattr(config, 'search_fields', None)
        if not search_fields:
            search_fields = SEARCH_FIELDS
        # The field list is a whitespace-separated string.
        search_fields = search_fields.split()
        logger.info(search_fields)
        if query_blocker.blocked:
            return
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        es = get_query_client()
        search = Search(using=es, index=index_name())
        search = search.params(request_timeout=timeout)
        # NOTE(review): the ('rid', '_id') sort presumably exists to give
        # 'search_after' below a stable order - confirm.
        search = search.sort('rid', '_id')
        search = search.source(include='rid')
        # Only the first key of the request is used as the query string.
        query_string = record.keys[0].decode('utf8')
        logger.info(query_string)
        # Asterisks are replaced by spaces before querying.
        if '*' in query_string:
            query_string = query_string.replace('*', ' ')
        query_string = query_string.strip()
        search = search.query('simple_query_string',
                              query=query_string,
                              fields=search_fields)
        results_count = search.count()
        search = search.params(request_timeout=timeout,
                               size=BATCH_SIZE,
                               track_scores=True)
        # setup highlighting
        for field in search_fields:
            # Drop a '^...' suffix from the field name.
            name = field.split('^')[0]
            if name == 'title':
                # title shows up in results anyway
                continue
            search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

        # initial return value, other batches to be applied
        retval = IIBTree()
        highlights = OOBTree()
        last_seen = None
        # NOTE(review): 'count' is incremented below but never read.
        count = 0
        # Number of batches, rounded up.  NOTE(review): relies on '/' being
        # integer division (Python 2) for xrange() below.
        batch_count = results_count / BATCH_SIZE
        if results_count % BATCH_SIZE != 0:
            batch_count = batch_count + 1
        for i in xrange(batch_count):
            # Every batch after the first continues behind the last hit seen.
            if last_seen is not None:
                search = search.update_from_dict({'search_after': last_seen})
            try:
                results = search.execute(ignore_cache=True)
            except TransportError:
                # No es client, return empty results
                logger.exception('ElasticSearch client not available.')
                return IIBTree(), (self.id, )

            for r in results:
                rid = getattr(r, 'rid', None)
                if rid is not None:
                    # Scale the float score to an int for the IIBTree.
                    retval[rid] = int(10000 * float(r.meta.score))
                    # Index query returns only rids, so we need
                    # to save highlights for later use
                    highlight_list = []
                    if getattr(r.meta, 'highlight', None) is not None:
                        for key in dir(r.meta.highlight):
                            highlight_list.extend(r.meta.highlight[key])
                    highlights[r.meta.id] = highlight_list
                # Updated for every hit, including hits without a rid.
                last_seen = [rid, r.meta.id]
                count = count + 1

        # store highlights
        try:
            annotations = IAnnotations(self.REQUEST)
            annotations[HIGHLIGHT_KEY] = highlights
        except TypeError:
            # maybe we are in a test
            pass

        return retval, (self.id, )
Example #32
0
 def __init__(self, hours=eachHour, minutes=each0minute, name='time'):
     """Store ``name`` and keep ``hours``/``minutes`` as IIBTree keys."""
     # Only the keys matter; every value is the constant 1.
     self.hours = IIBTree([(mark, 1) for mark in hours])
     self.minutes = IIBTree([(mark, 1) for mark in minutes])
     self.name = name
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value whose document ids live in _index

       Only documents holding ``_index_value`` have entries in ``_index``;
       documents holding the opposite value are derived by subtracting
       ``_index`` from ``_unindex``.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {'label': 'Settings',
         'action': 'manage_main'},
        {'label': 'Browse',
         'action': 'manage_browse'},
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset the index to an empty state.

        All storage structures and both length counters are replaced by
        fresh objects and ``_index_value`` goes back to 1.  The change
        counter is created when missing, otherwise incremented.
        """
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.

        The keys are the two booleans; the counts are taken from the
        length counters, not by walking the trees.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip ``_index_value`` and rebuild ``_index`` from ``_unindex``.

        ``documentId``, when given, is removed from the rebuilt index
        again and not counted in the new ``_index_length``.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset

        The value is always recorded in ``_unindex``.  Afterwards the
        index is inverted when the indexed value has reached 60% of all
        documents.
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) at least 60%
        # of the total length? then switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        ``check`` enables the inversion test that runs when ``entry`` is
        not the indexed value.  ``ConflictError`` is re-raised; any other
        failure while removing is logged and swallowed.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove documentId %s '
                    'from index %s. This should not happen.',
                    self.__class__.__name__,
                    str(documentId),
                    str(self.id))
        elif check:
            # Invert when the index holds at most 60% of the documents
            # that remain once the current one is gone (length - 1).
            # NOTE(review): the historical comment here said "larger
            # than 60%", which does not match the `<=` below - confirm
            # the intended direction before changing it.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                # check=False: no inversion while a value is being replaced
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId, exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the document ids matching the first key of ``record``.

        The loop returns during its first iteration, so only the first
        query key is ever considered; an empty IISet is returned when
        there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    # union with an empty set turns the bucket into a set
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    # Restrict to documents this index actually knows:
                    # resultset may contain rids that were never indexed
                    # here, and those must not match either value.
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (bool value, set of rids) tuples, indexed
        value first."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Example #34
0
class DateIndex(UnIndex, PropertyManager):

    """Index for dates.

    Values are normalized by ``_convert`` to an integer with a
    granularity of one minute before they are stored or queried.
    """
    implements(IDateIndex)

    meta_type = 'DateIndex'
    query_options = ('query', 'range')

    index_naive_time_as_local = True # False means index as UTC
    _properties=({'id':'index_naive_time_as_local',
                  'type':'boolean',
                  'mode':'w'},)

    manage = manage_main = DTMLFile( 'dtml/manageDateIndex', globals() )
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName( 'manage_main' )
    manage_options = ( { 'label' : 'Settings'
                       , 'action' : 'manage_main'
                       },
                       {'label': 'Browse',
                        'action': 'manage_browse',
                       },
                     ) + PropertyManager.manage_options

    def clear( self ):
        """ Complete reset """
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()

    def index_object( self, documentId, obj, threshold=None ):
        """index an object, normalizing the indexed value to an integer

           o Normalized value has granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.

           Returns 1 when the stored value changed, 0 otherwise.
        """
        returnStatus = 0

        try:
            date_attr = getattr( obj, self.id )
            if safe_callable( date_attr ):
                date_attr = date_attr()

            ConvertedDate = self._convert( value=date_attr, default=_marker )
        except AttributeError:
            # attribute missing: treat the object as having no value
            ConvertedDate = _marker

        oldConvertedDate = self._unindex.get( documentId, _marker )

        if ConvertedDate != oldConvertedDate:
            if oldConvertedDate is not _marker:
                self.removeForwardIndexEntry(oldConvertedDate, documentId)
                if ConvertedDate is _marker:
                    try:
                        del self._unindex[documentId]
                    except ConflictError:
                        raise
                    except Exception:
                        # Was a bare ``except:``; narrowed so that
                        # KeyboardInterrupt/SystemExit are not swallowed.
                        LOG.error("Should not happen: ConvertedDate was there,"
                                  " now it's not, for document with id %s" %
                                  documentId)

            if ConvertedDate is not _marker:
                self.insertForwardIndexEntry( ConvertedDate, documentId )
                self._unindex[documentId] = ConvertedDate

            returnStatus = 1

        return returnStatus

    def _apply_index(self, request, resultset=None):
        """Apply the index to query parameters given in the argument

        Normalize the 'query' arguments into integer values at minute
        precision before querying.

        Returns None when the request holds no keys for this index,
        otherwise a tuple ``(result set, (self.id,))``.  Raises
        RuntimeError for an operator not listed in ``self.operators``.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        # Materialize as a list: the keys are consumed by both min() and
        # max() below, which a lazy map object would not survive.
        keys = [self._convert(key) for key in record.keys]

        index = self._index
        r = None
        opr = None

        #experimental code for specifing the operator
        operator = record.get( 'operator', self.useOperator )
        if operator not in self.operators:
            raise RuntimeError("operator not valid: %s" % operator)

        # depending on the operator we use intersection or union
        if operator=="or":
            set_func = union
        else:
            set_func = intersection

        # range parameter
        range_arg = record.get('range',None)
        if range_arg:
            opr = "range"
            opr_args = []
            if range_arg.find("min") > -1:
                opr_args.append("min")
            if range_arg.find("max") > -1:
                opr_args.append("max")

        if record.get('usage',None):
            # see if any usage params are sent to field
            opr = record.usage.lower().split(':')
            opr, opr_args = opr[0], opr[1:]

        if opr=="range":   # range search
            if 'min' in opr_args:
                lo = min(keys)
            else:
                lo = None

            if 'max' in opr_args:
                hi = max(keys)
            else:
                hi = None

            if hi:
                setlist = index.values(lo,hi)
            else:
                setlist = index.values(lo)

            r = multiunion(setlist)

        else: # not a range search
            for key in keys:
                docids = index.get(key, None)
                if docids is not None:
                    if isinstance(docids, int):
                        # a single rid is stored as a bare int
                        docids = IISet((docids,))
                    else:
                        # set can't be bigger than resultset
                        docids = intersection(docids, resultset)
                    r = set_func(r, docids)

        if isinstance(r, int):
            r = IISet((r,))

        if r is None:
            return IISet(), (self.id,)
        else:
            return r, (self.id,)

    def _convert( self, value, default=None ):
        """Convert Date/Time value to our internal representation

        Accepts DateTime, float/int timestamps, non-empty strings,
        datetime and date values; anything else yields ``default``.
        Raises OverflowError when the result exceeds MAX32.
        """
        # XXX: Code patched 20/May/2003 by Kiran Jonnalagadda to
        # convert dates to UTC first.
        if isinstance(value, DateTime):
            t_tup = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            t_tup = time.gmtime( value )
        elif isinstance(value, str) and value:
            t_obj = DateTime( value ).toZone('UTC')
            t_tup = t_obj.parts()
        elif isinstance(value, datetime):
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            # else if tzinfo is None, naive time interpreted as UTC
            t_tup = value.utctimetuple()
        elif isinstance(value, date):
            t_tup = value.timetuple()
        else:
            return default

        yr = t_tup[0]
        mo = t_tup[1]
        dy = t_tup[2]
        hr = t_tup[3]
        mn = t_tup[4]

        # Pack the fields into one integer; every month is given 31 day
        # slots, so the value is an ordering key, not a minute count.
        t_val = ( ( ( ( yr * 12 + mo ) * 31 + dy ) * 24 + hr ) * 60 + mn )

        if t_val > MAX32:
            # t_val must be integer fitting in the 32bit range
            raise OverflowError(
                "%s is not within the range of indexable dates (index: %s)"
                % (value, self.id))

        return t_val
Example #35
0
 def clear(self):
     """Reset ``_index``, ``_unindex`` and ``_length`` to fresh, empty
     objects."""
     fresh_index, fresh_unindex, fresh_length = IOBTree(), IIBTree(), Length()
     self._index = fresh_index
     self._unindex = fresh_unindex
     self._length = fresh_length
Example #36
0
    def __init__(self, tag, minutes=each10minutes, name='tagtime'):
        """Keep ``tag`` and ``name`` and store ``minutes`` as IIBTree keys.

        Each value of ``minutes`` becomes a key mapped to the flag ``1``.
        """
        minute_flags = [(minute, 1) for minute in minutes]
        self.minutes = IIBTree(minute_flags)
        self.tag, self.name = tag, name
Example #37
0
 def clear(self):
     """Replace all index structures with fresh, empty ones."""
     # docid -> [wordids]
     self._doc2wid = IOBTree()
     # wordid -> [docids]
     self._wid2doc = IOBTree()
     # docid -> (# terms in document)
     self._docweight = IIBTree()
     new_length = BTrees.Length.Length()
     self._length = new_length
Example #38
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the value whose document ids live in _index

       Only documents holding ``_index_value`` have entries in ``_index``.
       ``_index_length`` is None on instances that have not been migrated
       yet; see ``_inline_migration``.
    """

    meta_type = "BooleanIndex"

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ["query"]

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset all storage structures and counters to an empty state."""
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip ``_index_value`` and rebuild ``_index`` from ``_unindex``.

        ``documentId``, when given, is removed from the rebuilt index
        again and not counted in the new ``_index_length``.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def _inline_migration(self):
        """Create the length counters for an instance that lacks them.

        Both counters are computed from the existing trees.  The indexed
        value is set to 1 as an instance variable, and the index is
        inverted right away when it holds more than half the documents.
        """
        self._length = BTrees.Length.Length(len(self._unindex.keys()))
        self._index_length = BTrees.Length.Length(len(self._index))
        # set an instance variable (both original branches did this)
        self._index_value = 1
        if self._index_length.value > (self._length.value / 2):
            self._invert_index()

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset

        The value is always recorded in ``_unindex``.  Afterwards the
        index is inverted when the indexed value has reached 60% of all
        documents.
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        # when we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) at least 60%
        # of the total length? then switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        ``check`` enables the inversion test that runs when ``entry`` is
        not the indexed value.  ``ConflictError`` is re-raised; any other
        failure while removing is logged and swallowed.
        """
        # BBB inline migration
        if self._index_length is None:
            self._inline_migration()

        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%s: unindex_object could not remove '
                    'documentId %s from index %s. This '
                    'should not happen.',
                    self.__class__.__name__, str(documentId), str(self.id))
        elif check:
            length = self._length.value
            index_length = self._index_length.value
            # Invert when the index holds at most 60% of the documents
            # that remain once the current one is gone (length - 1).
            # NOTE(review): the historical comment here said "larger
            # than 60%", which does not match the `<=` below - confirm
            # the intended direction before changing it.
            if (index_length) <= ((length - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                # check=False: no inversion while a value is being replaced
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error('Should not happen: oldDatum was there, now '
                                  'its not, for document with id %s',
                                  documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s', documentId,
                      exc_info=True)

    def _apply_index(self, request, resultset=None):
        """Apply the index to the query found in ``request``.

        Returns None when the request holds no keys for this index,
        otherwise a tuple ``(result set, (self.id,))``.  The loop returns
        during its first iteration, so only the first query key is ever
        considered.
        """
        record = parseIndexRequest(request, self.id, self.query_options)
        if record.keys is None:
            return None

        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return (intersection(index, resultset), (self.id, ))
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    # union with an empty set turns the bucket into a set
                    return (union(difference(self._unindex, index),
                                  IISet([])), (self.id, ))
                else:
                    # Restrict to documents this index actually knows:
                    # resultset may contain rids that were never indexed
                    # here, and those must not match either value.
                    return (intersection(difference(resultset, index),
                                         self._unindex), (self.id, ))
        return (IISet(), (self.id, ))

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (bool value, set of rids) tuples, indexed
        value first."""
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items
Example #39
0
 def clear(self):
     """Reset the index: empty ``_index`` and ``_unindex``, zeroed
     length counters and ``_index_value`` back to 1."""
     make_length = BTrees.Length.Length
     self._index = IITreeSet()
     self._index_length = make_length()
     self._index_value = 1
     self._unindex = IIBTree()
     self._length = make_length()
Example #40
0
class TracListing(Container):
    """Content class for trac listing"""

    implements(ITracListing)

    def __init__(self, id=None, **kwargs):
        super(TracListing, self).__init__(id, **kwargs)
        # manage an index of parent-to-child, int parent ticket id key
        # to PersistentList of int ticket id value:
        self._children = IOBTree()
        # indexes for score and reward-ratio values:
        self._scores = IIBTree()  # int (ticket#) -> int (sum/score)
        self._reward = IOBTree()  # int (ticket#) -> float (ratio)

    def index_parent_child(self, parent, child=None):
        """Record ``child`` under ``parent`` in the parent/child index.

        Both arguments may be tickets or ticket numbers.  The parent
        entry is created when missing; without a child nothing else is
        stored.
        """
        if ITracTicket.providedBy(parent):
            parent = parent.getId()
        if ITracTicket.providedBy(child):
            child = child.getId()
        parent = int(parent)
        child = int(child) if child else None
        self._children[parent] = self._children.get(parent, PersistentList())
        if child is not None:
            self._children[parent].append(child)

    def _adapter(self):
        """Return the TracTickets adapter, created once and cached in a
        ``_v_`` attribute."""
        if getattr(self, '_v_trac_adapter', None) is None:
            self._v_trac_adapter = TracTickets(self)  # adapt once
        return self._v_trac_adapter

    def select(self, query):
        """Run ``query`` through the adapter and return its result."""
        return self._adapter().select(query)

    def _add(self, ticket_number, adapter=None):
        """Create and sync content for a ticket, then recursively add
        the tickets the adapter reports as its children."""
        adapter = adapter or self._adapter()
        # generated id from title will be str(ticket_number), we
        # then change the title in sync after creation.
        content = createContentInContainer(
            self,
            portal_type='uu.trac.ticket',
            title=unicode(ticket_number),
            )
        content.sync()
        # check for and recursively add child tickets:
        childq = 'parent=~#%s' % ticket_number
        children = adapter.select(childq)
        for child_id in children:
            self._add(child_id, adapter)

    def sync(self):
        """Synchronize contained tickets with the non-closed tickets the
        adapter reports.

        Existing tickets are synced; missing ones are only added when
        listed in ``visible_tickets``.
        """
        adapter = self._adapter()
        self._children = IOBTree()   # reset all parent/child refs
        q = 'status!=closed'
        for number in adapter.select(q):
            # objectIds() is re-read on purpose: _add() may have created
            # this ticket as a child during an earlier iteration.
            if str(number) not in self.objectIds():
                if number in (self.visible_tickets or []):
                    self._add(number, adapter)
            else:
                self.get(str(number)).sync()

    def children_for(self, ticket_number):
        """Return a list copy of the child ticket numbers recorded for
        ``ticket_number`` (empty when none)."""
        return list(self._children.get(int(ticket_number), []))

    def result(self, reward=False):
        """
        Return sorted listing of tuples with ticket number, score.
        If reward is True, use reward-ratio instead of score.

        Only tickets listed in ``visible_tickets`` are included, highest
        value first.
        """
        # visible_tickets may be unset (sync() guards the same way);
        # treat that as "nothing visible" instead of raising TypeError.
        visible = self.visible_tickets or ()
        source = self._reward if reward else self._scores
        pairs = [pair for pair in source.items() if pair[0] in visible]

        def by_value(pair):
            # sort on score/ratio, not on ticket number
            return pair[1]

        return sorted(pairs, key=by_value, reverse=True)

    def index(self, ticket):
        """Store score and reward ratio for ``ticket`` and record its
        parent relation, if any.

        ``ticket`` may be a ticket object or a ticket number.
        """
        if not ITracTicket.providedBy(ticket):
            # contained ids are strings (see sync/_add), so an int key
            # would never find the ticket
            ticket = self.get(str(int(ticket)))
        tid = int(ticket.getId())
        parent = ticket.parent
        if parent:
            self.index_parent_child(parent, tid)
        self._scores[tid] = ticket.score()
        self._reward[tid] = ticket.reward_ratio()
Example #41
0
class BooleanIndex(UnIndex):
    """Index for booleans

       self._index = set([documentId1, documentId2])
       self._unindex = {documentId:[True/False]}

       self._length is the length of the unindex
       self._index_length is the length of the index
       self._index_value is the indexed value

       The document ids in self._index have self._index_value as their value.

       Since there are only two possible values (True/False), the index
       only stores a forward index for the less common value.
       It starts off with the opposite of value of the first document
       and later checks and inverts itself, if more than 60% of all
       documents now have the indexed value. It does the inversion
       at 60% to avoid inverting itself constantly for an index that
       has a roughly equal 50/50 split.
    """

    meta_type = 'BooleanIndex'

    manage_options = (
        {
            'label': 'Settings',
            'action': 'manage_main'
        },
        {
            'label': 'Browse',
            'action': 'manage_browse'
        },
    )

    query_options = ['query']

    manage = manage_main = DTMLFile('dtml/manageBooleanIndex', globals())
    manage_main._setName('manage_main')
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    # class-level defaults; clear() sets instance values
    _index_value = 1
    _index_length = None

    def clear(self):
        """Reset the index to an empty state.

        All storage structures and both length counters are replaced by
        fresh objects and ``_index_value`` goes back to 1.  The change
        counter is created when missing, otherwise incremented.
        """
        self._index = IITreeSet()
        self._index_length = BTrees.Length.Length()
        self._index_value = 1
        self._unindex = IIBTree()
        self._length = BTrees.Length.Length()
        if self._counter is None:
            self._counter = BTrees.Length.Length()
        else:
            self._increment_counter()

    def histogram(self):
        """Return a mapping which provides a histogram of the number of
        elements found at each point in the index.

        The keys are the two booleans; the counts are taken from the
        length counters, not by walking the trees.
        """
        histogram = {}
        indexed = bool(self._index_value)
        histogram[indexed] = self._index_length.value
        histogram[not indexed] = self._length.value - self._index_length.value
        return histogram

    def _invert_index(self, documentId=None):
        """Flip ``_index_value`` and rebuild ``_index`` from ``_unindex``.

        ``documentId``, when given, is removed from the rebuilt index
        again and not counted in the new ``_index_length``.
        """
        self._index_value = indexed = int(not self._index_value)
        self._index.clear()
        length = 0
        for rid, value in self._unindex.iteritems():
            if value == indexed:
                self._index.add(rid)
                length += 1
        # documentId is the rid of the currently processed object that
        # triggered the invert. in the case of unindexing, the rid hasn't
        # been removed from the unindex yet. While indexing, the rid will
        # be added to the index and unindex after this method is done
        if documentId is not None:
            self._index.remove(documentId)
            length -= 1
        self._index_length = BTrees.Length.Length(length)

    def insertForwardIndexEntry(self, entry, documentId):
        """If the value matches the indexed one, insert into treeset

        The value is always recorded in ``_unindex``.  Afterwards the
        index is inverted when the indexed value has reached 60% of all
        documents.
        """
        # When we get the first entry, decide to index the opposite of what
        # we got, as indexing zero items is fewer than one.
        if self._length.value == 0:
            self._index_value = int(not bool(entry))

        # if the added entry value is index value, insert it into index
        if bool(entry) is bool(self._index_value):
            self._index_length.change(1)
            self._index.insert(documentId)

        # insert value into global unindex (before computing index invert)
        self._unindex[documentId] = entry
        self._length.change(1)

        # is the index (after adding the current entry) at least 60%
        # of the total length? then switch the indexed value
        if bool(entry) is bool(self._index_value):
            if (self._index_length.value) >= ((self._length.value) * 0.6):
                self._invert_index()

    def removeForwardIndexEntry(self, entry, documentId, check=True):
        """Take the entry provided and remove any reference to documentId
        in its entry in the index.

        ``check`` enables the inversion test that runs when ``entry`` is
        not the indexed value.  ``ConflictError`` is re-raised; any other
        failure while removing is logged and swallowed.
        """
        if bool(entry) is bool(self._index_value):
            try:
                self._index.remove(documentId)
                self._index_length.change(-1)
            except ConflictError:
                raise
            except Exception:
                LOG.exception(
                    '%(context)s: unindex_object could not '
                    'remove documentId %(doc_id)s from '
                    'index %(index)r. This should not '
                    'happen.',
                    dict(context=self.__class__.__name__,
                         doc_id=documentId,
                         index=self.id))
        elif check:
            # Invert when the index holds at most 60% of the documents
            # that remain once the current one is gone (length - 1).
            # NOTE(review): the historical comment here said "larger
            # than 60%", which does not match the `<=` below - confirm
            # the intended direction before changing it.
            if (self._index_length.value) <= ((self._length.value - 1) * 0.6):
                self._invert_index(documentId)
                return

    def _index_object(self, documentId, obj, threshold=None, attr=''):
        """Index object 'obj' with integer id 'documentId'.

        Returns 1 when the stored value changed, 0 otherwise.
        """
        returnStatus = 0

        # First we need to see if there's anything interesting to look at
        datum = self._get_object_datum(obj, attr)

        # Make it boolean, int as an optimization
        if datum is not _marker:
            datum = int(bool(datum))

        # We don't want to do anything that we don't have to here, so we'll
        # check to see if the new and existing information is the same.
        oldDatum = self._unindex.get(documentId, _marker)
        if datum != oldDatum:
            if oldDatum is not _marker:
                # check=False: no inversion while a value is being replaced
                self.removeForwardIndexEntry(oldDatum, documentId, check=False)
                if datum is _marker:
                    try:
                        del self._unindex[documentId]
                        self._length.change(-1)
                    except ConflictError:
                        raise
                    except Exception:
                        LOG.error(
                            'Should not happen: oldDatum was there, now '
                            'its not, for document with id %s', documentId)

            if datum is not _marker:
                self.insertForwardIndexEntry(datum, documentId)

            returnStatus = 1

        return returnStatus

    def unindex_object(self, documentId):
        """ Unindex the object with integer id 'documentId' and don't
        raise an exception if we fail

        Returns None without touching anything when the document is not
        in ``_unindex``.
        """
        unindexRecord = self._unindex.get(documentId, _marker)
        if unindexRecord is _marker:
            return None

        self._increment_counter()

        self.removeForwardIndexEntry(unindexRecord, documentId)

        try:
            del self._unindex[documentId]
            self._length.change(-1)
        except ConflictError:
            raise
        except Exception:
            LOG.debug('Attempt to unindex nonexistent document'
                      ' with id %s',
                      documentId,
                      exc_info=True)

    def query_index(self, record, resultset=None):
        """Return the document ids matching the first key of ``record``.

        The loop returns during its first iteration, so only the first
        query key is ever considered; an empty IISet is returned when
        there are no keys.
        """
        index = self._index
        indexed = self._index_value

        for key in record.keys:
            if bool(key) is bool(indexed):
                # If we match the indexed value, check index
                return intersection(index, resultset)
            else:
                # Otherwise, remove from resultset or _unindex
                if resultset is None:
                    return union(difference(self._unindex, index), IISet([]))
                else:
                    # intersect with _unindex so that rids in resultset
                    # which this index never stored are dropped
                    return intersection(difference(resultset, index),
                                        self._unindex)
        return IISet()

    def indexSize(self):
        """Return distinct values, as an optimization we always claim 2."""
        return 2

    def items(self):
        """Return a list of (bool value, set of rids) tuples, indexed
        value first."""
        # return a list of value to int set of rid tuples
        indexed = self._index_value
        items = [(bool(indexed), self._index)]
        false = IISet()
        for rid, value in self._unindex.iteritems():
            if value != indexed:
                false.add(rid)
        items.append((not bool(indexed), false))
        return items

    def uniqueValues(self, name=None, withLengths=0):
        """returns the unique values for name

        if withLengths is true, returns a sequence of
        tuples of (value, length)

        This is a generator.  It yields nothing when ``name`` is given
        and differs from the index id.  The indexed value is yielded
        first.
        """
        if name is None:
            name = self.id
        elif name != self.id:
            return

        indexed = bool(self._index_value)
        unique_values = (indexed, not indexed)
        if not withLengths:
            for key in unique_values:
                yield key
        else:
            for key in unique_values:
                # lengths come from len() of the trees here, not from
                # the Length counters used by histogram()
                ilen = len(self._index)
                if key is indexed:
                    yield (key, ilen)
                else:
                    ulen = len(self._unindex)
                    yield (key, ulen - ilen)
Example #42
0
 def clear(self):
     """Return the index to its empty state.

     ``_index`` and ``_unindex`` become new empty trees, both length
     counters start over and ``_index_value`` is set to 1.
     """
     empty_index, empty_unindex = IITreeSet(), IIBTree()
     self._index = empty_index
     self._index_length = BTrees.Length.Length()
     self._index_value = 1
     self._unindex = empty_unindex
     self._length = BTrees.Length.Length()
class DateIndex(UnIndex, PropertyManager):
    """Index for dates.

    Indexed values are normalised by ``_convert`` to a single integer
    with one-minute granularity before being stored.
    """

    meta_type = 'DateIndex'
    query_options = ('query', 'range', 'not')

    # True: naive datetimes are indexed as local time; False: as UTC.
    index_naive_time_as_local = True
    _properties = (
        {'id': 'index_naive_time_as_local',
         'type': 'boolean',
         'mode': 'w'},
    )

    manage = manage_main = DTMLFile('dtml/manageDateIndex', globals())
    manage_browse = DTMLFile('../dtml/browseIndex', globals())

    manage_main._setName('manage_main')
    manage_options = (
        {'label': 'Settings', 'action': 'manage_main'},
        {'label': 'Browse', 'action': 'manage_browse'},
    ) + PropertyManager.manage_options

    def clear(self):
        """Complete reset: replace all containers with empty ones.

        The change counter is created on first use and incremented on
        every later reset.
        """
        self._index = IOBTree()
        self._unindex = IIBTree()
        self._length = Length()
        if self._counter is not None:
            self._increment_counter()
        else:
            self._counter = Length()

    def index_object(self, documentId, obj, threshold=None):
        """Index ``obj`` under ``documentId`` using its normalised date.

           o The attribute named by the index id is read from ``obj``
             (and called if callable), then converted to an integer
             with a granularity of one minute.

           o Objects which have 'None' as indexed value are *omitted*,
             by design.

           o Returns 1 if the stored value changed, 0 otherwise.
        """
        try:
            raw_value = getattr(obj, self.id)
            if safe_callable(raw_value):
                raw_value = raw_value()
            new_value = self._convert(value=raw_value, default=_marker)
        except AttributeError:
            # Missing attribute: treat the object as having no date.
            new_value = _marker

        old_value = self._unindex.get(documentId, _marker)

        changed = new_value != old_value
        if not changed:
            return 0

        if old_value is not _marker:
            # Drop the stale forward entry before (re)indexing.
            self.removeForwardIndexEntry(old_value, documentId)
            if new_value is _marker:
                # The object no longer has a date: forget it entirely.
                try:
                    del self._unindex[documentId]
                except ConflictError:
                    raise
                except Exception:
                    LOG.error("Should not happen: ConvertedDate was there,"
                              " now it's not, for document with id %s" %
                              documentId)

        if new_value is not _marker:
            self.insertForwardIndexEntry(new_value, documentId)
            self._unindex[documentId] = new_value

        self._increment_counter()
        return 1

    def _convert(self, value, default=None):
        """Convert a date/time value to the internal integer form.

        Accepts ``DateTime`` objects, numbers (passed to
        ``time.gmtime``), non-empty strings (parsed by ``DateTime``),
        ``datetime`` and ``date`` instances.  Anything else yields
        ``default``.

        Raises ``OverflowError`` when the result exceeds ``MAX32``.
        """
        if isinstance(value, DateTime):
            parts = value.toZone('UTC').parts()
        elif isinstance(value, (float, int)):
            parts = time.gmtime(value)
        elif isinstance(value, str) and value:
            parts = DateTime(value).toZone('UTC').parts()
        elif isinstance(value, datetime):
            # Must be tested before ``date``: datetime is a date subclass.
            if self.index_naive_time_as_local and value.tzinfo is None:
                value = value.replace(tzinfo=Local)
            # Otherwise a naive time is interpreted as UTC.
            parts = value.utctimetuple()
        elif isinstance(value, date):
            parts = value.timetuple()
        else:
            return default

        year, month, day, hour, minute = parts[:5]

        # Fold the fields into one integer, most significant first.
        packed = year * 12 + month
        packed = packed * 31 + day
        packed = packed * 24 + hour
        packed = packed * 60 + minute

        if packed > MAX32:
            # The packed value must be an integer fitting in the 32bit range.
            raise OverflowError(
                "%s is not within the range of indexable dates (index: %s)"
                % (value, self.id))
        return packed
Example #44
0
    def _apply_index(self, request):
        """Apply the index to query parameters given in 'request'.

        The argument should be a mapping object.

        If the request does not contain the needed parameters, or
        queries are currently blocked, then None is returned.

        If the request contains a parameter with the name of the
        column and this parameter is either a Record or a class
        instance then it is assumed that the parameters of this index
        are passed as attribute (Note: this is the recommended way to
        pass parameters since Zope 2.4)

        Otherwise two objects are returned.  The first object is an
        IIBTree mapping the record numbers of the matching records to
        ``int(10000 * score)``.  The second object is a tuple containing
        the names of all data fields used.

        As a side effect the highlight fragments of every hit are stored
        in the request annotations under ``HIGHLIGHT_KEY``, keyed by the
        hit's ``meta.id``.

        If ElasticSearch raises ``TransportError`` while the results are
        being fetched, the error is logged and an empty result is
        returned.
        """
        config = get_configuration()
        timeout = getattr(config, 'request_timeout', 20)
        search_fields = getattr(config, 'search_fields', None)
        if not search_fields:
            search_fields = SEARCH_FIELDS
        search_fields = search_fields.split()
        if query_blocker.blocked:
            return None
        record = parseIndexRequest(request, self.id)
        if record.keys is None:
            return None
        es = get_query_client()
        search = Search(using=es, index=index_name())
        search = search.params(
            request_timeout=timeout,
            size=BATCH_SIZE,
            preserve_order=True,
        )
        search = search.source(include='rid')
        query_string = record.keys[0]
        if query_string and query_string.startswith('*'):
            # plone.app.querystring contains op sends a leading *, remove it
            query_string = query_string[1:]
        search = search.query('simple_query_string',
                              query=query_string,
                              fields=search_fields)
        # setup highlighting
        for field in search_fields:
            # strip an optional "^boost" suffix to get the bare field name
            name = field.split('^')[0]
            if name == 'title':
                # title shows up in results anyway
                continue
            search = search.highlight(name, fragment_size=FRAGMENT_SIZE)

        retval = IIBTree()
        highlights = OOBTree()
        # ``scan()`` is lazy: the requests to ElasticSearch are only issued
        # while iterating, so the whole loop has to live inside the ``try``
        # for the TransportError fallback to take effect.
        try:
            for r in search.scan():
                if getattr(r, 'rid', None) is None:
                    # something was indexed with no rid. Ignore for now.
                    # this is only for highlights, so no big deal if we
                    # skip one
                    continue
                retval[r.rid] = int(10000 * float(r.meta.score))
                # Index query returns only rids, so we need
                # to save highlights for later use
                highlight_list = []
                if getattr(r.meta, 'highlight', None) is not None:
                    for key in dir(r.meta.highlight):
                        highlight_list.extend(r.meta.highlight[key])
                highlights[r.meta.id] = highlight_list
        except TransportError:
            # No es client, return empty results (partial ones are dropped)
            logger.exception('ElasticSearch client not available.')
            return IIBTree(), (self.id, )

        # store highlights
        try:
            annotations = IAnnotations(self.REQUEST)
            annotations[HIGHLIGHT_KEY] = highlights
        except TypeError:
            # maybe we are in a test
            pass

        return retval, (self.id, )
Example #45
0
 def __init__(self):
     """Start with an empty IIBTree stored as ``_verification_codes``."""
     empty_codes = IIBTree()
     self._verification_codes = empty_codes
Example #46
0
 def _makeOne(self):
     """Return a new, empty ``BTrees.IIBTree.IIBTree`` instance."""
     # Imported locally so the dependency is only needed when called.
     from BTrees.IIBTree import IIBTree as tree_factory
     return tree_factory()