コード例 #1
0
    def test_batch_lazy_map(self):
        # Batch over a LazyMap that only carries the ten items of the
        # requested page (80..89) while announcing 95 results overall via
        # ``actual_result_count``.
        def identity(key):
            return key

        page_items = range(80, 90)
        lazy = LazyMap(identity, page_items, actual_result_count=95)
        batch = Batch(lazy, size=10, start=80)

        # Iterating the batch yields exactly the items of the current page.
        seen = [item for item in batch]
        self.assertEqual(seen, list(range(80, 90)))

        # Paging figures derived from the announced total of 95.
        self.assertEqual(batch.numpages, 10)
        self.assertEqual(batch.pagenumber, 9)
        self.assertEqual(batch.navlist, range(6, 11))
        self.assertEqual(batch.leapback, [])
        self.assertEqual(batch.prevlist, range(6, 9))

        # Neighbouring batches: a full page before, a short page after.
        self.assertEqual(batch.previous.length, 10)
        self.assertEqual(batch.next.length, 5)

        # URL helpers.
        self.assertEqual(batch.pageurl({}), 'b_start:int=80')

        expected_prev = [
            (6, 'b_start:int=50'),
            (7, 'b_start:int=60'),
            (8, 'b_start:int=70'),
        ]
        self.assertListEqual(list(batch.prevurls({})), expected_prev)

        expected_next = [(10, 'b_start:int=90')]
        self.assertListEqual(list(batch.nexturls({})), expected_next)
コード例 #2
0
    def objectMap(self):
        # Returns a lazy sequence of mappings containing subobject
        # meta-data: one {'id': ..., 'meta_type': ...} dict per item of
        # self._tree, built on access. 'meta_type' is None for subobjects
        # that do not define it.

        def describe(item):
            key, obj = item
            meta_type = getattr(obj, 'meta_type', None)
            return {'id': key, 'meta_type': meta_type}

        # Snapshot the tree items into a list before handing them over.
        pairs = list(self._tree.items())
        return LazyMap(describe, pairs, self._count())
コード例 #3
0
ファイル: Catalog.py プロジェクト: d-maurer/Products.ZCatalog
def mergeResults(results, has_sort_keys, reverse):
    """Sort/merge sub-results, generating a flat sequence.

    ``results`` is a list of result set sequences, all with or without
    sort keys.

    Without sort keys the sub-results are concatenated lazily via
    ``LazyCat``.

    With sort keys every record is expected to be a three-tuple
    ``(sortkey, docid, getter)``.  The records of all sub-results are
    gathered into one new list, sorted (descending when ``reverse`` is
    true) and returned as a ``LazyMap`` that calls ``getter(docid)`` when
    an element is accessed.  An empty ``results`` list yields a plain
    empty list.
    """
    if not has_sort_keys:
        return LazyCat(results)
    if not results:
        return []

    # Always gather into a fresh list.  Sorting ``results[0]`` in place
    # would reorder a list owned by the caller and would fail for
    # sub-results that are sequences without a ``sort`` method.
    combined = []
    for sub_result in results:
        combined.extend(sub_result)

    # Order by (sortkey, docid) only.  Including the getter in the
    # comparison would compare callables whenever two records tie on the
    # first two elements, which raises TypeError on Python 3.
    combined.sort(key=lambda rec: (rec[0], rec[1]), reverse=bool(reverse))
    return LazyMap(lambda rec: rec[2](rec[1]), combined, len(combined))
コード例 #4
0
ファイル: Catalog.py プロジェクト: d-maurer/Products.ZCatalog
    def sortResults(self,
                    rs,
                    sort_index,
                    reverse=False,
                    limit=None,
                    merge=True,
                    actual_result_count=None,
                    b_start=0,
                    b_size=None):
        # Sort a result set using one or more sort indexes. Both sort_index
        # and reverse can be lists of indexes and reverse specifications.
        # Return a lazy result set in sorted order if merge is true otherwise
        # returns a list of (sortkey, uid, getter_function) tuples, where
        # sortkey can be a tuple on its own.
        #
        # rs                  -- result set; if it has a ``keys`` method the
        #                        keys are used as the result set.
        # limit               -- optional maximum number of results wanted;
        #                        capped to the result length.
        # actual_result_count -- total result count to report; defaults to
        #                        len(rs).
        # b_start / b_size    -- batching window handed to
        #                        self._limit_sequence.
        second_indexes = None
        second_indexes_key_map = None
        sort_index_length = 1
        if isinstance(sort_index, list):
            # Several sort indexes: the first one stays the primary index,
            # for each remaining one its documentToKeyMap() is collected.
            sort_index_length = len(sort_index)
            if sort_index_length > 1:
                second_indexes = sort_index[1:]
                second_indexes_key_map = []
                for si in second_indexes:
                    second_indexes_key_map.append(si.documentToKeyMap())
            sort_index = sort_index[0]

        result = []
        if hasattr(rs, 'keys'):
            rs = rs.keys()
        # rlen is the length all following size decisions are based on: the
        # caller supplied count if given, the real length of rs otherwise.
        if actual_result_count is None:
            rlen = len(rs)
            actual_result_count = rlen
        else:
            rlen = actual_result_count

        # don't limit to more than what we have
        if limit is not None and limit >= rlen:
            limit = rlen

        # if we want a batch from the end of the result set, reverse sorting
        # order and limit it, then reverse the result set again
        switched_reverse = False
        # NOTE(review): under Python 3 ``rlen / 2`` is a float division; the
        # comparison still works, the threshold just is not truncated.
        if b_size and b_start and b_start > rlen / 2:
            if isinstance(reverse, list):
                reverse = [not r for r in reverse]
            else:
                reverse = not reverse
            switched_reverse = True
            b_end = b_start + b_size
            if b_end >= rlen:
                # The batch reaches (or passes) the end of the results;
                # overrun is zero or negative here.
                overrun = rlen - b_end
                if b_start >= rlen:
                    # bail out, we are outside the possible range
                    return LazyCat([], 0, actual_result_count)
                else:
                    # shrink the batch to the items that really exist
                    b_size += overrun
                b_start = 0
            else:
                # re-express the batch start as an offset from the end
                b_start = rlen - b_end
            limit = b_start + b_size

        # determine sort_spec
        if isinstance(reverse, list):
            # one direction per index: -1 for reversed, 1 otherwise
            sort_spec = [r and -1 or 1 for r in reverse]
            # limit to current maximum of sort indexes
            sort_spec = sort_spec[:sort_index_length]
            # use first sort order for choosing the algorithm
            first_reverse = reverse[0]
        else:
            # a single direction applies to every sort index
            sort_spec = []
            # NOTE(review): ``xrange`` is a Python 2 builtin; on Python 3 it
            # must be provided elsewhere in the module -- confirm.
            for i in xrange(sort_index_length):
                sort_spec.append(reverse and -1 or 1)
            first_reverse = reverse

        # Special first condition, as it changes post-processing.
        # Only chosen when merging without a limit and the result set is
        # large compared to the length of the primary sort index.
        iterate_sort_index = (merge and limit is None
                              and (rlen >
                                   (len(sort_index) * (rlen / 100 + 1))))

        # Choose one of the sort algorithms.
        if iterate_sort_index:
            sort_func = self._sort_iterate_index
        elif limit is None or (limit * 4 > rlen):
            # no limit, or the limit covers more than a quarter of the
            # results
            sort_func = self._sort_iterate_resultset
        elif first_reverse:
            sort_func = self._sort_nbest
        else:
            sort_func = self._sort_nbest_reverse

        # All sort helpers share one signature and hand back the (possibly
        # updated) actual_result_count, a length and the sorted result.
        actual_result_count, length, result = sort_func(
            actual_result_count, result, rs, limit, merge, reverse, sort_index,
            sort_index_length, sort_spec, second_indexes_key_map)

        # presumably applies the batch window and undoes a switched sort
        # direction -- confirm against _limit_sequence
        sequence, slen = self._limit_sequence(result, length, b_start, b_size,
                                              switched_reverse)

        if iterate_sort_index:
            result = LazyCat(LazyValues(sequence), slen, actual_result_count)
        else:
            if not merge:
                # unmerged callers get the raw sequence
                return sequence

            result = LazyValues(sequence)
            result.actual_result_count = actual_result_count

        # Wrap so that items are resolved through self.__getitem__ on access.
        return LazyMap(self.__getitem__,
                       result,
                       len(result),
                       actual_result_count=actual_result_count)
コード例 #5
0
ファイル: Catalog.py プロジェクト: d-maurer/Products.ZCatalog
    def search(self,
               query,
               sort_index=None,
               reverse=False,
               limit=None,
               merge=True):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit however, you should
        still slice or batch the results as usual.

        An empty ``LazyCat`` is returned when the indexes produce no
        result set."""

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        # The catalog plan object brackets the whole search with
        # start()/stop() and may supply an index order via plan().
        cr = self.getCatalogPlan(query)
        cr.start()

        plan = cr.plan()
        if not plan:
            # no plan available, fall back to _sorted_search_indexes
            plan = self._sorted_search_indexes(query)

        rs = None  # result set
        for index_id in plan:
            # The actual core loop over all indices.
            if index_id not in self.indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime.
                continue

            rs = self._search_index(cr, index_id, query, rs)
            if not rs:
                # a falsy result set ends the loop; remaining indexes are
                # not consulted
                break

        if not rs:
            # None of the indexes found anything to do with the query.
            result = LazyCat([])
            cr.stop()
            return result

        # Try to deduce the sort limit from batching arguments.
        b_start, b_size, limit, sort_report_name = self._sort_limit_arguments(
            query, sort_index, reverse, limit)

        # We got some results from the indexes, sort and convert to sequences.
        rlen = len(rs)
        if sort_index is None and hasattr(rs, 'items'):
            # Having a 'items' means we have a data structure with
            # scores. Build a new result set, sort it by score, reverse
            # it, compute the normalized score, and Lazify it.

            if not merge:
                # Don't bother to sort here, return a list of
                # three tuples to be passed later to mergeResults.
                # Note that data_record_normalized_score_ cannot be
                # calculated and will always be 1 in this case.
                result = [(score, (1, score, rid), self.__getitem__)
                          for rid, score in rs.items()]
            else:
                cr.start_split('sort_on#score')

                # Sort it by score.
                rs = rs.byValue(0)
                # The first entry's score is used as the normalisation base.
                # NOTE(review): the local name shadows the ``max`` builtin
                # for the rest of this method.
                max = float(rs[0][0])

                # Here we define our getter function inline so that
                # we can conveniently store the max value as a default arg
                # and make the normalized score computation lazy
                def getScoredResult(item, max=max, self=self):
                    """
                    Returns instances of self._v_brains, or whatever is
                    passed into self.useBrains.
                    """
                    score, key = item
                    # NOTE(review): divides by ``max``; assumes the top
                    # score is never 0 -- confirm.
                    norm_score = int(100.0 * score / max)
                    return self.instantiate((key, self.data[key]),
                                            score_data=(score, norm_score))

                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                      b_size)
                result = LazyMap(getScoredResult,
                                 sequence,
                                 slen,
                                 actual_result_count=rlen)
                cr.stop_split('sort_on#score', None)

        elif sort_index is None and not hasattr(rs, 'values'):
            # no scores
            if hasattr(rs, 'keys'):
                rs = rs.keys()
            sequence, slen = self._limit_sequence(rs, rlen, b_start, b_size)
            result = LazyMap(self.__getitem__,
                             sequence,
                             slen,
                             actual_result_count=rlen)
        else:
            # Sort. If there are scores, then this block is not
            # reached, therefore 'sort-on' does not happen in the
            # context of a text index query.  This should probably
            # sort by relevance first, then the 'sort-on' attribute.
            cr.start_split(sort_report_name)
            result = self.sortResults(rs,
                                      sort_index,
                                      reverse,
                                      limit,
                                      merge,
                                      actual_result_count=rlen,
                                      b_start=b_start,
                                      b_size=b_size)
            cr.stop_split(sort_report_name, None)

        cr.stop()
        return result
コード例 #6
0
    def _makequery(self,
                   query=None,
                   batch=False,
                   b_start=0,
                   b_size=30,
                   sort_on=None,
                   sort_order=None,
                   limit=0,
                   brains=False,
                   custom_query=None):
        """Parse the (form)query and return using multi-adapter

        query        -- form query; passed through every registered
                        IQueryModifier (in name order) before parsing.
        batch        -- if true, b_start/b_size are added to the catalog
                        query and the results are wrapped in a Batch.
        limit        -- if true and not batching, used as 'sort_limit' and
                        as upper bound for the reported
                        actual_result_count.
        brains       -- if false, results are adapted to IContentListing.
        custom_query -- optional dict of index names and query values that
                        is merged over the parsed query.

        Without any valid index (and without a custom_query) the catalog is
        not queried and the result is based on an empty list.
        """
        query_modifiers = getUtilitiesFor(IQueryModifier)
        for _name, modifier in sorted(query_modifiers, key=itemgetter(0)):
            query = modifier(query)

        parsedquery = queryparser.parseFormquery(self.context, query, sort_on,
                                                 sort_order)

        index_modifiers = getUtilitiesFor(IParsedQueryIndexModifier)
        for name, modifier in index_modifiers:
            if name in parsedquery:
                # Use a dedicated local name so the ``query`` parameter is
                # not rebound to a single index query.
                new_name, index_query = modifier(parsedquery[name])
                # if a new index name has been returned, we need to replace
                # the native ones
                if name != new_name:
                    del parsedquery[name]
                parsedquery[new_name] = index_query

        # Check for valid indexes
        catalog = getToolByName(self.context, 'portal_catalog')
        # Ask the catalog once instead of once per parsed key.
        catalog_indexes = set(catalog.indexes())
        has_valid_index = any(
            index in catalog_indexes for index in parsedquery)

        # We'll ignore any invalid index, but will return an empty set if none
        # of the indexes are valid.
        if not has_valid_index:
            logger.warning(
                "Using empty query because there are no valid indexes used.")
            parsedquery = {}

        empty_query = not parsedquery  # store emptiness
        if batch:
            parsedquery['b_start'] = b_start
            parsedquery['b_size'] = b_size
        elif limit:
            parsedquery['sort_limit'] = limit

        if 'path' not in parsedquery:
            parsedquery['path'] = {'query': ''}

        if isinstance(custom_query, dict) and custom_query:
            # Update the parsed query with an extra query dictionary. This may
            # override the parsed query. The custom_query is a dictionary of
            # index names and their associated query values.
            parsedquery.update(custom_query)
            empty_query = False

        # filter bad term and operator in query
        parsedquery = self.filter_query(parsedquery)
        results = []
        if not empty_query:
            results = catalog(**parsedquery)
            if getattr(results, 'actual_result_count', False) and limit\
                    and results.actual_result_count > limit:
                results.actual_result_count = limit

        collapse_on = self.request.get(
            'collapse_on', getattr(self.context, 'collapse_on', None))
        # Only collapse real catalog results: for an empty query ``results``
        # is a plain list, which has neither ``_len`` nor
        # ``actual_result_count``, and there is nothing to collapse anyway.
        if collapse_on is not None and not empty_query:
            fc = FieldCollapser(query={'collapse_on': collapse_on})
            results = LazyMap(lambda x: x,
                              LazyFilter(results, test=fc.collapse),
                              length=results._len,
                              actual_result_count=results.actual_result_count)

        if not brains:
            results = IContentListing(results)
        if batch:
            results = Batch(results, b_size, start=b_start)
        return results
コード例 #7
0
 def _createLMap(self, mapfunc, *seq):
     # Flatten all given sequences, in argument order, into one list and
     # wrap that list in a LazyMap applying mapfunc to its elements.
     from ZTUtils.Lazy import LazyMap
     flattened = [item for part in seq for item in part]
     return LazyMap(mapfunc, flattened)
コード例 #8
0
 def objectItems(self, spec=None):
     # Returns a lazy sequence of (id, subobject) tuples of the current
     # object. If 'spec' is specified, it is passed on to objectIds() so
     # that only objects whose meta_type match 'spec' are returned.
     get_ob = self._getOb

     def pair(name):
         # The subobject is looked up when the element is accessed.
         return (name, get_ob(name))

     return LazyMap(pair, self.objectIds(spec))
コード例 #9
0
 def objectValues(self, spec=None):
     # Returns a lazy sequence of the actual subobjects of the current
     # object. If 'spec' is specified, it is passed on to objectIds() so
     # that only objects whose meta_type match 'spec' are returned.
     getter = self._getOb
     ids = self.objectIds(spec)
     return LazyMap(getter, ids)