Esempio n. 1
0
    def sortResults(self,
                    rs,
                    sort_index,
                    reverse=0,
                    limit=None,
                    merge=1,
                    actual_result_count=None,
                    b_start=0,
                    b_size=None):
        # Sort a result set using a sort index. Return a lazy
        # result set in sorted order if merge is true otherwise
        # returns a list of (sortkey, uid, getter_function) tuples
        #
        # The two 'for' loops in here contribute a significant
        # proportion of the time to perform an indexed search.
        # Try to avoid all non-local attribute lookup inside
        # those loops.
        _intersection = intersection
        _self__getitem__ = self.__getitem__
        index_key_map = sort_index.documentToKeyMap()
        _None = None
        _keyerror = KeyError
        result = []
        append = result.append
        if hasattr(rs, 'keys'):
            rs = rs.keys()
        if actual_result_count is None:
            rlen = len(rs)
            actual_result_count = rlen
        else:
            rlen = actual_result_count

        # don't limit to more than what we have
        if limit is not None and limit >= rlen:
            limit = rlen

        # if we want a batch from the end of the resultset, reverse sorting
        # order and limit it, then reverse the resultset again
        switched_reverse = False
        if b_size and b_start and b_start > rlen / 2:
            reverse = not reverse
            switched_reverse = True
            b_end = b_start + b_size
            if b_end >= rlen:
                overrun = rlen - b_end
                if b_start >= rlen:
                    # bail out, we are outside the possible range
                    return LazyCat([], 0, actual_result_count)
                else:
                    b_size += overrun
                b_start = 0
            else:
                b_start = b_end - b_start
            limit = b_start + b_size

        if merge and limit is None and (rlen >
                                        (len(sort_index) * (rlen / 100 + 1))):
            # The result set is much larger than the sorted index,
            # so iterate over the sorted index for speed.
            # This is rarely exercised in practice...

            length = 0

            try:
                intersection(rs, IISet(()))
            except TypeError:
                # rs is not an object in the IIBTree family.
                # Try to turn rs into an IISet.
                rs = IISet(rs)

            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = _intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', _None)
                    if keys is not _None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    append((k, intset, _self__getitem__))
                    # Note that sort keys are unique.

            if reverse:
                result.sort(reverse=True)
            else:
                result.sort()
            sequence, slen = self._limit_sequence(result, length, b_start,
                                                  b_size, switched_reverse)
            result = LazyCat(LazyValues(sequence), slen, actual_result_count)
        elif limit is None or (limit * 4 > rlen):
            # Iterate over the result set getting sort keys from the index
            for did in rs:
                try:
                    key = index_key_map[did]
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    append((key, did, _self__getitem__))
                    # The reference back to __getitem__ is used in case
                    # we do not merge now and need to intermingle the
                    # results with those of other catalogs while avoiding
                    # the cost of instantiating a LazyMap per result
            if merge:
                if reverse:
                    result.sort(reverse=True)
                else:
                    result.sort()
                if limit is not None:
                    result = result[:limit]
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                result = LazyValues(sequence)
                result.actual_result_count = actual_result_count
            else:
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                return sequence
        elif reverse:
            # Limit/sort results using N-Best algorithm
            # This is faster for large sets then a full sort
            # And uses far less memory
            keys = []
            n = 0
            worst = None
            for did in rs:
                try:
                    key = index_key_map[did]
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    if n >= limit and key <= worst:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, _self__getitem__))
                    if n == limit:
                        del keys[0], result[0]
                    else:
                        n += 1
                    worst = keys[0]
            result.reverse()
            if merge:
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                result = LazyValues(sequence)
                result.actual_result_count = actual_result_count
            else:
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                return sequence
        elif not reverse:
            # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
            keys = []
            n = 0
            best = None
            for did in rs:
                try:
                    key = index_key_map[did]
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    if n >= limit and key >= best:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, _self__getitem__))
                    if n == limit:
                        del keys[-1], result[-1]
                    else:
                        n += 1
                    best = keys[-1]
            if merge:
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                result = LazyValues(sequence)
                result.actual_result_count = actual_result_count
            else:
                sequence, _ = self._limit_sequence(result, 0, b_start, b_size,
                                                   switched_reverse)
                return sequence

        return LazyMap(self.__getitem__,
                       result,
                       len(result),
                       actual_result_count=actual_result_count)
Esempio n. 2
0
    def sortResults(self, rs, sort_index, reverse=0, limit=None, merge=1):
        # Sort a result set using a sort index. Return a lazy
        # result set in sorted order if merge is true otherwise
        # returns a sortable list of raw result tuples
        #
        # The two 'for' loops in here contribute a significant
        # proportion of the time to perform an indexed search.
        # Try to avoid all non-local attribute lookup inside
        # those loops.
        assert limit is None or limit > 0, 'Limit value must be 1 or greater'
        _lazymap = LazyMap
        _intersection = intersection
        _self__getitem__ = self.__getitem__
        _index_keyForDocument = sort_index.keyForDocument
        _None = None
        _keyerror = KeyError
        result = []
        append = result.append
        if hasattr(rs, 'keys'):
            rs = rs.keys()
        rlen = len(rs)
        
        if (merge and limit is None and 
            (rlen > (len(sort_index) * (rlen / 100 + 1)))):
            # The result set is much larger than the sorted index,
            # so iterate over the sorted index for speed.
            # This is rarely exercised in practice...
            
            length = 0

            try:
                intersection(rs, IISet(()))
            except TypeError:
                # rs is not an object in the IIBTree family.
                # Try to turn rs into an IISet.
                rs = IISet(rs)

            for k, intset in sort_index.items():
                # We have an index that has a set of values for
                # each sort key, so we intersect with each set and
                # get a sorted sequence of the intersections.
                intset = _intersection(rs, intset)
                if intset:
                    keys = getattr(intset, 'keys', _None)
                    if keys is not _None:
                        # Is this ever true?
                        intset = keys()
                    length += len(intset)
                    result.append((k, LazyMap(_self__getitem__, intset)))
                    # Note that sort keys are unique.
            if merge:
                result.sort()
                if reverse:
                    result.reverse()
                return LazyCat(LazyValues(result), length)
            else:
                return result            
        elif limit is None or (limit * 4 > rlen):
            # Iterate over the result set getting sort keys from the index
            for did in rs:
                try:
                    key = _index_keyForDocument(did)
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    append((key, did, _self__getitem__))
                    # The reference back to __getitem__ is used in case
                    # we do not merge now and need to intermingle the
                    # results with those of other catalogs while avoiding
                    # the cost of instantiating a LazyMap per result
            if merge:
                result.sort()
                if reverse:
                    result.reverse()
                if limit is not None:
                    result = result[:limit]                    
                result = LazyValues(result)
            else:
                return result
        elif reverse: 
            # Limit/sort results using N-Best algorithm
            # This is faster for large sets then a full sort
            # And uses far less memory
            keys = []
            n = 0
            worst = None
            for did in rs:
                try:
                    key = _index_keyForDocument(did)
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    if n >= limit and key <= worst:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, _self__getitem__))
                    if n == limit:
                        del keys[0], result[0]
                    else:
                        n += 1
                    worst = keys[0]
            result.reverse()
            if merge:
                result = LazyValues(result) 
            else:
                return result
        elif not reverse:
            # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
            keys = []
            n = 0
            best = None
            for did in rs:
                try:
                    key = _index_keyForDocument(did)
                except _keyerror:
                    # This document is not in the sort key index, skip it.
                    pass
                else:
                    if n >= limit and key >= best:
                        continue
                    i = bisect(keys, key)
                    keys.insert(i, key)
                    result.insert(i, (key, did, _self__getitem__))
                    if n == limit:
                        del keys[-1], result[-1]
                    else:
                        n += 1
                    best = keys[-1]
            if merge:
                result = LazyValues(result) 
            else:
                return result
        
        result = LazyMap(self.__getitem__, result, len(result))
        result.actual_result_count = rlen
        return result