def sortResults(self, rs, sort_index, reverse=0, limit=None, merge=1,
                actual_result_count=None, b_start=0, b_size=None):
    # Sort a result set using a sort index. Return a lazy
    # result set in sorted order if merge is true otherwise
    # returns a list of (sortkey, uid, getter_function) tuples
    #
    # Parameters:
    #   rs                  -- the result set of document ids; if it has a
    #                          'keys' method, its keys are used.
    #   sort_index          -- the index supplying the sort keys; must offer
    #                          documentToKeyMap(), items() and len().
    #   reverse             -- true to sort in descending order.
    #   limit               -- optional maximum number of sorted results
    #                          to compute.
    #   merge               -- true to return a lazy, merged result; false
    #                          to return the raw tuples so the caller can
    #                          merge them with other catalogs' results.
    #   actual_result_count -- total count to report on the lazy result;
    #                          defaults to len(rs).
    #   b_start, b_size     -- optional batch window (offset and size).
    #
    # The two 'for' loops in here contribute a significant
    # proportion of the time to perform an indexed search.
    # Try to avoid all non-local attribute lookup inside
    # those loops.
    _intersection = intersection
    _self__getitem__ = self.__getitem__
    index_key_map = sort_index.documentToKeyMap()
    _None = None
    _keyerror = KeyError
    result = []
    append = result.append
    if hasattr(rs, 'keys'):
        rs = rs.keys()
    if actual_result_count is None:
        rlen = len(rs)
        actual_result_count = rlen
    else:
        rlen = actual_result_count

    # don't limit to more than what we have
    if limit is not None and limit >= rlen:
        limit = rlen

    # if we want a batch from the end of the resultset, reverse sorting
    # order and limit it, then reverse the resultset again
    switched_reverse = False
    if b_size and b_start and b_start > rlen / 2:
        reverse = not reverse
        switched_reverse = True
        b_end = b_start + b_size
        if b_end >= rlen:
            # The batch runs past the end: shrink it to what is left.
            # 'overrun' is zero or negative here.
            overrun = rlen - b_end
            if b_start >= rlen:
                # bail out, we are outside the possible range
                return LazyCat([], 0, actual_result_count)
            else:
                b_size += overrun
            b_start = 0
        else:
            # The batch [b_start, b_end) of the requested ordering sits at
            # [rlen - b_end, rlen - b_start) once the order is reversed.
            # (Previously this was 'b_end - b_start', i.e. always b_size,
            # which selected the wrong window.)
            # NOTE(review): assumes _limit_sequence slices
            # [b_start:b_start + b_size] and then undoes the reversal --
            # confirm against that helper.
            b_start = rlen - b_end
        limit = b_start + b_size

    if merge and limit is None and (
            rlen > (len(sort_index) * (rlen / 100 + 1))):
        # The result set is much larger than the sorted index,
        # so iterate over the sorted index for speed.
        # This is rarely exercised in practice...

        length = 0

        try:
            intersection(rs, IISet(()))
        except TypeError:
            # rs is not an object in the IIBTree family.
            # Try to turn rs into an IISet.
            rs = IISet(rs)

        for k, intset in sort_index.items():
            # We have an index that has a set of values for
            # each sort key, so we intersect with each set and
            # get a sorted sequence of the intersections.
            intset = _intersection(rs, intset)
            if intset:
                keys = getattr(intset, 'keys', _None)
                if keys is not _None:
                    # Is this ever true?
                    intset = keys()
                length += len(intset)
                append((k, intset, _self__getitem__))
                # Note that sort keys are unique.

        if reverse:
            result.sort(reverse=True)
        else:
            result.sort()
        sequence, slen = self._limit_sequence(
            result, length, b_start, b_size, switched_reverse)
        result = LazyCat(LazyValues(sequence), slen, actual_result_count)
    elif limit is None or (limit * 4 > rlen):
        # Iterate over the result set getting sort keys from the index
        for did in rs:
            try:
                key = index_key_map[did]
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                append((key, did, _self__getitem__))
                # The reference back to __getitem__ is used in case
                # we do not merge now and need to intermingle the
                # results with those of other catalogs while avoiding
                # the cost of instantiating a LazyMap per result
        if merge:
            if reverse:
                result.sort(reverse=True)
            else:
                result.sort()
            if limit is not None:
                result = result[:limit]
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            result = LazyValues(sequence)
            result.actual_result_count = actual_result_count
        else:
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            return sequence
    elif reverse:
        # Limit/sort results using N-Best algorithm
        # This is faster for large sets then a full sort
        # And uses far less memory
        keys = []
        n = 0
        worst = None
        for did in rs:
            try:
                key = index_key_map[did]
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                if n >= limit and key <= worst:
                    continue
                # 'keys' and 'result' are kept in ascending key order and
                # in step with each other.
                i = bisect(keys, key)
                keys.insert(i, key)
                result.insert(i, (key, did, _self__getitem__))
                if n == limit:
                    # Already full: drop the smallest entry.
                    del keys[0], result[0]
                else:
                    n += 1
                worst = keys[0]
        result.reverse()
        if merge:
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            result = LazyValues(sequence)
            result.actual_result_count = actual_result_count
        else:
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            return sequence
    elif not reverse:
        # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
        keys = []
        n = 0
        best = None
        for did in rs:
            try:
                key = index_key_map[did]
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                if n >= limit and key >= best:
                    continue
                i = bisect(keys, key)
                keys.insert(i, key)
                result.insert(i, (key, did, _self__getitem__))
                if n == limit:
                    # Already full: drop the largest entry.
                    del keys[-1], result[-1]
                else:
                    n += 1
                best = keys[-1]
        if merge:
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            result = LazyValues(sequence)
            result.actual_result_count = actual_result_count
        else:
            sequence, _ = self._limit_sequence(
                result, 0, b_start, b_size, switched_reverse)
            return sequence

    return LazyMap(self.__getitem__, result, len(result),
                   actual_result_count=actual_result_count)
def sortResults(self, rs, sort_index, reverse=0, limit=None, merge=1):
    # Sort a result set using a sort index. Return a lazy
    # result set in sorted order if merge is true otherwise
    # returns a sortable list of raw result tuples
    #
    # Parameters:
    #   rs         -- the result set of document ids; if it has a 'keys'
    #                 method, its keys are used.
    #   sort_index -- the index supplying the sort keys; must offer
    #                 keyForDocument(), items() and len().
    #   reverse    -- true to sort in descending order.
    #   limit      -- optional maximum number of results; must be 1 or
    #                 greater when given.
    #   merge      -- true to return a lazy, merged result; false to
    #                 return the raw (key, did, getter) tuples.
    #
    # The two 'for' loops in here contribute a significant
    # proportion of the time to perform an indexed search.
    # Try to avoid all non-local attribute lookup inside
    # those loops.
    assert limit is None or limit > 0, 'Limit value must be 1 or greater'
    _lazymap = LazyMap
    _intersection = intersection
    _self__getitem__ = self.__getitem__
    _index_keyForDocument = sort_index.keyForDocument
    _None = None
    _keyerror = KeyError
    result = []
    append = result.append
    if hasattr(rs, 'keys'):
        rs = rs.keys()
    rlen = len(rs)

    if (merge and limit is None and
            (rlen > (len(sort_index) * (rlen / 100 + 1)))):
        # The result set is much larger than the sorted index,
        # so iterate over the sorted index for speed.
        # This is rarely exercised in practice...

        length = 0

        try:
            intersection(rs, IISet(()))
        except TypeError:
            # rs is not an object in the IIBTree family.
            # Try to turn rs into an IISet.
            rs = IISet(rs)

        for k, intset in sort_index.items():
            # We have an index that has a set of values for
            # each sort key, so we intersect with each set and
            # get a sorted sequence of the intersections.
            intset = _intersection(rs, intset)
            if intset:
                keys = getattr(intset, 'keys', _None)
                if keys is not _None:
                    # Is this ever true?
                    intset = keys()
                length += len(intset)
                # Use the hoisted local aliases inside the loop.
                append((k, _lazymap(_self__getitem__, intset)))
                # Note that sort keys are unique.

        # This branch is only entered when merge is true, so the result
        # is always sorted and merged here.
        result.sort()
        if reverse:
            result.reverse()
        result = LazyCat(LazyValues(result), length)
        # Report the same count as the other merged paths below.
        result.actual_result_count = rlen
        return result
    elif limit is None or (limit * 4 > rlen):
        # Iterate over the result set getting sort keys from the index
        for did in rs:
            try:
                key = _index_keyForDocument(did)
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                append((key, did, _self__getitem__))
                # The reference back to __getitem__ is used in case
                # we do not merge now and need to intermingle the
                # results with those of other catalogs while avoiding
                # the cost of instantiating a LazyMap per result
        if merge:
            result.sort()
            if reverse:
                result.reverse()
            if limit is not None:
                result = result[:limit]
            result = LazyValues(result)
        else:
            return result
    elif reverse:
        # Limit/sort results using N-Best algorithm
        # This is faster for large sets then a full sort
        # And uses far less memory
        keys = []
        n = 0
        worst = None
        for did in rs:
            try:
                key = _index_keyForDocument(did)
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                if n >= limit and key <= worst:
                    continue
                # 'keys' and 'result' are kept in ascending key order and
                # in step with each other.
                i = bisect(keys, key)
                keys.insert(i, key)
                result.insert(i, (key, did, _self__getitem__))
                if n == limit:
                    # Already full: drop the smallest entry.
                    del keys[0], result[0]
                else:
                    n += 1
                worst = keys[0]
        result.reverse()
        if merge:
            result = LazyValues(result)
        else:
            return result
    elif not reverse:
        # Limit/sort results using N-Best algorithm in reverse (N-Worst?)
        keys = []
        n = 0
        best = None
        for did in rs:
            try:
                key = _index_keyForDocument(did)
            except _keyerror:
                # This document is not in the sort key index, skip it.
                pass
            else:
                if n >= limit and key >= best:
                    continue
                i = bisect(keys, key)
                keys.insert(i, key)
                result.insert(i, (key, did, _self__getitem__))
                if n == limit:
                    # Already full: drop the largest entry.
                    del keys[-1], result[-1]
                else:
                    n += 1
                best = keys[-1]
        if merge:
            result = LazyValues(result)
        else:
            return result

    result = _lazymap(self.__getitem__, result, len(result))
    result.actual_result_count = rlen
    return result