Example #1
0
	def rank_actions(self, objects, key, item_check=None, decorator=None):
		"""
		Rank @objects for @key and hand them back best-first.

		@objects are passed through @item_check and then wrapped with
		search.make_rankables(). When @key is truthy they are scored
		with search.score_objects() followed by search.bonus_objects();
		otherwise search.score_actions() produces the ranking.

		@item_check and @decorator default to identity() when not given.

		Return the (match, match_iter) pair that peekfirst() yields for
		the decorated matches, sorted by descending "rank".
		"""
		check = item_check or identity
		decorate = decorator or identity

		candidates = search.make_rankables(check(objects))
		if not key:
			ranked = search.score_actions(candidates)
		else:
			ranked = search.bonus_objects(
					search.score_objects(candidates, key), key)

		# highest rank first
		ordered = list(ranked)
		ordered.sort(key=operator.attrgetter("rank"), reverse=True)

		first, remaining = peekfirst(decorate(ordered))
		return first, remaining
Example #2
0
	def search(self, sources, key, score=True, item_check=None, decorator=None):
		"""
		Search @sources for @key.

		Each entry of @sources is handled as one of:
			a base.Source: its leaves are used, and its ranked
				iterator is kept in the source cache
			a base.TextSource: its items for @key are used, together
				with the rank reported by get_rank()
			anything else: taken as a sequence of objects

		The source cache is emptied unless @key starts with the key
		of the previous search.

		If @score, items are ranked and sorted by descending rank;
		otherwise they are only listed, source after source.

		Filters (identity() when not given):
			@item_check: applied to items before they enter the pool
			@decorator: applied to the match stream before returning

		Return (first, match_iter), where first is the first match (or
		None if there is no match) and match_iter iterates over all
		matches, the first one included.
		"""
		# Cached results are only reusable while the previous key is
		# being extended
		if not (self._old_key and key.startswith(self._old_key)):
			self._source_cache.clear()
		self._old_key = key

		check = item_check or identity
		decorate = decorator or identity

		def only_valid(seq):
			"""yield the items of @seq whose object is still valid"""
			for candidate in seq:
				target = candidate.object
				if hasattr(target, "is_valid") and not target.is_valid():
					continue
				yield candidate

		def peek_first(seq):
			"""Return (head, iterator): head is the first item of @seq,
			or None if @seq is empty; iterator is equivalent to @seq
			"""
			rest = iter(seq)
			try:
				head = next(rest)
			except StopIteration:
				return (None, rest)
			return (head, itertools.chain((head, ), rest))

		per_source = []
		for src in sources:
			fixed_rank = 0
			ranked = None
			is_source = isinstance(src, base.Source)
			if is_source:
				try:
					# reuse what an earlier search stored
					ranked = self._source_cache[src]
				except KeyError:
					# nothing stored, start from the leaves
					items = check(src.get_leaves())
			elif isinstance(src, base.TextSource):
				items = check(src.get_items(key))
				fixed_rank = src.get_rank()
			else:
				items = check(src)

			if not ranked:
				ranked = search.make_rankables(items)

			if score:
				if fixed_rank:
					ranked = search.add_rank_objects(ranked, fixed_rank)
				elif key:
					ranked = search.score_objects(ranked, key)
				ranked = search.bonus_objects(ranked, key)
				if is_source:
					# split the iterator: one copy to use now,
					# one copy to store in the cache
					ranked, self._source_cache[src] = itertools.tee(ranked)

			# when not scoring, the rankables are listed as they are
			per_source.append(ranked)

		combined = itertools.chain.from_iterable(per_source)
		if score:
			combined = list(combined)
			combined.sort(key=operator.attrgetter("rank"), reverse=True)

		# Duplicates are dropped by wrapped object, and validity is
		# checked only as the results are pulled through the iterators
		deduped = datatools.UniqueIterator(combined,
				key=operator.attrgetter("object"))
		first, match_iter = peek_first(decorate(only_valid(deduped)))
		return first, match_iter
Example #3
0
File: data.py Project: engla/kupfer
    def search(self, sources, key, score=True, item_check=None, decorator=None):
        """
        Search @sources for @key.

        Each entry of @sources is either something is_iterable()
        accepts, which is taken as a sequence of objects, or a source
        object. A source object is first looked up in the source cache;
        on a miss it is asked for get_text_items(@key) and get_rank(),
        and if that raises AttributeError its get_leaves() are used.

        The source cache is emptied unless @key starts with the key
        of the previous search.

        If @score, items are ranked and returned best-first.

        Filters (identity() when not given):
            @item_check: applied to items before they enter the pool
            @decorator: applied to the match stream before returning

        Return (first, match_iter) as produced by peekfirst() on the
        unique, valid, decorated matches.
        """
        # Cached results are only reusable while the previous key is
        # being extended
        if not (self._old_key and key.startswith(self._old_key)):
            self._source_cache.clear()
        self._old_key = key

        check = item_check or identity
        decorate = decorator or identity

        # Strategy: get one collection of rankables per source and let
        # the search.* scoring helpers update it in place (their return
        # values are not used here)
        timer = pretty.timing_start()
        ranked_lists = []
        for src in sources:
            fixed_rank = 0
            cacheable = True
            ranked = None
            if not is_iterable(src):
                try:
                    # reuse rankables stored by an earlier search
                    ranked = self._source_cache[src]
                except KeyError:
                    try:
                        items = check(src.get_text_items(key))
                        fixed_rank = src.get_rank()
                        cacheable = False
                    except AttributeError:
                        # NOTE: an AttributeError from any call in the
                        # try body above ends up here as well
                        items = check(src.get_leaves())
            else:
                items = check(src)
                cacheable = False

            if ranked is None:
                ranked = search.make_rankables(items)

            if score:
                if fixed_rank:
                    search.add_rank_objects(ranked, fixed_rank)
                elif key:
                    search.score_objects(ranked, key)
                    search.bonus_objects(ranked, key)
                if cacheable:
                    self._source_cache[src] = ranked

            ranked_lists.append(ranked)

        merged = (search.find_best_sort(ranked_lists) if score
                  else itertools.chain.from_iterable(ranked_lists))

        def only_valid(seq):
            """yield the items of @seq whose object is still valid"""
            for candidate in seq:
                target = candidate.object
                if hasattr(target, "is_valid") and not target.is_valid():
                    continue
                yield candidate

        # Duplicates are dropped by wrapped object, and validity is
        # checked only as the results are pulled through the iterators
        deduped = datatools.UniqueIterator(merged,
                key=operator.attrgetter("object"))
        first, match_iter = peekfirst(decorate(only_valid(deduped)))
        pretty.timing_step(__name__, timer, "ranked")
        return first, match_iter