예제 #1
0
    def batch_search(self, queries: List[str], qids: List[str], k: int = 10,
                     threads: int = 1) -> Dict[str, List[JSimpleSearcherResult]]:
        """Run several queries in one call by delegating to the Java ``batchSearch`` method.

        Parameters
        ----------
        queries : List[str]
            Query strings to run.
        qids : List[str]
            Query ids, given in the same order as ``queries``.
        k : int
            Number of hits to return.
        threads : int
            Maximum number of threads to use.

        Returns
        -------
        Dict[str, List[JSimpleSearcherResult]]
            Mapping from each query id to the list of search results returned for it.
        """
        # Copy the Python inputs into Java lists before crossing the bridge.
        java_queries = JArrayList()
        for text in queries:
            # Query text is handed over as UTF-8 encoded bytes.
            java_queries.add(JString(text.encode('utf8')))

        java_qids = JArrayList()
        for identifier in qids:
            java_qids.add(JString(identifier))

        hits_by_qid = self.object.batchSearch(java_queries, java_qids, int(k), int(threads))

        # Unpack the returned Java map entry by entry into a plain Python dict.
        output = {}
        for entry in hits_by_qid.entrySet().toArray():
            output[entry.getKey()] = entry.getValue()
        return output
예제 #2
0
    def batch_search(
        self,
        queries: List[str],
        qids: List[str],
        k: int = 10,
        threads: int = 1,
        fields=None) -> Dict[str, List[JImpactSearcherResult]]:
        """Search the collection concurrently for multiple queries, using multiple threads.

        Each query is encoded with ``self.query_encoder`` into a token-to-weight map. A token is
        kept only if it is present in ``self.idf`` and its idf is strictly greater than
        ``self.min_idf`` (an attribute of the searcher, not an argument of this method).

        Parameters
        ----------
        queries : List[str]
            List of query strings.
        qids : List[str]
            List of corresponding query ids.
        k : int
            Number of hits to return.
        threads : int
            Maximum number of threads to use.
        fields : dict
            Optional map of fields to search with associated boosts. When ``None`` or empty,
            ``batchSearch`` is called; otherwise ``batchSearchFields`` is called with the boosts.

        Returns
        -------
        Dict[str, List[JImpactSearcherResult]]
            Dictionary holding the search results, with the query ids as keys and the corresponding lists of search
            results as the values.
        """
        query_lst = JArrayList()
        for q in queries:
            jquery = JHashMap()
            for token, weight in self.query_encoder.encode(q).items():
                # Drop tokens that are unknown to the idf table or not above the threshold.
                if token in self.idf and self.idf[token] > self.min_idf:
                    jquery.put(token, JFloat(weight))
            query_lst.add(jquery)

        qid_lst = JArrayList()
        for qid in qids:
            qid_lst.add(qid)

        # Build the Java boost map only when it will actually be passed along.
        jfields = None
        if fields:
            jfields = JHashMap()
            for field, boost in fields.items():
                jfields.put(field, JFloat(boost))

        if jfields is None:
            results = self.object.batchSearch(query_lst, qid_lst, int(k),
                                              int(threads))
        else:
            results = self.object.batchSearchFields(query_lst, qid_lst, int(k),
                                                    int(threads), jfields)
        return {r.getKey(): r.getValue() for r in results.entrySet().toArray()}
예제 #3
0
    def batch_search(self, queries: List[str], qids: List[str], k: int = 10, threads: int = 1,
                     query_generator: JQueryGenerator = None, fields=None) -> Dict[str, List[JSimpleSearcherResult]]:
        """Search the collection concurrently for multiple queries, using multiple threads.

        Parameters
        ----------
        queries : List[str]
            List of query strings.
        qids : List[str]
            List of corresponding query ids.
        k : int
            Number of hits to return.
        threads : int
            Maximum number of threads to use.
        query_generator : JQueryGenerator
            Generator to build queries. Set to ``None`` by default to use Anserini default.
            When given, it is passed as the first argument of the Java call.
        fields : dict
            Optional map of fields to search with associated boosts. When ``None`` or empty,
            ``batchSearch`` is called; otherwise ``batchSearchFields`` is called with the boosts.

        Returns
        -------
        Dict[str, List[JSimpleSearcherResult]]
            Dictionary holding the search results, with the query ids as keys and the corresponding lists of search
            results as the values.
        """
        query_strings = JArrayList()
        for query in queries:
            # Query text is handed over as UTF-8 encoded bytes.
            query_strings.add(JString(query.encode('utf8')))

        qid_strings = JArrayList()
        for qid in qids:
            qid_strings.add(JString(qid))

        # Build the Java boost map only when it will actually be passed along.
        jfields = None
        if fields:
            jfields = JHashMap()
            for field, boost in fields.items():
                jfields.put(JString(field), JFloat(boost))

        # All four Java overloads share these arguments; the generator, if any, leads.
        call_args = (query_strings, qid_strings, int(k), int(threads))
        if query_generator:
            call_args = (query_generator,) + call_args

        if jfields is None:
            results = self.object.batchSearch(*call_args)
        else:
            results = self.object.batchSearchFields(*call_args, jfields)
        return {r.getKey(): r.getValue() for r in results.entrySet().toArray()}