Exemple #1
0
    def search(self, words, max_docs, weight_func=lambda n: np.ones(n), score_func=lambda s: s):
        '''
        Search the index for the given words and return the total weighted score.

        Args:
            words: Either a single query string or an iterable of word strings.
                A single string (byte or unicode, including subclasses) is
                passed through ``AsciiConvertor.convert`` and then has the
                Lucene query-syntax characters backslash-escaped. An iterable
                of words is joined with single spaces and handed to the query
                parser unchanged (no conversion, no escaping).
            max_docs: Maximum number of hits requested from the searcher.
            weight_func: Called with the number of hits returned; must return
                an iterable of weights, which is paired positionally with
                ``hits.scoreDocs``. If it yields fewer weights than there are
                hits, the extra hits are ignored. Defaults to ``np.ones``.
            score_func: Applied to each hit's raw score before weighting.
                Defaults to the identity.

        Returns:
            The sum of ``weight * score_func(hit.score)`` over the paired
            hits; ``0.0`` when there are none.
        '''
        searcher = self._get_searcher()
        # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # both string flavours take this branch instead of being joined
        # character by character in the ``else`` branch.
        if isinstance(words, (str, type(u''))):
            search_text = AsciiConvertor.convert(words)
            # Escape every query-syntax character, including the backslash
            # itself. Doing it in one pass guarantees that the backslashes
            # inserted here are never escaped a second time.
            special_chars = frozenset('\\/+-&|!(){}[]^"~*?:')
            search_text = ''.join(
                '\\' + ch if ch in special_chars else ch
                for ch in search_text
            )
        else:
            search_text = ' '.join(words)
        # Parenthesised single-argument form prints identically on Python 2 and 3.
        print('search_text: %s' % search_text)
        # note that whatever parser that we put as our argument, eventually when searching with query, we will use Lucene parser to split query words
        query = QueryParser(Version.LUCENE_CURRENT, "text", self._analyzer).parse(search_text)
        hits = searcher.search(query, max_docs)

        score_docs = hits.scoreDocs
        weights = weight_func(len(score_docs))
        score_sum = 0.0
        for hit, weight in zip(score_docs, weights):
            score_sum += weight * score_func(hit.score)
        return score_sum
Exemple #2
0
 def convert_ascii(self, text):
     """Return ``text`` run through ``AsciiConvertor.convert`` if enabled.

     When ``self.ascii_conversion`` is falsy the input object is returned
     untouched.
     """
     return AsciiConvertor.convert(text) if self.ascii_conversion else text