def search(self, words, max_docs, weight_func=lambda n: np.ones(n), score_func=lambda s: s):
    """Search the index for the given words and return the total weighted score.

    Args:
        words: Either a single query string or an iterable of word strings.
            A single string is passed through ``AsciiConvertor.convert`` and
            then has every Lucene query-syntax character backslash-escaped.
            An iterable is joined with single spaces and used as-is.
        max_docs: Maximum number of hits requested from the searcher.
        weight_func: Called with the number of hits actually returned; must
            return one weight per hit, in hit order. The default gives every
            hit a weight of one.
        score_func: Applied to each hit's raw score before weighting. The
            default leaves the score unchanged.

    Returns:
        The sum over all hits of ``weight * score_func(hit.score)``; ``0.0``
        when the query produces no hits.
    """
    searcher = self._get_searcher()

    # ``type(u'')`` is ``unicode`` on Python 2 and ``str`` on Python 3, so byte
    # strings, unicode strings and their subclasses are all treated as one
    # query string instead of being joined character by character.
    if isinstance(words, (str, type(u''))):
        search_text = AsciiConvertor.convert(words)
        # Escape in a single pass so backslashes added here are never escaped
        # again. The backslash itself is included: it is Lucene's escape
        # character, so a literal one in the input must be escaped too.
        search_text = ''.join(
            ('\\' + ch) if ch in '\\/+-&|!(){}[]^"~*?:' else ch
            for ch in search_text
        )
    else:
        # NOTE(review): word lists are neither ASCII-converted nor escaped;
        # presumably callers pass already-clean terms -- confirm.
        search_text = ' '.join(words)
    print('search_text: %s' % search_text)

    # Whatever tokenisation the caller applied, the final split into query
    # terms is done by Lucene's QueryParser using self._analyzer.
    query = QueryParser(Version.LUCENE_CURRENT, "text", self._analyzer).parse(search_text)
    hits = searcher.search(query, max_docs)

    score_docs = hits.scoreDocs
    weights = weight_func(len(score_docs))
    score_sum = 0.0
    for hit, weight in zip(score_docs, weights):
        score_sum += weight * score_func(hit.score)
    return score_sum
def convert_ascii(self, text):
    """Return ``text`` run through ``AsciiConvertor.convert`` when enabled.

    When ``self.ascii_conversion`` is falsy the text is returned unchanged.
    """
    if not self.ascii_conversion:
        return text
    return AsciiConvertor.convert(text)