예제 #1
0
 def get_query_doc_scores(self, query, did, model, skip_invividual=False):
     """Score one document against a query under the given similarity model.

     Args:
         query: Sequence of query terms. Each term is run through the stemmed
             analyzer and may expand to zero or more tokens.
         did: External document id; resolved to a Lucene docid through the
             index utils.
         model: Model specification passed to ``self._model`` to build the
             similarity that is installed on the searcher.
         skip_invividual: When false (default) every analysed token is scored
             on its own. When true the tokens are joined into a single query
             and only the combined score is computed. The misspelled name is
             kept so existing keyword callers keep working.

     Returns:
         ``(total, per_token_scores)`` when ``skip_invividual`` is false, or
         a single float when it is true. If the document is not found
         (Lucene docid ``-1``), a score of ``-999.`` per raw query term is
         used instead.
     """
     sim = self._model(model)
     searcher = self._searcher()
     searcher.setSimilarity(sim)
     ldid = self._get_index_utils().convertDocidToLuceneDocid(did)
     if ldid == -1:
         # Unknown document: one -999 sentinel per raw (pre-analysis) term.
         # The shape mirrors the normal return for the requested mode, so
         # callers get a scalar in skip mode instead of an unexpected tuple.
         total = -999. * len(query)
         if skip_invividual:
             return total
         return total, [-999.] * len(query)
     analyzer = self._get_stemmed_analyzer()
     tokens = list(itertools.chain.from_iterable(
         J.A_AnalyzerUtils.analyze(analyzer, term).toArray() for term in query))
     if skip_invividual:
         lquery = J.L_QueryParser().parse(_anserini_escape(' '.join(tokens), J), J.A_IndexArgs.CONTENTS)
         # Convert with doubleValue() exactly like the per-token path below,
         # so both modes hand back plain Python floats.
         return searcher.explain(lquery, ldid).getValue().doubleValue()
     scores = []
     for token in tokens:
         lquery = J.L_QueryParser().parse(_anserini_escape(token, J), J.A_IndexArgs.CONTENTS)
         scores.append(searcher.explain(lquery, ldid).getValue().doubleValue())
     return sum(scores), scores
예제 #2
0
 def get_query_doc_scores_batch(self, query, dids, model):
     """Score several documents against one query with a single search.

     The analysed query is AND-ed with a clause listing the requested
     document ids, so the search can only return those documents.

     Args:
         query: Query text; passed whole to the stemmed analyzer.
         dids: Iterable of external document ids to score.
         model: Model specification passed to ``self._model`` to build the
             similarity that is installed on the searcher.

     Returns:
         Dict mapping the external id of each returned document to its
         search score. Requested documents that the search does not return
         are absent. An empty ``dids`` yields an empty dict.
     """
     sim = self._model(model)
     searcher = self._searcher()
     searcher.setSimilarity(sim)
     # Materialise once: the ids are iterated twice and counted below, so a
     # one-shot iterable would be exhausted after the first pass.
     dids = list(dids)
     if not dids:
         # Nothing to score. Continuing would build a query with an empty
         # "()" clause and request zero hits from the searcher.
         return {}
     index_utils = self._get_index_utils()
     # Lucene docid -> external id, used to translate the hits back.
     ldids = {index_utils.convertDocidToLuceneDocid(did): did for did in dids}
     analyzer = self._get_stemmed_analyzer()
     terms = J.A_AnalyzerUtils.analyze(analyzer, query).toArray()
     query_clause = ' '.join(_anserini_escape(term, J) for term in terms)
     doc_clause = ' '.join(f'{J.A_IndexArgs.ID}:{did}' for did in dids)
     lquery = J.L_QueryParser().parse(f'({query_clause}) AND ({doc_clause})', J.A_IndexArgs.CONTENTS)
     search_results = searcher.search(lquery, len(dids))
     return {ldids[hit.doc]: hit.score for hit in search_results.scoreDocs}