def get_query_doc_scores(self, query, did, model, skip_invividual=False):
    """Score one document against a query under the given retrieval model.

    Args:
        query: iterable of raw (unanalyzed) query terms.
        did: collection docid of the document to score.
        model: retrieval-model identifier understood by ``self._model``.
        skip_invividual: when True, score the whole query in one pass and
            return a single float; when False (default), score each
            analyzed term separately. (Parameter name keeps its historical
            spelling for backward compatibility with existing callers.)

    Returns:
        When ``skip_invividual`` is False: ``(total_score, per_term_scores)``.
        When ``skip_invividual`` is True: a single float score.
        If ``did`` is unknown to the index, a sentinel
        ``(-999. * len(query), [-999.] * len(query))`` is returned.
    """
    sim = self._model(model)
    self._searcher().setSimilarity(sim)
    ldid = self._get_index_utils().convertDocidToLuceneDocid(did)
    if ldid == -1:
        # Document not in the index: sentinel scores.
        # NOTE(review): this returns a tuple even when skip_invividual is
        # True — callers of the skip path must tolerate that shape.
        return -999. * len(query), [-999.] * len(query)
    analyzer = self._get_stemmed_analyzer()
    # Analyze (tokenize/stem) each raw term; one raw term may expand to
    # several analyzed tokens, hence the chain.
    query = list(itertools.chain(*[J.A_AnalyzerUtils.analyze(analyzer, t).toArray() for t in query]))
    if not skip_invividual:
        result = []
        for q in query:
            q = _anserini_escape(q, J)
            lquery = J.L_QueryParser().parse(q, J.A_IndexArgs.CONTENTS)
            explain = self._searcher().explain(lquery, ldid)
            result.append(explain.getValue().doubleValue())
        return sum(result), result
    lquery = J.L_QueryParser().parse(_anserini_escape(' '.join(query), J), J.A_IndexArgs.CONTENTS)
    explain = self._searcher().explain(lquery, ldid)
    # Fix: convert the Java Number to a Python float, consistent with the
    # per-term branch above (which calls .doubleValue()).
    return explain.getValue().doubleValue()
def get_query_doc_scores_batch(self, query, dids, model):
    """Score several documents against a query with a single Lucene search.

    Builds one boolean query of the form
    ``(<analyzed query terms>) AND (<id:did ...>)`` so only the requested
    documents can match, then maps each hit's Lucene-internal docid back
    to its collection docid.

    Args:
        query: raw query string (analyzed/stemmed here).
        dids: iterable of collection docids to score.
        model: retrieval-model identifier understood by ``self._model``.

    Returns:
        dict mapping docid -> score. Requested docids that did not match
        the query are absent from the result.
    """
    self._searcher().setSimilarity(self._model(model))
    # Reverse map so hits (Lucene-internal ids) resolve to collection ids.
    lucene_to_did = {self._get_index_utils().convertDocidToLuceneDocid(d): d for d in dids}
    analyzer = self._get_stemmed_analyzer()
    terms = J.A_AnalyzerUtils.analyze(analyzer, query).toArray()
    escaped_terms = ' '.join(_anserini_escape(t, J) for t in terms)
    id_clause = ' '.join(f'{J.A_IndexArgs.ID}:{d}' for d in dids)
    parsed = J.L_QueryParser().parse(f'({escaped_terms}) AND ({id_clause})', J.A_IndexArgs.CONTENTS)
    hits = self._searcher().search(parsed, len(dids))
    scores = {lucene_to_did[hit.doc]: hit.score for hit in hits.scoreDocs}
    del hits  # release the Java result object promptly
    return scores