def rescore(self, query: Query, texts: List[Text]) -> List[Text]:
    texts = deepcopy(texts)
    # Score every ordered pair of candidate texts against the query.
    doc_pairs = list(permutations(texts, 2))
    scores = defaultdict(float)
    batch_input = DuoQueryDocumentBatch(query=query, doc_pairs=doc_pairs)
    for batch in self.tokenizer.traverse_duo_query_document(batch_input):
        with torch.cuda.amp.autocast(enabled=self.use_amp):
            input_ids = batch.output['input_ids'].to(self.device)
            attn_mask = batch.output['attention_mask'].to(self.device)
            _, batch_scores = greedy_decode(self.model,
                                            input_ids,
                                            length=1,
                                            attention_mask=attn_mask,
                                            return_last_logits=True)
            # 6136 and 1176 are the indexes of the tokens false and true in T5.
            batch_scores = batch_scores[:, [6136, 1176]]
            batch_scores = torch.nn.functional.softmax(batch_scores, dim=1)
            batch_probs = batch_scores[:, 1].tolist()
        # Aggregate pairwise probabilities symmetrically: the probability that
        # the first document of a pair is more relevant goes to its score, the
        # complement goes to the second document's score.
        for doc, score in zip(batch.doc_pairs, batch_probs):
            scores[doc[0].metadata['docid']] += score
            scores[doc[1].metadata['docid']] += (1 - score)
    for text in texts:
        text.score = scores[text.metadata['docid']]
    return texts
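# Illustration (not part of the original code): the duo rescorer's symmetric
# aggregation in isolation. For each ordered pair (a, b), the model's
# probability p that a is more relevant than b is added to a's score and
# (1 - p) to b's. The docids and probabilities below are made up.
from collections import defaultdict

pairwise_prob = {('d1', 'd2'): 0.9, ('d2', 'd1'): 0.2,
                 ('d1', 'd3'): 0.7, ('d3', 'd1'): 0.4,
                 ('d2', 'd3'): 0.6, ('d3', 'd2'): 0.5}

agg = defaultdict(float)
for (a, b), p in pairwise_prob.items():
    agg[a] += p        # evidence that a beats b
    agg[b] += 1.0 - p  # complementary evidence for b

print(sorted(agg, key=agg.get, reverse=True))  # ['d1', 'd3', 'd2']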
def rerank(self, query: Query, texts: List[Text]) -> List[Text]:
    texts = deepcopy(texts)
    batch_input = QueryDocumentBatch(query=query, documents=texts)
    for batch in self.tokenizer.traverse_query_document(batch_input):
        input_ids = batch.output['input_ids']
        attn_mask = batch.output['attention_mask']
        _, batch_scores = greedy_decode(self.model,
                                        input_ids.to(self.device),
                                        length=2,
                                        attention_mask=attn_mask.to(self.device),
                                        return_last_logits=True)
        # 6136 and 1176 are the indexes of the tokens false and true in T5.
        batch_scores = batch_scores[:, [6136, 1176]]
        batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
        batch_log_probs = batch_scores[:, 1].tolist()
        for doc, score in zip(batch.documents, batch_log_probs):
            doc.score = score
    return texts
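# Sketch (assumptions, not from the original): what a single monoT5-style
# scoring pass boils down to, written directly against Hugging Face
# transformers. 't5-base' is a placeholder for whichever fine-tuned relevance
# checkpoint is actually loaded into `self.model` above, and the
# "Query: ... Document: ... Relevant:" prompt template is an assumption.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base').eval()

query = 'what causes tides?'
doc = 'Tides are caused by the gravitational pull of the moon and the sun.'
enc = tokenizer(f'Query: {query} Document: {doc} Relevant:', return_tensors='pt')

# A single decoder step from the decoder-start token yields the logits of the
# first generated token, which is where "true" / "false" would be produced.
decoder_input_ids = torch.full((1, 1), model.config.decoder_start_token_id,
                               dtype=torch.long)
with torch.no_grad():
    logits = model(input_ids=enc['input_ids'],
                   attention_mask=enc['attention_mask'],
                   decoder_input_ids=decoder_input_ids).logits[:, -1, :]

# 6136 and 1176 are the false/true token ids noted in the comment above.
log_probs = torch.nn.functional.log_softmax(logits[:, [6136, 1176]], dim=1)
print(log_probs[0, 1].item())  # log-probability of "true", i.e. the score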
def rescore(self, query: Query, texts: List[Text]) -> List[Text]:
    texts = deepcopy(texts)
    batch_input = QueryDocumentBatch(query=query, documents=texts)
    for batch in self.tokenizer.traverse_query_document(batch_input):
        with torch.cuda.amp.autocast(enabled=self.use_amp):
            input_ids = batch.output['input_ids'].to(self.device)
            attn_mask = batch.output['attention_mask'].to(self.device)
            _, batch_scores = greedy_decode(self.model,
                                            input_ids,
                                            length=1,
                                            attention_mask=attn_mask,
                                            return_last_logits=True)
            # Keep only the logits of the "false" and "true" relevance tokens.
            batch_scores = batch_scores[:, [self.token_false_id, self.token_true_id]]
            batch_scores = torch.nn.functional.log_softmax(batch_scores, dim=1)
            batch_log_probs = batch_scores[:, 1].tolist()
        for doc, score in zip(batch.documents, batch_log_probs):
            doc.score = score
    return texts
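# Sketch (an assumption, not shown in the original): one way the
# `token_false_id` / `token_true_id` attributes used above could be resolved
# from the tokenizer's vocabulary instead of hard-coding 6136 and 1176.
# 't5-base' is a placeholder checkpoint name.
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained('t5-base')
vocab = tokenizer.get_vocab()
token_false_id = vocab['▁false']  # 6136 in the standard T5 vocabulary
token_true_id = vocab['▁true']    # 1176 in the standard T5 vocabulary
print(token_false_id, token_true_id)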