Ejemplo n.º 1
0
  def retrieval_score(self,
                      k: int,
                      documents_list: List[environment_pb2.Document],
                      gold_answer: List[str],
                      score_type: str = 'dcg') -> float:
    """Return the retrieval score @k for documents_list.

    Each of the first `k` documents is assigned the relevance
    `float(utils.gold_answer_present(doc.content, gold_answer))`, i.e. a binary
    score that is 1 if any of the gold answers is present in the document. The
    resulting list is handed to the `utils` scorer selected by `score_type`.

    Args:
      k: Cut-off rank; only `documents_list[:k]` is scored.
      documents_list: Retrieved documents (presumably ordered best-first --
        confirm against the caller).
      gold_answer: Gold answer strings searched for in each document's content.
      score_type: One of 'dcg', 'ndcg' or 'mrr'.

    Returns:
      The value returned by the selected `utils.*_score` function for the
      truncated relevance list.

    Raises:
      NotImplementedError: If `score_type` is not one of the supported values.
    """
    # Truncate *before* computing relevances: documents past rank k cannot
    # influence a score @k, so there is no point in searching them for answers.
    top_k_documents = documents_list[:k]
    relevances = [
        float(utils.gold_answer_present(doc.content, gold_answer))
        for doc in top_k_documents
    ]

    if score_type == 'dcg':
      return utils.dcg_score(relevances)
    if score_type == 'ndcg':
      return utils.ndcg_score(relevances)
    if score_type == 'mrr':
      return utils.mrr_score(relevances)
    raise NotImplementedError(
        f'Score type {score_type} is not yet implemented.')
Ejemplo n.º 2
0
def ndcg(documents: Sequence[environment_pb2.Document],
         answers: Sequence[str]) -> float:
    """Compute the NDCG of `documents` with respect to `answers`.

    Every document contributes the relevance
    `float(utils.gold_answer_present(doc.content, answers))`; the relevances,
    kept in the order of `documents`, are scored by `utils.ndcg_score`.

    Args:
        documents: Documents to evaluate, in ranking order.
        answers: Answer strings looked for in each document's content.

    Returns:
        The result of `utils.ndcg_score` on the per-document relevances.
    """
    per_document_relevance = []
    for document in documents:
        answer_found = utils.gold_answer_present(document.content, answers)
        per_document_relevance.append(float(answer_found))
    return utils.ndcg_score(per_document_relevance)
Ejemplo n.º 3
0
    def score(self,
              identifier: str,
              documents_list: List[environment_pb2.Document],
              gold_answer: Optional[List[str]] = None,
              **kwargs):
        """Return the score named by `identifier` for documents_list.

        Supported identifiers:
          * 'em' / 'f1': `utils.compute_em` / `utils.compute_f1` on the answer
            of the first document only.
          * 'em_at_k': best `utils.compute_em` over the first `k` documents.
          * 'dcg' / 'mrr' / 'ndcg': delegated to `self.retrieval_score`, with
            all of `kwargs` forwarded unchanged.
          * 'recall_at_k': 1.0 if `utils.gold_answer_present` holds for any of
            the first `k` documents, else 0.0.

        Args:
            identifier: Name of the score to compute (see above).
            documents_list: Documents to score, in ranking order.
            gold_answer: Gold answers; when None, falls back to
                `self.original_query.gold_answer`.
            **kwargs: Must contain `k` for every identifier except 'em' and
                'f1'.

        Returns:
            The requested score, or 0. when `documents_list` or the gold
            answers are empty, or when the top-`k` slice is empty.

        Raises:
            AssertionError: If `k` is required but missing from `kwargs`.
            NotImplementedError: If `identifier` is not supported.
        """
        if gold_answer is None:
            gold_answer = list(self.original_query.gold_answer)

        # Nothing to score against: define the score as zero.
        if not documents_list or not gold_answer:
            return 0.

        # All cut-off based scores need "k"; check once instead of per branch.
        # NOTE(review): asserts are stripped under `python -O`; kept as-is so
        # the raised exception type stays what callers already see.
        if identifier in ('em_at_k', 'dcg', 'mrr', 'ndcg', 'recall_at_k'):
            assert 'k' in kwargs, f'Provide "k" for computing the {identifier} score.'

        if identifier == 'em':
            return utils.compute_em(documents_list[0].answer.answer,
                                    gold_answer)
        elif identifier == 'em_at_k':
            # `default` covers an empty top-k slice (e.g. k == 0), where a bare
            # max() would raise ValueError.
            return max(
                (utils.compute_em(document.answer.answer, gold_answer)
                 for document in documents_list[:kwargs['k']]),
                default=0.)
        elif identifier == 'f1':
            return utils.compute_f1(documents_list[0].answer.answer,
                                    gold_answer)
        elif identifier in ('dcg', 'mrr', 'ndcg'):
            return self.retrieval_score(documents_list=documents_list,
                                        gold_answer=gold_answer,
                                        score_type=identifier,
                                        **kwargs)
        elif identifier == 'recall_at_k':
            # Same empty-slice guard as for 'em_at_k'.
            return max(
                (float(utils.gold_answer_present(doc.content, gold_answer))
                 for doc in documents_list[:kwargs['k']]),
                default=0.)
        else:
            raise NotImplementedError(
                f'Score "{identifier}" is not yet implemented.')