Example #1
  def retrieval_score(self,
                      k: int,
                      documents_list: List[environment_pb2.Document],
                      gold_answer: List[str],
                      score_type: str = 'dcg') -> float:
    """Return the retrieval score @k for documents_list."""
    # Compute the relevance as a binary score that is 1 if any of the gold
    # answers is present in the document.
    relevances = [
        float(utils.gold_answer_present(doc.content, gold_answer))
        for doc in documents_list
    ]

    # Since we want to compute the relevance @k, we truncate the relevance
    # list at k.
    relevances = relevances[:k]

    if score_type == 'dcg':
      return utils.dcg_score(relevances)
    elif score_type == 'ndcg':
      return utils.ndcg_score(relevances)
    elif score_type == 'mrr':
      return utils.mrr_score(relevances)
    else:
      raise NotImplementedError(
          f'Score type {score_type} is not yet implemented.')
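The snippet above delegates the actual metric computation to utils.dcg_score, utils.ndcg_score and utils.mrr_score, which are not shown here. The following is a minimal sketch of what those helpers might look like, assuming they follow the standard definitions of DCG, NDCG and MRR over a list of binary relevances; the real utils module may differ.

import math
from typing import List


def dcg_score(relevances: List[float]) -> float:
  """Discounted cumulative gain: sum of rel_i / log2(i + 1) for 1-based rank i."""
  return sum(rel / math.log2(rank + 1)
             for rank, rel in enumerate(relevances, start=1))


def ndcg_score(relevances: List[float]) -> float:
  """DCG normalized by the DCG of the ideally ordered relevance list."""
  ideal = dcg_score(sorted(relevances, reverse=True))
  return dcg_score(relevances) / ideal if ideal > 0 else 0.0


def mrr_score(relevances: List[float]) -> float:
  """Reciprocal rank of the first relevant document, 0 if none is relevant."""
  for rank, rel in enumerate(relevances, start=1):
    if rel > 0:
      return 1.0 / rank
  return 0.0

For instance, with relevances = [1., 0., 1.] this sketch gives dcg_score ≈ 1.5, ndcg_score ≈ 0.92 and mrr_score == 1.0.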
Example #2
    def _intermediate_reward(self, step: int) -> float:
        """Computes an 'intermediate' reward after each step.

    r_t = S(d_t | q) - S(d_t-1 | q).
    Intermediate rewards (which  make up the bulk of our reward scheme) get
    computed after each step and, usually, quantify the "goodness" of local
    change.  For example, the improvement (or decrease) in a metric such as
    answer F1, relevant document recall, ... between the previous step and
    the current step.

    Args:
      step:  The step for which we want to compute the intermediate reward.

    Returns:
      The intermediate reward, as defined by the configuration.
    """

        assert step >= 2, (
            'Intermediate reward computation requires at least 2 history '
            f'entries; the requested step was {step}.')
        current_documents_list = self._get_current_documents_list(step)
        previous_documents_list = self._get_previous_documents_list(step)

        if common_flags.REWARD.value == 'curiosity+dcg':
            curiosity = len(
                set([d.content for d in current_documents_list]) -
                set([d.content for d in previous_documents_list])) / float(
                    common_flags.NUM_DOCUMENTS_TO_RETRIEVE.value)
            dcg_current = self.state.score(
                identifier='dcg',
                documents_list=current_documents_list,
                k=common_flags.K.value)
            dcg_previous = self.state.score(
                identifier='dcg',
                documents_list=previous_documents_list,
                k=common_flags.K.value)
            ideal_dcg = utils.dcg_score(relevances=[1.] * common_flags.K.value)
            ndcg_improvement = (dcg_current - dcg_previous) / ideal_dcg
            return common_flags.REWARD_INTERPOLATION_VALUE.value * curiosity + (
                1 - common_flags.REWARD_INTERPOLATION_VALUE.value
            ) * ndcg_improvement

        else:
            reward = common_flags.REWARD.value
            raise NotImplementedError(
                f'Intermediate episode reward for type {reward} is not implemented.'
            )
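For intuition, here is a small worked example of the 'curiosity+dcg' intermediate reward. The flag names come from the snippet above, but the concrete values (K = 5, NUM_DOCUMENTS_TO_RETRIEVE = 5, REWARD_INTERPOLATION_VALUE = 0.5) and the relevance patterns are purely illustrative assumptions.

import math

k = 5                     # assumed value of common_flags.K
num_retrieved = 5         # assumed value of common_flags.NUM_DOCUMENTS_TO_RETRIEVE
interpolation = 0.5       # assumed value of common_flags.REWARD_INTERPOLATION_VALUE


def dcg(relevances):
    return sum(r / math.log2(i + 1) for i, r in enumerate(relevances, start=1))


# Step t-1 had one relevant document at rank 3; step t promotes it to rank 1
# and retrieves 2 documents whose content was not seen at step t-1.
dcg_previous = dcg([0., 0., 1., 0., 0.])    # 0.5
dcg_current = dcg([1., 0., 0., 0., 0.])     # 1.0
ideal_dcg = dcg([1.] * k)                   # ~2.95
curiosity = 2 / num_retrieved               # 0.4

ndcg_improvement = (dcg_current - dcg_previous) / ideal_dcg
reward = interpolation * curiosity + (1 - interpolation) * ndcg_improvement
print(round(reward, 3))                     # ~0.285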
Example #3
    def _compute_reward(
            self, current_documents: List[environment_pb2.Document]) -> float:
        if common_flags.REWARD.value == 'curiosity+dcg':
            curiosity = len(
                set([d.content for d in current_documents]) -
                set([d.content
                     for d in self.state.history[0].documents])) / float(
                         common_flags.NUM_DOCUMENTS_TO_RETRIEVE.value)
            dcg_current = self.state.score(identifier='dcg',
                                           documents_list=current_documents,
                                           k=common_flags.K.value)
            ideal_dcg = utils.dcg_score(relevances=[1.] * common_flags.K.value)
            ndcg = dcg_current / ideal_dcg
            return common_flags.REWARD_INTERPOLATION_VALUE.value * curiosity + (
                1 - common_flags.REWARD_INTERPOLATION_VALUE.value) * ndcg

        else:
            reward = common_flags.REWARD.value
            raise NotImplementedError(
                f'Episode reward for type {reward} is not implemented.')
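Both reward variants share the same curiosity term: the fraction of currently retrieved documents whose content does not appear in a reference list (the previous step's documents in Example #2, the documents from the first history entry in Example #3); the episode-level reward in Example #3 then uses the absolute NDCG of the current documents rather than a step-to-step improvement. A self-contained restatement of the curiosity term, with illustrative names and values only:

def curiosity(current_contents, reference_contents, num_retrieved):
    """Fraction of retrieved documents whose content is new w.r.t. a reference."""
    new = set(current_contents) - set(reference_contents)
    return len(new) / float(num_retrieved)


# 3 of the 5 currently retrieved documents were not in the reference list.
assert curiosity(['a', 'b', 'c', 'd', 'e'], ['a', 'b'], 5) == 0.6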