def retrieval_score(self, k: int, documents_list: List[environment_pb2.Document], gold_answer: List[str], score_type: str = 'dcg') -> float:
    """Return the retrieval score @k for documents_list.

    A document's relevance is binary: 1.0 if any gold answer string is
    present in its content, else 0.0. Only the first k retrieved documents
    contribute to the final metric.

    Args:
      k: Rank cutoff for the metric.
      documents_list: Retrieved documents, in ranked order.
      gold_answer: Gold answer strings searched for in document contents.
      score_type: Metric to compute; one of 'dcg', 'ndcg' or 'mrr'.

    Returns:
      The requested retrieval metric over the top-k relevance list.

    Raises:
      NotImplementedError: If score_type names an unsupported metric.
    """
    # Binary relevance per retrieved document (full list first, then cut).
    all_relevances = [
        float(utils.gold_answer_present(doc.content, gold_answer))
        for doc in documents_list
    ]
    # Relevance @k only considers the top-k ranked documents.
    top_k_relevances = all_relevances[:k]
    # Dispatch table instead of an if/elif chain; unknown keys fall through
    # to the NotImplementedError below.
    scorers = {
        'dcg': utils.dcg_score,
        'ndcg': utils.ndcg_score,
        'mrr': utils.mrr_score,
    }
    scorer = scorers.get(score_type)
    if scorer is None:
        raise NotImplementedError(
            f'Score type {score_type} is not yet implemented.')
    return scorer(top_k_relevances)
def _intermediate_reward(self, step: int) -> float:
    """Computes an 'intermediate' reward after each step.

    r_t = S(d_t | q) - S(d_t-1 | q).

    Intermediate rewards (which make up the bulk of our reward scheme) get
    computed after each step and, usually, quantify the "goodness" of local
    change. For example, the improvement (or decrease) in a metric such as
    answer F1, relevant document recall, ... between the previous step and
    the current step.

    Args:
      step: The step for which we want to compute the intermediate reward.

    Returns:
      The intermediate reward, as defined by the configuration.

    Raises:
      NotImplementedError: If the configured reward type is unsupported.
    """
    assert step >= 2, (
        f'Intermediate reward computation requires at least 2 '
        f'history entries. Requested was "{step}".')
    docs_now = self._get_current_documents_list(step)
    docs_before = self._get_previous_documents_list(step)
    reward_type = common_flags.REWARD.value
    # Guard clause: only the curiosity+dcg scheme is implemented.
    if reward_type != 'curiosity+dcg':
        raise NotImplementedError(
            f'Intermediate episode reward for type {reward_type} is not implemented.'
        )
    # Curiosity: fraction of unique document contents that are new at this
    # step relative to the previous step.
    novel_contents = (
        {d.content for d in docs_now} - {d.content for d in docs_before})
    curiosity = len(novel_contents) / float(
        common_flags.NUM_DOCUMENTS_TO_RETRIEVE.value)
    # nDCG improvement between the two consecutive steps, normalized by the
    # ideal DCG (all top-k documents relevant).
    dcg_now = self.state.score(
        identifier='dcg', documents_list=docs_now, k=common_flags.K.value)
    dcg_before = self.state.score(
        identifier='dcg', documents_list=docs_before, k=common_flags.K.value)
    ideal_dcg = utils.dcg_score(relevances=[1.] * common_flags.K.value)
    ndcg_improvement = (dcg_now - dcg_before) / ideal_dcg
    # Linear interpolation between the curiosity and nDCG-improvement terms.
    alpha = common_flags.REWARD_INTERPOLATION_VALUE.value
    return alpha * curiosity + (1 - alpha) * ndcg_improvement
def _compute_reward(
    self, current_documents: List[environment_pb2.Document]) -> float:
    """Computes the episode-level reward for the current document list.

    Under the 'curiosity+dcg' scheme the reward interpolates between:
      * curiosity: the fraction of unique document contents not already
        present in the first history entry, and
      * nDCG: the DCG of the current documents normalized by the ideal DCG
        (all top-k documents relevant).

    Args:
      current_documents: The documents retrieved at the end of the episode.

    Returns:
      The interpolated episode reward.

    Raises:
      NotImplementedError: If the configured reward type is unsupported.
    """
    reward_type = common_flags.REWARD.value
    # Guard clause: only the curiosity+dcg scheme is implemented.
    if reward_type != 'curiosity+dcg':
        raise NotImplementedError(
            f'Episode reward for type {reward_type} is not implemented.')
    # Curiosity: unique contents beyond those in the initial retrieval.
    initial_contents = {d.content for d in self.state.history[0].documents}
    novel_contents = {d.content for d in current_documents} - initial_contents
    curiosity = len(novel_contents) / float(
        common_flags.NUM_DOCUMENTS_TO_RETRIEVE.value)
    # nDCG of the final document list.
    dcg_now = self.state.score(
        identifier='dcg',
        documents_list=current_documents,
        k=common_flags.K.value)
    ideal_dcg = utils.dcg_score(relevances=[1.] * common_flags.K.value)
    ndcg = dcg_now / ideal_dcg
    # Linear interpolation between the curiosity and nDCG terms.
    alpha = common_flags.REWARD_INTERPOLATION_VALUE.value
    return alpha * curiosity + (1 - alpha) * ndcg