def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
    """Run counterfactual policy evaluation (CPE) over one evaluation data page.

    Scores the aggregate reward, then each configured metric (by re-targeting
    the page so that metric acts as the reward), records per-action Q-value
    statistics and the evaluated action distribution, computes an MC loss on
    the aggregate reward, and notifies registered observers.
    """
    details = CpeDetails()
    details.reward_estimates = self.score_cpe("Reward", edp)

    metrics_available = (
        self.metrics_to_score is not None
        and edp.logged_metrics is not None
        and self.action_names is not None
    )
    if metrics_available:
        for idx, metric_name in enumerate(self.metrics_to_score):
            logger.info(
                "--------- Running CPE on metric: {} ---------".format(metric_name)
            )
            # Re-target the page so the chosen metric becomes the reward signal.
            metric_edp = edp.set_metric_as_reward(idx, len(self.action_names))
            details.metric_estimates[metric_name] = self.score_cpe(
                metric_name, metric_edp
            )

    if self.action_names is not None:
        if edp.optimal_q_values is not None:
            # Per-action mean/std of the optimal Q-values across the page.
            means = edp.optimal_q_values.mean(dim=0)
            stds = edp.optimal_q_values.std(dim=0)
            details.q_value_means = {
                name: float(means[j]) for j, name in enumerate(self.action_names)
            }
            details.q_value_stds = {
                name: float(stds[j]) for j, name in enumerate(self.action_names)
            }
        if edp.eval_action_idxs is not None:
            # Fraction of evaluated steps on which each action was selected.
            total = edp.eval_action_idxs.shape[0]
            # pyre-fixme[6]: Expected `Union[_SupportsIndex, bytearray,
            #  bytes, str, typing.SupportsFloat]` for 1st param but got
            #  `ByteTensor`.
            details.action_distribution = {
                name: float((edp.eval_action_idxs == j).sum()) / total
                for j, name in enumerate(self.action_names)
            }

    # Compute MC Loss on Aggregate Reward
    details.mc_loss = float(
        F.mse_loss(edp.logged_values, edp.model_values_for_logged_action)
    )
    # pyre-fixme[16]: `Evaluator` has no attribute `notify_observers`.
    self.notify_observers(cpe_details=details)
    return details
def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
    """Evaluate a trained policy on an EvaluationDataPage via CPE.

    Produces reward estimates, per-metric CPE estimates (when metric data
    and action names are available), Q-value mean/std per action, the
    observed action distribution, and an MC loss on the aggregate reward.
    Observers are notified with the assembled CpeDetails before returning.
    """
    result = CpeDetails()
    result.reward_estimates = self.score_cpe("Reward", edp)

    if (
        self.metrics_to_score is not None
        and edp.logged_metrics is not None
        and self.action_names is not None
    ):
        num_actions = len(self.action_names)
        for metric_idx, metric in enumerate(self.metrics_to_score):
            logger.info(
                "--------- Running CPE on metric: {} ---------".format(metric)
            )
            # Swap the metric in as the reward so the same scorer can be reused.
            rewired = edp.set_metric_as_reward(metric_idx, num_actions)
            result.metric_estimates[metric] = self.score_cpe(metric, rewired)

    if self.action_names is not None:
        if edp.optimal_q_values is not None:
            q_means = edp.optimal_q_values.mean(dim=0)
            result.q_value_means = {
                act: float(q_means[k]) for k, act in enumerate(self.action_names)
            }
            q_stds = edp.optimal_q_values.std(dim=0)  # type: ignore
            result.q_value_stds = {
                act: float(q_stds[k]) for k, act in enumerate(self.action_names)
            }
        if edp.eval_action_idxs is not None:
            # How often each action index appears among the evaluated actions.
            n_rows = edp.eval_action_idxs.shape[0]
            result.action_distribution = {
                act: float((edp.eval_action_idxs == k).sum()) / n_rows  # type: ignore
                for k, act in enumerate(self.action_names)
            }

    # Compute MC Loss on Aggregate Reward
    result.mc_loss = float(
        F.mse_loss(edp.logged_values, edp.model_values_for_logged_action)
    )
    self.notify_observers(cpe_details=result)  # type: ignore
    return result
def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
    """Run post-training CPE on the given evaluation data page.

    Fills a CpeDetails with the aggregate reward estimates, optional
    per-metric estimates, per-action Q-value means/stds, and the action
    distribution over the evaluated steps, then notifies observers.
    NOTE: this variant does not compute an MC loss.
    """
    cpe = CpeDetails()
    cpe.reward_estimates = self.score_cpe("Reward", edp)

    should_score_metrics = (
        self.metrics_to_score is not None
        and edp.logged_metrics is not None
        and self.action_names is not None
    )
    if should_score_metrics:
        for pos, metric in enumerate(self.metrics_to_score):
            logger.info(
                "--------- Running CPE on metric: {} ---------".format(metric)
            )
            # Treat this metric as the reward and re-score the page for it.
            as_reward = edp.set_metric_as_reward(pos, len(self.action_names))
            cpe.metric_estimates[metric] = self.score_cpe(metric, as_reward)

    if self.action_names is not None:
        if edp.optimal_q_values is not None:
            avg_q = edp.optimal_q_values.mean(dim=0)
            cpe.q_value_means = {
                a: float(avg_q[j]) for j, a in enumerate(self.action_names)
            }
            # pyre-ignore [16]: `Optional` has no attribute `std`
            std_q = edp.optimal_q_values.std(dim=0)
            cpe.q_value_stds = {
                a: float(std_q[j]) for j, a in enumerate(self.action_names)
            }
        if edp.eval_action_idxs is not None:
            # Share of evaluated steps assigned to each action index.
            cpe.action_distribution = {
                # pyre-ignore [16]: `bool` has no attribute `sum`
                a: float((edp.eval_action_idxs == j).sum())
                # pyre-ignore [16]: `Optional` has no attribute `shape`
                / edp.eval_action_idxs.shape[0]
                for j, a in enumerate(self.action_names)
            }

    # pyre-fixme[16]: `Evaluator` has no attribute `notify_observers`.
    self.notify_observers(cpe_details=cpe)
    return cpe
def get_last_cpe_results(self):
    """Return the most recent CPE result, or an empty CpeDetails when none exist."""
    if self.results:
        return self.results[-1]
    return CpeDetails()