Example 1
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        self.score_cpe("Reward", edp, cpe_details.reward_estimates)

        if (
            self.metrics_to_score is not None
            and edp.logged_metrics is not None
            and self.action_names is not None
        ):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(metric)
                )

                metric_reward_edp = edp.set_metric_as_reward(i, len(self.action_names))

                cpe_details.metric_estimates[metric] = CpeEstimateSet()
                self.score_cpe(
                    metric, metric_reward_edp, cpe_details.metric_estimates[metric]
                )

        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            torch.mean(torch.abs(edp.logged_values - edp.model_values))
        )

        return cpe_details
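
All three variants report the same Monte Carlo loss: the mean absolute error between the logged values and the model's predicted values on the evaluation page. Below is a minimal, self-contained sketch of that computation, with toy tensors standing in for edp.logged_values and edp.model_values (the shapes and numbers are assumptions for illustration only):

    import torch

    # Toy stand-ins for edp.logged_values and edp.model_values
    # (in the examples above these are just torch tensors of matching shape).
    logged_values = torch.tensor([[1.0], [0.5], [2.0]])
    model_values = torch.tensor([[0.8], [0.7], [1.5]])

    # Same aggregate-reward MC loss assigned to cpe_details.mc_loss above:
    # mean absolute error between logged and predicted values.
    mc_loss = float(torch.mean(torch.abs(logged_values - model_values)))
    print(mc_loss)  # 0.3 for these toy numbers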
Example 2
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        cpe_details.reward_estimates = self.score_cpe("Reward", edp)

        if (self.metrics_to_score is not None
                and edp.logged_metrics is not None
                and self.action_names is not None):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(
                        metric))

                metric_reward_edp = edp.set_metric_as_reward(
                    i, len(self.action_names))

                cpe_details.metric_estimates[metric] = self.score_cpe(
                    metric, metric_reward_edp)

        if self.action_names is not None:
            if edp.optimal_q_values is not None:
                value_means = edp.optimal_q_values.mean(dim=0)
                cpe_details.q_value_means = {
                    action: float(value_means[i])
                    for i, action in enumerate(self.action_names)
                }
                value_stds = edp.optimal_q_values.std(dim=0)  # type: ignore
                cpe_details.q_value_stds = {
                    action: float(value_stds[i])
                    for i, action in enumerate(self.action_names)
                }
            if edp.eval_action_idxs is not None:
                cpe_details.action_distribution = {
                    action: float(
                        (edp.eval_action_idxs == i).sum())  # type: ignore
                    / edp.eval_action_idxs.shape[0]
                    for i, action in enumerate(self.action_names)
                }

        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            torch.mean(torch.abs(edp.logged_values -
                                 edp.model_values))  # type: ignore
        )

        return cpe_details
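
Example 2 additionally summarizes the learned policy: the per-action mean and standard deviation of the optimal Q-values, and the empirical distribution of the actions chosen on the evaluation set. The following is a hedged, self-contained sketch of those three dictionaries on toy tensors (the action names, batch size, and tensor shapes are assumptions, not part of the original code):

    import torch

    action_names = ["no_op", "up", "down"]          # assumed action set
    optimal_q_values = torch.rand(100, 3)            # toy [batch, num_actions] Q-values
    eval_action_idxs = torch.randint(0, 3, (100,))   # toy greedy action index per example

    # Per-action mean and std of the optimal Q-values, keyed by action name.
    value_means = optimal_q_values.mean(dim=0)
    value_stds = optimal_q_values.std(dim=0)
    q_value_means = {a: float(value_means[i]) for i, a in enumerate(action_names)}
    q_value_stds = {a: float(value_stds[i]) for i, a in enumerate(action_names)}

    # Fraction of evaluation examples on which each action was selected.
    action_distribution = {
        a: float((eval_action_idxs == i).sum()) / eval_action_idxs.shape[0]
        for i, a in enumerate(action_names)
    }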
Example 3
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        self.score_cpe("Reward", edp, cpe_details.reward_estimates)

        if (self.metrics_to_score is not None
                and edp.logged_metrics is not None
                and self.action_names is not None):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(
                        metric))

                metric_reward_edp = edp.set_metric_as_reward(
                    i, len(self.action_names))

                cpe_details.metric_estimates[metric] = CpeEstimateSet()
                self.score_cpe(metric, metric_reward_edp,
                               cpe_details.metric_estimates[metric])

        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            torch.mean(torch.abs(edp.logged_values - edp.model_values)))

        return cpe_details
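
Every variant scores the extra logged metrics by reusing the reward CPE path: edp.set_metric_as_reward(i, len(self.action_names)) treats metric i as if it were the reward. The real method rebuilds a full EvaluationDataPage; the sketch below only illustrates the underlying idea of selecting one metric column as the reward signal (the tensor layout and the helper name are assumptions for illustration):

    import torch

    # Assumed layout: one logged value per example and per metric.
    logged_metrics = torch.tensor([[0.1, 3.0],
                                   [0.4, 1.0],
                                   [0.9, 2.0]])  # [batch, num_metrics]

    def metric_as_reward(logged_metrics: torch.Tensor, i: int) -> torch.Tensor:
        # Hypothetical helper: select metric column i so downstream CPE code
        # can score it exactly like the aggregate reward.
        return logged_metrics[:, i : i + 1]

    rewards_for_metric_0 = metric_as_reward(logged_metrics, 0)  # shape [batch, 1]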