Example #1
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        cpe_details.reward_estimates = self.score_cpe("Reward", edp)

        if (
            self.metrics_to_score is not None
            and edp.logged_metrics is not None
            and self.action_names is not None
        ):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(metric)
                )

                metric_reward_edp = edp.set_metric_as_reward(i, len(self.action_names))

                cpe_details.metric_estimates[metric] = self.score_cpe(
                    metric, metric_reward_edp
                )

        if self.action_names is not None:
            if edp.optimal_q_values is not None:
                value_means = edp.optimal_q_values.mean(dim=0)
                cpe_details.q_value_means = {
                    action: float(value_means[i])
                    for i, action in enumerate(self.action_names)
                }
                value_stds = edp.optimal_q_values.std(dim=0)
                cpe_details.q_value_stds = {
                    action: float(value_stds[i])
                    for i, action in enumerate(self.action_names)
                }
            if edp.eval_action_idxs is not None:
                cpe_details.action_distribution = {
                    # pyre-fixme[6]: Expected `Union[_SupportsIndex, bytearray,
                    #  bytes, str, typing.SupportsFloat]` for 1st param but got
                    #  `ByteTensor`.
                    action: float((edp.eval_action_idxs == i).sum())
                    / edp.eval_action_idxs.shape[0]
                    for i, action in enumerate(self.action_names)
                }
        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            F.mse_loss(edp.logged_values, edp.model_values_for_logged_action)
        )
        # pyre-fixme[16]: `Evaluator` has no attribute `notify_observers`.
        self.notify_observers(cpe_details=cpe_details)
        return cpe_details
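
The per-action statistics in the example above are plain tensor reductions followed by dictionary comprehensions over the action names. Below is a minimal, self-contained sketch of the same computations in PyTorch; the tensors, shapes, and action names are made-up stand-ins for edp.optimal_q_values, edp.eval_action_idxs, and self.action_names, not part of the original snippet.

import torch

# Stand-ins for the fields read from the EvaluationDataPage above
# (shapes and names are illustrative assumptions).
optimal_q_values = torch.randn(1000, 3)          # (num_steps, num_actions)
eval_action_idxs = torch.randint(0, 3, (1000,))  # evaluated action index per step
action_names = ["no_op", "up", "down"]           # illustrative action names

# Per-action mean and std of the optimal Q-values, keyed by action name,
# mirroring cpe_details.q_value_means / cpe_details.q_value_stds.
value_means = optimal_q_values.mean(dim=0)
value_stds = optimal_q_values.std(dim=0)
q_value_means = {a: float(value_means[i]) for i, a in enumerate(action_names)}
q_value_stds = {a: float(value_stds[i]) for i, a in enumerate(action_names)}

# Fraction of evaluation steps on which each action was chosen,
# mirroring cpe_details.action_distribution.
action_distribution = {
    a: float((eval_action_idxs == i).sum()) / eval_action_idxs.shape[0]
    for i, a in enumerate(action_names)
}

print(q_value_means)
print(q_value_stds)
print(action_distribution)
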
Example #2
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        cpe_details.reward_estimates = self.score_cpe("Reward", edp)

        if (
            self.metrics_to_score is not None
            and edp.logged_metrics is not None
            and self.action_names is not None
        ):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(metric)
                )

                metric_reward_edp = edp.set_metric_as_reward(i, len(self.action_names))

                cpe_details.metric_estimates[metric] = self.score_cpe(
                    metric, metric_reward_edp
                )

        if self.action_names is not None:
            if edp.optimal_q_values is not None:
                value_means = edp.optimal_q_values.mean(dim=0)
                cpe_details.q_value_means = {
                    action: float(value_means[i])
                    for i, action in enumerate(self.action_names)
                }
                # pyre-ignore [16]: `Optional` has no attribute `std`
                value_stds = edp.optimal_q_values.std(dim=0)
                cpe_details.q_value_stds = {
                    action: float(value_stds[i])
                    for i, action in enumerate(self.action_names)
                }
            if edp.eval_action_idxs is not None:
                cpe_details.action_distribution = {
                    # pyre-ignore [16]: `bool` has no attribute `sum`
                    action: float((edp.eval_action_idxs == i).sum())
                    # pyre-ignore [16]: `Optional` has no attribute `shape`
                    / edp.eval_action_idxs.shape[0]
                    for i, action in enumerate(self.action_names)
                }
        # pyre-fixme[16]: `Evaluator` has no attribute `notify_observers`.
        self.notify_observers(cpe_details=cpe_details)
        return cpe_details
Example #3
    def evaluate_post_training(self, edp: EvaluationDataPage) -> CpeDetails:
        cpe_details = CpeDetails()

        cpe_details.reward_estimates = self.score_cpe("Reward", edp)

        if (self.metrics_to_score is not None
                and edp.logged_metrics is not None
                and self.action_names is not None):
            for i, metric in enumerate(self.metrics_to_score):
                logger.info(
                    "--------- Running CPE on metric: {} ---------".format(
                        metric))

                metric_reward_edp = edp.set_metric_as_reward(
                    i, len(self.action_names))

                cpe_details.metric_estimates[metric] = self.score_cpe(
                    metric, metric_reward_edp)

        if self.action_names is not None:
            if edp.optimal_q_values is not None:
                value_means = edp.optimal_q_values.mean(dim=0)
                cpe_details.q_value_means = {
                    action: float(value_means[i])
                    for i, action in enumerate(self.action_names)
                }
                value_stds = edp.optimal_q_values.std(dim=0)  # type: ignore
                cpe_details.q_value_stds = {
                    action: float(value_stds[i])
                    for i, action in enumerate(self.action_names)
                }
            if edp.eval_action_idxs is not None:
                cpe_details.action_distribution = {
                    action: float(
                        (edp.eval_action_idxs == i).sum())  # type: ignore
                    / edp.eval_action_idxs.shape[0]
                    for i, action in enumerate(self.action_names)
                }
        # Compute MC Loss on Aggregate Reward
        cpe_details.mc_loss = float(
            F.mse_loss(edp.logged_values, edp.model_values_for_logged_action))
        self.notify_observers(cpe_details=cpe_details)  # type: ignore
        return cpe_details
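
The "MC Loss on Aggregate Reward" computed in Examples #1 and #3 is simply the mean-squared error between the logged values and the model's values for the logged actions. A minimal standalone sketch, with random tensors standing in for edp.logged_values and edp.model_values_for_logged_action:

import torch
import torch.nn.functional as F

# Random stand-ins for edp.logged_values and edp.model_values_for_logged_action
# (shapes are illustrative assumptions).
logged_values = torch.randn(1000, 1)
model_values_for_logged_action = torch.randn(1000, 1)

# Monte Carlo loss on the aggregate reward: plain MSE between the two tensors,
# mirroring cpe_details.mc_loss.
mc_loss = float(F.mse_loss(logged_values, model_values_for_logged_action))
print(mc_loss)
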