from typing import Dict

import numpy as np
from sklearn.metrics import roc_auc_score

# assumed imports for these snippets: the estimators live in obp's OPE module
from obp.ope import ImportanceWeightEstimator, PropensityScoreEstimator, RegressionModel
from obp.types import BanditFeedback

# `binary_model_dict` (candidate binary classifiers) and `hyperparams` (their
# hyperparameter settings) are defined in the surrounding test module.


def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback,
    random_action_dist: np.ndarray,
) -> None:
    """
    Test the performance of the importance weight estimator on synthetic bandit data
    with a random evaluation policy, using each binary classifier in `binary_model_dict`
    as the base model and checking that its out-of-fold AUC exceeds 0.5.
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    action_dist = random_action_dist
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    fit_methods = ["sample", "raw"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            importance_weight_estimator = ImportanceWeightEstimator(
                n_actions=bandit_feedback["n_actions"],
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
                len_list=1,
            )
            # train importance weight estimator on logged bandit feedback data
            estimated_importance_weight = importance_weight_estimator.fit_predict(
                context=bandit_feedback["context"],
                action=bandit_feedback["action"],
                action_dist=action_dist,
                n_folds=2,  # 2-fold cross-fitting
                random_state=random_state,
                evaluate_model_performance=True,
            )
            assert np.all(
                estimated_importance_weight >= 0
            ), "estimated_importance_weight must be non-negative"
            # gather out-of-fold labels and predicted probabilities from cross-fitting
            y_test = np.concatenate(
                importance_weight_estimator.eval_result["y"]
            ).flatten()
            y_pred = np.concatenate(
                importance_weight_estimator.eval_result["proba"]
            ).flatten()
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=y_test,
                y_score=y_pred,
            )

    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
Example #2
def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the performance of ope estimators using synthetic bandit data and random evaluation policy
    when the propensity score estimator is estimated by a logistic regression
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    for model_name, model in binary_model_dict.items():
        propensity_score_estimator = PropensityScoreEstimator(
            n_actions=bandit_feedback["n_actions"],
            base_model=model(**hyperparams[model_name]),
            len_list=1,
        )
        # train propensity score estimator on logged bandit feedback data
        estimated_propensity_score = propensity_score_estimator.fit_predict(
            context=bandit_feedback["context"],
            action=bandit_feedback["action"],
            n_folds=2,  # 2-fold cross-fitting
            random_state=random_state,
            evaluate_model_performance=True,
        )
        assert np.all(
            estimated_propensity_score >= 0
        ), "estimated_propensity_score must be non-negative"
        # gather out-of-fold labels and predicted class probabilities from cross-fitting
        y_test = np.concatenate(propensity_score_estimator.eval_result["y"]).flatten()
        y_pred = np.concatenate(propensity_score_estimator.eval_result["proba"])
        auc_scores[model_name] = roc_auc_score(
            y_true=y_test, y_score=y_pred, multi_class="ovo"
        )

    for model_name in auc_scores:
        print(f"AUC (macro-ovo) of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
Example #3
def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the performance of ope estimators using synthetic bandit data and random evaluation policy
    when the regression model is estimated by a logistic regression
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    expected_reward = np.expand_dims(bandit_feedback["expected_reward"], axis=-1)
    action_dist = random_action_dist
    # compute ground truth policy value using expected reward
    q_pi_e = np.average(expected_reward[:, :, 0], weights=action_dist[:, :, 0], axis=1)
    # compute statistics of ground truth policy value
    gt_mean = q_pi_e.mean()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    # check ground truth
    print(f"gt_mean: {gt_mean}")
    # check the performance of regression models using the doubly robust criterion
    # (|\hat{q} - q| <= |q| should hold with high probability)
    dr_criteria_pass_rate = 0.8
    fit_methods = ["normal", "iw", "mrdr"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            regression_model = RegressionModel(
                n_actions=bandit_feedback["n_actions"],
                len_list=int(bandit_feedback["position"].max() + 1),
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
            )
            if fit_method == "normal":
                # train regression model on logged bandit feedback data
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            else:
                # train regression model on logged bandit feedback data
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    pscore=bandit_feedback["pscore"],
                    position=bandit_feedback["position"],
                    action_dist=action_dist,
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=bandit_feedback["reward"],
                y_score=estimated_rewards_by_reg_model[
                    np.arange(bandit_feedback["reward"].shape[0]),
                    bandit_feedback["action"],
                    bandit_feedback["position"],
                ],
            )
            # check the DR criterion
            dr_criteria = np.abs(gt_mean - estimated_rewards_by_reg_model) - np.abs(
                gt_mean
            )
            print(
                f"DR criterion is satisfied with probability {np.mean(dr_criteria <= 0)} ------ model: {model_name} ({fit_method})"
            )
            assert (
                np.mean(dr_criteria <= 0) >= dr_criteria_pass_rate
            ), f"the DR criterion should be satisfied with probability at least {dr_criteria_pass_rate}"

    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"