from typing import Dict

import numpy as np
from sklearn.metrics import roc_auc_score

# assumed imports for these snippets: the estimators live in obp's OPE module
from obp.ope import ImportanceWeightEstimator, PropensityScoreEstimator, RegressionModel
from obp.types import BanditFeedback

# `binary_model_dict` (candidate binary classifiers) and `hyperparams` (their
# hyperparameter settings) are defined in the surrounding test module.


def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback,
    random_action_dist: np.ndarray,
) -> None:
    """
    Test the performance of the importance weight estimator on synthetic bandit data
    with a random evaluation policy, using each binary classifier in `binary_model_dict`
    as the base model and checking that its out-of-fold AUC exceeds 0.5.
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    action_dist = random_action_dist
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    fit_methods = ["sample", "raw"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            importance_weight_estimator = ImportanceWeightEstimator(
                n_actions=bandit_feedback["n_actions"],
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
                len_list=1,
            )
            # train importance weight estimator on logged bandit feedback data
            estimated_importance_weight = importance_weight_estimator.fit_predict(
                context=bandit_feedback["context"],
                action=bandit_feedback["action"],
                action_dist=action_dist,
                n_folds=2,  # 2-fold cross-fitting
                random_state=random_state,
                evaluate_model_performance=True,
            )
            assert np.all(
                estimated_importance_weight >= 0
            ), "estimated_importance_weight must be non-negative"
            # gather out-of-fold labels and predicted probabilities from cross-fitting
            y_test = np.concatenate(
                importance_weight_estimator.eval_result["y"]
            ).flatten()
            y_pred = np.concatenate(
                importance_weight_estimator.eval_result["proba"]
            ).flatten()
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=y_test,
                y_score=y_pred,
            )

    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
Example #2
def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the performance of ope estimators using synthetic bandit data and random evaluation policy
    when the propensity score estimator is estimated by a logistic regression
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    for model_name, model in binary_model_dict.items():
        propensity_score_estimator = PropensityScoreEstimator(
            n_actions=bandit_feedback["n_actions"],
            base_model=model(**hyperparams[model_name]),
            len_list=1,
        )
        # train propensity score estimator on logged bandit feedback data
        estimated_propensity_score = propensity_score_estimator.fit_predict(
            context=bandit_feedback["context"],
            action=bandit_feedback["action"],
            n_folds=2,  # 2-fold cross-fitting
            random_state=random_state,
            evaluate_model_performance=True,
        )
        assert np.all(
            estimated_propensity_score >= 0
        ), "estimated_propensity_score must be non-negative"
        # gather out-of-fold labels and predicted class probabilities from cross-fitting
        y_test = np.concatenate(propensity_score_estimator.eval_result["y"]).flatten()
        y_pred = np.concatenate(propensity_score_estimator.eval_result["proba"])
        auc_scores[model_name] = roc_auc_score(
            y_true=y_test, y_score=y_pred, multi_class="ovo"
        )

    for model_name in auc_scores:
        print(f"AUC (macro-ovo) of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"
Example #3
def test_performance_of_binary_outcome_models(
    fixed_synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the performance of ope estimators using synthetic bandit data and random evaluation policy
    when the regression model is estimated by a logistic regression
    """
    bandit_feedback = fixed_synthetic_bandit_feedback.copy()
    expected_reward = np.expand_dims(bandit_feedback["expected_reward"], axis=-1)
    action_dist = random_action_dist
    # compute ground truth policy value using expected reward
    q_pi_e = np.average(expected_reward[:, :, 0], weights=action_dist[:, :, 0], axis=1)
    # compute statistics of ground truth policy value
    gt_mean = q_pi_e.mean()
    random_state = 12345
    auc_scores: Dict[str, float] = {}
    # check ground truth
    print(f"gt_mean: {gt_mean}")
    # check the performance of regression models using the doubly robust criterion
    # (|\hat{q} - q| <= |q| should hold with high probability)
    dr_criteria_pass_rate = 0.8
    fit_methods = ["normal", "iw", "mrdr"]
    for fit_method in fit_methods:
        for model_name, model in binary_model_dict.items():
            regression_model = RegressionModel(
                n_actions=bandit_feedback["n_actions"],
                len_list=int(bandit_feedback["position"].max() + 1),
                action_context=bandit_feedback["action_context"],
                base_model=model(**hyperparams[model_name]),
                fitting_method=fit_method,
            )
            if fit_method == "normal":
                # train regression model on logged bandit feedback data
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            else:
                # train regression model on logged bandit feedback data
                estimated_rewards_by_reg_model = regression_model.fit_predict(
                    context=bandit_feedback["context"],
                    action=bandit_feedback["action"],
                    reward=bandit_feedback["reward"],
                    pscore=bandit_feedback["pscore"],
                    position=bandit_feedback["position"],
                    action_dist=action_dist,
                    n_folds=3,  # 3-fold cross-fitting
                    random_state=random_state,
                )
            auc_scores[model_name + "_" + fit_method] = roc_auc_score(
                y_true=bandit_feedback["reward"],
                y_score=estimated_rewards_by_reg_model[
                    np.arange(bandit_feedback["reward"].shape[0]),
                    bandit_feedback["action"],
                    bandit_feedback["position"],
                ],
            )
            # check the DR criterion
            dr_criteria = np.abs(gt_mean - estimated_rewards_by_reg_model) - np.abs(
                gt_mean
            )
            print(
                f"DR criterion is satisfied with probability {np.mean(dr_criteria <= 0)} ------ model: {model_name} ({fit_method})"
            )
            assert (
                np.mean(dr_criteria <= 0) >= dr_criteria_pass_rate
            ), f"the DR criterion should be satisfied with probability at least {dr_criteria_pass_rate}"

    for model_name in auc_scores:
        print(f"AUC of {model_name} is {auc_scores[model_name]}")
        assert (
            auc_scores[model_name] > 0.5
        ), f"AUC of {model_name} should be greater than 0.5"