def test_meta_estimation_format(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of OffPolicyEvaluation
    """
    # single ope estimator
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    assert ope_.estimate_policy_values(random_action_dist) == {
        "dm": mock_policy_value
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod]) returns a wrong value"
    assert ope_.estimate_intervals(random_action_dist) == {
        "dm": mock_confidence_interval
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod]) returns a wrong value"
    # action_dist must be 3-dimensional when using OffPolicyEvaluation
    with pytest.raises(AssertionError, match=r"action_dist must be 3-dimensional.*"):
        ope_.estimate_policy_values(random_action_dist[:, :, 0])
    # multiple ope estimators
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm, ipw]
    )
    assert ope_.estimate_policy_values(random_action_dist) == {
        "dm": mock_policy_value,
        "ipw": mock_policy_value + ipw.eps,
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod, IPW]) returns a wrong value"
    assert ope_.estimate_intervals(random_action_dist) == {
        "dm": mock_confidence_interval,
        "ipw": {k: v + ipw.eps for k, v in mock_confidence_interval.items()},
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod, IPW]) returns a wrong value"

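# NOTE: a minimal sketch of the module-level test doubles that the tests in
# this file rely on. The real definitions live elsewhere in the test module;
# the names and values below (mock_policy_value, the eps shift,
# DirectMethodMock, InverseProbabilityWeightingMock) are assumptions for
# illustration only:
#
#     mock_policy_value = 0.5
#     mock_confidence_interval = {
#         "mean": mock_policy_value,
#         "95.0% CI (lower)": mock_policy_value - 0.3,
#         "95.0% CI (upper)": mock_policy_value + 0.3,
#     }
#     dm = DirectMethodMock()  # estimate_policy_value always returns mock_policy_value
#     ipw = InverseProbabilityWeightingMock(eps=0.02)  # returns mock_policy_value + eps
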
def test_meta_estimate_intervals_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha,
    n_bootstrap_samples,
    random_state,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
    # estimate_intervals is also called inside summarize_off_policy_estimates
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )

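# NOTE: this test (like the other parametrized tests in this file) is assumed
# to be driven by @pytest.mark.parametrize decorators defined alongside it:
# `description_1` labels the (valid) input-data case and `description_2`
# describes the invalid confidence-interval arguments, which is why it doubles
# as the `match` pattern above. A hedged sketch of the shape of one such case
# (parameter values and the error message are illustrative, not the real
# fixture data):
#
#     @pytest.mark.parametrize(
#         "alpha, n_bootstrap_samples, random_state, description_2",
#         [(0.05, -100, 12345, "n_bootstrap_samples == -100, must be >= 1")],
#     )
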
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_importance_weights",
            "p_e_a",
            "pi_b",
            "context",
            "action_embed",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs is called inside each of the following methods
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )

def test_meta_estimate_intervals_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha: float,
    n_bootstrap_samples: int,
    random_state: int,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using valid data
    """
    # single ope estimator
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod]) returns a wrong value"
    # multiple ope estimators
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm, ipw]
    )
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval,
        "ipw": {k: v + ipw.eps for k, v in mock_confidence_interval.items()},
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod, IPW]) returns a wrong value"

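# For context, estimate_intervals computes a nonparametric bootstrap over
# rounds. A minimal standalone sketch of the quantity being mocked above
# (the function and variable names here are illustrative, not obp internals):
#
#     def bootstrap_ci(samples, alpha=0.05, n_bootstrap_samples=100, random_state=12345):
#         rng = np.random.default_rng(random_state)
#         boot_means = [
#             rng.choice(samples, size=samples.shape[0], replace=True).mean()
#             for _ in range(n_bootstrap_samples)
#         ]
#         return {
#             "mean": np.mean(boot_means),
#             f"{100 * (1 - alpha):.1f}% CI (lower)": np.percentile(boot_means, 100 * (alpha / 2)),
#             f"{100 * (1 - alpha):.1f}% CI (upper)": np.percentile(boot_means, 100 * (1 - alpha / 2)),
#         }
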
def test_meta_estimated_rewards_by_reg_model_inputs(
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the estimate_policy_values/estimate_intervals functions wrt estimated_rewards_by_reg_model
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[DirectMethod()]
    )
    # a valid (3-dimensional) action_dist, so that the missing
    # estimated_rewards_by_reg_model is what triggers the ValueError
    action_dist = np.zeros(
        (
            synthetic_bandit_feedback["n_rounds"],
            synthetic_bandit_feedback["n_actions"],
            1,
        )
    )
    with pytest.raises(ValueError):
        ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )
    with pytest.raises(ValueError):
        ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )

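# For reference, a valid `action_dist` for OffPolicyEvaluation is
# 3-dimensional with shape (n_rounds, n_actions, len_list) and sums to one
# over the action axis. A minimal sketch of a uniform random policy
# (variable names here are illustrative):
#
#     n_rounds = synthetic_bandit_feedback["n_rounds"]
#     n_actions = synthetic_bandit_feedback["n_actions"]
#     uniform_action_dist = np.ones((n_rounds, n_actions, 1)) / n_actions
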
def test_response_format_of_standard_ope_estimators_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of the standard OPE estimators using synthetic bandit data and a random evaluation policy
    """
    expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis]
    action_dist = random_action_dist
    # test all standard estimators (the tuning variants are covered in the extended test below)
    all_estimators = ope.__all_estimators__
    estimators = [
        getattr(ope.estimators, estimator_name)() for estimator_name in all_estimators
    ]
    # conduct OPE
    ope_instance = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=estimators
    )
    estimated_policy_value = ope_instance.estimate_policy_values(
        action_dist=action_dist, estimated_rewards_by_reg_model=expected_reward
    )
    estimated_intervals = ope_instance.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        random_state=12345,
    )
    # check the format of OPE outputs
    for key in estimated_policy_value:
        # check the keys of the output dictionary of the estimate_intervals method
        assert set(estimated_intervals[key].keys()) == set(
            ["mean", "95.0% CI (lower)", "95.0% CI (upper)"]
        ), f"Confidence interval of {key} has invalid keys"
        # check the relationship between the point estimates and the confidence bounds
        assert (
            estimated_intervals[key]["95.0% CI (lower)"] <= estimated_policy_value[key]
        ) and (
            estimated_intervals[key]["95.0% CI (upper)"] >= estimated_policy_value[key]
        ), f"Estimated policy value of {key} is not included in the estimated interval of that estimator"
        assert (
            estimated_intervals[key]["mean"]
            >= estimated_intervals[key]["95.0% CI (lower)"]
        ), f"Invalid confidence interval of {key}: lower bound > mean"
        assert (
            estimated_intervals[key]["mean"]
            <= estimated_intervals[key]["95.0% CI (upper)"]
        ), f"Invalid confidence interval of {key}: upper bound < mean"

def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shapes of the two input arrays differ
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs is called inside each of the following methods
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )

def test_response_format_of_ope_estimators_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of all OPE estimators (including the tuning variants) using synthetic bandit data and a random evaluation policy
    """
    expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis]
    action_dist = random_action_dist
    # test all estimators
    all_estimators = ope.__all_estimators__
    estimators_standard = [
        getattr(ope.estimators, estimator_name)() for estimator_name in all_estimators
    ]
    all_estimators_tuning = ope.__all_estimators_tuning__
    estimators_tuning = [
        getattr(ope.estimators_tuning, estimator_name)(
            lambdas=[1, 100, 10000, np.inf],
            tuning_method=tuning_method,
        )
        for estimator_name in all_estimators_tuning
        for tuning_method in ["slope", "mse"]
    ]
    all_estimators_tuning_sg = ope.__all_estimators_tuning_sg__
    estimators_tuning_sg = [
        getattr(ope.estimators_tuning, estimator_name)(
            lambdas=[0.001, 0.01, 0.1, 1.0],
            tuning_method=tuning_method,
        )
        for estimator_name in all_estimators_tuning_sg
        for tuning_method in ["slope", "mse"]
    ]
    estimators = estimators_standard + estimators_tuning + estimators_tuning_sg
    # pass the true pscore and a precomputed importance-weight vector
    # (pi_e(a|x) / pi_b(a|x)) instead of estimating them
    estimated_pscore = None
    estimated_importance_weights = (
        random_action_dist[
            np.arange(synthetic_bandit_feedback["action"].shape[0]),
            synthetic_bandit_feedback["action"],
            np.zeros(
                synthetic_bandit_feedback["action"].shape[0], dtype=int
            ),  # position is None
        ]
        / synthetic_bandit_feedback["pscore"]
    )
    # conduct OPE
    ope_instance = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=estimators
    )
    estimated_policy_value = ope_instance.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        estimated_pscore=estimated_pscore,
        estimated_importance_weights=estimated_importance_weights,
    )
    estimated_intervals = ope_instance.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        estimated_pscore=estimated_pscore,
        estimated_importance_weights=estimated_importance_weights,
        random_state=12345,
    )
    # check the format of OPE outputs
    for key in estimated_policy_value:
        # check the keys of the output dictionary of the estimate_intervals method
        assert set(estimated_intervals[key].keys()) == set(
            ["mean", "95.0% CI (lower)", "95.0% CI (upper)"]
        ), f"Confidence interval of {key} has invalid keys"
        # check the relationship between the point estimates and the confidence bounds
        assert (
            estimated_intervals[key]["95.0% CI (lower)"] <= estimated_policy_value[key]
        ) and (
            estimated_intervals[key]["95.0% CI (upper)"] >= estimated_policy_value[key]
        ), f"Estimated policy value of {key} is not included in the estimated interval of that estimator"
        assert (
            estimated_intervals[key]["mean"]
            >= estimated_intervals[key]["95.0% CI (lower)"]
        ), f"Invalid confidence interval of {key}: lower bound > mean"
        assert (
            estimated_intervals[key]["mean"]
            <= estimated_intervals[key]["95.0% CI (upper)"]
        ), f"Invalid confidence interval of {key}: upper bound < mean"

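# A hedged end-to-end sketch of the public API exercised by these tests
# (parameter names follow the obp interface at the time of writing; verify
# against your installed version):
#
#     from obp.dataset import SyntheticBanditDataset
#     from obp.ope import DirectMethod, InverseProbabilityWeighting, OffPolicyEvaluation
#
#     dataset = SyntheticBanditDataset(n_actions=10, dim_context=5, random_state=12345)
#     bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)
#     # uniform-random evaluation policy, shape (n_rounds, n_actions, len_list)
#     action_dist = np.ones((10000, 10, 1)) / 10
#     ope_ = OffPolicyEvaluation(
#         bandit_feedback=bandit_feedback,
#         ope_estimators=[DirectMethod(), InverseProbabilityWeighting()],
#     )
#     # DirectMethod needs estimated_rewards_by_reg_model; the true expected
#     # reward is used here as a stand-in, as in the tests above
#     ope_.estimate_policy_values(
#         action_dist=action_dist,
#         estimated_rewards_by_reg_model=bandit_feedback["expected_reward"][:, :, np.newaxis],
#     )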