Example #1
def test_meta_estimate_intervals_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha,
    n_bootstrap_samples,
    random_state,
    err,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using invalid data
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm])
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
    # estimate_intervals function is called in summarize_off_policy_estimates
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
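
The test above gets its arguments from pytest parametrization, which is not shown in the snippet. A minimal sketch of how the invalid cases could be wired up; the case list, its concrete values, and the expected error message are illustrative assumptions, not obp's actual test fixtures:

import numpy as np
import pytest

# hypothetical invalid-input case: n_bootstrap_samples passed as a str
invalid_input_of_estimate_intervals = [
    (
        np.ones((5, 4, 1)),  # action_dist
        np.ones((5, 4, 1)),  # estimated_rewards_by_reg_model
        "valid arrays",      # description_1
        0.05,                # alpha
        "10",                # n_bootstrap_samples (wrong type)
        12345,               # random_state
        TypeError,           # err
        "n_bootstrap_samples must be an instance of",  # description_2
    ),
]

@pytest.mark.parametrize(
    "action_dist, estimated_rewards_by_reg_model, description_1, alpha,"
    " n_bootstrap_samples, random_state, err, description_2",
    invalid_input_of_estimate_intervals,
)
def test_meta_estimate_intervals_using_invalid_input_data(
    action_dist, estimated_rewards_by_reg_model, description_1, alpha,
    n_bootstrap_samples, random_state, err, description_2,
    synthetic_multi_bandit_feedback,
):
    ...  # body as in Example #1 above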
Example #2
def test_meta_evaluate_performance_of_estimators_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    metric,
    ground_truth_policy_value,
    err,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of evaluate_performance_of_estimators using invalid data
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm])
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=ground_truth_policy_value,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            action_dist=action_dist,
            metric=metric,
        )
    # evaluate_performance_of_estimators is called in summarize_estimators_comparison
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=ground_truth_policy_value,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            action_dist=action_dist,
            metric=metric,
        )
Example #3
def test_meta_post_init(
        synthetic_multi_bandit_feedback: BanditFeedback) -> None:
    """
    Test the __post_init__ function
    """
    # __post_init__ saves the latter estimator when the same estimator name is used
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[ipw, ipw2])
    assert ope_.ope_estimators_ == {
        "ipw": ipw2
    }, "__post_init__ returns a wrong value"
    # __post_init__ can handle the same estimator if the estimator names are different
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[ipw, ipw3])
    assert ope_.ope_estimators_ == {
        "ipw": ipw,
        "ipw3": ipw3,
    }, "__post_init__ returns a wrong value"
    # __post_init__ raises RuntimeError when necessary_keys are not included in the bandit_feedback
    necessary_keys = ["action", "position", "reward"]
    for i in range(len(necessary_keys)):
        for deleted_keys in itertools.combinations(necessary_keys, i + 1):
            invalid_bandit_feedback_dict = {key: "_" for key in necessary_keys}
            # delete the selected keys from the feedback dict
            for k in deleted_keys:
                del invalid_bandit_feedback_dict[k]
            with pytest.raises(RuntimeError, match=r"Missing key*"):
                _ = MultiLoggersOffPolicyEvaluation(
                    bandit_feedback=invalid_bandit_feedback_dict,
                    ope_estimators=[ipw])
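
These examples reference mock estimators (dm, ipw, ipw2, ipw3) and constants (mock_policy_value, mock_confidence_interval) defined elsewhere in the test module. A minimal sketch of what such mocks could look like; the eps-offset mechanism and field names are assumptions, and real test doubles would subclass obp's BaseOffPolicyEstimator:

from dataclasses import dataclass

mock_policy_value = 0.5  # assumed fixed value returned by every mock

@dataclass
class InverseProbabilityWeightingMock:
    # hypothetical mock: reports mock_policy_value shifted by eps so that
    # each instance's output is distinguishable in assertions
    eps: float = 0.1
    estimator_name: str = "ipw"

    def estimate_policy_value(self, **kwargs) -> float:
        return mock_policy_value + self.eps

ipw = InverseProbabilityWeightingMock()                        # name "ipw"
ipw2 = InverseProbabilityWeightingMock(eps=0.02)               # same name as ipw
ipw3 = InverseProbabilityWeightingMock(estimator_name="ipw3")  # distinct name

Keying estimators by estimator_name is what makes [ipw, ipw2] collapse to {"ipw": ipw2} in the first assertion above, while [ipw, ipw3] keeps both entries.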
Example #4
def test_meta_evaluate_performance_of_estimators_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    metric,
    ground_truth_policy_value,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of evaluate_performance_of_estimators using valid data
    """
    if metric == "relative-ee":
        # calculate relative-ee
        eval_metric_ope_dict = {
            "ipw":
            np.abs((mock_policy_value + ipw.eps - ground_truth_policy_value) /
                   ground_truth_policy_value),
            "ipw3":
            np.abs((mock_policy_value + ipw3.eps - ground_truth_policy_value) /
                   ground_truth_policy_value),
        }
    else:
        # calculate se
        eval_metric_ope_dict = {
            "ipw":
            (mock_policy_value + ipw.eps - ground_truth_policy_value)**2,
            "ipw3":
            (mock_policy_value + ipw3.eps - ground_truth_policy_value)**2,
        }
    # check performance estimators
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[ipw, ipw3])
    performance = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=ground_truth_policy_value,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        action_dist=action_dist,
        metric=metric,
    )
    for k, v in performance.items():
        assert k in eval_metric_ope_dict, "Invalid key of performance response"
        assert v == eval_metric_ope_dict[
            k], "Invalid value of performance response"
    performance_df = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=ground_truth_policy_value,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        action_dist=action_dist,
        metric=metric,
    )
    # summarized performance should match the expected metric values
    assert_frame_equal(
        performance_df,
        pd.DataFrame(eval_metric_ope_dict, index=[metric]).T,
    )
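
For reference, the two metrics exercised above compare an estimate \hat{V} with the ground-truth policy value V:

\mathrm{relative\text{-}ee}(\hat{V}) = \left| \frac{\hat{V} - V}{V} \right|,
\qquad
\mathrm{se}(\hat{V}) = \left( \hat{V} - V \right)^2

which is exactly what the relative-ee and se branches of the test compute, with \hat{V} = mock_policy_value + eps.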
Example #5
def test_meta_estimate_policy_values_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_policy_values using valid data
    """
    # single ope estimator
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm])
    ope_.is_model_dependent = True
    assert ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    ) == {
        "dm": mock_policy_value
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod]) returns a wrong value"
    # multiple ope estimators
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[dm, ipw])
    ope_.is_model_dependent = True
    assert ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    ) == {
        "dm": mock_policy_value,
        "ipw": mock_policy_value + ipw.eps,
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod, IPW]) returns a wrong value"
Example #6
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw])
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "stratum_idx",
            "pscore_avg",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_pscore_avg",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
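
The synthetic_multi_bandit_feedback fixture used throughout these examples is not shown. A plausible sketch built on obp's synthetic multi-logger dataset; the constructor arguments (betas, rhos, sizes) are illustrative assumptions:

import pytest
from obp.dataset import SyntheticMultiLoggersBanditDataset
from obp.types import BanditFeedback

@pytest.fixture(scope="session")
def synthetic_multi_bandit_feedback() -> BanditFeedback:
    # three behavior policies with different softmax inverse temperatures
    # (betas), mixed in the proportions given by rhos; values are illustrative
    dataset = SyntheticMultiLoggersBanditDataset(
        n_actions=10,
        dim_context=5,
        betas=[-3, 0, 3],
        rhos=[0.2, 0.5, 0.3],
        random_state=12345,
    )
    return dataset.obtain_batch_bandit_feedback(n_rounds=10000)

The multi-logger-specific keys asserted above (stratum_idx, pscore_avg, estimated_pscore_avg) are what distinguish this feedback dict from the single-logger case.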
Example #7
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw])
    # raise ValueError when the shape of two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
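
A representative invalid case here is a shape mismatch between action_dist and estimated_rewards_by_reg_model; a hypothetical parametrized entry (the error-message pattern is an assumption about obp's wording):

import numpy as np

invalid_input_of_create_estimator_inputs = [
    (
        np.zeros((2, 3, 4)),  # action_dist
        np.zeros((3, 3, 4)),  # estimated_rewards_by_reg_model: first dim differs
        r"Expected `estimated_rewards_by_reg_model\.shape == action_dist\.shape`",
    ),
]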
Example #8
def test_meta_estimate_intervals_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha: float,
    n_bootstrap_samples: int,
    random_state: int,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using valid data
    """
    # single ope estimator
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm])
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod]) returns a wrong value"
    # multiple ope estimators
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[dm, ipw])
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval,
        "ipw": {k: v + ipw.eps
                for k, v in mock_confidence_interval.items()},
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod, IPW]) returns a wrong value"
Example #9
def test_meta_estimated_rewards_by_reg_model_inputs(
    synthetic_multi_bandit_feedback: BanditFeedback, ) -> None:
    """
    Test the estimate_policy_values/estimate_intervals functions wrt estimated_rewards_by_reg_model
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[DirectMethod()])

    action_dist = np.zeros((
        synthetic_multi_bandit_feedback["n_rounds"],
        synthetic_multi_bandit_feedback["n_actions"],
    ))
    with pytest.raises(ValueError):
        ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )

    with pytest.raises(ValueError):
        ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )
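
As the test implies, a model-dependent estimator such as DirectMethod cannot run without estimated_rewards_by_reg_model. Outside of tests, that array would typically come from a fitted regression model; a sketch assuming obp's RegressionModel API, with the feedback treated as a plain dict:

from sklearn.linear_model import LogisticRegression
from obp.ope import RegressionModel

# fit a reward regressor on the logged data and predict rewards
# for every (context, action) pair
regression_model = RegressionModel(
    n_actions=synthetic_multi_bandit_feedback["n_actions"],
    base_model=LogisticRegression(max_iter=1000),
)
estimated_rewards_by_reg_model = regression_model.fit_predict(
    context=synthetic_multi_bandit_feedback["context"],
    action=synthetic_multi_bandit_feedback["action"],
    reward=synthetic_multi_bandit_feedback["reward"],
    random_state=12345,
)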
Example #10
def test_meta_summarize_off_policy_estimates(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha: float,
    n_bootstrap_samples: int,
    random_state: int,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of summarize_off_policy_estimates using valid data
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback,
        ope_estimators=[ipw, ipw3])
    value, interval = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    expected_value = pd.DataFrame(
        {
            "ipw": mock_policy_value + ipw.eps,
            "ipw3": mock_policy_value + ipw3.eps,
        },
        index=["estimated_policy_value"],
    ).T
    expected_value["relative_estimated_policy_value"] = (
        expected_value["estimated_policy_value"] /
        synthetic_multi_bandit_feedback["reward"].mean())
    expected_interval = pd.DataFrame({
        "ipw": {k: v + ipw.eps
                for k, v in mock_confidence_interval.items()},
        "ipw3": {k: v + ipw3.eps
                 for k, v in mock_confidence_interval.items()},
    }).T
    # summarized values and intervals should match the expected frames
    assert_frame_equal(value, expected_value)
    assert_frame_equal(interval, expected_interval)
    # check relative estimated policy value when the average of bandit_feedback["reward"] is zero
    zero_reward_bandit_feedback = deepcopy(synthetic_multi_bandit_feedback)
    zero_reward_bandit_feedback["reward"] = np.zeros(
        zero_reward_bandit_feedback["reward"].shape[0])
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=zero_reward_bandit_feedback,
        ope_estimators=[ipw, ipw3])
    value, _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    )
    expected_value = pd.DataFrame(
        {
            "ipw": mock_policy_value + ipw.eps,
            "ipw3": mock_policy_value + ipw3.eps,
        },
        index=["estimated_policy_value"],
    ).T
    expected_value["relative_estimated_policy_value"] = np.nan
    # with zero mean reward, the relative policy value column should be NaN
    assert_frame_equal(value, expected_value)
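
The relative estimated policy value summarized above is the estimate divided by the mean observed reward of the logged data,

\mathrm{relative}\ \hat{V} = \hat{V} \big/ \bar{r}, \qquad \bar{r} = \frac{1}{n} \sum_{i=1}^{n} r_i,

which is why the zero-reward log in the last block is expected to produce NaN.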