Exemple #1
0
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_importance_weights",
            "p_e_a",
            "pi_b",
            "context",
            "action_embed",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
Exemple #2
0
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shape of two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
Exemple #3
0
def test_meta_create_estimator_inputs_format(
        synthetic_bandit_feedback: BanditFeedback,
        random_action_dist: np.ndarray) -> None:
    """
    Test the _create_estimator_inputs format of OffPolicyEvaluation
    """
    # __post_init__ saves the latter estimator when the same estimator name is used
    ope_ = OffPolicyEvaluation(bandit_feedback=synthetic_bandit_feedback,
                               ope_estimators=[ipw])
    inputs = ope_._create_estimator_inputs(action_dist=None,
                                           estimated_rewards_by_reg_model=None)
    assert set(inputs.keys()) == set([
        "reward",
        "action",
        "pscore",
        "position",
        "action_dist",
        "estimated_rewards_by_reg_model",
    ]), "Invalid response format of _create_estimator_inputs"