def test_meta_estimation_format(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of OffPolicyEvaluation
    """
    # single ope estimator
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    assert ope_.estimate_policy_values(random_action_dist) == {
        "dm": mock_policy_value
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod]) returns a wrong value"
    assert ope_.estimate_intervals(random_action_dist) == {
        "dm": mock_confidence_interval
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod]) returns a wrong value"
    # action_dist must be 3-dimensional when using OffPolicyEvaluation
    with pytest.raises(AssertionError, match=r"action_dist must be 3-dimensional.*"):
        ope_.estimate_policy_values(random_action_dist[:, :, 0])
    # multiple ope estimators
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm, ipw]
    )
    assert ope_.estimate_policy_values(random_action_dist) == {
        "dm": mock_policy_value,
        "ipw": mock_policy_value + ipw.eps,
    }, "OffPolicyEvaluation.estimate_policy_values ([DirectMethod, IPW]) returns a wrong value"
    assert ope_.estimate_intervals(random_action_dist) == {
        "dm": mock_confidence_interval,
        "ipw": {k: v + ipw.eps for k, v in mock_confidence_interval.items()},
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod, IPW]) returns a wrong value"

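# NOTE: a minimal sketch of the module-level test doubles that the tests in
# this file rely on. The real definitions live elsewhere in the test module;
# the names and values below (mock_policy_value, the eps shift,
# DirectMethodMock, InverseProbabilityWeightingMock) are assumptions for
# illustration only:
#
#     mock_policy_value = 0.5
#     mock_confidence_interval = {
#         "mean": mock_policy_value,
#         "95.0% CI (lower)": mock_policy_value - 0.3,
#         "95.0% CI (upper)": mock_policy_value + 0.3,
#     }
#     dm = DirectMethodMock()  # estimate_policy_value always returns mock_policy_value
#     ipw = InverseProbabilityWeightingMock(eps=0.02)  # returns mock_policy_value + eps
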
def test_meta_estimate_intervals_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha,
    n_bootstrap_samples,
    random_state,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )
    # estimate_intervals is also called inside summarize_off_policy_estimates
    with pytest.raises(ValueError, match=f"{description_2}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            alpha=alpha,
            n_bootstrap_samples=n_bootstrap_samples,
            random_state=random_state,
        )

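# NOTE: this test (like the other parametrized tests in this file) is assumed
# to be driven by @pytest.mark.parametrize decorators defined alongside it:
# `description_1` labels the (valid) input-data case and `description_2`
# describes the invalid confidence-interval arguments, which is why it doubles
# as the `match` pattern above. A hedged sketch of the shape of one such case
# (parameter values and the error message are illustrative, not the real
# fixture data):
#
#     @pytest.mark.parametrize(
#         "alpha, n_bootstrap_samples, random_state, description_2",
#         [(0.05, -100, 12345, "n_bootstrap_samples == -100, must be >= 1")],
#     )
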
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using valid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_importance_weights",
            "p_e_a",
            "pi_b",
            "context",
            "action_embed",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs is called inside each of the following methods
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )

def test_meta_estimate_intervals_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    alpha: float,
    n_bootstrap_samples: int,
    random_state: int,
    description_2: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the response of estimate_intervals using valid data
    """
    # single ope estimator
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm]
    )
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod]) returns a wrong value"
    # multiple ope estimators
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[dm, ipw]
    )
    assert ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        alpha=alpha,
        n_bootstrap_samples=n_bootstrap_samples,
        random_state=random_state,
    ) == {
        "dm": mock_confidence_interval,
        "ipw": {k: v + ipw.eps for k, v in mock_confidence_interval.items()},
    }, "OffPolicyEvaluation.estimate_intervals ([DirectMethod, IPW]) returns a wrong value"

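# For context, estimate_intervals computes a nonparametric bootstrap over
# rounds. A minimal standalone sketch of the quantity being mocked above
# (the function and variable names here are illustrative, not obp internals):
#
#     def bootstrap_ci(samples, alpha=0.05, n_bootstrap_samples=100, random_state=12345):
#         rng = np.random.default_rng(random_state)
#         boot_means = [
#             rng.choice(samples, size=samples.shape[0], replace=True).mean()
#             for _ in range(n_bootstrap_samples)
#         ]
#         return {
#             "mean": np.mean(boot_means),
#             f"{100 * (1 - alpha):.1f}% CI (lower)": np.percentile(boot_means, 100 * (alpha / 2)),
#             f"{100 * (1 - alpha):.1f}% CI (upper)": np.percentile(boot_means, 100 * (1 - alpha / 2)),
#         }
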
def test_meta_estimated_rewards_by_reg_model_inputs(
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the estimate_policy_values/estimate_intervals functions wrt estimated_rewards_by_reg_model
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[DirectMethod()]
    )
    # a valid (3-dimensional) action_dist, so that the missing
    # estimated_rewards_by_reg_model is what triggers the ValueError
    action_dist = np.zeros(
        (
            synthetic_bandit_feedback["n_rounds"],
            synthetic_bandit_feedback["n_actions"],
            1,
        )
    )
    with pytest.raises(ValueError):
        ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )
    with pytest.raises(ValueError):
        ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=None,
        )

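# For reference, a valid `action_dist` for OffPolicyEvaluation is
# 3-dimensional with shape (n_rounds, n_actions, len_list) and sums to one
# over the action axis. A minimal sketch of a uniform random policy
# (variable names here are illustrative):
#
#     n_rounds = synthetic_bandit_feedback["n_rounds"]
#     n_actions = synthetic_bandit_feedback["n_actions"]
#     uniform_action_dist = np.ones((n_rounds, n_actions, 1)) / n_actions
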
def test_response_format_of_standard_ope_estimators_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of the standard OPE estimators using synthetic bandit data and a random evaluation policy
    """
    expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis]
    action_dist = random_action_dist
    # test all standard estimators (the tuning variants are covered in the extended test below)
    all_estimators = ope.__all_estimators__
    estimators = [
        getattr(ope.estimators, estimator_name)() for estimator_name in all_estimators
    ]
    # conduct OPE
    ope_instance = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=estimators
    )
    estimated_policy_value = ope_instance.estimate_policy_values(
        action_dist=action_dist, estimated_rewards_by_reg_model=expected_reward
    )
    estimated_intervals = ope_instance.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        random_state=12345,
    )
    # check the format of OPE outputs
    for key in estimated_policy_value:
        # check the keys of the output dictionary of the estimate_intervals method
        assert set(estimated_intervals[key].keys()) == set(
            ["mean", "95.0% CI (lower)", "95.0% CI (upper)"]
        ), f"Confidence interval of {key} has invalid keys"
        # check the relationship between the point estimates and the confidence bounds
        assert (
            estimated_intervals[key]["95.0% CI (lower)"] <= estimated_policy_value[key]
        ) and (
            estimated_intervals[key]["95.0% CI (upper)"] >= estimated_policy_value[key]
        ), f"Estimated policy value of {key} is not included in the estimated interval of that estimator"
        assert (
            estimated_intervals[key]["mean"]
            >= estimated_intervals[key]["95.0% CI (lower)"]
        ), f"Invalid confidence interval of {key}: lower bound > mean"
        assert (
            estimated_intervals[key]["mean"]
            <= estimated_intervals[key]["95.0% CI (upper)"]
        ), f"Invalid confidence interval of {key}: upper bound < mean"

def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_bandit_feedback: BanditFeedback,
) -> None:
    """
    Test the _create_estimator_inputs using invalid data
    """
    ope_ = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shapes of the two input arrays differ
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs is called inside each of the following methods
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )

def test_response_format_of_ope_estimators_using_random_evaluation_policy(
    synthetic_bandit_feedback: BanditFeedback, random_action_dist: np.ndarray
) -> None:
    """
    Test the response format of all OPE estimators (including the tuning variants) using synthetic bandit data and a random evaluation policy
    """
    expected_reward = synthetic_bandit_feedback["expected_reward"][:, :, np.newaxis]
    action_dist = random_action_dist
    # test all estimators
    all_estimators = ope.__all_estimators__
    estimators_standard = [
        getattr(ope.estimators, estimator_name)() for estimator_name in all_estimators
    ]
    all_estimators_tuning = ope.__all_estimators_tuning__
    estimators_tuning = [
        getattr(ope.estimators_tuning, estimator_name)(
            lambdas=[1, 100, 10000, np.inf],
            tuning_method=tuning_method,
        )
        for estimator_name in all_estimators_tuning
        for tuning_method in ["slope", "mse"]
    ]
    all_estimators_tuning_sg = ope.__all_estimators_tuning_sg__
    estimators_tuning_sg = [
        getattr(ope.estimators_tuning, estimator_name)(
            lambdas=[0.001, 0.01, 0.1, 1.0],
            tuning_method=tuning_method,
        )
        for estimator_name in all_estimators_tuning_sg
        for tuning_method in ["slope", "mse"]
    ]
    estimators = estimators_standard + estimators_tuning + estimators_tuning_sg
    # pass the true pscore and a precomputed importance-weight vector
    # (pi_e(a|x) / pi_b(a|x)) instead of estimating them
    estimated_pscore = None
    estimated_importance_weights = (
        random_action_dist[
            np.arange(synthetic_bandit_feedback["action"].shape[0]),
            synthetic_bandit_feedback["action"],
            np.zeros(
                synthetic_bandit_feedback["action"].shape[0], dtype=int
            ),  # position is None
        ]
        / synthetic_bandit_feedback["pscore"]
    )
    # conduct OPE
    ope_instance = OffPolicyEvaluation(
        bandit_feedback=synthetic_bandit_feedback, ope_estimators=estimators
    )
    estimated_policy_value = ope_instance.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        estimated_pscore=estimated_pscore,
        estimated_importance_weights=estimated_importance_weights,
    )
    estimated_intervals = ope_instance.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=expected_reward,
        estimated_pscore=estimated_pscore,
        estimated_importance_weights=estimated_importance_weights,
        random_state=12345,
    )
    # check the format of OPE outputs
    for key in estimated_policy_value:
        # check the keys of the output dictionary of the estimate_intervals method
        assert set(estimated_intervals[key].keys()) == set(
            ["mean", "95.0% CI (lower)", "95.0% CI (upper)"]
        ), f"Confidence interval of {key} has invalid keys"
        # check the relationship between the point estimates and the confidence bounds
        assert (
            estimated_intervals[key]["95.0% CI (lower)"] <= estimated_policy_value[key]
        ) and (
            estimated_intervals[key]["95.0% CI (upper)"] >= estimated_policy_value[key]
        ), f"Estimated policy value of {key} is not included in the estimated interval of that estimator"
        assert (
            estimated_intervals[key]["mean"]
            >= estimated_intervals[key]["95.0% CI (lower)"]
        ), f"Invalid confidence interval of {key}: lower bound > mean"
        assert (
            estimated_intervals[key]["mean"]
            <= estimated_intervals[key]["95.0% CI (upper)"]
        ), f"Invalid confidence interval of {key}: upper bound < mean"

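# A hedged end-to-end sketch of the public API exercised by these tests
# (parameter names follow the obp interface at the time of writing; verify
# against your installed version):
#
#     from obp.dataset import SyntheticBanditDataset
#     from obp.ope import DirectMethod, InverseProbabilityWeighting, OffPolicyEvaluation
#
#     dataset = SyntheticBanditDataset(n_actions=10, dim_context=5, random_state=12345)
#     bandit_feedback = dataset.obtain_batch_bandit_feedback(n_rounds=10000)
#     # uniform-random evaluation policy, shape (n_rounds, n_actions, len_list)
#     action_dist = np.ones((10000, 10, 1)) / 10
#     ope_ = OffPolicyEvaluation(
#         bandit_feedback=bandit_feedback,
#         ope_estimators=[DirectMethod(), InverseProbabilityWeighting()],
#     )
#     # DirectMethod needs estimated_rewards_by_reg_model; the true expected
#     # reward is used here as a stand-in, as in the tests above
#     ope_.estimate_policy_values(
#         action_dist=action_dist,
#         estimated_rewards_by_reg_model=bandit_feedback["expected_reward"][:, :, np.newaxis],
#     )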