def test_meta_evaluate_performance_of_estimators_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    metric,
    ground_truth_policy_value,
    err,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """Test the response of evaluate_performance_of_estimators using invalid data.

    Both the direct call and the summarizing wrapper must raise the same
    error (`err`) with a message matching `description_2`.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[dm]
    )
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=ground_truth_policy_value,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            action_dist=action_dist,
            metric=metric,
        )
    # evaluate_performance_of_estimators is called inside
    # summarize_estimators_comparison, so the same error must propagate
    with pytest.raises(err, match=f"{description_2}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=ground_truth_policy_value,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
            action_dist=action_dist,
            metric=metric,
        )
def test_meta_evaluate_performance_of_estimators_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description_1: str,
    metric,
    ground_truth_policy_value,
    description_2: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """Test the response of evaluate_performance_of_estimators using valid data.

    Builds the expected per-estimator metric values (relative-ee or squared
    error) from the mocked estimates, then checks both the dict returned by
    evaluate_performance_of_estimators and the DataFrame returned by
    summarize_estimators_comparison against them.
    """
    if metric == "relative-ee":
        # expected relative estimation error for each mocked estimator
        eval_metric_ope_dict = {
            "ipw": np.abs(
                (mock_policy_value + ipw.eps - ground_truth_policy_value)
                / ground_truth_policy_value
            ),
            "ipw3": np.abs(
                (mock_policy_value + ipw3.eps - ground_truth_policy_value)
                / ground_truth_policy_value
            ),
        }
    else:
        # expected squared error for each mocked estimator
        eval_metric_ope_dict = {
            "ipw": (mock_policy_value + ipw.eps - ground_truth_policy_value) ** 2,
            "ipw3": (mock_policy_value + ipw3.eps - ground_truth_policy_value) ** 2,
        }
    # check performance of estimators
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw, ipw3]
    )
    performance = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=ground_truth_policy_value,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        action_dist=action_dist,
        metric=metric,
    )
    for k, v in performance.items():
        assert k in eval_metric_ope_dict, "Invalid key of performance response"
        assert v == eval_metric_ope_dict[k], "Invalid value of performance response"
    performance_df = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=ground_truth_policy_value,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        action_dist=action_dist,
        metric=metric,
    )
    # assert_frame_equal raises AssertionError itself on mismatch; the
    # original trailing `, "Invalid summarization (performance)"` only
    # built a dead tuple and never acted as an assert message, so it is
    # dropped here.
    assert_frame_equal(
        performance_df, pd.DataFrame(eval_metric_ope_dict, index=[metric]).T
    )
def test_meta_create_estimator_inputs_using_valid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """Test the _create_estimator_inputs using valid data.

    NOTE: the original docstring said "invalid data", contradicting the
    function name and body (no error is expected anywhere below); fixed.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    estimator_inputs = ope_._create_estimator_inputs(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    assert set(estimator_inputs.keys()) == set(["ipw"])
    assert set(estimator_inputs["ipw"].keys()) == set(
        [
            "reward",
            "action",
            "pscore",
            "position",
            "action_dist",
            "stratum_idx",
            "pscore_avg",
            "estimated_rewards_by_reg_model",
            "estimated_pscore",
            "estimated_pscore_avg",
        ]
    ), f"Invalid response of _create_estimator_inputs (test case: {description})"
    # _create_estimator_inputs function is called in the following functions
    _ = ope_.estimate_policy_values(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.estimate_intervals(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_off_policy_estimates(
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.evaluate_performance_of_estimators(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    _ = ope_.summarize_estimators_comparison(
        ground_truth_policy_value=0.1,
        action_dist=action_dist,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
def test_meta_create_estimator_inputs_using_invalid_input_data(
    action_dist,
    estimated_rewards_by_reg_model,
    description: str,
    synthetic_multi_bandit_feedback: BanditFeedback,
) -> None:
    """Test the _create_estimator_inputs using invalid data.

    NOTE: the original docstring said "valid data", contradicting the
    function name and body (every call below expects a ValueError); fixed.
    """
    ope_ = MultiLoggersOffPolicyEvaluation(
        bandit_feedback=synthetic_multi_bandit_feedback, ope_estimators=[ipw]
    )
    # raise ValueError when the shape of two arrays are different
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_._create_estimator_inputs(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    # _create_estimator_inputs function is called in the following functions,
    # so each public entry point must surface the same ValueError
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_policy_values(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.estimate_intervals(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_off_policy_estimates(
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.evaluate_performance_of_estimators(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )
    with pytest.raises(ValueError, match=f"{description}*"):
        _ = ope_.summarize_estimators_comparison(
            ground_truth_policy_value=0.1,
            action_dist=action_dist,
            estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        )