コード例 #1
0
def test_dr_init_using_valid_input_data(lambda_: float,
                                        description: str) -> None:
    """Check that the DR-family estimators accept a valid ``lambda_``.

    Parameters
    ----------
    lambda_: float
        Hyperparameter value forwarded to every estimator constructor.

    description: str
        Label of the test case; it is not used inside the test body.

    Notes
    -----
    ``SubGaussianDoublyRobust`` is only instantiated when ``lambda_`` is
    strictly smaller than ``np.inf``.
    """
    # The test succeeds as long as none of the constructors raises.
    for estimator_cls in (DoublyRobust, DoublyRobustWithShrinkage,
                          SwitchDoublyRobust):
        estimator_cls(lambda_=lambda_)

    if not lambda_ < np.inf:
        return
    SubGaussianDoublyRobust(lambda_=lambda_)
コード例 #2
0
def test_dr_init_using_invalid_inputs(
    lambda_,
    use_estimated_pscore,
    err,
    description,
):
    """Check that invalid constructor arguments are rejected.

    Each of ``DoublyRobust``, ``SwitchDoublyRobust`` and
    ``DoublyRobustWithShrinkage`` (in that order) must raise ``err`` when
    built with the given ``lambda_`` / ``use_estimated_pscore`` pair.

    Parameters
    ----------
    lambda_
        Hyperparameter value passed to the constructors.

    use_estimated_pscore
        Flag value passed to the constructors.

    err
        Exception type expected from every constructor.

    description
        Text used to build the regular expression the error message is
        matched against (``description`` followed by ``*``).
    """
    # Same pattern for all three estimators: `description` with a trailing `*`.
    expected_pattern = f"{description}*"
    init_kwargs = dict(lambda_=lambda_,
                       use_estimated_pscore=use_estimated_pscore)

    for estimator_cls in (DoublyRobust, SwitchDoublyRobust,
                          DoublyRobustWithShrinkage):
        with pytest.raises(err, match=expected_pattern):
            estimator_cls(**init_kwargs)
コード例 #3
0
def test_dr_variants_using_valid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    hyperparameter: float,
    description: str,
) -> None:
    """Check that the DR variants estimate a policy value of exactly 0.

    ``SwitchDoublyRobust`` (built with ``tau=hyperparameter``) and
    ``DoublyRobustWithShrinkage`` (built with ``lambda_=hyperparameter``)
    are each run on the same inputs, and the estimate must equal ``0.0``.

    Parameters
    ----------
    action_dist, action, reward, pscore, position, estimated_rewards_by_reg_model
        Inputs forwarded unchanged, by keyword, to ``estimate_policy_value``.

    hyperparameter: float
        Value used as ``tau`` / ``lambda_`` of the two estimators.

    description: str
        Label of the test case; it is not used inside the test body.
    """
    # check dr variants
    estimators = (
        SwitchDoublyRobust(tau=hyperparameter),
        DoublyRobustWithShrinkage(lambda_=hyperparameter),
    )
    # Every estimator receives exactly the same keyword arguments.
    ope_inputs = {
        "action_dist": action_dist,
        "action": action,
        "reward": reward,
        "pscore": pscore,
        "position": position,
        "estimated_rewards_by_reg_model": estimated_rewards_by_reg_model,
    }
    for estimator in estimators:
        policy_value = estimator.estimate_policy_value(**ope_inputs)
        assert policy_value == 0.0, f"policy value must be 0, but {policy_value}"
コード例 #4
0
    hyperparams = yaml.safe_load(f)

# Lookup table from a base-model name to the classifier class to use.
# NOTE: the "lightgbm" key maps to HistGradientBoostingClassifier.
base_model_dict = {
    "logistic_regression": LogisticRegression,
    "lightgbm": HistGradientBoostingClassifier,
    "random_forest": RandomForestClassifier,
}

# compared OPE estimators
# The Switch-DR and DR-os variants are generated in place from their
# hyperparameter values; `:g` renders 1.0 / 100.0 as "1" / "100" so the
# estimator names read "switch-dr (tau=1)", "dr-os (lambda=100)", etc.
ope_estimators = [
    DirectMethod(),
    InverseProbabilityWeighting(),
    SelfNormalizedInverseProbabilityWeighting(),
    DoublyRobust(),
    SelfNormalizedDoublyRobust(),
    *(
        SwitchDoublyRobust(tau=tau, estimator_name=f"switch-dr (tau={tau:g})")
        for tau in (1.0, 100.0)
    ),
    *(
        DoublyRobustWithShrinkage(
            lambda_=lam, estimator_name=f"dr-os (lambda={lam:g})"
        )
        for lam in (1.0, 100.0)
    ),
]

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="evaluate off-policy estimators with synthetic bandit data."
    )
    parser.add_argument(
        "--n_runs", type=int, default=1, help="number of simulations in the experiment."
    )
    parser.add_argument(
        "--n_rounds",
        type=int,
コード例 #5
0
    # The four linear/logistic policies listed below additionally receive
    # `epsilon` = 0.01 as a constructor keyword argument.
    if counterfactual_policy in [
            "linear_ucb",
            "linear_egreedy",
            "logistic_ucb",
            "logistic_egreedy",
    ]:
        kwargs["epsilon"] = 0.01
    # look up the policy class by its name and instantiate it with the kwargs
    policy = counterfactual_policy_dict[counterfactual_policy](**kwargs)
    # compared OPE estimators
    ope_estimators = [
        DirectMethod(),
        InverseProbabilityWeighting(),
        SelfNormalizedInverseProbabilityWeighting(),
        DoublyRobust(),
        SelfNormalizedDoublyRobust(),
        SwitchDoublyRobust(),
    ]
    # a base ML model for regression model used in Direct Method and Doubly Robust
    # NOTE(review): `RandomForest` is not defined in this excerpt; presumably
    # an import alias for a classifier class -- confirm against the imports.
    base_model = CalibratedClassifierCV(RandomForest(**hyperparams))

    # one zero-initialised array of length `n_runs` per estimator, keyed by
    # the estimator's `estimator_name`
    evaluation_of_ope_results = {
        est.estimator_name: np.zeros(n_runs)
        for est in ope_estimators
    }
    for i in np.arange(n_runs):
        # sample a new set of logged bandit feedback
        bandit_feedback = dataset.obtain_batch_bandit_feedback(
            n_rounds=n_rounds)
        # run a counterfactual bandit algorithm on logged bandit feedback data
        selected_actions = run_bandit_simulation(
            bandit_feedback=bandit_feedback, policy=policy)
コード例 #6
0
    SelfNormalizedInverseProbabilityWeighting,
    DirectMethod,
    DoublyRobust,
    SelfNormalizedDoublyRobust,
    SwitchDoublyRobust,
    DoublyRobustWithShrinkage,
)

# OPE estimators compared
# Switch-DR and DR-os are each evaluated at the hyperparameter values
# 5, 10, 50, 100, 500 and 1000; the value is embedded in the estimator name.
ope_estimators = [
    DirectMethod(),
    InverseProbabilityWeighting(),
    SelfNormalizedInverseProbabilityWeighting(),
    DoublyRobust(),
    SelfNormalizedDoublyRobust(),
    *(
        SwitchDoublyRobust(tau=tau, estimator_name=f"switch-dr (tau={tau})")
        for tau in (5, 10, 50, 100, 500, 1000)
    ),
    *(
        DoublyRobustWithShrinkage(
            lambda_=lam, estimator_name=f"dr-os (lambda={lam})"
        )
        for lam in (5, 10, 50, 100, 500, 1000)
    ),
]
コード例 #7
0
def test_switch_using_invalid_input_data(tau: float, description: str) -> None:
    """Check that ``SwitchDoublyRobust`` rejects an invalid ``tau``.

    Parameters
    ----------
    tau: float
        Value passed as ``tau`` to the constructor.

    description: str
        Text used to build the regular expression the ``ValueError``
        message is matched against (``description`` followed by ``*``).
    """
    expected_pattern = f"{description}*"
    with pytest.raises(ValueError, match=expected_pattern):
        SwitchDoublyRobust(tau=tau)
コード例 #8
0
from obp.ope import (
    DirectMethod,
    DoublyRobust,
    DoublyRobustWithShrinkage,
    SwitchDoublyRobust,
    SelfNormalizedDoublyRobust,
)
from conftest import generate_action_dist

# prepare instances
dm, dr = DirectMethod(), DoublyRobust()
# DR with shrinkage at lambda_ = 0 and lambda_ = 1e10
dr_shrink_0, dr_shrink_max = (
    DoublyRobustWithShrinkage(lambda_=lam) for lam in (0.0, 1e10)
)
sndr = SelfNormalizedDoublyRobust()
# Switch-DR at tau = 0 and tau = 1e10
switch_dr_0, switch_dr_max = (
    SwitchDoublyRobust(tau=tau) for tau in (0.0, 1e10)
)

# subset of the instances above, collected in a list
dr_estimators = [dr, dr_shrink_0, sndr, switch_dr_0]

# dr and self-normalized dr
# action_dist, action, reward, pscore, position, estimated_rewards_by_reg_model, description
invalid_input_of_dr = [
    (
        generate_action_dist(5, 4, 3),
        None,
        np.zeros(5, dtype=int),
        np.ones(5),
        np.random.choice(3, size=5),
        np.zeros((5, 4, 3)),
        "action must be ndarray",
コード例 #9
0
def test_switch_using_valid_input_data(tau: float, description: str) -> None:
    """Check that ``SwitchDoublyRobust`` can be built with a valid ``tau``.

    Parameters
    ----------
    tau: float
        Value passed as ``tau`` to the constructor.

    description: str
        Label of the test case; it is not used inside the test body.
    """
    # The test succeeds as long as the constructor does not raise.
    SwitchDoublyRobust(tau=tau)
コード例 #10
0
def test_dr_variants_using_valid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    estimated_pscore: np.ndarray,
    hyperparameter: float,
    description: str,
) -> None:
    """Check that every DR variant estimates a policy value of exactly 0.

    Switch-DR, DR with shrinkage and sub-Gaussian DR are built with
    ``hyperparameter`` (plain, tuned with "mse" / "slope", and -- for
    Switch-DR and DR-os -- with ``use_estimated_pscore=True``). Each
    estimator is run on the same inputs and must return ``0.0``.

    Parameters
    ----------
    action_dist, action, reward, pscore, position,
    estimated_rewards_by_reg_model, estimated_pscore
        Inputs forwarded unchanged, by keyword, to ``estimate_policy_value``.

    hyperparameter: float
        Base hyperparameter; tuning grids are ``[h, h * 10]`` for
        Switch-DR / DR-os and ``[h, h / 10]`` for sub-Gaussian DR.

    description: str
        Label of the test case; it is not used inside the test body.
    """

    def scaled_up_grid() -> list:
        # A fresh list per call so no two estimators share one grid object.
        return [hyperparameter, hyperparameter * 10]

    def scaled_down_grid() -> list:
        return [hyperparameter, hyperparameter / 10]

    # check dr variants
    switch_family = [
        SwitchDoublyRobust(lambda_=hyperparameter),
        SwitchDoublyRobustTuning(lambdas=scaled_up_grid(),
                                 tuning_method="mse"),
        SwitchDoublyRobustTuning(lambdas=scaled_up_grid(),
                                 tuning_method="slope"),
    ]
    shrinkage_family = [
        DoublyRobustWithShrinkage(lambda_=hyperparameter),
        DoublyRobustWithShrinkageTuning(lambdas=scaled_up_grid(),
                                        tuning_method="mse"),
        DoublyRobustWithShrinkageTuning(lambdas=scaled_up_grid(),
                                        tuning_method="slope"),
    ]
    sub_gaussian_family = [
        SubGaussianDoublyRobust(lambda_=hyperparameter),
        SubGaussianDoublyRobustTuning(lambdas=scaled_down_grid(),
                                      tuning_method="mse"),
        SubGaussianDoublyRobustTuning(lambdas=scaled_down_grid(),
                                      tuning_method="slope"),
    ]
    # Variants that use the estimated propensity score.
    switch_family += [
        SwitchDoublyRobust(lambda_=hyperparameter, use_estimated_pscore=True),
        SwitchDoublyRobustTuning(lambdas=scaled_up_grid(),
                                 use_estimated_pscore=True),
    ]
    shrinkage_family += [
        DoublyRobustWithShrinkage(lambda_=hyperparameter,
                                  use_estimated_pscore=True),
        DoublyRobustWithShrinkageTuning(lambdas=scaled_up_grid(),
                                        use_estimated_pscore=True),
    ]

    # Every estimator receives exactly the same keyword arguments.
    ope_inputs = {
        "action_dist": action_dist,
        "action": action,
        "reward": reward,
        "pscore": pscore,
        "position": position,
        "estimated_rewards_by_reg_model": estimated_rewards_by_reg_model,
        "estimated_pscore": estimated_pscore,
    }
    # Evaluation order: sub-Gaussian DR, then Switch-DR, then DR-os.
    for estimator in sub_gaussian_family + switch_family + shrinkage_family:
        policy_value = estimator.estimate_policy_value(**ope_inputs)
        assert policy_value == 0.0, f"policy value must be 0, but {policy_value}"
コード例 #11
0
# DR with hyperparameter tuning: one instance per tuning method
dr_tuning_mse, dr_tuning_slope = (
    DoublyRobustTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"dr_tuning_{method}",
    )
    for method in ("mse", "slope")
)

# DR with shrinkage: lambda_ = 0, tuned ("mse" / "slope"), lambda_ = inf
dr_os_0 = DoublyRobustWithShrinkage(lambda_=0.0)
dr_os_tuning_mse, dr_os_tuning_slope = (
    DoublyRobustWithShrinkageTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"dr_os_tuning_{method}",
    )
    for method in ("mse", "slope")
)
dr_os_max = DoublyRobustWithShrinkage(lambda_=np.inf)

sndr = SelfNormalizedDoublyRobust()

# Switch-DR: lambda_ = 0, tuned ("mse" / "slope"), lambda_ = inf
switch_dr_0 = SwitchDoublyRobust(lambda_=0.0)
switch_dr_tuning_mse, switch_dr_tuning_slope = (
    SwitchDoublyRobustTuning(
        lambdas=[1, 100],
        tuning_method=method,
        estimator_name=f"switch_dr_tuning_{method}",
    )
    for method in ("mse", "slope")
)
switch_dr_max = SwitchDoublyRobust(lambda_=np.inf)

# sub-Gaussian DR: lambda_ = 0 and the "mse"-tuned variant
sg_dr_0 = SubGaussianDoublyRobust(lambda_=0.0)
sg_dr_tuning_mse = SubGaussianDoublyRobustTuning(
    lambdas=[0.01, 0.1],
    tuning_method="mse",
    estimator_name="sg_dr_tuning_mse",
)
sg_dr_tuning_slope = SubGaussianDoublyRobustTuning(