def test_dr_init_using_valid_input_data(lambda_: float, description: str) -> None:
    """Check that the DR-family estimators can be constructed from ``lambda_``.

    ``DoublyRobust``, ``DoublyRobustWithShrinkage`` and ``SwitchDoublyRobust``
    are always instantiated; ``SubGaussianDoublyRobust`` is instantiated only
    when ``lambda_ < np.inf``. The test passes when no constructor raises.

    Parameters
    ----------
    lambda_: float
        Hyperparameter value forwarded to every constructor as ``lambda_``.

    description: str
        Not used in the body; presumably a label for the parametrized case
        (the decorator is not visible here -- confirm).

    """
    always_constructed = (
        DoublyRobust,
        DoublyRobustWithShrinkage,
        SwitchDoublyRobust,
    )
    for estimator_cls in always_constructed:
        estimator_cls(lambda_=lambda_)

    # the sub-gaussian variant is exercised last and only below infinity
    if lambda_ < np.inf:
        SubGaussianDoublyRobust(lambda_=lambda_)
def test_dr_init_using_invalid_inputs(
    lambda_,
    use_estimated_pscore,
    err,
    description,
):
    """Check that invalid constructor arguments are rejected.

    ``DoublyRobust``, ``SwitchDoublyRobust`` and ``DoublyRobustWithShrinkage``
    are each constructed with the given arguments, and every construction
    must raise ``err`` with a message matching ``f"{description}*"``.

    Parameters
    ----------
    lambda_
        Value forwarded to every constructor as ``lambda_``.

    use_estimated_pscore
        Value forwarded to every constructor as ``use_estimated_pscore``.

    err
        Exception type expected from each constructor.

    description
        Text used to build the pattern the exception message must match.

    """
    # NOTE: `pytest.raises(match=...)` treats this as a regular expression
    # searched within the message, so the trailing `*` quantifies the last
    # character of `description` rather than acting as a glob wildcard.
    expected_message = f"{description}*"

    rejecting_classes = (
        DoublyRobust,
        SwitchDoublyRobust,
        DoublyRobustWithShrinkage,
    )
    for estimator_cls in rejecting_classes:
        with pytest.raises(err, match=expected_message):
            estimator_cls(
                lambda_=lambda_,
                use_estimated_pscore=use_estimated_pscore,
            )
def test_dr_variants_using_valid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    hyperparameter: float,
    description: str,
) -> None:
    """Check that the Switch-DR and DR-OS estimates are exactly zero.

    A ``SwitchDoublyRobust(tau=hyperparameter)`` and a
    ``DoublyRobustWithShrinkage(lambda_=hyperparameter)`` are built, and
    ``estimate_policy_value`` is called on each with the same inputs. Each
    returned value must compare equal to ``0.0``.

    Parameters
    ----------
    action_dist, action, reward, pscore, position, estimated_rewards_by_reg_model
        Forwarded unchanged, by keyword, to ``estimate_policy_value``.

    hyperparameter: float
        Used as ``tau`` for Switch-DR and as ``lambda_`` for DR-OS.

    description: str
        Not used in the body; presumably a label for the parametrized case
        (the decorator is not visible here -- confirm).

    """
    # dr variants under test, in evaluation order
    dr_variants = [
        SwitchDoublyRobust(tau=hyperparameter),
        DoublyRobustWithShrinkage(lambda_=hyperparameter),
    ]
    shared_inputs = dict(
        action_dist=action_dist,
        action=action,
        reward=reward,
        pscore=pscore,
        position=position,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
    )
    for dr_variant in dr_variants:
        est = dr_variant.estimate_policy_value(**shared_inputs)
        assert est == 0.0, f"policy value must be 0, but {est}"
hyperparams = yaml.safe_load(f) base_model_dict = dict( logistic_regression=LogisticRegression, lightgbm=HistGradientBoostingClassifier, random_forest=RandomForestClassifier, ) # compared OPE estimators ope_estimators = [ DirectMethod(), InverseProbabilityWeighting(), SelfNormalizedInverseProbabilityWeighting(), DoublyRobust(), SelfNormalizedDoublyRobust(), SwitchDoublyRobust(tau=1.0, estimator_name="switch-dr (tau=1)"), SwitchDoublyRobust(tau=100.0, estimator_name="switch-dr (tau=100)"), DoublyRobustWithShrinkage(lambda_=1.0, estimator_name="dr-os (lambda=1)"), DoublyRobustWithShrinkage(lambda_=100.0, estimator_name="dr-os (lambda=100)"), ] if __name__ == "__main__": parser = argparse.ArgumentParser( description="evaluate off-policy estimators with synthetic bandit data." ) parser.add_argument( "--n_runs", type=int, default=1, help="number of simulations in the experiment." ) parser.add_argument( "--n_rounds", type=int,
if counterfactual_policy in [ "linear_ucb", "linear_egreedy", "logistic_ucb", "logistic_egreedy", ]: kwargs["epsilon"] = 0.01 policy = counterfactual_policy_dict[counterfactual_policy](**kwargs) # compared OPE estimators ope_estimators = [ DirectMethod(), InverseProbabilityWeighting(), SelfNormalizedInverseProbabilityWeighting(), DoublyRobust(), SelfNormalizedDoublyRobust(), SwitchDoublyRobust(), ] # a base ML model for regression model used in Direct Method and Doubly Robust base_model = CalibratedClassifierCV(RandomForest(**hyperparams)) evaluation_of_ope_results = { est.estimator_name: np.zeros(n_runs) for est in ope_estimators } for i in np.arange(n_runs): # sample a new set of logged bandit feedback bandit_feedback = dataset.obtain_batch_bandit_feedback( n_rounds=n_rounds) # run a counterfactual bandit algorithm on logged bandit feedback data selected_actions = run_bandit_simulation( bandit_feedback=bandit_feedback, policy=policy)
SelfNormalizedInverseProbabilityWeighting, DirectMethod, DoublyRobust, SelfNormalizedDoublyRobust, SwitchDoublyRobust, DoublyRobustWithShrinkage, ) # OPE estimators compared ope_estimators = [ DirectMethod(), InverseProbabilityWeighting(), SelfNormalizedInverseProbabilityWeighting(), DoublyRobust(), SelfNormalizedDoublyRobust(), SwitchDoublyRobust(tau=5, estimator_name="switch-dr (tau=5)"), SwitchDoublyRobust(tau=10, estimator_name="switch-dr (tau=10)"), SwitchDoublyRobust(tau=50, estimator_name="switch-dr (tau=50)"), SwitchDoublyRobust(tau=100, estimator_name="switch-dr (tau=100)"), SwitchDoublyRobust(tau=500, estimator_name="switch-dr (tau=500)"), SwitchDoublyRobust(tau=1000, estimator_name="switch-dr (tau=1000)"), DoublyRobustWithShrinkage(lambda_=5, estimator_name="dr-os (lambda=5)"), DoublyRobustWithShrinkage(lambda_=10, estimator_name="dr-os (lambda=10)"), DoublyRobustWithShrinkage(lambda_=50, estimator_name="dr-os (lambda=50)"), DoublyRobustWithShrinkage(lambda_=100, estimator_name="dr-os (lambda=100)"), DoublyRobustWithShrinkage(lambda_=500, estimator_name="dr-os (lambda=500)"), DoublyRobustWithShrinkage(lambda_=1000, estimator_name="dr-os (lambda=1000)"), ]
def test_switch_using_invalid_input_data(tau: float, description: str) -> None:
    """Check that ``SwitchDoublyRobust`` rejects an invalid ``tau``.

    Constructing the estimator must raise ``ValueError`` with a message
    matching ``f"{description}*"``.

    Parameters
    ----------
    tau: float
        Value forwarded to the constructor as ``tau``.

    description: str
        Text used to build the pattern the exception message must match.

    """
    # NOTE: the pattern is a regular expression searched within the message,
    # so the trailing `*` applies to the last character of `description`.
    expected_message = f"{description}*"
    with pytest.raises(ValueError, match=expected_message):
        SwitchDoublyRobust(tau=tau)
from obp.ope import ( DirectMethod, DoublyRobust, DoublyRobustWithShrinkage, SwitchDoublyRobust, SelfNormalizedDoublyRobust, ) from conftest import generate_action_dist # prepare instances dm = DirectMethod() dr = DoublyRobust() dr_shrink_0 = DoublyRobustWithShrinkage(lambda_=0.0) dr_shrink_max = DoublyRobustWithShrinkage(lambda_=1e10) sndr = SelfNormalizedDoublyRobust() switch_dr_0 = SwitchDoublyRobust(tau=0.0) switch_dr_max = SwitchDoublyRobust(tau=1e10) dr_estimators = [dr, dr_shrink_0, sndr, switch_dr_0] # dr and self-normalized dr # action_dist, action, reward, pscore, position, estimated_rewards_by_reg_model, description invalid_input_of_dr = [ ( generate_action_dist(5, 4, 3), None, np.zeros(5, dtype=int), np.ones(5), np.random.choice(3, size=5), np.zeros((5, 4, 3)), "action must be ndarray",
def test_switch_using_valid_input_data(tau: float, description: str) -> None:
    """Check that ``SwitchDoublyRobust`` can be constructed from ``tau``.

    The test passes when the constructor does not raise.

    Parameters
    ----------
    tau: float
        Value forwarded to the constructor as ``tau``.

    description: str
        Not used in the body; presumably a label for the parametrized case
        (the decorator is not visible here -- confirm).

    """
    # the instance itself is not needed; only successful construction matters
    SwitchDoublyRobust(tau=tau)
def test_dr_variants_using_valid_input_data(
    action_dist: np.ndarray,
    action: np.ndarray,
    reward: np.ndarray,
    pscore: np.ndarray,
    position: np.ndarray,
    estimated_rewards_by_reg_model: np.ndarray,
    estimated_pscore: np.ndarray,
    hyperparameter: float,
    description: str,
) -> None:
    """Check that every DR variant (plain and tuning) estimates exactly zero.

    Switch-DR, DR with shrinkage and sub-gaussian DR are built from
    ``hyperparameter``, each with an "mse" and a "slope" tuning counterpart.
    Switch-DR and DR with shrinkage are additionally built with
    ``use_estimated_pscore=True`` (plain and tuning). ``estimate_policy_value``
    is then called on each estimator with the same inputs, and every returned
    value must compare equal to ``0.0``.

    Parameters
    ----------
    action_dist, action, reward, pscore, position, estimated_rewards_by_reg_model, estimated_pscore
        Forwarded unchanged, by keyword, to ``estimate_policy_value``.

    hyperparameter: float
        Used as ``lambda_`` for the plain estimators and as the first element
        of ``lambdas`` for the tuning estimators.

    description: str
        Not used in the body; presumably a label for the parametrized case
        (the decorator is not visible here -- confirm).

    """

    def widening_grid():
        # fresh list per estimator: [h, 10h]
        return [hyperparameter, hyperparameter * 10]

    def narrowing_grid():
        # fresh list per estimator: [h, h/10] (used by the sub-gaussian tuners)
        return [hyperparameter, hyperparameter / 10]

    # check dr variants
    # The groups are instantiated in this order (switch, shrinkage,
    # sub-gaussian, then the estimated-pscore variants) and evaluated in a
    # different one below.
    switch_group = [
        SwitchDoublyRobust(lambda_=hyperparameter),
        SwitchDoublyRobustTuning(
            lambdas=widening_grid(),
            tuning_method="mse",
        ),
        SwitchDoublyRobustTuning(
            lambdas=widening_grid(),
            tuning_method="slope",
        ),
    ]
    shrinkage_group = [
        DoublyRobustWithShrinkage(lambda_=hyperparameter),
        DoublyRobustWithShrinkageTuning(
            lambdas=widening_grid(),
            tuning_method="mse",
        ),
        DoublyRobustWithShrinkageTuning(
            lambdas=widening_grid(),
            tuning_method="slope",
        ),
    ]
    sub_gaussian_group = [
        SubGaussianDoublyRobust(lambda_=hyperparameter),
        SubGaussianDoublyRobustTuning(
            lambdas=narrowing_grid(),
            tuning_method="mse",
        ),
        SubGaussianDoublyRobustTuning(
            lambdas=narrowing_grid(),
            tuning_method="slope",
        ),
    ]
    switch_estimated_pscore_group = [
        SwitchDoublyRobust(lambda_=hyperparameter, use_estimated_pscore=True),
        SwitchDoublyRobustTuning(
            lambdas=widening_grid(),
            use_estimated_pscore=True,
        ),
    ]
    shrinkage_estimated_pscore_group = [
        DoublyRobustWithShrinkage(
            lambda_=hyperparameter,
            use_estimated_pscore=True,
        ),
        DoublyRobustWithShrinkageTuning(
            lambdas=widening_grid(),
            use_estimated_pscore=True,
        ),
    ]

    # evaluation order: sub-gaussian first, then switch, then shrinkage
    evaluation_order = (
        sub_gaussian_group
        + switch_group
        + switch_estimated_pscore_group
        + shrinkage_group
        + shrinkage_estimated_pscore_group
    )
    shared_inputs = dict(
        action_dist=action_dist,
        action=action,
        reward=reward,
        pscore=pscore,
        position=position,
        estimated_rewards_by_reg_model=estimated_rewards_by_reg_model,
        estimated_pscore=estimated_pscore,
    )
    for dr_variant in evaluation_order:
        est = dr_variant.estimate_policy_value(**shared_inputs)
        assert est == 0.0, f"policy value must be 0, but {est}"
dr_tuning_mse = DoublyRobustTuning(lambdas=[1, 100], tuning_method="mse", estimator_name="dr_tuning_mse") dr_tuning_slope = DoublyRobustTuning(lambdas=[1, 100], tuning_method="slope", estimator_name="dr_tuning_slope") dr_os_0 = DoublyRobustWithShrinkage(lambda_=0.0) dr_os_tuning_mse = DoublyRobustWithShrinkageTuning( lambdas=[1, 100], tuning_method="mse", estimator_name="dr_os_tuning_mse") dr_os_tuning_slope = DoublyRobustWithShrinkageTuning( lambdas=[1, 100], tuning_method="slope", estimator_name="dr_os_tuning_slope") dr_os_max = DoublyRobustWithShrinkage(lambda_=np.inf) sndr = SelfNormalizedDoublyRobust() switch_dr_0 = SwitchDoublyRobust(lambda_=0.0) switch_dr_tuning_mse = SwitchDoublyRobustTuning( lambdas=[1, 100], tuning_method="mse", estimator_name="switch_dr_tuning_mse") switch_dr_tuning_slope = SwitchDoublyRobustTuning( lambdas=[1, 100], tuning_method="slope", estimator_name="switch_dr_tuning_slope") switch_dr_max = SwitchDoublyRobust(lambda_=np.inf) sg_dr_0 = SubGaussianDoublyRobust(lambda_=0.0) sg_dr_tuning_mse = SubGaussianDoublyRobustTuning( lambdas=[0.01, 0.1], tuning_method="mse", estimator_name="sg_dr_tuning_mse") sg_dr_tuning_slope = SubGaussianDoublyRobustTuning(