Code example #1
File: test_real.py Project: aiueola/zr-obp
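The test snippets on this page are excerpts and omit their import statements. Judging from the names they reference, they assume roughly the following header (a reconstruction; the actual imports in test_real.py may differ):

from typing import Dict, Tuple

import numpy as np
import pandas as pd
import pytest

from obp.dataset import OpenBanditDataset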
def test_real_init():
    # behavior_policy
    with pytest.raises(ValueError):
        OpenBanditDataset(behavior_policy="aaa", campaign="all")

    # campaign
    with pytest.raises(ValueError):
        OpenBanditDataset(behavior_policy="random", campaign="aaa")

    # data_path
    with pytest.raises(ValueError):
        OpenBanditDataset(behavior_policy="random",
                          campaign="all",
                          data_path=5)

    # load_raw_data
    obd = OpenBanditDataset(behavior_policy="random", campaign="all")
    # check that the loaded values exist and have the right types
    assert (isinstance(obd.data, pd.DataFrame)
            and isinstance(obd.item_context, pd.DataFrame)
            and isinstance(obd.action, np.ndarray)
            and isinstance(obd.position, np.ndarray)
            and isinstance(obd.reward, np.ndarray)
            and isinstance(obd.pscore, np.ndarray))

    # pre_process (context and action_context)
    assert isinstance(obd.context, np.ndarray) and isinstance(
        obd.action_context, np.ndarray)
Code example #2
def test_sample_bootstrap_bandit_feedback():
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()

    assert len(bandit_feedback["action"]) == len(bootstrap_bf["action"])
    assert len(bandit_feedback["position"]) == len(bootstrap_bf["position"])
    assert len(bandit_feedback["reward"]) == len(bootstrap_bf["reward"])
    assert len(bandit_feedback["pscore"]) == len(bootstrap_bf["pscore"])
    assert len(bandit_feedback["context"]) == len(bootstrap_bf["context"])
Code example #3
def test_sample_bootstrap_bandit_feedback():
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()

    bf_keys = {"action", "position", "reward", "pscore", "context"}
    for k in bf_keys:
        assert len(bandit_feedback[k]) == len(bootstrap_bf[k])

    bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback(
        is_timeseries_split=True)[0]
    bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback(
        is_timeseries_split=True)
    for k in bf_keys:
        assert len(bandit_feedback_timeseries[k]) == len(
            bootstrap_bf_timeseries[k])
Code example #4
File: test_real.py Project: aiueola/zr-obp
def test_calc_on_policy_policy_value_estimate():
    ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy="random", campaign="all")
    assert isinstance(ground_truth_policy_value, float)
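The on-policy estimate above is typically used as the ground truth against which OPE estimators are compared. Below is a minimal sketch of that comparison, assuming the standard obp API (OffPolicyEvaluation, InverseProbabilityWeighting, BernoulliTS) and the ZOZOTOWN prior settings shown in the other snippets; it is an illustration, not code from test_real.py.

from obp.dataset import OpenBanditDataset
from obp.ope import OffPolicyEvaluation, InverseProbabilityWeighting
from obp.policy import BernoulliTS

dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
bandit_feedback = dataset.obtain_batch_bandit_feedback()

# evaluation policy: Bernoulli TS with the ZOZOTOWN production prior
policy = BernoulliTS(
    n_actions=dataset.n_actions,
    len_list=dataset.len_list,
    is_zozotown_prior=True,
    campaign="all",
    random_state=12345,
)
action_dist = policy.compute_batch_action_dist(
    n_rounds=bandit_feedback["n_rounds"], n_sim=100000
)

# off-policy estimate of the evaluation policy's value from the "random" logs
ope = OffPolicyEvaluation(
    bandit_feedback=bandit_feedback,
    ope_estimators=[InverseProbabilityWeighting()],
)
estimated = ope.estimate_policy_values(action_dist=action_dist)

# on-policy ground truth computed from the logs collected by the policy itself
ground_truth = OpenBanditDataset.calc_on_policy_policy_value_estimate(
    behavior_policy="bts", campaign="all"
)
print(estimated, ground_truth)  # estimated is a dict keyed by estimator name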
Code example #5
File: test_real.py Project: aiueola/zr-obp
def test_obtain_batch_bandit_feedback():
    # invalid test_size
    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split=True,
                                             test_size=1.3)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split=True,
                                             test_size=-0.5)

    with pytest.raises(TypeError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split=True,
                                             test_size="0.5")

    with pytest.raises(TypeError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split="True",
                                             test_size=0.5)

    # existence of keys
    # is_timeseries_split=False (default)
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()

    assert "n_rounds" in bandit_feedback.keys()
    assert "n_actions" in bandit_feedback.keys()
    assert "action" in bandit_feedback.keys()
    assert "position" in bandit_feedback.keys()
    assert "reward" in bandit_feedback.keys()
    assert "pscore" in bandit_feedback.keys()
    assert "context" in bandit_feedback.keys()
    assert "action_context" in bandit_feedback.keys()

    # is_timeseries_split=True
    bandit_feedback_timeseries = dataset.obtain_batch_bandit_feedback(
        is_timeseries_split=True)
    assert isinstance(bandit_feedback_timeseries, Tuple)
    bandit_feedback_train = bandit_feedback_timeseries[0]
    bandit_feedback_test = bandit_feedback_timeseries[1]

    bf_elems = {
        "n_rounds",
        "n_actions",
        "action",
        "position",
        "reward",
        "pscore",
        "context",
        "action_context",
    }
    assert all(k in bandit_feedback_train.keys() for k in bf_elems)
    assert all(k in bandit_feedback_test.keys() for k in bf_elems)
Code example #6
File: test_real.py Project: aiueola/zr-obp
def test_sample_bootstrap_bandit_feedback():
    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(is_timeseries_split=True,
                                                 test_size=1.3)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(is_timeseries_split=True,
                                                 test_size=-0.5)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=-50)

    with pytest.raises(TypeError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=50.0)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.sample_bootstrap_bandit_feedback(sample_size=10000000)

    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback()

    bf_keys = {"action", "position", "reward", "pscore", "context"}
    for k in bf_keys:
        assert len(bandit_feedback[k]) == len(bootstrap_bf[k])

    bandit_feedback_timeseries: Dict = dataset.obtain_batch_bandit_feedback(
        is_timeseries_split=True)[0]
    bootstrap_bf_timeseries = dataset.sample_bootstrap_bandit_feedback(
        is_timeseries_split=True)
    for k in bf_keys:
        assert len(bandit_feedback_timeseries[k]) == len(
            bootstrap_bf_timeseries[k])

    sample_size = 1000
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bootstrap_bf = dataset.sample_bootstrap_bandit_feedback(
        sample_size=sample_size)
    assert bootstrap_bf["n_rounds"] == sample_size
    for k in bf_keys:
        assert len(bootstrap_bf[k]) == sample_size
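Beyond the length checks above, the bootstrap feedback can be used to gauge sampling variability of simple statistics. A minimal sketch under the same setup (the random_state argument is the one the bootstrap loop in example #14 below also passes):

import numpy as np
from obp.dataset import OpenBanditDataset

dataset = OpenBanditDataset(behavior_policy="random", campaign="all")

# variability of the observed click-through rate across bootstrap replicates
ctrs = []
for b in range(10):
    boot_bf = dataset.sample_bootstrap_bandit_feedback(random_state=b)
    ctrs.append(boot_bf["reward"].mean())
print(f"CTR: {np.mean(ctrs):.5f} +/- {np.std(ctrs):.5f}")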
Code example #7
        help="campaign name, men, women, or all.",
    )
    parser.add_argument("--random_state", type=int, default=12345)
    args = parser.parse_args()
    print(args)

    n_boot_samples = args.n_boot_samples
    counterfactual_policy = args.counterfactual_policy
    behavior_policy = args.behavior_policy
    campaign = args.campaign
    random_state = args.random_state
    np.random.seed(random_state)
    data_path = Path("../open_bandit_dataset")

    obd = OpenBanditDataset(
        behavior_policy=behavior_policy, campaign=campaign, data_path=data_path
    )
    # hyperparameters for counterfactual policies
    kwargs = dict(
        n_actions=obd.n_actions, len_list=obd.len_list, random_state=random_state
    )
    if counterfactual_policy == "bts":
        kwargs["alpha"] = production_prior_for_bts[campaign]["alpha"]
        kwargs["beta"] = production_prior_for_bts[campaign]["beta"]
        kwargs["batch_size"] = production_batch_size_for_bts[campaign]
    policy = counterfactual_policy_dict[counterfactual_policy](**kwargs)
    # OPE estimators to be compared
    ope_estimators = [DirectMethod(), InverseProbabilityWeighting(), DoublyRobust()]
    # base ML model for the regression model used in Direct Method and Doubly Robust
    base_model = CalibratedClassifierCV(LogisticRegression(**hyperparams))
    # ground-truth policy value of a counterfactual policy
Code example #8
    print(args)

    # configurations
    n_runs = args.n_runs
    base_model = args.base_model
    evaluation_policy = args.evaluation_policy
    behavior_policy = args.behavior_policy
    campaign = args.campaign
    n_sim_to_compute_action_dist = args.n_sim_to_compute_action_dist
    n_jobs = args.n_jobs
    random_state = args.random_state
    np.random.seed(random_state)
    data_path = Path(".").resolve().parents[1] / "obd"

    obd = OpenBanditDataset(behavior_policy=behavior_policy,
                            campaign=campaign,
                            data_path=data_path)
    # compute action distribution by evaluation policy
    kwargs = dict(n_actions=obd.n_actions,
                  len_list=obd.len_list,
                  random_state=random_state)
    if evaluation_policy == "bts":
        kwargs["is_zozotown_prior"] = True
        kwargs["campaign"] = campaign
    policy = evaluation_policy_dict[evaluation_policy](**kwargs)
    action_dist_single_round = policy.compute_batch_action_dist(
        n_sim=n_sim_to_compute_action_dist)
    # ground-truth policy value of the evaluation policy,
    # estimated with factual (observed) rewards (on-policy estimation)
    ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy=evaluation_policy,
Code example #9
    is_timeseries_split = args.is_timeseries_split
    n_sim_to_compute_action_dist = args.n_sim_to_compute_action_dist
    n_jobs = args.n_jobs
    random_state = args.random_state
    np.random.seed(random_state)
    data_path = Path("../open_bandit_dataset")

    # prepare path
    log_path = (Path("./logs") / behavior_policy / campaign / "out_sample" /
                base_model if is_timeseries_split else Path("./logs") /
                behavior_policy / campaign / "in_sample" / base_model)
    reg_model_path = log_path / "trained_reg_models"
    reg_model_path.mkdir(exist_ok=True, parents=True)

    obd = OpenBanditDataset(behavior_policy=behavior_policy,
                            campaign=campaign,
                            data_path=data_path)
    # ground-truth policy value of the evaluation policy,
    # estimated with factual (observed) rewards (on-policy estimation)
    ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy=evaluation_policy,
        campaign=campaign,
        data_path=data_path,
        test_size=test_size,
        is_timeseries_split=is_timeseries_split,
    )
    # compute action distribution by evaluation policy
    if evaluation_policy == "bts":
        policy = BernoulliTS(
            n_actions=obd.n_actions,
            len_list=obd.len_list,
Code example #10
    campaign = args.campaign
    n_sim_for_action_dist = args.n_sim_for_action_dist
    test_size = args.test_size
    is_timeseries_split = args.is_timeseries_split
    n_sim_to_compute_action_dist = args.n_sim_to_compute_action_dist
    random_state = args.random_state
    data_path = Path("../open_bandit_dataset")
    # prepare path
    log_path = (Path("./logs") / behavior_policy / campaign / "out_sample" /
                base_model if is_timeseries_split else Path("./logs") /
                behavior_policy / campaign / "in_sample" / base_model)
    reg_model_path = log_path / "trained_reg_models"
    reg_model_path.mkdir(exist_ok=True, parents=True)

    obd = OpenBanditDataset(behavior_policy=behavior_policy,
                            campaign=campaign,
                            data_path=data_path)
    # ground-truth policy value of the evaluation policy,
    # estimated with factual (observed) rewards (on-policy estimation)
    ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy=evaluation_policy,
        campaign=campaign,
        data_path=data_path,
        test_size=test_size,
        is_timeseries_split=is_timeseries_split,
    )

    start = time.time()
    relative_ee = {
        est.estimator_name: np.zeros(n_boot_samples)
        for est in ope_estimators
Code example #11
    is_mrdr = args.is_mrdr
    n_sim_to_compute_action_dist = args.n_sim_to_compute_action_dist
    n_jobs = args.n_jobs
    random_state = args.random_state
    np.random.seed(random_state)
    data_path = Path("../open_bandit_dataset")

    # prepare path
    log_path = (Path("./logs") / behavior_policy / campaign / "out_sample" /
                base_model if is_timeseries_split else Path("./logs") /
                behavior_policy / campaign / "in_sample" / base_model)
    reg_model_path = log_path / "trained_reg_models"
    reg_model_path.mkdir(exist_ok=True, parents=True)

    obd = OpenBanditDataset(behavior_policy=behavior_policy,
                            campaign=campaign,
                            data_path=data_path)
    # action distribution by the evaluation policy
    # (the more robust doubly robust (MRDR) estimator needs evaluation policy information)
    if is_mrdr:
        if behavior_policy == "random":
            policy = BernoulliTS(
                n_actions=obd.n_actions,
                len_list=obd.len_list,
                # replicate the policy in the ZOZOTOWN production
                is_zozotown_prior=True,
                campaign=campaign,
                random_state=random_state,
            )
        else:
            policy = Random(
Code example #12
def test_obtain_batch_bandit_feedback():
    # invalid test_size
    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split=True,
                                             test_size=1.3)

    with pytest.raises(ValueError):
        dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
        dataset.obtain_batch_bandit_feedback(is_timeseries_split=True,
                                             test_size=-0.5)

    # existence of keys
    # is_timeseries_split=False (default)
    dataset = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback = dataset.obtain_batch_bandit_feedback()

    assert "n_rounds" in bandit_feedback.keys()
    assert "n_actions" in bandit_feedback.keys()
    assert "action" in bandit_feedback.keys()
    assert "position" in bandit_feedback.keys()
    assert "reward" in bandit_feedback.keys()
    assert "pscore" in bandit_feedback.keys()
    assert "context" in bandit_feedback.keys()
    assert "action_context" in bandit_feedback.keys()

    # is_timeseries_split=True
    dataset2 = OpenBanditDataset(behavior_policy="random", campaign="all")
    bandit_feedback2 = dataset2.obtain_batch_bandit_feedback(
        is_timeseries_split=True)

    assert "n_rounds" in bandit_feedback2.keys()
    assert "n_actions" in bandit_feedback2.keys()
    assert "action" in bandit_feedback2.keys()
    assert "action_test" in bandit_feedback2.keys()
    assert "position" in bandit_feedback2.keys()
    assert "position_test" in bandit_feedback2.keys()
    assert "reward" in bandit_feedback2.keys()
    assert "reward_test" in bandit_feedback2.keys()
    assert "pscore" in bandit_feedback2.keys()
    assert "pscore_test" in bandit_feedback2.keys()
    assert "context" in bandit_feedback2.keys()
    assert "context_test" in bandit_feedback2.keys()
    assert "action_context" in bandit_feedback2.keys()
Code example #13
def main(cfg: DictConfig) -> None:
    print(cfg)
    logger.info(f"The current working directory is {Path().cwd()}")
    start_time = time.time()
    logger.info("initializing experimental condition..")

    # compared ope estimators
    lambdas = list(dict(cfg.estimator_hyperparams)["lambdas"])
    ope_estimators = [
        DoublyRobustWithShrinkage(lambda_=lam_,
                                  estimator_name=f"DRos ({lam_})")
        for lam_ in lambdas
    ] + [
        DoublyRobustWithShrinkageTuning(lambdas=lambdas,
                                        estimator_name="DRos (tuning)"),
    ]

    # configurations
    n_seeds = cfg.setting.n_seeds
    sample_size = cfg.setting.sample_size
    reg_model = cfg.setting.reg_model
    campaign = cfg.setting.campaign
    behavior_policy = cfg.setting.behavior_policy
    test_size = cfg.setting.test_size
    is_timeseries_split = cfg.setting.is_timeseries_split
    n_folds = cfg.setting.n_folds
    obd_path = (Path().cwd().parents[5] /
                "open_bandit_dataset" if cfg.setting.is_full_obd else None)
    random_state = cfg.setting.random_state
    np.random.seed(random_state)

    # define dataset
    dataset_ts = OpenBanditDataset(behavior_policy="bts",
                                   campaign=campaign,
                                   data_path=obd_path)
    dataset_ur = OpenBanditDataset(behavior_policy="random",
                                   campaign=campaign,
                                   data_path=obd_path)

    # prepare logged bandit feedback and evaluation policies
    if behavior_policy == "random":
        if is_timeseries_split:
            bandit_feedback_ur = dataset_ur.obtain_batch_bandit_feedback(
                test_size=test_size,
                is_timeseries_split=True,
            )[0]
        else:
            bandit_feedback_ur = dataset_ur.obtain_batch_bandit_feedback()
        bandit_feedbacks = [bandit_feedback_ur]
        # obtain the ground-truth policy value
        ground_truth_ts = OpenBanditDataset.calc_on_policy_policy_value_estimate(
            behavior_policy="bts",
            campaign=campaign,
            data_path=obd_path,
            test_size=test_size,
            is_timeseries_split=is_timeseries_split,
        )
        # obtain action choice probabilities and define evaluation policies
        policy_ts = BernoulliTS(
            n_actions=dataset_ts.n_actions,
            len_list=dataset_ts.len_list,
            random_state=random_state,
            is_zozotown_prior=True,
            campaign=campaign,
        )
        action_dist_ts = policy_ts.compute_batch_action_dist(n_rounds=1000000)
        evaluation_policies = [(ground_truth_ts, action_dist_ts)]
    else:
        if is_timeseries_split:
            bandit_feedback_ts = dataset_ts.obtain_batch_bandit_feedback(
                test_size=test_size,
                is_timeseries_split=True,
            )[0]
        else:
            bandit_feedback_ts = dataset_ts.obtain_batch_bandit_feedback()
        bandit_feedbacks = [bandit_feedback_ts]
        # obtain the ground-truth policy value
        ground_truth_ur = OpenBanditDataset.calc_on_policy_policy_value_estimate(
            behavior_policy="random",
            campaign=campaign,
            data_path=obd_path,
            test_size=test_size,
            is_timeseries_split=is_timeseries_split,
        )
        # obtain action choice probabilities and define evaluation policies
        policy_ur = Random(
            n_actions=dataset_ur.n_actions,
            len_list=dataset_ur.len_list,
            random_state=random_state,
        )
        action_dist_ur = policy_ur.compute_batch_action_dist(n_rounds=1000000)
        evaluation_policies = [(ground_truth_ur, action_dist_ur)]

    # regression models used in ope estimators
    hyperparams = dict(cfg.reg_model_hyperparams)[reg_model]
    regression_models = [reg_model_dict[reg_model](**hyperparams)]

    # define an evaluator class
    evaluator = InterpretableOPEEvaluator(
        random_states=np.arange(n_seeds),
        bandit_feedbacks=bandit_feedbacks,
        evaluation_policies=evaluation_policies,
        ope_estimators=ope_estimators,
        regression_models=regression_models,
    )

    # conduct the evaluation-of-OPE experiment
    logger.info("experiment started")
    _ = evaluator.estimate_policy_value(sample_size=sample_size,
                                        n_folds_=n_folds)
    # calculate statistics
    mean = evaluator.calculate_mean(root=True)
    mean_scaled = evaluator.calculate_mean(scale=True, root=True)

    # save results of the evaluation of off-policy estimators
    log_path = Path("./outputs/hypara")
    log_path.mkdir(exist_ok=True, parents=True)
    # save root mse
    root_mse_df = DataFrame()
    root_mse_df["estimator"] = list(mean.keys())
    root_mse_df["mean"] = list(mean.values())
    root_mse_df["mean(scaled)"] = list(mean_scaled.values())
    root_mse_df.to_csv(log_path / "root_mse.csv")
    # conduct pairwise t-tests
    se_df = DataFrame(evaluator.calculate_squared_error())
    se_df = DataFrame(se_df.stack()).reset_index(1)
    se_df.rename(columns={"level_1": "estimators", 0: "se"}, inplace=True)
    nonparam_ttests = (pg.pairwise_ttests(
        data=se_df,
        dv="se",
        parametric=False,
        between="estimators",
    ).round(4).drop(["Contrast", "Parametric", "Paired"], axis=1))
    nonparam_ttests.to_csv(log_path / "nonparam_ttests.csv")
    # print result
    print(root_mse_df)
    experiment = f"{campaign}-{behavior_policy}-{sample_size}"
    elapsed_time = np.round((time.time() - start_time) / 60, 2)
    logger.info(f"finish experiment {experiment} in {elapsed_time}min")
Code example #14
    base_model = args.base_model
    behavior_policy = args.behavior_policy
    campaign = args.campaign
    test_size = args.test_size
    is_timeseries_split = args.is_timeseries_split
    random_state = args.random_state
    data_path = Path("../open_bandit_dataset")
    # prepare path
    log_path = (Path("./logs") / behavior_policy / campaign / "out_sample" /
                base_model if is_timeseries_split else Path("./logs") /
                behavior_policy / campaign / "in_sample" / base_model)
    reg_model_path = log_path / "trained_reg_models"
    reg_model_path.mkdir(exist_ok=True, parents=True)

    obd = OpenBanditDataset(behavior_policy=behavior_policy,
                            campaign=campaign,
                            data_path=data_path)
    start_time = time.time()
    performance_of_reg_model = {
        metrics[i]: np.zeros(n_boot_samples)
        for i in np.arange(len(metrics))
    }
    for b in np.arange(n_boot_samples):
        # draw bootstrap samples from the batch logged bandit feedback
        boot_bandit_feedback = obd.sample_bootstrap_bandit_feedback(
            test_size=test_size,
            is_timeseries_split=is_timeseries_split,
            random_state=b)
        # split data into two folds (data for training reg_model and for ope)
        is_for_reg_model = np.random.binomial(
            n=1, p=0.3, size=boot_bandit_feedback["n_rounds"]).astype(bool)
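        # --- hypothetical continuation (not part of the original excerpt) ---
        # split each field of the bootstrap feedback by the sampled mask:
        # one fold for fitting the regression model, the other for OPE
        fields = ("context", "action", "position", "reward", "pscore")
        feedback_for_reg_model = {k: boot_bandit_feedback[k][is_for_reg_model] for k in fields}
        feedback_for_ope = {k: boot_bandit_feedback[k][~is_for_reg_model] for k in fields}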