Python BernoulliTS.BernoulliTS Examples

Programming Language: Python

Namespace/Package Name: obp.policy

Class/Type: BernoulliTS

Method/Function: BernoulliTS

Examples at hotexamples.com: 3

Python BernoulliTS.BernoulliTS - 3 examples found. These are the top-rated real-world Python examples of obp.policy.BernoulliTS.BernoulliTS extracted from open source projects. You can rate examples to help us improve their quality.

Frequently Used Methods

Show Hide

BernoulliTS(3)

compute_batch_action_dist(3)

Example #1

Show file

    print(args)

    # configurations
    n_runs = args.n_runs
    n_rounds = args.n_rounds
    n_actions = args.n_actions
    dim_context = args.dim_context
    n_sim = args.n_sim
    evaluation_policy_name = args.evaluation_policy_name
    n_jobs = args.n_jobs
    random_state = args.random_state
    np.random.seed(random_state)

    # define evaluation policy
    evaluation_policy_dict = dict(
        bernoulli_ts=BernoulliTS(n_actions=n_actions,
                                 random_state=random_state),
        epsilon_greedy=EpsilonGreedy(n_actions=n_actions,
                                     epsilon=0.1,
                                     random_state=random_state),
        lin_epsilon_greedy=LinEpsilonGreedy(dim=dim_context,
                                            n_actions=n_actions,
                                            epsilon=0.1,
                                            random_state=random_state),
        lin_ts=LinTS(dim=dim_context,
                     n_actions=n_actions,
                     random_state=random_state),
        lin_ucb=LinUCB(dim=dim_context,
                       n_actions=n_actions,
                       random_state=random_state),
        logistic_epsilon_greedy=LogisticEpsilonGreedy(
            dim=dim_context,

Example #2

Show file

                            data_path=data_path)
    # ground-truth policy value of an evaluation policy,
    # which is estimated with factual (observed) rewards (on-policy estimation)
    ground_truth_policy_value = OpenBanditDataset.calc_on_policy_policy_value_estimate(
        behavior_policy=evaluation_policy,
        campaign=campaign,
        data_path=data_path,
        test_size=test_size,
        is_timeseries_split=is_timeseries_split,
    )
    # compute action distribution by evaluation policy
    if evaluation_policy == "bts":
        policy = BernoulliTS(
            n_actions=obd.n_actions,
            len_list=obd.len_list,
            is_zozotown_prior=
            True,  # replicate the policy in the ZOZOTOWN production
            campaign=campaign,
            random_state=random_state,
        )
    else:
        policy = Random(
            n_actions=obd.n_actions,
            len_list=obd.len_list,
            random_state=random_state,
        )
    action_dist_single_round = policy.compute_batch_action_dist(
        n_sim=n_sim_to_compute_action_dist)

    def process(b: int):
        # load the pre-trained regression model
        with open(reg_model_path / f"reg_model_{b}.pkl", "rb") as f:

Example #3

Show file

def main(cfg: DictConfig) -> None:
    """Run one OPE hyperparameter experiment described by ``cfg`` and save the results.

    The function reads its settings from ``cfg.setting``,
    ``cfg.estimator_hyperparams`` and ``cfg.reg_model_hyperparams``, builds
    one ``DoublyRobustWithShrinkage`` estimator per configured lambda plus a
    ``DoublyRobustWithShrinkageTuning`` estimator, and evaluates them with
    ``InterpretableOPEEvaluator``.

    When ``cfg.setting.behavior_policy`` is ``"random"`` the logged feedback
    comes from the ``"random"`` dataset and the evaluation policy is
    ``BernoulliTS``; otherwise the logged feedback comes from the ``"bts"``
    dataset and the evaluation policy is ``Random``.

    Side effects: prints ``cfg`` and the result table, seeds NumPy's global
    RNG, writes ``root_mse.csv`` and ``nonparam_ttests.csv`` under
    ``./outputs/hypara`` and emits progress messages through ``logger``.

    Parameters
    ----------
    cfg:
        Experiment configuration providing the ``setting``,
        ``estimator_hyperparams`` and ``reg_model_hyperparams`` sections.
    """
    print(cfg)
    logger.info(f"The current working directory is {Path().cwd()}")
    start_time = time.time()
    logger.info("initializing experimental condition..")

    # OPE estimators to compare: one per lambda, then the tuning variant
    shrinkage_lambdas = list(dict(cfg.estimator_hyperparams)["lambdas"])
    ope_estimators = []
    for lam in shrinkage_lambdas:
        ope_estimators.append(
            DoublyRobustWithShrinkage(
                lambda_=lam,
                estimator_name=f"DRos ({lam})",
            )
        )
    ope_estimators.append(
        DoublyRobustWithShrinkageTuning(
            lambdas=shrinkage_lambdas,
            estimator_name="DRos (tuning)",
        )
    )

    # experiment settings
    setting = cfg.setting
    n_seeds = setting.n_seeds
    sample_size = setting.sample_size
    reg_model = setting.reg_model
    campaign = setting.campaign
    behavior_policy = setting.behavior_policy
    test_size = setting.test_size
    is_timeseries_split = setting.is_timeseries_split
    n_folds = setting.n_folds
    if setting.is_full_obd:
        obd_path = Path().cwd().parents[5] / "open_bandit_dataset"
    else:
        # no explicit path is passed on to the dataset / ground-truth helpers
        obd_path = None
    random_state = setting.random_state
    np.random.seed(random_state)

    # datasets logged under each of the two behavior policies
    dataset_ts = OpenBanditDataset(
        behavior_policy="bts",
        campaign=campaign,
        data_path=obd_path,
    )
    dataset_ur = OpenBanditDataset(
        behavior_policy="random",
        campaign=campaign,
        data_path=obd_path,
    )

    def _logged_feedback(dataset):
        # With a time-series split only the first element of the returned
        # pair is kept; otherwise the call's result is used as is.
        if is_timeseries_split:
            return dataset.obtain_batch_bandit_feedback(
                test_size=test_size,
                is_timeseries_split=True,
            )[0]
        return dataset.obtain_batch_bandit_feedback()

    # logged bandit feedback, ground truth and evaluation policy
    if behavior_policy == "random":
        logged_feedback = _logged_feedback(dataset_ur)
        # ground-truth value of the evaluation policy ("bts")
        ground_truth = OpenBanditDataset.calc_on_policy_policy_value_estimate(
            behavior_policy="bts",
            campaign=campaign,
            data_path=obd_path,
            test_size=test_size,
            is_timeseries_split=is_timeseries_split,
        )
        evaluation_policy = BernoulliTS(
            n_actions=dataset_ts.n_actions,
            len_list=dataset_ts.len_list,
            random_state=random_state,
            is_zozotown_prior=True,
            campaign=campaign,
        )
    else:
        logged_feedback = _logged_feedback(dataset_ts)
        # ground-truth value of the evaluation policy ("random")
        ground_truth = OpenBanditDataset.calc_on_policy_policy_value_estimate(
            behavior_policy="random",
            campaign=campaign,
            data_path=obd_path,
            test_size=test_size,
            is_timeseries_split=is_timeseries_split,
        )
        evaluation_policy = Random(
            n_actions=dataset_ur.n_actions,
            len_list=dataset_ur.len_list,
            random_state=random_state,
        )
    # action choice probabilities of the chosen evaluation policy
    action_dist = evaluation_policy.compute_batch_action_dist(n_rounds=1000000)
    bandit_feedbacks = [logged_feedback]
    evaluation_policies = [(ground_truth, action_dist)]

    # regression model used inside the OPE estimators
    hyperparams = dict(cfg.reg_model_hyperparams)[reg_model]
    regression_model_cls = reg_model_dict[reg_model]
    regression_models = [regression_model_cls(**hyperparams)]

    # evaluator over all seeds
    evaluator = InterpretableOPEEvaluator(
        random_states=np.arange(n_seeds),
        bandit_feedbacks=bandit_feedbacks,
        evaluation_policies=evaluation_policies,
        ope_estimators=ope_estimators,
        regression_models=regression_models,
    )

    # run the evaluation of OPE experiment; the return value is not used
    logger.info("experiment started")
    evaluator.estimate_policy_value(sample_size=sample_size, n_folds_=n_folds)
    # summary statistics
    mean = evaluator.calculate_mean(root=True)
    mean_scaled = evaluator.calculate_mean(scale=True, root=True)

    # output directory for the evaluation results
    log_path = Path("./outputs/hypara")
    log_path.mkdir(exist_ok=True, parents=True)
    # save root mse
    root_mse_df = DataFrame(
        {
            "estimator": list(mean.keys()),
            "mean": list(mean.values()),
            "mean(scaled)": list(mean_scaled.values()),
        }
    )
    root_mse_df.to_csv(log_path / "root_mse.csv")
    # pairwise t-tests on the squared errors, reshaped to long format
    squared_errors = DataFrame(evaluator.calculate_squared_error())
    se_df = (
        DataFrame(squared_errors.stack())
        .reset_index(1)
        .rename(columns={"level_1": "estimators", 0: "se"})
    )
    ttest_result = pg.pairwise_ttests(
        data=se_df,
        dv="se",
        parametric=False,
        between="estimators",
    )
    nonparam_ttests = ttest_result.round(4).drop(
        ["Contrast", "Parametric", "Paired"], axis=1
    )
    nonparam_ttests.to_csv(log_path / "nonparam_ttests.csv")
    # print result
    print(root_mse_df)
    experiment = f"{campaign}-{behavior_policy}-{sample_size}"
    elapsed_seconds = time.time() - start_time
    elapsed_time = np.round(elapsed_seconds / 60, 2)
    logger.info(f"finish experiment {experiment} in {elapsed_time}min")