Example #1
from copy import deepcopy
from contextualbandits.online import (
    BootstrappedUCB, BootstrappedTS, SeparateClassifiers, EpsilonGreedy,
    AdaptiveGreedy, ExploreFirst, ActiveExplorer, SoftmaxExplorer)

# NOTE: the excerpt begins mid-script; base_algorithm, nchoices, beta_prior
# and the first seven policies referenced in models are defined above the cut.
explore_first = ExploreFirst(deepcopy(base_algorithm),
                             nchoices=nchoices,
                             beta_prior=None,
                             explore_rounds=1500,
                             batch_train=True)
active_explorer = ActiveExplorer(deepcopy(base_algorithm),
                                 nchoices=nchoices,
                                 beta_prior=beta_prior,
                                 batch_train=True)
adaptive_active_greedy = AdaptiveGreedy(deepcopy(base_algorithm),
                                        nchoices=nchoices,
                                        beta_prior=beta_prior,
                                        active_choice='weighted',
                                        decay_type='percentile',
                                        decay=0.9997,
                                        batch_train=True)
softmax_explorer = SoftmaxExplorer(deepcopy(base_algorithm),
                                   nchoices=nchoices,
                                   beta_prior=beta_prior,
                                   batch_train=True)

models = [
    bootstrapped_ucb, bootstrapped_ts, one_vs_rest, epsilon_greedy,
    epsilon_greedy_nodecay, adaptive_greedy_thr, adaptive_greedy_perc,
    explore_first, active_explorer, adaptive_active_greedy, softmax_explorer
]

# These lists will keep track of the rewards obtained by each policy
rewards_ucb, rewards_ts, rewards_ovr, rewards_egr, rewards_egr2, \
rewards_agr, rewards_agr2, rewards_efr, rewards_ac, \
rewards_aac, rewards_sft = [list() for i in range(len(models))]

lst_rewards = [
    rewards_ucb, rewards_ts, rewards_ovr, rewards_egr, rewards_egr2,
    rewards_agr, rewards_agr2, rewards_efr, rewards_ac,
    rewards_aac, rewards_sft
]
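The listing only constructs the policies; the simulation loop that consumes models and lst_rewards is cut off in this excerpt. Below is a minimal sketch of the usual pattern, assuming a context matrix X and a one-hot reward matrix y_onehot that are not part of the excerpt; run_policy is a hypothetical helper, not part of the contextualbandits API:

import numpy as np

def run_policy(model, X, y_onehot, batch_size, rewards):
    # Seed the policy with random actions on the first batch, then
    # alternate predict -> observe rewards -> partial_fit on later batches.
    n, k = X.shape[0], y_onehot.shape[1]
    actions = np.random.randint(k, size=batch_size)
    r = y_onehot[np.arange(batch_size), actions]
    model.fit(X[:batch_size], actions, r)
    rewards.append(r.sum())
    for start in range(batch_size, n, batch_size):
        Xb = X[start:start + batch_size]
        acts = model.predict(Xb).astype(int)
        rb = y_onehot[np.arange(start, start + Xb.shape[0]), acts]
        rewards.append(rb.sum())
        model.partial_fit(Xb, acts, rb)  # available because batch_train=True

for model, rew in zip(models, lst_rewards):
    run_policy(model, X, y_onehot, batch_size=50, rewards=rew)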
Example #2
    # the first two entries are reconstructed: the excerpt begins mid-list
    models = [
        AdaptiveGreedy(deepcopy(base_algorithm),
                       nchoices=nchoices,
                       decay_type='threshold'),
        AdaptiveGreedy(deepcopy(base_algorithm),
                       nchoices=nchoices,
                       beta_prior=beta_prior,
                       decay_type='percentile',
                       decay=0.9997),
        AdaptiveGreedy(deepcopy(base_algorithm),
                       nchoices=nchoices,
                       beta_prior=beta_prior,
                       active_choice='weighted',
                       decay_type='percentile',
                       decay=0.9997),
        BootstrappedTS(deepcopy(base_algorithm),
                       nchoices=nchoices,
                       beta_prior=beta_prior),
        BootstrappedUCB(deepcopy(base_algorithm),
                        nchoices=nchoices,
                        beta_prior=beta_prior),
        SoftmaxExplorer(deepcopy(base_algorithm),
                        nchoices=nchoices,
                        beta_prior=beta_prior)
    ]

    model_names_opts = [
        "adaptive_greedy_thres", "adaptive_greedy_perc",
        "adaptive_active_greedy", "bootstrapped_ts", "bootstrapped_ucb",
        "softmax"
    ]
    '''
    models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, decay_type='threshold')]
    models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices = nchoices, beta_prior=beta_prior, decay_type='percentile', decay=0.9997)]
    models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices = nchoices, beta_prior=beta_prior, active_choice='weighted', decay_type='percentile', decay=0.9997)]
    #models = [BootstrappedTS(deepcopy(base_algorithm), nchoices = nchoices, beta_prior=beta_prior)]

    model_names = ["adaptive_greedy_thres"]
    '''
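These parallel lists pair each policy with a display name; a minimal sketch of consuming them, reusing the hypothetical run_policy helper from Example #1 (the data X, y_onehot are again assumed, not shown):

cum_rewards = {}
for name, policy in zip(model_names_opts, models):
    per_batch = []
    run_policy(policy, X, y_onehot, batch_size=50, rewards=per_batch)
    cum_rewards[name] = sum(per_batch)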
Example #3
### Important: the default hyperparameters for LinUCB in the reference paper
### are very different from those used in this example.
adaptive_active_greedy = AdaptiveGreedy(
    deepcopy(base_ols),
    nchoices=nchoices,
    smoothing=None,
    beta_prior=((3. / nchoices, 4.), 2),
    active_choice='weighted',
    decay_type='percentile',
    decay=0.9997,
    batch_train=True,
)
softmax_explorer = SoftmaxExplorer(deepcopy(base_sgd),
                                   nchoices=nchoices,
                                   smoothing=(1, 2),
                                   beta_prior=None,
                                   batch_train=True,
                                   refit_buffer=50,
                                   deep_copy_buffer=False,
                                   random_state=3333)
adaptive_greedy_perc = AdaptiveGreedy(deepcopy(base_ols),
                                      nchoices=nchoices,
                                      smoothing=(1, 2),
                                      beta_prior=None,
                                      decay_type='percentile',
                                      decay=0.9997,
                                      batch_train=True,
                                      random_state=4444)
active_explorer = ActiveExplorer(deepcopy(base_sgd),
                                 smoothing=None,
                                 nchoices=nchoices,
                                 beta_prior=((3. / nchoices, 4.), 2),
                                 batch_train=True)  # closing assumed; the excerpt cuts off here
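For context, the two recurring cold-start arguments in these constructors behave as follows in contextualbandits: beta_prior=((a, b), n) samples an arm's score from a Beta(a, b) distribution until that arm has been fit on at least n observations, and smoothing=(a, b) shrinks an arm's prediction towards a/b while its sample count is small. A short, illustrative restatement (the nchoices value is made up):

nchoices = 10
beta_prior = ((3. / nchoices, 4.), 2)  # score ~ Beta(0.3, 4) until an arm has 2 observations
smoothing = (1, 2)                     # prediction smoothed roughly as (yhat*n + 1) / (n + 2)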