                             beta_prior=None, explore_rounds=1500, batch_train=True)
active_explorer = ActiveExplorer(deepcopy(base_algorithm), nchoices=nchoices,
                                 beta_prior=beta_prior, batch_train=True)
adaptive_active_greedy = AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices,
                                        beta_prior=beta_prior, active_choice='weighted',
                                        decay_type='percentile', decay=0.9997,
                                        batch_train=True)
softmax_explorer = SoftmaxExplorer(deepcopy(base_algorithm), nchoices=nchoices,
                                   beta_prior=beta_prior, batch_train=True)

models = [bootstrapped_ucb, bootstrapped_ts, one_vs_rest, epsilon_greedy,
          epsilon_greedy_nodecay, adaptive_greedy_thr, adaptive_greedy_perc,
          explore_first, active_explorer, adaptive_active_greedy, softmax_explorer]

# These lists will keep track of the rewards obtained by each policy
rewards_ucb, rewards_ts, rewards_ovr, rewards_egr, rewards_egr2, \
    rewards_agr, rewards_agr2, rewards_efr, rewards_ac, \
    rewards_aac, rewards_sft = [list() for i in range(len(models))]

lst_rewards = [rewards_ucb, rewards_ts, rewards_ovr, rewards_egr, rewards_egr2,
               rewards_agr, rewards_agr2, rewards_efr, rewards_ac,
               rewards_aac, rewards_sft]
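# Not part of the original script: a minimal sketch of how these policies and
# reward lists are typically wired together in a streaming simulation.
# Assumed names not defined above: `X` is the context matrix, `y` is the
# full-information binary label matrix (one column per arm), and `batch_size`
# is the batch length; `simulate_batch` is a hypothetical helper, not part of
# the contextualbandits API.
import numpy as np

rng = np.random.default_rng(123)

def simulate_batch(model, rewards, X_batch, y_batch):
    actions = model.predict(X_batch).astype('uint8')    # arms chosen by the policy
    r = y_batch[np.arange(y_batch.shape[0]), actions]   # reward = label of the chosen arm
    rewards.append(r.mean())                            # track mean reward for this batch
    model.partial_fit(X_batch, actions, r)              # incremental update (batch_train=True)

# Warm-start each policy on the first batch with uniformly random actions,
# then let it choose its own actions for the remaining batches.
actions_0 = rng.integers(nchoices, size=batch_size)
for model, rewards in zip(models, lst_rewards):
    model.fit(X[:batch_size], actions_0, y[np.arange(batch_size), actions_0])
    for start in range(batch_size, X.shape[0], batch_size):
        simulate_batch(model, rewards,
                       X[start:start + batch_size], y[start:start + batch_size])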
                   decay_type='percentile', decay=0.9997),
    AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior,
                   active_choice='weighted', decay_type='percentile', decay=0.9997),
    BootstrappedTS(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior),
    BootstrappedUCB(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior),
    SoftmaxExplorer(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior)
]

model_names_opts = ["adaptive_greedy_thres", "adaptive_greedy_perc",
                    "adaptive_active_greedy", "bootstrapped_ts",
                    "bootstrapped_ucb", "softmax"]

'''
models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, decay_type='threshold')]
models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior,
                         decay_type='percentile', decay=0.9997)]
models = [AdaptiveGreedy(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior,
                         active_choice='weighted', decay_type='percentile', decay=0.9997)]
#models = [BootstrappedTS(deepcopy(base_algorithm), nchoices=nchoices, beta_prior=beta_prior)]
'''
model_names = ["adaptive_greedy_thres"]
### Important!!! the default hyperparameters for LinUCB in the reference paper
### are very different from what's used in this example
adaptive_active_greedy = AdaptiveGreedy(deepcopy(base_ols), nchoices=nchoices,
                                        smoothing=None, beta_prior=((3. / nchoices, 4.), 2),
                                        active_choice='weighted', decay_type='percentile',
                                        decay=0.9997, batch_train=True)
softmax_explorer = SoftmaxExplorer(deepcopy(base_sgd), nchoices=nchoices,
                                   smoothing=(1, 2), beta_prior=None, batch_train=True,
                                   refit_buffer=50, deep_copy_buffer=False,
                                   random_state=3333)
adaptive_greedy_perc = AdaptiveGreedy(deepcopy(base_ols), nchoices=nchoices,
                                      smoothing=(1, 2), beta_prior=None,
                                      decay_type='percentile', decay=0.9997,
                                      batch_train=True, random_state=4444)
active_explorer = ActiveExplorer(deepcopy(base_sgd), smoothing=None, nchoices=nchoices,
                                 beta_prior=((3. / nchoices, 4.), 2),