def _create_algorithm(self, base_algorithm, batch_train: bool, **kwargs):
    if self._algorithm_name == 'egreedy':
        self._algorithm = EpsilonGreedy(
            base_algorithm,
            nchoices=self._K,
            random_state=self._rnd.randint(1000),
            explore_prob=kwargs.get('epsilon', 0.1),
            batch_train=batch_train
        )
    elif self._algorithm_name == 'bootstrapped_ucb':
        self._algorithm = BootstrappedUCB(
            base_algorithm,
            nchoices=self._K,
            random_state=self._rnd.randint(1000),
            batch_train=batch_train
        )
    else:
        sys.exit("no such algorithm: %s" % self._algorithm_name)
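
## A minimal usage sketch (not part of the original class): roughly what the
## method above constructs when algorithm_name == 'egreedy', and how such a
## policy is typically driven when batch_train=True. The arm count and the
## data below are made up for illustration; partial_fit/predict are the
## standard contextualbandits policy methods.
import numpy as np
from sklearn.linear_model import SGDClassifier
from contextualbandits.online import EpsilonGreedy

K = 5                                        # placeholder number of arms
policy = EpsilonGreedy(SGDClassifier(loss='log_loss'),  # 'log' in older sklearn
                       nchoices=K,
                       explore_prob=0.1,
                       batch_train=True)

X = np.random.random((64, 10))               # fake context batch
a = np.random.randint(K, size=64)            # arms played at random for a warm-up
r = np.random.binomial(1, 0.5, size=64)      # fake observed rewards
policy.partial_fit(X, a, r)                  # incremental update of the per-arm oracles
a_next = policy.predict(X)                   # arms the policy would choose now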
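
## Hypothetical setup (not shown in the scraped excerpt): the constructions
## below reference base_algorithm, nchoices and beta_prior (and, further down,
## base_sgd and base_ols) without defining them. These definitions are
## plausible stand-ins only, not the original example's values.
from copy import deepcopy
from sklearn.linear_model import SGDClassifier
from contextualbandits.online import (BootstrappedUCB, BootstrappedTS,
                                      SeparateClassifiers, EpsilonGreedy,
                                      AdaptiveGreedy, ActiveExplorer,
                                      SoftmaxExplorer, LinUCB)

nchoices = 10                                    # placeholder number of arms
base_algorithm = SGDClassifier(loss='log_loss')  # supports partial_fit, as batch_train=True requires
beta_prior = ((3. / nchoices, 4.), 2)            # prior used for arms with little data
## base_sgd and base_ols are further oracles defined in the unreproduced part
## of the original example; they are left undefined here.
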
## The base algorithm is embedded in different metaheuristics
bootstrapped_ucb = BootstrappedUCB(deepcopy(base_algorithm),
                                   nchoices=nchoices,
                                   beta_prior=beta_prior,
                                   batch_train=True)
bootstrapped_ts = BootstrappedTS(deepcopy(base_algorithm),
                                 nchoices=nchoices,
                                 beta_prior=beta_prior,
                                 batch_train=True)
one_vs_rest = SeparateClassifiers(deepcopy(base_algorithm),
                                  nchoices=nchoices,
                                  beta_prior=beta_prior,
                                  batch_train=True)
epsilon_greedy = EpsilonGreedy(deepcopy(base_algorithm),
                               nchoices=nchoices,
                               beta_prior=beta_prior,
                               batch_train=True)
epsilon_greedy_nodecay = EpsilonGreedy(deepcopy(base_algorithm),
                                       nchoices=nchoices,
                                       beta_prior=beta_prior,
                                       decay=None,
                                       batch_train=True)
adaptive_greedy_thr = AdaptiveGreedy(deepcopy(base_algorithm),
                                     nchoices=nchoices,
                                     decay_type='threshold',
                                     batch_train=True)
adaptive_greedy_perc = AdaptiveGreedy(deepcopy(base_algorithm),
                                      nchoices=nchoices,
                                      beta_prior=beta_prior,
                                      decay_type='percentile',
                                      decay=0.9997,
                                      batch_train=True,
                                      random_state=4444)
active_explorer = ActiveExplorer(deepcopy(base_sgd),
                                 smoothing=None,
                                 nchoices=nchoices,
                                 beta_prior=((3. / nchoices, 4.), 2),
                                 batch_train=True,
                                 refit_buffer=50,
                                 deep_copy_buffer=False,
                                 random_state=5555)
epsilon_greedy_nodecay = EpsilonGreedy(deepcopy(base_ols),
                                       nchoices=nchoices,
                                       smoothing=(1, 2),
                                       beta_prior=None,
                                       decay=None,
                                       batch_train=True,
                                       deep_copy_buffer=False,
                                       random_state=6666)
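
## Hypothetical constructions (not in the scraped excerpt): the models list
## below also references linucb, adaptive_active_greedy and softmax_explorer,
## whose definitions fall outside the captured code. Plausible stand-in
## constructions with the same library (imports in the setup block above):
linucb = LinUCB(nchoices=nchoices, random_state=1111)
adaptive_active_greedy = AdaptiveGreedy(deepcopy(base_algorithm),
                                        nchoices=nchoices,
                                        active_choice='weighted',
                                        decay_type='percentile',
                                        decay=0.9997,
                                        batch_train=True,
                                        random_state=2222)
softmax_explorer = SoftmaxExplorer(deepcopy(base_algorithm),
                                   nchoices=nchoices,
                                   beta_prior=beta_prior,
                                   batch_train=True,
                                   random_state=3333)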

models = [
    linucb, adaptive_active_greedy, softmax_explorer, adaptive_greedy_perc,
    active_explorer, epsilon_greedy_nodecay
]

# These lists will keep track of the rewards obtained by each policy
rewards_lucb, rewards_aac, rewards_sft, rewards_agr, \
rewards_ac, rewards_egr = [list() for i in range(len(models))]

lst_rewards = [
    rewards_lucb, rewards_aac, rewards_sft, rewards_agr, rewards_ac,