def test_ucb_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke test: UCBPolicy completes a simulation run without errors."""
    # define a solver
    ucbp = UCBPolicy(n_actions=n_actions, lr=0.01)
    policies = [ucbp]
    results = simulate_cb(dataset, n_samples, policies)
    # no operational error
    assert results[0]["simple_regret"] > -1.0
def test_epsilon_greedy_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke test: EpsilonGreedyPolicy completes a simulation run without errors."""
    # define a solver
    egp = EpsilonGreedyPolicy(n_actions=n_actions, lr=0.1, epsilon=0.1)
    policies = [egp]
    results = simulate_cb(dataset, n_samples, policies)
    # no operational error
    assert results[0]["simple_regret"] > -1.0
def test_linucb_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke test: LinUCBPolicy completes a simulation run without errors."""
    # define a solver
    linucbp = LinUCBPolicy(
        n_actions=n_actions,
        context_dim=context_dim,
        delta=0.25,
        train_starts_at=500,
        train_freq=50,
    )
    policies = [linucbp]
    results = simulate_cb(dataset, n_samples, policies)
    # must avoid getting stuck at no eating
    # not sure about synthetic
    assert results[0]["simple_regret"] > -1.0
def test_linear_gaussian_thompson_sampling_policy(n_samples, n_actions, context_dim, dataset):
    """Smoke test: LinearGaussianThompsonSamplingPolicy completes a simulation run without errors."""
    lgtsp = LinearGaussianThompsonSamplingPolicy(
        n_actions=n_actions,
        context_dim=context_dim,
        eta_prior=6.0,
        lambda_prior=0.25,
        train_starts_at=500,
        posterior_update_freq=50,
    )
    policies = [lgtsp]
    results = simulate_cb(dataset, n_samples, policies)
    # must avoid getting stuck at no eating
    # not sure about synthetic
    assert results[0]["simple_regret"] > -1.0