class UCBExample:
    """Experiment config comparing UCB against greedy/epsilon-greedy baselines.

    Corresponds to the section labelled '2.6 - Upper-Confidence-Bound
    Action Selection'.
    """
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        # Same greedy policy but with optimistic initial estimates (prior=10).
        Agent(bandit, GreedyPolicy(1), prior=10),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # NOTE(review): 2 is presumably the UCB exploration coefficient — confirm
        # against UCBPolicy's signature.
        Agent(bandit, UCBPolicy(2)),
    ]
class OptimisticInitialValueExample:
    """Experiment config showing the effect of optimistic initial values.

    Sweeps the greedy agent's prior over {0 (default), 1, 2, 5, 10} with an
    epsilon-greedy agent as a baseline.
    """
    label = 'Optimistic Initial Values'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandit, GreedyPolicy(1)),
        # Increasingly optimistic initial value estimates.
        Agent(bandit, GreedyPolicy(1), prior=1),
        Agent(bandit, GreedyPolicy(1), prior=2),
        Agent(bandit, GreedyPolicy(1), prior=5),
        Agent(bandit, GreedyPolicy(1), prior=10),
    ]
class EpsilonGreedyExample:
    """Experiment config comparing greedy vs. epsilon-greedy action selection.

    One pure-greedy agent against epsilon-greedy agents at eps=0.01 and
    eps=0.1, all on the same 10-armed Gaussian bandit.
    """
    label = 'Action-Value Methods'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    # Fix: removed a stray `GreedyPolicy(1)` expression statement that
    # instantiated a policy and immediately discarded it (dead code).
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, EpsilonGreedyPolicy(0.01, 1)),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # Alternative agent configurations kept for experimentation:
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
        # Agent(bandit, GreedyPolicy(10)),
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 10)),
    ]
class BinomialExample:
    """Experiment config for Bayesian (Beta) agents on a Binomial bandit."""
    label = 'Bayesian Bandits - Binomial (n=5)'
    # 10-armed Binomial bandit, n=5 draws per pull.
    # NOTE(review): t=3000 presumably pre-sizes the bandit for 3x the trial
    # count used elsewhere (trials=1000) — confirm against BinomialBandit.
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # Bayesian agent maintaining Beta posteriors over arm payoffs.
        BetaAgent(bandit, GreedyPolicy())
    ]
class BernoulliExample:
    """Experiment config for Bayesian (Beta) agents on a Bernoulli bandit."""
    label = 'Bayesian Bandits - Bernoulli'
    # 10-armed Bernoulli bandit.
    # NOTE(review): t=3000 presumably pre-sizes the bandit for 3x the trial
    # count used elsewhere (trials=1000) — confirm against BernoulliBandit.
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # Bayesian agent maintaining Beta posteriors over arm payoffs.
        BetaAgent(bandit, GreedyPolicy())
    ]
class EpsilonGreedyExample:
    """Experiment config for section '2.2 - Action-Value Methods'.

    Pure-greedy vs. epsilon-greedy (eps=0.01 and eps=0.1) on a 10-armed
    Gaussian bandit, with default (sample-average) value estimates.

    NOTE(review): this class shares its name with another
    ``EpsilonGreedyExample`` in this collection — presumably they live in
    different modules; confirm before combining files.
    """
    label = '2.2 - Action-Value Methods'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy()),
        Agent(bandit, EpsilonGreedyPolicy(0.01)),
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
    ]
class CompareExample:
    """Experiment config comparing one representative agent of each strategy.

    Unlike the other examples, each agent gets its OWN bandit instance, since
    the agents here require different bandit types/parameters.
    """
    # NOTE(review): label looks truncated ('Action') — confirm intended title.
    label = 'Action'
    n_arms = 10
    n_trials = 1000
    # One bandit per agent; index i pairs with agents[i] below.
    bandits = [GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms, mu=4),
               BernoulliBandit(n_arms, t=3 * n_trials)]
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        # Greedy with optimistic initial estimates.
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        # Gradient bandit on the shifted-mean (mu=4) Gaussian bandit.
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1, baseline=False),
        # Bayesian agent on the Bernoulli bandit.
        BetaAgent(bandits[5], GreedyPolicy())
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
    ]
""" Takes advantage of multicore systems to speed up the simulation runs. """ from bandits.agent import Agent, BetaAgent from bandits.bandit import BernoulliBandit from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy from bandits.environment import Environment if __name__ == '__main__': experiments = 500 trials = 1000 bandit = BernoulliBandit(10, t=3 * 1000) agents = [ Agent(bandit, EpsilonGreedyPolicy(0.1)), Agent(bandit, UCBPolicy(1)), BetaAgent(bandit, GreedyPolicy()) ] env = Environment(bandit, agents, label='Bayesian Bandits') scores, optimal = env.run(trials, experiments) env.plot_results(scores, optimal) env.plot_beliefs()