class UCBExample:
    # Experiment configuration: epsilon-greedy vs. UCB action selection
    # on a single shared 10-armed Gaussian bandit. The label presumably
    # refers to a figure/section number in the accompanying text —
    # TODO confirm against the plotting code.
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(2))
    ]
class BinomialExample:
    # Experiment configuration: Bayesian Beta-posterior agent vs.
    # epsilon-greedy and UCB baselines on a 10-armed binomial bandit
    # with n=5 draws per pull.
    label = 'Bayesian Bandits - Binomial (n=5)'
    # t=3 * 1000 — presumably the number of reward samples to
    # pre-generate; verify against the BinomialBandit implementation.
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
class BernoulliExample:
    # Experiment configuration: Bayesian Beta-posterior agent vs.
    # epsilon-greedy and UCB baselines on a 10-armed Bernoulli bandit.
    label = 'Bayesian Bandits - Bernoulli'
    # t=3 * 1000 — presumably the number of reward samples to
    # pre-generate; verify against the BernoulliBandit implementation.
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
class CompareExample:
    # Head-to-head comparison of several agent/policy combinations.
    # Each agent is paired with its OWN bandit instance so that one
    # agent's pulls cannot influence another agent's reward stream.
    label = 'Action'
    n_arms = 10
    n_trials = 1000
    # Four identically-parameterized Gaussian bandits, one Gaussian
    # bandit with shifted mean (mu=4) for the gradient agent, and one
    # Bernoulli bandit for the Bayesian agent. The repetition is
    # deliberate: a comprehension cannot see class-scope names in
    # Python 3, and [GaussianBandit(n_arms)] * 4 would alias a single
    # instance.
    bandits = [GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms, mu=4),
               BernoulliBandit(n_arms, t=3 * n_trials)]
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1,
                      baseline=False),
        BetaAgent(bandits[5], GreedyPolicy())
    ]
"""
Takes advantage of multicore systems to speed up the simulation runs.
"""
from bandits.agent import Agent, BetaAgent
from bandits.bandit import BernoulliBandit
from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy
from bandits.environment import Environment

if __name__ == '__main__':
    experiments = 500
    trials = 1000

    # Derive the bandit's pre-sampled horizon from the trial count
    # instead of repeating the magic number (was t=3 * 1000, which is
    # numerically identical but silently decoupled from `trials`).
    # The exact semantics of `t` live in BernoulliBandit — presumably a
    # pre-generated sample count; TODO confirm.
    bandit = BernoulliBandit(10, t=3 * trials)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
    env = Environment(bandit, agents, label='Bayesian Bandits')
    # Run `experiments` independent repetitions of `trials` pulls each,
    # then plot average scores, optimal-action frequency, and the
    # Bayesian agent's posterior beliefs.
    scores, optimal = env.run(trials, experiments)
    env.plot_results(scores, optimal)
    env.plot_beliefs()