import time

import numpy as np

from plot import Plot
from bandit_algorithms import IncrementalUniformAlgorithm
from bandit_algorithms import UCBAlgorithm
from bandit_algorithms import EpsilonGreedyAlgorithm
# Regret (a per-trial regret tracker) is defined elsewhere in this project


def run_bandit_experiment(bandit, num_pulls, num_trials):
    # specify bandit algorithms below
    algorithm1 = IncrementalUniformAlgorithm(bandit)
    algorithm2 = UCBAlgorithm(bandit)
    algorithm3 = EpsilonGreedyAlgorithm(bandit)
    algorithms = [algorithm1, algorithm2, algorithm3]

    # keep track of data for plotting
    plot_sample_rate = 1
    plot = Plot(num_pulls, num_trials, [a.get_name() for a in algorithms], plot_sample_rate)

    # experiment loop
    for a in algorithms:
        print '\nRunning algorithm {0}...'.format(a.get_name())
        plot.reset_trial()
        best_arms = np.zeros(num_trials)
        for t in range(num_trials):
            print 'Running trial {0}...'.format(t)
            start = time.time()
            plot.begin_trial()
            optimal_expected_reward = bandit.get_expected_reward_optimal_arm()
            regret = Regret(optimal_expected_reward)
            a.reset(bandit)
            for i in range(num_pulls):
                # pull arm according to algorithm
                pulled_arm, _ = a.pull()

                # update regrets
                best_arm = a.get_best_arm()
                expected_reward_pulled_arm = bandit.get_expected_reward_arm(pulled_arm)
                expected_reward_best_arm = bandit.get_expected_reward_arm(best_arm)
                regret.add(expected_reward_pulled_arm, expected_reward_best_arm)

                # update plot
                if i % plot_sample_rate == 0:
                    plot.add_point(i, regret.get_simple_regret(), regret.get_cumulative_regret(), a.get_name())

            end = time.time()
            print '\telapsed: {0}'.format(end - start)
            print '\tbest arm: {0}'.format(a.get_best_arm())
            best_arms[t] = a.get_best_arm()

        print "Best arm distribution: "
        print np.histogram(best_arms, bins=range(21))

    # create plot
    plot.plot_simple_regret(bandit.get_name())
    plot.plot_cumulative_regret(bandit.get_name())

    # save
    plot.save('{0}_data'.format(bandit.get_name()))
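For reference, a minimal way to drive this function might look like the sketch below. It reuses the SBRDBandit constructor and the num_pulls/num_trials values that appear in the re-plotting script further down; the (1, 1) arm parameters are placeholders for illustration, not the settings of any reported experiment.

if __name__ == '__main__':
    from bandit import SBRDBandit

    # placeholder arm parameters, not real experiment settings
    arm_params = [(1, 1)]
    bandit = SBRDBandit(arm_params, 'custom_bandit')
    run_bandit_experiment(bandit, num_pulls=10001, num_trials=1000)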
# This is a hacky way of re-plotting graphs...
from plot import Plot
from bandit_algorithms import IncrementalUniformAlgorithm
from bandit_algorithms import UCBAlgorithm
from bandit_algorithms import EpsilonGreedyAlgorithm
from bandit import SBRDBandit

# load old plot
arm_params = [(1, 1)]  # dummy params
b = SBRDBandit(arm_params, 'custom_bandit')
num_pulls = 10001
num_trials = 1000
plot_sample_rate = 1
algorithms = [IncrementalUniformAlgorithm(b), UCBAlgorithm(b), EpsilonGreedyAlgorithm(b)]
plot = Plot(num_pulls, num_trials, [a.get_name() for a in algorithms], plot_sample_rate)
print "loading data..."
plot.load('custom_bandit_data.npz')

# new plot
print "creating plots..."
sample_rate = 1
end_index = 501
plot.plot_cumulative_regret('new_' + b.get_name(), sample_rate, end_index)
plot.plot_simple_regret('new_' + b.get_name(), sample_rate, end_index)