def run_experiment_decaying_epsilon(m1, m2, m3, N):
    """Run an epsilon-greedy experiment on three bandits with epsilon = 1/(t+1).

    Three ``Bandit`` objects are built from ``m1``, ``m2`` and ``m3``
    (``Bandit`` is defined outside this block). On each of the ``N`` steps a
    uniform random number is drawn; if it falls below ``1 / (step + 1)`` a
    bandit index is chosen at random, otherwise the bandit whose ``mean``
    attribute is largest is chosen. The chosen bandit is pulled, the value is
    fed back through ``update`` and recorded.

    After the loop the cumulative average of the recorded values is plotted
    together with three constant lines at ``m1``, ``m2`` and ``m3`` on a
    log-scaled x axis, and the figure is shown.

    Args:
        m1: Constructor argument for the first bandit; also plotted as a
            constant reference line.
        m2: Same, for the second bandit.
        m3: Same, for the third bandit.
        N: Number of pulls to perform.

    Returns:
        A length-``N`` NumPy array holding the cumulative average of the
        values returned by ``pull()``.
    """
    arms = [Bandit(m1), Bandit(m2), Bandit(m3)]
    rewards = np.empty(N)

    for step in range(N):
        # The exploration probability shrinks as 1, 1/2, 1/3, ...
        draw = np.random.random()
        epsilon = 1.0 / (step + 1)
        if draw < epsilon:
            # Explore: choose any arm index at random.
            chosen = np.random.choice(len(arms))
        else:
            # Exploit: choose the arm with the largest current `mean`.
            chosen = np.argmax([arm.mean for arm in arms])

        reward = arms[chosen].pull()
        arms[chosen].update(reward)
        rewards[step] = reward

    pull_counts = np.arange(1, N + 1)
    cumulative_average = np.cumsum(rewards) / pull_counts

    # Plot the cumulative average against a flat reference line per bandit.
    plt.plot(cumulative_average)
    for reference_level in (m1, m2, m3):
        plt.plot(np.ones(N) * reference_level)
    plt.xscale('log')
    plt.show()

    return cumulative_average
def run_experiment_decaying_eps(m1, m2, m3, N):
    """Run a decaying-epsilon greedy experiment on three bandits and report it.

    Three ``Bandit`` objects are built from ``m1``, ``m2`` and ``m3``
    (``Bandit`` is defined outside this block). For each of the ``N`` steps a
    uniform random number is compared against ``1 / (step + 1)``: below that
    threshold a bandit index is picked at random, otherwise the bandit with
    the largest ``mean`` attribute is picked. The picked bandit is pulled and
    the value is passed to its ``update`` method and stored.

    The cumulative average of the stored values is then plotted with three
    constant reference lines at ``m1``, ``m2`` and ``m3`` (log-scaled x axis),
    the figure is shown, and a small table of each bandit's ``mean`` and
    ``m`` attributes is printed.

    Args:
        m1: Constructor argument for the first bandit; also plotted as a
            constant reference line.
        m2: Same, for the second bandit.
        m3: Same, for the third bandit.
        N: Number of pulls to perform.

    Returns:
        A length-``N`` NumPy array with the cumulative average of the values
        returned by ``pull()``.
    """
    arms = [Bandit(m1), Bandit(m2), Bandit(m3)]
    rewards = np.empty(N)

    for step in range(N):
        # Epsilon-greedy choice with epsilon decaying as 1 / (step + 1).
        draw = np.random.random()
        threshold = 1. / (step + 1)
        if draw < threshold:
            arm_index = np.random.choice(len(arms))
        else:
            arm_index = np.argmax([arm.mean for arm in arms])

        # Pull the chosen arm and feed the observation back into it.
        reward = arms[arm_index].pull()
        arms[arm_index].update(reward)
        rewards[step] = reward

    pull_counts = np.arange(1, N + 1)
    cumulative_avg = np.cumsum(rewards) / pull_counts

    plt.plot(cumulative_avg)
    for reference_level in (m1, m2, m3):
        plt.plot(np.ones(N) * reference_level)
    plt.xscale('log')
    plt.show()

    # Report each arm's `mean` next to its `m` attribute.
    print('Estimate of mean Actual mean')
    for arm in arms:
        print('{:<20}{}'.format(arm.mean, arm.m))

    return cumulative_avg
def run_experiment_decaying_epsilon(m1, m2, m3, N): bandits = [Bandit(m1), Bandit(m2), Bandit(m3)] data = np.empty(N)