def run_experiment_decaying_epsilon(m1, m2, m3, N):
    """Run a three-armed epsilon-greedy experiment with epsilon = 1 / (t + 1).

    One ``Bandit`` is built for each of ``m1``, ``m2`` and ``m3``
    (presumably the true mean reward of each arm -- confirm against the
    ``Bandit`` class).  On every step a random arm is pulled with
    probability ``1 / (step + 1)``; otherwise the arm whose current
    ``mean`` attribute is largest is pulled.  The reward is fed back to
    that arm via ``update``.

    After the run, the cumulative average reward is plotted on a
    log-scaled x axis together with one horizontal reference line per
    ``m`` value, and the figure is shown with ``plt.show()``.

    Args:
        m1, m2, m3: Values passed to the ``Bandit`` constructor; also the
            heights of the horizontal reference lines in the plot.
        N: Number of pulls to simulate.

    Returns:
        NumPy array of length ``N`` whose element ``i`` is the average of
        the first ``i + 1`` rewards.
    """
    arms = [Bandit(param) for param in (m1, m2, m3)]
    rewards = np.empty(N)

    for step in range(N):
        # The exploration probability decays as 1/(step + 1).  The draw is
        # in [0, 1), so the very first step always explores.
        explore = np.random.random() < 1.0 / (step + 1)
        if explore:
            arm_index = np.random.choice(len(arms))
        else:
            current_estimates = [arm.mean for arm in arms]
            arm_index = np.argmax(current_estimates)

        reward = bandit_reward = arms[arm_index].pull()
        arms[arm_index].update(bandit_reward)
        rewards[step] = reward

    # Running mean of all rewards seen so far (a cumulative average,
    # not a windowed moving average).
    pulls_so_far = np.arange(N) + 1
    cumulative_average = np.cumsum(rewards) / pulls_so_far

    # Learning curve plus one flat reference line per arm parameter.
    plt.plot(cumulative_average)
    for level in (m1, m2, m3):
        plt.plot(np.ones(N) * level)
    plt.xscale('log')
    plt.show()

    return cumulative_average
Beispiel #2
0
def run_experiment_decaying_eps(m1, m2, m3, N):
    """Simulate N pulls of three bandits using a 1/(t + 1) exploration rate.

    At step ``t`` (0-based) a uniformly random machine is chosen with
    probability ``1 / (t + 1)``; otherwise the machine with the highest
    current ``mean`` attribute is chosen.  The chosen machine is pulled
    and its ``update`` method is called with the observed reward.

    Side effects:
        * Plots the cumulative average reward and three horizontal lines
          at ``m1``, ``m2`` and ``m3`` on a log-scaled x axis, then calls
          ``plt.show()``.
        * Prints a two-column table with each machine's ``mean`` and ``m``
          attributes under the headings "Estimate of mean" / "Actual mean".

    Args:
        m1, m2, m3: Constructor arguments for the three ``Bandit`` objects.
        N: Number of pulls to simulate.

    Returns:
        NumPy array of length ``N``; entry ``i`` is the mean of the first
        ``i + 1`` observed rewards.
    """
    machines = [Bandit(value) for value in (m1, m2, m3)]
    observed = np.empty(N)

    for t in range(N):
        # Decaying epsilon-greedy: explore with probability 1/(t + 1),
        # otherwise exploit the best current estimate.
        if np.random.random() < 1. / (t + 1):
            pick = np.random.choice(len(machines))
        else:
            estimates = [machine.mean for machine in machines]
            pick = np.argmax(estimates)

        # Pull the selected machine and fold the reward into its estimate.
        chosen = machines[pick]
        reward = chosen.pull()
        chosen.update(reward)
        observed[t] = reward

    pull_counts = np.arange(N) + 1
    cumulative_avg = np.cumsum(observed) / pull_counts

    # Learning curve followed by one flat reference line per m value.
    plt.plot(cumulative_avg)
    for reference in (m1, m2, m3):
        plt.plot(np.ones(N) * reference)
    plt.xscale('log')
    plt.show()

    # Report each machine's estimated mean next to its ``m`` attribute.
    print('Estimate of mean    Actual mean')
    for entry in machines:
        print('{:<20}{}'.format(entry.mean, entry.m))

    return cumulative_avg
Beispiel #3
0
def run_experiment_decaying_epsilon(m1, m2, m3, N):
  bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]

  data = np.empty(N)