Python Bandit Beispiele

Programmiersprache: Python

Namespace / Paketname: comparing_epsilons

Klasse / Typ: Bandit

Beispiele auf hotexamples.com: 3

Python Bandit - 3 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die comparing_epsilons.Bandit, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Häufig verwendete Methoden

Anzeigen Verbergen

Bandit(3)

Häufig verwendete Methoden

Bandit (3)

Beispiel #1

Datei anzeigen

Datei: bayesian_compared.py Projekt: trandinhhieu1989/reinforcement_learning

def run_experiment_decaying_epsilon(m1, m2, m3, N):
    """Run a three-armed bandit experiment with a 1/(t+1) exploration rate.

    Three ``Bandit`` objects are built from ``m1``, ``m2`` and ``m3``. On
    each of the ``N`` steps a uniform random number decides whether to pick
    an arm at random (probability ``1 / (step + 1)``) or to pick the arm
    whose ``mean`` attribute is currently largest. The chosen arm is pulled,
    the result is fed back through ``update`` and recorded.

    The running average of the recorded values is plotted on a log-scaled
    x axis together with horizontal lines at ``m1``, ``m2`` and ``m3``.

    Args:
        m1, m2, m3: Values passed to the ``Bandit`` constructor; also drawn
            as reference lines (presumably the true arm means -- confirm
            against the ``Bandit`` class).
        N: Number of steps to simulate.

    Returns:
        Array of length ``N`` with the cumulative average of the pulled
        values after each step.
    """
    arms = [Bandit(m) for m in (m1, m2, m3)]
    rewards = np.empty(N)

    for step in range(N):
        # Exploration probability shrinks as 1/(step + 1); the first step
        # therefore always explores.
        explore = np.random.random() < 1.0 / (step + 1)
        if explore:
            chosen = np.random.choice(3)
        else:
            chosen = np.argmax([arm.mean for arm in arms])

        arm = arms[chosen]
        reward = arm.pull()
        arm.update(reward)
        rewards[step] = reward

    pulls_so_far = np.arange(N) + 1
    cumulative_average = np.cumsum(rewards) / pulls_so_far

    # Running average first, then one flat reference line per constructor value.
    plt.plot(cumulative_average)
    for level in (m1, m2, m3):
        plt.plot(np.ones(N) * level)
    plt.xscale('log')
    plt.show()

    return cumulative_average

Beispiel #2

Datei anzeigen

def run_experiment_decaying_eps(m1, m2, m3, N):
    """Simulate three bandits under epsilon-greedy with epsilon = 1/(t+1).

    Builds one ``Bandit`` per value in ``(m1, m2, m3)`` and runs ``N``
    rounds. Each round draws a uniform random number; if it falls below
    ``1 / (round + 1)`` an arm index is chosen at random, otherwise the arm
    with the largest ``mean`` attribute is chosen. The selected arm is
    pulled and updated with the result.

    Afterwards the cumulative average of the results is plotted (log-scaled
    x axis) with flat lines at ``m1``, ``m2`` and ``m3``, and a two-column
    table of each bandit's ``mean`` and ``m`` attributes is printed.

    Args:
        m1, m2, m3: Values handed to the ``Bandit`` constructor and drawn
            as reference lines.
        N: Number of rounds.

    Returns:
        Array of length ``N`` holding the cumulative average after each
        round.
    """
    machines = [Bandit(m) for m in (m1, m2, m3)]
    outcomes = np.empty(N)

    for t in range(N):
        # Epsilon-greedy choice with epsilon decaying as 1/(t + 1).
        draw = np.random.random()
        epsilon = 1. / (t + 1)
        if draw < epsilon:
            pick = np.random.choice(3)
        else:
            estimates = [machine.mean for machine in machines]
            pick = np.argmax(estimates)

        # Pull the selected bandit and feed the result back into it.
        outcome = machines[pick].pull()
        machines[pick].update(outcome)
        outcomes[t] = outcome

    counts = np.arange(N) + 1
    cumulative_avg = np.cumsum(outcomes) / counts

    plt.plot(cumulative_avg)
    for level in (m1, m2, m3):
        plt.plot(np.ones(N) * level)
    plt.xscale('log')
    plt.show()

    # Report each bandit's `mean` attribute next to its `m` attribute.
    print('Estimate of mean    Actual mean')
    row_template = '{:<20}{}'
    for machine in machines:
        print(row_template.format(machine.mean, machine.m))

    return cumulative_avg

Beispiel #3

Datei anzeigen

def run_experiment_decaying_epsilon(m1, m2, m3, N):
  bandits = [Bandit(m1), Bandit(m2), Bandit(m3)]

  data = np.empty(N)