Example #1
class UCBExample:
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, GreedyPolicy(1), prior=10),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandit, UCBPolicy(2)),
    ]
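
The UCBPolicy(2) above presumably sets the confidence parameter c in the upper-confidence-bound rule from section 2.6 of Sutton and Barto, which the label references. A minimal sketch of that selection rule, written outside the bandits package (the function name is illustrative, not part of the package):

import numpy as np

def ucb_select(q, n, t, c=2.0):
    # Try any arm that has never been pulled; otherwise maximize
    # Q(a) + c * sqrt(ln t / N(a)), the standard UCB score.
    untried = np.flatnonzero(n == 0)
    if untried.size > 0:
        return int(untried[0])
    return int(np.argmax(q + c * np.sqrt(np.log(t) / n)))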
Example #2
class OptimisticInitialValueExample:
    label = 'Optimistic Initial Values'
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, GreedyPolicy(1), prior=1),
        Agent(bandit, GreedyPolicy(1), prior=2),
        Agent(bandit, GreedyPolicy(1), prior=5),
        Agent(bandit, GreedyPolicy(1), prior=10),
    ]
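
The prior argument above presumably seeds each agent's initial action-value estimates, which is what lets a purely greedy policy explore at the start. A rough sketch of that idea with incremental sample-average updates, written outside the bandits package (the unit-variance Gaussian rewards are an assumption):

import numpy as np

def greedy_with_prior(means, prior=0.0, steps=1000, seed=0):
    # Greedy agent whose estimates all start at an (optionally optimistic) prior.
    rng = np.random.default_rng(seed)
    q = np.full(len(means), float(prior))   # initial estimates inflated by the prior
    n = np.zeros(len(means))
    total = 0.0
    for _ in range(steps):
        a = int(np.argmax(q))               # always exploit the current estimates
        r = rng.normal(means[a], 1.0)       # assumed unit-variance Gaussian reward
        n[a] += 1
        q[a] += (r - q[a]) / n[a]           # incremental sample-average update
        total += r
    return total / steps

With prior=5 or prior=10 every arm initially looks better than any realistic reward, so the greedy agent is disappointed by each arm in turn and ends up sampling all of them before settling on one.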
Example #3
class EpsilonGreedyExample:
    label = 'Action-Value Methods'
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, EpsilonGreedyPolicy(0.01, 1)),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
        # Agent(bandit, GreedyPolicy(10)),
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 10)),
    ]
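
EpsilonGreedyPolicy(0.01, 1) and EpsilonGreedyPolicy(0.1, 1) differ only in how often they explore; the selection step itself is simple enough to sketch outside the package:

import numpy as np

def epsilon_greedy_select(q, epsilon, rng):
    # With probability epsilon pick an arm uniformly at random, otherwise exploit.
    if rng.random() < epsilon:
        return int(rng.integers(len(q)))
    return int(np.argmax(q))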
Example #4
class BinomialExample:
    label = 'Bayesian Bandits - Binomial (n=5)'
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
Example #5
class BernoulliExample:
    label = 'Bayesian Bandits - Bernoulli'
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
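
The BetaAgent in Examples #4 and #5 points at the standard Beta-Bernoulli (and Beta-Binomial) conjugate model: each arm keeps a Beta posterior over its success probability, updated by adding observed successes and failures. A minimal sketch of that bookkeeping (how the package combines the posterior with GreedyPolicy is not shown here, so treat the class below as an illustration rather than the package's implementation):

import numpy as np

class BetaArm:
    # Beta(alpha, beta) posterior over one arm's success probability,
    # starting from a uniform Beta(1, 1) prior.
    def __init__(self):
        self.alpha = 1.0
        self.beta = 1.0

    def update(self, successes, failures):
        # Conjugate update: just add the observed counts. For the Binomial
        # bandit with n=5, one pull can contribute up to 5 successes and
        # 5 - successes failures.
        self.alpha += successes
        self.beta += failures

    def mean(self):
        return self.alpha / (self.alpha + self.beta)

    def sample(self, rng):
        # Thompson-style draw from the posterior.
        return rng.beta(self.alpha, self.beta)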
Example #6
class EpsilonGreedyExample:
    label = '2.2 - Action-Value Methods'
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy()),
        Agent(bandit, EpsilonGreedyPolicy(0.01)),
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
    ]
Example #7
class CompareExample:
    label = 'Action'
    n_arms = 10
    n_trials = 1000
    # One independent bandit per agent, so the methods never interact.
    bandits = [
        GaussianBandit(n_arms),                   # greedy
        GaussianBandit(n_arms),                   # epsilon-greedy
        GaussianBandit(n_arms),                   # greedy with optimistic prior
        GaussianBandit(n_arms),                   # UCB
        GaussianBandit(n_arms, mu=4),             # gradient bandit (mean shifted to 4)
        BernoulliBandit(n_arms, t=3 * n_trials),  # Bayesian Beta agent
    ]
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1, baseline=False),
        BetaAgent(bandits[5], GreedyPolicy()),
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
    ]
Example #8
"""
Takes advantage of multicore systems to speed up the simulation runs.
"""
from bandits.agent import Agent, BetaAgent
from bandits.bandit import BernoulliBandit
from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy
from bandits.environment import Environment

if __name__ == '__main__':
    experiments = 500  # independent runs to average over
    trials = 1000      # time steps per run

    bandit = BernoulliBandit(10, t=3 * trials)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
    env = Environment(bandit, agents, label='Bayesian Bandits')
    scores, optimal = env.run(trials, experiments)
    env.plot_results(scores, optimal)
    env.plot_beliefs()
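
The docstring mentions multicore systems, but nothing in the snippet itself is visibly parallel. If Environment.run is serial, one way to spread the 500 experiments across processes could look like the sketch below. It assumes that env.run returns per-trial values already averaged over its experiments (so equal-sized chunks can simply be averaged again) and that the bandit and agents can be rebuilt independently inside each worker; both assumptions would need checking against the actual package.

from multiprocessing import Pool

import numpy as np

from bandits.agent import Agent, BetaAgent
from bandits.bandit import BernoulliBandit
from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy
from bandits.environment import Environment


def run_chunk(n_experiments):
    # Build everything inside the worker so no state is shared between processes.
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy()),
    ]
    env = Environment(bandit, agents, label='Bayesian Bandits')
    return env.run(1000, n_experiments)


if __name__ == '__main__':
    workers = 4
    with Pool(workers) as pool:
        results = pool.map(run_chunk, [500 // workers] * workers)
    # Valid only if every chunk ran the same number of experiments and
    # env.run already averages over experiments.
    scores = np.mean([s for s, _ in results], axis=0)
    optimal = np.mean([o for _, o in results], axis=0)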