コード例 #1
0
class UCBExample:
    """Experiment setup pairing an epsilon-greedy agent with a UCB agent.

    Both agents are constructed around the same ``GaussianBandit`` instance.
    How this class is consumed is not visible in this snippet.
    """
    # Title for this experiment (presumably shown when reporting or plotting).
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    # Single bandit instance handed to every agent below; 10 is presumably the
    # number of arms -- TODO confirm against GaussianBandit's signature.
    bandit = GaussianBandit(10)
    # Agents under comparison; they differ only in the policy they are given.
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(2))
    ]
コード例 #2
0
class BinomialExample:
    """Experiment setup comparing three agents on one ``BinomialBandit``.

    Two plain ``Agent`` instances (epsilon-greedy and UCB policies) and one
    ``BetaAgent`` with a greedy policy are all built around the same bandit.
    How this class is consumed is not visible in this snippet.
    """
    # Title for this experiment; the "(n=5)" mirrors the n=5 passed below.
    label = 'Bayesian Bandits - Binomial (n=5)'
    # Shared bandit. NOTE(review): 10 is presumably the number of arms and
    # t=3 * 1000 presumably relates to the number of trials -- confirm against
    # BinomialBandit's signature.
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    # Agents under comparison, all bound to the same bandit instance.
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
コード例 #3
0
class BernoulliExample:
    """Experiment setup comparing three agents on one ``BernoulliBandit``.

    Two plain ``Agent`` instances (epsilon-greedy and UCB policies) and one
    ``BetaAgent`` with a greedy policy are all built around the same bandit.
    How this class is consumed is not visible in this snippet.
    """
    # Title for this experiment (presumably shown when reporting or plotting).
    label = 'Bayesian Bandits - Bernoulli'
    # Shared bandit. NOTE(review): 10 is presumably the number of arms and
    # t=3 * 1000 presumably relates to the number of trials -- confirm against
    # BernoulliBandit's signature.
    bandit = BernoulliBandit(10, t=3 * 1000)
    # Agents under comparison, all bound to the same bandit instance.
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy())
    ]
コード例 #4
0
class CompareExample:
    """Experiment setup in which every agent is given its own bandit.

    Six bandits are created (five ``GaussianBandit`` and one
    ``BernoulliBandit``) and ``agents[i]`` is constructed around
    ``bandits[i]``. How this class is consumed is not visible in this snippet.
    """
    # Title for this experiment (presumably shown when reporting or plotting).
    label = 'Action'
    # Passed as the first argument to every bandit constructor below.
    n_arms = 10
    # Only used here to derive the BernoulliBandit's ``t`` argument.
    n_trials = 1000
    # One bandit per agent, in the same order as ``agents``. The fifth one is
    # created with mu=4 and the last one is the only non-Gaussian bandit.
    bandits = [GaussianBandit(n_arms), GaussianBandit(n_arms), GaussianBandit(n_arms), GaussianBandit(n_arms),
               GaussianBandit(n_arms, mu=4), BernoulliBandit(n_arms, t=3 * n_trials)]
    # Agents under comparison; each index lines up with the bandit of the same
    # index above.
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1, baseline=False),
        BetaAgent(bandits[5], GreedyPolicy())

        # Disabled variants. NOTE(review): they reference ``bandit``, which is
        # not defined in this class (only ``bandits`` is), so they would need
        # updating before being re-enabled.
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
    ]
コード例 #5
0
ファイル: bayesian.py プロジェクト: xiutingmi/bandits
"""
Takes advantage of multicore systems to speed up the simulation runs.
"""
from bandits.agent import Agent, BetaAgent
from bandits.bandit import BernoulliBandit
from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy
from bandits.environment import Environment

if __name__ == '__main__':
    # Script entry point: build one Bernoulli bandit, attach three agents to
    # it, run the simulation and plot the outcome.
    experiments = 500
    trials = 1000

    # NOTE(review): ``t`` used to be hard-coded as 3 * 1000. It is derived
    # from ``trials`` here on the assumption that the 1000 was the trial
    # count, so the two cannot drift apart -- confirm against
    # BernoulliBandit's signature. The resulting value is unchanged (3000).
    bandit = BernoulliBandit(10, t=3 * trials)

    # All three agents are built around the same bandit instance.
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        BetaAgent(bandit, GreedyPolicy()),
    ]

    env = Environment(bandit, agents, label='Bayesian Bandits')
    # run() hands back two results, which are passed straight to the plotter.
    scores, optimal = env.run(trials, experiments)
    env.plot_results(scores, optimal)
    env.plot_beliefs()