class UCBExample:
    """Experiment config comparing UCB against greedy/epsilon-greedy baselines.

    Corresponds to the section labelled '2.6 - Upper-Confidence-Bound
    Action Selection'.
    """
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        # Same greedy policy but with optimistic initial estimates (prior=10).
        Agent(bandit, GreedyPolicy(1), prior=10),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # NOTE(review): 2 is presumably the UCB exploration coefficient — confirm
        # against UCBPolicy's signature.
        Agent(bandit, UCBPolicy(2)),
    ]
class OptimisticInitialValueExample:
    """Experiment config showing the effect of optimistic initial values.

    Sweeps the greedy agent's prior over {0 (default), 1, 2, 5, 10} with an
    epsilon-greedy agent as a baseline.
    """
    label = 'Optimistic Initial Values'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandit, GreedyPolicy(1)),
        # Increasingly optimistic initial value estimates.
        Agent(bandit, GreedyPolicy(1), prior=1),
        Agent(bandit, GreedyPolicy(1), prior=2),
        Agent(bandit, GreedyPolicy(1), prior=5),
        Agent(bandit, GreedyPolicy(1), prior=10),
    ]
class EpsilonGreedyExample:
    """Experiment config comparing greedy vs. epsilon-greedy action selection.

    One pure-greedy agent against epsilon-greedy agents at eps=0.01 and
    eps=0.1, all on the same 10-armed Gaussian bandit.
    """
    label = 'Action-Value Methods'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    # Fix: removed a stray `GreedyPolicy(1)` expression statement that
    # instantiated a policy and immediately discarded it (dead code).
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, EpsilonGreedyPolicy(0.01, 1)),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # Alternative agent configurations kept for experimentation:
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
        # Agent(bandit, GreedyPolicy(10)),
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 10)),
    ]
class BinomialExample:
    """Experiment config for Bayesian (Beta) agents on a Binomial bandit."""
    label = 'Bayesian Bandits - Binomial (n=5)'
    # 10-armed Binomial bandit, n=5 draws per pull.
    # NOTE(review): t=3000 presumably pre-sizes the bandit for 3x the trial
    # count used elsewhere (trials=1000) — confirm against BinomialBandit.
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # Bayesian agent maintaining Beta posteriors over arm payoffs.
        BetaAgent(bandit, GreedyPolicy())
    ]
class BernoulliExample:
    """Experiment config for Bayesian (Beta) agents on a Bernoulli bandit."""
    label = 'Bayesian Bandits - Bernoulli'
    # 10-armed Bernoulli bandit.
    # NOTE(review): t=3000 presumably pre-sizes the bandit for 3x the trial
    # count used elsewhere (trials=1000) — confirm against BernoulliBandit.
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # Bayesian agent maintaining Beta posteriors over arm payoffs.
        BetaAgent(bandit, GreedyPolicy())
    ]
class EpsilonGreedyExample:
    """Experiment config for section '2.2 - Action-Value Methods'.

    Pure-greedy vs. epsilon-greedy (eps=0.01 and eps=0.1) on a 10-armed
    Gaussian bandit, with default (sample-average) value estimates.

    NOTE(review): this class shares its name with another
    ``EpsilonGreedyExample`` in this collection — presumably they live in
    different modules; confirm before combining files.
    """
    label = '2.2 - Action-Value Methods'
    # 10-armed Gaussian bandit shared by all agents.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy()),
        Agent(bandit, EpsilonGreedyPolicy(0.01)),
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
    ]
class CompareExample:
    """Experiment config comparing one representative agent of each strategy.

    Unlike the other examples, each agent gets its OWN bandit instance, since
    the agents here require different bandit types/parameters.
    """
    # NOTE(review): label looks truncated ('Action') — confirm intended title.
    label = 'Action'
    n_arms = 10
    n_trials = 1000
    # One bandit per agent; index i pairs with agents[i] below.
    bandits = [GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms),
               GaussianBandit(n_arms, mu=4),
               BernoulliBandit(n_arms, t=3 * n_trials)]
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        # Greedy with optimistic initial estimates.
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        # Gradient bandit on the shifted-mean (mu=4) Gaussian bandit.
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1, baseline=False),
        # Bayesian agent on the Bernoulli bandit.
        BetaAgent(bandits[5], GreedyPolicy())
        # Agent(bandit, EpsilonGreedyPolicy(0.1, 1), prior=5),
        # Agent(bandit, GreedyPolicy(1), prior=5)
    ]
""" Takes advantage of multicore systems to speed up the simulation runs. """ from bandits.agent import Agent, BetaAgent from bandits.bandit import BernoulliBandit from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy from bandits.environment import Environment if __name__ == '__main__': experiments = 500 trials = 1000 bandit = BernoulliBandit(10, t=3 * 1000) agents = [ Agent(bandit, EpsilonGreedyPolicy(0.1)), Agent(bandit, UCBPolicy(1)), BetaAgent(bandit, GreedyPolicy()) ] env = Environment(bandit, agents, label='Bayesian Bandits') scores, optimal = env.run(trials, experiments) env.plot_results(scores, optimal) env.plot_beliefs()