# Stationary-bandit epsilon-greedy experiment: run many independent bandits
# in parallel and compare greedy vs. two epsilon values.
# NOTE(review): assumes `args` (list of step counts) and `epsilons`
# (iterable of 3 epsilon values) are defined earlier in the file, and that
# this runs under `if __name__ == "__main__"` or in a notebook -- 'spawn'
# workers re-import the main module.
# force=True: without it, set_start_method raises RuntimeError when the
# start method was already set (e.g. this section is executed twice).
mp.set_start_method('spawn', force=True)
print('Stationary greedy started...')
t1 = time.perf_counter()
with mp.Pool(mp.cpu_count()) as pool:

    def func(x):
        # Run every experiment in `args` for one epsilon; each worker
        # returns per-step rewards and optimal-action flags.
        return np.array(pool.map(EpsGreedy(eps=x).rews_opts_stat, args))

    # get 3 (2000, 2, 1000)-shaped arrays, axis=1 stands for rewards and optimals
    result = [func(eps) for eps in epsilons]
t2 = time.perf_counter()
print(f'Done in {round(t2 - t1, 3)} sec')
# get the average rewards (mean over the 2000 runs -> one curve per epsilon)
rewards = [pair[:, 0, :].mean(axis=0) for pair in result]
# get the percentage of the optimal actions
optimals = [Bandit.percent(pair[:, 1, :]) for pair in result]
# plotting
colors = ('green', 'blue', 'red')
labels = (r'$\varepsilon=0$ (greedy)', r'$\varepsilon=0.1$', r'$\varepsilon=0.01$')
# FIX: `colors` was defined but never used -- pass it to the plots,
# consistent with how the other sections call Bandit.plot.
Bandit.plot(rewards, labels, 'Average reward', colors=colors)
Bandit.plot(optimals, labels, '% Optimal action', colors=colors)
plt.show()
# Final timing report and the parameter-study figure (one curve per
# algorithm family, x-axis shared across the swept parameters).
print(f'done in {round(t2 - t1, 3)} sec')
t3 = time.perf_counter()
print(f'Overall execution time {round(t3 - t0, 3)} sec')

# plotting: one label/color per algorithm family
labels = (
    r'$\varepsilon$-greedy, $\varepsilon$',
    'constant step\n' r'$\varepsilon$-greedy $\alpha=0.1$, $\varepsilon$',
    r'gradient bandit, $\alpha$',
    r'UCB, $c$',
    'optimistic greedy\n' r'$\alpha=0.1, Q_0$',
)
ylabel = 'Average reward over\n last 100 000 steps'
xlabel = r'$\varepsilon, \alpha, c, Q_0$'
colors = ('red', 'purple', 'green', 'blue', 'black')
# x axis values to correspond with parameter slices: each family occupies
# its own sub-range of the shared 0..9 tick positions
x = [list(range(10))[lo:hi] for lo, hi in param_slices.values()]
# plots
ax = Bandit.plot(
    rewards.values(),
    labels,
    ylabel,
    datax=x,
    xlabel=xlabel,
    colors=colors,
    fig_size=(15, 8),
)
plt.xticks(range(10), x_ticks)
plt.show()
# UCB vs. epsilon-greedy on the stationary testbed: average reward per
# step over `runs` independent bandit experiments.
runs = int(2e3)  # the number of different bandit experiments
steps = int(1e3)  # number of learning iterations in a single experiment
args = [steps] * runs
# comment this line if run on windows or OS X (default method)
# force=True: without it, set_start_method raises RuntimeError when the
# start method was already set (e.g. this section is executed twice).
mp.set_start_method('spawn', force=True)
print('Start upper confidence bound...')
t1 = time.perf_counter()
with mp.Pool(mp.cpu_count()) as pool:
    ucb = np.array(pool.map(UCB(c=2).rewards_stat, args))
    greedy = np.array(pool.map(EpsGreedy(eps=0.1).rewards_stat, args))
t2 = time.perf_counter()
print(f'Done in {round(t2 - t1, 3)} sec')
# get the averages (mean over runs -> one curve of length `steps` each)
ucb = ucb.mean(axis=0)
greedy = greedy.mean(axis=0)
# plot
labels = (r'UCB, $c=2$', r'$\varepsilon$-greedy, $\varepsilon=0.1$')
Bandit.plot((ucb, greedy), labels, 'Average reward', colors=('blue', 'grey'))
plt.show()
# Gradient bandit with/without reward baseline, two step sizes each.
# NOTE(review): relies on `pool`, `args`, `t1` from the surrounding code.
# The four configurations, in the order their curves are labeled below.
grad_bandits = (
    GradientBaseline(true_value=4, alpha=0.1),
    GradientBaseline(true_value=4, alpha=0.4),
    GradientNoBaseline(true_value=4, alpha=0.1),
    GradientNoBaseline(true_value=4, alpha=0.4),
)
# One pool.map per configuration, same sequential order as before.
result = [np.array(pool.map(b.optimals_stat, args)) for b in grad_bandits]
t2 = time.perf_counter()
print(f'Done in {round(t2 - t1, 3)} sec')
# turn the per-step optimal-action flags into percentages
result = [Bandit.percent(arr) for arr in result]
# plotting
labels = (
    r'with baseline, $\alpha=0.1$',
    r'with baseline, $\alpha=0.4$',
    r'without baseline, $\alpha=0.1$',
    r'without baseline, $\alpha=0.4$',
)
colors = ('blue', 'cornflowerblue', 'sienna', 'tan')
Bandit.plot(result, labels, '% Optimal action', colors=colors)
plt.show()