Example #1
import numpy as np
from tqdm import tqdm
from Experiment import Experiment   # module name inferred from Examples #4-#5
from Context import ctx

def averaged_runs(µ, σ, ε, selector, provider_factory, initializer,
                  dont_average_reward):
    # Build num_runs independent experiments with identical settings.
    exs = [
        Experiment(µ, σ, ε,
                   ctx().num_arms, selector, provider_factory, initializer,
                   dont_average_reward) for _ in range(ctx().num_runs)
    ]
    _ = [ex.run(ctx().num_pulls_per_run) for ex in tqdm(exs)]
    # Average the per-step reward and optimal-action traces across runs.
    rewards = np.average(np.stack([ex.rewards for ex in exs], axis=1), axis=1)
    percent_optimal_actions = 100 * np.average(
        np.stack([ex.optimality for ex in exs], axis=1), axis=1)
    return rewards, percent_optimal_actions
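averaged_runs leans on an Experiment class that is not shown here. A minimal stub consistent with the call sites above (the constructor signature, run(num_pulls), and the rewards/optimality attributes are inferred from usage, not confirmed by the source):

import numpy as np

class Experiment:
    # Hypothetical stub: the real class lives in the project's Experiment module.
    def __init__(self, µ, σ, ε, num_arms, selector, provider_factory,
                 initializer, dont_average_reward):
        self.rewards = np.empty(0)     # per-step reward trace, filled by run()
        self.optimality = np.empty(0)  # per-step 0/1 "picked the best arm" flags

    def run(self, num_pulls):
        # The real run() pulls arms via the selector and records outcomes;
        # zeros keep this stub runnable.
        self.rewards = np.zeros(num_pulls)
        self.optimality = np.zeros(num_pulls)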
Example #2
from Context import ctx   # averaged_runs is the helper defined in Example #1

def run_experiment():
    results = [
        averaged_runs(ctx().µs[i],
                      ctx().σs[i],
                      ctx().εs[i],
                      ctx().selectors[i](ctx().εs[i], ctx().num_arms),
                      ctx().providers[i],
                      ctx().initializers[i](),
                      ctx().dont_average_reward) for i in range(len(ctx().εs))
    ]
    rewards = [r[0] for r in results]
    percent_optimals = [r[1] for r in results]
    return rewards, percent_optimals
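run_experiment reads every setting from ctx(); the Context module itself is not shown. Assuming use_context(n) merely swaps in a munchified dict, one plausible (entirely hypothetical) layout matching the attribute accesses above would be:

from munch import munchify

# Hypothetical context; field names mirror the ctx() accesses above.
_context = munchify({
    "num_arms": 10,
    "num_runs": 2000,
    "num_pulls_per_run": 1000,
    "µs": [0.0, 0.0, 0.0],     # Gaussian means for the arm-reward distributions
    "σs": [1.0, 1.0, 1.0],     # and their standard deviations
    "εs": [0.0, 0.01, 0.1],    # one experiment per exploration rate
    "selectors": [],            # callables: (ε, num_arms) -> action selector
    "providers": [],            # provider factories
    "initializers": [],         # callables returning an initializer
    "dont_average_reward": False,
})

def ctx():
    return _context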
Example #3
import time as t                    # used below via t.perf_counter()
import matplotlib.pyplot as pp      # used below via pp.figure/pp.plot
from Experiment import run_experiment
from Context import ctx, use_context
from munch import munchify
%matplotlib inline

#───────────────────────────────────────────────────────────────────────
#                           Experiment Start
#───────────────────────────────────────────────────────────────────────
use_context(1)

before = t.perf_counter()
rewards, percent_optimals = run_experiment()
print("time (seconds): {0}".format(t.perf_counter() - before))

#───────────────────────────────────────────────────────────────────────
#                              Plotting
#───────────────────────────────────────────────────────────────────────
labels = ["ε={0}{1}".format(ε, (" (greedy)" if ε == 0 else "")) for ε in ctx().εs]
pp.figure(figsize=(10,4))
[pp.plot(rewards[i], label=labels[i]) for i in range(len(ctx().εs))]
pp.legend(bbox_to_anchor=(1.2, 0.5)) 
pp.xlabel("Steps") 
pp.ylabel("Average Reward") 
pp.title("Average ε-greedy Rewards over " + str(ctx().num_runs) + " Runs") 
pp.show()

pp.figure(figsize=(10,4))
[pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))]
pp.legend(bbox_to_anchor=(1.2, 0.5)) 
pp.xlabel("Steps") 
pp.ylabel("% Optimal Action") 
pp.title("% times optical action selected averaged over " + str(ctx().num_runs) + " Runs") 
pp.show()
Example #4
import time as t
import matplotlib.pyplot as pp
from BanditMachine import BanditMachine
from Experiment import run_experiment
from Insanity.Initializers import OptimisticInitializer, ZeroInitializer
from ActionValuesProvider import ActionValuesProvider
from Context import ctx, use_context
from munch import munchify
%matplotlib inline

#───────────────────────────────────────────────────────────────────────
#                           Experiment Start
#───────────────────────────────────────────────────────────────────────
use_context(2)

before = t.perf_counter()
rewards, percent_optimals = run_experiment()
print("time (seconds): {0}".format(t.perf_counter() - before))

#───────────────────────────────────────────────────────────────────────
#                              Plotting
#───────────────────────────────────────────────────────────────────────
labels = ["ε={0}{1}".format(ε, (" (greedy, optimistic, Q₁=5), " if ε == 0 else " realistic, Q₁=0")) for ε in ctx().εs]

pp.figure(figsize=(10,4))
[pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))]
pp.legend(bbox_to_anchor=(1.2, 0.5)) 
pp.xlabel("Steps") 
pp.ylabel("% Optimal Action") 
pp.title("% times optical action selected averaged over " + str(ctx().num_runs) + " Runs") 
pp.show()
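The OptimisticInitializer/ZeroInitializer pair imported above is what produces the Q₁=5 vs. Q₁=0 contrast in the labels. Their code is not shown; a minimal sketch, under the assumption that an initializer is constructed with no arguments (as in run_experiment) and returns the initial action-value array:

import numpy as np

class ZeroInitializer:
    # Realistic start: Q₁(a) = 0 for every arm.
    def __call__(self, num_arms):
        return np.zeros(num_arms)

class OptimisticInitializer:
    # Optimistic start: Q₁(a) = 5 drives early exploration, because every
    # observed reward looks disappointing next to the initial estimate.
    def __call__(self, num_arms, q1=5.0):
        return np.full(num_arms, q1)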

Example #5
import time as t
import matplotlib.pyplot as pp
import numpy as np
from tqdm import tqdm
from BanditMachine import BanditMachine
from Experiment import run_experiment
from Insanity.Initializers import OptimisticInitializer, ZeroInitializer
from ActionValuesProvider import ActionValuesProvider
from Context import ctx, use_context
from munch import munchify
%matplotlib inline

#───────────────────────────────────────────────────────────────────────
#                           Experiment Start
#───────────────────────────────────────────────────────────────────────
use_context(4)

before = t.perf_counter()
rewards, percent_optimals = run_experiment()
print("time (seconds): {0}".format(t.perf_counter() - before))

#───────────────────────────────────────────────────────────────────────
#                              Plotting
#───────────────────────────────────────────────────────────────────────
labels = ["α={0}{1}".format(α, " (no baseline)" if i > 1 else "") for i,α in enumerate([0.1, 0.4, 0.1, 0.4])]

pp.figure(figsize=(10,4))
[pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))]
pp.legend(bbox_to_anchor=(1.2, 0.5)) 
pp.xlabel("Steps") 
pp.ylabel("% Optimal Action") 
pp.title("Gradient Bandit (averaged over " + str(ctx().num_runs) + " Runs)") 
pp.show()
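For reference, the "baseline" the labels refer to is the average-reward term in the gradient-bandit preference update (Sutton & Barto, §2.8). A minimal sketch of one such update, independent of this project's ActionValuesProvider:

import numpy as np

def gradient_bandit_update(H, reward, action, α, avg_reward, use_baseline=True):
    # π is the softmax over preferences H; subtracting H.max() is for stability.
    π = np.exp(H - H.max())
    π /= π.sum()
    baseline = avg_reward if use_baseline else 0.0
    one_hot = np.zeros_like(H)
    one_hot[action] = 1.0
    # H ← H + α · (R − baseline) · (1[a=A] − π)
    return H + α * (reward - baseline) * (one_hot - π)

Without the baseline, every positive reward raises the chosen action's preference regardless of whether it beat the average, which is why the no-baseline curves in the plot typically learn more slowly.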