Beispiel #1
0
 def test_invalid_not_run_plot(self):
     # Calling plot() on a simulator whose run() was never invoked is
     # expected to raise AssertionError.
     n_samples, n_features = 10, 5
     random_state = np.random.RandomState(seed=7)

     # The fixtures are built in the same order the constructor arguments
     # were originally evaluated, so the seeded generator yields the same
     # decisions, rewards and contexts.
     example_bandit = ("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))
     decisions = [random_state.randint(0, 2) for _ in range(n_samples)]
     rewards = [random_state.randint(0, 100) for _ in range(n_samples)]
     contexts = [[random_state.rand() for _ in range(n_features)]
                 for _ in range(n_samples)]

     simulator = Simulator(
         bandits=[example_bandit],
         decisions=decisions,
         rewards=rewards,
         contexts=contexts,
         scaler=StandardScaler(),
         test_size=0.4,
         batch_size=0,
         is_ordered=True,
         seed=7,
     )

     # run() is deliberately skipped before plotting.
     with self.assertRaises(AssertionError):
         simulator.plot()
Beispiel #2
0
# Elapsed time of the preceding run, reported in minutes.
# NOTE(review): `start` and `end` are set before this excerpt; presumably
# they are time() timestamps in seconds -- confirm.
runtime = (end - start) / 60
print('Complete', str(runtime) + ' minutes')
print('\n')

# Print the min / avg / max per-arm statistics recorded for every bandit.
for mab_name, mab in sim.bandits:
    print(mab_name)

    # The simulation is offline, so the stats are read straight off the
    # simulator. Attributes are looked up lazily, one per print, in the
    # order worst -> average -> best.
    for _label, _stats_attr in (
            ('Worst Case Scenario', 'bandit_to_arm_to_stats_min'),
            ('Average Case Scenario', 'bandit_to_arm_to_stats_avg'),
            ('Best Case Scenario:', 'bandit_to_arm_to_stats_max'),
    ):
        print(_label, getattr(sim, _stats_attr)[mab_name])

    print('\n\n')

sim.plot('max', True)

####################################
# Context-Free Simulation
####################################
# Time the whole simulation: construction plus run().
start = time()
sim = Simulator(
    bandits=context_free_mabs,
    decisions=decisions,
    rewards=rewards,
    contexts=None,       # context-free: no context features are supplied
    scaler=None,
    test_size=0.5,
    is_ordered=False,
    batch_size=100,
    seed=123456,
)
sim.run()
end = time()

# Report the elapsed time in minutes.
# NOTE(review): assumes time() returns seconds (e.g. time.time) -- the import
# is outside this excerpt.
runtime = (end - start) / 60
print('Complete', str(runtime) + ' minutes')
print('\n')

for mab_name, mab in sim.bandits: