def test_invalid_log_format(self):
    """Constructing a Simulator with ``log_format`` 7 or None must raise TypeError.

    The fixtures are built outside the ``assertRaises`` block so that only a
    TypeError raised by the ``Simulator`` call itself can satisfy the test.
    """
    rng = np.random.RandomState(seed=7)
    n_samples, n_features = 10, 5

    for bad_log_format in (7, None):
        with self.subTest(log_format=bad_log_format):
            # Fresh fixtures for every case, drawn from the shared seeded generator.
            bandits = [("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
            decisions = [rng.randint(0, 2) for _ in range(n_samples)]
            rewards = [rng.randint(0, 100) for _ in range(n_samples)]
            contexts = [[rng.rand() for _ in range(n_features)]
                        for _ in range(n_samples)]
            scaler = StandardScaler()

            with self.assertRaises(TypeError):
                Simulator(bandits=bandits,
                          decisions=decisions,
                          rewards=rewards,
                          contexts=contexts,
                          scaler=scaler,
                          test_size=0.4,
                          batch_size=0,
                          is_ordered=True,
                          seed=7,
                          log_format=bad_log_format)
def test_simulator_hyper_parameter(self):
    """Run the simulator over UCB1 bandits with Radius neighborhoods 6..9.

    The simulation is run with ``is_quick=True``; afterwards the confusion
    matrices and predictions collected by the simulator must be truthy.
    """
    size = 100
    n_jobs = 1

    # Local seeded generator: the fixture is identical on every run and the
    # module-level ``random`` state is not consumed.
    rng = random.Random(123456)

    # NOTE(review): randint is inclusive on both ends, so decisions may
    # contain 2 although the bandits are built with arms [0, 1] -- confirm
    # this is intentional.
    decisions = [rng.randint(0, 2) for _ in range(size)]
    rewards = [rng.randint(0, 1000) for _ in range(size)]
    contexts = [[rng.random() for _ in range(50)] for _ in range(size)]

    # One bandit per candidate radius.
    hyper_parameter_tuning = [
        ('Radius' + str(radius),
         MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
        for radius in range(6, 10)
    ]

    sim = Simulator(hyper_parameter_tuning, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                    batch_size=0, seed=123456, is_quick=True)
    sim.run()

    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_simulator_mixed(self):
    """Run the simulator on one bandit with a Radius neighborhood and one without.

    After the run, the confusion matrices and predictions collected by the
    simulator must be truthy.
    """
    size = 100
    n_jobs = 1

    # Local seeded generator: the fixture is identical on every run and the
    # module-level ``random`` state is not consumed.
    rng = random.Random(123456)

    # NOTE(review): randint is inclusive on both ends, so decisions may
    # contain 2 although the bandits are built with arms [0, 1] -- confirm
    # this is intentional.
    decisions = [rng.randint(0, 2) for _ in range(size)]
    rewards = [rng.randint(0, 1000) for _ in range(size)]
    contexts = [[rng.random() for _ in range(50)] for _ in range(size)]

    mixed = [
        ('RandomRadius',
         MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
        ('Random',
         MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
    ]

    sim = Simulator(mixed, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                    batch_size=0, seed=123456)
    sim.run()

    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_simulator_contextual(self):
    """Run the simulator on five learning policies, each with a Radius(10) neighborhood.

    After the run, the confusion matrices and predictions collected by the
    simulator must be truthy.
    """
    size = 100
    n_jobs = 1

    # Local seeded generator: the fixture is identical on every run and the
    # module-level ``random`` state is not consumed.
    rng = random.Random(123456)

    # NOTE(review): randint is inclusive on both ends, so decisions may
    # contain 2 although the bandits are built with arms [0, 1] -- confirm
    # this is intentional.
    decisions = [rng.randint(0, 2) for _ in range(size)]
    rewards = [rng.randint(0, 1000) for _ in range(size)]
    contexts = [[rng.random() for _ in range(50)] for _ in range(size)]

    def binarize(decision, reward):
        # The threshold that maps a reward to True/False depends on the arm.
        if decision == 0:
            return reward <= 50
        return reward >= 220

    learning_policies = [
        ('Random', LearningPolicy.Random()),
        ('UCB1', LearningPolicy.UCB1(1)),
        ('ThompsonSampling', LearningPolicy.ThompsonSampling(binarize)),
        ('EpsilonGreedy', LearningPolicy.EpsilonGreedy(epsilon=.15)),
        ('Softmax', LearningPolicy.Softmax()),
    ]
    # Every bandit gets its own Radius(10) neighborhood policy instance.
    contextual_mabs = [
        (name, MAB([0, 1], policy, NeighborhoodPolicy.Radius(10), n_jobs=n_jobs))
        for name, policy in learning_policies
    ]

    sim = Simulator(contextual_mabs, decisions, rewards, contexts,
                    scaler=StandardScaler(), test_size=0.5, is_ordered=False,
                    batch_size=0, seed=123456)
    sim.run()

    self.assertTrue(sim.bandit_to_confusion_matrices)
    self.assertTrue(sim.bandit_to_predictions)
def test_invalid_simulator_stats_scope(self):
    """``_set_stats`` is expected to raise ValueError for the scope 'validation'."""
    n_samples, n_features = 10, 5
    rng = np.random.RandomState(seed=7)

    decisions = np.array([rng.randint(0, 2) for _ in range(n_samples)])
    rewards = np.array([rng.randint(0, 100) for _ in range(n_samples)])

    bandits = [("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
    contexts = [[rng.rand() for _ in range(n_features)] for _ in range(n_samples)]
    sim = Simulator(bandits=bandits,
                    decisions=decisions,
                    rewards=rewards,
                    contexts=contexts,
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)

    self.assertRaises(ValueError, sim._set_stats, 'validation', decisions, rewards)
def test_invalid_get_arm_stats(self):
    """``get_arm_stats`` is expected to raise TypeError when rewards are strings."""
    rng = np.random.RandomState(seed=9)
    decisions = np.array([rng.randint(0, 2) for _ in range(5)])
    rewards = np.array([rng.randint(0, 100) for _ in range(5)])

    # String rewards that are passed to get_arm_stats in place of the numeric ones.
    new_rewards = np.array(['h', 'e', 'l', 'l', 'o'])

    sim = Simulator(bandits=[("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))],
                    decisions=decisions,
                    rewards=rewards,
                    contexts=[[rng.rand() for _ in range(5)] for _ in range(5)],
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)

    with self.assertRaises(TypeError):
        # The return value is irrelevant; the call itself must raise.
        sim.get_arm_stats(decisions, new_rewards)
def test_invalid_plot_args_metric_value(self):
    """After a run, ``plot('mean')`` is expected to raise ValueError."""
    n_samples, n_features = 10, 5
    rng = np.random.RandomState(seed=7)

    bandits = [("example", MAB([0, 1], LearningPolicy.EpsilonGreedy()))]
    decisions = [rng.randint(0, 2) for _ in range(n_samples)]
    rewards = [rng.randint(0, 100) for _ in range(n_samples)]
    contexts = [[rng.rand() for _ in range(n_features)] for _ in range(n_samples)]

    sim = Simulator(bandits=bandits,
                    decisions=decisions,
                    rewards=rewards,
                    contexts=contexts,
                    scaler=StandardScaler(),
                    test_size=0.4,
                    batch_size=0,
                    is_ordered=True,
                    seed=7)
    sim.run()

    self.assertRaises(ValueError, sim.plot, 'mean')
def test_invalid_get_stats(self):
    """``Simulator.get_stats`` is expected to raise TypeError for an array of strings."""
    letters = np.array(list('hello'))
    self.assertRaises(TypeError, Simulator.get_stats, letters)
# Two Random-policy bandits: one with a Radius(10) neighborhood, one without.
mixed = [
    ('RandomRadius',
     MAB([0, 1], LearningPolicy.Random(), NeighborhoodPolicy.Radius(10), n_jobs=n_jobs)),
    ('Random',
     MAB([0, 1], LearningPolicy.Random(), n_jobs=n_jobs)),
]

# One UCB1 bandit per neighborhood radius in 6..9.
hyper_parameter_tuning = [
    ('Radius' + str(radius),
     MAB([0, 1], LearningPolicy.UCB1(1), NeighborhoodPolicy.Radius(radius), n_jobs=n_jobs))
    for radius in range(6, 10)
]

####################################
# Contextual Simulation
####################################
start = time()
sim = Simulator(
    contextual_mabs,
    decisions,
    rewards,
    contexts,
    scaler=StandardScaler(),
    test_size=0.5,
    is_ordered=False,
    batch_size=0,
    seed=123456,
)
sim.run()
end = time()

# Elapsed wall-clock time, converted from seconds to minutes.
runtime = (end - start) / 60
print('Complete', str(runtime) + ' minutes')
print('\n')

for mab_name, _ in sim.bandits:
    print(mab_name)

    # Since simulation is offline, print the bandit stats directly
    print('Worst Case Scenario', sim.bandit_to_arm_to_stats_min[mab_name])
    print('Average Case Scenario', sim.bandit_to_arm_to_stats_avg[mab_name])
    print('Best Case Scenario:', sim.bandit_to_arm_to_stats_max[mab_name])