def test_pull_does_not_change_state(self, k, e, rounds):
    """Check that pulling alone leaves a fresh bandit's statistics at zero.

    Args:
        k: Number of arms passed to ``EpsilonGreedy``.
        e: Epsilon value passed to ``EpsilonGreedy``.
        rounds: How many times ``pull()`` is invoked before checking.
    """
    bandit = EpsilonGreedy(num_arms=k, epsilon=e)

    # Only pull() is called; update() is deliberately never invoked.
    for _round in range(rounds):
        bandit.pull()

    total_pulls = sum(bandit.pull_counts)
    self.assertEqual(0, total_pulls)

    total_reward = sum(bandit.rewards)
    self.assertEqual(0, total_reward)
def test_pull_does_not_change_state_after_any_updates(
        self, k, e, rounds, updates):
    """Check that pulling leaves statistics untouched once updates exist.

    The bandit is first fed ``updates`` random (arm, reward) observations,
    the summed pull counts and rewards are snapshotted, and then ``pull()``
    is called ``rounds`` times. Both sums must be identical afterwards.

    Args:
        k: Number of arms passed to ``EpsilonGreedy``.
        e: Epsilon value passed to ``EpsilonGreedy``.
        rounds: How many times ``pull()`` is invoked after the snapshot.
        updates: How many random ``update()`` calls are made up front.
    """
    bandit = EpsilonGreedy(num_arms=k, epsilon=e)

    for _step in range(updates):
        # Draw the arm before the reward so RNG consumption order is fixed.
        random_arm = np.random.randint(0, k)
        random_reward = np.random.random()
        bandit.update(chosen_arm=random_arm, reward=random_reward)

    # Snapshot of the aggregate statistics before any pulls happen.
    pulls_before = sum(bandit.pull_counts)
    rewards_before = sum(bandit.rewards)

    for _round in range(rounds):
        bandit.pull()

    pulls_after = sum(bandit.pull_counts)
    self.assertEqual(pulls_before, pulls_after)

    rewards_after = sum(bandit.rewards)
    self.assertEqual(rewards_before, rewards_after)
def test_pull_returns_a_valid_arm(self, k, e, rounds):
    """Check that every ``pull()`` result lies in the range ``[0, k)``.

    Args:
        k: Number of arms passed to ``EpsilonGreedy``; exclusive upper bound
            for the returned arm.
        e: Epsilon value passed to ``EpsilonGreedy``.
        rounds: How many times ``pull()`` is invoked and checked.
    """
    bandit = EpsilonGreedy(num_arms=k, epsilon=e)

    for _round in range(rounds):
        chosen = bandit.pull()
        # Checked on each pull so a failure stops at the first bad arm.
        self.assertGreaterEqual(chosen, 0)
        self.assertLess(chosen, k)