Exemple #1
0
    def test_pull_does_not_change_state(self, k, e, rounds):
        """Check that calling ``pull()`` alone leaves the tallies at zero.

        Builds an ``EpsilonGreedy`` with ``num_arms=k`` and ``epsilon=e``,
        calls ``pull()`` ``rounds`` times without ever calling ``update()``,
        and then asserts that ``pull_counts`` and ``rewards`` each still
        sum to 0.
        """
        bandit = EpsilonGreedy(num_arms=k, epsilon=e)

        # Only pull; no update() is issued, so nothing should be recorded.
        for _round in range(rounds):
            bandit.pull()

        total_pulls = sum(bandit.pull_counts)
        self.assertEqual(0, total_pulls)

        total_reward = sum(bandit.rewards)
        self.assertEqual(0, total_reward)
Exemple #2
0
    def test_pull_does_not_change_state_after_any_updates(
            self, k, e, rounds, updates):
        """Check that ``pull()`` leaves previously recorded tallies untouched.

        Builds an ``EpsilonGreedy`` with ``num_arms=k`` and ``epsilon=e``,
        applies ``updates`` calls to ``update()`` with a random arm and a
        random reward, snapshots the sums of ``pull_counts`` and ``rewards``,
        calls ``pull()`` ``rounds`` times, and asserts both sums are
        unchanged.
        """
        bandit = EpsilonGreedy(num_arms=k, epsilon=e)

        # Give the bandit some history: a random arm index in [0, k) and a
        # random reward in [0.0, 1.0) per update.
        for _step in range(updates):
            arm = np.random.randint(0, k)
            payout = np.random.random()
            bandit.update(chosen_arm=arm, reward=payout)

        # Snapshot the aggregate state before any pulls happen.
        pulls_before = sum(bandit.pull_counts)
        rewards_before = sum(bandit.rewards)

        for _round in range(rounds):
            bandit.pull()

        pulls_after = sum(bandit.pull_counts)
        self.assertEqual(pulls_before, pulls_after)

        rewards_after = sum(bandit.rewards)
        self.assertEqual(rewards_before, rewards_after)
Exemple #3
0
    def test_pull_returns_a_valid_arm(self, k, e, rounds):
        """Check that every ``pull()`` result is an arm index in ``[0, k)``.

        Builds an ``EpsilonGreedy`` with ``num_arms=k`` and ``epsilon=e``,
        calls ``pull()`` ``rounds`` times, and asserts after each call that
        the returned value is at least 0 and strictly less than ``k``.
        """
        bandit = EpsilonGreedy(num_arms=k, epsilon=e)
        for _round in range(rounds):
            chosen = bandit.pull()

            # Validate each result immediately so a failure points at the
            # first offending pull.
            self.assertGreaterEqual(chosen, 0)
            self.assertLess(chosen, k)