def test_pull_returns_a_valid_arm(self, k, tau, rounds):
    """Check that every arm returned by ``pull`` lies in ``[0, k)``.

    A ``Softmax`` is built with ``k`` arms and temperature ``tau`` and is
    pulled ``rounds`` times; each returned arm is range-checked right
    after the pull that produced it.
    """
    bandit = Softmax(num_arms=k, temperature=tau)
    for _ in range(rounds):
        chosen = bandit.pull()
        # Lower bound first, then the exclusive upper bound.
        self.assertGreaterEqual(chosen, 0)
        self.assertLess(chosen, k)
def test_pull_does_not_change_state(self, k, tau, rounds):
    """Check that ``pull`` alone leaves the recorded statistics at zero.

    After ``rounds`` calls to ``pull`` on a freshly constructed
    ``Softmax`` (no ``update`` calls), the totals of ``pull_counts`` and
    ``rewards`` must both still be zero.
    """
    bandit = Softmax(num_arms=k, temperature=tau)
    for _ in range(rounds):
        bandit.pull()

    total_pulls = sum(bandit.pull_counts)
    self.assertEqual(0, total_pulls)
    total_rewards = sum(bandit.rewards)
    self.assertEqual(0, total_rewards)
def test_the_best_arm_based_on_feedback(self, k, rounds):
    """Check that the most frequently pulled arm is the highest-reward arm.

    A random reward vector of length ``k`` is normalised to sum to one and
    handed to ``Softmax.create`` with temperature ``1.0``. The bandit is
    then pulled ``rounds`` times while counting how often each arm is
    returned.

    NOTE(review): the comparison is statistical; presumably the caller
    supplies enough ``rounds`` for the counts to separate -- confirm.
    """
    raw = np.random.random(k)
    expected_rewards = raw / np.sum(raw)
    bandit = Softmax.create(temperature=1.0, rewards=expected_rewards)

    # Per-arm tally of how many times each index was returned by pull().
    tally = np.zeros(k, dtype=np.uint64)
    for _ in range(rounds):
        chosen = bandit.pull()
        tally[chosen] += 1

    best_by_reward = np.argmax(expected_rewards)
    best_by_pulls = np.argmax(tally)
    self.assertEqual(best_by_reward, best_by_pulls)
def test_pull_does_not_change_state_after_any_updates(
        self, k, tau, rounds, updates):
    """Check that ``pull`` does not alter statistics recorded by ``update``.

    The bandit first receives ``updates`` calls to ``update`` with a
    random arm in ``[0, k)`` and a random reward. The totals of
    ``pull_counts`` and ``rewards`` are captured, ``pull`` is called
    ``rounds`` times, and the totals must be unchanged afterwards.
    """
    bandit = Softmax(num_arms=k, temperature=tau)
    for _ in range(updates):
        # Draw the arm before the reward so RNG consumption order is fixed.
        arm = np.random.randint(0, k)
        payoff = np.random.random()
        bandit.update(chosen_arm=arm, reward=payoff)

    pulls_before = sum(bandit.pull_counts)
    rewards_before = sum(bandit.rewards)

    for _ in range(rounds):
        bandit.pull()

    pulls_after = sum(bandit.pull_counts)
    self.assertEqual(pulls_before, pulls_after)
    rewards_after = sum(bandit.rewards)
    self.assertEqual(rewards_before, rewards_after)
def test_init_fails_with_negative_temperature(self, k, tau):
    """Check that a negative temperature makes construction fail.

    Only inputs with ``tau < 0.0`` pass the ``assume`` filter
    (presumably hypothesis's ``assume`` -- confirm against the file's
    imports). Building a ``Softmax`` with such a temperature must raise
    ``AssertionError``.
    """
    assume(tau < 0.0)
    # ``msg`` is the failure text shown if no AssertionError is raised.
    expect_failure = self.assertRaises(
        AssertionError, msg="temperature should positive")
    with expect_failure:
        Softmax(num_arms=k, temperature=tau)
def test_init_fails_with_invalid_nums_arms(self, k, tau):
    """Check that an invalid number of arms makes construction fail.

    Building a ``Softmax`` with the supplied ``k`` must raise
    ``AssertionError``.

    NOTE(review): the range of ``k`` is decided by the caller; judging by
    the failure message it is presumably fewer than two arms -- confirm.
    """
    # ``msg`` is the failure text shown if no AssertionError is raised.
    expect_failure = self.assertRaises(
        AssertionError, msg="there should be more than one arm")
    with expect_failure:
        Softmax(num_arms=k, temperature=tau)
def test_init_set_config_correctly(self, k, tau):
    """Check that the constructor stores its configuration unchanged.

    After construction, ``num_arms`` must equal ``k`` and ``temperature``
    must equal ``tau``.
    """
    bandit = Softmax(num_arms=k, temperature=tau)
    stored_arms = bandit.num_arms
    self.assertEqual(k, stored_arms)
    stored_temperature = bandit.temperature
    self.assertEqual(tau, stored_temperature)