def at_least_n_times_exploration(actions, utilities, temperature, action_counter):
    """Choose an action, scoring under-tried actions with an optimistic value.

    Each action's score is looked up in ``utilities``. Any action whose
    entry in ``action_counter`` is below ``min_n`` has its score replaced
    by ``optimistic_reward``. The adjusted scores are then handed to
    ``argmax`` together with ``actions``.

    ``min_n``, ``optimistic_reward`` and ``argmax`` are not defined in this
    function; they are resolved from the surrounding scope.

    Args:
        actions: Indexable sequence of candidate actions; each must be a
            valid key/index for ``utilities`` and ``action_counter``.
        utilities: Mapping (or sequence) from action to its current utility.
        temperature: Unused by this function; kept in the signature
            (presumably for parity with other exploration strategies --
            confirm against callers).
        action_counter: Mapping from action to a count compared to ``min_n``.

    Returns:
        Whatever ``argmax(actions, score_fn)`` returns.
    """
    # Resolve every utility up front so a missing entry fails before any
    # counter is consulted.
    base_scores = [utilities[action] for action in actions]

    adjusted = {}
    for action, score in zip(actions, base_scores):
        if action_counter[action] < min_n:
            score = optimistic_reward
        adjusted[action] = score

    def score_of(action):
        return adjusted[action]

    return argmax(actions, score_of)
def test_random_tie(self):
    """Check how often ``argmax`` picks 'a' over 100 calls.

    ``argmax`` is called 100 times on ['a', 'b', 'c'] with utilities read
    from ``self.d``. The test passes when 'a' is returned strictly more
    than 25 and strictly fewer than 75 times.

    NOTE(review): presumably ``self.d`` contains a tie involving 'a' that
    ``argmax`` breaks at random -- confirm against the fixture setup.
    """
    def utility(key):
        return self.d[key]

    picks_of_a = sum(
        1
        for _ in range(100)
        if argmax(['a', 'b', 'c'], utility) == 'a'
    )
    self.assertTrue(25 < picks_of_a < 75)
def test_return_max(self):
    """Check that ``argmax`` over ['a', 'b'] returns 'a'.

    Utilities are read from ``self.d``, which is set up outside this
    method.
    """
    def utility(key):
        return self.d[key]

    winner = argmax(['a', 'b'], utility)
    self.assertEqual('a', winner)