def at_least_n_times_exploration(actions, utilities, temperature, action_counter):
    """Choose an action, favouring those tried fewer than ``min_n`` times.

    Each action's score starts as ``utilities[action]``. Any action whose
    ``action_counter`` entry is below ``min_n`` has its score replaced by
    ``optimistic_reward``. The final choice is delegated to
    ``argmax(actions, score_function)``.

    ``temperature`` is accepted but never read in this function.

    NOTE(review): ``min_n``, ``optimistic_reward`` and ``argmax`` are not
    defined in this block; presumably they come from an enclosing or
    module scope -- confirm.
    """
    # First pass: look up the raw utility of every action.
    scores = []
    for act in actions:
        scores.append(utilities[act])

    # Second pass: overwrite the score of every under-explored action.
    for idx in range(len(scores)):
        if action_counter[actions[idx]] < min_n:
            scores[idx] = optimistic_reward

    score_by_action = dict(zip(actions, scores))

    def score_of(action):
        return score_by_action[action]

    return argmax(actions, score_of)
 def at_least_n_times_exploration(actions, utilities, temperature,
                                  action_counter):
     """Select an action after boosting the score of rarely tried ones.

     Scores are read from ``utilities`` for every entry of ``actions``.
     Wherever ``action_counter[action]`` is still below ``min_n``, the
     score is swapped for ``optimistic_reward``. The resulting mapping is
     handed to ``argmax`` together with ``actions`` and its result is
     returned.

     ``temperature`` is part of the signature but is not used here.

     NOTE(review): ``min_n``, ``optimistic_reward`` and ``argmax`` are
     resolved from outside this block -- verify where they are defined.
     """
     estimates = [utilities[name] for name in actions]

     # Walk the estimates by position so each one can be replaced in place.
     position = 0
     while position < len(estimates):
         if action_counter[actions[position]] < min_n:
             estimates[position] = optimistic_reward
         position += 1

     lookup = {}
     for name, value in zip(actions, estimates):
         lookup[name] = value

     return argmax(actions, lambda name: lookup[name])
Example #3
0
 def test_random_tie(self):
     """Run ``argmax`` 100 times and require 'a' strictly between 25 and 75 wins.

     NOTE(review): presumably ``self.d`` gives 'a' a score tied for the
     maximum so the winner varies between calls -- confirm against the
     fixture, which is not visible here.
     """
     wins_for_a = 0
     for _ in range(100):
         chosen = argmax(['a', 'b', 'c'], lambda key: self.d[key])
         if chosen == 'a':
             wins_for_a += 1
     self.assertTrue(25 < wins_for_a < 75)
Example #4
0
 def test_return_max(self):
     """``argmax`` over 'a' and 'b', scored through ``self.d``, must give 'a'."""
     candidates = ['a', 'b']
     winner = argmax(candidates, lambda key: self.d[key])
     self.assertEqual('a', winner)