def test_get_default_q_value(self):
    """Check that an entry which was never set reads back as 0."""
    table = learn.QValues(num_states=1, num_actions=1)
    # Only one (state, action) pair exists in a 1x1 table: (0, 0).
    observed = table.get(0, 0)
    self.assertEqual(observed, 0)
def test_set_q_value(self):
    """Check that a value written with ``set`` is read back by ``get``."""
    stored = 10
    table = learn.QValues(num_states=1, num_actions=1)
    table.set(0, 0, stored)
    # Read the same (state, action) pair that was just written.
    self.assertEqual(table.get(0, 0), stored)
def test_get_max_action_index(self):
    """Check ``get_max_action`` against a hand-filled 2x2 table.

    Row ``s`` of ``rows`` holds the values written for state ``s``; the
    largest entry sits at index 1 for state 0 and index 0 for state 1.
    """
    rows = [[1, 2], [4, 3]]
    table = learn.QValues(num_states=2, num_actions=2)

    # Populate every (state, action) cell, row by row.
    for s, row in enumerate(rows):
        for a, q in enumerate(row):
            table.set(s, a, q)

    best = list(map(table.get_max_action, (0, 1)))
    self.assertEqual(best, [1, 0])
def test_initial_num_actions(self):
    """Check that ``num_actions`` passed at construction is exposed."""
    table = learn.QValues(num_actions=1)
    reported = table.num_actions
    self.assertEqual(reported, 1)
def test_initial_num_states(self):
    """Check that ``num_states`` passed at construction is exposed."""
    table = learn.QValues(num_states=1)
    reported = table.num_states
    self.assertEqual(reported, 1)
def test_default_num_actions(self):
    """Check that ``num_actions`` is 0 when no arguments are given."""
    # Construct with no arguments so the default is what gets inspected.
    self.assertEqual(learn.QValues().num_actions, 0)