예제 #1
0
    def test_get_default_q_value(self):
        """A freshly built 1x1 table is expected to read back 0."""
        table = learn.QValues(num_states=1, num_actions=1)
        only_state, only_action = 0, 0

        observed = table.get(only_state, only_action)

        # Nothing has been written yet, so the test expects the value 0.
        self.assertEqual(observed, 0)
예제 #2
0
    def test_set_q_value(self):
        """A value written with ``set`` is read back unchanged by ``get``."""
        table = learn.QValues(num_states=1, num_actions=1)
        s, a, stored = 0, 0, 10

        table.set(s, a, stored)
        read_back = table.get(s, a)

        self.assertEqual(read_back, stored)
예제 #3
0
    def test_get_max_action_index(self):
        """Check which action ``get_max_action`` picks for each state.

        The table is filled from a grid whose row index is the state and
        whose column index is the action; the expected answer is the
        position of the largest value in each row.
        """
        grid = [[1, 2], [4, 3]]
        table = learn.QValues(num_states=2, num_actions=2)

        # Flatten the grid into (state, action, value) triples, row by row.
        cells = [
            (s, a, v)
            for s, row in enumerate(grid)
            for a, v in enumerate(row)
        ]
        for s, a, v in cells:
            table.set(s, a, v)

        best = []
        for s in [0, 1]:
            best.append(table.get_max_action(s))

        self.assertEqual(best, [1, 0])
예제 #4
0
    def test_initial_num_actions(self):
        """The ``num_actions`` constructor argument shows up as an attribute."""
        requested = 1
        table = learn.QValues(num_actions=requested)

        self.assertEqual(table.num_actions, requested)
예제 #5
0
    def test_initial_num_states(self):
        """The ``num_states`` constructor argument shows up as an attribute."""
        requested = 1
        table = learn.QValues(num_states=requested)

        self.assertEqual(table.num_states, requested)
예제 #6
0
    def test_default_num_actions(self):
        """With no constructor arguments, ``num_actions`` is expected to be 0."""
        default_table = learn.QValues()
        reported = default_table.num_actions

        self.assertEqual(reported, 0)