Exemple #1
0
    def test_select_action_after_one_learning_step(self):
        """After one learning step, act(0) returns the action just learned.

        A two-state, two-action learner with learning rate 0.5 is given a
        single ``learn(1, 1, 10)`` call and is then asked to act in state 0.
        """
        rewarded_action = 1
        learner = learn.QLearner(
            num_states=2, num_actions=2, learning_rate=0.5
        )
        learner.learn(1, rewarded_action, 10)

        self.assertEqual(rewarded_action, learner.act(0))
Exemple #2
0
    def test_change_state_after_learning(self):
        """``current_state`` equals the state passed to ``learn()``."""
        new_state = 1
        learner = learn.QLearner(num_states=3, num_actions=2)

        # Action 0 and reward 0: only the state bookkeeping is under test.
        learner.learn(new_state, 0, 0)

        self.assertEqual(learner.current_state, new_state)
Exemple #3
0
    def test_learn_with_1_state_action_q_value(self):
        """One reward of 10 gives a Q-value of 10 for the only pair."""
        only_state, only_action = 0, 0
        learner = learn.QLearner(num_states=1, num_actions=1)
        learner.learn(only_state, only_action, 10)

        self.assertEqual(learner.q_values.get(only_state, only_action), 10)
Exemple #4
0
    def test_learn_with_learning_rate(self):
        """Two rewards of 10 at learning rate 0.5 give a Q-value of 10."""
        learner = learn.QLearner(
            num_states=1, num_actions=1, learning_rate=0.5
        )

        # Feed the same reward twice to the single state/action pair.
        for _ in range(2):
            learner.learn(0, 0, 10)

        self.assertEqual(learner.q_values.get(0, 0), 10)
Exemple #5
0
    def test_learn_two_rewards(self):
        """Rewards of 5 then 10 on the same pair give a Q-value of 15."""
        learner = learn.QLearner(num_states=1, num_actions=1)

        # Two consecutive steps on state 0 / action 0.
        learner.learn(0, 0, 5)
        learner.learn(0, 0, 10)

        self.assertEqual(learner.q_values.get(0, 0), 15)
Exemple #6
0
    def test_learn_with_several_state_action_q_values(self):
        """The Q-value is read back for the state the learner started in.

        Starting in state 2 of a 5-state, 4-action learner, one
        ``learn(4, 3, 10)`` call must leave the Q-value of (2, 3) at 10.
        """
        start, destination, chosen_action = 2, 4, 3
        learner = learn.QLearner(
            initial_state=start, num_states=5, num_actions=4
        )
        learner.learn(destination, chosen_action, 10)

        stored = learner.q_values.get(start, chosen_action)
        self.assertEqual(stored, 10)
Exemple #7
0
    def test_select_action_after_several_learning_steps(self):
        """Check ``act()`` for both states after three learning steps.

        Expected selections: action 1 in state 0 and action 0 in state 1.
        """
        learner = learn.QLearner(
            num_states=2, num_actions=2, learning_rate=0.5, discount_factor=0.5
        )

        # Each step is (state, action, reward), passed to learn() in order.
        for step in ((1, 1, 10), (0, 0, 100), (1, 1, 5)):
            learner.learn(*step)

        # Pairs of (state to query, action expected back from act()).
        expected_actions = ((0, 1), (1, 0))
        for state, expected_action in expected_actions:
            self.assertEqual(learner.act(state), expected_action)
Exemple #8
0
    def test_default_discount_factor(self):
        """A learner built with no arguments has ``discount_factor`` 1."""
        learner = learn.QLearner()

        self.assertEqual(learner.discount_factor, 1)
Exemple #9
0
    def test_default_current_state(self):
        """A learner built with no arguments has ``current_state`` 0."""
        learner = learn.QLearner()

        self.assertEqual(learner.current_state, 0)
Exemple #10
0
    def test_initial_learning_rate(self):
        """The ``learning_rate`` keyword is exposed as an attribute."""
        rate = 0.8
        learner = learn.QLearner(learning_rate=rate)

        self.assertEqual(learner.learning_rate, rate)
Exemple #11
0
    def test_initial_discount_factor(self):
        """The ``discount_factor`` keyword is exposed as an attribute."""
        factor = 0.8
        learner = learn.QLearner(discount_factor=factor)

        self.assertEqual(learner.discount_factor, factor)
Exemple #12
0
    def test_initial_state(self):
        """The ``initial_state`` keyword sets ``current_state``."""
        start = 2
        learner = learn.QLearner(initial_state=start)

        self.assertEqual(learner.current_state, start)
Exemple #13
0
    def test_default_learning_rate(self):
        """A learner built with no arguments has ``learning_rate`` 1."""
        learner = learn.QLearner()

        self.assertEqual(learner.learning_rate, 1)