def test_select_action_after_one_learning_step(self):
    """After one rewarded step, acting in state 0 picks the rewarded action."""
    rewarded_action = 1
    learner = learn.QLearner(num_states=2, num_actions=2, learning_rate=0.5)

    # Single learning step: move to state 1 via action 1, reward 10.
    learner.learn(1, rewarded_action, 10)

    self.assertEqual(rewarded_action, learner.act(0))
def test_change_state_after_learning(self):
    """The state passed to learn() becomes the learner's current_state."""
    new_state = 1
    learner = learn.QLearner(num_states=3, num_actions=2)

    # Action 0 and reward 0: only the state change is under test here.
    learner.learn(new_state, 0, 0)

    self.assertEqual(learner.current_state, new_state)
def test_learn_with_1_state_action_q_value(self):
    """With a single state/action pair, one reward of 10 gives a Q-value of 10."""
    only_state, only_action = 0, 0
    learner = learn.QLearner(num_states=1, num_actions=1)

    learner.learn(only_state, only_action, 10)

    self.assertEqual(learner.q_values.get(only_state, only_action), 10)
def test_learn_with_learning_rate(self):
    """Two rewards of 10 at learning_rate=0.5 should yield a Q-value of 10."""
    only_state, only_action = 0, 0
    learner = learn.QLearner(num_states=1, num_actions=1, learning_rate=0.5)

    # Feed the same reward twice into the single state/action pair.
    for step_reward in (10, 10):
        learner.learn(only_state, only_action, step_reward)

    learned_value = learner.q_values.get(only_state, only_action)
    self.assertEqual(learned_value, 10)
def test_learn_two_rewards(self):
    """Rewards of 5 then 10 with default settings should yield a Q-value of 15."""
    only_state, only_action = 0, 0
    learner = learn.QLearner(num_states=1, num_actions=1)

    # Apply both rewards, in order, to the single state/action pair.
    for step_reward in (5, 10):
        learner.learn(only_state, only_action, step_reward)

    learned_value = learner.q_values.get(only_state, only_action)
    self.assertEqual(learned_value, 15)
def test_learn_with_several_state_action_q_values(self):
    """The reward is recorded against the state the learner started in.

    The learner begins in state 2 and learns a move to state 4 via action 3
    with reward 10; the Q-value for (state 2, action 3) is expected to be 10.
    """
    start_state, chosen_action = 2, 3
    learner = learn.QLearner(
        initial_state=start_state,
        num_states=5,
        num_actions=4,
    )

    learner.learn(4, chosen_action, 10)

    self.assertEqual(learner.q_values.get(start_state, chosen_action), 10)
def test_select_action_after_several_learning_steps(self):
    """After three learning steps, act() returns the expected action per state."""
    learner = learn.QLearner(
        num_states=2,
        num_actions=2,
        learning_rate=0.5,
        discount_factor=0.5,
    )

    # Each step is (next_state, action, reward), applied in order.
    for next_state, taken_action, step_reward in [(1, 1, 10), (0, 0, 100), (1, 1, 5)]:
        learner.learn(next_state, taken_action, step_reward)

    # Each pair is (state, action expected from act(state)).
    for query_state, expected_action in [(0, 1), (1, 0)]:
        chosen_action = learner.act(query_state)
        self.assertEqual(chosen_action, expected_action)
def test_default_discount_factor(self):
    """A learner built with no arguments has a discount_factor of 1."""
    learner = learn.QLearner()
    self.assertEqual(learner.discount_factor, 1)
def test_default_current_state(self):
    """A learner built with no arguments starts with current_state 0."""
    learner = learn.QLearner()
    self.assertEqual(learner.current_state, 0)
def test_initial_learning_rate(self):
    """The learning_rate passed to the constructor is exposed unchanged."""
    learner = learn.QLearner(learning_rate=0.8)
    self.assertEqual(learner.learning_rate, 0.8)
def test_initial_discount_factor(self):
    """The discount_factor passed to the constructor is exposed unchanged."""
    learner = learn.QLearner(discount_factor=0.8)
    self.assertEqual(learner.discount_factor, 0.8)
def test_initial_state(self):
    """The initial_state passed to the constructor becomes current_state."""
    learner = learn.QLearner(initial_state=2)
    self.assertEqual(learner.current_state, 2)
def test_default_learning_rate(self):
    """A learner built with no arguments has a learning_rate of 1."""
    learner = learn.QLearner()
    self.assertEqual(learner.learning_rate, 1)