def test_run_episode(self):
    """Check the sign of the learned HIT value after one mocked episode.

    The environment's starting state, the chosen action, and the two
    results returned by ``_env.step`` are all mocked, so the episode is
    fully scripted.  The scenario is run twice on fresh learners: once
    where the terminal step carries ``1`` and once where it carries ``-1``
    (presumably the reward -- confirm against the environment).  The value
    of ``evaluate_model`` for the starting state with ``ACTION_HIT`` must
    be strictly positive in the first case and strictly negative in the
    second.
    """

    def hit_value_after_episode(terminal_signal):
        # Build a brand-new learner and new State objects on every call so
        # the two scenarios cannot influence each other.
        learner = linear_approx_learning.LinearApproxLearning()
        learner._env.generate_starting_state = MagicMock(
            return_value=State(Card(COLOR_BLACK, 1), 1))
        learner.generate_action = MagicMock(return_value=ACTION_HIT)
        scripted_steps = [
            (State(Card(COLOR_BLACK, 1), 1), 0),
            (State(Card(COLOR_BLACK, 1), 1, is_terminal=True),
             terminal_signal),
        ]
        learner._env.step = MagicMock(side_effect=scripted_steps)
        learner.run_episode()
        return learner.evaluate_model(
            State(Card(COLOR_BLACK, 1), 1), ACTION_HIT)

    value_after_positive = hit_value_after_episode(1)
    self.assertLess(0.0, value_after_positive)

    value_after_negative = hit_value_after_episode(-1)
    self.assertGreater(0.0, value_after_negative)
def test_evaluate_model(self):
    """Check ``evaluate_model`` against a hand-set weight vector.

    The learner's ``_weights`` are replaced by a 36-element array whose
    first entry is ``0.5`` and whose remaining entries are ``0``.  With
    those weights the model must return exactly ``0.5`` for
    ``State(Card(COLOR_BLACK, 1), 1)`` with ``ACTION_HIT`` and exactly
    ``0.0`` for ``State(Card(COLOR_BLACK, 11), 1)`` with ``ACTION_HIT``.
    """
    learner = linear_approx_learning.LinearApproxLearning()

    # Only the first weight is non-zero.
    weight_values = [.5] + [0] * 35
    learner._weights = np.array(weight_values)

    value_for_card_1 = learner.evaluate_model(
        State(Card(COLOR_BLACK, 1), 1), ACTION_HIT)
    self.assertEqual(.5, value_for_card_1)

    value_for_card_11 = learner.evaluate_model(
        State(Card(COLOR_BLACK, 11), 1), ACTION_HIT)
    self.assertEqual(0.0, value_for_card_11)
def test_generate_action(self):
    """Check that ``generate_action`` picks the higher-valued action.

    ``get_explore_threshold`` is mocked to return ``0.0`` and
    ``evaluate_model`` is temporarily replaced on the instance by stubs
    that score the actions differently.  When ``ACTION_HIT`` scores
    higher the learner must return ``ACTION_HIT``; when every other
    action scores higher than ``ACTION_HIT`` it must return
    ``ACTION_STICK``.
    """

    def hit_scores_highest(state, action):
        # ACTION_HIT -> 1.0, anything else -> 0.5.
        if action == ACTION_HIT:
            return 1.0
        return 0.5

    def hit_scores_lowest(state, action):
        # ACTION_HIT -> 0.5, anything else -> 1.0.
        if action == ACTION_HIT:
            return .5
        return 1.0

    learner = linear_approx_learning.LinearApproxLearning()
    learner.get_explore_threshold = MagicMock(return_value=0.0)

    with patch.object(learner, "evaluate_model", new=hit_scores_highest):
        chosen = learner.generate_action(SAMPLE_STATE)
        self.assertEqual(ACTION_HIT, chosen)

    with patch.object(learner, "evaluate_model", new=hit_scores_lowest):
        chosen = learner.generate_action(SAMPLE_STATE)
        self.assertEqual(ACTION_STICK, chosen)
def test_get_explore_threshold(self):
    """Check that the explore threshold for ``SAMPLE_STATE`` is >= 0.05."""
    learner = linear_approx_learning.LinearApproxLearning()
    threshold = learner.get_explore_threshold(SAMPLE_STATE)
    # The expected lower bound goes first: asserts 0.05 <= threshold.
    self.assertLessEqual(0.05, threshold)
def test_init(self):
    """Smoke test: a default-constructed learner is created and is truthy."""
    learner = linear_approx_learning.LinearApproxLearning()
    self.assertTrue(learner)