Beispiel #1
0
    def test_run_episode(self):
        """Check that one mocked episode pushes the estimate toward the reward.

        The environment and the action choice are replaced by mocks so the
        episode is fully scripted: it starts in a fixed state, always hits,
        receives reward 0 on the first step and a terminal reward on the
        second. With terminal reward 1 the model value of (state, HIT) must
        end up above zero; with terminal reward -1 it must end up below zero.
        """

        def fresh_state(**extra):
            # Build a new State each time so no object is shared between the
            # mocks and the final evaluation.
            return State(Card(COLOR_BLACK, 1), 1, **extra)

        scenarios = (
            (1, self.assertLess),      # 0.0 < value after a positive reward
            (-1, self.assertGreater),  # 0.0 > value after a negative reward
        )
        for terminal_reward, check in scenarios:
            learner = linear_approx_learning.LinearApproxLearning()
            learner._env.generate_starting_state = MagicMock(
                return_value=fresh_state())
            learner.generate_action = MagicMock(return_value=ACTION_HIT)
            scripted_steps = [
                (fresh_state(), 0),
                (fresh_state(is_terminal=True), terminal_reward),
            ]
            learner._env.step = MagicMock(side_effect=scripted_steps)

            learner.run_episode()

            value = learner.evaluate_model(fresh_state(), ACTION_HIT)
            check(0.0, value)
Beispiel #2
0
 def test_evaluate_model(self):
     """Check evaluate_model against a hand-set weight vector.

     Only the first of the 36 weights is non-zero (0.5). The test expects
     0.5 for a black card of value 1 and 0.0 for a black card of value 11,
     both with the second State argument 1 and ACTION_HIT.
     """
     learner = linear_approx_learning.LinearApproxLearning()
     weights = np.zeros(36)
     weights[0] = .5
     learner._weights = weights

     # NOTE(review): presumably the first case activates the feature behind
     # weight 0 and the second does not -- confirm against the encoding.
     value_low_card = learner.evaluate_model(
         State(Card(COLOR_BLACK, 1), 1), ACTION_HIT)
     self.assertEqual(.5, value_low_card)

     value_high_card = learner.evaluate_model(
         State(Card(COLOR_BLACK, 11), 1), ACTION_HIT)
     self.assertEqual(0.0, value_high_card)
Beispiel #3
0
    def test_generate_action(self):
        """Check that generate_action returns the higher-valued action.

        get_explore_threshold is mocked to return 0.0 (presumably disabling
        exploration -- confirm against generate_action), and evaluate_model
        is patched with stubs that score one action above the other.
        """
        learner = linear_approx_learning.LinearApproxLearning()
        learner.get_explore_threshold = MagicMock(return_value=0.0)

        def hit_scores_higher(state, action):
            if action == ACTION_HIT:
                return 1.0
            return 0.5

        def stick_scores_higher(state, action):
            if action == ACTION_HIT:
                return .5
            return 1.0

        cases = (
            (hit_scores_higher, ACTION_HIT),
            (stick_scores_higher, ACTION_STICK),
        )
        for stub, expected_action in cases:
            with patch.object(learner, "evaluate_model", new=stub):
                chosen = learner.generate_action(SAMPLE_STATE)
                self.assertEqual(expected_action, chosen)
Beispiel #4
0
 def test_get_explore_threshold(self):
     """Check that the explore threshold for SAMPLE_STATE is at least 0.05."""
     learner = linear_approx_learning.LinearApproxLearning()
     threshold = learner.get_explore_threshold(SAMPLE_STATE)
     self.assertLessEqual(0.05, threshold)
Beispiel #5
0
 def test_init(self):
     """Check that LinearApproxLearning builds without args and is truthy."""
     learner = linear_approx_learning.LinearApproxLearning()
     self.assertTrue(learner)