Exemple #1
0
    def test_learn_empty_history_does_nothing(self):
        """Learning from an empty history leaves row 0 of the values at 0.0."""
        learner = model.MonteCarlo(1, 2, 0.4, 3, 0.6)
        learner.learn([])

        # Check both entries of row 0, index 0 first.
        for action in (0, 1):
            self.assertEqual(learner.action_value_fn[0][action], 0.0)
Exemple #2
0
    def test_is_mature_returns_true_if_trained(self):
        """is_mature() is truthy once learn() has run on a 3-entry history."""
        episode = (
            (1, 0, 1.0),
            (4, 0, 1.5),
            (0, 1, 0.9),
        )
        learner = model.MonteCarlo(5, 2, 0.1, 1, 0.5)
        learner.learn(episode)

        self.assertTrue(learner.is_mature())
Exemple #3
0
    def test_learn_none_history(self):
        """learn(None) must be rejected with an AssertionError."""
        mdp = model.MonteCarlo(1, 2, 0.4, 3, 0.6)

        # assertRaises replaces the former try / self.fail() / except
        # AssertionError pattern: self.fail() itself raises AssertionError, so
        # that handler swallowed the failure and the test passed even when
        # learn(None) did not raise at all.
        with self.assertRaises(AssertionError):
            mdp.learn(None)
Exemple #4
0
 def __init__(self, environment):
     """Initialise the instance with a MonteCarlo model and EpsilonGreedy policy.

     Args:
         environment: Must be a ``pypownet.environment.RunEnv`` instance; it
             is forwarded unchanged to the parent initialiser.

     Raises:
         AssertionError: If ``environment`` is not a ``RunEnv`` (only when
             assertions are enabled).
     """
     assert isinstance(environment, pypownet.environment.RunEnv)
     super().__init__(environment)
     # NOTE(review): the size and hyper-parameter attributes read below are
     # not assigned in this method; presumably the parent __init__ sets
     # them -- confirm.

     # For this test use MonteCarlo to learn the action-value function.
     self.mdp = model.MonteCarlo(self.state_space_size,
                                 self.action_space_size, self.alpha,
                                 self.mdp_iteration, self.gamma)
     # For this test use EpsilonGreedy for policy improvement.
     self.policy = policy.EpsilonGreedy(self.state_space_size,
                                        self.action_space_size,
                                        self.epsilon)
Exemple #5
0
    def test_learn(self):
        """learn() on a 3-entry history yields the expected 5x2 value table."""
        episode = ((1, 0, 1.0), (4, 1, 1.5), (0, 0, 0.9))
        learner = model.MonteCarlo(5, 2, 1.0, 5, 0.5)
        learner.learn(episode)

        # Expected values per second index, listed by first index 0..4.
        expected_columns = (
            (0, (5.4, 1.0, 0.0, 0.0, 0.0)),
            (1, (0.0, 0.0, 0.0, 0.0, 3.0)),
        )
        for action, column in expected_columns:
            for state, expected in enumerate(column):
                actual = learner.action_value_fn[state][action]
                self.assertEqual(actual, expected)
Exemple #6
0
 def test_init_state_space_size_initialization(self):
     """A model built with sizes (10, 1) exposes 10 action-value entries."""
     learner = model.MonteCarlo(10, 1, 0.0, 1, 0.5)
     entry_count = learner.action_value_fn.size
     self.assertEqual(entry_count, 10)
Exemple #7
0
 def test_is_mature_returns_false_if_untrained(self):
     """is_mature() is falsy on a model that has never called learn()."""
     untrained = model.MonteCarlo(2, 2, 0.1, 1, 0.5)
     mature = untrained.is_mature()
     self.assertFalse(mature)
Exemple #8
0
 def test_init_value_fn_initialization(self):
     """A fresh model's action_value_fn equals a 10x2 array of float zeros."""
     learner = model.MonteCarlo(10, 2, 0.0, 0, 0.5)
     all_zeros = np.zeros((10, 2), float)
     matches = np.array_equal(all_zeros, learner.action_value_fn)
     self.assertTrue(matches)