def test_learn_empty_history_does_nothing(self):
    """Learning from an empty history leaves the action values at 0.0.

    The model is built with a single state and two actions, so both
    entries of row 0 of ``action_value_fn`` are checked.
    """
    mdp = model.MonteCarlo(1, 2, 0.4, 3, 0.6)
    mdp.learn([])

    # Row 0 is the only state; walk its two action entries in order.
    for action in (0, 1):
        self.assertEqual(mdp.action_value_fn[0][action], 0.0)
def test_is_mature_returns_true_if_trained(self):
    """``is_mature()`` reports True once ``learn`` has been fed a history."""
    trained = model.MonteCarlo(5, 2, 0.1, 1, 0.5)
    # Three-step history; entries are presumably (state, action, reward)
    # -- confirm against model.MonteCarlo.learn.
    episode = ((1, 0, 1.0), (4, 0, 1.5), (0, 1, 0.9))
    trained.learn(episode)

    mature = trained.is_mature()
    self.assertTrue(mature)
def test_learn_none_history(self):
    """``learn(None)`` must raise ``AssertionError``.

    The test fails if ``learn`` accepts ``None`` without raising, or
    raises a different exception type.
    """
    mdp = model.MonteCarlo(1, 2, 0.4, 3, 0.6)

    # A try/except AssertionError wrapped around self.fail() can never
    # report a failure: self.fail() itself raises AssertionError, which
    # the handler would swallow. assertRaises signals the missing
    # exception from outside the guarded call instead.
    with self.assertRaises(AssertionError,
                           msg="The input is not check for none."):
        mdp.learn(None)
def __init__(self, environment):
    """Initialize the instance with a MonteCarlo model and an EpsilonGreedy policy.

    Args:
        environment: a ``pypownet.environment.RunEnv`` instance; it is
            forwarded unchanged to the parent initializer.

    Raises:
        AssertionError: if ``environment`` is not a
            ``pypownet.environment.RunEnv``.
    """
    assert isinstance(environment, pypownet.environment.RunEnv), (
        "environment must be a pypownet.environment.RunEnv instance"
    )
    super().__init__(environment)

    # NOTE(review): state_space_size, action_space_size, alpha,
    # mdp_iteration, gamma and epsilon are read from self below;
    # presumably the parent initializer sets them -- confirm.

    # For this test use MonteCarlo to learn the action-value function.
    self.mdp = model.MonteCarlo(
        self.state_space_size,
        self.action_space_size,
        self.alpha,
        self.mdp_iteration,
        self.gamma,
    )

    # For this test use EpsilonGreedy for policy improvement.
    self.policy = policy.EpsilonGreedy(
        self.state_space_size,
        self.action_space_size,
        self.epsilon,
    )
def test_learn(self):
    """Check every cell of ``action_value_fn`` after learning one history.

    With 5 states and 2 actions, only three cells are expected to be
    non-zero after learning: [0][0] == 5.4, [1][0] == 1.0 and
    [4][1] == 3.0. All other cells must stay at 0.0.
    """
    mdp = model.MonteCarlo(5, 2, 1.0, 5, 0.5)
    # Entries are presumably (state, action, reward) -- confirm against
    # model.MonteCarlo.learn.
    mdp.learn(((1, 0, 1.0), (4, 1, 1.5), (0, 0, 0.9)))

    # expected_by_action[action][state]; iterating action-major keeps the
    # checks in the order: all states for action 0, then for action 1.
    expected_by_action = (
        (5.4, 1.0, 0.0, 0.0, 0.0),
        (0.0, 0.0, 0.0, 0.0, 3.0),
    )
    for action, per_state in enumerate(expected_by_action):
        for state, value in enumerate(per_state):
            self.assertEqual(mdp.action_value_fn[state][action], value)
def test_init_state_space_size_initialization(self):
    """A model built with 10 states and 1 action holds 10 action values."""
    mdp = model.MonteCarlo(10, 1, 0.0, 1, 0.5)

    cell_count = mdp.action_value_fn.size
    self.assertEqual(cell_count, 10)
def test_is_mature_returns_false_if_untrained(self):
    """``is_mature()`` reports False for a model that never called ``learn``."""
    untrained = model.MonteCarlo(2, 2, 0.1, 1, 0.5)

    mature = untrained.is_mature()
    self.assertFalse(mature)
def test_init_value_fn_initialization(self):
    """A freshly built model starts with an all-zero (10, 2) value table."""
    mdp = model.MonteCarlo(10, 2, 0.0, 0, 0.5)

    expected = np.zeros(shape=(10, 2), dtype=float)
    tables_match = np.array_equal(expected, mdp.action_value_fn)
    self.assertTrue(tables_match)