def test_epsilon(self):
    """Exercise reading, writing and deleting ``EpsilonGreedy.epsilon``.

    The value passed to the constructor must be readable back, a new
    value of 0.6 must be accepted, and the test expects
    ``AttributeError`` when -1 or 2 is assigned or when the attribute
    is deleted.
    """
    action_set = [10, 100, 1000]
    policy = EpsilonGreedy(action_set, QFunction(), 0.5)

    # The constructor argument is exposed unchanged.
    self.assertEqual(0.5, policy.epsilon)

    # Re-assigning an accepted value is reflected on the next read.
    policy.epsilon = 0.6
    self.assertEqual(0.6, policy.epsilon)

    # Values the policy is expected to reject (presumably because they
    # fall outside [0, 1] -- the validation itself lives in EpsilonGreedy).
    for rejected_value in (-1, 2):
        with self.assertRaises(AttributeError):
            policy.epsilon = rejected_value

    # Deleting the attribute is expected to be refused as well.
    with self.assertRaises(AttributeError):
        del policy.epsilon
def test_sample(self):
    """Check greedy choice, action probabilities and seeded sampling.

    Three Q-value orderings are run through the same sequence of checks
    against the action list ``[10, 100, 1000]``:

    1. with ``epsilon == 0`` the sampled action is the expected greedy
       one and ``pi(0)`` puts all probability mass on it;
    2. with ``epsilon == 0.3`` ``pi(0)`` is 0.8 for the greedy action
       and 0.1 for each of the other two;
    3. with ``epsilon == 0.6`` and NumPy's global RNG seeded with 0 the
       sampled action is 100 in every scenario.

    ``TestQFunction`` is defined elsewhere; presumably it returns the
    given list as the Q-values of the actions -- confirm against its
    definition.
    """
    action_set = [10, 100, 1000]
    tolerance = 0.0000001

    def assert_distribution(policy, expected):
        # Euclidean distance between the expected and actual probability
        # vectors for state 0 must stay below the tolerance.
        gap = np.linalg.norm(np.array(expected) - policy(0))
        self.assertTrue(gap < tolerance)

    # (q_values, greedy action, probabilities at eps=0, probabilities at eps=0.3)
    scenarios = (
        ([10, 100, 1000], 1000, [0., 0., 1.], [0.1, 0.1, 0.8]),
        ([10, 1000, 100], 100, [0., 1., 0.], [0.1, 0.8, 0.1]),
        ([1000, 100, 10], 10, [1., 0., 0.], [0.8, 0.1, 0.1]),
    )

    for q_values, greedy_action, greedy_probs, mixed_probs in scenarios:
        policy = EpsilonGreedy(action_set, TestQFunction(q_values), 0)

        # epsilon == 0: the expected greedy action is sampled.
        self.assertEqual(greedy_action, policy.sample_action(0))
        assert_distribution(policy, greedy_probs)

        policy.epsilon = 0.3
        assert_distribution(policy, mixed_probs)

        # Seed the global NumPy RNG immediately before sampling so the
        # exploratory draw is reproducible.
        policy.epsilon = 0.6
        np.random.seed(0)
        self.assertEqual(100, policy.sample_action(0))