コード例 #1
0
    def test_epsilon(self):
        """Exercise reading, reassigning, rejecting and deleting ``epsilon``.

        The value given to the constructor must be readable back, a new
        value of 0.6 must be accepted, and assigning -1 or 2 -- or deleting
        the attribute -- must raise ``AttributeError``.
        """
        policy = EpsilonGreedy([10, 100, 1000], QFunction(), 0.5)

        # Round trip: first the constructor value, then a reassigned one.
        self.assertEqual(0.5, policy.epsilon)
        policy.epsilon = 0.6
        self.assertEqual(0.6, policy.epsilon)

        # One negative value and one value above 1 are both refused.
        for rejected in (-1, 2):
            with self.assertRaises(AttributeError):
                policy.epsilon = rejected

        # The attribute cannot be removed from the policy either.
        with self.assertRaises(AttributeError):
            del policy.epsilon
コード例 #2
0
    def test_sample(self):
        """Check greedy choice, action probabilities and seeded sampling.

        The same sequence of checks is run for three orderings of the
        values handed to ``TestQFunction``:

        * with epsilon 0, ``sample_action(0)`` returns the expected action
          and ``pi(0)`` puts probability 1 on it;
        * with epsilon 0.3, ``pi(0)`` puts 0.8 on that action and 0.1 on
          each of the other two;
        * with epsilon 0.6 and NumPy's global RNG seeded with 0,
          ``sample_action(0)`` returns 100.

        NOTE(review): the expected actions assume ``TestQFunction`` exposes
        the given list as per-action values in the order of ``actions`` --
        confirm against its definition.
        """
        actions = [10, 100, 1000]
        tolerance = 0.0000001

        # Each row: (values for TestQFunction, expected action at epsilon 0,
        #            expected probabilities at epsilon 0,
        #            expected probabilities at epsilon 0.3)
        scenarios = [
            ([10, 100, 1000], 1000, [0., 0., 1.], [0.1, 0.1, 0.8]),
            ([10, 1000, 100], 100, [0., 1., 0.], [0.1, 0.8, 0.1]),
            ([1000, 100, 10], 10, [1., 0., 0.], [0.8, 0.1, 0.1]),
        ]

        for q_values, greedy_action, greedy_probs, mixed_probs in scenarios:
            # Identify the failing row, since every scenario shares the
            # same assertion lines.
            label = "values: {}".format(q_values)

            pi = EpsilonGreedy(actions, TestQFunction(q_values), 0)
            self.assertEqual(greedy_action, pi.sample_action(0), msg=label)

            # assertLess reports the measured distance on failure, unlike
            # assertTrue(distance < tolerance).
            distance = np.linalg.norm(np.array(greedy_probs) - pi(0))
            self.assertLess(distance, tolerance, msg=label)

            pi.epsilon = 0.3
            distance = np.linalg.norm(np.array(mixed_probs) - pi(0))
            self.assertLess(distance, tolerance, msg=label)

            # Re-seed immediately before sampling so the draw does not
            # depend on random numbers consumed earlier in the test.
            pi.epsilon = 0.6
            np.random.seed(0)
            self.assertEqual(100, pi.sample_action(0), msg=label)