コード例 #1
0
class TestAction(TestCase):
    """Statistical tests for ``Action`` values and rewards.

    Every check here is probabilistic: it samples many draws and compares
    an empirical proportion or spread against a tolerance, so a rare
    failure by pure chance is possible.
    """

    def setUp(self):
        """Create a fresh bandit and one action bound to it for each test."""
        self.bandit = Bandit()
        self.action = Action(self.bandit)

    def test_action_value_is_gaussian(self):
        """
        Verify that our action will get a correct value from N(mu, sigma)
        - mu: the mean, here 0
        - sigma: the standard deviation, here 1

        About 50% of the values must be above mu and about 68% must lie
        within one sigma of mu (each with a tolerance of 2 points).
        """
        values = [Action(self.bandit).value for _ in range(5000)]

        # Half of a symmetric distribution lies above its mean.
        above_mu = get_percent([v for v in values if v > 0], values)
        self.assertAlmostEqual(
            above_mu, 50, delta=2,
            msg="%s%% of results are above mu instead of around 50%%"
                % above_mu)

        # Roughly 68% of a normal distribution lies within one sigma.
        within_one_sigma = get_percent(
            [v for v in values if -1 < v < 1], values)
        self.assertAlmostEqual(
            within_one_sigma, 68, delta=2,
            msg="%s%% of results are within one sigma of mu instead of "
                "around 68%%" % within_one_sigma)

    def test_gauss_with_numpy(self):
        """We use numpy, let's add some verifications

        Draw 1000 samples from N(0, 1) and check that the sample mean and
        the sample standard deviation are both within 0.12 of the
        requested parameters.
        """
        mu, sigma = 0, 1
        samples = np.random.normal(mu, sigma, 1000)

        diff_mean = abs(mu - np.mean(samples))
        self.assertLess(diff_mean, 0.12,
                        "diff mean = %s >= 0.12" % diff_mean)

        # ddof=1 selects the unbiased (sample) estimator.
        diff_std = abs(sigma - np.std(samples, ddof=1))
        self.assertLess(diff_std, 0.12,
                        "diff std = %s >= 0.12" % diff_std)

    def test_action_can_play(self):
        """Let's play and check that action got a reward

        Play 1000 times, then measure the share of rewards that fall
        inside [mu - 3 * sigma, mu + 3 * sigma]. The assertion is
        deliberately loose (85 +/- 15) because of the open TODO below.
        """
        action = self.action  # shortcut
        for _ in range(1000):
            action.play()

        lower = action.mu - 3 * action.sigma
        upper = action.mu + 3 * action.sigma
        rewards_in_range = [r for r in action.rewards if lower <= r <= upper]
        percent = get_percent(rewards_in_range, action.rewards)
        self.assertAlmostEqual(percent, 85.0, delta=15, msg="%s%% instead of "
            "99,7%% for a Normal Distribution" % percent)
    # TODO verify why it comes to 86% sometimes !
    # NOTE(review): possibly the rewards are centred on action.value rather
    # than action.mu, which would shift them out of this band - confirm
    # against Action.play before tightening the assertion.

    def test_action_mean_reward(self):
        """Test that an action keeps a history of rewards.

        Play 999 times and check that the sample standard deviation of
        the recorded rewards is within 0.12 of the action's sigma. Only
        the spread is asserted here; the mean reward itself is not checked.
        """
        for _ in range(999):
            self.action.play()

        diff_std = abs(
            self.action.sigma - np.std(self.action.rewards, ddof=1))
        self.assertLess(diff_std, 0.12,
                        "diff std = %s >= 0.12" % diff_std)