def test_fresh_reservoir(self):
        """Check the mean of the items retained by a fresh reservoir.

        The integers 1..1000000 are offered, in order, to
        ``ensemble.FreshReservoir(10000, 80.0)``.  The mean of the items that
        remain in the reservoir must lie within 1000 of 600000, the midpoint
        of 200000 and 1000000.

        NOTE(review): the constructor arguments are presumably a capacity of
        10000 and a freshness of 80 percent, which would make 200000 the
        oldest item still considered fresh -- confirm against
        ensemble.FreshReservoir.
        """
        stream_length = 1000000
        reservoir = ensemble.FreshReservoir(10000, 80.0)
        for k in range(1, stream_length + 1):
            # append_maybe is handed a zero-argument callable, not the value.
            reservoir.append_maybe(lambda: k)  # pylint: disable=cell-var-from-loop

        retained = np.asarray(list(reservoir), dtype=np.float64)

        # Expected mean is the midpoint of the expected item range.
        oldest_expected = 200000.0
        newest_expected = 1000000.0
        desired_mean = (oldest_expected + newest_expected) / 2.0
        computed_mean = np.mean(retained)

        logging.info("Fresh reservoir, true mean %.2f, computed mean %.2f",
                     desired_mean, computed_mean)
        self.assertAlmostEqual(
            desired_mean,
            computed_mean,
            msg="Fresh reservoir item freshness is off.",
            delta=1000.0)
    def test_fresh_reservoir_distribution(self, freshness, n, capacity):
        """Check mean and variance of the items retained by a fresh reservoir.

        The integers 1..n are offered, in order, to
        ``ensemble.FreshReservoir(capacity, freshness=freshness)``.  The
        retained items are then compared against the discrete uniform
        distribution on {((100 - freshness) / 100) * n, ..., n}: both the
        sample mean and the sample variance must lie within 4 standard
        deviations (of their respective sampling distributions) of the true
        values.

        Args:
          freshness: Freshness in percent; it is divided by 100 here and also
            forwarded to FreshReservoir as the ``freshness`` keyword.
          n: Number of items offered to the reservoir.
          capacity: First positional argument of FreshReservoir (presumably
            the maximum number of retained items -- confirm against
            ensemble.FreshReservoir).
        """
        res = ensemble.FreshReservoir(capacity, freshness=freshness)
        for item in range(1, n + 1):
            res.append_maybe(lambda: item)  # pylint: disable=cell-var-from-loop

        items = np.asarray(list(res))

        # The number of fresh items, len(items), varies stochastically in fresh
        # reservoir sampling.  The statistics below divide by len(items) and by
        # len(items) - 1, so fail with a readable message rather than a
        # ZeroDivisionError if too few items were retained.
        num_items = len(items)
        self.assertGreater(
            num_items,
            1,
            msg="Need at least two retained items to check the distribution.")

        # Smallest item value that is still expected to be fresh.
        oldest_fresh = ((100.0 - freshness) / 100.0) * n

        # Variance of the uniform discrete distr. on
        # {((100 - freshness)/100)*n,...,n}, i.e. ((b - a + 1)^2 - 1) / 12,
        # from https://en.wikipedia.org/wiki/Discrete_uniform_distribution
        var_true = ((n - oldest_fresh + 1)**2.0 - 1.0) / 12.0

        # Standard deviation of the sample mean of num_items draws.
        sample_mean_stddev = math.sqrt(var_true) / math.sqrt(num_items)
        mean_tolerance = 4.0 * sample_mean_stddev

        # Check mean is in +/- 4 sampling stddev
        mean_true = 0.5 * (oldest_fresh + n)
        mean_estimate = np.mean(items)
        logging.info(
            "Reservoir(n=%d, cap=%d, freshness=%.1f) has "
            "sample mean %.1f, true mean %.1f, sample stddev %.2f", n,
            capacity, freshness, mean_estimate, mean_true, sample_mean_stddev)
        self.assertAlmostEqual(
            mean_true,
            mean_estimate,
            delta=mean_tolerance,
            msg="Mean %.1f deviates from true mean %.1f by "
            "more than allowed 4 sigma tolerance (%.2f)" %
            (mean_estimate, mean_true, mean_tolerance))

        # Check sample variance agrees with true variance.  2*sigma^4/(N-1) is
        # the variance of the *unbiased* sample variance (normal
        # approximation), so the estimate below must use ddof=1 to match it.
        var_var_est = 2.0 * (var_true**2.0) / (num_items - 1)
        sample_var_stddev = math.sqrt(var_var_est)
        var_tolerance = 4.0 * sample_var_stddev

        var_estimate = np.var(items, ddof=1)
        logging.info(
            "Reservoir(n=%d, cap=%d, freshness=%.1f) has "
            "sample var %.1f, true var %.1f, sample var stddev %.2f", n,
            capacity, freshness, var_estimate, var_true, sample_var_stddev)
        self.assertAlmostEqual(
            var_true,
            var_estimate,
            delta=var_tolerance,
            msg="Sample variance %.1f deviates from true "
            "variance %.1f by more than allowed 4 sigma "
            "tolerance (%.2f)" %
            (var_estimate, var_true, var_tolerance))