def test_fresh_reservoir(self):
    """Check the mean of the items retained by a FreshReservoir.

    Offers the integers 1..1000000 to a reservoir constructed with the
    arguments (10000, 80.0) and expects the mean of the retained items to
    lie within 1000 of the midpoint of [200000, 1000000].
    """
    num_offered = 1000000
    reservoir = ensemble.FreshReservoir(10000, 80.0)
    for value in range(1, num_offered + 1):
        # The closure reads the loop variable by name; the test relies on
        # it being evaluated before the next iteration rebinds it.
        reservoir.append_maybe(lambda: value)  # pylint: disable=cell-var-from-loop

    kept = np.asarray(list(reservoir)).astype(np.float64)

    # Midpoint of the range the retained items are expected to cover.
    expected_mean = 0.5 * (200000.0 + 1000000.0)
    actual_mean = np.mean(kept)
    logging.info("Fresh reservoir, true mean %.2f, computed mean %.2f",
                 expected_mean, actual_mean)

    allowed_error = 1000.0
    self.assertAlmostEqual(
        expected_mean,
        actual_mean,
        delta=allowed_error,
        msg="Fresh reservoir item freshness is off.")
def test_fresh_reservoir_distribution(self, freshness, n, capacity):
    """Check sample mean and variance of items kept by a FreshReservoir.

    Offers the integers 1..n to a reservoir and compares the mean and the
    variance of the retained items with those of a discrete uniform
    distribution on {((100 - freshness) / 100) * n, ..., n}. Each
    comparison allows a deviation of four standard deviations of the
    corresponding estimator.

    Args:
      freshness: Value forwarded as `freshness=` to
        `ensemble.FreshReservoir`; this test treats it as a percentage.
      n: Number of consecutive integers (1..n) offered to the reservoir.
      capacity: First positional argument of `ensemble.FreshReservoir`.
    """
    reservoir = ensemble.FreshReservoir(capacity, freshness=freshness)
    for value in range(1, n + 1):
        # The closure reads the loop variable by name; the test relies on
        # it being evaluated before the next iteration rebinds it.
        reservoir.append_maybe(lambda: value)  # pylint: disable=cell-var-from-loop

    kept = np.asarray(list(reservoir))
    # The number of retained items varies stochastically from run to run.
    num_kept = len(kept)

    # Smallest value the reference distribution can take.
    oldest_fresh = ((100.0 - freshness) / 100.0) * n

    # Variance of the discrete uniform distribution on {oldest_fresh,...,n},
    # from https://en.wikipedia.org/wiki/Discrete_uniform_distribution
    var_true = ((n - oldest_fresh + 1)**2.0 - 1.0) / 12.0
    mean_true = 0.5 * (oldest_fresh + n)

    # --- Mean: must lie within +/- 4 standard errors of the true mean. ---
    sample_mean_stddev = math.sqrt(var_true) / math.sqrt(num_kept)
    mean_tolerance = 4.0 * sample_mean_stddev
    mean_estimate = np.mean(kept)
    logging.info(
        "Reservoir(n=%d, cap=%d, freshness=%.1f) has "
        "sample mean %.1f, true mean %.1f, sample stddev %.2f",
        n, capacity, freshness, mean_estimate, mean_true,
        sample_mean_stddev)
    self.assertAlmostEqual(
        mean_true,
        mean_estimate,
        delta=mean_tolerance,
        msg="Mean %.1f deviates from true mean %.1f by "
        "more than allowed 4 sigma tolerance (%.2f)" %
        (mean_estimate, mean_true, mean_tolerance))

    # --- Variance: must lie within +/- 4 stddev of the sample variance. ---
    # NOTE(review): 2*var^2/(m-1) is the normal-theory variance of the
    # sample variance, and np.var defaults to ddof=0; both are kept as in
    # the original test -- confirm this tolerance is the intended one.
    var_var_est = 2.0 * (var_true**2.0) / (num_kept - 1)
    sample_var_stddev = math.sqrt(var_var_est)
    var_tolerance = 4.0 * sample_var_stddev
    var_estimate = np.var(kept)
    logging.info(
        "Reservoir(n=%d, cap=%d, freshness=%.1f) has "
        "sample var %.1f, true var %.1f, sample var stddev %.2f",
        n, capacity, freshness, var_estimate, var_true, sample_var_stddev)
    self.assertAlmostEqual(
        var_true,
        var_estimate,
        delta=var_tolerance,
        msg="Sample variance %.1f deviates from true "
        "variance %.1f by more than allowed 4 sigma "
        "tolerance (%.2f)" % (var_estimate, var_true, var_tolerance))