Exemple #1
0
 def test_reservoir_sample_frequency(self, iterable_size, k):
   """Tests observed frequency is close to expected frequency.

   Repeatedly samples from `range(iterable_size)` via
   `utils.reservoir_sample` and checks that each item's observed selection
   frequency is within 0.01 (absolute) of `min(k / iterable_size, 1.0)`.

   Args:
     iterable_size: Number of items in the range being sampled from.
     k: Sample size passed to `utils.reservoir_sample`.
   """
   # Use a fixed seed so our test is deterministic.
   rng = np.random.RandomState(123456789)
   n_replicates = 100000
   counts = collections.Counter(
       item
       for _ in range(n_replicates)
       for item in utils.reservoir_sample(range(iterable_size), k, rng))
   expected_frequency = min(k / float(iterable_size), 1.0)
   # Walk every item of the source range rather than only the keys present
   # in `counts`: an item that was never sampled has no entry, and Counter
   # reports 0 for it, so it is still compared against the expectation.
   # (This also avoids the Python 2-only `dict.itervalues()`.)
   for item in range(iterable_size):
     observed_frequency = counts[item] / float(n_replicates)
     npt.assert_allclose(observed_frequency, expected_frequency, atol=0.01)
Exemple #2
0
 def test_reservoir_sample_length(self):
     """Tests samples have expected length.

     Samples from a ten-element range with several values of `k` and checks
     the length of the result: ten when `k` is at least the number of items,
     `k` otherwise, and a `ValueError` when `k` is negative.
     """
     first_ten_ints = range(10)
     # `assertEqual` is used instead of the deprecated `assertEquals` alias,
     # which was removed from unittest in Python 3.12.
     # Test sampling with k > len(iterable).
     self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 11)), 10)
     # Test sampling with k == len(iterable).
     self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 10)), 10)
     # Test sampling with k < len(iterable).
     self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 9)), 9)
     # Test sampling with k == 0.
     self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 0)), 0)
     # Test sampling with k < 0 (bad args).
     with self.assertRaises(ValueError):
         utils.reservoir_sample(first_ten_ints, -1)