def test_reservoir_sample_frequency(self, iterable_size, k):
    """Tests observed frequency is close to expected frequency.

    Repeatedly samples from `range(iterable_size)` and checks that every
    item of the iterable is selected with frequency close to
    `min(k / iterable_size, 1.0)`.

    Args:
      iterable_size: Number of items in the iterable being sampled.
      k: Sample size passed to `utils.reservoir_sample`.
    """
    # Use a fixed seed so our test is deterministic.
    random = np.random.RandomState(123456789)
    n_replicates = 100000
    counts = collections.Counter(
        item
        for _ in range(n_replicates)
        for item in utils.reservoir_sample(range(iterable_size), k, random))
    expected_frequency = min(k / float(iterable_size), 1.0)
    # Check every item of the iterable, not just the ones that were observed:
    # a Counter reports 0 for a missing key, so an item that is never sampled
    # is compared against the expected frequency instead of being skipped.
    for item in range(iterable_size):
        observed_frequency = counts[item] / float(n_replicates)
        npt.assert_allclose(
            observed_frequency,
            expected_frequency,
            atol=0.01,
            err_msg='Unexpected sampling frequency for item %r' % (item,))
def test_reservoir_sample_length(self):
    """Tests samples have expected length.

    Covers k greater than, equal to, and less than the iterable length,
    k == 0, and a negative k, which must raise ValueError.
    """
    first_ten_ints = range(10)
    # Test sampling with k > len(iterable).
    self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 11)), 10)
    # Test sampling with k == len(iterable).
    self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 10)), 10)
    # Test sampling with k < len(iterable).
    self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 9)), 9)
    # Test sampling with k == 0.
    self.assertEqual(len(utils.reservoir_sample(first_ten_ints, 0)), 0)
    # Test sampling with k < 0 (bad args).
    with self.assertRaises(ValueError):
        utils.reservoir_sample(first_ten_ints, -1)