def test_target_duration(self):
  clock = FakeClock()
  batch_estimator = util._BatchSizeEstimator(
      target_batch_overhead=None, target_batch_duration_secs=10, clock=clock)
  batch_duration = lambda batch_size: 1 + .7 * batch_size
  # 1 + 12 * .7 = 9.4 is as close to the 10s target as we can get.
  expected_sizes = [1, 2, 4, 8, 12, 12, 12]
  actual_sizes = []
  for _ in range(len(expected_sizes)):
    actual_sizes.append(batch_estimator.next_batch_size())
    with batch_estimator.record_time(actual_sizes[-1]):
      clock.sleep(batch_duration(actual_sizes[-1]))
  self.assertEqual(expected_sizes, actual_sizes)
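# A quick sanity check of the 12-item plateau above, assuming the estimator
# never exceeds its duration target: solving 1 + .7 * n == 10 for n gives
# n = 9 / .7 ≈ 12.86, so after doubling 1 -> 2 -> 4 -> 8 the batch size
# settles at 12.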
def test_target_overhead(self):
  clock = FakeClock()
  batch_estimator = util._BatchSizeEstimator(
      target_batch_overhead=.05, target_batch_duration_secs=None, clock=clock)
  batch_duration = lambda batch_size: 1 + .7 * batch_size
  # At 27 items, a batch takes ~20 seconds with 5% (~1 second) overhead.
  expected_sizes = [1, 2, 4, 8, 16, 27, 27, 27]
  actual_sizes = []
  for _ in range(len(expected_sizes)):
    actual_sizes.append(batch_estimator.next_batch_size())
    with batch_estimator.record_time(actual_sizes[-1]):
      clock.sleep(batch_duration(actual_sizes[-1]))
  self.assertEqual(expected_sizes, actual_sizes)
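# Likewise for the 27-item plateau above: with a 1 second fixed cost and
# .7 seconds per element, solving 1 / (1 + .7 * n) == .05 for n gives
# n = .95 / .035 ≈ 27.1, so doubling stops at 27 after 1 -> 2 -> 4 -> 8 -> 16.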
def test_variance(self):
  clock = FakeClock()
  variance = 0.25
  batch_estimator = util._BatchSizeEstimator(
      target_batch_overhead=.05,
      target_batch_duration_secs=None,
      variance=variance,
      clock=clock)
  batch_duration = lambda batch_size: 1 + .7 * batch_size
  expected_target = 27
  actual_sizes = []
  for _ in range(util._BatchSizeEstimator._MAX_DATA_POINTS - 1):
    actual_sizes.append(batch_estimator.next_batch_size())
    with batch_estimator.record_time(actual_sizes[-1]):
      clock.sleep(batch_duration(actual_sizes[-1]))
  # Check that we're testing a good range of values.
  stable_set = set(actual_sizes[-20:])
  self.assertGreater(len(stable_set), 3)
  self.assertGreater(
      min(stable_set), expected_target - expected_target * variance)
  self.assertLess(
      max(stable_set), expected_target + expected_target * variance)
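# The tests above rely on a FakeClock helper: sleep() advances virtual time
# instantly, so the simulated batch durations are exact and the tests run
# fast. A minimal sketch consistent with how it is used here; the callable
# interface (and the now field) is an assumption based on the estimator
# taking a clock argument, not the definitive helper from this module:
class FakeClock(object):
  def __init__(self):
    self.now = 0.0

  def __call__(self):
    # Assumed interface: the estimator reads timestamps by calling the clock.
    return self.now

  def sleep(self, duration):
    # Advance virtual time instead of blocking the test.
    self.now += duration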