def test_scalar(self):
  # Exercises the merge computation built for scalar int32 samples with
  # sample_size=5: first its type signature, then three merge scenarios that
  # all merge a second reservoir into the same reservoir `a`.
  value_type = TensorType(tf.int32)
  merge_fn = sampling._build_merge_samples_computation(
      value_type, sample_size=5)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  merge_parameter = collections.OrderedDict(a=reservoir_spec, b=reservoir_spec)
  self.assert_types_identical(
      merge_fn.type_signature,
      FunctionType(parameter=merge_parameter, result=reservoir_spec))

  def make_reservoir(seed, random_values, samples):
    # Starts from the initial reservoir for `seed` and overwrites its
    # random values and samples with the given contents.
    reservoir = sampling._build_initial_sample_reservoir(
        value_type, seed=seed)
    reservoir['random_values'] = random_values
    reservoir['samples'] = samples
    return reservoir

  first = make_reservoir(TEST_SEED, [1, 3, 5], [3, 9, 15])

  def check_merge(second, random_values, samples):
    # Merges `second` into `first` and compares against the expectation.
    # Every scenario expects the seed pair (TEST_SEED, TEST_SEED), i.e. the
    # seeds of `a`; the original notes they are discarded later.
    merged = merge_fn(first, second)
    expected = collections.OrderedDict(
        random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
        random_values=random_values,
        samples=samples)
    self.assertAllEqual(merged, expected)

  with self.subTest('downsample'):
    # Seven candidates for five slots: the expectation drops the entries
    # with random values 1 and 2.
    second = make_reservoir(TEST_SEED + 1, [2, 4, 6, 8], [6, 12, 18, 24])
    check_merge(
        second,
        random_values=[3, 5, 4, 6, 8],
        samples=[9, 15, 12, 18, 24])

  with self.subTest('keep_all'):
    # Only four candidates in total, which is below sample_size=5, so every
    # value from both reservoirs is expected in the result.
    second = make_reservoir(TEST_SEED + 1, [2], [6])
    check_merge(
        second,
        random_values=[1, 3, 5, 2],
        samples=[3, 9, 15, 6])

  with self.subTest('tie_breakers'):
    # All of `b` ties with the largest random value in `a`; the expectation
    # keeps the tied entry from `a` ahead of the entries from `b`.
    second = make_reservoir(
        TEST_SEED, [5, 5, 5, 5, 5], [-1, -1, -1, -1, -1])
    check_merge(
        second,
        random_values=[5, 5, 5, 5, 5],
        samples=[15, -1, -1, -1, -1])
def test_fails_with_non_tensor_type(self):
  # Building an initial reservoir must raise TypeError when the sample value
  # type is a sequence, or a structure containing a sequence. The types are
  # constructed inside the `assertRaises` contexts, as in the original.
  with self.assertRaises(TypeError):
    bare_sequence = SequenceType(TensorType(tf.int32))
    sampling._build_initial_sample_reservoir(
        sample_value_type=bare_sequence, seed=TEST_SEED)

  with self.assertRaises(TypeError):
    nested_sequence = computation_types.to_type(
        collections.OrderedDict(a=SequenceType(TensorType(tf.int32))))
    sampling._build_initial_sample_reservoir(
        sample_value_type=nested_sequence, seed=TEST_SEED)
def test_scalar_fixed_seed(self):
  # With a fixed seed the sample computation is deterministic, so this test
  # pins the exact reservoir contents after one, two, and five samples for a
  # reservoir of size one.
  value_type = TensorType(tf.int32)
  sample_fn = sampling._build_sample_value_computation(
      value_type, sample_size=1)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  sample_parameter = collections.OrderedDict(
      reservoir=reservoir_spec, sample=value_type)
  self.assert_types_identical(
      sample_fn.type_signature,
      FunctionType(parameter=sample_parameter, result=reservoir_spec))

  state = sampling._build_initial_sample_reservoir(
      value_type, seed=TEST_SEED)

  # The first value lands in the empty reservoir.
  state = sample_fn(state, 1)
  after_first = collections.OrderedDict(
      random_seed=(TEST_SEED, 100565241),
      random_values=[100565241],
      samples=[1])
  self.assertAllEqual(state, after_first)

  # The second value is not kept (its random value was too low), yet the
  # seed still advances for the next call.
  state = sample_fn(state, 2)
  after_second = collections.OrderedDict(
      random_seed=(TEST_SEED, -1479562987),
      random_values=[100565241],
      samples=[1])
  self.assertAllEqual(state, after_second)

  # Feed examples 3, 4 and 5; per the expectation below, the held sample is
  # only replaced by the fifth example.
  for value in (3, 4, 5):
    state = sample_fn(state, value)
  after_fifth = collections.OrderedDict(
      random_seed=(TEST_SEED, 756274747),
      random_values=[756274747],
      samples=[5])
  self.assertAllEqual(state, after_fifth)
def test_scalar_random_seed(self):
  # Checks the sample computation when no explicit seed is given: the
  # reservoir starts with the sentinel seed pair, and the first sampling call
  # is expected to replace it with a (timestamp, random number) pair.
  example_type = TensorType(tf.int32)
  sample_computation = sampling._build_sample_value_computation(
      example_type, sample_size=1)
  reservoir_type = sampling._build_reservoir_type(example_type)
  expected_type = FunctionType(
      parameter=collections.OrderedDict(
          reservoir=reservoir_type, sample=example_type),
      result=reservoir_type)
  self.assert_types_identical(sample_computation.type_signature,
                              expected_type)
  # Get the sentinel seed so that the first call initializes based on
  # timestamp.
  reservoir = sampling._build_initial_sample_reservoir(example_type)
  self.assertAllEqual(reservoir['random_seed'],
                      [sampling.SEED_SENTINEL, sampling.SEED_SENTINEL])
  reservoir = sample_computation(reservoir, 1)
  # The first value of the seed was the timestamp in milliseconds; it should
  # be greater than 1_600_000_000_000 (September 2020) and within 60 seconds
  # of now.
  self.assertGreater(reservoir['random_seed'][0], 1_600_000_000_000)
  now_millis = tf.cast(tf.timestamp() * 1000.0, tf.int64)
  # Both operands are in milliseconds, so the 60 second tolerance must be
  # expressed in milliseconds too. Comparing against a bare `60` would only
  # allow 60 ms and make the test flaky on slow machines.
  max_age_millis = 60 * 1000
  self.assertLess(now_millis - reservoir['random_seed'][0], max_age_millis)
  # The second value should be a random number. We assert it's not the
  # sentinel, though it could be with probability 1 / 2**32.
  self.assertNotEqual(reservoir['random_seed'][1], sampling.SEED_SENTINEL)
def test_scalar(self):
  # Checks the initial reservoir built for a scalar int32 sample type, both
  # with an explicit seed and with the default (sentinel) seed.
  with self.subTest('fixed_seed'):
    seeded = sampling._build_initial_sample_reservoir(
        TensorType(tf.int32), seed=TEST_SEED)
    # An explicit seed is duplicated into both seed slots, and the reservoir
    # starts with zero-length random values and samples.
    empty_reservoir = collections.OrderedDict(
        random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
        random_values=tf.zeros([0], tf.int32),
        samples=tf.zeros([0], dtype=tf.int32))
    self.assertAllEqual(seeded, empty_reservoir)

  with self.subTest('no_seed'):
    unseeded = sampling._build_initial_sample_reservoir(
        TensorType(tf.int32))
    seed_pair = unseeded['random_seed']
    # Without a seed, both slots must hold the sentinel value.
    self.assertLen(seed_pair, 2)
    self.assertEqual(seed_pair[0], seed_pair[1])
    self.assertEqual(seed_pair[0], sampling.SEED_SENTINEL)
def test_scalar(self):
  # The finalize computation for scalar int32 samples should map a reservoir
  # to its `samples` field, both in its type signature and when invoked.
  value_type = computation_types.to_type(TensorType(tf.int32))
  finalize_fn = sampling._build_finalize_sample_computation(value_type)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  self.assert_types_identical(
      finalize_fn.type_signature,
      FunctionType(parameter=reservoir_spec, result=reservoir_spec.samples))

  # Hand-fill a reservoir and expect exactly its samples back.
  state = sampling._build_initial_sample_reservoir(
      value_type, seed=TEST_SEED)
  kept_samples = [3, 9, 27]
  state['random_values'] = [3, 5, 7]
  state['samples'] = kept_samples
  finalized = finalize_fn(state)
  self.assertAllEqual(finalized, kept_samples)
def test_structure_of_tensors(self):
  # Same scenario as the scalar fixed-seed sampling test, but each example is
  # a structure: an int32 vector `a` plus a (float32, bool) pair `b`. The
  # reservoir holds a single sample.
  value_type = computation_types.to_type(
      collections.OrderedDict(
          a=TensorType(tf.int32, [3]),
          b=[TensorType(tf.float32), TensorType(tf.bool)]))
  sample_fn = sampling._build_sample_value_computation(
      value_type, sample_size=1)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  sample_parameter = collections.OrderedDict(
      reservoir=reservoir_spec, sample=value_type)
  self.assert_types_identical(
      sample_fn.type_signature,
      FunctionType(parameter=sample_parameter, result=reservoir_spec))

  state = sampling._build_initial_sample_reservoir(
      value_type, seed=TEST_SEED)

  # The first example is stored; every leaf gains a leading dimension of 1.
  first_example = collections.OrderedDict(a=[0, 1, 2], b=[1.0, True])
  state = sample_fn(state, first_example)
  first_stored = collections.OrderedDict(a=[[0, 1, 2]], b=[[1.0], [True]])
  self.assertAllEqual(
      state,
      collections.OrderedDict(
          random_seed=(TEST_SEED, 100565241),
          random_values=[100565241],
          samples=first_stored))

  # The second example is not kept (its random value was too low), yet the
  # seed still advances for the next call.
  second_example = collections.OrderedDict(a=[3, 4, 5], b=[2.0, False])
  state = sample_fn(state, second_example)
  self.assertAllEqual(
      state,
      collections.OrderedDict(
          random_seed=(TEST_SEED, -1479562987),
          random_values=[100565241],
          samples=first_stored))

  # Feed examples 3, 4 and 5; per the expectation below, the held sample is
  # only replaced by the fifth example.
  for step in (3, 4, 5):
    example = collections.OrderedDict(
        a=list(range(step, step + 3)), b=[float(step), False])
    state = sample_fn(state, example)
  fifth_stored = collections.OrderedDict(a=[[5, 6, 7]], b=[[5.0], [False]])
  self.assertAllEqual(
      state,
      collections.OrderedDict(
          random_seed=(TEST_SEED, 756274747),
          random_values=[756274747],
          samples=fifth_stored))
def test_structure_of_tensors(self):
  # The initial reservoir for a structured sample type should mirror that
  # structure in `samples`, with every leaf prefixed by a zero-length leading
  # dimension.
  structured_type = computation_types.to_type(
      collections.OrderedDict(
          a=TensorType(tf.float32),
          b=[TensorType(tf.int64, [2]), TensorType(tf.bool)]))
  reservoir = sampling._build_initial_sample_reservoir(
      sample_value_type=structured_type, seed=TEST_SEED)

  empty_samples = collections.OrderedDict(
      a=tf.zeros([0], dtype=tf.float32),
      b=[tf.zeros([0, 2], dtype=tf.int64), tf.zeros([0], dtype=tf.bool)])
  empty_reservoir = collections.OrderedDict(
      random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
      random_values=tf.zeros([0], tf.int32),
      samples=empty_samples)
  self.assertAllEqual(reservoir, empty_reservoir)
def test_structure(self):
  # The finalize computation for a structured sample type should map a
  # reservoir to its `samples` structure, both in its type signature and when
  # invoked.
  value_type = computation_types.to_type(
      collections.OrderedDict(
          a=TensorType(tf.int32),
          b=[TensorType(tf.float32, [3]), TensorType(tf.bool)]))
  finalize_fn = sampling._build_finalize_sample_computation(value_type)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  self.assert_types_identical(
      finalize_fn.type_signature,
      FunctionType(parameter=reservoir_spec, result=reservoir_spec.samples))

  # Hand-fill a reservoir with three samples and expect exactly them back.
  state = sampling._build_initial_sample_reservoir(
      value_type, seed=TEST_SEED)
  kept_samples = collections.OrderedDict(
      a=[3, 9, 27],
      b=[[[0, 1, 2], [1, 2, 3], [2, 3, 4]], [True, False, True]])
  state['random_values'] = [3, 5, 7]
  state['samples'] = kept_samples
  finalized = finalize_fn(state)
  self.assertAllEqual(finalized, kept_samples)
def test_structure_of_tensors(self):
  # Exercises the merge computation (sample_size=5) for structured samples:
  # an int32 vector `a` plus a (float32, bool) pair `b`. Checks the type
  # signature, then three merge scenarios against the same reservoir `a`.
  value_type = computation_types.to_type(
      collections.OrderedDict(
          a=TensorType(tf.int32, [3]),
          b=[TensorType(tf.float32), TensorType(tf.bool)]))
  merge_fn = sampling._build_merge_samples_computation(
      value_type, sample_size=5)
  reservoir_spec = sampling._build_reservoir_type(value_type)
  merge_parameter = collections.OrderedDict(a=reservoir_spec, b=reservoir_spec)
  self.assert_types_identical(
      merge_fn.type_signature,
      FunctionType(parameter=merge_parameter, result=reservoir_spec))

  first = sampling._build_initial_sample_reservoir(
      value_type, seed=TEST_SEED)
  first['random_values'] = [1, 3, 5]
  first['samples'] = collections.OrderedDict(
      a=[[0, 1, 2], [1, 2, 3], [2, 3, 4]],
      b=[[0.0, 1.0, 2.0], [True, False, True]])

  with self.subTest('downsample'):
    # Seven candidates for five slots: the expectation drops the entries
    # with random values 1 and 2.
    second = sampling._build_initial_sample_reservoir(
        value_type, seed=TEST_SEED + 1)
    second['random_values'] = [2, 4, 6, 8]
    second['samples'] = collections.OrderedDict(
        a=[[0, -1, -2], [-1, -2, -3], [-2, -3, -4], [-3, -4, -5]],
        b=[[-1., -2., -3., -4.], [True, False, False, True]])
    merged = merge_fn(first, second)
    kept = collections.OrderedDict(
        a=[[1, 2, 3], [2, 3, 4], [-1, -2, -3], [-2, -3, -4], [-3, -4, -5]],
        b=[[1., 2., -2., -3., -4.], [False, True, False, False, True]])
    # The expected seeds are those of `a`; the original notes that they are
    # taken arbitrarily and discarded later.
    self.assertAllEqual(
        merged,
        collections.OrderedDict(
            random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
            random_values=[3, 5, 4, 6, 8],
            samples=kept))

  with self.subTest('keep_all'):
    # Only four candidates in total, which is below sample_size=5, so every
    # value from both reservoirs is expected in the result.
    second = sampling._build_initial_sample_reservoir(
        value_type, seed=TEST_SEED + 1)
    second['random_values'] = [2]
    second['samples'] = collections.OrderedDict(
        a=[[0, -1, -2]], b=[[-1.0], [True]])
    merged = merge_fn(first, second)
    kept = collections.OrderedDict(
        a=[[0, 1, 2], [1, 2, 3], [2, 3, 4], [0, -1, -2]],
        b=[[0., 1., 2., -1.], [True, False, True, True]])
    self.assertAllEqual(
        merged,
        collections.OrderedDict(
            random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
            random_values=[1, 3, 5, 2],
            samples=kept))

  with self.subTest('tie_breakers'):
    # All of `b` ties with the largest random value in `a`; the expectation
    # keeps the tied entry from `a` ahead of the entries from `b`.
    second = sampling._build_initial_sample_reservoir(
        value_type, seed=TEST_SEED)
    second['random_values'] = [5, 5, 5, 5, 5]
    second['samples'] = collections.OrderedDict(
        a=[[-1, -1, -1]] * 5, b=[[-1] * 5, [False] * 5])
    merged = merge_fn(first, second)
    kept = collections.OrderedDict(
        a=[[2, 3, 4]] + [[-1, -1, -1]] * 4,
        b=[[2] + [-1] * 4, [True] + [False] * 4])
    self.assertAllEqual(
        merged,
        collections.OrderedDict(
            random_seed=tf.convert_to_tensor((TEST_SEED, TEST_SEED)),
            random_values=[5, 5, 5, 5, 5],
            samples=kept))