  def testSingleBucket(self):
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str,
                 self.sparse_c],
        which_bucket=constant_op.constant(0),
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(32):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get a single minibatch
      bucketed_values = sess.run(bucketed_dynamic)

      # (which_bucket, bucket_tensors).
      self.assertEqual(2, len(bucketed_values))

      # Count number of bucket_tensors.
      self.assertEqual(4, len(bucketed_values[1]))

      # Ensure bucket 0 was used for all minibatch entries.
      self.assertAllEqual(0, bucketed_values[0])

      expected_scalar_int = np.arange(32)
      expected_unk_int64 = np.zeros((32, 31)).astype(np.int64)
      for i in range(32):
        expected_unk_int64[i, :i] = i
      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T

      # Must resort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values[1][0])
      self.assertAllEqual(expected_scalar_int, bucketed_values[1][0][resort])
      self.assertAllEqual(expected_unk_int64, bucketed_values[1][1][resort])
      self.assertAllEqual(expected_vec3_str, bucketed_values[1][2][resort])

  def testBatchSizePerBucket(self):
    which_bucket = control_flow_ops.cond(self.scalar_int < 5,
                                         lambda: constant_op.constant(0),
                                         lambda: constant_op.constant(1))
    batch_sizes = [5, 10]
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str,
                 self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=batch_sizes,
        num_threads=1,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[None], [None, None], [None, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(15):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches (one with small values, one with large).
      bucketed_values_0 = sess.run(bucketed_dynamic)
      bucketed_values_1 = sess.run(bucketed_dynamic)

      # Figure out which output has the small values.
      if bucketed_values_0[0] < 5:
        bucketed_values_small, bucketed_values_large = (bucketed_values_0,
                                                        bucketed_values_1)
      else:
        bucketed_values_small, bucketed_values_large = (bucketed_values_1,
                                                        bucketed_values_0)

      # Ensure the small values went to bucket 0 and the large ones to
      # bucket 1.
      self.assertAllEqual(0, bucketed_values_small[0])
      self.assertAllEqual(1, bucketed_values_large[0])

      # Check that the batch sizes differ per bucket.
      self.assertEqual(5, len(bucketed_values_small[1][0]))
      self.assertEqual(10, len(bucketed_values_large[1][0]))

  def testEvenOddBucketsFilterOutAllOdd(self):
    which_bucket = (self.scalar_int % 2)
    keep_input = math_ops.equal(which_bucket, 0)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        keep_input=keep_input,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(128):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches ([0, 2, ...] and [64, 66, ...])
      bucketed_values_even0 = sess.run(bucketed_dynamic)
      bucketed_values_even1 = sess.run(bucketed_dynamic)

      # Ensure that bucket 1 was completely filtered out
      self.assertAllEqual(0, bucketed_values_even0[0])
      self.assertAllEqual(0, bucketed_values_even1[0])

      # Merge their output for sorting and comparison
      bucketed_values_all_elem0 = np.concatenate(
          (bucketed_values_even0[1][0], bucketed_values_even1[1][0]))

      self.assertAllEqual(
          np.arange(0, 128, 2), sorted(bucketed_values_all_elem0))

  def testFailOnWrongBucketCapacities(self):
    with self.assertRaisesRegexp(ValueError, r"must have exactly num_buckets"):
      bucket_ops.bucket(  # 2 buckets and 3 capacities raises ValueError.
          tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
          which_bucket=constant_op.constant(0),
          num_buckets=2,
          batch_size=32,
          bucket_capacities=[3, 4, 5])

  def testEvenOddBuckets(self):
    which_bucket = (self.scalar_int % 2)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str,
                 self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Check shape inference on bucketing outputs
    self.assertAllEqual(
        [[32], [32, None], [32, 3], [None, None]],
        [out.get_shape().as_list() for out in bucketed_dynamic[1]])
    with self.cached_session() as sess:
      for v in range(64):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: v,
            self.unk_int64_feed: v * [v],
            self.vec3_str_feed: 3 * [str(v)]
        })
      self.start_queue_runners(sess)

      # Get two minibatches (one containing even values, one containing odds)
      bucketed_values_0 = sess.run(bucketed_dynamic)
      bucketed_values_1 = sess.run(bucketed_dynamic)

      # (which_bucket, bucket_tensors).
      self.assertEqual(2, len(bucketed_values_0))
      self.assertEqual(2, len(bucketed_values_1))

      # Count number of bucket_tensors.
      self.assertEqual(4, len(bucketed_values_0[1]))
      self.assertEqual(4, len(bucketed_values_1[1]))

      # Figure out which output has the even values (there's randomness
      # due to the multithreaded nature of bucketing).
      if bucketed_values_0[0] % 2 == 1:
        bucketed_values_even, bucketed_values_odd = (bucketed_values_1,
                                                     bucketed_values_0)
      else:
        bucketed_values_even, bucketed_values_odd = (bucketed_values_0,
                                                     bucketed_values_1)

      # Ensure bucket 0 was used for the evens and bucket 1 for the odds.
      self.assertAllEqual(0, bucketed_values_even[0])
      self.assertAllEqual(1, bucketed_values_odd[0])

      # Test the first bucket outputted, the evens starting at 0.
      expected_scalar_int = np.arange(0, 32 * 2, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2)).astype(np.int64)
      for i in range(0, 32):
        expected_unk_int64[i, :2 * i] = 2 * i
      expected_vec3_str = np.vstack(
          3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T

      # Must resort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values_even[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_even[1][0][resort])
      self.assertAllEqual(expected_unk_int64,
                          bucketed_values_even[1][1][resort])
      self.assertAllEqual(expected_vec3_str,
                          bucketed_values_even[1][2][resort])

      # Test the second bucket outputted, the odds starting at 1.
      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2 + 1)).astype(np.int64)
      for i in range(0, 32):
        expected_unk_int64[i, :2 * i + 1] = 2 * i + 1
      expected_vec3_str = np.vstack(
          3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T

      # Must resort the output because num_threads > 1 leads to
      # sometimes-inconsistent insertion order.
      resort = np.argsort(bucketed_values_odd[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_odd[1][0][resort])
      self.assertAllEqual(expected_unk_int64,
                          bucketed_values_odd[1][1][resort])
      self.assertAllEqual(expected_vec3_str,
                          bucketed_values_odd[1][2][resort])