# Example 1
  def testSingleBucket(self):
    """All inputs route to bucket 0 and come out as one padded 32-item batch."""
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=constant_op.constant(0),
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Verify static shape inference on the bucketed outputs.
    inferred_shapes = [t.get_shape().as_list() for t in bucketed_dynamic[1]]
    self.assertAllEqual([[32], [32, None], [32, 3], [None, None]],
                        inferred_shapes)
    with self.cached_session() as sess:
      for value in range(32):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: value,
            self.unk_int64_feed: value * [value],
            self.vec3_str_feed: 3 * [str(value)]
        })
      self.start_queue_runners(sess)

      # Pull one minibatch.
      bucketed_values = sess.run(bucketed_dynamic)

      # Output is a (which_bucket, bucket_tensors) pair.
      self.assertEqual(2, len(bucketed_values))

      # One output tensor per input tensor.
      self.assertEqual(4, len(bucketed_values[1]))

      # Every minibatch entry landed in bucket 0.
      self.assertAllEqual(0, bucketed_values[0])

      expected_scalar_int = np.arange(32)
      expected_unk_int64 = np.zeros((32, 31), dtype=np.int64)
      for row in range(32):
        expected_unk_int64[row, :row] = row
      expected_vec3_str = np.vstack(3 * [np.arange(32).astype(bytes)]).T

      # num_threads > 1 makes insertion order nondeterministic, so sort the
      # batch by the scalar column before comparing against expectations.
      order = np.argsort(bucketed_values[1][0])
      self.assertAllEqual(expected_scalar_int, bucketed_values[1][0][order])
      self.assertAllEqual(expected_unk_int64, bucketed_values[1][1][order])
      self.assertAllEqual(expected_vec3_str, bucketed_values[1][2][order])
# Example 2
  def testBatchSizePerBucket(self):
    """Each bucket may use its own batch size from the batch_size list."""
    which_bucket = control_flow_ops.cond(self.scalar_int < 5,
                                         lambda: constant_op.constant(0),
                                         lambda: constant_op.constant(1))
    batch_sizes = [5, 10]
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=batch_sizes,
        num_threads=1,
        dynamic_pad=True)
    # With per-bucket batch sizes the leading dimension is not statically known.
    inferred_shapes = [t.get_shape().as_list() for t in bucketed_dynamic[1]]
    self.assertAllEqual([[None], [None, None], [None, 3], [None, None]],
                        inferred_shapes)
    with self.cached_session() as sess:
      for value in range(15):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: value,
            self.unk_int64_feed: value * [value],
            self.vec3_str_feed: 3 * [str(value)]
        })
      self.start_queue_runners(sess)

      # Pull two minibatches: one with the small values, one with the large.
      first_batch = sess.run(bucketed_dynamic)
      second_batch = sess.run(bucketed_dynamic)

      # Determine which run produced the small-value bucket.
      if first_batch[0] < 5:
        bucketed_values_small = first_batch
        bucketed_values_large = second_batch
      else:
        bucketed_values_small = second_batch
        bucketed_values_large = first_batch

      # Small values went to bucket 0, large values to bucket 1.
      self.assertAllEqual(0, bucketed_values_small[0])
      self.assertAllEqual(1, bucketed_values_large[0])

      # The two buckets used their respective batch sizes.
      self.assertEqual(5, len(bucketed_values_small[1][0]))
      self.assertEqual(10, len(bucketed_values_large[1][0]))
# Example 3
  def testEvenOddBucketsFilterOutAllOdd(self):
    """keep_input drops every odd value, so only bucket 0 ever emits."""
    which_bucket = (self.scalar_int % 2)
    keep_input = math_ops.equal(which_bucket, 0)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        keep_input=keep_input,
        dynamic_pad=True)
    # Verify static shape inference on the bucketed outputs.
    inferred_shapes = [t.get_shape().as_list() for t in bucketed_dynamic[1]]
    self.assertAllEqual([[32], [32, None], [32, 3]], inferred_shapes)
    with self.cached_session() as sess:
      for value in range(128):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: value,
            self.unk_int64_feed: value * [value],
            self.vec3_str_feed: 3 * [str(value)]
        })
      self.start_queue_runners(sess)

      # Two minibatches of even values ([0, 2, ...] and [64, 66, ...]).
      first_even_batch = sess.run(bucketed_dynamic)
      second_even_batch = sess.run(bucketed_dynamic)

      # Bucket 1 (odd values) was filtered out entirely.
      self.assertAllEqual(0, first_even_batch[0])
      self.assertAllEqual(0, second_even_batch[0])

      # Combine both batches so they can be sorted and compared as a whole.
      all_scalar_values = np.concatenate(
          (first_even_batch[1][0], second_even_batch[1][0]))

      self.assertAllEqual(np.arange(0, 128, 2), sorted(all_scalar_values))
# Example 4
 def testFailOnWrongBucketCapacities(self):
   with self.assertRaisesRegexp(ValueError, r"must have exactly num_buckets"):
     bucket_ops.bucket(  # 2 buckets and 3 capacities raises ValueError.
         tensors=[self.scalar_int, self.unk_int64, self.vec3_str],
         which_bucket=constant_op.constant(0), num_buckets=2,
         batch_size=32, bucket_capacities=[3, 4, 5])
# Example 5
  def testEvenOddBuckets(self):
    """Even values fill bucket 0 and odd values fill bucket 1."""
    which_bucket = (self.scalar_int % 2)
    bucketed_dynamic = bucket_ops.bucket(
        tensors=[self.scalar_int, self.unk_int64, self.vec3_str, self.sparse_c],
        which_bucket=which_bucket,
        num_buckets=2,
        batch_size=32,
        num_threads=10,
        dynamic_pad=True)
    # Verify static shape inference on the bucketed outputs.
    inferred_shapes = [t.get_shape().as_list() for t in bucketed_dynamic[1]]
    self.assertAllEqual([[32], [32, None], [32, 3], [None, None]],
                        inferred_shapes)
    with self.cached_session() as sess:
      for value in range(64):
        self.enqueue_inputs(sess, {
            self.scalar_int_feed: value,
            self.unk_int64_feed: value * [value],
            self.vec3_str_feed: 3 * [str(value)]
        })
      self.start_queue_runners(sess)

      # Pull two minibatches: one of even values, one of odd values.
      first_batch = sess.run(bucketed_dynamic)
      second_batch = sess.run(bucketed_dynamic)

      # Each output is a (which_bucket, bucket_tensors) pair.
      self.assertEqual(2, len(first_batch))
      self.assertEqual(2, len(second_batch))

      # One output tensor per input tensor.
      self.assertEqual(4, len(first_batch[1]))
      self.assertEqual(4, len(second_batch[1]))

      # Multithreaded bucketing makes the emission order random, so work
      # out which run carried the even bucket.
      if first_batch[0] % 2 == 1:
        bucketed_values_even = second_batch
        bucketed_values_odd = first_batch
      else:
        bucketed_values_even = first_batch
        bucketed_values_odd = second_batch

      # Even values went to bucket 0, odd values to bucket 1.
      self.assertAllEqual(0, bucketed_values_even[0])
      self.assertAllEqual(1, bucketed_values_odd[0])

      # Check the even bucket: values 0, 2, 4, ...
      expected_scalar_int = np.arange(0, 32 * 2, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2), dtype=np.int64)
      for row in range(32):
        expected_unk_int64[row, :2 * row] = 2 * row
      expected_vec3_str = np.vstack(
          3 * [np.arange(0, 32 * 2, 2).astype(bytes)]).T

      # num_threads > 1 makes insertion order nondeterministic; sort by the
      # scalar column before comparing.
      order = np.argsort(bucketed_values_even[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_even[1][0][order])
      self.assertAllEqual(expected_unk_int64,
                          bucketed_values_even[1][1][order])
      self.assertAllEqual(expected_vec3_str,
                          bucketed_values_even[1][2][order])

      # Check the odd bucket: values 1, 3, 5, ...
      expected_scalar_int = np.arange(1, 32 * 2 + 1, 2)
      expected_unk_int64 = np.zeros((32, 31 * 2 + 1), dtype=np.int64)
      for row in range(32):
        expected_unk_int64[row, :2 * row + 1] = 2 * row + 1
      expected_vec3_str = np.vstack(
          3 * [np.arange(1, 32 * 2 + 1, 2).astype(bytes)]).T

      # Same nondeterministic-order caveat applies to the odd bucket.
      order = np.argsort(bucketed_values_odd[1][0])
      self.assertAllEqual(expected_scalar_int,
                          bucketed_values_odd[1][0][order])
      self.assertAllEqual(expected_unk_int64,
                          bucketed_values_odd[1][1][order])
      self.assertAllEqual(expected_vec3_str,
                          bucketed_values_odd[1][2][order])