Example #1
0
 def testPandasFeeding(self):
     """Checks that a DataFrame is dequeued batch by batch in row order.

     A 32-row DataFrame is enqueued without shuffling. Each dequeued batch
     is compared against the rows obtained by walking the frame
     sequentially and wrapping around at the end. The test is a no-op when
     pandas is not available.
     """
     if not HAS_PANDAS:
         return
     with tf.Graph().as_default():
         array1 = np.arange(32)
         array2 = np.arange(32, 64)
         df = pd.DataFrame({
             "a": array1,
             "b": array2
         },
                           index=np.arange(64, 96))
         q = ff.enqueue_data(df, capacity=100)
         batch_size = 5
         # Use `batch_size` (not a literal) so the dequeued batch always
         # has the same length as the expected index list built below.
         dq_op = q.dequeue_many(batch_size)
         with tf.Session() as sess:
             coord = tf.train.Coordinator()
             threads = tf.train.start_queue_runners(sess=sess, coord=coord)
             for i in range(100):
                 # Row positions of the i-th batch, wrapping past the end.
                 indices = [
                     j % array1.shape[0]
                     for j in range(batch_size * i, batch_size * (i + 1))
                 ]
                 expected_df_indices = df.index[indices]
                 expected_rows = df.iloc[indices]
                 dq = sess.run(dq_op)
                 # dq[0] is compared with the index; columns follow it.
                 np.testing.assert_array_equal(expected_df_indices, dq[0])
                 for col_num, col in enumerate(df.columns):
                     np.testing.assert_array_equal(
                         expected_rows[col].values, dq[col_num + 1])
             coord.request_stop()
             coord.join(threads)
Example #2
0
 def input_fn():
     """Pandas input function.

     Enqueues the rows of `x` (with `y` attached as column `target_column`
     when `y` is given) and returns one dequeued batch keyed by column
     name.

     Returns:
         A dict mapping `index_column` and every column name to its
         dequeued value, or a `(features, target)` tuple when `y` is not
         None.

     Raises:
         ValueError: if `target_column` is already a column of `x`, or if
             the indexes of `x` and `y` differ.
     """
     data = x
     if y is not None:
         if target_column in x:
             # Interpolate explicitly: ValueError does not apply %-style
             # arguments the way logging calls do.
             raise ValueError(
                 'Found already column \'%s\' in x, please change '
                 'target_column to something else. Current columns '
                 'in x: %s' % (target_column, x.columns))
         if not np.array_equal(x.index, y.index):
             raise ValueError(
                 'Index for x and y are mismatch, this will lead '
                 'to missing values. Please make sure they match or '
                 'use .reset_index() method.\n'
                 'Index for x: %s\n'
                 'Index for y: %s\n' % (x.index, y.index))
         # Attach the target to a copy so the caller's frame is left
         # untouched; mutating `x` made every later call of this function
         # fail the "already column" check above.
         data = x.copy()
         data[target_column] = y
     queue = feeding_functions.enqueue_data(data,
                                            queue_capacity,
                                            shuffle=shuffle,
                                            num_threads=num_threads,
                                            enqueue_size=batch_size,
                                            num_epochs=num_epochs)
     if num_epochs is None:
         features = queue.dequeue_many(batch_size)
     else:
         # With a bounded number of epochs the last batch may be short.
         features = queue.dequeue_up_to(batch_size)
     features = dict(zip([index_column] + list(data.columns), features))
     if y is not None:
         target = features.pop(target_column)
         return features, target
     return features
Example #3
0
 def testShuffle(self):
     """Checks that shuffled feeding yields every value about equally often.

     Dequeues `iterations` batches from a shuffled queue over a small
     array and asserts each value's count lies strictly within
     `tolerance` of the mean count.
     """
     array_size = 7
     batch_size = 3
     iterations = 1000
     tolerance = 3
     expected_count = float(batch_size * iterations) / array_size
     message_template = "Value {} occurred {} times, expected {:.2f} +/- {}"
     with tf.Graph().as_default():
         values = np.arange(array_size)
         queue = ff.enqueue_data(
             values, capacity=100, shuffle=True, seed=1234)
         dequeue_op = queue.dequeue_many(batch_size)
         with tf.Session() as session:
             coordinator = tf.train.Coordinator()
             runner_threads = tf.train.start_queue_runners(
                 sess=session, coord=coordinator)
             occurrences = dict.fromkeys(values, 0)
             for _ in range(iterations):
                 batch = session.run(dequeue_op)
                 # batch[1] holds the dequeued array values.
                 for value in batch[1]:
                     self.assertIn(value, values)
                     occurrences[value] += 1
             for value, seen in occurrences.items():
                 message = message_template.format(
                     value, seen, expected_count, tolerance)
                 self.assertGreater(expected_count + tolerance, seen, message)
                 self.assertLess(expected_count - tolerance, seen, message)
             coordinator.request_stop()
             coordinator.join(runner_threads)
 def testShuffle(self):
   """Verifies shuffled feeding returns each value roughly uniformly.

   Runs `iterations` dequeues of `batch_size` elements and asserts that
   the number of times each array value was seen is strictly within
   `tolerance` of the mean.
   """
   array_size = 7
   batch_size = 3
   iterations = 1000
   tolerance = 3
   expected_count = float(batch_size * iterations) / array_size
   template = "Value {} occurred {} times, expected {:.2f} +/- {}"
   with tf.Graph().as_default():
     values = np.arange(array_size)
     queue = ff.enqueue_data(values, capacity=100, shuffle=True, seed=1234)
     dequeue_op = queue.dequeue_many(batch_size)
     with tf.Session() as session:
       coordinator = tf.train.Coordinator()
       runner_threads = tf.train.start_queue_runners(
           sess=session, coord=coordinator)
       tally = dict.fromkeys(values, 0)
       for _ in range(iterations):
         batch = session.run(dequeue_op)
         # batch[1] holds the dequeued array values.
         for value in batch[1]:
           self.assertIn(value, values)
           tally[value] += 1
       for value, seen in tally.items():
         failure_text = template.format(
             value, seen, expected_count, tolerance)
         self.assertGreater(expected_count + tolerance, seen, failure_text)
         self.assertLess(expected_count - tolerance, seen, failure_text)
       coordinator.request_stop()
       coordinator.join(runner_threads)
Example #5
0
 def input_fn():
   """Pandas input function.

   Enqueues the rows of `x` (with `y` attached as column `target_column`
   when `y` is given) and returns one dequeued batch keyed by column name.

   Returns:
     A dict mapping `index_column` and every column name to its dequeued
     value, or a `(features, target)` tuple when `y` is not None.

   Raises:
     ValueError: if `target_column` is already a column of `x`, or if the
       indexes of `x` and `y` differ.
   """
   data = x
   if y is not None:
     if target_column in x:
       # Interpolate explicitly: ValueError does not apply %-style
       # arguments the way logging calls do.
       raise ValueError('Found already column \'%s\' in x, please change '
                        'target_column to something else. Current columns '
                        'in x: %s' % (target_column, x.columns))
     if not np.array_equal(x.index, y.index):
       raise ValueError('Index for x and y are mismatch, this will lead '
                        'to missing values. Please make sure they match or '
                        'use .reset_index() method.\n'
                        'Index for x: %s\n'
                        'Index for y: %s\n' % (x.index, y.index))
     # Attach the target to a copy so the caller's frame is left untouched;
     # mutating `x` made every later call of this function fail the
     # "already column" check above.
     data = x.copy()
     data[target_column] = y
   queue = feeding_functions.enqueue_data(
       data, queue_capacity, shuffle=shuffle, num_threads=num_threads,
       enqueue_size=batch_size, num_epochs=num_epochs)
   if num_epochs is None:
     features = queue.dequeue_many(batch_size)
   else:
     # With a bounded number of epochs the last batch may be short.
     features = queue.dequeue_up_to(batch_size)
   features = dict(zip([index_column] + list(data.columns), features))
   if y is not None:
     target = features.pop(target_column)
     return features, target
   return features
 def testPandasFeeding(self):
   """Checks that a DataFrame is dequeued batch by batch in row order.

   A 32-row DataFrame is enqueued without shuffling. Each dequeued batch is
   compared against the rows obtained by walking the frame sequentially
   and wrapping around at the end. The test is a no-op when pandas is not
   available.
   """
   if not HAS_PANDAS:
     return
   with tf.Graph().as_default():
     array1 = np.arange(32)
     array2 = np.arange(32, 64)
     df = pd.DataFrame({"a": array1, "b": array2}, index=np.arange(64, 96))
     q = ff.enqueue_data(df, capacity=100)
     batch_size = 5
     # Use `batch_size` (not a literal) so the dequeued batch always has
     # the same length as the expected index list built below.
     dq_op = q.dequeue_many(batch_size)
     with tf.Session() as sess:
       coord = tf.train.Coordinator()
       threads = tf.train.start_queue_runners(sess=sess, coord=coord)
       for i in range(100):
         # Row positions of the i-th batch, wrapping past the end.
         indices = [j % array1.shape[0]
                    for j in range(batch_size * i, batch_size * (i + 1))]
         expected_df_indices = df.index[indices]
         expected_rows = df.iloc[indices]
         dq = sess.run(dq_op)
         # dq[0] is compared with the index; columns follow it.
         np.testing.assert_array_equal(expected_df_indices, dq[0])
         for col_num, col in enumerate(df.columns):
           np.testing.assert_array_equal(expected_rows[col].values,
                                         dq[col_num + 1])
       coord.request_stop()
       coord.join(threads)
Example #7
0
  def _apply_transform(self, transform_input):
    """Enqueues `self.data` and wraps one dequeued batch in `return_type`.

    Args:
      transform_input: not read by this method.

    Returns:
      `self.return_type` called with the dequeued value(s) as positional
      arguments.
    """
    feed_queue = feeding_functions.enqueue_data(
        self.data, self.queue_capacity, self.shuffle, self.min_after_dequeue)
    batch = feed_queue.dequeue_many(self.batch_size)

    # TODO(jamieas): dequeue and dequeue_many will soon return a list regardless
    # of the number of enqueued tensors. Remove the single-value branch once
    # that change is in place.
    # pylint: disable=not-callable
    if isinstance(batch, (tuple, list)):
      return self.return_type(*batch)
    return self.return_type(batch)
    def _apply_transform(self, transform_input):
        """Enqueues `self.data` and wraps one dequeued batch in `return_type`.

        Args:
            transform_input: not read by this method.

        Returns:
            `self.return_type` called with the dequeued value(s) as
            positional arguments.
        """
        feed_queue = feeding_functions.enqueue_data(
            self.data, self.queue_capacity, self.shuffle,
            self.min_after_dequeue)
        batch = feed_queue.dequeue_many(self.batch_size)

        # TODO(jamieas): dequeue and dequeue_many will soon return a list
        # regardless of the number of enqueued tensors. Remove the
        # single-value branch once that change is in place.
        # pylint: disable=not-callable
        if isinstance(batch, (tuple, list)):
            return self.return_type(*batch)
        return self.return_type(batch)
 def testArrayFeedingMultiThread(self):
   """Checks shuffled multi-threaded array feeding keeps rows intact.

   For every dequeued batch, the rows looked up via `get_rows` at the
   dequeued indices must equal the dequeued data.
   """
   with tf.Graph().as_default():
     data = np.arange(256).reshape([128, 2])
     queue = ff.enqueue_data(data, capacity=128, num_threads=8, shuffle=True)
     batch_size = 3
     dequeue_op = queue.dequeue_many(batch_size)
     with tf.Session() as session:
       coordinator = tf.train.Coordinator()
       runner_threads = tf.train.start_queue_runners(
           sess=session, coord=coordinator)
       for _ in range(100):
         # The batch is (row indices, row values).
         row_indices, row_values = session.run(dequeue_op)[:2]
         np.testing.assert_array_equal(
             get_rows(data, row_indices), row_values)
       coordinator.request_stop()
       coordinator.join(runner_threads)
 def testArrayFeeding(self):
     """Checks that an array is dequeued batch by batch in row order.

     Each dequeued batch must carry the sequential (wrapping) row indices
     and the rows `get_rows` returns for those indices.
     """
     with tf.Graph().as_default():
         data = np.arange(32).reshape([16, 2])
         queue = ff.enqueue_data(data, capacity=100)
         batch_size = 3
         dequeue_op = queue.dequeue_many(batch_size)
         with tf.Session() as session:
             coordinator = tf.train.Coordinator()
             runner_threads = tf.train.start_queue_runners(
                 sess=session, coord=coordinator)
             num_rows = data.shape[0]
             for step in range(100):
                 first = batch_size * step
                 # Row positions of this batch, wrapping past the end.
                 expected_indices = [
                     (first + offset) % num_rows
                     for offset in range(batch_size)
                 ]
                 expected_rows = get_rows(data, expected_indices)
                 batch = session.run(dequeue_op)
                 np.testing.assert_array_equal(expected_indices, batch[0])
                 np.testing.assert_array_equal(expected_rows, batch[1])
             coordinator.request_stop()
             coordinator.join(runner_threads)
Example #11
0
 def testArrayFeeding(self):
   """Checks that an array is dequeued batch by batch in row order.

   Each dequeued batch must carry the sequential (wrapping) row indices and
   the rows `get_rows` returns for those indices.
   """
   with tf.Graph().as_default():
     data = np.arange(32).reshape([16, 2])
     queue = ff.enqueue_data(data, capacity=100)
     batch_size = 3
     dequeue_op = queue.dequeue_many(batch_size)
     with tf.Session() as session:
       coordinator = tf.train.Coordinator()
       runner_threads = tf.train.start_queue_runners(
           sess=session, coord=coordinator)
       num_rows = data.shape[0]
       for step in range(100):
         first = batch_size * step
         # Row positions of this batch, wrapping past the end.
         expected_indices = [(first + offset) % num_rows
                             for offset in range(batch_size)]
         expected_rows = get_rows(data, expected_indices)
         batch = session.run(dequeue_op)
         np.testing.assert_array_equal(expected_indices, batch[0])
         np.testing.assert_array_equal(expected_rows, batch[1])
       coordinator.request_stop()
       coordinator.join(runner_threads)
Example #12
0
 def input_fn():
   """Pandas input function.

   Returns:
     A dict mapping `index_column` and every column of `x` to its dequeued
     value, or a `(features, target)` tuple when `y` is not None.
   """
   queue = feeding_functions.enqueue_data(
       x,
       queue_capacity,
       shuffle=shuffle,
       min_after_dequeue=min_after_dequeue,
       num_threads=num_threads,
       enqueue_size=batch_size,
       num_epochs=num_epochs)
   # Without an epoch limit always take full batches; otherwise accept a
   # shorter batch.
   dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
   tensors = dequeue(batch_size)
   names = [index_column] + list(x.columns)
   features = dict(zip(names, tensors))
   if y is None:
     return features
   target = features.pop(target_column)
   return features, target
Example #13
0
 def input_fn():
     """Pandas input function.

     Returns:
         A dict mapping `index_column` and every column of `x` to its
         dequeued value, or a `(features, target)` tuple when `y` is not
         None.
     """
     queue = feeding_functions.enqueue_data(
         x,
         queue_capacity,
         shuffle=shuffle,
         min_after_dequeue=min_after_dequeue,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
     # Without an epoch limit always take full batches; otherwise accept a
     # shorter batch.
     dequeue = (queue.dequeue_many
                if num_epochs is None else queue.dequeue_up_to)
     tensors = dequeue(batch_size)
     names = [index_column] + list(x.columns)
     features = dict(zip(names, tensors))
     if y is None:
         return features
     target = features.pop(target_column)
     return features, target
 def testArrayFeedingMultiThread(self):
     """Checks shuffled multi-threaded array feeding keeps rows intact.

     For every dequeued batch, the rows looked up via `get_rows` at the
     dequeued indices must equal the dequeued data.
     """
     with tf.Graph().as_default():
         data = np.arange(256).reshape([128, 2])
         queue = ff.enqueue_data(
             data, capacity=128, num_threads=8, shuffle=True)
         batch_size = 3
         dequeue_op = queue.dequeue_many(batch_size)
         with tf.Session() as session:
             coordinator = tf.train.Coordinator()
             runner_threads = tf.train.start_queue_runners(
                 sess=session, coord=coordinator)
             for _ in range(100):
                 # The batch is (row indices, row values).
                 row_indices, row_values = session.run(dequeue_op)[:2]
                 np.testing.assert_array_equal(
                     get_rows(data, row_indices), row_values)
             coordinator.request_stop()
             coordinator.join(runner_threads)
Example #15
0
    def input_fn():
        """Numpy input function.

        Enqueues the arrays in `x` (plus `y` under a generated key when
        `y` is given) and returns one dequeued batch keyed by name.

        Returns:
            A dict of features, or a `(features, target)` tuple when `y`
            is not None.

        Raises:
            TypeError: if `x` is not a dict.
            ValueError: if the arrays (including `y`) do not all have the
                same shape.
        """
        if not isinstance(x, dict):
            raise TypeError('x must be dict; got {}'.format(type(x).__name__))

        unique_target_key = _get_unique_target_key(x)
        # Work on a shallow copy: writing the target into the caller's
        # dict would leak the generated key into `x` on every call.
        data = dict(x)
        if y is not None:
            data[unique_target_key] = y

        if len(set(v.shape for v in data.values())) != 1:
            shape_dict_of_x = {k: v.shape for k, v in data.items()}
            shape_of_y = None if y is None else y.shape
            raise ValueError(
                'Shape of x and y are mismatch, this will lead to '
                'missing values. Please make sure each value in x have '
                'the same shape as y.\n'
                'Shape for x: {}\n'
                'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

        # Ensure the order of iteration is consistent.
        ordered_dict_x = collections.OrderedDict(
            sorted(data.items(), key=lambda t: t[0]))

        queue = feeding_functions.enqueue_data(ordered_dict_x,
                                               queue_capacity,
                                               shuffle=shuffle,
                                               num_threads=num_threads,
                                               enqueue_size=batch_size,
                                               num_epochs=num_epochs)

        features = (queue.dequeue_many(batch_size)
                    if num_epochs is None else queue.dequeue_up_to(batch_size))

        # Remove the first `Tensor` in `features`, which is the row number.
        if features:
            features.pop(0)

        features = dict(zip(ordered_dict_x.keys(), features))
        if y is not None:
            target = features.pop(unique_target_key)
            return features, target
        return features
Example #16
0
  def input_fn():
    """Numpy input function.

    Enqueues the arrays in `x` (plus `y` under a generated key when `y` is
    given) and returns one dequeued batch keyed by name.

    Returns:
      A dict of features, or a `(features, target)` tuple when `y` is not
      None.

    Raises:
      TypeError: if `x` is not a dict.
      ValueError: if the arrays (including `y`) do not all have the same
        shape.
    """
    if not isinstance(x, dict):
      raise TypeError('x must be dict; got {}'.format(type(x).__name__))

    unique_target_key = _get_unique_target_key(x)
    # Work on a shallow copy: writing the target into the caller's dict
    # would leak the generated key into `x` on every call.
    data = dict(x)
    if y is not None:
      data[unique_target_key] = y

    if len(set(v.shape for v in data.values())) != 1:
      shape_dict_of_x = {k: v.shape for k, v in data.items()}
      shape_of_y = None if y is None else y.shape
      raise ValueError('Shape of x and y are mismatch, this will lead to '
                       'missing values. Please make sure each value in x have '
                       'the same shape as y.\n'
                       'Shape for x: {}\n'
                       'Shape for y: {}\n'.format(shape_dict_of_x, shape_of_y))

    # Ensure the order of iteration is consistent.
    ordered_dict_x = collections.OrderedDict(
        sorted(data.items(), key=lambda t: t[0]))

    queue = feeding_functions.enqueue_data(
        ordered_dict_x,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    features = (queue.dequeue_many(batch_size) if num_epochs is None
                else queue.dequeue_up_to(batch_size))

    # Remove the first `Tensor` in `features`, which is the row number.
    if features:
      features.pop(0)

    features = dict(zip(ordered_dict_x.keys(), features))
    if y is not None:
      target = features.pop(unique_target_key)
      return features, target
    return features
Example #17
0
    def _generator_input_fn():
        """Generator input function.

        Returns:
            A dict mapping `input_keys` to dequeued values, or a
            `(features, target)` tuple when `target_key` is not None. The
            target is a dict when `target_key` has more than one entry,
            otherwise the single popped value.
        """
        queue = feeding_functions.enqueue_data(
            x,
            queue_capacity,
            shuffle=shuffle,
            num_threads=num_threads,
            enqueue_size=batch_size,
            num_epochs=num_epochs)

        # Without an epoch limit always take full batches; otherwise accept
        # a shorter batch.
        dequeue = (queue.dequeue_many
                   if num_epochs is None else queue.dequeue_up_to)
        tensors = dequeue(batch_size)
        if not isinstance(tensors, list):
            tensors = [tensors]
        features = dict(zip(input_keys, tensors))
        if target_key is None:
            return features
        if len(target_key) > 1:
            target = {key: features.pop(key) for key in target_key}
        else:
            target = features.pop(target_key[0])
        return features, target
Example #18
0
  def _generator_input_fn():
    """Generator input function.

    Returns:
      A dict mapping `input_keys` to dequeued values, or a
      `(features, target)` tuple when `target_key` is not None. The target
      is a dict when `target_key` has more than one entry, otherwise the
      single popped value.
    """
    queue = feeding_functions.enqueue_data(
        x,
        queue_capacity,
        shuffle=shuffle,
        num_threads=num_threads,
        enqueue_size=batch_size,
        num_epochs=num_epochs)

    # Without an epoch limit always take full batches; otherwise accept a
    # shorter batch.
    dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
    tensors = dequeue(batch_size)
    if not isinstance(tensors, list):
      tensors = [tensors]
    features = dict(zip(input_keys, tensors))
    if target_key is None:
      return features
    if len(target_key) > 1:
      target = {key: features.pop(key) for key in target_key}
    else:
      target = features.pop(target_key[0])
    return features, target
 def testPandasFeedingMultiThread(self):
     """Checks shuffled multi-threaded DataFrame feeding keeps rows intact.

     For every dequeued batch, each column must equal the DataFrame's
     values at the dequeued positions. The test is a no-op when pandas is
     not available.
     """
     if not HAS_PANDAS:
         return
     with tf.Graph().as_default():
         column_a = np.arange(128, 256)
         column_b = 2 * column_a
         frame = pd.DataFrame(
             {"a": column_a, "b": column_b}, index=np.arange(128))
         queue = ff.enqueue_data(
             frame, capacity=128, num_threads=8, shuffle=True)
         batch_size = 5
         dequeue_op = queue.dequeue_many(batch_size)
         with tf.Session() as session:
             coordinator = tf.train.Coordinator()
             runner_threads = tf.train.start_queue_runners(
                 sess=session, coord=coordinator)
             for _ in range(100):
                 batch = session.run(dequeue_op)
                 expected_rows = frame.iloc[batch[0]]
                 # Column tensors follow the index tensor at position 0.
                 for position, name in enumerate(frame.columns, start=1):
                     np.testing.assert_array_equal(
                         expected_rows[name].values, batch[position])
             coordinator.request_stop()
             coordinator.join(runner_threads)
Example #20
0
 def input_fn():
   """Pandas input function.

   Returns:
     A dict mapping every column of `x` to its dequeued value (the leading
     index element is dropped), or a `(features, target)` tuple when `y`
     is not None.
   """
   queue = feeding_functions.enqueue_data(
       x,
       queue_capacity,
       shuffle=shuffle,
       min_after_dequeue=min_after_dequeue,
       num_threads=num_threads,
       enqueue_size=batch_size,
       num_epochs=num_epochs)
   # Without an epoch limit always take full batches; otherwise accept a
   # shorter batch.
   dequeue = queue.dequeue_many if num_epochs is None else queue.dequeue_up_to
   tensors = dequeue(batch_size)
   column_names = list(x.columns)
   assert len(tensors) == len(column_names) + 1, (
       'Features should have one '
       'extra element for the index.')
   # Skip the first element, which is the index.
   features = dict(zip(column_names, tensors[1:]))
   if y is None:
     return features
   target = features.pop(target_column)
   return features, target
 def testPandasFeedingMultiThread(self):
   """Checks shuffled multi-threaded DataFrame feeding keeps rows intact.

   For every dequeued batch, each column must equal the DataFrame's values
   at the dequeued positions. The test is a no-op when pandas is not
   available.
   """
   if not HAS_PANDAS:
     return
   with tf.Graph().as_default():
     column_a = np.arange(128, 256)
     column_b = 2 * column_a
     frame = pd.DataFrame({"a": column_a, "b": column_b},
                          index=np.arange(128))
     queue = ff.enqueue_data(frame, capacity=128, num_threads=8, shuffle=True)
     batch_size = 5
     dequeue_op = queue.dequeue_many(batch_size)
     with tf.Session() as session:
       coordinator = tf.train.Coordinator()
       runner_threads = tf.train.start_queue_runners(
           sess=session, coord=coordinator)
       for _ in range(100):
         batch = session.run(dequeue_op)
         expected_rows = frame.iloc[batch[0]]
         # Column tensors follow the index tensor at position 0.
         for position, name in enumerate(frame.columns, start=1):
           np.testing.assert_array_equal(expected_rows[name].values,
                                         batch[position])
       coordinator.request_stop()
       coordinator.join(runner_threads)
Example #22
0
 def input_fn():
     """Pandas input function.

     Returns:
         A dict mapping every column of `x` to its dequeued value (the
         leading index element is dropped), or a `(features, target)`
         tuple when `y` is not None.
     """
     queue = feeding_functions.enqueue_data(
         x,
         queue_capacity,
         shuffle=shuffle,
         min_after_dequeue=min_after_dequeue,
         num_threads=num_threads,
         enqueue_size=batch_size,
         num_epochs=num_epochs)
     # Without an epoch limit always take full batches; otherwise accept a
     # shorter batch.
     dequeue = (queue.dequeue_many
                if num_epochs is None else queue.dequeue_up_to)
     tensors = dequeue(batch_size)
     column_names = list(x.columns)
     assert len(tensors) == len(column_names) + 1, (
         'Features should have one '
         'extra element for the index.')
     # Skip the first element, which is the index.
     features = dict(zip(column_names, tensors[1:]))
     if y is None:
         return features
     target = features.pop(target_column)
     return features, target