def testYesShuffle(self):
   """Checks that a shuffled source yields no duplicate records in 500 reads."""
   source = rs.ReaderSource(reader_cls=tf.IdentityReader,
                            work_units=self.work_units,
                            batch_size=1,
                            shuffle=True,
                            num_threads=10,
                            seed=1234)
   index_col, value_col = source()
   # Build both columns against a shared cache so they come from one source op.
   build_cache = {}
   index_op = index_col.build(build_cache)
   value_op = value_col.build(build_cache)
   self.assertEqual([1], index_op.get_shape().as_list())
   self.assertEqual([1], value_op.get_shape().as_list())
   observed = set()
   with self.test_session() as sess:
     tf.global_variables_initializer().run()
     coord = tf.train.Coordinator()
     queue_threads = tf.train.start_queue_runners(sess=sess, coord=coord)
     for _ in range(500):
       idx, val = sess.run([index_op, value_op])
       # IdentityReader echoes its key as the value, so the pair must match.
       self.assertEqual(idx, val)
       record_key = int(val[0])
       self.assertNotIn(record_key, observed)
       observed.add(record_key)
     coord.request_stop()
     coord.join(queue_threads)
  def from_examples(cls,
                    filepatterns,
                    batch_size,
                    features,
                    file_format=FileFormat.TFRECORD,
                    shuffle=True,
                    num_threads=1,
                    queue_capacity=None,
                    min_after_dequeue=None,
                    seed=None):
    """Create a `DataFrame` from `tensorflow.Example`s.

    Args:
      filepatterns: a list of file patterns containing `tensorflow.Example`s.
      batch_size: desired batch size.
      features: a dict mapping feature names to `VarLenFeature` or
        `FixedLenFeature`.
      file_format: a `FileFormat` indicating the format of the files in
        `filepatterns`.
      shuffle: whether records should be shuffled. Defaults to true.
      num_threads: the number of readers that will work in parallel.
      queue_capacity: capacity of the queue that will store parsed `Example`s
      min_after_dequeue: minimum number of elements that can be left by a
        dequeue operation. Only used if `shuffle` is true.
      seed: passed to random shuffle operations. Only used if `shuffle` is true.

    Returns:
      A `DataFrame` that has columns corresponding to `features` and is filled
      with `Example`s from `filepatterns`.

    Raises:
      ValueError: no files match `filepatterns`.
      ValueError: `features` contains the reserved name 'index'.
    """
    filenames = _expand_file_names(filepatterns)
    if not filenames:
      raise ValueError("No matching file names.")

    if "index" in features:
      raise ValueError(
          "'index' is reserved and can not be used for a feature name.")

    index, record = reader_source.ReaderSource(
        FILE_FORMAT_TO_READER_CLS[file_format],
        filenames,
        batch_size=batch_size,
        queue_capacity=queue_capacity,
        shuffle=shuffle,
        min_after_dequeue=min_after_dequeue,
        num_threads=num_threads,
        seed=seed)()
    parser = example_parser.ExampleParser(features)
    parsed = parser(record)

    column_dict = parsed._asdict()
    column_dict["index"] = index

    dataframe = cls()
    dataframe.assign(**column_dict)
    return dataframe
 def testNoShuffle(self):
   id_source = rs.ReaderSource(reader_cls=tf.IdentityReader,
                               work_units=self.work_units,
                               batch_size=1,
                               shuffle=False,
                               num_threads=1)
   index_column, value_column = id_source()
   index_tensor = index_column.build()
   value_tensor = value_column.build()
   self.assertEqual([1], index_tensor.get_shape().as_list())
   self.assertEqual([1], value_tensor.get_shape().as_list())
   with self.test_session() as sess:
     tf.global_variables_initializer().run()
     coord = tf.train.Coordinator()
     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
     for i in range(50):
       index, value = sess.run([index_tensor, value_tensor])
       self.assertEqual(i, int(index[0]))
       self.assertEqual(i, int(value[0]))
     coord.request_stop()
     coord.join(threads)
  def from_examples(cls,
                    filepatterns,
                    features,
                    reader_cls=io_ops.TFRecordReader,
                    num_epochs=None,
                    num_threads=1,
                    enqueue_size=None,
                    batch_size=32,
                    queue_capacity=None,
                    min_after_dequeue=None,
                    shuffle=True,
                    seed=None):
    """Create a `DataFrame` from `tensorflow.Example`s.

    Args:
      filepatterns: a list of file patterns containing `tensorflow.Example`s.
      features: a dict mapping feature names to `VarLenFeature` or
        `FixedLenFeature`.
      reader_cls: a subclass of `tensorflow.ReaderBase` that will be used to
        read the `Example`s.
      num_epochs: the number of times that the reader should loop through all
        the file names. If set to `None`, then the reader will continue
        indefinitely.
      num_threads: the number of readers that will work in parallel.
      enqueue_size: block size for each read operation.
      batch_size: desired batch size.
      queue_capacity: capacity of the queue that will store parsed `Example`s
      min_after_dequeue: minimum number of elements that can be left by a
        dequeue operation. Only used if `shuffle` is true.
      shuffle: whether records should be shuffled. Defaults to true.
      seed: passed to random shuffle operations. Only used if `shuffle` is true.

    Returns:
      A `DataFrame` that has columns corresponding to `features` and is filled
      with `Example`s from `filepatterns`.

    Raises:
      ValueError: no files match `filepatterns`.
      ValueError: `features` contains the reserved name 'index'.
    """
    filenames = _expand_file_names(filepatterns)
    if not filenames:
      raise ValueError("No matching file names.")

    if "index" in features:
      raise ValueError(
          "'index' is reserved and can not be used for a feature name.")

    index, record = reader_source.ReaderSource(
        reader_cls,
        filenames,
        enqueue_size=enqueue_size,
        batch_size=batch_size,
        num_epochs=num_epochs,
        queue_capacity=queue_capacity,
        shuffle=shuffle,
        min_after_dequeue=min_after_dequeue,
        num_threads=num_threads,
        seed=seed)()
    parser = example_parser.ExampleParser(features)
    parsed = parser(record)

    column_dict = parsed._asdict()
    column_dict["index"] = index

    dataframe = cls()
    dataframe.assign(**column_dict)
    return dataframe