Example no. 1
  def __init__(self, filenames):
    """Creates a `SequenceFileDataset`.

    `SequenceFileDataset` allows a user to read data from a Hadoop sequence
    file. A sequence file consists of a sequence of (key, value) pairs. At
    the moment, `org.apache.hadoop.io.Text` is the only supported
    serialization type, and there is no compression support.

    For example:

    ```python
    tf.compat.v1.enable_eager_execution()

    dataset = tf.contrib.hadoop.SequenceFileDataset("/foo/bar.seq")
    # Prints the (key, value) pairs inside a Hadoop sequence file.
    for key, value in dataset:
      print(key, value)
    ```

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
    """
    self._filenames = ops.convert_to_tensor(
        filenames, dtype=dtypes.string, name="filenames")
    variant_tensor = gen_dataset_ops.sequence_file_dataset(
        self._filenames,
        structure.get_flat_tensor_types(self._element_structure))
    super(SequenceFileDataset, self).__init__(variant_tensor)
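A minimal usage sketch for the eager path shown above, assuming a TF 1.x build where `tf.contrib.hadoop` is available; `/foo/bar.seq` is a placeholder path, and the UTF-8 decoding step is an illustration rather than part of the original snippet:

```python
import tensorflow as tf  # TF 1.x, where tf.contrib is available

tf.enable_eager_execution()

dataset = tf.contrib.hadoop.SequenceFileDataset("/foo/bar.seq")
for key, value in dataset:
  # Each element is a pair of scalar tf.string tensors holding the raw
  # org.apache.hadoop.io.Text bytes; decode them for readable output.
  print(key.numpy().decode("utf-8"), value.numpy().decode("utf-8"))
```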
Example no. 2
  def __init__(self, filenames):
    """Creates a `SequenceFileDataset`.

    `SequenceFileDataset` allows a user to read data from a Hadoop sequence
    file. A sequence file consists of a sequence of (key, value) pairs. At
    the moment, `org.apache.hadoop.io.Text` is the only supported
    serialization type, and there is no compression support.

    For example:

    ```python
    tf.compat.v1.enable_eager_execution()

    dataset = tf.contrib.hadoop.SequenceFileDataset("/foo/bar.seq")
    # Prints the (key, value) pairs inside a Hadoop sequence file.
    for key, value in dataset:
      print(key, value)
    ```

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
    """
    self._filenames = ops.convert_to_tensor(
        filenames, dtype=dtypes.string, name="filenames")
    variant_tensor = gen_dataset_ops.sequence_file_dataset(
        self._filenames, self._element_structure._flat_types)  # pylint: disable=protected-access
    super(SequenceFileDataset, self).__init__(variant_tensor)
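Because `filenames` accepts one or more names, the same constructor also takes a list; a short sketch, with hypothetical shard paths:

```python
# Hypothetical shard paths; any `tf.string` tensor of filenames works.
filenames = ["/foo/part-00000.seq", "/foo/part-00001.seq"]
dataset = tf.contrib.hadoop.SequenceFileDataset(filenames)
# The dataset yields (key, value) pairs from each listed file.
```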
Example no. 3
  def __init__(self, filenames):
    """Creates a `SequenceFileDataset`.

    `SequenceFileDataset` allows a user to read data from a Hadoop sequence
    file. A sequence file consists of a sequence of (key, value) pairs. At
    the moment, `org.apache.hadoop.io.Text` is the only supported
    serialization type, and there is no compression support.

    For example:

    ```python
    dataset = tf.contrib.hadoop.SequenceFileDataset("/foo/bar.seq")
    iterator = dataset.make_one_shot_iterator()
    next_element = iterator.get_next()
    # Prints the (key, value) pairs inside a Hadoop sequence file.
    with tf.Session() as sess:
      while True:
        try:
          print(sess.run(next_element))
        except tf.errors.OutOfRangeError:
          break
    ```

    Args:
      filenames: A `tf.string` tensor containing one or more filenames.
    """
    self._filenames = ops.convert_to_tensor(
        filenames, dtype=dtypes.string, name="filenames")
    variant_tensor = gen_dataset_ops.sequence_file_dataset(
        self._filenames, self._element_structure._flat_types)  # pylint: disable=protected-access
    super(SequenceFileDataset, self).__init__(variant_tensor)
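The graph-mode example above composes with ordinary `tf.data` transformations before the iterator is created; a minimal sketch, assuming the same placeholder path and a TF 1.x session:

```python
import tensorflow as tf  # TF 1.x, where tf.contrib is available

dataset = tf.contrib.hadoop.SequenceFileDataset("/foo/bar.seq")
dataset = dataset.batch(32)  # group (key, value) pairs into batches
iterator = dataset.make_one_shot_iterator()
keys, values = iterator.get_next()

with tf.Session() as sess:
  try:
    while True:
      print(sess.run([keys, values]))
  except tf.errors.OutOfRangeError:
    pass  # reached the end of the sequence file(s)
```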
Example no. 4
  def _as_variant_tensor(self):
    return gen_dataset_ops.sequence_file_dataset(
        self._filenames, self._element_structure._flat_types)  # pylint: disable=protected-access
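Examples 4-6 reflect the older `tf.data` source pattern, where the base `Dataset` class calls `_as_variant_tensor()` lazily instead of receiving the variant tensor in `__init__` as in Examples 1-3. A hypothetical minimal subclass illustrating that shape; the class name is an assumption for illustration, and `output_shapes`/`output_classes` are omitted for brevity:

```python
# Sketch only: names and base-class details are assumptions, not the
# exact contrib implementation.
class TextPairDataset(dataset_ops.Dataset):
  def __init__(self, filenames):
    super(TextPairDataset, self).__init__()
    self._filenames = ops.convert_to_tensor(
        filenames, dtype=dtypes.string, name="filenames")

  def _as_variant_tensor(self):
    # Called by the base class when the dataset op is materialized.
    return gen_dataset_ops.sequence_file_dataset(
        self._filenames, nest.flatten(self.output_types))

  @property
  def output_types(self):
    return dtypes.string, dtypes.string
```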
Example no. 5
  def _as_variant_tensor(self):
    return gen_dataset_ops.sequence_file_dataset(
        self._filenames, nest.flatten(self.output_types))
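`nest.flatten(self.output_types)` turns the dataset's `(key, value)` dtype tuple into the flat list of dtypes the kernel op expects. A sketch of the companion properties such a dataset would plausibly define, assuming scalar string keys and values as the docstrings describe:

```python
  # Inside the SequenceFileDataset class (sketch):
  @property
  def output_classes(self):
    return ops.Tensor, ops.Tensor

  @property
  def output_shapes(self):
    # Both the key and the value are scalar strings.
    return (tensor_shape.TensorShape([]), tensor_shape.TensorShape([]))

  @property
  def output_types(self):
    return dtypes.string, dtypes.string
```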