Beispiel #1
0
    def _apply_transform(self, transform_input, **kwargs):
        """Builds the file-reading and batching ops for this transform.

        A filename queue is created over `self.work_units`; `self.num_threads`
        readers each pull up to `self.enqueue_size` records per read from it,
        and the reads are joined into batches of `self.batch_size`.

        Args:
          transform_input: Not used by this method.
          **kwargs: Only `num_epochs` is consulted; it is forwarded to the
            filename queue and is `None` when absent.

        Returns:
          `self.return_type` constructed from the dequeued batch tensors.
        """
        file_queue = input_ops.string_input_producer(
            self.work_units,
            num_epochs=kwargs.get("num_epochs"),
            shuffle=self.shuffle,
            seed=self.seed)

        # One reader per thread, all pulling from the same filename queue.
        reads = [
            self._reader_cls(**self._reader_kwargs).read_up_to(
                file_queue, self.enqueue_size)
            for _ in range(self.num_threads)
        ]

        # Select the joining op plus the keyword arguments only it takes.
        if self.shuffle:
            join_fn = input_ops.shuffle_batch_join
            extra_args = {
                "min_after_dequeue": self.min_after_dequeue,
                "seed": self.seed,
            }
        else:
            join_fn = input_ops.batch_join
            extra_args = {"dynamic_pad": False}

        batch = join_fn(
            reads,
            self.batch_size,
            capacity=self.queue_capacity,
            enqueue_many=True,
            shared_name=None,
            name=None,
            **extra_args)

        # pylint: disable=not-callable
        return self.return_type(*batch)
Beispiel #2
0
  def _apply_transform(self, transform_input):
    """Creates the ops that read `self.work_units` and batch the records.

    File names are queued with `self.num_epochs`, `self.shuffle` and
    `self.seed`; `self.num_threads` readers read up to `self.enqueue_size`
    records at a time, and the results are batched to `self.batch_size`
    (shuffled when `self.shuffle` is set).

    Args:
      transform_input: Not used by this method.

    Returns:
      `self.return_type` constructed from the dequeued batch tensors.
    """
    names_queue = input_ops.string_input_producer(
        self.work_units,
        num_epochs=self.num_epochs,
        shuffle=self.shuffle,
        seed=self.seed)

    # Every thread gets its own reader attached to the shared filename queue.
    per_thread_reads = []
    for _ in range(self.num_threads):
      per_thread_reads.append(
          self._reader_cls(**self._reader_kwargs).read_up_to(
              names_queue, self.enqueue_size))

    if not self.shuffle:
      batch = input_ops.batch_join(
          per_thread_reads,
          self.batch_size,
          capacity=self.queue_capacity,
          enqueue_many=True,
          dynamic_pad=False,
          shared_name=None,
          name=None)
    else:
      batch = input_ops.shuffle_batch_join(
          per_thread_reads,
          self.batch_size,
          capacity=self.queue_capacity,
          min_after_dequeue=self.min_after_dequeue,
          seed=self.seed,
          enqueue_many=True,
          shared_name=None,
          name=None)

    # pylint: disable=not-callable
    return self.return_type(*batch)
Beispiel #3
0
def read_keyed_batch_examples(
    file_pattern, batch_size, reader,
    randomize_input=True, num_epochs=None,
    queue_capacity=10000, num_threads=1,
    read_batch_size=1, parse_fn=None,
    name=None):
  """Adds operations to read, queue, batch `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size`.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Use `parse_fn` if you need to do parsing / processing on single examples.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If `None`, cycles through the dataset forever.
      NOTE - If specified, creates a variable that must be initialized, so call
      `tf.initialize_all_variables()` as shown in the tests.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    read_batch_size: An int or scalar `Tensor` specifying the number of
      records to read at once. A value greater than 1, or any `Tensor`,
      makes every reader use `read_up_to` and enqueue many records per read.
    parse_fn: Parsing function, takes `Example` Tensor returns parsed
      representation. If `None`, no parsing is done.
    name: Name of resulting op.

  Returns:
    The batched keys together with the batched examples. When `parse_fn`
    returns a dict, this is the tuple `(keys, features)`, where `features` is
    the batched dict with the key entry removed. Otherwise it is the output of
    the batching op applied to `(keys, examples)` pairs.

  Raises:
    ValueError: for invalid inputs.
  """
  # Retrieve files to read.
  if isinstance(file_pattern, list):
    file_names = file_pattern
    if not file_names:
      raise ValueError('No files given to dequeue_examples.')
  else:
    file_names = list(gfile.Glob(file_pattern))
    if not file_names:
      raise ValueError('No files match %s.' % file_pattern)

  # Sort files so it will be deterministic for unit tests. They'll be shuffled
  # in `string_input_producer` if `randomize_input` is enabled.
  if not randomize_input:
    file_names = sorted(file_names)

  # Check input parameters are given and reasonable.
  if (not queue_capacity) or (queue_capacity <= 0):
    raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
  if (batch_size is None) or (
      (not isinstance(batch_size, ops.Tensor)) and
      (batch_size <= 0 or batch_size > queue_capacity)):
    raise ValueError(
        'Invalid batch_size %s, with queue_capacity %s.' %
        (batch_size, queue_capacity))
  if (read_batch_size is None) or (
      (not isinstance(read_batch_size, ops.Tensor)) and
      (read_batch_size <= 0)):
    raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
  if (not num_threads) or (num_threads <= 0):
    raise ValueError('Invalid num_threads %s.' % num_threads)
  if (num_epochs is not None) and (num_epochs <= 0):
    raise ValueError('Invalid num_epochs %s.' % num_epochs)

  # Comparing a `Tensor` with `> 1` yields a `Tensor`, which cannot be used as
  # a Python condition. A `Tensor`-valued `read_batch_size` therefore always
  # takes the batched (`read_up_to`) path; plain ints behave as before.
  batched_read = (
      isinstance(read_batch_size, ops.Tensor) or read_batch_size > 1)

  with ops.op_scope([file_pattern], name, 'read_batch_examples') as scope:
    # Setup filename queue with shuffling.
    with ops.name_scope('file_name_queue') as file_name_queue_scope:
      file_name_queue = input_ops.string_input_producer(
          constant_op.constant(file_names, name='input'),
          shuffle=randomize_input, num_epochs=num_epochs,
          name=file_name_queue_scope)

    # Create readers, one per thread and set them to read from filename queue.
    with ops.name_scope('read'):
      example_list = []
      for _ in range(num_threads):
        if batched_read:
          keys, examples_proto = reader().read_up_to(file_name_queue,
                                                     read_batch_size)
        else:
          keys, examples_proto = reader().read(file_name_queue)
        if parse_fn:
          parsed_examples = parse_fn(examples_proto)
          # Map keys into example map because batch_join doesn't support
          # tuple of Tensor + dict.
          if isinstance(parsed_examples, dict):
            parsed_examples[KEY_FEATURE_NAME] = keys
            example_list.append(parsed_examples)
          else:
            example_list.append((keys, parsed_examples))
        else:
          example_list.append((keys, examples_proto))

    # Batched reads produce several records per enqueue.
    enqueue_many = batched_read

    # Setup batching queue given list of read example tensors.
    if randomize_input:
      if isinstance(batch_size, ops.Tensor):
        min_after_dequeue = int(queue_capacity * 0.4)
      else:
        min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size)
      queued_examples_with_keys = input_ops.shuffle_batch_join(
          example_list, batch_size, capacity=queue_capacity,
          min_after_dequeue=min_after_dequeue,
          enqueue_many=enqueue_many, name=scope)
    else:
      queued_examples_with_keys = input_ops.batch_join(
          example_list, batch_size, capacity=queue_capacity,
          enqueue_many=enqueue_many, name=scope)
    # Split the keys back out of the feature dict they were merged into.
    if parse_fn and isinstance(queued_examples_with_keys, dict):
      queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME)
      return queued_keys, queued_examples_with_keys
    return queued_examples_with_keys
Beispiel #4
0
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None):
  """Builds the ops that read, queue and batch keyed `Example` protos.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` giving the batch size.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether file names and batches are shuffled.
    num_epochs: Number of passes over the dataset; `None` means cycle
      forever. When set, the final batch is allowed to be smaller.
      NOTE - If specified, creates a variable that must be initialized, so call
      `tf.initialize_all_variables()` as shown in the tests.
    queue_capacity: Capacity of the batching queue.
    num_threads: The number of threads enqueuing examples.
    read_batch_size: An int or scalar `Tensor` giving the number of records
      read at once; values above 1 enable `enqueue_many`.
    parse_fn: Parsing function, takes `Example` Tensor returns parsed
      representation. If `None`, no parsing is done.
    setup_shared_queue: If true, file names are taken from the queue returned
      by `_get_shared_file_name_queue` and forwarded through a local
      capacity-1 `FIFOQueue`; otherwise a `string_input_producer` is used.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` of string keys.
    - String `Tensor` of batched `Example` proto.

  Raises:
    ValueError: for invalid inputs.
  """
  # Resolve the files to read.
  file_names = _get_file_names(file_pattern, randomize_input)

  # Validate the numeric arguments, in the order callers may rely on.
  if not queue_capacity or queue_capacity <= 0:
    raise ValueError('Invalid queue_capacity %s.' % queue_capacity)

  batch_size_ok = batch_size is not None
  if batch_size_ok and not isinstance(batch_size, ops.Tensor):
    batch_size_ok = not (batch_size <= 0 or batch_size > queue_capacity)
  if not batch_size_ok:
    raise ValueError(
        'Invalid batch_size %s, with queue_capacity %s.' %
        (batch_size, queue_capacity))

  read_size_ok = read_batch_size is not None
  if read_size_ok and not isinstance(read_batch_size, ops.Tensor):
    read_size_ok = not read_batch_size <= 0
  if not read_size_ok:
    raise ValueError('Invalid read_batch_size %s.' % read_batch_size)

  if not num_threads or num_threads <= 0:
    raise ValueError('Invalid num_threads %s.' % num_threads)
  if num_epochs is not None and num_epochs <= 0:
    raise ValueError('Invalid num_epochs %s.' % num_epochs)

  with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
    with ops.name_scope('file_name_queue') as file_name_queue_scope:
      if not setup_shared_queue:
        file_name_queue = input_ops.string_input_producer(
            constant_op.constant(file_names, name='input'),
            shuffle=randomize_input,
            num_epochs=num_epochs,
            name=file_name_queue_scope)
      else:
        # Relay names from the shared queue through a one-slot local queue
        # that is fed by its own queue runner.
        shared_file_name_queue = _get_shared_file_name_queue(
            file_names, randomize_input, num_epochs, file_name_queue_scope)
        file_name_queue = data_flow_ops.FIFOQueue(
            capacity=1, dtypes=[dtypes.string], shapes=[[]])
        next_file_name = shared_file_name_queue.dequeue()
        enqueue_op = file_name_queue.enqueue(next_file_name)
        runner = queue_runner.QueueRunner(file_name_queue, [enqueue_op])
        queue_runner.add_queue_runner(runner)

    example_list = _get_examples(file_name_queue, reader, num_threads,
                                 read_batch_size, parse_fn)

    # Arguments shared by both batching ops.
    join_kwargs = dict(
        capacity=queue_capacity,
        enqueue_many=read_batch_size > 1,
        name=scope,
        allow_smaller_final_batch=num_epochs is not None)

    # Set up the batching queue over the list of read example tensors.
    if randomize_input:
      min_after_dequeue = (
          int(queue_capacity * 0.4) if isinstance(batch_size, ops.Tensor)
          else max(queue_capacity - (3 * batch_size), batch_size))
      batched = input_ops.shuffle_batch_join(
          example_list, batch_size,
          min_after_dequeue=min_after_dequeue, **join_kwargs)
    else:
      batched = input_ops.batch_join(example_list, batch_size, **join_kwargs)

    # Keys travel inside the feature dict; split them back out here.
    if parse_fn and isinstance(batched, dict):
      queued_keys = batched.pop(KEY_FEATURE_NAME)
      return queued_keys, batched
    return batched
Beispiel #5
0
def read_keyed_batch_examples(file_pattern,
                              batch_size,
                              reader,
                              randomize_input=True,
                              num_epochs=None,
                              queue_capacity=10000,
                              num_threads=1,
                              read_batch_size=1,
                              parse_fn=None,
                              name=None):
    """Adds operations to read, queue, batch `Example` protos.

    Given file pattern (or list of files), will setup a queue for file names,
    read `Example` proto using provided `reader`, use batch queue to create
    batches of examples of size `batch_size`.

    All queue runners are added to the queue runners collection, and may be
    started via `start_queue_runners`.

    All ops are added to the default graph.

    Use `parse_fn` if you need to do parsing / processing on single examples.

    Args:
      file_pattern: List of files or pattern of file paths containing
          `Example` records. See `tf.gfile.Glob` for pattern rules.
      batch_size: An int or scalar `Tensor` specifying the batch size to use.
      reader: A function or class that returns an object with
        `read` method, (filename tensor) -> (example tensor).
      randomize_input: Whether the input should be randomized.
      num_epochs: Integer specifying the number of times to read through the
        dataset. If `None`, cycles through the dataset forever. When set, the
        final batch is allowed to be smaller than `batch_size`.
        NOTE - If specified, creates a variable that must be initialized, so
        call `tf.initialize_all_variables()` as shown in the tests.
      queue_capacity: Capacity for input queue.
      num_threads: The number of threads enqueuing examples.
      read_batch_size: An int or scalar `Tensor` specifying the number of
        records to read at once. A value greater than 1, or any `Tensor`,
        makes every reader use `read_up_to` and enqueue many records per
        read.
      parse_fn: Parsing function, takes `Example` Tensor returns parsed
        representation. If `None`, no parsing is done.
      name: Name of resulting op.

    Returns:
      Returns tuple of:
      - `Tensor` of string keys.
      - String `Tensor` of batched `Example` proto.

    Raises:
      ValueError: for invalid inputs.
    """
    # Retrieve files to read.
    if isinstance(file_pattern, list):
        file_names = file_pattern
        if not file_names:
            raise ValueError('No files given to dequeue_examples.')
    else:
        file_names = list(gfile.Glob(file_pattern))
        if not file_names:
            raise ValueError('No files match %s.' % file_pattern)

    # Sort files so it will be deterministic for unit tests. They'll be shuffled
    # in `string_input_producer` if `randomize_input` is enabled.
    if not randomize_input:
        file_names = sorted(file_names)

    # Check input parameters are given and reasonable.
    if (not queue_capacity) or (queue_capacity <= 0):
        raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
    if (batch_size is None) or (
        (not isinstance(batch_size, ops.Tensor)) and
        (batch_size <= 0 or batch_size > queue_capacity)):
        raise ValueError('Invalid batch_size %s, with queue_capacity %s.' %
                         (batch_size, queue_capacity))
    if (read_batch_size is None) or (
        (not isinstance(read_batch_size, ops.Tensor)) and
        (read_batch_size <= 0)):
        raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
    if (not num_threads) or (num_threads <= 0):
        raise ValueError('Invalid num_threads %s.' % num_threads)
    if (num_epochs is not None) and (num_epochs <= 0):
        raise ValueError('Invalid num_epochs %s.' % num_epochs)

    # Comparing a `Tensor` with `> 1` yields a `Tensor`, which cannot be used
    # as a Python condition. A `Tensor`-valued `read_batch_size` therefore
    # always takes the batched (`read_up_to`) path; plain ints behave as
    # before.
    batched_read = (
        isinstance(read_batch_size, ops.Tensor) or read_batch_size > 1)

    with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
        # Setup filename queue with shuffling.
        with ops.name_scope('file_name_queue') as file_name_queue_scope:
            file_name_queue = input_ops.string_input_producer(
                constant_op.constant(file_names, name='input'),
                shuffle=randomize_input,
                num_epochs=num_epochs,
                name=file_name_queue_scope)

        # Create readers, one per thread and set them to read from filename queue.
        with ops.name_scope('read'):
            example_list = []
            for _ in range(num_threads):
                if batched_read:
                    keys, examples_proto = reader().read_up_to(
                        file_name_queue, read_batch_size)
                else:
                    keys, examples_proto = reader().read(file_name_queue)
                if parse_fn:
                    parsed_examples = parse_fn(examples_proto)
                    # Map keys into example map because batch_join doesn't support
                    # tuple of Tensor + dict.
                    if isinstance(parsed_examples, dict):
                        parsed_examples[KEY_FEATURE_NAME] = keys
                        example_list.append(parsed_examples)
                    else:
                        example_list.append((keys, parsed_examples))
                else:
                    example_list.append((keys, examples_proto))

        # Batched reads produce several records per enqueue.
        enqueue_many = batched_read

        # With a bounded number of epochs the input runs out, so the last
        # batch is allowed to be short.
        allow_smaller_final_batch = num_epochs is not None

        # Setup batching queue given list of read example tensors.
        if randomize_input:
            if isinstance(batch_size, ops.Tensor):
                min_after_dequeue = int(queue_capacity * 0.4)
            else:
                min_after_dequeue = max(queue_capacity - (3 * batch_size),
                                        batch_size)
            queued_examples_with_keys = input_ops.shuffle_batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                min_after_dequeue=min_after_dequeue,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)
        else:
            queued_examples_with_keys = input_ops.batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)
        # Split the keys back out of the feature dict they were merged into.
        if parse_fn and isinstance(queued_examples_with_keys, dict):
            queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME)
            return queued_keys, queued_examples_with_keys
        return queued_examples_with_keys
Beispiel #6
0
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None):
    """Creates the graph ops that read and batch keyed `Example` protos.

    Args:
      file_pattern: List of files or pattern of file paths containing
          `Example` records. See `tf.gfile.Glob` for pattern rules.
      batch_size: An int or scalar `Tensor` specifying the batch size to use.
      reader: A function or class that returns an object with
        `read` method, (filename tensor) -> (example tensor).
      randomize_input: Whether the input should be randomized.
      num_epochs: Integer number of passes over the dataset, or `None` to
        cycle forever. A smaller final batch is allowed only when it is set.
        NOTE - If specified, creates a variable that must be initialized, so
        call `tf.initialize_all_variables()` as shown in the tests.
      queue_capacity: Capacity for input queue.
      num_threads: The number of threads enqueuing examples.
      read_batch_size: An int or scalar `Tensor` specifying the number of
        records to read at once.
      parse_fn: Parsing function, takes `Example` Tensor returns parsed
        representation. If `None`, no parsing is done.
      setup_shared_queue: Whether to set up a shared queue for file names.
      name: Name of resulting op.

    Returns:
      Returns tuple of:
      - `Tensor` of string keys.
      - String `Tensor` of batched `Example` proto.

    Raises:
      ValueError: for invalid inputs.
    """
    file_names = _get_file_names(file_pattern, randomize_input)

    # Reject missing or out-of-range arguments before any op is created.
    if not queue_capacity or queue_capacity <= 0:
        raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
    if batch_size is None or (
            not isinstance(batch_size, ops.Tensor) and
            (batch_size <= 0 or batch_size > queue_capacity)):
        raise ValueError('Invalid batch_size %s, with queue_capacity %s.' %
                         (batch_size, queue_capacity))
    if read_batch_size is None or (
            not isinstance(read_batch_size, ops.Tensor) and
            read_batch_size <= 0):
        raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
    if not num_threads or num_threads <= 0:
        raise ValueError('Invalid num_threads %s.' % num_threads)
    if num_epochs is not None and num_epochs <= 0:
        raise ValueError('Invalid num_epochs %s.' % num_epochs)

    with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
        with ops.name_scope('file_name_queue') as file_name_queue_scope:
            if setup_shared_queue:
                # File names come from the shared queue and are relayed
                # through a one-slot local queue with its own runner.
                shared_queue = _get_shared_file_name_queue(
                    file_names, randomize_input, num_epochs,
                    file_name_queue_scope)
                file_name_queue = data_flow_ops.FIFOQueue(
                    capacity=1, dtypes=[dtypes.string], shapes=[[]])
                relayed_name = shared_queue.dequeue()
                relay_op = file_name_queue.enqueue(relayed_name)
                queue_runner.add_queue_runner(
                    queue_runner.QueueRunner(file_name_queue, [relay_op]))
            else:
                names_tensor = constant_op.constant(file_names, name='input')
                file_name_queue = input_ops.string_input_producer(
                    names_tensor,
                    shuffle=randomize_input,
                    num_epochs=num_epochs,
                    name=file_name_queue_scope)

        example_list = _get_examples(file_name_queue, reader, num_threads,
                                     read_batch_size, parse_fn)

        enqueue_many = read_batch_size > 1
        allow_smaller_final_batch = num_epochs is not None

        # Setup batching queue given list of read example tensors.
        if not randomize_input:
            result = input_ops.batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)
        else:
            if isinstance(batch_size, ops.Tensor):
                min_after_dequeue = int(queue_capacity * 0.4)
            else:
                min_after_dequeue = max(queue_capacity - (3 * batch_size),
                                        batch_size)
            result = input_ops.shuffle_batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                min_after_dequeue=min_after_dequeue,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)

        # A dict result carries the keys as one of its entries; pull them out.
        if parse_fn and isinstance(result, dict):
            return result.pop(KEY_FEATURE_NAME), result
        return result
Beispiel #7
0
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None):
  """Creates ops that read examples from files and batch them with keys.

  Args:
    file_pattern: Passed to `_get_file_names` to obtain the files to read.
    batch_size: Batch size; either an `ops.Tensor` or a number in
      `(0, queue_capacity]`.
    reader: Forwarded to `_get_examples`.
    randomize_input: Whether file names and batches are shuffled.
    num_epochs: `None` or a positive number, forwarded to the file name
      queue. A smaller final batch is allowed only when it is not `None`.
    queue_capacity: Capacity of the batching queue; must be positive.
    num_threads: Forwarded to `_get_examples`; must be positive.
    read_batch_size: Forwarded to `_get_examples`; either an `ops.Tensor` or a
      positive number. Values above 1 enable `enqueue_many`.
    parse_fn: Forwarded to `_get_examples`.
    setup_shared_queue: If true, file names are dequeued from the queue
      returned by `_get_shared_file_name_queue` and relayed through a local
      capacity-1 `FIFOQueue`; otherwise a `string_input_producer` is used.
    name: Name for the enclosing name scope and the batching op.

  Returns:
    The output of the batching op. If `parse_fn` is set and that output is a
    dict, returns `(keys, features)` with the `KEY_FEATURE_NAME` entry popped
    out of the dict as `keys`.

  Raises:
    ValueError: if any of the numeric arguments is missing or out of range.
  """
  # Retrieve files to read.
  file_names = _get_file_names(file_pattern, randomize_input)

  # Check input parameters are given and reasonable.
  if not queue_capacity or queue_capacity <= 0:
    raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
  if batch_size is None or (
      not isinstance(batch_size, ops.Tensor) and
      (batch_size <= 0 or batch_size > queue_capacity)):
    raise ValueError(
        'Invalid batch_size %s, with queue_capacity %s.' %
        (batch_size, queue_capacity))
  if read_batch_size is None or (
      not isinstance(read_batch_size, ops.Tensor) and read_batch_size <= 0):
    raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
  if not num_threads or num_threads <= 0:
    raise ValueError('Invalid num_threads %s.' % num_threads)
  if num_epochs is not None and num_epochs <= 0:
    raise ValueError('Invalid num_epochs %s.' % num_epochs)

  with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
    with ops.name_scope('file_name_queue') as file_name_queue_scope:
      if setup_shared_queue:
        shared_names = _get_shared_file_name_queue(
            file_names, randomize_input, num_epochs, file_name_queue_scope)
        # One-slot local queue fed from the shared queue by its own runner.
        file_name_queue = data_flow_ops.FIFOQueue(
            capacity=1, dtypes=[dtypes.string], shapes=[[]])
        feed_op = file_name_queue.enqueue(shared_names.dequeue())
        feeder = queue_runner.QueueRunner(file_name_queue, [feed_op])
        queue_runner.add_queue_runner(feeder)
      else:
        file_name_queue = input_ops.string_input_producer(
            constant_op.constant(file_names, name='input'),
            shuffle=randomize_input,
            num_epochs=num_epochs,
            name=file_name_queue_scope)

    example_list = _get_examples(file_name_queue, reader, num_threads,
                                 read_batch_size, parse_fn)

    # Keyword arguments common to both batching ops.
    shared_kwargs = {
        'capacity': queue_capacity,
        'enqueue_many': read_batch_size > 1,
        'name': scope,
        'allow_smaller_final_batch': num_epochs is not None,
    }

    # Setup batching queue given list of read example tensors.
    if randomize_input:
      if isinstance(batch_size, ops.Tensor):
        shared_kwargs['min_after_dequeue'] = int(queue_capacity * 0.4)
      else:
        shared_kwargs['min_after_dequeue'] = max(
            queue_capacity - (3 * batch_size), batch_size)
      join_op = input_ops.shuffle_batch_join
    else:
      join_op = input_ops.batch_join
    batched = join_op(example_list, batch_size, **shared_kwargs)

    if parse_fn and isinstance(batched, dict):
      keys = batched.pop(KEY_FEATURE_NAME)
      return keys, batched
    return batched
Beispiel #8
0
def read_batch_examples(file_pattern,
                        batch_size,
                        reader,
                        randomize_input=True,
                        num_epochs=None,
                        queue_capacity=10000,
                        num_threads=1,
                        name=None):
    """Adds operations to read, queue, batch `Example` protos.

    A queue of file names is built from `file_pattern` (a list of files or a
    glob pattern), `num_threads` readers created by `reader` pull `Example`
    protos from it, and a batching queue groups them into batches of
    `batch_size`.

    All queue runners are added to the queue runners collection, and may be
    started via `start_queue_runners`. All ops are added to the default graph.

    Args:
      file_pattern: List of files or pattern of file paths containing
          `Example` records. See `tf.gfile.Glob` for pattern rules.
      batch_size: An int or scalar `Tensor` specifying the batch size to use.
      reader: A function or class that returns an object with
        `read` method, (filename tensor) -> (example tensor).
      randomize_input: Whether the input should be randomized.
      num_epochs: Integer specifying the number of times to read through the
        dataset. If `None`, cycles through the dataset forever.
        NOTE - If specified, creates a variable that must be initialized, so
        call `tf.initialize_all_variables()` as shown in the tests.
      queue_capacity: Capacity for input queue.
      num_threads: The number of threads enqueuing examples.
      name: Name of resulting op.

    Returns:
      String `Tensor` of batched `Example` proto.

    Raises:
      ValueError: for invalid inputs.
    """
    # Retrieve files to read.
    given_as_list = isinstance(file_pattern, list)
    paths = file_pattern if given_as_list else list(gfile.Glob(file_pattern))
    if not paths:
        if given_as_list:
            raise ValueError('No files given to dequeue_examples.')
        raise ValueError('No files match %s.' % file_pattern)

    # A fixed order keeps unit tests deterministic; when `randomize_input` is
    # enabled the names get shuffled by `string_input_producer` instead.
    if not randomize_input:
        paths = sorted(paths)

    # Check input parameters are given and reasonable.
    if not queue_capacity or queue_capacity <= 0:
        raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
    if batch_size is None or (
            not isinstance(batch_size, ops.Tensor) and
            (batch_size <= 0 or batch_size > queue_capacity)):
        raise ValueError('Invalid batch_size %s, with queue_capacity %s.' %
                         (batch_size, queue_capacity))
    if not num_threads or num_threads <= 0:
        raise ValueError('Invalid num_threads %s.' % num_threads)
    if num_epochs is not None and num_epochs <= 0:
        raise ValueError('Invalid num_epochs %s.' % num_epochs)

    with ops.op_scope([file_pattern], name, 'read_batch_examples') as scope:
        # Setup filename queue with shuffling.
        with ops.name_scope('file_name_queue') as file_name_queue_scope:
            names_tensor = constant_op.constant(paths, name='input')
            file_name_queue = input_ops.string_input_producer(
                names_tensor,
                shuffle=randomize_input,
                num_epochs=num_epochs,
                name=file_name_queue_scope)

        # One reader per thread, each reading from the filename queue. The
        # key returned by the reader is discarded.
        with ops.name_scope('read'):
            per_thread_examples = []
            for _ in range(num_threads):
                _, proto = reader().read(file_name_queue)
                per_thread_examples.append([proto])

        # Setup batching queue given list of read example tensors.
        if not randomize_input:
            return input_ops.batch_join(per_thread_examples,
                                        batch_size,
                                        capacity=queue_capacity,
                                        name=scope)

        min_after_dequeue = (
            int(queue_capacity * 0.4)
            if isinstance(batch_size, ops.Tensor)
            else max(queue_capacity - (3 * batch_size), batch_size))
        return input_ops.shuffle_batch_join(
            per_thread_examples,
            batch_size,
            capacity=queue_capacity,
            min_after_dequeue=min_after_dequeue,
            name=scope)
Beispiel #9
0
def read_batch_examples(file_pattern, batch_size, reader,
                        randomize_input=True, num_epochs=None,
                        queue_capacity=10000, num_threads=1,
                        name=None):
  """Adds operations to read, queue, batch `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size`.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List (or tuple) of files, or a pattern of file paths
        containing `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If `None`, cycles through the dataset forever.
      NOTE - If specified, creates a variable that must be initialized, so call
      `tf.initialize_all_variables()` as shown in the tests.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    name: Name of resulting op.

  Returns:
    String `Tensor` of batched `Example` proto.

  Raises:
    ValueError: If no files are given or matched, or if `queue_capacity`,
      `batch_size`, `num_threads` or `num_epochs` is invalid.
  """
  # Retrieve files to read. An explicit sequence is used as-is; anything else
  # is treated as a glob pattern.
  if isinstance(file_pattern, (list, tuple)):
    file_names = list(file_pattern)
    if not file_names:
      raise ValueError('No files given to read_batch_examples.')
  else:
    file_names = list(gfile.Glob(file_pattern))
    if not file_names:
      raise ValueError('No files match %s.' % file_pattern)

  # Sort files so it will be deterministic for unit tests. They'll be shuffled
  # in `string_input_producer` if `randomize_input` is enabled.
  if not randomize_input:
    file_names = sorted(file_names)

  # Check input parameters are given and reasonable.
  if (not queue_capacity) or (queue_capacity <= 0):
    raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
  # A `Tensor` batch size is not range-checked here; only plain numbers are
  # compared against the queue capacity.
  if (batch_size is None) or (
      (not isinstance(batch_size, ops.Tensor)) and
      (batch_size <= 0 or batch_size > queue_capacity)):
    raise ValueError(
        'Invalid batch_size %s, with queue_capacity %s.' %
        (batch_size, queue_capacity))
  if (not num_threads) or (num_threads <= 0):
    raise ValueError('Invalid num_threads %s.' % num_threads)
  if (num_epochs is not None) and (num_epochs <= 0):
    raise ValueError('Invalid num_epochs %s.' % num_epochs)

  with ops.op_scope([file_pattern], name, 'read_batch_examples') as scope:
    # Setup filename queue with shuffling.
    with ops.name_scope('file_name_queue') as file_name_queue_scope:
      file_name_queue = input_ops.string_input_producer(
          constant_op.constant(file_names, name='input'),
          shuffle=randomize_input, num_epochs=num_epochs,
          name=file_name_queue_scope)

    # Create readers, one per thread and set them to read from filename queue.
    with ops.name_scope('read'):
      example_list = []
      for _ in range(num_threads):
        _, example_proto = reader().read(file_name_queue)
        example_list.append([example_proto])

    # Setup batching queue given list of read example tensors.
    if randomize_input:
      if isinstance(batch_size, ops.Tensor):
        # No Python number is available for a `Tensor` batch size, so fall
        # back to a fixed 40% of the queue capacity.
        min_after_dequeue = int(queue_capacity * 0.4)
      else:
        # Leave room for three batches in the queue, but never go below a
        # single batch.
        min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size)
      examples = input_ops.shuffle_batch_join(
          example_list, batch_size, capacity=queue_capacity,
          min_after_dequeue=min_after_dequeue,
          name=scope)
    else:
      examples = input_ops.batch_join(
          example_list, batch_size, capacity=queue_capacity,
          name=scope)

    return examples