Code example #1
def read_keyed_batch_features(file_pattern,
                              batch_size,
                              features,
                              reader,
                              randomize_input=True,
                              num_epochs=None,
                              queue_capacity=10000,
                              reader_num_threads=1,
                              feature_queue_capacity=100,
                              num_queue_runners=2,
                              parser_num_threads=None,
                              name=None):
    """Adds operations to read, queue, batch and parse `Example` protos.

  Given file pattern (or list of files), will setup a queue for file names,
  read `Example` proto using provided `reader`, use batch queue to create
  batches of examples of size `batch_size` and parse example given `features`
  specification.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    file_pattern: List of files or pattern of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If None, cycles through the dataset forever. NOTE - If specified,
      creates a variable that must be initialized, so call
      tf.initialize_local_variables() as shown in the tests.
    queue_capacity: Capacity for input queue.
    reader_num_threads: The number of threads to read examples.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_queue_runners: Number of queue runners to start for the feature queue.
      Adding multiple queue runners for the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing.
    parser_num_threads: (Deprecated) The number of threads to parse examples.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - String `Tensor` of keys.
    - A dict of `Tensor` or `SparseTensor` objects for each in `features`.

  Raises:
    ValueError: for invalid inputs.
  """

    if parser_num_threads:
        # TODO(sibyl-Aix6ihai): Remove on Sept 3 2016.
        logging.warning(
            'parser_num_threads is deprecated, it will be removed on '
            'Sept 3 2016')
    with ops.name_scope(name, 'read_batch_features', [file_pattern]) as scope:
        keys, examples = read_keyed_batch_examples(
            file_pattern,
            batch_size,
            reader,
            randomize_input=randomize_input,
            num_epochs=num_epochs,
            queue_capacity=queue_capacity,
            num_threads=reader_num_threads,
            read_batch_size=batch_size,
            name=scope)

        # Parse the example.
        feature_map = parsing_ops.parse_example(examples, features)

        # Let's also add the preprocessed tensors into the queue; below we
        # collect the dtypes for each item of the queue.
        tensors_to_enqueue = []
        # Each entry contains the key, and a boolean which indicates whether the
        # tensor was a sparse tensor.
        tensors_mapping = []
        # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
        # tensors into a queue. This could be taken care of somewhere else so others
        # can reuse it. Also, QueueBase may be extended to handle sparse tensors
        # directly.
        for key in sorted(feature_map.keys()):
            tensor = feature_map[key]
            if isinstance(tensor, ops.SparseTensor):
                tensors_mapping.append((key, True))
                tensors_to_enqueue.extend(
                    [tensor.indices, tensor.values, tensor.shape])
            else:
                tensors_mapping.append((key, False))
                tensors_to_enqueue.append(tensor)
        tensors_to_enqueue.append(keys)

        queue_dtypes = [x.dtype for x in tensors_to_enqueue]
        input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity,
                                              queue_dtypes)

        # Add a summary op to debug if our feature queue is full or not.
        logging_ops.scalar_summary(
            'queue/parsed_features/%s/fraction_of_%d_full' %
            (input_queue.name, feature_queue_capacity),
            math_ops.cast(input_queue.size(), dtypes.float32) *
            (1. / feature_queue_capacity))

        # Add multiple queue runners so that the queue is always full. Adding more
        # than two queue-runners may hog the cpu on the worker to fill up the queue.
        for _ in range(num_queue_runners):
            queue_runner.add_queue_runner(
                queue_runner.QueueRunner(
                    input_queue, [input_queue.enqueue(tensors_to_enqueue)]))

        dequeued_tensors = input_queue.dequeue()

        # Reset shapes on dequeued tensors.
        for i in range(len(tensors_to_enqueue)):
            dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

        # Recreate feature mapping according to the original dictionary.
        dequeued_feature_map = {}
        index = 0
        for key, is_sparse_tensor in tensors_mapping:
            if is_sparse_tensor:
                # Three tensors are (indices, values, shape).
                dequeued_feature_map[key] = ops.SparseTensor(
                    dequeued_tensors[index], dequeued_tensors[index + 1],
                    dequeued_tensors[index + 2])
                index += 3
            else:
                dequeued_feature_map[key] = dequeued_tensors[index]
                index += 1
        dequeued_keys = dequeued_tensors[-1]

        return dequeued_keys, dequeued_feature_map
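
A minimal usage sketch for the function above, assuming a TF 1.x runtime; the file pattern and the `age` feature spec are made-up placeholders, not part of the original code:

import tensorflow as tf

# Hypothetical feature spec and file pattern, for illustration only.
features = {'age': tf.FixedLenFeature([1], tf.int64)}
keys, parsed = read_keyed_batch_features(
    file_pattern='/tmp/data/*.tfrecord',
    batch_size=32,
    features=features,
    reader=tf.TFRecordReader,
    num_epochs=1)

with tf.Session() as sess:
    # num_epochs creates a local epoch-counter variable, so initialize local variables.
    sess.run(tf.local_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            key_batch, age_batch = sess.run([keys, parsed['age']])
    except tf.errors.OutOfRangeError:
        pass  # one epoch has been consumed
    finally:
        coord.request_stop()
        coord.join(threads)
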
Code example #2
File: input.py  Project: zqkou/tensorflow
def input_producer(input_tensor,
                   element_shape=None,
                   num_epochs=None,
                   shuffle=True,
                   seed=None,
                   capacity=32,
                   shared_name=None,
                   summary_name=None,
                   name=None):
    """Output the rows of `input_tensor` to a queue for an input pipeline.

  Args:
    input_tensor: A tensor with the rows to produce. Must be at least
      one-dimensional. Must either have a fully-defined shape, or
      `element_shape` must be defined.
    element_shape: (Optional.) A `TensorShape` representing the shape of a
      row of `input_tensor`, if it cannot be inferred.
    num_epochs: (Optional.) An integer. If specified `input_producer` produces
      each row of `input_tensor` `num_epochs` times before generating an
      `OutOfRange` error. If not specified, `input_producer` can cycle through
      the rows of `input_tensor` an unlimited number of times.
    shuffle: (Optional.) A boolean. If true, the rows are randomly shuffled
      within each epoch.
    seed: (Optional.) An integer. The seed to use if `shuffle` is true.
    capacity: (Optional.) The capacity of the queue to be used for buffering
      the input.
    shared_name: (Optional.) If set, this queue will be shared under the given
      name across multiple sessions.
    summary_name: (Optional.) If set, a scalar summary for the current queue
      size will be generated, using this name as part of the tag.
    name: (Optional.) A name for the queue.

  Returns:
    A queue with the output rows.  A `QueueRunner` for the queue is
    added to the current `QUEUE_RUNNER` collection of the current
    graph.

  Raises:
    ValueError: If the shape of the input cannot be inferred from the arguments.
  """
    with ops.name_scope(name, "input_producer", [input_tensor]):
        input_tensor = ops.convert_to_tensor(input_tensor, name="input_tensor")
        element_shape = input_tensor.get_shape()[1:].merge_with(element_shape)
        if not element_shape.is_fully_defined():
            raise ValueError(
                "Either `input_tensor` must have a fully defined shape "
                "or `element_shape` must be specified")

        if shuffle:
            input_tensor = random_ops.random_shuffle(input_tensor, seed=seed)

        input_tensor = limit_epochs(input_tensor, num_epochs)

        q = data_flow_ops.FIFOQueue(capacity=capacity,
                                    dtypes=[input_tensor.dtype.base_dtype],
                                    shapes=[element_shape],
                                    shared_name=shared_name,
                                    name=name)
        enq = q.enqueue_many([input_tensor])
        queue_runner.add_queue_runner(queue_runner.QueueRunner(q, [enq]))
        if summary_name is not None:
            logging_ops.scalar_summary(
                "queue/%s/%s" % (q.name, summary_name),
                math_ops.cast(q.size(), dtypes.float32) * (1. / capacity))
        return q
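
A rough consumption sketch for such a producer queue, using the public tf.train.input_producer wrapper from TF 1.x; the toy constant below is an assumption for illustration:

import tensorflow as tf

rows = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
q = tf.train.input_producer(rows, num_epochs=2, shuffle=True, seed=42)
row = q.dequeue()  # one (possibly shuffled) row of `rows` per call

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # num_epochs adds a local counter variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while True:
            print(sess.run(row))
    except tf.errors.OutOfRangeError:
        pass  # raised after two epochs
    finally:
        coord.request_stop()
        coord.join(threads)
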
Code example #3
def _enqueue_join(queue, tensor_list_list):
    enqueue_ops = [queue.enqueue(tl) for tl in tensor_list_list]
    queue_runner.add_queue_runner(queue_runner.QueueRunner(queue, enqueue_ops))
Code example #4
File: input.py  Project: zqkou/tensorflow
def _enqueue_join(queue, tensor_list_list, enqueue_many):
    if enqueue_many:
        enqueue_ops = [queue.enqueue_many(tl) for tl in tensor_list_list]
    else:
        enqueue_ops = [queue.enqueue(tl) for tl in tensor_list_list]
    queue_runner.add_queue_runner(queue_runner.QueueRunner(queue, enqueue_ops))
Code example #5
File: input.py  Project: zqkou/tensorflow
def _enqueue(queue, tensor_list, threads, enqueue_many):
    if enqueue_many:
        enqueue_ops = [queue.enqueue_many(tensor_list)] * threads
    else:
        enqueue_ops = [queue.enqueue(tensor_list)] * threads
    queue_runner.add_queue_runner(queue_runner.QueueRunner(queue, enqueue_ops))
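
The difference between these two helpers is only in how many enqueue ops the QueueRunner gets: _enqueue_join builds one op per input source (one runner thread per source), while _enqueue replicates a single op across `threads` threads. A small illustrative sketch, assuming the three-argument _enqueue_join from example #4 and _enqueue from example #5 are in scope, with made-up toy tensors:

import tensorflow as tf

q = tf.FIFOQueue(capacity=100, dtypes=[tf.int32])

# _enqueue_join: one enqueue op (and one QueueRunner thread) per source list.
sources = [[tf.constant(1)], [tf.constant(2)], [tf.constant(3)]]
_enqueue_join(q, sources, enqueue_many=False)  # runner with 3 enqueue ops

# _enqueue: the same enqueue op duplicated across 4 threads.
_enqueue(q, [tf.constant(7)], 4, enqueue_many=False)  # runner with 4 copies of one op
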
Code example #6
  def apply_gradients(self, grads_and_vars, worker_id, global_step=None, name=None, collect_cdfs=False):
    """Apply gradients to variables.
    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.
    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      worker_id: Index of the replica (worker) calling this op; used to select
        that worker's sync token queue.
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.
      collect_cdfs: If True, take gradients aggregated over all
        `total_num_replicas` replicas from the accumulators; otherwise take a
        gradient as soon as one replica has applied one.
    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.
    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")

    if global_step is None:
      raise ValueError("Global step is required to check staleness")

    self._global_step = global_step
    train_ops = []
    aggregated_grad = []
    var_list = []

#      worker_id_list_printer = logging_ops.Print(global_step,
#                  [a for a in self._worker_idx_list] + [worker_id] + [global_step],
#                  message="Worker ID list status")
#      train_ops.append(worker_id_list_printer)

    self._local_step = variables.Variable(
        initial_value=0,
        trainable=False,
        collections=[ops.GraphKeys.LOCAL_VARIABLES],
        dtype=global_step.dtype.base_dtype,
        name="sync_rep_local_step")
    self.local_step_init_op = state_ops.assign(self._local_step, global_step._ref())
    chief_init_ops = [self.local_step_init_op]
    self.ready_for_local_init_op = variables.report_uninitialized_variables(
      variables.all_variables())

    # The wait op waits for the current worker to dequeue a token from its respective token queue
    self._wait_op = self._sync_token_queues[worker_id].dequeue()

    # Replicas have to wait until they can get a token from the token queue
    # BEFORE beginning to compute gradients.
    with ops.device(global_step.device):
      queue_size = self._sync_token_queues[worker_id].size()
      update_local_step_op = state_ops.assign(self._local_step, global_step._ref())

    # Gradient accum creation
    with ops.name_scope(None, self._name):
      worker_idx_list = []
      worker_counter = 0
      for grad, var in grads_and_vars:
        var_list.append(var)
        tf.logging.info("Grad " + str(grad) + " assigned to " + str(var.device))
        with ops.device(var.device):
          if grad is None:
            continue
          elif isinstance(grad, ops.Tensor):
            grad_accum = data_flow_ops.ConditionalAccumulator(
              grad.dtype,
              shape=var.get_shape(),
              shared_name=var.name + "/grad_accum")
          else:
            if not isinstance(grad, ops.IndexedSlices):
              raise ValueError("Unknown grad type!")
            grad_accum = data_flow_ops.SparseConditionalAccumulator(
              grad.dtype, shape=(), shared_name=var.name + "/grad_accum")
          self._accumulator_list.append((grad_accum, var))

      with ops.device(global_step.device):
        worker_idx_list.append(worker_id)
        worker_counter += 1
        worker_id_list_printer = logging_ops.Print(global_step,
                  [a for a in worker_idx_list] + [worker_id] + [global_step],
                  message="Worker ID list status")
        train_ops.append(worker_id_list_printer)
        counter_printer = logging_ops.Print(global_step, [worker_counter], message="Test for the counter")
        train_ops.append(counter_printer)

      """# Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          with ops.device(var.device):
            if grad is None:
              continue

            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_grad(grad,
                                                     local_step=self._local_step._ref()))

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              train_ops.append(grad_accum.apply_indexed_slices_grad(
                grad, local_step=self._local_step._ref()))"""

      # Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          print_start_op = logging_ops.Print(global_step, [global_step], message="Starting to apply grads for variable %d" % index)
          train_ops.append(print_start_op)
          with ops.device(var.device):
            if grad is None:
              continue

            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              with ops.control_dependencies([print_start_op]):               
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_grad(grad,
                                                        local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    accum_sizes_printer = logging_ops.Print(global_step,
                          [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                          message="Accum aggregated status")
                    ret = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison),
                           lambda: tf.constant(1), lambda: tf.constant(0))
                    notification_printer = logging_ops.Print(global_step, [ret], message="should stop notification")
                    train_ops.append(notification_printer)
                    '''else:
                      notification_printer = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification")
                      train_ops.append(notification_printer)'''
                    train_ops.append(accum_sizes_printer)
#                    worker_id_list_printer = logging_ops.Print(global_step,
#                          [len(self._worker_list)] + [worker_id] + [global_step],
#                          message="Worker ID list status")
#                    train_ops.append(worker_id_list_printer)
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              with ops.control_dependencies([print_start_op]):
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_indexed_slices_grad(
                    grad, local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    accum_sizes_printer_parse = logging_ops.Print(global_step,
                          [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                          message="Accum aggregated status")
                    ret_sparse = tf.cond(tf.greater(self._accumulator_list[0][0].num_accumulated(), self._constant_for_comparison),
                                  lambda: tf.constant(1), lambda: tf.constant(0))
                    notification_printer_sparse = logging_ops.Print(global_step, [ret_sparse], message="should stop notification")
                    train_ops.append(notification_printer_sparse)
                    #else:
                    #  notification_printer_sparse = logging_ops.Print(global_step, ["shouldn't stop"], message="shouldn't stop notification")
                    #  train_ops.append(notification_printer_sparse)                      
                    train_ops.append(accum_sizes_printer_parse)
#                    worker_id_list_printer_sparse = logging_ops.Print(global_step,
#                          [len(self._worker_list)] + [worker_id] + [global_step],
#                          message="Worker ID list status")
#                    train_ops.append(worker_id_list_printer_sparse)                    
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)         
            with ops.control_dependencies([apply_grad_op]):          
              accum_sizes_printer = logging_ops.Print(global_step,
                                                   [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                                                   message="Accum aggregated status on ps")
              train_ops.append(accum_sizes_printer)

      # Phase 2 gradient applying
      for index, (grad, var) in enumerate(grads_and_vars):
        with ops.device(var.device):
          grad_accum = self._accumulator_list[index][0]
          if grad is None:
            aggregated_grad.append(None)
          elif isinstance(grad, ops.Tensor):
            if collect_cdfs:
              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
            else:
              aggregated_grad.append(grad_accum.take_grad(1))
          else:
            if collect_cdfs:
              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
            else:
              aggregated_grad.append(grad_accum.take_indexed_slices_grad(1))

      aggregated_grads_and_vars = zip(aggregated_grad, var_list)

      # Some debug operations
      self.print_sizes = logging_ops.Print(global_step, [self._sync_token_queues[i].size() for i in range(self._total_num_replicas)], message="queue sizes")
      self.print_accum_sizes = logging_ops.Print(self._local_step,
                                                 [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id],
                                                 message="Accum sizes")
      self.print_local_step = logging_ops.Print(self._local_step, [self._local_step._ref(), global_step._ref()], message="local vs global step")

      # sync_op will be assigned to the same device as the global step.
      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies([self.print_accum_sizes]):
          update_op = self._opt.apply_gradients(aggregated_grads_and_vars, global_step)
          self._update_op = update_op
          with ops.control_dependencies([update_op]):
            sync_op = []
            for cur_worker_id in range(self._total_num_replicas):
              sync_op.append(self._sync_token_queues[cur_worker_id].enqueue(global_step))
            sync_op = control_flow_ops.group(*(sync_op))

        # dummy_queue is passed to the queue runner. Don't use the real queues
        # because the queue runner doesn't automatically reopen it once it
        # closed queues in PS devices.
        dummy_queue = (
            data_flow_ops.FIFOQueue(1,
                                    types_pb2.DT_INT32,
                                    shapes=(),
                                    shared_name="dummy_queue"))

        self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                            [sync_op])

      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies(train_ops):
          # Worker finished applying gradients. Add token to phase1_finished_queue
          train_op = logging_ops.Print(self._local_step._ref(),
                                       [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id] + [global_step],
                                       message="Finished worker updates",
                                       name="FinishedWorkerUpdatesPrint")

      for accum, var in self._accumulator_list:
        with ops.device(var.device):
          chief_init_ops.append(
              accum.set_global_step(
                  global_step, name="SetGlobalStep"))
      self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
      self._gradients_applied = True

      return train_op
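
A hedged sketch of how a worker script might drive this customized synchronous wrapper; `opt`, `loss`, `server`, `worker_id`, `global_step`, and `is_chief` are assumptions, and the chief queue runner is reached through the private attribute set in apply_gradients above, which is also an assumption:

grads_and_vars = opt.compute_gradients(loss)
train_op = opt.apply_gradients(grads_and_vars, worker_id, global_step=global_step)

sv = tf.train.Supervisor(is_chief=is_chief,
                         init_op=tf.global_variables_initializer())
with sv.managed_session(server.target) as sess:
    if is_chief:
        # The chief runs the queue runner that refills the per-worker sync token queues.
        sv.start_queue_runners(sess, [opt._chief_queue_runner])
    while not sv.should_stop():
        sess.run(train_op)
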
Code example #7
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      filter_fn=None,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None,
                                      seed=None):
    """Adds operations to read, queue, batch `Example` protos.

  Args:
    file_pattern: List of files or patterns of file paths containing
        `Example` records. See `tf.gfile.Glob` for pattern rules.
    batch_size: An int or scalar `Tensor` specifying the batch size to use.
    reader: A function or class that returns an object with
      `read` method, (filename tensor) -> (example tensor).
    randomize_input: Whether the input should be randomized.
    num_epochs: Integer specifying the number of times to read through the
      dataset. If `None`, cycles through the dataset forever.
      NOTE - If specified, creates a variable that must be initialized, so call
      `tf.local_variables_initializer()` and run the op in a session.
    queue_capacity: Capacity for input queue.
    num_threads: The number of threads enqueuing examples.
    read_batch_size: An int or scalar `Tensor` specifying the number of
      records to read at once.
    filter_fn: Filtering function, takes both keys as well as `Example` Tensors
      and returns a boolean mask of the same shape as the input Tensors to
      be applied for filtering. If `None`, no filtering is done.
    parse_fn: Parsing function, takes an `Example` Tensor and returns a parsed
      representation. If `None`, no parsing is done.
    setup_shared_queue: Whether to set up a shared queue for file names.
    name: Name of resulting op.
    seed: An integer (optional). Seed used if randomize_input == True.

  Returns:
    Returns tuple of:
    - `Tensor` of string keys.
    - String `Tensor` of batched `Example` proto.

  Raises:
    ValueError: for invalid inputs.
  """
    # Retrieve files to read.
    file_names = _get_file_names(file_pattern, randomize_input)

    # Check input parameters are given and reasonable.
    if (not queue_capacity) or (queue_capacity <= 0):
        raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
    if (batch_size is None) or (
        (not isinstance(batch_size, ops.Tensor)) and
        (batch_size <= 0 or batch_size >= queue_capacity)):
        raise ValueError('Invalid batch_size %s, with queue_capacity %s.' %
                         (batch_size, queue_capacity))
    if (read_batch_size is None) or (
        (not isinstance(read_batch_size, ops.Tensor)) and
        (read_batch_size <= 0)):
        raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
    if (not num_threads) or (num_threads <= 0):
        raise ValueError('Invalid num_threads %s.' % num_threads)
    if (num_epochs is not None) and (num_epochs <= 0):
        raise ValueError('Invalid num_epochs %s.' % num_epochs)

    with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
        with ops.name_scope('file_name_queue') as file_name_queue_scope:
            if setup_shared_queue:
                file_name_queue = data_flow_ops.FIFOQueue(
                    capacity=1, dtypes=[dtypes.string], shapes=[[]])
                enqueue_op = file_name_queue.enqueue(
                    input_pipeline_ops.seek_next(file_names,
                                                 shuffle=randomize_input,
                                                 num_epochs=num_epochs,
                                                 seed=seed))
                queue_runner.add_queue_runner(
                    queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
            else:
                file_name_queue = input_ops.string_input_producer(
                    constant_op.constant(file_names, name='input'),
                    shuffle=randomize_input,
                    num_epochs=num_epochs,
                    name=file_name_queue_scope,
                    seed=seed)

        example_list = _get_examples(file_name_queue, reader, num_threads,
                                     read_batch_size, filter_fn, parse_fn)

        enqueue_many = read_batch_size > 1

        if num_epochs is None:
            allow_smaller_final_batch = False
        else:
            allow_smaller_final_batch = True

        # Setup batching queue given list of read example tensors.
        if randomize_input:
            if isinstance(batch_size, ops.Tensor):
                min_after_dequeue = int(queue_capacity * 0.4)
            else:
                min_after_dequeue = max(queue_capacity - (3 * batch_size),
                                        batch_size)
            queued_examples_with_keys = input_ops.shuffle_batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                min_after_dequeue=min_after_dequeue,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch,
                seed=seed)
        else:
            queued_examples_with_keys = input_ops.batch_join(
                example_list,
                batch_size,
                capacity=queue_capacity,
                enqueue_many=enqueue_many,
                name=scope,
                allow_smaller_final_batch=allow_smaller_final_batch)
        if parse_fn and isinstance(queued_examples_with_keys, dict):
            queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME)
            return queued_keys, queued_examples_with_keys
        return queued_examples_with_keys
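
For context, a rough call sketch; the file pattern is a made-up placeholder. Without a parse_fn the helper returns serialized Example protos, which can be parsed separately:

import tensorflow as tf

keys, serialized = _read_keyed_batch_examples_helper(
    file_pattern='/tmp/data-*.tfrecord',  # placeholder pattern
    batch_size=64,
    reader=tf.TFRecordReader,
    randomize_input=True,
    num_epochs=1,
    read_batch_size=64)

# Parsing happens outside the helper when parse_fn is not supplied.
parsed = tf.parse_example(
    serialized, {'label': tf.FixedLenFeature([1], tf.int64)})
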
Code example #8
def prefetch_queue(tensors,
                   capacity=8,
                   num_threads=1,
                   dynamic_pad=False,
                   shared_name=None,
                   name=None):
  """Creates a queue to prefetch tensors from `tensors`.

  A queue runner for enqueuing tensors into the prefetch_queue is automatically
  added to the TF QueueRunners collection.

  Example:
  This is for example useful to pre-assemble input batches read with
  `tf.compat.v1.train.batch()` and enqueue the pre-assembled batches.  Ops that
  dequeue from the pre-assembled queue will not pay the cost of assembling the
  batch.

  images, labels = tf.compat.v1.train.batch([image, label], batch_size=32,
                                             num_threads=4)
  batch_queue = prefetch_queue([images, labels])
  images, labels = batch_queue.dequeue()
  logits = Net(images)
  loss = Loss(logits, labels)

  Args:
    tensors: A list or dictionary of `Tensors` to enqueue in the buffer.
    capacity: An integer. The maximum number of elements in the queue.
    num_threads: An integer.  Number of threads running the enqueue op.
    dynamic_pad: Boolean.  Whether to allow variable dimensions in input shapes.
    shared_name: (optional). If set, this queue will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A queue from which you can dequeue tensors with the same type and shape
    as `tensors`.
  """
  if isinstance(tensors, dict):
    # Need to wrap the keys and values in list() since Python3 returns views.
    # We sort the keys so the order is consistent across runs.
    names = list(sorted(tensors.keys()))
    tensor_list = list([tensors[n] for n in names])
  else:
    names = None
    tensor_list = tensors

  with ops.name_scope(name, "prefetch_queue", tensor_list) as name:
    dtypes = [t.dtype for t in tensor_list]
    shapes = [t.get_shape() for t in tensor_list]
    queue = _which_queue(dynamic_pad)(
        capacity=capacity,
        dtypes=dtypes,
        shapes=shapes,
        names=names,
        shared_name=shared_name)
    enqueue_op = queue.enqueue(tensors)
    queue_runner.add_queue_runner(
        queue_runner.QueueRunner(queue, [enqueue_op] * num_threads))
    summary.scalar(
        "fraction_of_%d_full" % capacity,
        math_ops.cast(queue.size(), _dtypes.float32) * (1. / capacity))
    return queue
Code example #9
  def apply_gradients(self, grads_and_vars, global_step=None, name=None):
    """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")

    if global_step is None:
      raise ValueError("Global step is required to check staleness")

    self._global_step = global_step
    train_ops = []
    aggregated_grad = []
    var_list = []

    self._local_step = variables.Variable(
        initial_value=0,
        trainable=False,
        collections=[ops.GraphKeys.LOCAL_VARIABLES],
        dtype=global_step.dtype.base_dtype,
        name="sync_rep_local_step")
    self.local_step_init_op = state_ops.assign(self._local_step, global_step)
    chief_init_ops = [self.local_step_init_op]
    self.ready_for_local_init_op = variables.report_uninitialized_variables(
        variables.global_variables())

    with ops.name_scope(None, self._name):
      for grad, var in grads_and_vars:
        var_list.append(var)
        with ops.device(var.device):
          # Dense gradients.
          if grad is None:
            aggregated_grad.append(None)  # pass-through.
            continue
          elif isinstance(grad, ops.Tensor):
            grad_accum = data_flow_ops.ConditionalAccumulator(
                grad.dtype,
                shape=var.get_shape(),
                shared_name=var.name + "/grad_accum")
            train_ops.append(grad_accum.apply_grad(
                grad, local_step=self._local_step))
            aggregated_grad.append(grad_accum.take_grad(
                self._replicas_to_aggregate))
          else:
            if not isinstance(grad, ops.IndexedSlices):
              raise ValueError("Unknown grad type!")
            grad_accum = data_flow_ops.SparseConditionalAccumulator(
                grad.dtype, shape=(), shared_name=var.name + "/grad_accum")
            train_ops.append(grad_accum.apply_indexed_slices_grad(
                grad, local_step=self._local_step))
            aggregated_grad.append(grad_accum.take_indexed_slices_grad(
                self._replicas_to_aggregate))

          self._accumulator_list.append((grad_accum, var.device))

      aggregated_grads_and_vars = zip(aggregated_grad, var_list)

      # sync_op will be assigned to the same device as the global step.
      with ops.device(global_step.device), ops.name_scope(""):
        update_op = self._opt.apply_gradients(aggregated_grads_and_vars,
                                              global_step)

      # Create token queue.
      with ops.device(global_step.device), ops.name_scope(""):
        sync_token_queue = (
            data_flow_ops.FIFOQueue(-1,
                                    global_step.dtype.base_dtype,
                                    shapes=(),
                                    name="sync_token_q",
                                    shared_name="sync_token_q"))
        self._sync_token_queue = sync_token_queue

        # dummy_queue is passed to the queue runner. Don't use the real queues
        # because the queue runner doesn't automatically reopen it once it
        # closed queues in PS devices.
        dummy_queue = (
            data_flow_ops.FIFOQueue(1,
                                    types_pb2.DT_INT32,
                                    shapes=(),
                                    name="dummy_queue",
                                    shared_name="dummy_queue"))

      with ops.device(global_step.device), ops.name_scope(""):
        # Replicas have to wait until they can get a token from the token queue.
        with ops.control_dependencies(train_ops):
          token = sync_token_queue.dequeue()
        train_op = state_ops.assign(self._local_step, token)

        with ops.control_dependencies([update_op]):
          # Sync_op needs to insert tokens to the token queue at the end of the
          # step so the replicas can fetch them to start the next step.
          tokens = array_ops.fill([self._tokens_per_step], global_step)
          sync_op = sync_token_queue.enqueue_many((tokens,))

        if self._variable_averages is not None:
          with ops.control_dependencies([sync_op]), ops.name_scope(""):
            sync_op = self._variable_averages.apply(
                self._variables_to_average)

        self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                            [sync_op])
      for accum, dev in self._accumulator_list:
        with ops.device(dev):
          chief_init_ops.append(
              accum.set_global_step(
                  global_step, name="SetGlobalStep"))
      self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
      self._gradients_applied = True
      return train_op
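
This mirrors the stock tf.train.SyncReplicasOptimizer. A rough sketch of how the returned train_op and the chief queue runner are usually driven; `loss`, `global_step`, `server`, and `is_chief` are assumptions:

import tensorflow as tf

opt = tf.train.SyncReplicasOptimizer(
    tf.train.GradientDescentOptimizer(0.1),
    replicas_to_aggregate=4,
    total_num_replicas=4)
train_op = opt.minimize(loss, global_step=global_step)

# The hook starts the chief queue runner (which enqueues sync tokens) and supplies
# the initial tokens, so each replica's token dequeue in train_op can proceed.
hooks = [opt.make_session_run_hook(is_chief)]
with tf.train.MonitoredTrainingSession(master=server.target,
                                       is_chief=is_chief,
                                       hooks=hooks) as sess:
    while not sess.should_stop():
        sess.run(train_op)
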
Code example #10
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        """Apply gradients to variables.

    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.

    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Default to the
        name passed to the Optimizer constructor.

    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.

    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []
        inputs = []
        var_list = []
        for x in grads_and_vars:
            inputs.extend(list(x))

        with ops.device(global_step.device):
            self._local_steps = variables.Variable(array_ops.zeros(
                [self._total_num_replicas], dtype=global_step.dtype),
                                                   trainable=False,
                                                   name="local_steps")

        # Check staleness. Note that this has to be ref(), otherwise the
        # identity will be accessed and stale values will be returned.
        local_step = array_ops.slice(
            self._local_steps._ref(),  # pylint: disable=protected-access
            array_ops.reshape(self._replica_id, (1, )),
            [1],
            name="get_local_step")
        local_step = array_ops.reshape(local_step, ())
        is_stale = math_ops.less(local_step, global_step)

        with ops.name_scope(name, self._name, inputs) as name:
            for grad, var in grads_and_vars:
                var_list.append(var)
                with ops.device(var.device):
                    if isinstance(grad, ops.Tensor):
                        gradient_queue = (data_flow_ops.FIFOQueue(
                            self._tokens_per_step * 2,
                            grad.dtype,
                            shapes=var.get_shape(),
                            shared_name=var.name))
                        self._one_element_queue_list.append(
                            (gradient_queue, var.device))
                        train_ops.append(gradient_queue.enqueue([grad]))

                        # Aggregate all gradients
                        gradients = gradient_queue.dequeue_many(
                            self._replicas_to_aggregate)
                        aggregated_grad.append(
                            math_ops.reduce_sum(gradients, [0]))
                    elif grad is None:
                        aggregated_grad.append(None)  # pass-through.
                    else:
                        if not isinstance(grad, ops.IndexedSlices):
                            raise ValueError("Unknown grad type!")
                        aggregated_grad.append(
                            self._aggregate_sparse_grad(grad, var, train_ops))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            # sync_op will be assigned to the same device as the global step.
            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(
                    aggregated_grads_and_vars, global_step)

            # Create token queue.
            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                # dummy_queue is passed to the queue runner. Don't use the real queues
                # because the queue runner doesn't automatically reopen it once it
                # closed queues in PS devices.
                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    shared_name="dummy_queue"))
            # Clear all the gradients queues in case there are stale gradients.
            clear_queue_ops = []
            with ops.control_dependencies([update_op]):
                for queue, dev in self._one_element_queue_list:
                    with ops.device(dev):
                        stale_grads = queue.dequeue_many(queue.size())
                        clear_queue_ops.append(stale_grads)

                for queue, dev in self._sparse_grad_queues_and_devs:
                    with ops.device(dev):
                        _, stale_indices = queue.dequeue_many(queue.size())
                        clear_queue_ops.append(stale_indices)

            with ops.device(global_step.device):
                self._clean_up_op = control_flow_ops.abort(
                    error_msg="From sync_replicas")

            # According to the staleness, select between the enqueue op (real_grad)
            # or no-op (no_op_grad). Effectively dropping all the stale gradients.
            no_op_grad = lambda: [
                control_flow_ops.no_op(name="no_grad_enqueue")
            ]
            real_grad = lambda: [control_flow_ops.group(*train_ops)]
            final_train_ops = control_flow_ops.cond(is_stale, no_op_grad,
                                                    real_grad)

            with ops.device(global_step.device), ops.name_scope(""):
                # Replicas have to wait until they can get a token from the token queue.
                with ops.control_dependencies([final_train_ops]):
                    token = sync_token_queue.dequeue()
                    train_op = state_ops.scatter_update(self._local_steps,
                                                        self._replica_id,
                                                        token,
                                                        name=name)

                with ops.control_dependencies(clear_queue_ops):
                    # Sync_op needs to insert tokens to the token queue at the end of the
                    # step so the replicas can fetch them to start the next step.
                    # Note that ref() is used to avoid reading the old value of
                    # the step from the identity.
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step._ref())  # pylint: disable=protected-access
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op
                                                   ]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])
                self._gradients_applied = True
                return train_op
Code example #11
    def apply_gradients(self, grads_and_vars, global_step=None, name=None):
        if not grads_and_vars:
            raise ValueError("Must supply at least one variable")

        if global_step is None:
            raise ValueError("Global step is required to check staleness")

        self._global_step = global_step
        train_ops = []
        aggregated_grad = []

        # local_anchor op will be placed on this worker task by default.
        local_anchor = control_flow_ops.no_op()
        # Colocating local_step variable prevents it being placed on the PS.
        with ops.colocate_with(local_anchor):
            self._local_step = variable_scope.variable(
                initial_value=0,
                trainable=False,
                collections=[ops.GraphKeys.LOCAL_VARIABLES],
                dtype=global_step.dtype.base_dtype,
                name="local_step")

        self.local_step_init_op = state_ops.assign(self._local_step,
                                                   global_step)
        chief_init_ops = [self.local_step_init_op]
        self.ready_for_local_init_op = variables.report_uninitialized_variables(
            variables.global_variables())

        var_list = [v for g, v in grads_and_vars]
        velocity_list = [self._var_2_velocity[v] for v in var_list]
        residual_list = [self._var_2_residual[v] for v in var_list]

        density = 0.01

        with ops.name_scope(None, self._name):
            for velocity, residual, (grad, var) in zip(velocity_list,
                                                       residual_list,
                                                       grads_and_vars):
                if grad is not None:
                    if self._use_nesterov:
                        update_velocity = self._momentum * (velocity + grad)
                        update_residual = residual + update_velocity + grad
                    else:
                        update_velocity = self._momentum * velocity + grad
                        update_residual = residual + update_velocity
                else:
                    update_velocity = velocity
                    update_residual = residual

                # select threshold according to abs(update_residual)
                top_k_values, top_k_indices = nn_ops.top_k(
                    math_ops.abs(update_residual),
                    math_ops.to_int32(
                        array_ops.shape(update_residual)[-1] * density))
                threshold = top_k_values[-1]
                mask = math_ops.abs(update_residual) > threshold
                mask = math_ops.cast(mask, dtype=dtypes.int32)
                mask_h = math_ops.abs(mask - 1)

                with ops.device(grad.device):
                    dense_grad = mask * update_residual
                    indices = array_ops.where(math_ops.not_equal(
                        dense_grad, 0))
                    values = array_ops.gather_nd(dense_grad, indices)
                    sparse_grad = ops.IndexedSlices(values, indices,
                                                    dense_grad.get_shape())
                    #grad_update = state_ops.assign(grad, mask * update_residual)

                #with ops.control_dependencies([grad_update]), ops.device(var.device):
                #grad_accum = data_flow_ops.ConditionalAccumulator(
                #grad.dtype, shape=var.get_shape(),
                #shared_name=var.name + "/grad_accum")
                #train_ops.append(grad_accum.apply_grad(grad, local_step=self._local_step))
                #aggregated_grad.append(grad_accum.take_grad(self._replicas_to_aggregate))

                with ops.device(var.device):
                    grad_accum = data_flow_ops.SparseConditionalAccumulator(
                        sparse_grad.dtype,
                        shape=(),
                        shared_name=var.name + "/grad_accum")
                    train_ops.append(
                        grad_accum.apply_indexed_slices_grad(
                            sparse_grad, local_step=self._local_step))
                    aggregated_grad.append(
                        grad_accum.take_indexed_slices_grad(
                            self._replicas_to_aggregate))

                    self._accumulator_list.append((grad_accum, var.device))

                with ops.device(residual.device):
                    train_ops.append(
                        state_ops.assign(residual, mask_h * update_residual))
                with ops.device(velocity.device):
                    train_ops.append(
                        state_ops.assign(velocity, mask_h * update_velocity))

            aggregated_grads_and_vars = zip(aggregated_grad, var_list)

            with ops.device(global_step.device), ops.name_scope(""):
                update_op = self._opt.apply_gradients(aggregated_grads_and_vars,
                                                      global_step)

            with ops.device(global_step.device), ops.name_scope(""):
                sync_token_queue = (data_flow_ops.FIFOQueue(
                    -1,
                    global_step.dtype.base_dtype,
                    shapes=(),
                    name="sync_token_q",
                    shared_name="sync_token_q"))
                self._sync_token_queue = sync_token_queue

                dummy_queue = (data_flow_ops.FIFOQueue(
                    1,
                    types_pb2.DT_INT32,
                    shapes=(),
                    name="dummy_queue",
                    shared_name="dummy_queue"))

                with ops.control_dependencies(train_ops):
                    token = sync_token_queue.dequeue()
                train_op = state_ops.assign(self._local_step, token)

                with ops.control_dependencies([update_op]):
                    tokens = array_ops.fill([self._tokens_per_step],
                                            global_step)
                    sync_op = sync_token_queue.enqueue_many((tokens, ))

                if self._variable_averages is not None:
                    with ops.control_dependencies([sync_op
                                                   ]), ops.name_scope(""):
                        sync_op = self._variable_averages.apply(
                            self._variables_to_average)

                self._chief_queue_runner = queue_runner.QueueRunner(
                    dummy_queue, [sync_op])

            for accum, dev in self._accumulator_list:
                with ops.device(dev):
                    chief_init_ops.append(
                        accum.set_global_step(global_step,
                                              name="SetGlobalStep"))
            self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
            self._gradients_applied = True

            return train_op
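
The loop above sparsifies each gradient with a top-k threshold on the accumulated residual: only entries whose magnitude strictly exceeds the k-th largest magnitude are pushed into the sparse accumulator, and the remainder is carried to the next step. A standalone NumPy sketch of just that masking step, with made-up numbers:

import numpy as np

update_residual = np.array([0.1, -0.9, 0.4, 0.05])
density = 0.5
k = int(update_residual.shape[-1] * density)                   # keep top 50% -> k = 2
threshold = np.sort(np.abs(update_residual))[::-1][k - 1]      # k-th largest magnitude = 0.4
mask = (np.abs(update_residual) > threshold).astype(np.int32)  # strict '>' as in the code: [0, 1, 0, 0]
mask_h = np.abs(mask - 1)                                       # complement of the mask

sparse_part = mask * update_residual     # sent through the sparse accumulator
carried_over = mask_h * update_residual  # kept as next step's residual
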
Code example #12
File: pipeline.py  Project: rbpittman/ensemble
    def get_image_labels(self):
        if self.is_all_shared:
            ### ALL SHARED ###
            img_pre_fn = preprocessing_factory.get_preprocessing(self.FLAGS.preprocessing_name, 
                                                                 is_training=True)
            with tf.device("/cpu:0"):
                with tf.name_scope("reading"):
                    data_provider = slim.dataset_data_provider.DatasetDataProvider(
                        self.dataset, num_readers=self.FLAGS.num_data_readers,
                        common_queue_capacity=20*self.FLAGS.batch_size,
                        common_queue_min=10*self.FLAGS.batch_size,
                        seed=self.rank)
                    [image, label] = data_provider.get(['image', 'label'])
                with tf.name_scope("to-preprocessing"):
                    capacity = 20 * self.FLAGS.batch_size
                    to_pre_queue = data_flow_ops.FIFOQueue(capacity=capacity,
                                                           dtypes=[image.dtype, label.dtype],
                                                           shapes=None,
                                                           name="to_pre_queue")
                    to_pre_op = to_pre_queue.enqueue([image, label])
                    queue_runner.add_queue_runner(queue_runner.QueueRunner(to_pre_queue, [to_pre_op] * Pipeline.QR_THREADS))
                    tf.summary.scalar("to_pre_fraction_of_%d_full" % capacity,
                                    math_ops.to_float(to_pre_queue.size()) * (1. / capacity))
                    image, label = to_pre_queue.dequeue()
                with tf.name_scope("preprocessing"):#TODO
                    image = img_pre_fn(image, self.train_image_size, self.train_image_size, fast_mode=self.FLAGS.fast_mode)
                with tf.name_scope("to-allgather"):
                    capacity = 20 * self.FLAGS.batch_size
                    to_allg_queue = data_flow_ops.FIFOQueue(capacity=capacity,
                                                            dtypes=[image.dtype, label.dtype],
                                                            shapes=[[self.train_image_size, self.train_image_size, 3], []],
                                                            name="to_allgather_queue")#[image.get_shape(), label.get_shape()])
                    queue_runner.add_queue_runner(queue_runner.QueueRunner(to_allg_queue, [to_allg_queue.enqueue([image, label])] * Pipeline.QR_THREADS))
                    tf.summary.scalar("to_allgather_fraction_of_%d_full" % capacity,
                                   math_ops.to_float(to_allg_queue.size()) * (1. / capacity))

                # num_preprocessors = tf.placeholder(tf.int32, shape=[], name="num_preprocessors)
                # self.num_hvd_send_tensor = 
                send_images, send_labels = to_allg_queue.dequeue_many(self.num_hvd_send)
                # if rank == #TODO
                all_images = hvd.allgather(send_images, name="hvd_allgather")
                all_labels = hvd.allgather(send_labels, name="hvd_allgather")
                #TODO: Remove extra queues
                with tf.name_scope("to-compute"):
                    capacity = 30 * self.FLAGS.batch_size
                    to_compute_queue = data_flow_ops.FIFOQueue(capacity=capacity,
                                                               dtypes=[image.dtype, label.dtype],
                                                               shapes=[[self.train_image_size, self.train_image_size, 3], []],#TODO
                                                               name="to_compute_queue")#[image.get_shape(), label.get_shape()])
                    queue_runner.add_queue_runner(queue_runner.QueueRunner(to_compute_queue, [to_compute_queue.enqueue_many([all_images, all_labels])]))#1 thread!
                    tf.summary.scalar("to_compute_fraction_of_%d_full" % capacity,
                                   math_ops.to_float(to_compute_queue.size()) * (1. / capacity))
                image, label = to_compute_queue.dequeue()
        elif self.is_single_bcast:
            ### SINGLE BROADCAST ###
            img_pre_fn = preprocessing_factory.get_preprocessing(self.FLAGS.preprocessing_name, 
                                                                 is_training=True)
            allg_images_name = "allgather-images-op"
            allg_labels_name = "allgather-labels-op"
            bcast_images_name = "bcast-images-op"
            bcast_labels_name = "bcast-labels-op"
            if 0 in self.member_of_group: #If we belong to group 0, initialize the reading and preprocessing pipeline
                with tf.device("/cpu:0"):
                    with tf.name_scope("reading"):
                        data_provider = slim.dataset_data_provider.DatasetDataProvider(
                            self.dataset, num_readers=self.FLAGS.num_data_readers,
                            common_queue_capacity=20*self.FLAGS.batch_size,
                            common_queue_min=10*self.FLAGS.batch_size,
                            seed=self.rank)
                        [image, label] = data_provider.get(['image', 'label'])
                    image, label = create_qr("to-pre", 10 * self.FLAGS.batch_size, [image, label], None, [image.dtype, label.dtype], Pipeline.QR_THREADS, False, False)

                    with tf.name_scope("preprocessing"):
                        image = img_pre_fn(image, self.train_image_size, self.train_image_size, fast_mode=self.FLAGS.fast_mode)

                    send_images, send_labels = create_qr("to-allg", 10 * self.FLAGS.batch_size, [image, label], [[self.train_image_size, self.train_image_size, 3], []], [image.dtype, label.dtype], Pipeline.QR_THREADS, False, True, self.num_hvd_send)
                all_images = hvd.allgather(send_images, group=0, name=allg_images_name)
                all_labels = hvd.allgather(send_labels, group=0, name=allg_labels_name)
                all_images, all_labels = create_qr("to-bcast", 20 * self.FLAGS.batch_size, [all_images, all_labels], [[self.train_image_size, self.train_image_size, 3], []], [post_pre_image_dtype, post_pre_label_dtype], 1, True, True, self.images_per_bcast)
            if 1 in self.member_of_group:
                # For the middle man rank, reset all_images and all_labels
                # names to their broadcasted tensors so that the bcast is
                # performed. Note that the bcast root is rank 0 since the
                # group1 sent to init had this rank listed first, meaning that
                # the resulting MPI group comm has this rank as rank 0
                if len(self.member_of_group) == 1:
                    # Then not middle man, so construct holder variable WITH CORRECT NAME!
                    # tf.Variable(self.num_hvd_send?
                    all_images = tf.zeros([self.images_per_bcast, self.train_image_size, self.train_image_size, 3], dtype=post_pre_image_dtype)
                    all_labels = tf.zeros([self.images_per_bcast]                                       , dtype=post_pre_label_dtype) #shape of [] turns into 1D instead of 0D
                all_images = hvd.broadcast(all_images, 0, group=1, name=bcast_images_name)
                all_labels = hvd.broadcast(all_labels, 0, group=1, name=bcast_labels_name)
            image, label = create_qr("to-compute", 20 * self.FLAGS.batch_size, [all_images, all_labels], [[self.train_image_size, self.train_image_size, 3], []], [post_pre_image_dtype, post_pre_label_dtype], 1, True, False)
        elif self.is_multi_bcast:
            ### MULTIPLE BROADCAST ###
            # print("Rank:", rank, member_of_group, group_rank_list)
            img_pre_fn = preprocessing_factory.get_preprocessing(self.FLAGS.preprocessing_name, 
                                                                 is_training=True)
            # allg_image_name = "allgathered-image" # need some naming commonalities
            # allg_label_name = "allgathered-label"
            allg_images_name = "allgather-images-op"
            allg_labels_name = "allgather-labels-op"
            bcast_images_name = "bcast-images-op"
            bcast_labels_name = "bcast-labels-op"
            # if 0 in member_of_group: #If we belong to group 0, initialize the reading and preprocessing pipeline
            if self.rank < self.FLAGS.num_pre:
                with tf.device("/cpu:0"):
                    with tf.name_scope("reading"):
                        data_provider = slim.dataset_data_provider.DatasetDataProvider(
                            self.dataset, num_readers=self.FLAGS.num_data_readers,
                            common_queue_capacity=20*self.FLAGS.batch_size,
                            common_queue_min=10*self.FLAGS.batch_size,
                            seed=self.rank)
                        [image, label] = data_provider.get(['image', 'label'])

                    image, label = create_qr("to-pre", 10 * self.FLAGS.batch_size, [image, label], None, [image.dtype, label.dtype], Pipeline.QR_THREADS, False, False)

                    with tf.name_scope("preprocessing"):
                        image = img_pre_fn(image, self.train_image_size, self.train_image_size, fast_mode=self.FLAGS.fast_mode)
                        # image = tf.Print(image, ["using preprocessed image"])
                    send_images, send_labels = create_qr("to-bcast", 20 * self.FLAGS.batch_size, [image, label], [[self.train_image_size, self.train_image_size, 3], []], [image.dtype, label.dtype], 2 * Pipeline.QR_THREADS, False, True, self.images_per_bcast)
            else:
                send_images = tf.zeros([self.images_per_bcast, self.train_image_size, self.train_image_size, 3], dtype=post_pre_image_dtype)
                send_labels = tf.zeros([self.images_per_bcast]                                                 , dtype=post_pre_label_dtype)
            with tf.device("/cpu:0"):
                bcast_images_root = "broadcast-images-"
                bcast_labels_root = "broadcast-labels-"
                bcast_images_per_group = [hvd.broadcast(send_images, i, group=i, name=bcast_images_root + str(i)) for i in range(self.FLAGS.num_pre)]
                bcast_labels_per_group = [hvd.broadcast(send_labels, i, group=i, name=bcast_labels_root + str(i)) for i in range(self.FLAGS.num_pre)]
                
                with tf.name_scope("to-compute"):
                    capacity = 30 * self.FLAGS.batch_size
                    to_compute_q = data_flow_ops.FIFOQueue(capacity=capacity,
                                                    dtypes=[post_pre_image_dtype, post_pre_label_dtype],
                                                    shapes=[[self.train_image_size, self.train_image_size, 3], []], 
                                                    name="to-compute-queue")
                    to_comp_ops = [to_compute_q.enqueue_many([bcast_images_per_group[i], bcast_labels_per_group[i]]) for i in range(self.FLAGS.num_pre)]
                    queue_runner.add_queue_runner(queue_runner.QueueRunner(to_compute_q, to_comp_ops))
                    tf.summary.scalar("to_compute_fraction_of_%d_full" % capacity,
                                      math_ops.to_float(to_compute_q.size()) * (1. / capacity))
                    image, label = to_compute_q.dequeue()
        return image, label
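
The single- and multi-broadcast branches above rely on a create_qr helper that is not included in this excerpt. The following is a minimal sketch of what such a helper might look like, inferred purely from the call sites; the signature, the optional dequeue_n argument, and the summary name are assumptions, not the project's actual implementation.

def create_qr(name, capacity, tensors, shapes, dtypes,
              num_threads, enqueue_many, dequeue_many, dequeue_n=None):
    # Hypothetical helper: wraps a FIFOQueue plus QueueRunner and returns the
    # dequeued tensors, mirroring the explicit queue setup in the allgather branch.
    with tf.name_scope(name):
        queue = data_flow_ops.FIFOQueue(capacity=capacity, dtypes=dtypes,
                                        shapes=shapes, name=name + "_queue")
        enqueue_op = queue.enqueue_many(tensors) if enqueue_many else queue.enqueue(tensors)
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(queue, [enqueue_op] * num_threads))
        tf.summary.scalar("%s_fraction_of_%d_full" % (name, capacity),
                          math_ops.to_float(queue.size()) * (1. / capacity))
        return queue.dequeue_many(dequeue_n) if dequeue_many else queue.dequeue()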
Code Example #13
0
  def apply_gradients(self, grads_and_vars, worker_id, global_step=None, name=None, collect_cdfs=False,
    #  batch_idx_list=None, worker_kill_list=None, num_workers=None, num_batches_per_epoch=None):
    matrix_to_solve=None, num_batches_per_epoch=None):
    """Apply gradients to variables.
    This contains most of the synchronization implementation and also wraps the
    apply_gradients() from the real optimizer.
    Args:
      grads_and_vars: List of (gradient, variable) pairs as returned by
        compute_gradients().
      worker_id: Index of the worker (replica) applying the gradients; used to
        pick this worker's sync token queue and its entry of the least-squares
        solution.
      global_step: Optional Variable to increment by one after the
        variables have been updated.
      name: Optional name for the returned operation.  Defaults to the
        name passed to the Optimizer constructor.
      collect_cdfs: If True, take gradients from `replicas_to_aggregate`
        replicas out of the accumulators; otherwise a single accumulated
        gradient is taken.
      matrix_to_solve: Matrix `A` of the least-squares system solved to compute
        per-worker gradient weights.
      num_batches_per_epoch: Number of batches per epoch; sets the length of
        the right-hand-side vector in the least-squares solve.
    Returns:
      train_op: The op to dequeue a token so the replicas can exit this batch
      and start the next one. This is executed by each replica.
    Raises:
      ValueError: If the grads_and_vars is empty.
      ValueError: If global step is not provided, the staleness cannot be
        checked.
    """
    if not grads_and_vars:
      raise ValueError("Must supply at least one variable")

    if global_step is None:
      raise ValueError("Global step is required to check staleness")

    self._global_step = global_step
    train_ops = []
    aggregated_grad = []
    var_list = []

    self._local_step = variables.Variable(
        initial_value=0,
        trainable=False,
        collections=[ops.GraphKeys.LOCAL_VARIABLES],
        dtype=global_step.dtype.base_dtype,
        name="sync_rep_local_step")
    self.local_step_init_op = state_ops.assign(self._local_step, global_step._ref())
    chief_init_ops = [self.local_step_init_op]
    self.ready_for_local_init_op = variables.report_uninitialized_variables(
      variables.all_variables())

    # The wait op waits for the current worker to dequeue a token from its respective token queue
    self._wait_op = self._sync_token_queues[worker_id].dequeue()

    # Replicas have to wait until they can get a token from the token queue
    # BEFORE beginning to compute gradients.
    with ops.device(global_step.device):
      queue_size = self._sync_token_queues[worker_id].size()
      update_local_step_op = state_ops.assign(self._local_step, global_step._ref())

    # Gradient accum creation
    with ops.name_scope(None, self._name):
      for grad, var in grads_and_vars:
        var_list.append(var)
        tf.logging.info("Grad " + str(grad) + " assigned to " + str(var.device))
        with ops.device(var.device):
          if grad is None:
            continue
          elif isinstance(grad, ops.Tensor):
            grad_accum = data_flow_ops.ConditionalAccumulator(
              grad.dtype,
              shape=var.get_shape(),
              shared_name=var.name + "/grad_accum")
          else:
            if not isinstance(grad, ops.IndexedSlices):
              raise ValueError("Unknown grad type!")
            grad_accum = data_flow_ops.SparseConditionalAccumulator(
              grad.dtype, shape=(), shared_name=var.name + "/grad_accum")

          self._accumulator_list.append((grad_accum, var))

      """# Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          with ops.device(var.device):
            if grad is None:
              continue
            elif isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]
              train_ops.append(grad_accum.apply_grad(grad,
                                                     local_step=self._local_step._ref()))
            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]
              train_ops.append(grad_accum.apply_indexed_slices_grad(
                grad, local_step=self._local_step._ref()))"""

      # Phase 1 gradient computation
      with ops.control_dependencies([update_local_step_op]):
        for index, (grad, var) in enumerate(grads_and_vars):
          print_start_op = logging_ops.Print(global_step, [global_step], message="Starting to apply grads for variable %d" % index)
          train_ops.append(print_start_op)
          with ops.device(var.device):
            ps_step_printer0 = logging_ops.Print(global_step, [global_step], message="global step printer0 on ps")
            train_ops.append(ps_step_printer0)
            '''Implement LS computation and solution here'''            
            #b = np.ones(int(num_batches_per_epoch))
            b = tf.ones([int(num_batches_per_epoch),1], tf.float32)         
            A = matrix_to_solve
#            A_for_calc = np.transpose(A)
            LS_solution = linalg_ops.matrix_solve_ls(A, b, fast=False)
            LS_calc = tf.reshape(LS_solution, [-1])
            weight = tf.slice(LS_calc, [worker_id], [1])
#            print_ls_op = logging_ops.Print(LS_calc, [LS_calc], message="Solution for LS!")
#            train_ops.append(print_ls_op)
            '''Kill some workers'''
            if grad is None:
              continue
            # Weight the gradient only after the None check above.
            weighted_grad = tf.scalar_mul(weight[0], grad)

            if isinstance(grad, ops.Tensor):
              grad_accum = self._accumulator_list[index][0]

              num_accum = grad_accum.num_accumulated()
              tf.logging.info("Grad Accumed %s, Worker ID: %s" % (str(num_accum), str(worker_id)))

              with ops.control_dependencies([print_start_op]):
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_grad(grad,
#                  apply_grad_op = grad_accum.apply_grad(weighted_grad,
                                                        local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)

            else:
              if not isinstance(grad, ops.IndexedSlices):
                raise ValueError("Unknown grad type!")
              grad_accum = self._accumulator_list[index][0]

              with ops.control_dependencies([print_start_op]):
                with tf.device("job:worker/task:%d" % worker_id):
                  apply_grad_op = grad_accum.apply_indexed_slices_grad(
                    grad, local_step=self._local_step._ref())
#                    weighted_grad, local_step=self._local_step._ref())
                  with ops.control_dependencies([apply_grad_op]):
                    finished_print_op = logging_ops.Print(global_step, [global_step], message="Done applying grads for variable %d" % index)
                    train_ops.append(finished_print_op)

      # Phase 2 gradient applying
      for index, (grad, var) in enumerate(grads_and_vars):
        with ops.device(var.device):
          work_idx_print1 = logging_ops.Print(worker_id, [worker_id], message="worker id for aggregate grad")
          ps_step_printer1 = logging_ops.Print(global_step, [global_step], message="global step printer1 on ps")
          num_replica_aggregate = logging_ops.Print(self._replicas_to_aggregate, [self._replicas_to_aggregate], message="num replica aggregate")
          train_ops.append(work_idx_print1)
          train_ops.append(ps_step_printer1)
          train_ops.append(num_replica_aggregate)
          grad_accum = self._accumulator_list[index][0]
       
          if grad is None:
            aggregated_grad.append(None)
          elif isinstance(grad, ops.Tensor):
            if collect_cdfs:
#              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
              aggregated_grad.append(grad_accum.take_grad(self._replicas_to_aggregate))
            else:
              aggregated_grad.append(grad_accum.take_grad(1))
          else:
            if collect_cdfs:
#              aggregated_grad.append(grad_accum.take_grad(self._total_num_replicas))
              aggregated_grad.append(grad_accum.take_grad(self._replicas_to_aggregate))
            else:
              aggregated_grad.append(grad_accum.take_indexed_slices_grad(1))

      aggregated_grads_and_vars = zip(aggregated_grad, var_list)

      # Some debug operations
      self.print_sizes = logging_ops.Print(global_step, [self._sync_token_queues[i].size() for i in range(self._total_num_replicas)], message="queue sizes")
      self.print_accum_sizes = logging_ops.Print(self._local_step,
                                                 [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id],
                                                 message="Accum sizes")
      self.print_local_step = logging_ops.Print(self._local_step, [self._local_step._ref(), global_step._ref()], message="local vs global step")

      # sync_op will be assigned to the same device as the global step.
      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies([self.print_accum_sizes]):
          update_op = self._opt.apply_gradients(aggregated_grads_and_vars, global_step)
          self._update_op = update_op
          with ops.control_dependencies([update_op]):
            sync_op = []
            for cur_worker_id in range(self._total_num_replicas):
              sync_op.append(self._sync_token_queues[cur_worker_id].enqueue(global_step))
            sync_op = control_flow_ops.group(*(sync_op))

        # dummy_queue is passed to the queue runner. Don't use the real queues
        # because the queue runner doesn't automatically reopen it once it
        # closed queues in PS devices.
        dummy_queue = (
            data_flow_ops.FIFOQueue(1,
                                    types_pb2.DT_INT32,
                                    shapes=(),
                                    shared_name="dummy_queue"))

        self._chief_queue_runner = queue_runner.QueueRunner(dummy_queue,
                                                            [sync_op])

      with ops.device(global_step.device), ops.name_scope(""):
        with ops.control_dependencies(train_ops):
          # Worker finished applying gradients. Add token to phase1_finished_queue
          train_op = logging_ops.Print(self._local_step._ref(),
                                       [x[0].num_accumulated() for x in self._accumulator_list] + [worker_id],
                                       message="Finished worker updates",
                                       name="FinishedWorkerUpdatesPrint")

      for accum, var in self._accumulator_list:
        with ops.device(var.device):
          chief_init_ops.append(
              accum.set_global_step(
                  global_step, name="SetGlobalStep"))
      self.chief_init_op = control_flow_ops.group(*(chief_init_ops))
      self._gradients_applied = True

      return train_op
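
The wrapper above is patterned after tf.train.SyncReplicasOptimizer, so a worker would typically obtain gradients from the wrapped optimizer and hand them to apply_gradients. A hedged usage sketch follows; `opt`, `loss`, `A`, `FLAGS.task_index`, `is_chief`, and the assumption that compute_gradients delegates to the underlying optimizer are hypothetical, while apply_gradients and chief_init_op come from the code above.

# `opt` is an instance of the synchronous wrapper defined above (name hypothetical).
grads_and_vars = opt.compute_gradients(loss)  # assumed to delegate to the wrapped optimizer
train_op = opt.apply_gradients(grads_and_vars,
                               worker_id=FLAGS.task_index,
                               global_step=global_step,
                               collect_cdfs=False,
                               matrix_to_solve=A,
                               num_batches_per_epoch=num_batches_per_epoch)
if is_chief:
    # The chief points the accumulators at the current global step and keeps the
    # per-worker token queues fed via the chief queue runner built above.
    sess.run(opt.chief_init_op)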
Code Example #14
0
queue = tf.FIFOQueue(32, dtypes=[batch.dtype])
enqueue_ops = []
dependency = None

for step_index in range(step_count):
    step = tf.strided_slice(batch, [0, step_index * step_size],
                            [tf.shape(batch)[0], (step_index + 1) * step_size])

    # Chain enqueues with control dependencies so the steps enter the queue in order.
    if dependency is None:
        dependency = queue.enqueue(step)
    else:
        with tf.control_dependencies([dependency]):
            dependency = queue.enqueue(step)

    enqueue_ops.append(dependency)

queue_runner.add_queue_runner(
    queue_runner.QueueRunner(queue=queue,
                             enqueue_ops=[tf.group(*enqueue_ops)]))
step = queue.dequeue()

supervisor = tf.train.Supervisor()

with supervisor.managed_session() as session:
    for batch_index in range(batch_count):
        for step_index in range(step_count):
            print("Batch %d, step %d" % (batch_index, step_index))
            print(session.run(step))
Code Example #15
0
File: graph_io.py Project: PaullMP/TensorFlowT
def _read_keyed_batch_examples_helper(file_pattern,
                                      batch_size,
                                      reader,
                                      randomize_input=True,
                                      num_epochs=None,
                                      queue_capacity=10000,
                                      num_threads=1,
                                      read_batch_size=1,
                                      parse_fn=None,
                                      setup_shared_queue=False,
                                      name=None):
  # Retrieve files to read.
  file_names = _get_file_names(file_pattern, randomize_input)

  # Check input parameters are given and reasonable.
  if (not queue_capacity) or (queue_capacity <= 0):
    raise ValueError('Invalid queue_capacity %s.' % queue_capacity)
  if (batch_size is None) or (
      (not isinstance(batch_size, ops.Tensor)) and
      (batch_size <= 0 or batch_size > queue_capacity)):
    raise ValueError(
        'Invalid batch_size %s, with queue_capacity %s.' %
        (batch_size, queue_capacity))
  if (read_batch_size is None) or (
      (not isinstance(read_batch_size, ops.Tensor)) and
      (read_batch_size <= 0)):
    raise ValueError('Invalid read_batch_size %s.' % read_batch_size)
  if (not num_threads) or (num_threads <= 0):
    raise ValueError('Invalid num_threads %s.' % num_threads)
  if (num_epochs is not None) and (num_epochs <= 0):
    raise ValueError('Invalid num_epochs %s.' % num_epochs)

  with ops.name_scope(name, 'read_batch_examples', [file_pattern]) as scope:
    with ops.name_scope('file_name_queue') as file_name_queue_scope:
      if setup_shared_queue:
        shared_file_name_queue = _get_shared_file_name_queue(
            file_names, randomize_input, num_epochs, file_name_queue_scope)
        file_name_queue = data_flow_ops.FIFOQueue(
            capacity=1, dtypes=[dtypes.string], shapes=[[]])
        enqueue_op = file_name_queue.enqueue(shared_file_name_queue.dequeue())
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(file_name_queue, [enqueue_op]))
      else:
        file_name_queue = input_ops.string_input_producer(
            constant_op.constant(
                file_names, name='input'),
            shuffle=randomize_input,
            num_epochs=num_epochs,
            name=file_name_queue_scope)

    example_list = _get_examples(file_name_queue, reader, num_threads,
                                 read_batch_size, parse_fn)

    enqueue_many = read_batch_size > 1

    if num_epochs is not None:
      allow_smaller_final_batch = True
    else:
      allow_smaller_final_batch = False

    # Setup batching queue given list of read example tensors.
    if randomize_input:
      if isinstance(batch_size, ops.Tensor):
        min_after_dequeue = int(queue_capacity * 0.4)
      else:
        min_after_dequeue = max(queue_capacity - (3 * batch_size), batch_size)
      queued_examples_with_keys = input_ops.shuffle_batch_join(
          example_list, batch_size, capacity=queue_capacity,
          min_after_dequeue=min_after_dequeue,
          enqueue_many=enqueue_many, name=scope,
          allow_smaller_final_batch=allow_smaller_final_batch)
    else:
      queued_examples_with_keys = input_ops.batch_join(
          example_list, batch_size, capacity=queue_capacity,
          enqueue_many=enqueue_many, name=scope,
          allow_smaller_final_batch=allow_smaller_final_batch)
    if parse_fn and isinstance(queued_examples_with_keys, dict):
      queued_keys = queued_examples_with_keys.pop(KEY_FEATURE_NAME)
      return queued_keys, queued_examples_with_keys
    return queued_examples_with_keys
Code Example #16
0
def bucket(tensors,
           which_bucket,
           batch_size,
           num_buckets,
           num_threads=1,
           capacity=32,
           shapes=None,
           dynamic_pad=False,
           allow_smaller_final_batch=False,
           keep_input=None,
           shared_name=None,
           name=None):
    """Lazy bucketing of input tensors according to `which_bucket`.

  The argument `tensors` can be a list or a dictionary of tensors.
  The value returned by the function will be of the same type
  as `tensors`.

  The tensors entering this function are put into the bucket given by
  `which_bucket`.  Each bucket has its own queue.  When a bucket contains
  `batch_size` elements, this minibatch is pushed onto a top queue.  The
  tensors returned from this function are the result of dequeueing the
  next minibatch from this top queue.

  This function is implemented using several queues. A `QueueRunner` for the
  queues is added to the current `Graph`'s `QUEUE_RUNNER` collection.

  As the returned tensors are the result of a dequeue operation, evaluating
  them will throw a `tf.errors.OutOfRangeError` when the input queue is
  exhausted.  If these tensors are feeding another input queue, its queue runner
  will catch this exception; however, if they are used in your main thread,
  you are responsible for catching this yourself.

  *N.B.:* If `dynamic_pad` is `False`, you must ensure that either
  (i) the `shapes` argument is passed, or (ii) all of the tensors in
  `tensors` must have fully-defined shapes. `ValueError` will be
  raised if neither of these conditions holds.

  If `dynamic_pad` is `True`, it is sufficient that the *rank* of the
  tensors is known, but individual dimensions may have shape `None`.
  In this case, for each enqueue the dimensions with value `None`
  may have a variable length; upon dequeue, the output tensors will be padded
  on the right to the maximum shape of the tensors in the current minibatch.
  For numbers, this padding takes value 0.  For strings, this padding is
  the empty string.  See `PaddingFIFOQueue` for more info.

  If `allow_smaller_final_batch` is `True`, a smaller batch value than
  `batch_size` is returned when the queues are closed and there are not enough
  elements to fill the batch, otherwise the pending elements are discarded.
  In addition, all output tensors' static shapes, as accessed via the
  `get_shape()` method will have a 0th `Dimension` value of `None`, and
  operations that depend on fixed batch_size would fail.

  Args:
    tensors: The list or dictionary of tensors, representing a single element,
      to bucket.  Nested lists are not supported.
    which_bucket: An `int32` scalar Tensor taking a value in `[0, num_buckets)`.
    batch_size: The new batch size pulled from the queue (all queues will have
      the same size).  If a list is passed in then each bucket will have a
      different batch_size.
      (python int, int32 scalar or iterable of integers of length num_buckets).
    num_buckets: A python integer, the number of buckets.
    num_threads: An integer.  The number of threads enqueuing `tensors`.
    capacity: An integer. The maximum number of minibatches in the top queue,
      and also the maximum number of elements within each bucket.
    shapes: (Optional) The shapes for each example.  Defaults to the
      inferred shapes for `tensors`.
    dynamic_pad: Boolean.  Allow variable dimensions in input shapes.
      The given dimensions are padded upon dequeue so that tensors within a
      batch have the same shapes.
    allow_smaller_final_batch: (Optional) Boolean. If `True`, allow the final
      batches to be smaller if there are insufficient items left in the queues.
    keep_input: (Optional).  A `bool` scalar Tensor.  If provided, this tensor
      controls whether the input is added to the queue or not.  If it evaluates
      `True`, then `tensors` are added to the bucket; otherwise they are
      dropped.  This tensor essentially acts as a filtering mechanism.
      The default behavior is to assume `keep_input=True`.
    shared_name: (Optional). If set, the queues will be shared under the given
      name across multiple sessions.
    name: (Optional) A name for the operations.

  Returns:
    A tuple `(bucket, outputs)` where `bucket` is
    a `int32` scalar tensor and `outputs` is a list or
    dictionary of batched outputs corresponding to elements of `tensors`.
    Every step will receive a new bucket of outputs.

  Raises:
    ValueError: If the `shapes` are not specified, and cannot be
      inferred from the elements of `tensors` or if batch_size is a sequence
      but its length != num_buckets.
  """
    batch_size_per_bucket = False
    if isinstance(batch_size, (list, tuple)):
        batch_size_per_bucket = True
        if len(batch_size) != num_buckets:
            raise ValueError(
                "If batch_size is a list it must have num_buckets elements")
    else:
        batch_size = [batch_size] * num_buckets
    tensor_list = _as_tensor_list(tensors)
    with ops.name_scope(name, "bucket", tensor_list) as name:
        tensor_list = _validate_bucket(tensor_list)
        (tensor_list, sparse_info) = _store_sparse_tensors(
            tensor_list,
            enqueue_many=False,
            keep_input=constant_op.constant(True))

        # Round-trip batch_size to a tensor, and possibly back
        for i, bucket_batch_size in enumerate(batch_size):
            bucket_batch_size = ops.convert_to_tensor(bucket_batch_size,
                                                      dtype=dtypes.int32,
                                                      name="batch_size")
            static_batch_size = tensor_util.constant_value(bucket_batch_size)
            batch_size[i] = (static_batch_size if static_batch_size is not None
                             else bucket_batch_size)

        types = _dtypes([tensor_list])
        shapes = _shapes([tensor_list], shapes, enqueue_many=False)

        which_bucket = ops.convert_to_tensor(which_bucket,
                                             dtype=dtypes.int32,
                                             name="which_bucket")

        queue_creator = _which_queue(dynamic_pad)
        bucket_queues = []
        for i in range(num_buckets):
            shared_name_i = ("%s_%d" % (shared_name, i)
                             if shared_name is not None else None)
            bucket_queues.append(
                queue_creator(capacity=capacity,
                              dtypes=types,
                              shapes=shapes,
                              shared_name=shared_name_i,
                              name="bucket_queue_%d" % i))

        maybe_static_batch_size = (None if (allow_smaller_final_batch
                                            or batch_size_per_bucket) else
                                   static_batch_size)

        bucket_shapes = [
            tensor_shape.vector(maybe_static_batch_size).concatenate(s)
            for s in bucket_queues[0].shapes
        ]
        # top_queue is a PaddingFIFOQueue even if the bucket queues are regular FIFO
        # queues because if we use allow_smaller_final_batch, shapes will
        # contain Nones in their first entry; as a result, a regular
        # FIFOQueue would die when being passed shapes that are not fully defined.
        top_queue = data_flow_ops.PaddingFIFOQueue(
            capacity=capacity,
            dtypes=[dtypes.int32] + types,
            shapes=[tensor_shape.scalar()] + bucket_shapes,
            shared_name=shared_name,
            name="top_queue")

        def enqueue_which():
            def enqueue_single(i):
                return bucket_queues[i].enqueue(tensor_list)

            enqueues = [
                control_flow_ops.cond(math_ops.equal(which_bucket, i),
                                      functools.partial(enqueue_single, i),
                                      control_flow_ops.no_op)
                for i in range(num_buckets)
            ]
            return control_flow_ops.group(*enqueues, name="group_enqueues")

        if keep_input is not None:
            # TODO(ebrevdo): Expand keep_input param to core training
            # methods, and pipe through to _store_sparse_tensors; so
            # that expensive serialization is guarded by keep_input.
            maybe_enqueue = control_flow_ops.cond(keep_input, enqueue_which,
                                                  control_flow_ops.no_op)
        else:
            maybe_enqueue = enqueue_which()

        bucket_enqueue_ops = [maybe_enqueue] * num_threads

        if allow_smaller_final_batch:
            which_dequeue = lambda q: q.dequeue_up_to
        else:
            which_dequeue = lambda q: q.dequeue_many

        enqueues_to_top = [
            top_queue.enqueue([constant_op.constant(i)] +
                              which_dequeue(q)(bs, name="read_bucket_%d" % i),
                              name="enqueue_from_bucket_%d" % i)
            for i, (q, bs) in enumerate(zip(bucket_queues, batch_size))
        ]

        for i, q in enumerate(bucket_queues):
            queue_runner.add_queue_runner(
                queue_runner.QueueRunner(
                    q, [enqueues_to_top[i]],
                    queue_closed_exception_types=(errors.OutOfRangeError,
                                                  errors.CancelledError)))
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(
                top_queue,
                bucket_enqueue_ops,
                queue_closed_exception_types=(errors.OutOfRangeError,
                                              errors.CancelledError)))

        for q in bucket_queues:
            summary.scalar("bucket/%s/size" % q.name,
                           math_ops.cast(top_queue.size(), dtypes.float32))
        summary.scalar(
            "bucket/%s/fraction_of_%d_full" % (top_queue.name, capacity),
            math_ops.cast(top_queue.size(), dtypes.float32) * (1. / capacity))

        dequeued = top_queue.dequeue(name="dequeue_top")
        which_bucket_dequeued = dequeued[0]
        dequeued = dequeued[1:]
        dequeued = _restore_sparse_tensors(dequeued, sparse_info)
        return (which_bucket_dequeued, _as_original_type(tensors, dequeued))
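
A short usage sketch of `bucket` may help; the length-based bucketing rule, the toy input tensors, and the batch settings below are assumptions made for illustration only.

import tensorflow as tf

num_buckets = 4
# Toy stand-ins for a single (sequence, label) example coming out of an input
# pipeline; real sequences would have variable length, which dynamic_pad=True
# pads per minibatch.
seq = tf.random_uniform([20])
label = tf.constant(1, dtype=tf.int32)

seq_len = tf.shape(seq)[0]
which = tf.minimum(seq_len // 10, num_buckets - 1)  # hypothetical bucketing rule

bucket_idx, (batched_seq, batched_label) = bucket(
    tensors=[seq, label],
    which_bucket=which,
    batch_size=32,
    num_buckets=num_buckets,
    num_threads=2,
    capacity=64,
    dynamic_pad=True,
    allow_smaller_final_batch=True)
# The batched outputs come from queue dequeues, so tf.train.start_queue_runners
# must be called before evaluating them.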
Code Example #17
0
File: graph_io.py Project: PaullMP/TensorFlowT
def queue_parsed_features(parsed_features,
                          keys=None,
                          feature_queue_capacity=100,
                          num_queue_runners=None,
                          num_enqueue_threads=None,
                          name=None):
  """Speeds up parsing by using queues to do it asynchronously.

  This function adds the tensors in `parsed_features` to a queue, which allows
  the parsing (or any other expensive op before this) to be asynchronous with respect to the
  rest of the training graph. This greatly improves read latency and speeds up
  training since the data will already be parsed and ready when each step of
  training needs it.

  All queue runners are added to the queue runners collection, and may be
  started via `start_queue_runners`.

  All ops are added to the default graph.

  Args:
    parsed_features: A dict of string key to `Tensor` or `SparseTensor` objects.
    keys: `Tensor` of string keys.
    feature_queue_capacity: Capacity of the parsed features queue.
    num_queue_runners: Deprecated. Defaults to 2 if this and
      `num_enqueue_threads` are both `None`. This is the number of queue
      runners to start for the feature queue. Adding multiple queue runners for
      the parsed example queue helps maintain a full queue when the subsequent
      computations overall are cheaper than parsing. This argument will be
      deprecated and replaced with `num_enqueue_threads`.
    num_enqueue_threads: Number of threads to enqueue the parsed example queue.
      Using multiple threads to enqueue the parsed example queue helps maintain
      a full queue when the subsequent computations overall are cheaper than
      parsing. This argument will replace `num_queue_runners`. This and
      `num_queue_runners` can not both be set.
    name: Name of resulting op.

  Returns:
    Returns tuple of:
    - `Tensor` corresponding to `keys` if provided, otherwise `None`.
    -  A dict of string key to `Tensor` or `SparseTensor` objects corresponding
       to `parsed_features`.
  Raises:
    ValueError: for invalid inputs.
  """
  num_queue_runners, num_enqueue_threads = _check_enqueue_params(
      num_queue_runners, num_enqueue_threads)

  args = list(parsed_features.values())
  if keys is not None:
    args += [keys]

  with ops.name_scope(name, 'queue_parsed_features', args):
    # Lets also add preprocessed tensors into the queue types for each item of
    # the queue.
    tensors_to_enqueue = []
    # Each entry contains the key, and a boolean which indicates whether the
    # tensor was a sparse tensor.
    tensors_mapping = []
    # TODO(sibyl-Aix6ihai): Most of the functionality here is about pushing sparse
    # tensors into a queue. This could be taken care in somewhere else so others
    # can reuse it. Also, QueueBase maybe extended to handle sparse tensors
    # directly.
    for key in sorted(parsed_features.keys()):
      tensor = parsed_features[key]
      if isinstance(tensor, ops.SparseTensor):
        tensors_mapping.append((key, True))
        tensors_to_enqueue.extend([tensor.indices, tensor.values, tensor.shape])
      else:
        tensors_mapping.append((key, False))
        tensors_to_enqueue.append(tensor)

    if keys is not None:
      tensors_to_enqueue.append(keys)

    queue_dtypes = [x.dtype for x in tensors_to_enqueue]
    input_queue = data_flow_ops.FIFOQueue(feature_queue_capacity, queue_dtypes)

    # Add a summary op to debug if our feature queue is full or not.
    logging_ops.scalar_summary('queue/parsed_features/%s/fraction_of_%d_full' %
                               (input_queue.name, feature_queue_capacity),
                               math_ops.cast(input_queue.size(), dtypes.float32)
                               * (1. / feature_queue_capacity))

    # Add multiple queue runners so that the queue is always full. Adding more
    # than two queue-runners may hog the cpu on the worker to fill up the queue.
    #
    # Note: this can result in large last batch being lost as the multiple queue
    # runner threads do not coordinate with each other. Please use
    # `num_enqueue_threads` instead.
    if num_queue_runners is not None:
      for _ in range(num_queue_runners):
        queue_runner.add_queue_runner(
            queue_runner.QueueRunner(
                input_queue, [input_queue.enqueue(tensors_to_enqueue)],
                queue_closed_exception_types=(errors.OutOfRangeError,
                                              errors.CancelledError)))
    # Use a single QueueRunner with multiple threads to enqueue so the queue is
    # always full. The threads are coordinated so the last batch will not be
    # lost.
    elif num_enqueue_threads is not None:
      enqueue_ops = [input_queue.enqueue(tensors_to_enqueue)
                     for _ in range(num_enqueue_threads)]
      queue_runner.add_queue_runner(queue_runner.QueueRunner(
          input_queue, enqueue_ops,
          queue_closed_exception_types=(errors.OutOfRangeError,
                                        errors.CancelledError)))
    else:
      raise AssertionError(
          'Either `num_queue_runners` or `num_enqueue_threads` should have '
          'been set.')

    dequeued_tensors = input_queue.dequeue()

    # Reset shapes on dequeued tensors.
    for i in range(len(tensors_to_enqueue)):
      dequeued_tensors[i].set_shape(tensors_to_enqueue[i].get_shape())

    # Recreate feature mapping according to the original dictionary.
    dequeued_parsed_features = {}
    index = 0
    for key, is_sparse_tensor in tensors_mapping:
      if is_sparse_tensor:
        # Three tensors are (indices, values, shape).
        dequeued_parsed_features[key] = ops.SparseTensor(
            dequeued_tensors[index], dequeued_tensors[index + 1],
            dequeued_tensors[index + 2])
        index += 3
      else:
        dequeued_parsed_features[key] = dequeued_tensors[index]
        index += 1

    dequeued_keys = None
    if keys is not None:
      dequeued_keys = dequeued_tensors[-1]

    return dequeued_keys, dequeued_parsed_features
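
A minimal sketch of how this function might sit between parsing and training; the feature spec, the `serialized_examples` and `example_keys` tensors, and the thread count are placeholders, not taken from this file.

# serialized_examples: a batch of serialized Example protos from a reader;
# example_keys: the matching string keys (both hypothetical here).
features = {
    "age": parsing_ops.FixedLenFeature([], dtypes.int64),
    "query": parsing_ops.VarLenFeature(dtypes.string),
}
parsed = parsing_ops.parse_example(serialized_examples, features)

queued_keys, queued_features = queue_parsed_features(
    parsed,
    keys=example_keys,
    feature_queue_capacity=100,
    num_enqueue_threads=2)
# queued_features has the same keys as `parsed`, but its tensors are dequeued
# asynchronously from the feature queue created above.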
Code Example #18
0
  def set_many_fed_tensors(self, tensors):
    """Sets batches of fed tensors."""
    enq_op = self._local_q.enqueue_many(tensors)
    queue_runner.add_queue_runner(queue_runner.QueueRunner(
        self._local_q, [enq_op]))
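
For context, a hedged sketch of how a helper like this might be driven; `feeder`, `images`, and `labels` are hypothetical names, and only set_many_fed_tensors comes from the snippet above.

# `feeder` owns the local queue `_local_q` used above (hypothetical instance).
feeder.set_many_fed_tensors([images, labels])

with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # Training ops that dequeue from the local queue run here.
    coord.request_stop()
    coord.join(threads)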