Example #1
  def _generate_enqueue_op(self,
                           inputs,
                           name_prefix,
                           index,
                           device=None,
                           tpu_ordinal=-1):
    """Generate a host-side Op to enqueue a tuple to the queue.

    If device is None the inputs are all required to have the same
    device specification, and the enqueue Op is colocated with
    inputs[0]. Otherwise the enqueue Op is placed on 'device'.

    Args:
      inputs: a list of Tensors with the types and shapes of the tuple elements.
      name_prefix: the base name for the Op.
      index: the shard index, used to uniquify the Op name.
      device: device to place the Op on, or None if it should be
        colocated with the inputs.
      tpu_ordinal: ordinal of the TPU device on the host to use for
        infeed if device is a CPU device. Should be set to -1 if device
        is a TPU device.

    Returns:
      An Op corresponding to a shard of infeed enqueued at the host,
      suitable for use within a replicated block.

    Raises:
      ValueError: if device is None and inputs do not all have the
        same device specification.
    """
    full_name = "%s/%d" % (name_prefix, index)
    shapes = [t.shape for t in inputs]
    if device is None:
      devices = [t.device for t in inputs]
      for i in xrange(1, self.number_of_tuple_elements):
        if devices[0] != devices[i]:
          raise ValueError(
              "input devices for shard %d are %s, but should all be the same" %
              (index, str(devices)))
      with ops.colocate_with(inputs[0]):
        return tpu_ops.infeed_enqueue_tuple(
            inputs=inputs,
            shapes=shapes,
            name=full_name,
            device_ordinal=tpu_ordinal)
    else:
      with ops.device(device):
        return tpu_ops.infeed_enqueue_tuple(
            inputs=inputs,
            shapes=shapes,
            name=full_name,
            device_ordinal=tpu_ordinal)
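
A minimal driver sketch, not part of the snippet above: it assumes `queue` is an instance of the class that defines _generate_enqueue_op, `sharded_inputs` is a list with one tuple of input Tensors per shard, and that shard i feeds TPU ordinal i on the local host. All of these names and the ordinal mapping are illustrative assumptions.

def build_enqueue_ops(queue, sharded_inputs, name_prefix="infeed"):
  # One enqueue Op per shard. device=None colocates each Op with its own
  # shard's inputs; tpu_ordinal=index assumes shard i maps to TPU core i
  # on this host (an assumption, not something the snippet requires).
  enqueue_ops = []
  for index, inputs in enumerate(sharded_inputs):
    enqueue_ops.append(
        queue._generate_enqueue_op(
            inputs, name_prefix, index, device=None, tpu_ordinal=index))
  return enqueue_ops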
Example #2
  def generate_enqueue_ops(self, sharded_inputs):
    """Generates the host-side Ops to enqueue the partitioned inputs.

    sharded_inputs is a list, one for each replica, of lists of
    Tensors. sharded_inputs[i] is the tuple of Tensors to use to feed
    replica i.
    sharded_inputs[i][j] is partitioned by self._input_partition_dims[j].

    For example, if sharded_inputs[i][j] is a 2-D Tensor:
    [[A, B, C, D],
     [E, F, G, H]]
    and self._input_partition_dims[j] is [2, 4], then

    sharded_inputs[i][j] will be partitioned and flattened into:
    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
    [0, 1, 2, 3, 4, 5, 6, 7] respectively.

    Args:
      sharded_inputs: a list of lists of Tensors. The length of the
        outer list determines the number of shards. Each inner list indicates
        the types and shapes of the tuples in the corresponding shard.

    Returns:
      A list of host-side Ops, one for each shard, that when executed together
      will enqueue a full-size element of infeed.

    Raises:
      ValueError: if the queue configuration has previously been frozen and the
        shapes of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the shapes of the elements of sharded_inputs
        don't form a consistent unsharded tuple; or if the elements of a tuple
        have different device constraints; or if the partition dims are invalid.
      TypeError: if the queue configuration has previously been frozen and the
        types of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the types of the elements of sharded_inputs
        don't form a consistent unsharded tuple.
    """
    self.set_configuration_from_sharded_input_tensors(sharded_inputs)
    number_of_replicas = len(sharded_inputs)
    number_of_tuple_elements = len(sharded_inputs[0])

    assert len(self._input_partition_dims) == number_of_tuple_elements
    enqueue_ops = []

    for replica_index in range(number_of_replicas):
      flattened_inputs = sharded_inputs[replica_index]
      inputs_part_dims_flat = nest.flatten_up_to(flattened_inputs,
                                                 self._input_partition_dims)
      inputs_parted_iters = [
          iter(self._check_dims_and_partition_or_replicate_on_host(x, dims))
          for x, dims in zip(sharded_inputs[replica_index],
                             inputs_part_dims_flat)
      ]

      # Find the replica_id of the host's logical core 0.
      # self._host_id is guaranteed to host logical core 0, even when
      # num_cores_per_replica > num_cores_per_host -- the caller makes sure
      # that this host will be receiving data (i.e. it calls input_fn).
      replica_id = self._device_assignment.lookup_replicas(
          task_id=self._host_id, logical_core=0)[replica_index]
      for logical_core in xrange(self._device_assignment.num_cores_per_replica):
        # Place different partitions on different logical cores.
        # Since there can be multiple hosts per replica, we need to find
        # the actual host (device) of this logical core.
        device = self._device_assignment.host_device(
            replica=replica_id, logical_core=logical_core)

        with ops.device(device):
          ordinal = self._device_assignment.tpu_ordinal(
              replica=replica_id, logical_core=logical_core)
          infeed_inputs = []
          for it in inputs_parted_iters:
            input_for_device = next(it, None)
            if input_for_device is not None:
              infeed_inputs.append(input_for_device)

          if infeed_inputs:
            enqueue_ops.append(
                tpu_ops.infeed_enqueue_tuple(
                    inputs=infeed_inputs,
                    shapes=[x.shape for x in infeed_inputs],
                    name="enqueue/replica_{0}/input_{1}".format(
                        replica_index, logical_core),
                    device_ordinal=ordinal))
    return enqueue_ops
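
The partition-and-flatten layout described in the docstring can be illustrated standalone. The sketch below approximates, with NumPy, what _check_dims_and_partition_or_replicate_on_host is expected to produce for the 2-D example with partition dims [2, 4]; it illustrates only the layout, not the library helper itself.

import numpy as np

def partition_and_flatten(tensor, dims):
  # Split axis 0 into dims[0] blocks, split each block's axis 1 into dims[1]
  # pieces, and return the pieces in row-major order of the partition grid.
  pieces = []
  for row_block in np.split(tensor, dims[0], axis=0):
    pieces.extend(np.split(row_block, dims[1], axis=1))
  return pieces

x = np.arange(8).reshape(2, 4)   # [[0, 1, 2, 3], [4, 5, 6, 7]]
parts = partition_and_flatten(x, [2, 4])
# parts[k] is the piece fed to logical core k: [[0]], [[1]], ..., [[7]]
print([p.tolist() for p in parts])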
Example #3
  def generate_enqueue_ops(self, per_host_sharded_inputs):
    """Generates the host-side Ops to enqueue the partitioned inputs.

    per_host_sharded_inputs is a list, one for each replica, of lists of
    Tensors. per_host_sharded_inputs[i] is the tuple of Tensors to use to
    feed replica i. per_host_sharded_inputs[i][j] is partitioned by
    self._input_partition_dims[j].

    For example, if per_host_sharded_inputs[i][j] is a 2-D Tensor:
    [[A, B, C, D],
     [E, F, G, H]]
    and self._input_partition_dims[j] is [2, 4], then

    per_host_sharded_inputs[i][j] will be partitioned and flattened into:
    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
    [0, 1, 2, 3, 4, 5, 6, 7] respectively.

    Args:
      per_host_sharded_inputs: a list of lists of Tensors. The length of the
        outer list determines the number of shards. Each inner list indicates
        the types and shapes of the tuples in the corresponding shard.

    Returns:
      A list of host-side Ops, one for each shard, that when executed together
      will enqueue a full-size element of infeed.

    Raises:
      ValueError: if the queue configuration has previously been frozen and the
        shapes of the elements of per_host_sharded_inputs are not compatible
        with the frozen configuration; or if the shapes of the elements of
        per_host_sharded_inputs don't form a consistent unsharded tuple; or if
        the elements of a tuple have different device constraints; or if the
        partition dims are invalid.
      TypeError: if the queue configuration has previously been frozen and the
        types of the elements of per_host_sharded_inputs are not compatible
        with the frozen configuration; or if the types of the elements of
        per_host_sharded_inputs don't form a consistent unsharded tuple.
    """
    self.set_configuration_from_sharded_input_tensors(per_host_sharded_inputs)
    number_of_replicas_per_host = len(per_host_sharded_inputs)
    number_of_tuple_elements = len(per_host_sharded_inputs[0])

    assert len(self._input_partition_dims) == number_of_tuple_elements
    per_host_enqueue_ops = []

    for replica_index in range(number_of_replicas_per_host):
      flattened_inputs = per_host_sharded_inputs[replica_index]
      inputs_part_dims_flat = nest.flatten_up_to(flattened_inputs,
                                                 self._input_partition_dims)
      inputs_parted_iters = [
          iter(self._check_dims_and_partition_or_replicate_on_host(x, dims))
          for x, dims in zip(per_host_sharded_inputs[replica_index],
                             inputs_part_dims_flat)
      ]

      for logical_core in xrange(self._device_assignment.num_cores_per_replica):
        # Place different partitions on different logical cores.
        replica_id = self._device_assignment.lookup_replicas(
            self._host_id, logical_core)[replica_index]
        ordinal = self._device_assignment.tpu_ordinal(
            replica=replica_id, logical_core=logical_core)
        infeed_inputs = []
        for it in inputs_parted_iters:
          input_for_device = next(it, None)
          if input_for_device is not None:
            infeed_inputs.append(input_for_device)

        if infeed_inputs:
          per_host_enqueue_ops.append(
              tpu_ops.infeed_enqueue_tuple(
                  inputs=infeed_inputs,
                  shapes=[x.shape for x in infeed_inputs],
                  name="enqueue/replica_{0}/input_{1}".format(
                      replica_index, logical_core),
                  device_ordinal=ordinal))
    return per_host_enqueue_ops
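
For context, a hedged end-to-end sketch: it assumes `infeed_queue` is an instance of the partitioned infeed queue class these methods come from, and `input_fn` is a caller-supplied function returning the tuple of input Tensors for one replica. The helper name and parameters below are illustrative, not library API.

def build_per_host_enqueue_ops(infeed_queue, input_fn, replicas_per_host):
  # Build one tuple of input Tensors per replica handled by this host;
  # generate_enqueue_ops then splits each tuple element across logical
  # cores according to the queue's partition dims.
  per_host_sharded_inputs = [
      list(input_fn()) for _ in range(replicas_per_host)
  ]
  return infeed_queue.generate_enqueue_ops(per_host_sharded_inputs)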