Example #1
  def _generate_enqueue_op(self,
                           inputs,
                           name_prefix,
                           index,
                           device=None,
                           tpu_ordinal=-1):
    """Generate a host-side Op to enqueue a tuple to the queue.

    If device is None the inputs are all required to have the same
    device specification, and the enqueue Op is colocated with
    inputs[0]. Otherwise the enqueue Op is placed on 'device'.

    Args:
      inputs: a list of Tensors with the types and shapes of the tuple elements.
      name_prefix: the base name for the Op.
      index: the shard index, used to uniquify the Op name.
      device: device to place the Op on, or None if it should be
        colocated with the inputs.
      tpu_ordinal: ordinal of the TPU device on the host to use for
        infeed if device is a CPU device. Should be set to -1 if device
        is a TPU device.

    Returns:
      An Op corresponding to a shard of infeed enqueued at the host,
      suitable for use within a replicated block.

    Raises:
      ValueError: if device is None and inputs do not all have the
        same device specification.
    """
    full_name = "%s/%d" % (name_prefix, index)
    shapes = [t.shape for t in inputs]
    if device is None:
      devices = [t.device for t in inputs]
      for i in range(1, self.number_of_tuple_elements):
        if devices[0] != devices[i]:
          raise ValueError(
              "input devices for shard %d are %s, but should all be the same"
              % (index, str(devices)))
      with ops.colocate_with(inputs[0]):
        return tpu_ops.infeed_enqueue_tuple(
            inputs=inputs,
            shapes=shapes,
            name=full_name,
            device_ordinal=tpu_ordinal)
    else:
      with ops.device(device):
        return tpu_ops.infeed_enqueue_tuple(
            inputs=inputs,
            shapes=shapes,
            name=full_name,
            device_ordinal=tpu_ordinal)
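As a hedged illustration of the explicit-device branch above, here is a minimal standalone sketch. The import path follows the TF 1.x contrib layout, and `feature`/`label` are illustrative tensors, not part of the snippet:

import tensorflow as tf
from tensorflow.contrib.tpu.python.ops import tpu_ops

feature = tf.zeros([8, 28, 28, 1])
label = tf.zeros([8], dtype=tf.int32)

# Explicit placement: enqueue from the host CPU, targeting TPU core 0
# through device_ordinal (this mirrors the `else` branch above; the
# `device is None` branch instead colocates with inputs[0]).
with tf.device('/device:CPU:0'):
  enqueue_op = tpu_ops.infeed_enqueue_tuple(
      inputs=[feature, label],
      shapes=[feature.shape, label.shape],
      name='enqueue-sketch',
      device_ordinal=0)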
Example #2
    def build_infeed_from_input_specs(self, input_specs, execution_mode):
        infeed_op = []
        shard_infeed_tensors = []

        for shard_id in range(self._strategy.num_towers):
            with ops.device('/device:CPU:0'):
                infeed_tensors = []
                with ops.device('/device:TPU:%d' % shard_id):
                    for spec in input_specs:
                        # Construct placeholders for each of the inputs.
                        infeed_tensors.append(
                            array_ops.placeholder(dtype=spec.dtype,
                                                  shape=spec.shape,
                                                  name='infeed-enqueue-%s-%d' %
                                                  (spec.name, shard_id)))
                shard_infeed_tensors.append(infeed_tensors)

                infeed_op.append(
                    tpu_ops.infeed_enqueue_tuple(
                        infeed_tensors, [spec.shape for spec in input_specs],
                        name='infeed-enqueue-%s-%d' %
                        (execution_mode, shard_id),
                        device_ordinal=shard_id))
        return SizedInfeed(infeed_ops=infeed_op,
                           sharded_infeed_tensors=shard_infeed_tensors)
Example #3
    def infeed_input(i):
      """Get input, split it and then enqueue."""
      batches = iterator.get_next()
      batches = array_ops.split(batches, 2)

      infeeds = [
          tpu_ops.infeed_enqueue_tuple(
              inputs=[batches[j]], shapes=[[1, 1, 1]], device_ordinal=j)
          for j in range(2)
      ]

      with ops.control_dependencies(infeeds):
        return i + 1
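A hedged sketch of how a body like `infeed_input` is typically driven: because it takes and returns the loop counter, it fits `tf.while_loop` directly. The names below (`num_iterations`, and `infeed_input` being in scope at top level) are assumptions, not from the snippet:

from tensorflow.python.framework import constant_op
from tensorflow.python.ops import control_flow_ops

num_iterations = 10  # hypothetical number of enqueue steps

# Each iteration enqueues one split batch per core, then increments the
# counter; the control dependency inside infeed_input guarantees the
# enqueues run before i + 1 is produced.
enqueue_loop = control_flow_ops.while_loop(
    lambda i: i < num_iterations,
    infeed_input,
    [constant_op.constant(0)])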
Example #4
  def build_infeed_from_input_specs(self, input_specs, execution_mode):
   shard_infeed_tensors = self._get_next_ops
   assert len(shard_infeed_tensors) == self._strategy.num_towers
   infeed_ops = []
   for shard_id in range(self._strategy.num_towers):
     with ops.device('/device:TPU:%d' % shard_id):
       infeed_ops.append(
           tpu_ops.infeed_enqueue_tuple(
               shard_infeed_tensors[shard_id],
               [spec.shape for spec in input_specs],
               name='infeed-enqueue-%s-%d' % (execution_mode, shard_id)))
   return SizedInfeed(infeed_ops=infeed_ops,
                      sharded_infeed_tensors=shard_infeed_tensors)
Example #5
  def build_infeed_from_input_specs(self, input_specs, execution_mode):
     shard_infeed_tensors = self._get_next_ops
     assert len(shard_infeed_tensors) == self._strategy.num_towers
     infeed_ops = []
     for shard_id in range(self._strategy.num_towers):
         with ops.device('/device:TPU:%d' % shard_id):
             infeed_ops.append(
                 tpu_ops.infeed_enqueue_tuple(
                     shard_infeed_tensors[shard_id],
                     [spec.shape for spec in input_specs],
                     name='infeed-enqueue-%s-%d' %
                     (execution_mode, shard_id)))
     return SizedInfeed(infeed_ops=infeed_ops,
                        sharded_infeed_tensors=shard_infeed_tensors)
Example #6
    def infeed_input(i):
      """Get input, split it and then enqueue."""
      iteration_inputs = [f.get(i) for f in feeds()]
      infeed_inputs = [[inputs_per_core[core_id]
                        for inputs_per_core in iteration_inputs]
                       for core_id in range(self._num_cores_per_host)]

      infeed_ops = []
      for core_id, infeed_input in enumerate(infeed_inputs):
        infeed_ops.append(
            tpu_ops.infeed_enqueue_tuple(
                inputs=infeed_input, shapes=shapes, device_ordinal=core_id))

      with ops.control_dependencies(infeed_ops):
        return i + 1
Example #7
    def infeed_input(i):
      """Get input, split it and then enqueue."""
      iteration_inputs = [f.get(i) for f in feeds()]

      infeed_inputs = [[inputs_per_core[core_id]
                        for inputs_per_core in iteration_inputs]
                       for core_id in range(self._num_cores_per_host)]

      infeed_ops = []
      for core_id, infeed_input in enumerate(infeed_inputs):
        infeed_ops.append(
            tpu_ops.infeed_enqueue_tuple(
                inputs=infeed_input, shapes=shapes, device_ordinal=core_id))

      with ops.control_dependencies(infeed_ops):
        return i + 1
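Example #8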
    def enqueue_ops_fn():
      """Enqueue ops for one iteration."""
      control_deps = []
      sharded_inputs = []
      with ops.device(self._host):
        for _ in range(self._num_cores_per_host):
          # Use control dependencies to ensure a deterministic ordering.
          with ops.control_dependencies(control_deps):
            inputs = nest.flatten(iterator.get_next())
            control_deps.extend(inputs)
            sharded_inputs.append(inputs)

      enqueue_ops = []
      for core_id, shard_input in enumerate(sharded_inputs):
        enqueue_ops.append(
            tpu_ops.infeed_enqueue_tuple(
                inputs=shard_input, shapes=shapes, device_ordinal=core_id))
      return enqueue_ops
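For context, a hedged sketch of the TPU-side counterpart these host-side enqueues pair with: inside the replicated computation, a dequeue must declare the same dtypes and shapes. The two-element tuple and the dtypes below are assumptions; `shapes` stands for the same list used above:

from tensorflow.python.framework import dtypes
from tensorflow.python.ops import math_ops

def tpu_step():
  # Must mirror the enqueue: same number of elements, dtypes and shapes.
  features, labels = tpu_ops.infeed_dequeue_tuple(
      dtypes=[dtypes.float32, dtypes.int32],
      shapes=shapes,
      name='infeed-dequeue')
  return math_ops.reduce_mean(features)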
Example #9
  def build_infeed_from_input_specs(self, input_specs, execution_mode):
    infeed_op = []
    shard_infeed_tensors = []

    for shard_id in range(self._strategy.num_towers):
      with ops.device('/device:TPU:%d' % shard_id):
        infeed_tensors = []
        for spec in input_specs:
          # Construct placeholders for each of the inputs.
          infeed_tensors.append(
              array_ops.placeholder(
                  dtype=spec.dtype,
                  shape=spec.shape,
                  name='infeed-enqueue-%s-%d' % (spec.name, shard_id)))
        shard_infeed_tensors.append(infeed_tensors)

        infeed_op.append(
            tpu_ops.infeed_enqueue_tuple(
                infeed_tensors, [spec.shape for spec in input_specs],
                name='infeed-enqueue-%s-%d' % (execution_mode, shard_id)))
    return SizedInfeed(infeed_ops=infeed_op,
                       sharded_infeed_tensors=shard_infeed_tensors)
Example #10
    def _specialize_model(self, input_specs):
        """Specialize `self.model` (a Keras model) for the given input shapes."""
        # Re-create our input and output layers inside our subgraph.  They will be
        # attached to the true computation when we clone our model in `_model_fn`.
        K.set_learning_phase(
            self.execution_mode == model_fn_lib.ModeKeys.TRAIN)

        # functools.partial and callable objects are not supported by tpu.rewrite
        def _model_fn():
            """Compute fit/eval/predict for the TPU."""
            is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
            is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
            is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

            # During train/eval, we infeed our features as well as labels.
            if is_training or is_test:
                infeed_layers = self.model._input_layers + self.model._output_layers
            else:
                infeed_layers = self.model._input_layers

            # Generate our infeed operation to read features & labels.
            infeed_tensors = tpu_ops.infeed_dequeue_tuple(
                dtypes=[spec.dtype for spec in input_specs],
                shapes=[spec.shape for spec in input_specs],
                name='infeed-%s' % self.execution_mode)

            assert len(infeed_tensors) == len(infeed_layers), (
                'Infeed inputs did not match model: %s vs %s' %
                (infeed_layers, infeed_tensors))

            tpu_targets = []
            tpu_inputs = []

            # Sort infeed outputs into inputs and labels for calling our Keras model.
            for tensor, layer in zip(infeed_tensors, infeed_layers):
                if layer in self.model._input_layers:
                    tpu_inputs.append(
                        layers.Input(name=layer.name, tensor=tensor))
                if layer in self.model._output_layers:
                    tpu_targets.append(tensor)

            # Call our model with our infeed inputs (re-using the weights).
            model_outputs = self.model(tpu_inputs)
            child_model = models.Model(inputs=tpu_inputs,
                                       outputs=model_outputs)

            if is_training or is_test:
                child_model.compile(
                    optimizer=_replicated_optimizer(self.model.optimizer,
                                                    self.num_replicas),
                    loss=self.model.loss,
                    loss_weights=self.model.loss_weights,
                    metrics=self.model.metrics,
                    weighted_metrics=self.model.weighted_metrics,
                    target_tensors=tpu_targets,
                )

            # Compute our outfeed depending on the execution mode
            if is_training:
                child_model._make_train_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.train_function.outputs
                ]
                return [
                    child_model.train_function.updates_op,
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.train_function.outputs,
                        name='outfeed-enqueue-train')
                ]
            elif is_test:
                child_model._make_test_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.test_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.test_function.outputs,
                        name='outfeed-enqueue-test')
                ]
            elif is_predict:
                child_model._make_predict_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.predict_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.predict_function.outputs,
                        name='outfeed-enqueue-predict',
                    )
                ]
            else:
                assert False, 'Unexpected execution mode: %s' % self.execution_mode

        # Capture outfeed metadata computed during the rewrite.
        self._outfeed_spec = None

        # Generate our TPU operations using `tpu.split_compile_and_replicate`.
        # `compile_op` can be used to test that the TPU model compiles before execution.
        # `execute_op` replicates `_model_fn` `num_replicas` times, with each shard
        # running on a different logical core.
        compile_op, execute_op = tpu.split_compile_and_replicate(
            _model_fn, inputs=[[]] * self.num_replicas)

        # Generate CPU side operations to enqueue features/labels and dequeue
        # outputs from the model call.
        infeed_op = []
        outfeed_op = []
        shard_infeed_tensors = []

        for shard_id in range(self.num_replicas):
            with ops.device('/device:TPU:%d' % shard_id):
                infeed_tensors = []
                for spec in input_specs:
                    infeed_tensors.append(
                        array_ops.placeholder(dtype=spec.dtype,
                                              shape=spec.shape,
                                              name='infeed-enqueue-%s-%d' %
                                              (spec.name, shard_id)))
                shard_infeed_tensors.append(infeed_tensors)

                infeed_op.append(
                    tpu_ops.infeed_enqueue_tuple(
                        infeed_tensors, [spec.shape for spec in input_specs],
                        name='infeed-enqueue-%s-%d' %
                        (self.execution_mode, shard_id)))

                outfeed_op.extend(
                    tpu_ops.outfeed_dequeue_tuple(
                        dtypes=[spec.dtype for spec in self._outfeed_spec],
                        shapes=[spec.shape for spec in self._outfeed_spec],
                        name='outfeed-dequeue-%s-%d' %
                        (self.execution_mode, shard_id)))

        return TPUModelOp(compile_op,
                          execute_op,
                          infeed_tensors=shard_infeed_tensors,
                          infeed_op=infeed_op,
                          outfeed_op=outfeed_op)
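A hedged sketch of driving the returned TPUModelOp. The assumptions here (graph mode with a tf.Session named `session`, per-shard NumPy inputs `shard_data`, and a `tpu_model` object exposing this method) are illustrative names, not from the code above:

model_op = tpu_model._specialize_model(input_specs)

# Check that the TPU program compiles before paying for execution.
session.run(model_op.compile_op)

# Feed every shard's placeholders, then run infeed, execute and outfeed
# together so the enqueue, the TPU step and the dequeue can overlap.
feed_dict = {}
for shard_id, placeholders in enumerate(model_op.infeed_tensors):
  for placeholder, value in zip(placeholders, shard_data[shard_id]):
    feed_dict[placeholder] = value

_, _, outfeed_values = session.run(
    [model_op.infeed_op, model_op.execute_op, model_op.outfeed_op],
    feed_dict=feed_dict)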
Example #11
  def _specialize_model(self, input_specs):
    """Specialize `self.model` (a Keras model) for the given input shapes."""
    # Re-create our input and output layers inside our subgraph.  They will be
    # attached to the true computation when we clone our model in `_model_fn`.
    K.set_learning_phase(self.execution_mode == model_fn_lib.ModeKeys.TRAIN)

    # functools.partial and callable objects are not supported by tpu.rewrite
    def _model_fn():
      """Compute fit/eval/predict for the TPU."""
      is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
      is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
      is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

      # During train/eval, we infeed our features as well as labels.
      if is_training or is_test:
        infeed_layers = self.model._input_layers + self.model._output_layers
      else:
        infeed_layers = self.model._input_layers

      # Generate our infeed operation to read features & labels.
      infeed_tensors = tpu_ops.infeed_dequeue_tuple(
          dtypes=[spec.dtype for spec in input_specs],
          shapes=[spec.shape for spec in input_specs],
          name='infeed-%s' % self.execution_mode)

      assert len(infeed_tensors) == len(infeed_layers), (
          'Infeed inputs did not match model: %s vs %s' %
          (infeed_layers, infeed_tensors))

      tpu_targets = []
      tpu_input_map = {}

      # Sort infeed outputs into inputs and labels for calling our Keras model.
      for tensor, layer in zip(infeed_tensors, infeed_layers):
        if layer in self.model._input_layers:
          tpu_input_map[layer.name] = tensor
        if layer in self.model._output_layers:
          tpu_targets.append(tensor)

      # Clone our CPU model, running within the TPU device context.
      with TPURewriteContext(tpu_input_map):
        self._cloned_model = models.clone_model(self.model)

      # Create a copy of the optimizer for this graph.
      if isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
        cloned_optimizer = keras_optimizers.TFOptimizer(
            self.model.optimizer.optimizer)
      else:
        logging.info('Cloning %s %s', self.model.optimizer.__class__.__name__,
                     self._optimizer_config)
        cloned_optimizer = self.model.optimizer.__class__.from_config(
            self._optimizer_config)

      if is_training or is_test:
        self._cloned_model.compile(
            optimizer=_replicated_optimizer(cloned_optimizer),
            loss=self.model.loss,
            loss_weights=self.model.loss_weights,
            metrics=self.model.metrics,
            weighted_metrics=self.model.weighted_metrics,
            target_tensors=tpu_targets,
        )

      # Compute our outfeed depending on the execution mode
      if is_training:
        self._cloned_model._make_train_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.train_function.outputs
        ]
        return [
            self._cloned_model.train_function.updates_op,
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.train_function.outputs,
                name='outfeed-enqueue-train')
        ]
      elif is_test:
        self._cloned_model._make_test_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.test_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.test_function.outputs,
                name='outfeed-enqueue-test')
        ]
      elif is_predict:
        self._cloned_model._make_predict_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.predict_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.predict_function.outputs,
                name='outfeed-enqueue-predict',
            )
        ]
      else:
        assert False, 'Unexpected execution mode: %s' % self.execution_mode

    # Capture outfeed metadata computed during the rewrite.
    self._outfeed_spec = None

    # Generate our TPU operations using `tpu.split_compile_and_replicate`.
    # `compile_op` can be used to test that the TPU model compiles before execution.
    # `execute_op` replicates `_model_fn` `num_replicas` times, with each shard
    # running on a different logical core.
    compile_op, execute_op = tpu.split_compile_and_replicate(
        _model_fn, inputs=[[]] * self._strategy.num_towers)

    # Generate CPU side operations to enqueue features/labels and dequeue
    # outputs from the model call.
    infeed_op = []
    outfeed_op = []
    shard_infeed_tensors = []

    for shard_id in range(self._strategy.num_towers):
      with ops.device('/device:TPU:%d' % shard_id):
        infeed_tensors = []
        for spec in input_specs:
          infeed_tensors.append(
              array_ops.placeholder(
                  dtype=spec.dtype,
                  shape=spec.shape,
                  name='infeed-enqueue-%s-%d' % (spec.name, shard_id)))
        shard_infeed_tensors.append(infeed_tensors)

        infeed_op.append(
            tpu_ops.infeed_enqueue_tuple(
                infeed_tensors, [spec.shape for spec in input_specs],
                name='infeed-enqueue-%s-%d' % (self.execution_mode, shard_id)))

        outfeed_op.extend(
            tpu_ops.outfeed_dequeue_tuple(
                dtypes=[spec.dtype for spec in self._outfeed_spec],
                shapes=[spec.shape for spec in self._outfeed_spec],
                name='outfeed-dequeue-%s-%d' % (self.execution_mode, shard_id)))

    return TPUModelOp(
        compile_op,
        execute_op,
        infeed_tensors=shard_infeed_tensors,
        infeed_op=infeed_op,
        outfeed_op=outfeed_op)
Example #12
    def generate_enqueue_ops(self, per_host_sharded_inputs):
        """Generates the host-side Ops to enqueue the partitioned inputs.

    per_host_sharded_inputs is a list, one for each replica, of lists of
    Tensors. sharded_inputs[i] is the tuple of Tensors to use to feed
    replica i.
    sharded_inputs[i][j] is partitioned by self._input_partition_dims[j].

    For example, if sharded_inputs[i][j] is a 2-D Tensor:
    [[A, B, C, D],
     [E ,F, G, H]]
    self._input_partition_dims[j] is [2, 4].

    sharded_inputs[i][j] will be partitioned and flattened into:
    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
    [0, 1, 2, 3, 4, 5, 6, 7] respectively.

    Args:
      per_host_sharded_inputs: a list of lists of Tensors. The length of the
        outer list determines the number of shards. Each inner list indicates
        the types and shapes of the tuples in the corresponding shard.

    Returns:
      A list of host-side Ops, one for each shard, that when executed together
      will enqueue a full-size element of infeed.

    Raises:
      ValueError: if the queue configuration has previously been frozen and the
        shapes of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the shapes of the elements of sharded_inputs
        don't form a consistent unsharded tuple; or if the elements of a tuple
        have different device constraints; or if the partition dims are invalid.
      TypeError: if the queue configuration has previously been frozen and the
        types of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the types of the elements of sharded_inputs
        don't form a consistent unsharded tuple.
    """
        self.set_configuration_from_sharded_input_tensors(
            per_host_sharded_inputs)
        number_of_replicas_per_host = len(per_host_sharded_inputs)
        number_of_tuple_elements = len(per_host_sharded_inputs[0])

        assert len(self._input_partition_dims) == number_of_tuple_elements
        per_host_enqueue_ops = []

        for replica_index in range(number_of_replicas_per_host):
            flattened_inputs = per_host_sharded_inputs[replica_index]
            inputs_part_dims_flat = nest.flatten_up_to(
                flattened_inputs, self._input_partition_dims)
            inputs_parted_iters = [
                iter(self._partition_or_replicate_on_host(x, dims))
                for x, dims in zip(per_host_sharded_inputs[replica_index],
                                   inputs_part_dims_flat)
            ]

            for logical_core in range(
                    self._device_assignment.num_cores_per_replica):
                # Place different partitions on different logical cores.
                replica_id = self._device_assignment.lookup_replicas(
                    self._host_id, logical_core)[replica_index]
                ordinal = self._device_assignment.tpu_ordinal(
                    replica=replica_id, logical_core=logical_core)
                infeed_inputs = []
                for it in inputs_parted_iters:
                    input_for_device = next(it, None)
                    if input_for_device is not None:
                        infeed_inputs.append(input_for_device)

                if infeed_inputs:
                    per_host_enqueue_ops.append(
                        tpu_ops.infeed_enqueue_tuple(
                            inputs=infeed_inputs,
                            shapes=[x.shape for x in infeed_inputs],
                            name="enqueue/replica_{0}/input_{1}".format(
                                replica_index, logical_core),
                            device_ordinal=ordinal))
        return per_host_enqueue_ops
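To make the docstring's [2, 4] example concrete, here is a small illustrative NumPy sketch of the partition-and-flatten order. NumPy stands in for the on-host splitting done by `_partition_or_replicate_on_host`; it is not the code path above:

import numpy as np

x = np.array([['A', 'B', 'C', 'D'],
              ['E', 'F', 'G', 'H']])
dims = [2, 4]  # split dim 0 into 2 pieces and dim 1 into 4 pieces

row_blocks = np.split(x, dims[0], axis=0)
partitions = [col
              for row in row_blocks
              for col in np.split(row, dims[1], axis=1)]

# partitions[k] is fed to logical core k:
#   A -> core 0, B -> core 1, ..., H -> core 7
print([p.item() for p in partitions])  # ['A', 'B', ..., 'H']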
Example #13
    def _specialize_model(self, input_specs):
        """Specialize `self.model` (a Keras model) for the given input shapes."""
        # Re-create our input and output layers inside our subgraph.  They will be
        # attached to the true computation when we clone our model in `_model_fn`.
        K.set_learning_phase(
            self.execution_mode == model_fn_lib.ModeKeys.TRAIN)

        # functools.partial and callable objects are not supported by tpu.rewrite
        def _model_fn():
            """Compute fit/eval/predict for the TPU."""
            is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
            is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
            is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

            # During train/eval, we infeed our features as well as labels.
            if is_training or is_test:
                infeed_layers = self.model._input_layers + self.model._output_layers
            else:
                infeed_layers = self.model._input_layers

            # Generate our infeed operation to read features & labels.
            infeed_tensors = tpu_ops.infeed_dequeue_tuple(
                dtypes=[spec.dtype for spec in input_specs],
                shapes=[spec.shape for spec in input_specs],
                name='infeed-%s' % self.execution_mode)

            assert len(infeed_tensors) == len(infeed_layers), (
                'Infeed inputs did not match model: %s vs %s' %
                (infeed_layers, infeed_tensors))

            tpu_targets = []
            tpu_inputs = []

            # Sort infeed outputs into inputs and labels for calling our Keras model.
            for tensor, layer in zip(infeed_tensors, infeed_layers):
                if layer in self.model._input_layers:
                    tpu_inputs.append(
                        layers.Input(name=layer.name, tensor=tensor))
                if layer in self.model._output_layers:
                    tpu_targets.append(tensor)

            optimizer = self.model.optimizer
            optimizer.iterations = training_util.get_or_create_global_step()

            # Call our model with our infeed inputs (re-using the weights).
            model_outputs = self.model(tpu_inputs)
            child_model = models.Model(inputs=tpu_inputs,
                                       outputs=model_outputs)
            if is_training or is_test:
                child_model.compile(
                    optimizer=self.model.optimizer,
                    loss=self.model.loss,
                    loss_weights=self.model.loss_weights,
                    metrics=self.model.metrics,
                    weighted_metrics=self.model.weighted_metrics,
                    target_tensors=tpu_targets,
                )

            # Compute our outfeed depending on the execution mode
            if is_training:
                child_model._make_train_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.train_function.outputs
                ]
                return [
                    child_model.train_function.updates_op,
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.train_function.outputs,
                        name='outfeed-enqueue-train')
                ]
            elif is_test:
                child_model._make_test_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.test_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.test_function.outputs,
                        name='outfeed-enqueue-test')
                ]
            elif is_predict:
                child_model._make_predict_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in child_model.predict_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        child_model.predict_function.outputs,
                        name='outfeed-enqueue-predict',
                    )
                ]
            else:
                assert False, 'Unexpected execution mode: %s' % self.execution_mode

        # Capture outfeed metadata computed during the rewrite.
        self._outfeed_spec = None

        tpu_execute_op = tpu.rewrite(_model_fn)

        K._initialize_variables(
            K.get_session())  # pylint: disable=protected-access

        # Generate CPU side operations to enqueue features/labels and dequeue
        # outputs from the model call.
        with ops.device('/device:TPU:0'):
            infeed_tensors = []
            for spec in input_specs:
                infeed_tensors.append(
                    array_ops.placeholder(dtype=spec.dtype,
                                          shape=spec.shape,
                                          name='infeed-enqueue-%s' %
                                          spec.name))

            infeed_op = tpu_ops.infeed_enqueue_tuple(
                infeed_tensors, [spec.shape for spec in input_specs],
                name='infeed-enqueue-%s' % self.execution_mode)

            outfeed_op = tpu_ops.outfeed_dequeue_tuple(
                dtypes=[spec.dtype for spec in self._outfeed_spec],
                shapes=[spec.shape for spec in self._outfeed_spec],
                name='outfeed-dequeue-%s' % self.execution_mode)

        return CompiledTPUOp(tpu_execute_op, infeed_tensors, infeed_op,
                             outfeed_op)
Example #14
  def _specialize_model(self, input_specs):
    """Specialize `self.model` (a Keras model) for the given input shapes."""
    # Re-create our input and output layers inside our subgraph.  They will be
    # attached to the true computation when we clone our model in `_model_fn`.
    K.set_learning_phase(
        self.execution_mode == model_fn_lib.ModeKeys.TRAIN
    )

    # functools.partial and callable objects are not supported by tpu.rewrite
    def _model_fn():
      """Compute fit/eval/predict for the TPU."""
      is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
      is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
      is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

      # During train/eval, we infeed our features as well as labels.
      if is_training or is_test:
        infeed_layers = self.model._input_layers + self.model._output_layers
      else:
        infeed_layers = self.model._input_layers

      # Generate our infeed operation to read features & labels.
      infeed_tensors = tpu_ops.infeed_dequeue_tuple(
          dtypes=[spec.dtype for spec in input_specs],
          shapes=[spec.shape for spec in input_specs],
          name='infeed-%s' % self.execution_mode)

      assert len(infeed_tensors) == len(infeed_layers), (
          'Infeed inputs did not match model: %s vs %s' %
          (infeed_layers, infeed_tensors))

      tpu_targets = []
      tpu_inputs = []

      # Sort infeed outputs into inputs and labels for calling our Keras model.
      for tensor, layer in zip(infeed_tensors, infeed_layers):
        if layer in self.model._input_layers:
          tpu_inputs.append(layers.Input(name=layer.name, tensor=tensor))
        if layer in self.model._output_layers:
          tpu_targets.append(tensor)

      # Call our model with our infeed inputs (re-using the weights).
      model_outputs = self.model(tpu_inputs)
      child_model = models.Model(inputs=tpu_inputs, outputs=model_outputs)
      if is_training or is_test:
        child_model.compile(
            optimizer=self.model.optimizer,
            loss=self.model.loss,
            loss_weights=self.model.loss_weights,
            metrics=self.model.metrics,
            weighted_metrics=self.model.weighted_metrics,
            target_tensors=tpu_targets,
        )

      # Compute our outfeed depending on the execution mode
      if is_training:
        child_model._make_train_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in child_model.train_function.outputs
        ]
        return [
            child_model.train_function.updates_op,
            tpu_ops.outfeed_enqueue_tuple(
                child_model.train_function.outputs, name='outfeed-enqueue-train')
        ]
      elif is_test:
        child_model._make_test_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in child_model.test_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                child_model.test_function.outputs, name='outfeed-enqueue-test')
        ]
      elif is_predict:
        child_model._make_predict_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in child_model.predict_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                child_model.predict_function.outputs,
                name='outfeed-enqueue-predict',
            )
        ]
      else:
        assert False, 'Unexpected execution mode: %s' % self.execution_mode

    # Capture outfeed metadata computed during the rewrite.
    self._outfeed_spec = None

    tpu_execute_op = tpu.rewrite(_model_fn)

    # Generate CPU side operations to enqueue features/labels and dequeue
    # outputs from the model call.
    with ops.device('/device:TPU:0'):
      infeed_tensors = []
      for spec in input_specs:
        infeed_tensors.append(
            array_ops.placeholder(
                dtype=spec.dtype,
                shape=spec.shape,
                name='infeed-enqueue-%s' % spec.name))

      infeed_op = tpu_ops.infeed_enqueue_tuple(
          infeed_tensors, [spec.shape for spec in input_specs],
          name='infeed-enqueue-%s' % self.execution_mode)

      outfeed_op = tpu_ops.outfeed_dequeue_tuple(
          dtypes=[spec.dtype for spec in self._outfeed_spec],
          shapes=[spec.shape for spec in self._outfeed_spec],
          name='outfeed-dequeue-%s' % self.execution_mode)

    return CompiledTPUOp(tpu_execute_op, infeed_tensors, infeed_op, outfeed_op)
Example #15
  def generate_enqueue_ops(self, per_host_sharded_inputs):
    """Generates the host-side Ops to enqueue the partitioned inputs.

    per_host_sharded_inputs is a list, one for each replica, of lists of
    Tensors. per_host_sharded_inputs[i] is the tuple of Tensors to use to
    feed replica i.
    per_host_sharded_inputs[i][j] is partitioned by
    self._input_partition_dims[j].

    For example, if per_host_sharded_inputs[i][j] is a 2-D Tensor:
    [[A, B, C, D],
     [E, F, G, H]]
    and self._input_partition_dims[j] is [2, 4],

    then per_host_sharded_inputs[i][j] is partitioned and flattened into:
    [A, B, C, D, E, F, G, H] and fed into the logical core ids:
    [0, 1, 2, 3, 4, 5, 6, 7] respectively.

    Args:
      per_host_sharded_inputs: a list of lists of Tensors. The length of the
        outer list determines the number of shards. Each inner list indicates
        the types and shapes of the tuples in the corresponding shard.

    Returns:
      A list of host-side Ops, one for each shard, that when executed together
      will enqueue a full-size element of infeed.

    Raises:
      ValueError: if the queue configuration has previously been frozen and the
        shapes of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the shapes of the elements of sharded_inputs
        don't form a consistent unsharded tuple; or if the elements of a tuple
        have different device constraints; or if the partition dims are invalid.
      TypeError: if the queue configuration has previously been frozen and the
        types of the elements of sharded_inputs are not compatible with the
        frozen configuration; or if the types of the elements of sharded_inputs
        don't form a consistent unsharded tuple.
    """
    self.set_configuration_from_sharded_input_tensors(per_host_sharded_inputs)
    number_of_replicas_per_host = len(per_host_sharded_inputs)
    number_of_tuple_elements = len(per_host_sharded_inputs[0])

    assert len(self._input_partition_dims) == number_of_tuple_elements
    per_host_enqueue_ops = []

    for replica_index in range(number_of_replicas_per_host):
      flattened_inputs = per_host_sharded_inputs[replica_index]
      inputs_part_dims_flat = nest.flatten_up_to(flattened_inputs,
                                                 self._input_partition_dims)
      inputs_parted_iters = [
          iter(self._partition_or_replicate_on_host(x, dims)) for x, dims in
          zip(per_host_sharded_inputs[replica_index], inputs_part_dims_flat)
      ]

      for core_index in range(self._device_assignment.num_cores_per_replica):
        # Place different partitions on different logical cores.
        logical_core = self._get_logical_core(core_index)
        replica_id = self._device_assignment.lookup_replicas(
            self._host_id, logical_core)[replica_index]
        ordinal = self._device_assignment.tpu_ordinal(
            replica=replica_id, logical_core=logical_core)
        infeed_inputs = []
        for it in inputs_parted_iters:
          input_for_device = next(it, None)
          if input_for_device is not None:
            infeed_inputs.append(input_for_device)

        if infeed_inputs:
          per_host_enqueue_ops.append(
              tpu_ops.infeed_enqueue_tuple(
                  inputs=infeed_inputs,
                  shapes=[x.shape for x in infeed_inputs],
                  name="enqueue/replica_{0}/input_{1}".format(
                      replica_index, core_index),
                  device_ordinal=ordinal))
    return per_host_enqueue_ops