コード例 #1
0
ファイル: keras_test.py プロジェクト: zhaoyongke/tensorflow
  def test_calculating_batch_size(self):
    with self.cached_session():
      # 64 is the number of input samples.
      inputs = np.zeros((64, 3), dtype=np.float32)
      targets = np.zeros((64, 4), dtype=np.float32)

      model = get_model()
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
                                                     '/device:CPU:0'])
      strategy.extended._require_static_shapes = True

      model.compile(optimizer, loss, distribute=strategy)
      iterator = model._distribution_standardize_user_data(inputs,
                                                           targets,
                                                           batch_size=None,
                                                           check_steps=True,
                                                           steps_name='steps',
                                                           steps=3)

      # The global batch size(21) across all replicas is the ratio of the input
      # samples(64) to the steps(3).
      # The batch size(10) per device is the ratio of the global batch size(21)
      # to the number of replicas(2).
      # The global batch size and batch size are rounded integer values.
      self.assertEqual(10, distributed_training_utils.get_batch_dimension(
          iterator))
コード例 #2
0
  def test_calculating_batch_size(self):
    with self.cached_session():
      # 64 is the number of input samples.
      inputs = np.zeros((64, 3), dtype=np.float32)
      targets = np.zeros((64, 4), dtype=np.float32)

      model = get_model()
      optimizer = gradient_descent.GradientDescentOptimizer(0.001)
      loss = 'mse'
      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
                                                     '/device:CPU:0'])
      strategy._require_static_shapes = True

      model.compile(optimizer, loss, distribute=strategy)
      iterator = model._distribution_standardize_user_data(inputs,
                                                           targets,
                                                           batch_size=None,
                                                           check_steps=True,
                                                           steps_name='steps',
                                                           steps=3)

      # The global batch size(21) across all replicas is the ratio of the input
      # samples(64) to the steps(3).
      # The batch size(10) per device is the ratio of the global batch size(21)
      # to the number of replicas(2).
      # The global batch size and batch size are rounded integer values.
      self.assertEqual(10, distributed_training_utils.get_batch_dimension(
          iterator._iterator))
コード例 #3
0
def experimental_tpu_predict_loop(model,
                                  dataset,
                                  verbose=0,
                                  steps=None,
                                  callbacks=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during training

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  mode = ModeKeys.PREDICT
  dataset_fully_shaped = (distributed_training_utils.
                          is_dataset_shape_fully_defined(dataset))
  padding_handler = None
  if not dataset_fully_shaped:
    # TODO(hongjunchoi): Investigate whether operations from
    # PartialBatchPaddingHandler are unnecessarily pruned out
    # during graph optimization.
    padding_handler = padding_util.PartialBatchPaddingHandler(
        model._feed_output_shapes)
    batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(dataset)
    padding_handler.padded_batch_size = batch_size
    padding_handler.padding_mask = dataset.reduce(padding_handler.padding_mask,
                                                  padding_handler.update_mask)

    dataset = dataset.map(padding_handler.pad_batch)
    dataset = dataset.apply(batching.unbatch())
    # Upon this point, it is guaranteed that the dataset does not
    # have partial batches. Thus, we set `drop_remainder=True` to
    # get static shape information about the elements in the dataset.
    dataset = dataset.batch(batch_size, drop_remainder=True)

    if prefetch_buffer is not None:
      dataset = dataset.prefetch(prefetch_buffer)

  current_strategy = model._distribution_strategy
  iterator = distributed_training_utils.get_iterator(dataset, current_strategy)

  scope = distributed_training_utils.distributed_scope(
      strategy=current_strategy, learning_phase=0)
  scope.__enter__()

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, inputs):
    """Clones the model and calls make_predict_function."""
    if model._compile_distribution:
      distributed_training_utils.clone_model_on_replicas(
          model, current_strategy, mode, inputs=inputs)
    else:
      distributed_training_utils._build_distributed_network(
          model, current_strategy, mode, inputs)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.extended.call_for_each_replica(
         _per_device_predict_function,
         args=(distributed_training_utils.get_distributed_model(
             model, ModeKeys.PREDICT),))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
  ctx = current_strategy.extended.experimental_run_steps_on_iterator(
      step_fn, iterator, iterations=1,
      initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  if model._compile_distribution:
    distributed_training_utils._copy_weights_to_distributed_model(model, mode)

  distributed_training_utils._reset_metrics(model)

  callbacks = cbks.configure_callbacks(
      callbacks,
      model,
      do_validation=False,
      epochs=1,
      steps_per_epoch=steps,
      verbose=verbose,
      count_mode='steps',
      mode=mode)
  callbacks._call_begin_hook(mode)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    batch_logs = {'batch': step, 'size': 1}
    callbacks._call_batch_hook(mode, 'begin', step, batch_logs)
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
    callbacks._call_batch_hook(mode, 'end', step, batch_logs)
    if verbose >= 1:
      progbar.update(step + 1)

  callbacks._call_end_hook(mode)

  scope.__exit__(None, None, None)

  if len(unconcatenated_outs) == 1:
    prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
  else:
    prediction_result = [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]

  if padding_handler:
    prediction_result = padding_handler.apply_mask(prediction_result)

  return prediction_result
コード例 #4
0
def experimental_tpu_predict_loop(model, dataset, verbose=0, steps=None):
    """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
    dataset_fully_shaped = (
        distributed_training_utils.is_dataset_shape_fully_defined(dataset))
    padding_handler = None
    if not dataset_fully_shaped:
        # TODO(hongjunchoi): Investigate whether operations from
        # PartialBatchPaddingHandler are unnecessarily pruned out
        # during graph optimization.
        padding_handler = padding_util.PartialBatchPaddingHandler(
            model._feed_output_shapes)
        batched_dataset = input_lib._get_batched_dataset(dataset)
        batch_size, _, prefetch_buffer = input_lib._get_batched_dataset_attributes(
            batched_dataset)
        padding_handler.padded_batch_size = batch_size
        padding_handler.padding_mask = dataset.reduce(
            padding_handler.padding_mask, padding_handler.update_mask)

        dataset = dataset.map(padding_handler.pad_batch)
        dataset = dataset.apply(batching.unbatch())
        # Upon this point, it is guaranteed that the dataset does not
        # have partial batches. Thus, we set `drop_remainder=True` to
        # get static shape information about the elements in the dataset.
        dataset = dataset.batch(batch_size, drop_remainder=True)

        if prefetch_buffer is not None:
            dataset = dataset.prefetch(prefetch_buffer)

    current_strategy = model._distribution_strategy
    iterator = distributed_training_utils.get_iterator(dataset,
                                                       current_strategy)

    scope = distributed_training_utils.distributed_scope(
        strategy=current_strategy, learning_phase=0)
    scope.__enter__()

    def _per_device_predict_function(model):
        model._make_predict_function()
        return (model.predict_function.inputs, model.predict_function.outputs,
                model.predict_function.updates_op,
                model.predict_function.session_kwargs)

    def step_fn(ctx, inputs):
        """Clones the model and calls make_predict_function."""
        if model._compile_distribution:
            distributed_training_utils.clone_model_on_replicas(
                model, current_strategy, ModeKeys.PREDICT, inputs=inputs)
        else:
            distributed_training_utils._build_distributed_network(
                model, current_strategy, ModeKeys.PREDICT, inputs)

        (grouped_inputs, grouped_outputs, grouped_updates, grouped_session_args
         ) = current_strategy.extended.call_for_each_replica(
             _per_device_predict_function,
             args=(model._distributed_model_predict, ))

        (all_inputs, all_outputs, all_updates,
         all_session_args) = distributed_training_utils.unwrap_values(
             current_strategy, grouped_inputs, grouped_outputs,
             grouped_updates, grouped_session_args)

        combined_fn = K.function(all_inputs,
                                 all_outputs,
                                 updates=all_updates,
                                 name='distributed_predict_function',
                                 **all_session_args)

        for label, output in zip(model.output_names, combined_fn.outputs):
            ctx.set_last_step_output(label, output)

        return combined_fn.updates_op

    # Add initial dummy values for outputs.
    initial_loop_values = {}
    batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
    for name, tensor in zip(model.output_names, model.outputs):
        # TODO(priyag): This is a workaround as we do not know the batch dimension
        # of the model's output at this point.
        shape = tensor_shape.TensorShape(tensor.shape.dims)
        shape.dims = [batch_dimension] + shape.dims[1:]
        initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn,
        iterator,
        iterations=1,
        initial_loop_values=initial_loop_values)

    predict_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    if verbose == 1:
        progbar = Progbar(target=steps)

    if model._compile_distribution:
        distributed_training_utils._copy_weights_to_distributed_model(
            model, ModeKeys.PREDICT)

    distributed_training_utils._reset_metrics(model)

    assert steps is not None
    # Since we do not know how many samples we will see, we cannot pre-allocate
    # the returned Numpy arrays. Instead, we store one array per batch seen
    # and concatenate them upon returning.
    unconcatenated_outs = [[] for _ in model.outputs]
    for step in range(steps):
        _, batch_outs = K.get_session().run([predict_op, output_tensors])
        # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
        for i, label in enumerate(model.output_names):
            unconcatenated_outs[i].extend(batch_outs[label])
        if verbose >= 1:
            progbar.update(step + 1)

    scope.__exit__(None, None, None)

    if len(unconcatenated_outs) == 1:
        prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
    else:
        prediction_result = [
            np.concatenate(unconcatenated_outs[i], axis=0)
            for i in range(len(unconcatenated_outs))
        ]

    if padding_handler:
        prediction_result = padding_handler.apply_mask(prediction_result)

    return prediction_result
コード例 #5
0
def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
    """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      iterator: Iterator for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
    current_strategy = model._distribution_strategy
    K.get_session().run(current_strategy.initialize())

    # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
    K.set_learning_phase(0)

    def _per_device_predict_function(model):
        model._make_predict_function()
        return (model.predict_function.inputs, model.predict_function.outputs,
                model.predict_function.updates_op,
                model.predict_function.session_kwargs)

    def step_fn(ctx, inputs):
        """Clones the model and calls make_predict_function."""

        # TODO(priyag, sourabhbajaj): The model gets cloned every time
        # fit/test/predict is called. We should look into caching this keyed on
        # input shapes.
        clone_model_on_replicas(model,
                                current_strategy,
                                make_callback_model=False,
                                inputs=inputs,
                                mode=_Mode.PREDICT)

        (grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args) = current_strategy.call_for_each_replica(
             _per_device_predict_function,
             args=(model._grouped_model_predict, ))

        (all_inputs, all_outputs, all_updates,
         all_session_args) = distributed_training_utils.unwrap_values(
             current_strategy, grouped_inputs, grouped_outputs,
             grouped_updates, grouped_session_args)

        combined_fn = K.function(all_inputs,
                                 all_outputs,
                                 updates=all_updates,
                                 name='distributed_predict_function',
                                 **all_session_args)

        for label, output in zip(model.output_names, combined_fn.outputs):
            ctx.set_last_step_output(label, output)

        return combined_fn.updates_op

    # Add initial dummy values for outputs.
    initial_loop_values = {}
    batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
    for name, tensor in zip(model.output_names, model.outputs):
        # TODO(priyag): This is a workaround as we do not know the batch dimension
        # of the model's output at this point.
        shape = tensor_shape.TensorShape(tensor.shape.dims)
        shape.dims = [batch_dimension] + shape.dims[1:]
        initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

    with current_strategy.scope():
        # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
        ctx = current_strategy.run_steps_on_dataset(
            step_fn,
            iterator,
            iterations=1,
            initial_loop_values=initial_loop_values)

    predict_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    if verbose == 1:
        progbar = Progbar(target=steps)

    # Copy the weights from the original model to each of the replicated models.
    orig_model_weights = model.get_weights()
    with current_strategy.scope():
        distributed_model = current_strategy.unwrap(
            model._grouped_model_predict)[0]
        distributed_training_utils.set_weights(current_strategy,
                                               distributed_model,
                                               orig_model_weights)

    assert steps is not None
    # Since we do not know how many samples we will see, we cannot pre-allocate
    # the returned Numpy arrays. Instead, we store one array per batch seen
    # and concatenate them upon returning.
    unconcatenated_outs = [[] for _ in model.outputs]
    for step in range(steps):
        _, batch_outs = K.get_session().run([predict_op, output_tensors])
        # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
        for i, label in enumerate(model.output_names):
            unconcatenated_outs[i].extend(batch_outs[label])
        if verbose >= 1:
            progbar.update(step + 1)

    K.get_session().run(current_strategy.finalize())

    if len(unconcatenated_outs) == 1:
        return np.concatenate(unconcatenated_outs[0], axis=0)
    return [
        np.concatenate(unconcatenated_outs[i], axis=0)
        for i in range(len(unconcatenated_outs))
    ]
コード例 #6
0
def _experimental_predict_loop(model, iterator, verbose=0, steps=None):
  """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      iterator: Iterator for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
  current_strategy = model._distribution_strategy
  K.get_session().run(current_strategy.initialize())

  # TODO(priyag, sourabhbajaj): This should likely not be hardcoded here.
  K.set_learning_phase(0)

  def _per_device_predict_function(model):
    model._make_predict_function()
    return (model.predict_function.inputs,
            model.predict_function.outputs,
            model.predict_function.updates_op,
            model.predict_function.session_kwargs)

  def step_fn(ctx, *inputs):
    """Clones the model and calls make_predict_function."""

    # TODO(priyag, sourabhbajaj): The model gets cloned every time
    # fit/test/predict is called. We should look into caching this keyed on
    # input shapes.
    clone_model_on_replicas(
        model,
        current_strategy,
        make_callback_model=False,
        inputs=inputs,
        mode=_Mode.PREDICT)

    (grouped_inputs, grouped_outputs, grouped_updates,
     grouped_session_args) = current_strategy.call_for_each_replica(
         _per_device_predict_function, args=(model._grouped_model_predict,))

    (all_inputs, all_outputs, all_updates,
     all_session_args) = distributed_training_utils.unwrap_values(
         current_strategy, grouped_inputs, grouped_outputs, grouped_updates,
         grouped_session_args)

    combined_fn = K.function(
        all_inputs, all_outputs,
        updates=all_updates,
        name='distributed_predict_function',
        **all_session_args)

    for label, output in zip(model.output_names, combined_fn.outputs):
      ctx.set_last_step_output(label, output)

    return combined_fn.updates_op

  # Add initial dummy values for outputs.
  initial_loop_values = {}
  batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
  for name, tensor in zip(model.output_names, model.outputs):
    # TODO(priyag): This is a workaround as we do not know the batch dimension
    # of the model's output at this point.
    shape = tensor_shape.TensorShape(tensor.shape.dims)
    shape.dims = [batch_dimension] + shape.dims[1:]
    initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

  with current_strategy.scope():
    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.run_steps_on_dataset(
        step_fn, iterator, iterations=1,
        initial_loop_values=initial_loop_values)

  predict_op = ctx.run_op
  output_tensors = ctx.last_step_outputs

  if verbose == 1:
    progbar = Progbar(target=steps)

  # Copy the weights from the original model to each of the replicated models.
  orig_model_weights = model.get_weights()
  with current_strategy.scope():
    distributed_model = current_strategy.unwrap(model._grouped_model_predict)[0]
    distributed_training_utils.set_weights(
        current_strategy, distributed_model, orig_model_weights)

  assert steps is not None
  # Since we do not know how many samples we will see, we cannot pre-allocate
  # the returned Numpy arrays. Instead, we store one array per batch seen
  # and concatenate them upon returning.
  unconcatenated_outs = [[] for _ in model.outputs]
  for step in range(steps):
    _, batch_outs = K.get_session().run([predict_op, output_tensors])
    # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
    for i, label in enumerate(model.output_names):
      unconcatenated_outs[i].extend(batch_outs[label])
    if verbose >= 1:
      progbar.update(step + 1)

  K.get_session().run(current_strategy.finalize())

  if len(unconcatenated_outs) == 1:
    return np.concatenate(unconcatenated_outs[0], axis=0)
  return [
      np.concatenate(unconcatenated_outs[i], axis=0)
      for i in range(len(unconcatenated_outs))
  ]
コード例 #7
0
def experimental_tpu_predict_loop(model,
                                  dataset,
                                  verbose=0,
                                  steps=None,
                                  callbacks=None):
    """Predict loop for predicting with TPU DistributionStrategy.

  Arguments:
      model: Keras Model instance.
      dataset: Dataset for input data.
      verbose: Integer, Verbosity mode 0 or 1.
      steps: Total number of steps (batches of samples)
          before declaring `_predict_loop` finished.
          Ignored with the default value of `None`.
      callbacks: List of callbacks to be called during training

  Returns:
      Array of predictions (if the model has a single output)
      or list of arrays of predictions
      (if the model has multiple outputs).
  """
    mode = ModeKeys.PREDICT
    steps = training_utils.infer_steps_for_dataset(dataset,
                                                   steps,
                                                   steps_name='steps')
    dataset_fully_shaped = (
        distributed_training_utils.is_dataset_shape_fully_defined(dataset))
    padding_handler = None
    if not dataset_fully_shaped:
        # TODO(hongjunchoi): Investigate whether operations from
        # PartialBatchPaddingHandler are unnecessarily pruned out
        # during graph optimization.
        padding_handler = padding_util.PartialBatchPaddingHandler(
            model._feed_output_shapes)
        batch_size, _, prefetch_buffer = input_lib._get_dataset_attributes(
            dataset)
        padding_handler.padded_batch_size = batch_size
        padding_handler.padding_mask = dataset.reduce(
            padding_handler.padding_mask, padding_handler.update_mask)

        dataset = dataset.map(padding_handler.pad_batch)
        dataset = dataset.apply(batching.unbatch())
        # Upon this point, it is guaranteed that the dataset does not
        # have partial batches. Thus, we set `drop_remainder=True` to
        # get static shape information about the elements in the dataset.
        dataset = dataset.batch(batch_size, drop_remainder=True)

        if prefetch_buffer is not None:
            dataset = dataset.prefetch(prefetch_buffer)

    current_strategy = model._distribution_strategy
    iterator = distributed_training_utils.get_iterator(dataset,
                                                       current_strategy)

    scope = distributed_training_utils.distributed_scope(
        strategy=current_strategy, learning_phase=0)
    scope.__enter__()

    out_labels = model.output_names
    step_fn = _make_step_fn(model, ModeKeys.PREDICT, current_strategy,
                            out_labels)

    # Add initial dummy values for outputs.
    initial_loop_values = {}
    batch_dimension = distributed_training_utils.get_batch_dimension(iterator)
    for name, tensor in zip(model.output_names, model.outputs):
        # TODO(priyag): This is a workaround as we do not know the batch dimension
        # of the model's output at this point.
        shape = tensor_shape.TensorShape(tensor.shape.dims)
        shape.dims = [batch_dimension] + shape.dims[1:]
        initial_loop_values[name] = array_ops.zeros(shape, tensor.dtype)

    # TODO(priyag, sourabhbajaj): Support steps_per_run if/when we add outfeed.
    ctx = current_strategy.extended.experimental_run_steps_on_iterator(
        step_fn,
        iterator,
        iterations=1,
        initial_loop_values=initial_loop_values)

    predict_op = ctx.run_op
    output_tensors = ctx.last_step_outputs

    if verbose == 1:
        progbar = Progbar(target=steps)

    if model._compile_distribution:
        distributed_training_utils._copy_weights_to_distributed_model(
            model, mode)

    distributed_training_utils._reset_metrics(model)

    callbacks = cbks.configure_callbacks(callbacks,
                                         model,
                                         do_validation=False,
                                         epochs=1,
                                         steps_per_epoch=steps,
                                         verbose=verbose,
                                         count_mode='steps',
                                         mode=mode)
    callbacks._call_begin_hook(mode)

    # Since we do not know how many samples we will see, we cannot pre-allocate
    # the returned Numpy arrays. Instead, we store one array per batch seen
    # and concatenate them upon returning.
    unconcatenated_outs = [[] for _ in model.outputs]
    if steps is not None:
        target_steps = steps
    else:
        target_steps = np.inf

    current_step = 0
    while current_step < target_steps:
        batch_logs = {'batch': current_step, 'size': 1}
        callbacks._call_batch_hook(mode, 'begin', current_step, batch_logs)
        try:
            _, batch_outs = K.batch_get_value([predict_op, output_tensors])
        except errors.OutOfRangeError:
            if steps is not None:
                warning_msg = 'Make sure that your dataset can generate at least '
                '`steps` batches (in this case, {} batches).'.format(steps)
            else:
                warning_msg = 'Number of steps ran: {} steps'.format(
                    current_step)

            logging.warning('Your dataset iterator ran out of data; '
                            'interrupting evaluation. ' + warning_msg)
            break

        # TODO(priyag): maybe need to unwrap the outputs first for MirroredStrategy.
        for i, label in enumerate(model.output_names):
            unconcatenated_outs[i].extend(batch_outs[label])
        batch_logs = cbks.make_logs(model, batch_logs, batch_outs, mode)
        callbacks._call_batch_hook(mode, 'end', current_step, batch_logs)
        if verbose >= 1:
            progbar.update(current_step + 1)
        current_step += 1

    callbacks._call_end_hook(mode)

    scope.__exit__(None, None, None)

    if len(unconcatenated_outs) == 1:
        prediction_result = np.concatenate(unconcatenated_outs[0], axis=0)
    else:
        prediction_result = [
            np.concatenate(unconcatenated_outs[i], axis=0)
            for i in range(len(unconcatenated_outs))
        ]

    if padding_handler:
        prediction_result = padding_handler.apply_mask(prediction_result)

    return prediction_result