Example #1
def learn(iterations=100):
    current_best_val = load_model("../models/alphazerolike/azval.tf")
    current_best_pol = load_model("../models/alphazerolike/azpol.tf")
    candidate_val = clone_model(current_best_val)
    candidate_pol = clone_model(current_best_pol)
    _provide_new_training_data((candidate_val, candidate_pol))
    for _ in tqdm(range(iterations)):
        candidate_val = clone_model(current_best_val)
        candidate_pol = clone_model(current_best_pol)

        _training(candidate_val, candidate_pol)

        if _compete((current_best_val, current_best_pol),
                    (candidate_val, candidate_pol)):
            save_model(candidate_val, "../models/alphazerolike/azval.tf")
            save_model(candidate_pol, "../models/alphazerolike/azpol.tf")
            _provide_new_training_data((candidate_val, candidate_pol))

            timestamp = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")
            save_model(candidate_val,
                       "../models/alphazerolike/history/azval_" + timestamp)
            save_model(candidate_pol,
                       "../models/alphazerolike/history/azpol_" + timestamp)

            current_best_pol = candidate_pol
            current_best_val = candidate_val
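A caveat on Example #1: in tf.keras, clone_model copies only the architecture and returns a model with freshly initialized weights. If each candidate is meant to start from the current best network, the weights have to be transferred explicitly. A minimal sketch of that fix, reusing the names from the example above:

candidate_val = clone_model(current_best_val)
candidate_val.set_weights(current_best_val.get_weights())  # clone_model alone re-initializes weights
candidate_pol = clone_model(current_best_pol)
candidate_pol.set_weights(current_best_pol.get_weights())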
Example #2
def _clone_and_build_model(model, strategy):
    # The new "original" model in worker 0.
    with strategy.scope():
        cloned_model = models.clone_model(model)

    # Compile and build model.
    if isinstance(model.optimizer, optimizers.TFOptimizer):
        optimizer = model.optimizer
        # TODO(yuefengz): figure out why the optimizer here is still a
        # TFOptimizer.
        while isinstance(optimizer, optimizers.TFOptimizer):
            optimizer = optimizer.optimizer
        optimizer = copy.deepcopy(optimizer)
    else:
        optimizer_config = model.optimizer.get_config()
        optimizer = type(model.optimizer).from_config(optimizer_config)

    cloned_model.compile(optimizer,
                         model.loss,
                         metrics=metrics_module.clone_metrics(
                             model._compile_metrics),
                         loss_weights=model.loss_weights,
                         sample_weight_mode=model.sample_weight_mode,
                         weighted_metrics=metrics_module.clone_metrics(
                             model._compile_weighted_metrics))
    return cloned_model
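The optimizer handling above uses the standard Keras round-trip for rebuilding an optimizer with identical hyperparameters but none of the accumulated slot variables. A small standalone illustration (the choice of Adam is arbitrary):

import tensorflow as tf

opt = tf.keras.optimizers.Adam(learning_rate=3e-4)
fresh = type(opt).from_config(opt.get_config())  # same hyperparameters, fresh state
assert fresh.get_config()['learning_rate'] == opt.get_config()['learning_rate']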
Example #3
def _clone_and_build_model(model, inputs=None, targets=None):
  """Clone and build the given keras_model."""
  # We need to set the import here since we run into a circular dependency
  # error.
  from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
  cloned_model = models.clone_model(model, input_tensors=inputs)

  # Compile and build model.
  if isinstance(model.optimizer, optimizers.TFOptimizer):
    optimizer = model.optimizer
  else:
    optimizer_config = model.optimizer.get_config()
    optimizer = model.optimizer.__class__.from_config(optimizer_config)

  if isinstance(targets, tuple):
    targets = nest.flatten(targets)
  cloned_model.compile(
      optimizer,
      model.loss,
      metrics=metrics_module.clone_metrics(model.metrics),
      loss_weights=model.loss_weights,
      sample_weight_mode=model.sample_weight_mode,
      weighted_metrics=metrics_module.clone_metrics(model.weighted_metrics),
      target_tensors=targets)
  return cloned_model
Example #4
def _clone_and_build_model(model, inputs=None, targets=None):
  """Clone and build the given keras_model."""
  # We need to set the import here since we run into a circular dependency
  # error.
  from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
  cloned_model = models.clone_model(model, input_tensors=inputs)

  # Compile and build model.
  if isinstance(model.optimizer, optimizers.TFOptimizer):
    optimizer = model.optimizer
  else:
    optimizer_config = model.optimizer.get_config()
    optimizer = model.optimizer.__class__.from_config(optimizer_config)

  # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
  # single tensor should be OK but it throws an error in that case.
  if (targets is not None and not isinstance(targets, list) and
      not isinstance(targets, dict)):
    targets = [targets]
  cloned_model.compile(
      optimizer,
      model.loss,
      metrics=model.metrics,
      loss_weights=model.loss_weights,
      sample_weight_mode=model.sample_weight_mode,
      weighted_metrics=model.weighted_metrics,
      target_tensors=targets)
  return cloned_model
Example #5
    def test_uncompiled_prebuilt_model_raises_error(self):
        """Tests that an uncompiled model cannot be used as build_fn param."""

        for config in [
            "MLPRegressor",
            "MLPClassifier",
            "CNNClassifier",
            "CNNClassifierF",
        ]:
            loader, model, build_fn, _ = CONFIG[config]
            data = loader()
            x_train, y_train = data.data[:100], data.target[:100]

            n_classes_ = np.unique(y_train).size
            # make y the same shape as will be used by .fit
            if config != "MLPRegressor":
                y_train = to_categorical(y_train)
                keras_model = build_fn(
                    X=x_train, n_classes_=n_classes_, n_outputs_=1
                )
            else:
                keras_model = build_fn(X=x_train, n_outputs_=1)

            # clone to simulate uncompiled model
            keras_model = clone_model(keras_model)
            estimator = model(build_fn=keras_model)
            with pytest.raises(ValueError):
                check(estimator, loader)
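The test above passes because clone_model does not carry over compile state, so the clone looks uncompiled to the estimator. A quick hedged check of that behavior (the model definition is an arbitrary stand-in):

import tensorflow as tf

compiled = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
compiled.compile(optimizer='adam', loss='mse')
uncompiled_copy = tf.keras.models.clone_model(compiled)
print(compiled.optimizer is not None)     # True
print(uncompiled_copy.optimizer is None)  # True: compile arguments are not cloned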
Example #6
def _clone_and_build_model(model, inputs=None, targets=None):
    """Clone and build the given keras_model."""
    # We need to set the import here since we run into a circular dependency
    # error.
    from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
    cloned_model = models.clone_model(model, input_tensors=inputs)

    # Compile and build model.
    if isinstance(model.optimizer, optimizers.TFOptimizer):
        optimizer = model.optimizer
    else:
        optimizer_config = model.optimizer.get_config()
        optimizer = model.optimizer.__class__.from_config(optimizer_config)

    # TODO(priyag): Is there a cleaner way to do this? The API doc suggests a
    # single tensor should be OK but it throws an error in that case.
    if (targets is not None and not isinstance(targets, list)
            and not isinstance(targets, dict)):
        targets = [targets]
    cloned_model.compile(optimizer,
                         model.loss,
                         metrics=model.metrics,
                         loss_weights=model.loss_weights,
                         sample_weight_mode=model.sample_weight_mode,
                         weighted_metrics=model.weighted_metrics,
                         target_tensors=targets)
    return cloned_model
Example #7
    def test_gradient_tape_doesnt_crash_when_model_has_non_trainable_variables(self):
        # Given
        initial_model = Sequential([
            tf.keras.layers.Input((1,)),
            Dense(3),
            BatchNormalization(),
            Dense(7)
        ])
        initial_weights = initial_model.get_weights()
        x = np.array([[1]])

        # When
        updated_model = clone_model(initial_model)
        take_n_gradient_step(
            initial_model,
            updated_model,
            n_step=1,
            alpha=1.0,
            loss=(lambda y, p: p),
            data_x=x,
            data_y=x
        )

        # Then
        np.testing.assert_equal(initial_weights[4], updated_model.get_weights()[4])  # Moving mean
        np.testing.assert_equal(initial_weights[5], updated_model.get_weights()[5])  # Moving Variance
Example #8
    def test_2nd_order_gradient_through_updated_model(self):
        # Given
        initial_model = Sequential([
            Dense(1, use_bias=False, kernel_initializer='ones', input_shape=(1,)),
            Lambda(lambda x: x ** 2)
        ])
        x = np.array([[3]])

        updated_model = clone_model(initial_model)

        # When
        with tf.GradientTape() as outer_tape:
            take_n_gradient_step(
                initial_model,
                updated_model,
                n_step=1,
                alpha=1.0,
                loss=(lambda y, p: p),
                data_x=x,
                data_y=x
            )
            yp = updated_model(x)
        grad_of_grads = outer_tape.gradient(yp, initial_model.trainable_variables)

        # Then
        self.assertEqual(5202, grad_of_grads[0])
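For reference, the expected value in the assertion can be checked by hand from the setup above: with p(w) = (w*x)**2, x = 3, and the inner loss equal to p itself, one SGD step with alpha = 1 maps w to w - 18w = -17w, so re-evaluating through the updated model gives yp = (-17w * 3)**2 = 2601 * w**2, hence d(yp)/dw = 5202 * w, which is 5202 at the initial w = 1:

x, w, alpha = 3.0, 1.0, 1.0
w_updated = w - alpha * (2 * x**2 * w)              # inner step: dp/dw = 2*x^2*w
grad_of_grads = 2 * x**2 * (1 - alpha * 2 * x**2)**2 * w
assert (w_updated * x)**2 == 2601.0
assert grad_of_grads == 5202.0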
Example #9
def _clone_and_build_model(model, inputs=None, targets=None):
    """Clone and build the given keras_model."""
    # We need to set the import here since we run into a circular dependency
    # error.
    from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
    cloned_model = models.clone_model(model, input_tensors=inputs)

    # Compile and build model.
    if isinstance(model.optimizer, optimizers.TFOptimizer):
        optimizer = model.optimizer
    else:
        optimizer_config = model.optimizer.get_config()
        optimizer = model.optimizer.__class__.from_config(optimizer_config)

    if isinstance(targets, tuple):
        targets = nest.flatten(targets)
    cloned_model.compile(optimizer,
                         model.loss,
                         metrics=metrics_module.clone_metrics(model.metrics),
                         loss_weights=model.loss_weights,
                         sample_weight_mode=model.sample_weight_mode,
                         weighted_metrics=metrics_module.clone_metrics(
                             model.weighted_metrics),
                         target_tensors=targets)
    return cloned_model
Example #10
def _clone_and_build_model(model, strategy):
  # The new "original" model in worker 0.
  with strategy.scope():
    cloned_model = models.clone_model(model)

  # Compile and build model.
  if isinstance(model.optimizer, optimizers.TFOptimizer):
    optimizer = model.optimizer
    # TODO(yuefengz): figure out why the optimizer here is still a
    # TFOptimizer.
    while isinstance(optimizer, optimizers.TFOptimizer):
      optimizer = optimizer.optimizer
    optimizer = copy.deepcopy(optimizer)
  else:
    optimizer_config = model.optimizer.get_config()
    optimizer = type(model.optimizer).from_config(optimizer_config)

  cloned_model.compile(
      optimizer,
      model.loss,
      metrics=metrics_module.clone_metrics(model._compile_metrics),
      loss_weights=model.loss_weights,
      sample_weight_mode=model.sample_weight_mode,
      weighted_metrics=metrics_module.clone_metrics(
          model._compile_weighted_metrics))
  return cloned_model
Example #11
    def __init__(self,
                 transitions_seen_between_updates=10000,
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.target_model = clone_model(self.online_model)
        self.n_model_updates = 0
        self.transitions_seen_between_updates = transitions_seen_between_updates
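A natural companion to the constructor above is a periodic hard update that copies the online weights into the target network; the method below is a hypothetical sketch, not part of the source:

    def maybe_update_target(self, transitions_seen):
        # Hard update: overwrite the target weights with the online network's.
        if transitions_seen % self.transitions_seen_between_updates == 0:
            self.target_model.set_weights(self.online_model.get_weights())
            self.n_model_updates += 1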
Example #12
    def on_epoch_end(self, epoch, logs=None):
        acc = logs["val_accuracy"]
        if acc < min(self.top):
            return

        for i in range(self.m):
            if acc >= self.top[i] and epoch not in self.numbers:

                for j in range(self.m - 2, -1, -1):

                    self.top[j + 1] = self.top[j]
                    self.numbers[j + 1] = self.numbers[j]
                    if self.models[j]:
                        self.models[j + 1] = clone_model(self.models[j])
                self.top[i] = acc
                self.models[i] = clone_model(self.model)
                self.numbers[i] = epoch
                break
Example #13
    def __init__(self, config, network, logger, start_epoch=0):
        self.discount_factor = config['discount_factor']
        self.metrics = config['metrics']
        self.loss = config['loss']
        self.num_actions = config['num_actions']
        self.train_model = network
        self.logger = logger
        self.start_epoch = start_epoch
        self.weights = None
        self.optimizer = self.create_optimizer(config)
        self.target_model = clone_model(self.train_model)
        self.compile_train_model()
        self.logger.set_start_epoch(start_epoch)
        self.tensor_board = self.logger.get_tensor_board()
        self.tensor_board.set_model(self.train_model)
Example #14
    def test_take_5_gradient_steps(self):
        # Given
        model = Sequential([
            tf.keras.layers.Input((1,)),
            Dense(1, use_bias=False, kernel_initializer='ones'),
        ])
        updated_model = clone_model(model)
        x = np.array([[1]])
        y = np.array([[4]])

        # When
        n_step = 5
        alpha = 1.0
        take_n_gradient_step(model, updated_model, n_step, alpha, tf.keras.losses.mse, x, y)

        # Then
        self.assertIsNotNone(updated_model(x))
Example #15
def _clone_and_build_model(model, inputs=None, targets=None, mode=None):
    """Clone and build the given keras_model."""
    # We need to set the import here since we run into a circular dependency
    # error.
    from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
    cloned_model = models.clone_model(model, input_tensors=inputs)

    # Compile and build model.
    if isinstance(model.optimizer, optimizers.TFOptimizer):
        optimizer = model.optimizer
    else:
        optimizer_config = model.optimizer.get_config()
        optimizer = model.optimizer.__class__.from_config(optimizer_config)

    # Recast all low precision outputs back to float32 since we only cast
    # the inputs to bfloat16 and not the targets. This preserves precision
    # when calculating the loss value.
    def _upcast_low_precision_outputs(output):
        if output.dtype == dtypes.bfloat16:
            return math_ops.cast(output, dtypes.float32)
        else:
            return output

    cloned_model.outputs = [
        _upcast_low_precision_outputs(o) for o in cloned_model.outputs
    ]

    if isinstance(targets, tuple):
        targets = nest.flatten(targets)
    if mode == ModeKeys.PREDICT:
        _custom_compile_for_predict(cloned_model)
    else:
        cloned_model.compile(optimizer,
                             model.loss,
                             metrics=metrics_module.clone_metrics(
                                 model._compile_metrics),
                             loss_weights=model.loss_weights,
                             sample_weight_mode=model.sample_weight_mode,
                             weighted_metrics=metrics_module.clone_metrics(
                                 model._compile_weighted_metrics),
                             target_tensors=targets)
    return cloned_model
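The upcasting step above matters because bfloat16 keeps only about 8 mantissa bits, so a small residual between target and prediction can round away entirely if the loss is computed in low precision. A short illustration (values chosen purely for the demonstration):

import tensorflow as tf

y_true = tf.constant([1.003], tf.float32)
y_pred = tf.constant([1.0], tf.bfloat16)
lost = tf.cast(y_true, tf.bfloat16) - y_pred    # 1.003 rounds to 1.0: residual vanishes
kept = y_true - tf.cast(y_pred, tf.float32)     # upcast first: residual survives
print(lost.numpy(), kept.numpy())               # [0.] vs [~0.003]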
Example #16
    def test_update_weights_creates_model_with_right_weights(self):
        # Given
        initial_model = create_2_layer_MLP()
        grads = initial_model.get_weights()

        # When
        to_be_updated_model = clone_model(initial_model)
        take_n_gradient_step(
            initial_model,
            to_be_updated_model,
            n_step=1,
            alpha=1.0,
            loss=(lambda y, p: p),
            data_x=np.array([[1]]),
            data_y=np.array([[1]])
        )
        to_be_updated_model_weights = [layer.kernel for layer in to_be_updated_model.layers if layer.trainable]

        # Then
        np.testing.assert_equal(to_be_updated_model_weights[0].numpy(), np.zeros((1, 2)))
        np.testing.assert_equal(to_be_updated_model_weights[1].numpy(), np.zeros((2, 1)))
Example #17
def _clone_and_build_model(model, mode, inputs=None, targets=None):
  """Clone and build the given keras_model."""
  # We need to set the import here since we run into a circular dependency
  # error.
  from tensorflow.python.keras import models  # pylint: disable=g-import-not-at-top
  cloned_model = models.clone_model(model, input_tensors=inputs)

  # Compile and build model.
  if isinstance(model.optimizer, optimizers.TFOptimizer):
    optimizer = model.optimizer
  else:
    optimizer_config = model.optimizer.get_config()
    optimizer = model.optimizer.__class__.from_config(optimizer_config)

  # Recast all low precision outputs back to float32 since we only cast
  # the inputs to bfloat16 and not the targets. This preserves precision
  # when calculating the loss value.
  def _upcast_low_precision_outputs(output):
    if output.dtype == dtypes.bfloat16:
      return math_ops.cast(output, dtypes.float32)
    else:
      return output
  cloned_model.outputs = [_upcast_low_precision_outputs(o)
                          for o in cloned_model.outputs]

  if isinstance(targets, tuple):
    targets = nest.flatten(targets)
  if mode == ModeKeys.PREDICT and inputs is not None:  # TPU predict case
    _custom_compile_for_predict(cloned_model)
  else:
    cloned_model.compile(
        optimizer,
        model.loss,
        metrics=metrics_module.clone_metrics(model._compile_metrics),
        loss_weights=model.loss_weights,
        sample_weight_mode=model.sample_weight_mode,
        weighted_metrics=metrics_module.clone_metrics(
            model._compile_weighted_metrics),
        target_tensors=targets)
  return cloned_model
Example #18
def _clone_prebuilt_model(build_fn):
    """Clones and compiles a pre-built model when build_fn is an existing
            Keras model instance.

    Arguments:
        build_fn : instance of Keras Model.

    Returns: copy of the input model with no training.
    """
    model = clone_model(build_fn)
    # clone_model does not compy over compilation parameters, do those manually
    model_metadata = saving_utils.model_metadata(build_fn)
    if "training_config" in model_metadata:
        training_config = model_metadata["training_config"]
    else:
        raise ValueError("To use %s as `build_fn`, you must compile"
                         "it first." % build_fn)

    model.compile(
        **saving_utils.compile_args_from_training_config(training_config))

    return model
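A hedged usage sketch for the helper above (the Sequential definition is a stand-in, not from the source):

from tensorflow.keras import Sequential, layers

prebuilt = Sequential([layers.Dense(1, input_shape=(4,))])
prebuilt.compile(optimizer='adam', loss='mse')   # required, otherwise ValueError is raised
fresh_copy = _clone_prebuilt_model(prebuilt)     # same architecture and compile args, untrained weights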
Example #19
def run_kt_methods() -> None:
    """ Runs all the available KT methods. """
    methods = generate_appropriate_methods(kt_methods, temperature, kd_lambda_supervised, pkt_lambda_supervised,
                                           n_submodels)
    results = []

    for method in methods:
        kt_logging.info('Performing {}...'.format(method['name']))
        trained_student, history = knowledge_transfer(clone_model(student), method['method'], method['loss'])
        # TODO model_path = os.path.join(tempfile.gettempdir(), next(tempfile._get_candidate_names()) + '.h5')
        #  and save student model there, when we stop needing it,
        #  because it is inefficient to have it in memory until - if ever - we need to save it.
        #  That way, when the time comes, we will just need to move it to the out folder.
        results.append({
            'method': method['name'],
            'network': trained_student,
            'history': history.history,
            'evaluation': None
        })

    # Add baseline to the results list.
    results.append({
        'method': 'Teacher',
        'network': teacher,
        'history': None,
        'evaluation': None
    })

    kt_logging.info('Evaluating results...')
    evaluate_results(results)

    kt_logging.info('Saving student network(s)...')
    save_students(save_students_mode, results[:-1], out_folder, results_name_prefix)

    if save_results:
        kt_logging.info('Saving results...')
        save_res(results, join(out_folder, results_name_prefix + 'results.pkl'))
Example #20
        def _model_fn():
            """Compute fit/eval/predict for the TPU."""
            is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
            is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
            is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

            # During train/eval, we infeed our features as well as labels.
            if is_training or is_test:
                infeed_layers = self.model._input_layers + self.model._output_layers
            else:
                infeed_layers = self.model._input_layers

            # Generate our infeed operation to read features & labels.
            infeed_tensors = tpu_ops.infeed_dequeue_tuple(
                dtypes=[spec.dtype for spec in input_specs],
                shapes=[spec.shape for spec in input_specs],
                name='infeed-%s' % self.execution_mode)

            assert len(infeed_tensors) == len(infeed_layers), (
                'Infeed inputs did not match model: %s vs %s' %
                (infeed_layers, infeed_tensors))

            tpu_targets = []
            tpu_input_map = {}

            # Sort infeed outputs into inputs and labels for calling our Keras model.
            for tensor, layer in zip(infeed_tensors, infeed_layers):
                if layer in self.model._input_layers:
                    tpu_input_map[layer.name] = tensor
                if layer in self.model._output_layers:
                    tpu_targets.append(tensor)

            # Clone our CPU model, running within the TPU device context.
            with TPURewriteContext(tpu_input_map):
                # TODO(power): Replicate variables.
                with ops.device('/device:TPU:0'):
                    self._cloned_model = models.clone_model(self.model)

            # Create a copy of the optimizer for this graph.
            if isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
                cloned_optimizer = keras_optimizers.TFOptimizer(
                    self.model.optimizer.optimizer)
            else:
                logging.info('Cloning %s %s',
                             self.model.optimizer.__class__.__name__,
                             self._optimizer_config)
                cloned_optimizer = self.model.optimizer.__class__.from_config(
                    self._optimizer_config)

            if is_training or is_test:
                self._cloned_model.compile(
                    optimizer=_replicated_optimizer(cloned_optimizer),
                    loss=self.model.loss,
                    loss_weights=self.model.loss_weights,
                    metrics=self.model.metrics,
                    weighted_metrics=self.model.weighted_metrics,
                    target_tensors=tpu_targets,
                )

            # Compute our outfeed depending on the execution mode
            if is_training:
                self._cloned_model._make_train_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in self._cloned_model.train_function.outputs
                ]
                return [
                    self._cloned_model.train_function.updates_op,
                    tpu_ops.outfeed_enqueue_tuple(
                        self._cloned_model.train_function.outputs,
                        name='outfeed-enqueue-train')
                ]
            elif is_test:
                self._cloned_model._make_test_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in self._cloned_model.test_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        self._cloned_model.test_function.outputs,
                        name='outfeed-enqueue-test')
                ]
            elif is_predict:
                self._cloned_model._make_predict_function()
                self._outfeed_spec = [
                    tensor_spec.TensorSpec(tensor.shape, tensor.dtype,
                                           tensor.name)
                    for tensor in self._cloned_model.predict_function.outputs
                ]
                return [
                    tpu_ops.outfeed_enqueue_tuple(
                        self._cloned_model.predict_function.outputs,
                        name='outfeed-enqueue-predict',
                    )
                ]
            else:
                assert False, 'Unexpected execution mode: %s' % self.execution_mode
Example #21
def _clone_and_build_model(mode,
                           keras_model,
                           custom_objects,
                           features=None,
                           labels=None):
    """Clone and build the given keras_model.

  Args:
    mode: training mode.
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    features: Dict of tensors.
    labels: Dict of tensors, or single tensor instance.

  Returns:
    The newly built model.
  """
    # Set to True during training, False for inference.
    K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)

    # Get list of inputs.
    if features is None:
        input_tensors = None
    else:
        input_tensors = _create_ordered_io(keras_model,
                                           estimator_io=features,
                                           is_input=True)
    # Get list of outputs.
    if labels is None:
        target_tensors = None
    elif isinstance(labels, dict):
        target_tensors = _create_ordered_io(keras_model,
                                            estimator_io=labels,
                                            is_input=False)
    else:
        target_tensors = [_convert_tensor(labels)]

    if keras_model._is_graph_network:
        if custom_objects:
            with CustomObjectScope(custom_objects):
                model = models.clone_model(keras_model,
                                           input_tensors=input_tensors)
        else:
            model = models.clone_model(keras_model,
                                       input_tensors=input_tensors)
    else:
        model = keras_model
        _in_place_subclassed_model_reset(model)
        if input_tensors is not None:
            model._set_inputs(input_tensors)

    # Compile/Build model
    if mode is model_fn_lib.ModeKeys.PREDICT:
        if isinstance(model, models.Sequential):
            model.build()
    else:
        if isinstance(keras_model.optimizer, optimizers.TFOptimizer):
            optimizer = keras_model.optimizer
        else:
            optimizer_config = keras_model.optimizer.get_config()
            optimizer = keras_model.optimizer.__class__.from_config(
                optimizer_config)
        optimizer.iterations = training_util.get_or_create_global_step()

        model.compile(optimizer,
                      keras_model.loss,
                      metrics=keras_model.metrics,
                      loss_weights=keras_model.loss_weights,
                      sample_weight_mode=keras_model.sample_weight_mode,
                      weighted_metrics=keras_model.weighted_metrics,
                      target_tensors=target_tensors)
    return model
Example #22
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
  """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Arguments:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
      import tensorflow as tf
      from keras.applications import Xception
      from keras.utils import multi_gpu_model
      import numpy as np

      num_samples = 1000
      height = 224
      width = 224
      num_classes = 1000

      # Instantiate the base model (or "template" model).
      # We recommend doing this under a CPU device scope,
      # so that the model's weights are hosted on CPU memory.
      # Otherwise they may end up hosted on a GPU, which would
      # complicate weight sharing.
      with tf.device('/cpu:0'):
          model = Xception(weights=None,
                           input_shape=(height, width, 3),
                           classes=num_classes)

      # Replicates the model on 8 GPUs.
      # This assumes that your machine has 8 available GPUs.
      parallel_model = multi_gpu_model(model, gpus=8)
      parallel_model.compile(loss='categorical_crossentropy',
                             optimizer='rmsprop')

      # Generate dummy data.
      x = np.random.random((num_samples, height, width, 3))
      y = np.random.random((num_samples, num_classes))

      # This `fit` call will be distributed on 8 GPUs.
      # Since the batch size is 256, each GPU will process 32 samples.
      parallel_model.fit(x, y, epochs=20, batch_size=256)

      # Save model via the template model (which shares the same weights):
      model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_relocation=True)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")

       model.compile(..)
       ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_merge=False)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")
       model.compile(..)
       ..
  ```

  Raises:
    ValueError: if the `gpus` argument does not match available devices.
  """
  # pylint: disable=g-import-not-at-top
  from tensorflow.python.keras.layers.core import Lambda
  from tensorflow.python.keras.layers.merge import concatenate

  if isinstance(gpus, (list, tuple)):
    if len(gpus) <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = len(gpus)
    target_gpu_ids = gpus
  else:
    if gpus <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `gpus >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = gpus
    target_gpu_ids = range(num_gpus)

  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
  available_devices = _get_available_devices()
  available_devices = [
      _normalize_device_name(name) for name in available_devices
  ]
  for device in target_devices:
    if device not in available_devices:
      raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                       'we expect the following devices to be available: %s. '
                       'However this machine only has: %s. '
                       'Try reducing `gpus`.' % (gpus, target_devices,
                                                 available_devices))

  def get_slice(data, i, parts):
    """Slice an array into `parts` slices and return slice `i`.

    Arguments:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
    shape = array_ops.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts
    if i == parts - 1:
      size = batch_size - step * i
    else:
      size = step
    size = array_ops.concat([size, input_shape], axis=0)
    stride = array_ops.concat([step, input_shape * 0], axis=0)
    start = stride * i
    return array_ops.slice(data, start, size)

  # Relocate the model definition under CPU device scope if needed
  if cpu_relocation:
    from tensorflow.python.keras.models import clone_model  # pylint: disable=g-import-not-at-top
    with ops.device('/cpu:0'):
      model = clone_model(model)

  all_outputs = []
  for i in range(len(model.outputs)):
    all_outputs.append([])

  # Place a copy of the model on each GPU,
  # each getting a slice of the inputs.
  for i, gpu_id in enumerate(target_gpu_ids):
    with ops.device('/gpu:%d' % gpu_id):
      with ops.name_scope('replica_%d' % gpu_id):
        inputs = []
        # Retrieve a slice of the input.
        for x in model.inputs:
          input_shape = tuple(x.get_shape().as_list())[1:]
          slice_i = Lambda(
              get_slice,
              output_shape=input_shape,
              arguments={
                  'i': i,
                  'parts': num_gpus
              })(
                  x)
          inputs.append(slice_i)

        # Apply model on slice
        # (creating a model replica on the target device).
        outputs = model(inputs)
        if not isinstance(outputs, list):
          outputs = [outputs]

        # Save the outputs for merging back together later.
        for o in range(len(outputs)):
          all_outputs[o].append(outputs[o])

  # Deduplicate output names to handle Siamese networks.
  occurrences = {}
  for n in model.output_names:
    if n not in occurrences:
      occurrences[n] = 1
    else:
      occurrences[n] += 1
  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
  output_names = []
  for n in model.output_names:
    if n in conflict_counter:
      conflict_counter[n] += 1
      n += '_%d' % conflict_counter[n]
    output_names.append(n)

  # Merge outputs under expected scope.
  with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
    merged = []
    for name, outputs in zip(output_names, all_outputs):
      merged.append(concatenate(outputs, axis=0, name=name))
    return Model(model.inputs, merged)
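Note how get_slice handles batches that do not divide evenly: every replica takes batch_size // parts samples except the last, which absorbs the remainder. A standalone numeric illustration (a batch of 10 across 3 parts is an assumed example):

import numpy as np

data, parts = np.arange(10), 3
step = len(data) // parts
sizes = [len(data) - step * i if i == parts - 1 else step for i in range(parts)]
print(sizes)  # [3, 3, 4]: the last replica gets the leftover samples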
Example #23
def _clone_and_build_model(mode,
                           keras_model,
                           custom_objects,
                           features=None,
                           labels=None):
  """Clone and build the given keras_model.

  Args:
    mode: training mode.
    keras_model: an instance of compiled keras model.
    custom_objects: Dictionary for custom objects.
    features: Dict of tensors.
    labels: Dict of tensors, or single tensor instance.

  Returns:
    The newly built model.
  """
  # Set to True during training, False for inference.
  K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN)

  # Get list of inputs.
  if features is None:
    input_tensors = None
  else:
    input_tensors = _create_ordered_io(keras_model,
                                       estimator_io=features,
                                       is_input=True)
  # Get list of outputs.
  if labels is None:
    target_tensors = None
  elif isinstance(labels, dict):
    target_tensors = _create_ordered_io(keras_model,
                                        estimator_io=labels,
                                        is_input=False)
  else:
    target_tensors = [
        _convert_tensor(labels)
    ]

  if keras_model._is_graph_network:
    if custom_objects:
      with CustomObjectScope(custom_objects):
        model = models.clone_model(keras_model, input_tensors=input_tensors)
    else:
      model = models.clone_model(keras_model, input_tensors=input_tensors)
  else:
    model = keras_model
    _in_place_subclassed_model_reset(model)
    if input_tensors is not None:
      model._set_inputs(input_tensors)

  # Compile/Build model
  if mode is model_fn_lib.ModeKeys.PREDICT:
    if isinstance(model, models.Sequential):
      model.build()
  else:
    if isinstance(keras_model.optimizer, optimizers.TFOptimizer):
      optimizer = keras_model.optimizer
    else:
      optimizer_config = keras_model.optimizer.get_config()
      optimizer = keras_model.optimizer.__class__.from_config(optimizer_config)
    optimizer.iterations = training_util.get_or_create_global_step()

    model.compile(
        optimizer,
        keras_model.loss,
        metrics=keras_model.metrics,
        loss_weights=keras_model.loss_weights,
        sample_weight_mode=keras_model.sample_weight_mode,
        weighted_metrics=keras_model.weighted_metrics,
        target_tensors=target_tensors)
  return model
Example #24
    def _model_fn():
      """Compute fit/eval/predict for the TPU."""
      is_training = self.execution_mode == model_fn_lib.ModeKeys.TRAIN
      is_test = self.execution_mode == model_fn_lib.ModeKeys.EVAL
      is_predict = self.execution_mode == model_fn_lib.ModeKeys.PREDICT

      # During train/eval, we infeed our features as well as labels.
      if is_training or is_test:
        infeed_layers = self.model._input_layers + self.model._output_layers
      else:
        infeed_layers = self.model._input_layers

      # Generate our infeed operation to read features & labels.
      infeed_tensors = tpu_ops.infeed_dequeue_tuple(
          dtypes=[spec.dtype for spec in input_specs],
          shapes=[spec.shape for spec in input_specs],
          name='infeed-%s' % self.execution_mode)

      assert len(infeed_tensors) == len(infeed_layers), (
          'Infeed inputs did not match model: %s vs %s' %
          (infeed_layers, infeed_tensors))

      tpu_targets = []
      tpu_input_map = {}

      # Sort infeed outputs into inputs and labels for calling our Keras model.
      for tensor, layer in zip(infeed_tensors, infeed_layers):
        if layer in self.model._input_layers:
          tpu_input_map[layer.name] = tensor
        if layer in self.model._output_layers:
          tpu_targets.append(tensor)

      # Clone our CPU model, running within the TPU device context.
      with TPURewriteContext(tpu_input_map):
        self._cloned_model = models.clone_model(self.model)

      # Create a copy of the optimizer for this graph.
      if isinstance(self.model.optimizer, keras_optimizers.TFOptimizer):
        cloned_optimizer = keras_optimizers.TFOptimizer(
            self.model.optimizer.optimizer)
      else:
        logging.info('Cloning %s %s', self.model.optimizer.__class__.__name__,
                     self._optimizer_config)
        cloned_optimizer = self.model.optimizer.__class__.from_config(
            self._optimizer_config)

      if is_training or is_test:
        self._cloned_model.compile(
            optimizer=_replicated_optimizer(cloned_optimizer),
            loss=self.model.loss,
            loss_weights=self.model.loss_weights,
            metrics=self.model.metrics,
            weighted_metrics=self.model.weighted_metrics,
            target_tensors=tpu_targets,
        )

      # Compute our outfeed depending on the execution mode
      if is_training:
        self._cloned_model._make_train_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.train_function.outputs
        ]
        return [
            self._cloned_model.train_function.updates_op,
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.train_function.outputs,
                name='outfeed-enqueue-train')
        ]
      elif is_test:
        self._cloned_model._make_test_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.test_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.test_function.outputs,
                name='outfeed-enqueue-test')
        ]
      elif is_predict:
        self._cloned_model._make_predict_function()
        self._outfeed_spec = [
            tensor_spec.TensorSpec(tensor.shape, tensor.dtype, tensor.name)
            for tensor in self._cloned_model.predict_function.outputs
        ]
        return [
            tpu_ops.outfeed_enqueue_tuple(
                self._cloned_model.predict_function.outputs,
                name='outfeed-enqueue-predict',
            )
        ]
      else:
        assert False, 'Unexpected execution mode: %s' % self.execution_mode
Example #25
def clone_and_build_model(model,
                          input_tensors=None,
                          target_tensors=None,
                          custom_objects=None,
                          compile_clone=True,
                          in_place_reset=False,
                          optimizer_iterations=None,
                          optimizer_config=None):
    orig_optimizer = model.optimizer
    if compile_clone and not orig_optimizer:
        raise ValueError(
            'Error when cloning model: compile_clone was set to True, but the '
            'original model has not been compiled.')

    if model._is_graph_network or isinstance(model, Sequential):
        if custom_objects:
            with CustomObjectScope(custom_objects):
                clone = models.clone_model(model, input_tensors=input_tensors)
        else:
            clone = models.clone_model(model, input_tensors=input_tensors)

        if all([
                isinstance(clone, Sequential), not clone._is_graph_network,
                getattr(model, '_build_input_shape', None) is not None
        ]):
            clone._set_inputs(
                K.placeholder(model._build_input_shape,
                              dtype=model.inputs[0].dtype))
    else:
        if not in_place_reset:
            raise ValueError(
                'Subclassed models can only be cloned in place; '
                'pass in_place_reset=True.')
        clone = model
        _in_place_subclassed_model_reset(clone)
        if input_tensors is not None:
            if isinstance(input_tensors,
                          (list, tuple)) and len(input_tensors) == 1:
                input_tensors = input_tensors[0]
            clone._set_inputs(input_tensors)

    if compile_clone:
        if isinstance(orig_optimizer, optimizers.TFOptimizer):
            optimizer = optimizers.TFOptimizer(orig_optimizer.optimizer,
                                               optimizer_iterations)
            K.track_tf_optimizer(optimizer)
        else:
            optimizer_config = optimizer_config or orig_optimizer.get_config()
            # Note: Horovod's DistributedOptimizer (e.g. horovod._keras.Adam)
            # is a dynamically created wrapper subclass whose __init__ does not
            # accept the Keras optimizer config, so from_config() has to be
            # called on the wrapped base class instead (handled below).
            if "horovod._keras" not in str(type(orig_optimizer)):
                optimizer = orig_optimizer.__class__.from_config(
                    optimizer_config)
            else:
                optimizer = orig_optimizer.__class__.__bases__[0].from_config(
                    optimizer_config)
            if optimizer_iterations is not None:
                optimizer.iterations = optimizer_iterations

        clone.compile(optimizer,
                      model.loss,
                      metrics=metrics_module.clone_metrics(
                          model._compile_metrics),
                      loss_weights=model.loss_weights,
                      sample_weight_mode=model.sample_weight_mode,
                      weighted_metrics=metrics_module.clone_metrics(
                          model._compile_weighted_metrics),
                      target_tensors=target_tensors)
    return clone
Example #26
def clone_and_build_model(model,
                          input_tensors=None,
                          target_tensors=None,
                          custom_objects=None,
                          compile_clone=True,
                          in_place_reset=False,
                          optimizer_iterations=None):
    """1.13"""
    if compile_clone and not model.optimizer:
        raise ValueError(
            'Error when cloning model: compile_clone was set to True, but the '
            'original model has not been compiled.')

    if model._is_graph_network or isinstance(model, Sequential):
        if custom_objects:
            with CustomObjectScope(custom_objects):
                clone = models.clone_model(model, input_tensors=input_tensors)
        else:
            clone = models.clone_model(model, input_tensors=input_tensors)

        if all([
                isinstance(clone, Sequential),
                not clone._is_graph_network,
                getattr(model, '_build_input_shape', None) is not None
        ]):
            clone._set_inputs(
                K.placeholder(model._build_input_shape,
                              dtype=model.inputs[0].dtype))
    else:
        if not in_place_reset:
            raise ValueError(
                'Subclassed models can only be cloned in place; '
                'pass in_place_reset=True.')
        clone = model
        _in_place_subclassed_model_reset(clone)
        if input_tensors is not None:
            if isinstance(input_tensors,
                          (list, tuple)) and len(input_tensors) == 1:
                input_tensors = input_tensors[0]
            clone._set_inputs(input_tensors)

    if compile_clone and model.optimizer:
        if isinstance(model.optimizer, optimizers.TFOptimizer):
            optimizer = optimizers.TFOptimizer(model.optimizer.optimizer,
                                               optimizer_iterations)
            K.track_tf_optimizer(optimizer)
        else:
            optimizer_config = model.optimizer.get_config()
            optimizer = model.optimizer.__class__.from_config(optimizer_config)
            if optimizer_iterations is not None:
                optimizer.iterations = optimizer_iterations

        clone.compile(optimizer,
                      model.loss,
                      metrics=metrics_module.clone_metrics(
                          model._compile_metrics),
                      loss_weights=model.loss_weights,
                      sample_weight_mode=model.sample_weight_mode,
                      weighted_metrics=metrics_module.clone_metrics(
                          model._compile_weighted_metrics),
                      target_tensors=target_tensors)

    return clone
Example #27
    def pre_build(self, *args):
        self.model = clone_model(self.model)
Example #28
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
  """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Args:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
      import tensorflow as tf
      from keras.applications import Xception
      from keras.utils import multi_gpu_model
      import numpy as np

      num_samples = 1000
      height = 224
      width = 224
      num_classes = 1000

      # Instantiate the base model (or "template" model).
      # We recommend doing this under a CPU device scope,
      # so that the model's weights are hosted on CPU memory.
      # Otherwise they may end up hosted on a GPU, which would
      # complicate weight sharing.
      with tf.device('/cpu:0'):
          model = Xception(weights=None,
                           input_shape=(height, width, 3),
                           classes=num_classes)

      # Replicates the model on 8 GPUs.
      # This assumes that your machine has 8 available GPUs.
      parallel_model = multi_gpu_model(model, gpus=8)
      parallel_model.compile(loss='categorical_crossentropy',
                             optimizer='rmsprop')

      # Generate dummy data.
      x = np.random.random((num_samples, height, width, 3))
      y = np.random.random((num_samples, num_classes))

      # This `fit` call will be distributed on 8 GPUs.
      # Since the batch size is 256, each GPU will process 32 samples.
      parallel_model.fit(x, y, epochs=20, batch_size=256)

      # Save model via the template model (which shares the same weights):
      model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_relocation=True)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")

       model.compile(..)
       ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_merge=False)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")
       model.compile(..)
       ..
  ```

  Raises:
    ValueError: if the `gpus` argument does not match available devices.
  """
  if isinstance(gpus, (list, tuple)):
    if len(gpus) <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = len(gpus)
    target_gpu_ids = gpus
  else:
    if gpus <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `gpus >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = gpus
    target_gpu_ids = range(num_gpus)

  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
  available_devices = _get_available_devices()
  available_devices = [
      _normalize_device_name(name) for name in available_devices
  ]
  for device in target_devices:
    if device not in available_devices:
      raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                       'we expect the following devices to be available: %s. '
                       'However this machine only has: %s. '
                       'Try reducing `gpus`.' % (gpus, target_devices,
                                                 available_devices))

  def get_slice(data, i, parts):
    """Slice an array into `parts` slices and return slice `i`.

    Args:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
    shape = array_ops.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts
    if i == parts - 1:
      size = batch_size - step * i
    else:
      size = step
    size = array_ops.concat([size, input_shape], axis=0)
    stride = array_ops.concat([step, input_shape * 0], axis=0)
    start = stride * i
    return array_ops.slice(data, start, size)

  # Relocate the model definition under CPU device scope if needed
  if cpu_relocation:
    from tensorflow.python.keras.models import clone_model  # pylint: disable=g-import-not-at-top
    with ops.device('/cpu:0'):
      model = clone_model(model)

  all_outputs = [[] for _ in range(len(model.outputs))]

  # Place a copy of the model on each GPU,
  # each getting a slice of the inputs.
  for i, gpu_id in enumerate(target_gpu_ids):
    with ops.device('/gpu:%d' % gpu_id):
      with backend.name_scope('replica_%d' % gpu_id):
        inputs = []
        # Retrieve a slice of the input.
        for x in model.inputs:
          input_shape = tuple(x.shape.as_list())[1:]
          slice_i = Lambda(
              get_slice,
              output_shape=input_shape,
              arguments={
                  'i': i,
                  'parts': num_gpus
              })(
                  x)
          inputs.append(slice_i)

        # Apply model on slice
        # (creating a model replica on the target device).
        outputs = model(inputs)
        if not isinstance(outputs, list):
          outputs = [outputs]

        # Save the outputs for merging back together later.
        for o, output in enumerate(outputs):
          all_outputs[o].append(output)

  # Deduplicate output names to handle Siamese networks.
  occurrences = {}
  for n in model.output_names:
    if n not in occurrences:
      occurrences[n] = 1
    else:
      occurrences[n] += 1
  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
  output_names = []
  for n in model.output_names:
    if n in conflict_counter:
      conflict_counter[n] += 1
      n += '_%d' % conflict_counter[n]
    output_names.append(n)

  # Merge outputs under expected scope.
  with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
    merged = []
    for name, outputs in zip(output_names, all_outputs):
      merged.append(concatenate(outputs, axis=0, name=name))
    return Model(model.inputs, merged)