Example #1
  def __call__(self, y_true, y_pred, sample_weight=None):
    """Invokes the `Loss` instance.

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.
      sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
        as `y_true`, or is broadcastable to `y_true`. `sample_weight` acts as a
        coefficient for the loss. If a scalar is provided, then the loss is
        simply scaled by the given value. If `sample_weight` is a tensor of size
        `[batch_size]`, then the total loss for each sample of the batch is
        rescaled by the corresponding element in the `sample_weight` vector. If
        the shape of `sample_weight` matches the shape of `y_pred`, then the
        loss of each measurable element of `y_pred` is scaled by the
        corresponding value of `sample_weight`.

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
        shape as `y_true`; otherwise, it is scalar.

    Raises:
      ValueError: If the shape of `sample_weight` is invalid.
    """
    # If we are wrapping a lambda function, strip '<>' from the name as it is
    # not accepted in scope names.
    scope_name = 'lambda' if self.name == '<lambda>' else self.name
    graph_ctx = tf_utils.graph_context_for_symbolic_tensors(
        y_true, y_pred, sample_weight)
    with K.name_scope(scope_name or self.__class__.__name__), graph_ctx:
      losses = self.call(y_true, y_pred)
      return losses_utils.compute_weighted_loss(
          losses, sample_weight, reduction=self._get_reduction())
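To illustrate the `sample_weight` contract described in the docstring above, here is a small, hypothetical sketch using the public `tf.keras.losses.MeanSquaredError` class; the tensor values are made up for illustration only.

import tensorflow as tf

y_true = tf.constant([[0.0], [1.0], [2.0], [3.0]])
y_pred = tf.constant([[1.0], [1.0], [1.0], [1.0]])

mse = tf.keras.losses.MeanSquaredError()

# Scalar weight: the reduced loss is simply scaled by 0.5.
print(mse(y_true, y_pred, sample_weight=0.5))

# Weights of shape [batch_size]: each sample's loss is rescaled before
# the reduction is applied; the last two samples are masked out here.
print(mse(y_true, y_pred, sample_weight=tf.constant([1.0, 1.0, 0.0, 0.0])))

# With Reduction.NONE the weighted but unreduced per-sample losses are
# returned instead of a scalar.
mse_none = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE)
print(mse_none(y_true, y_pred))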
Example #2
def compute_weighted_loss(losses,
                          sample_weight=None,
                          reduction=ReductionV2.SUM_OVER_BATCH_SIZE,
                          name=None):
  """Computes the weighted loss.

  Args:
    losses: `Tensor` of shape `[batch_size, d1, ... dN]`.
    sample_weight: Optional `Tensor` whose rank is either 0, or the same rank
      as `losses`, or is broadcastable to `losses`.
    reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to loss.
      Default value is `SUM_OVER_BATCH_SIZE`.
    name: Optional name for the op.

  Raises:
    ValueError: If the shape of `sample_weight` is not compatible with `losses`.

  Returns:
    Weighted loss `Tensor` of the same type as `losses`. If `reduction` is
    `NONE`, this has the same shape as `losses`; otherwise, it is scalar.
  """
  ReductionV2.validate(reduction)

  # If this function is called directly, then we just default 'AUTO' to
  # 'SUM_OVER_BATCH_SIZE', e.g. for canned estimator use cases.
  if reduction == ReductionV2.AUTO:
    reduction = ReductionV2.SUM_OVER_BATCH_SIZE
  if sample_weight is None:
    sample_weight = 1.0
  with K.name_scope(name or 'weighted_loss'):
    # Save the `reduction` argument for loss normalization when distributing
    # to multiple replicas. Used only for estimator + v1 optimizer flow.
    ops.get_default_graph()._last_loss_reduction = reduction  # pylint: disable=protected-access

    # Update dimensions of `sample_weight` to match with `losses` if possible.
    losses, _, sample_weight = squeeze_or_expand_dimensions(
        losses, None, sample_weight)
    losses = ops.convert_to_tensor(losses)
    input_dtype = losses.dtype
    losses = math_ops.cast(losses, dtypes.float32)
    sample_weight = math_ops.cast(sample_weight, dtypes.float32)

    try:
      # Broadcast weights if possible.
      sample_weight = weights_broadcast_ops.broadcast_weights(
          sample_weight, losses)
    except ValueError:
      # Reduce values to same ndim as weight array.
      ndim = K.ndim(losses)
      weight_ndim = K.ndim(sample_weight)
      losses = K.mean(losses, axis=list(range(weight_ndim, ndim)))

    sample_weight.shape.assert_is_compatible_with(losses.shape)
    weighted_losses = math_ops.multiply(losses, sample_weight)
    # Apply reduction function to the individual weighted losses.
    loss = reduce_weighted_loss(weighted_losses, reduction)
    # Convert the result back to the input type.
    loss = math_ops.cast(loss, input_dtype)
    return loss
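For reference, the weighting-and-reduction contract implemented by this helper can be mirrored with public TensorFlow ops. The following is only a simplified sketch of the `SUM_OVER_BATCH_SIZE` case with made-up values, not the helper itself.

import tensorflow as tf

losses = tf.constant([0.5, 1.0, 1.5, 2.0])         # per-sample losses
sample_weight = tf.constant([1.0, 1.0, 0.5, 0.0])  # broadcastable weights

weighted = losses * sample_weight
# SUM_OVER_BATCH_SIZE divides by the total number of elements, not by the
# sum of the weights.
loss = tf.reduce_sum(weighted) / tf.cast(tf.size(weighted), weighted.dtype)
print(loss)  # (0.5 + 1.0 + 0.75 + 0.0) / 4 = 0.5625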
Example #3
  def _distributed_apply(self, distribution, grads_and_vars, name):
    """`apply_gradients` using a `DistributionStrategy`."""
    reduced_grads = distribution.extended.batch_reduce_to(
        ds_reduce_util.ReduceOp.SUM, grads_and_vars)
    var_list = [v for _, v in grads_and_vars]
    grads_and_vars = zip(reduced_grads, var_list)

    def apply_grad_to_update_var(var, grad):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)
      if isinstance(grad, ops.IndexedSlices):
        if var.constraint is not None:
          raise RuntimeError(
              "Cannot use a constraint function on a sparse variable.")
        return self._resource_apply_sparse_duplicate_indices(
            grad.values, var, grad.indices)
      update_op = self._resource_apply_dense(grad, var)
      if var.constraint is not None:
        with ops.control_dependencies([update_op]):
          return var.assign(var.constraint(var))
      else:
        return update_op

    update_ops = []
    with backend.name_scope(name or self._name):
      for grad, var in grads_and_vars:
        scope_name = ("" if ops.executing_eagerly_outside_functions() else
                      "_" + var.op.name)
        with backend.name_scope("update" + scope_name):
          update_ops.extend(
              distribution.extended.update(
                  var, apply_grad_to_update_var, args=(grad,), group=False))

      any_symbolic = any(isinstance(i, ops.Operation) or
                         tf_utils.is_symbolic_tensor(i) for i in update_ops)
      if not context.executing_eagerly() or any_symbolic:
        # If the current context is graph mode or any of the update ops are
        # symbolic then the step update should be carried out under a graph
        # context. (eager updates execute immediately)
        with ops._get_graph_from_inputs(update_ops).as_default():  # pylint: disable=protected-access
          with ops.control_dependencies(update_ops):
            return self._iterations.assign_add(1).op

      return self._iterations.assign_add(1)
Example #4
 def __init__(self, optimizer, iterations=None):  # pylint: disable=super-init-not-called
   self.optimizer = optimizer
   self._track_checkpointable(optimizer, name='optimizer')
   if iterations is None:
     with K.name_scope(self.__class__.__name__):
       self.iterations = K.variable(0, dtype='int64', name='iterations')
   else:
     self.iterations = iterations
   self._track_checkpointable(self.iterations, name='global_step')
Example #5
 def __init__(self, lr=0.01, momentum=0., decay=0., nesterov=False, **kwargs):
   super(SGD, self).__init__(**kwargs)
   with K.name_scope(self.__class__.__name__):
     self.iterations = K.variable(0, dtype='int64', name='iterations')
     self.lr = K.variable(lr, name='lr')
     self.momentum = K.variable(momentum, name='momentum')
     self.decay = K.variable(decay, name='decay')
   self.initial_decay = decay
   self.nesterov = nesterov
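The `K.name_scope(self.__class__.__name__)` block used by these optimizer constructors groups the created ops under the class name. A minimal sketch of the effect with the public `tf.name_scope`, assuming a graph context so that op names are meaningful:

import tensorflow as tf

g = tf.Graph()
with g.as_default():
    with tf.name_scope('SGD'):
        lr = tf.constant(0.01, name='lr')
        iterations = tf.constant(0, dtype=tf.int64, name='iterations')

print(lr.op.name)          # -> 'SGD/lr'
print(iterations.op.name)  # -> 'SGD/iterations'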
Example #6
 def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs):
   super(Adagrad, self).__init__(**kwargs)
   with K.name_scope(self.__class__.__name__):
     self.lr = K.variable(lr, name='lr')
     self.decay = K.variable(decay, name='decay')
     self.iterations = K.variable(0, dtype='int64', name='iterations')
   if epsilon is None:
     epsilon = K.epsilon()
   self.epsilon = epsilon
   self.initial_decay = decay
Example #7
def _separable_conv_block(ip,
                          filters,
                          kernel_size=(3, 3),
                          strides=(1, 1),
                          block_id=None):
  """Adds 2 blocks of [relu-separable conv-batchnorm].

  Arguments:
      ip: Input tensor
      filters: Number of output filters per layer
      kernel_size: Kernel size of separable convolutions
      strides: Strided convolution for downsampling
      block_id: String block_id

  Returns:
      A Keras tensor
  """
  channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

  with K.name_scope('separable_conv_block_%s' % block_id):
    x = Activation('relu')(ip)
    x = SeparableConv2D(
        filters,
        kernel_size,
        strides=strides,
        name='separable_conv_1_%s' % block_id,
        padding='same',
        use_bias=False,
        kernel_initializer='he_normal')(
            x)
    x = BatchNormalization(
        axis=channel_dim,
        momentum=0.9997,
        epsilon=1e-3,
        name='separable_conv_1_bn_%s' % (block_id))(
            x)
    x = Activation('relu')(x)
    x = SeparableConv2D(
        filters,
        kernel_size,
        name='separable_conv_2_%s' % block_id,
        padding='same',
        use_bias=False,
        kernel_initializer='he_normal')(
            x)
    x = BatchNormalization(
        axis=channel_dim,
        momentum=0.9997,
        epsilon=1e-3,
        name='separable_conv_2_bn_%s' % (block_id))(
            x)
  return x
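The same relu → separable conv → batch norm pattern can be reproduced with the public `tf.keras.layers` API; the input shape and filter count in this sketch are arbitrary.

import tensorflow as tf
from tensorflow.keras import layers

inputs = tf.keras.Input(shape=(32, 32, 16))
x = layers.Activation('relu')(inputs)
x = layers.SeparableConv2D(32, (3, 3), strides=(1, 1), padding='same',
                           use_bias=False,
                           kernel_initializer='he_normal')(x)
x = layers.BatchNormalization(momentum=0.9997, epsilon=1e-3)(x)
x = layers.Activation('relu')(x)
x = layers.SeparableConv2D(32, (3, 3), padding='same', use_bias=False,
                           kernel_initializer='he_normal')(x)
x = layers.BatchNormalization(momentum=0.9997, epsilon=1e-3)(x)
model = tf.keras.Model(inputs, x)
model.summary()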
Example #8
 def __init__(self,
              lr=0.002,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=None,
              decay=0.,
              **kwargs):
   super(Adamax, self).__init__(**kwargs)
   with K.name_scope(self.__class__.__name__):
     self.iterations = K.variable(0, dtype='int64', name='iterations')
     self.lr = K.variable(lr, name='lr')
     self.beta_1 = K.variable(beta_1, name='beta_1')
     self.beta_2 = K.variable(beta_2, name='beta_2')
     self.decay = K.variable(decay, name='decay')
   if epsilon is None:
     epsilon = K.epsilon()
   self.epsilon = epsilon
   self.initial_decay = decay
Example #9
 def __init__(self,
              lr=0.002,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=None,
              schedule_decay=0.004,
              **kwargs):
   super(Nadam, self).__init__(**kwargs)
   with K.name_scope(self.__class__.__name__):
     self.iterations = K.variable(0, dtype='int64', name='iterations')
     self.m_schedule = K.variable(1., name='m_schedule')
     self.lr = K.variable(lr, name='lr')
     self.beta_1 = K.variable(beta_1, name='beta_1')
     self.beta_2 = K.variable(beta_2, name='beta_2')
   if epsilon is None:
     epsilon = K.epsilon()
   self.epsilon = epsilon
   self.schedule_decay = schedule_decay
Example #10
def _eager_metrics_fn(model, outputs, targets):
  """Calculates the metrics for each output of the given model.

  Arguments:
      model: The model on which metrics are being calculated.
      outputs: The outputs of the given model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the metric names and metric results for each output of the model.
  """
  metric_names = []
  metric_results = []
  if not isinstance(outputs, list):
    outputs = [outputs]

  if not isinstance(targets, list):
    targets = [targets]

  for i in range(len(model.outputs)):
    output_metrics = model.nested_metrics[i]
    for nested_output_metric in output_metrics:
      metric_name, metric_fn = _get_metrics_info(
          nested_output_metric, backend.int_shape(model.outputs[i]),
          model.loss_functions[i])

      if len(model.output_names) > 1:
        metric_name = model.output_names[i] + '_' + metric_name
        if metric_name not in model.metrics_names:
          model.metrics_names.append(metric_name)

      with backend.name_scope(metric_name):
        metric_result = metric_fn(targets[i], outputs[i])
        metric_names.append(metric_name)
        metric_results.append(backend.mean(metric_result))

  return metric_results
Example #11
def _eager_metrics_fn(model, outputs, targets):
    """Calculates the metrics for each output of the given model.

  Arguments:
      model: The model on which metrics are being calculated.
      outputs: The outputs of the given model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the metric names and metric results for each output of the model.
  """
    metric_names = []
    metric_results = []
    if not isinstance(outputs, list):
        outputs = [outputs]

    if not isinstance(targets, list):
        targets = [targets]

    for i in range(len(model.outputs)):
        output_metrics = model.nested_metrics[i]
        for nested_output_metric in output_metrics:
            metric_name, metric_fn = _get_metrics_info(
                nested_output_metric, backend.int_shape(model.outputs[i]),
                model.loss_functions[i])

            if len(model.output_names) > 1:
                metric_name = model.output_names[i] + '_' + metric_name
                if metric_name not in model.metrics_names:
                    model.metrics_names.append(metric_name)

            with backend.name_scope(metric_name):
                metric_result = metric_fn(targets[i], outputs[i])
                metric_names.append(metric_name)
                metric_results.append(backend.mean(metric_result))

    return metric_results
Example #12
 def __init__(self,
              lr=0.001,
              beta_1=0.9,
              beta_2=0.999,
              epsilon=None,
              decay=0.,
              amsgrad=False,
              multipliers=None,
              debug_verbose=False,
              **kwargs):
     super(AdvancedAdam, self).__init__(**kwargs)
     with K.name_scope(self.__class__.__name__):
         self.iterations = K.variable(0, dtype='int64', name='iterations')
         self.lr = K.variable(lr, name='lr')
         self.beta_1 = K.variable(beta_1, name='beta_1')
         self.beta_2 = K.variable(beta_2, name='beta_2')
         self.decay = K.variable(decay, name='decay')
     if epsilon is None:
         epsilon = K.epsilon()
     self.epsilon = epsilon
     self.initial_decay = decay
     self.amsgrad = amsgrad
     self.multipliers = multipliers
     self.debug_verbose = debug_verbose
Example #13
    def get_gradients(self, loss, params):
        """Returns gradients of `loss` with respect to `params`.

    Arguments:
      loss: Loss tensor.
      params: List of variables.

    Returns:
      List of gradient tensors.

    Raises:
      ValueError: In case any gradient cannot be computed (e.g. if gradient
        function not implemented).
    """
        params = nest.flatten(params)
        with backend.get_graph().as_default(), backend.name_scope(
                self._name + "/gradients"):
            grads = gradients.gradients(loss, params)
            for grad, param in zip(grads, params):
                if grad is None:
                    raise ValueError(
                        "Variable {} has `None` for gradient. "
                        "Please make sure that all of your ops have a "
                        "gradient defined (i.e. are differentiable). "
                        "Common ops without gradient: "
                        "K.argmax, K.round, K.eval.".format(param))
            if hasattr(self, "clipnorm"):
                grads = [
                    clip_ops.clip_by_norm(g, self.clipnorm) for g in grads
                ]
            if hasattr(self, "clipvalue"):
                grads = [
                    clip_ops.clip_by_value(g, -self.clipvalue, self.clipvalue)
                    for g in grads
                ]
        return grads
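The `clipnorm`/`clipvalue` handling above corresponds to the clipping arguments accepted by the public Keras optimizers. A short sketch of both the constructor-level and the manual variant; the variable values are placeholders.

import tensorflow as tf

# Constructor-level equivalent of the `clipnorm` branch above.
opt = tf.keras.optimizers.SGD(learning_rate=0.01, clipnorm=1.0)

# Manual equivalent of the `clipvalue` branch, using a GradientTape.
w = tf.Variable([2.0, -3.0])
with tf.GradientTape() as tape:
    loss = tf.reduce_sum(w * w)
grads = tape.gradient(loss, [w])
grads = [tf.clip_by_value(g, -0.5, 0.5) for g in grads]
tf.keras.optimizers.SGD(learning_rate=0.01).apply_gradients(zip(grads, [w]))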
Example #14
    def __call__(self, ip):
        with backend.name_scope('separable_conv_block_%s' % self.block_id):
            x = layers.Activation('relu')(ip)
            if self.strides == 2:
                x = layers.ZeroPadding1D(padding=self.kernel_size,
                                         name='separable_conv_1_pad_%s' %
                                         self.block_id)(x)
                conv_pad = 'valid'
            else:
                conv_pad = 'same'

            x = layers.SeparableConv1D(self.filters,
                                       self.kernel_size,
                                       strides=self.strides,
                                       name='separable_conv_1_%s' %
                                       self.block_id,
                                       padding=conv_pad,
                                       use_bias=False,
                                       kernel_initializer='he_normal')(x)
            x = layers.BatchNormalization(momentum=0.9997,
                                          epsilon=1e-3,
                                          name='separable_conv_1_bn_%s' %
                                          self.block_id)(x)
            x = layers.Activation('relu')(x)
            x = layers.SeparableConv1D(self.filters,
                                       self.kernel_size,
                                       name='separable_conv_2_%s' %
                                       self.block_id,
                                       padding='same',
                                       use_bias=False,
                                       kernel_initializer='he_normal')(x)
            x = layers.BatchNormalization(momentum=0.9997,
                                          epsilon=1e-3,
                                          name='separable_conv_2_bn_%s' %
                                          self.block_id)(x)
        return x
Example #15
    def inference(self, inputs, *args, **kwargs):

        call_context = base_layer_utils.call_context()
        input_list = nest.flatten(inputs)

        # We will attempt to build a TF graph if & only if all inputs are symbolic.
        # This is always the case in graph mode. It can also be the case in eager
        # mode when all inputs can be traced back to `keras.Input()` (when building
        # models using the functional API).
        build_graph = tf_utils.are_all_symbolic_tensors(input_list)

        # Accept NumPy and scalar inputs by converting to Tensors.
        if any(isinstance(x, (np.ndarray, float, int)) for x in input_list):
            def _convert_non_tensor(x):
                # Don't call `ops.convert_to_tensor` on all `inputs` because
                # `SparseTensors` can't be converted to `Tensor`.
                if isinstance(x, (np.ndarray, float, int)):
                    return ops.convert_to_tensor(x)
                return x
            inputs = nest.map_structure(_convert_non_tensor, inputs)
            input_list = nest.flatten(inputs)

        # Handle `mask` propagation from previous layer to current layer. Masks can
        # be propagated explicitly via the `mask` argument, or implicitly via
        # setting the `_keras_mask` attribute on the inputs to a Layer. Masks passed
        # explicitly take priority.
        mask_arg_passed_by_framework = False
        input_masks = self._collect_input_masks(inputs, args, kwargs)
        if (self._expects_mask_arg and input_masks is not None and
                not self._call_arg_was_passed('mask', args, kwargs)):
            mask_arg_passed_by_framework = True
            kwargs['mask'] = input_masks

        # If `training` argument was not explicitly passed, propagate `training`
        # value from this layer's calling layer.
        training_arg_passed_by_framework = False
        # Priority 1: `training` was explicitly passed.
        if self._call_arg_was_passed('training', args, kwargs):
            training_value = self._get_call_arg_value('training', args, kwargs)
            if not self._expects_training_arg:
                kwargs.pop('training')
        else:
            training_value = None
            # Priority 2: `training` was passed to a parent layer.
            if call_context.training is not None:
                training_value = call_context.training
            # Priority 3a: `learning_phase()` has been set.
            elif backend.global_learning_phase_is_set():
                training_value = backend.learning_phase()
            # Priority 3b: Pass the `learning_phase()` if in the Keras FuncGraph.
            elif build_graph:
                with backend.get_graph().as_default():
                    if base_layer_utils.is_in_keras_graph():
                        training_value = backend.learning_phase()

            if self._expects_training_arg and training_value is not None:
                # Force the training_value to be bool type which matches to the contract
                # for layer/model call args.
                if tensor_util.is_tensor(training_value):
                    training_value = math_ops.cast(training_value, dtypes.bool)
                else:
                    training_value = bool(training_value)
                kwargs['training'] = training_value
                training_arg_passed_by_framework = True

        # Only create Keras history if at least one tensor originates from a
        # `keras.Input`. Otherwise this Layer may be being used outside the Keras
        # framework.
        if build_graph and base_layer_utils.needs_keras_history(inputs):
            base_layer_utils.create_keras_history(inputs)

        # Clear eager losses on top level model call.
        # We are clearing the losses only on the top level model call and not on
        # every layer/model call because layer/model may be reused.
        if (base_layer_utils.is_in_eager_or_tf_function() and
                not call_context.in_call):
            self._clear_losses()

        with call_context.enter(self, inputs, build_graph, training_value):
            # Check input assumptions set after layer building, e.g. input shape.
            if build_graph:
                # Symbolic execution on symbolic tensors. We will attempt to build
                # the corresponding TF subgraph inside `backend.get_graph()`
                # TODO(reedwm): We should assert input compatibility after the inputs
                # are casted, not before.
                input_spec.assert_input_compatibility(self.input_spec, inputs,
                                                      self.name)
                if (any(isinstance(x, ragged_tensor.RaggedTensor) for x in input_list)
                        and self._supports_ragged_inputs is False):  # pylint: disable=g-bool-id-comparison
                    raise ValueError('Layer %s does not support RaggedTensors as input. '
                                     'Inputs received: %s. You can try converting your '
                                     'input to a uniform tensor.' % (self.name, inputs))

                graph = backend.get_graph()
                with graph.as_default(), backend.name_scope(self._name_scope()):
                    # Build layer if applicable (if the `build` method has been
                    # overridden).
                    self._maybe_build(inputs)
                    cast_inputs = self._maybe_cast_inputs(inputs)

                    # Wrapping `call` function in autograph to allow for dynamic control
                    # flow and control dependencies in call. We are limiting this to
                    # subclassed layers as autograph is strictly needed only for
                    # subclassed layers and models.
                    # tf_convert will respect the value of autograph setting in the
                    # enclosing tf.function, if any.
                    if (base_layer_utils.is_subclassed(self) and
                            not base_layer_utils.from_saved_model(self)):
                        call_fn = autograph.tf_convert(
                                self._inference, ag_ctx.control_status_ctx())
                    else:
                        call_fn = self._inference

                    if not self.dynamic:
                        try:
                            with base_layer_utils.autocast_context_manager(
                                    self._compute_dtype):
                                # Add auto_control_deps in V2 when they are not already added by
                                # a `tf.function`.
                                if (ops.executing_eagerly_outside_functions() and
                                        not base_layer_utils.is_in_eager_or_tf_function()):
                                    with auto_control_deps.AutomaticControlDependencies() as acd:
                                        outputs = call_fn(cast_inputs, *args, **kwargs)
                                        # Wrap Tensors in `outputs` in `tf.identity` to avoid
                                        # circular dependencies.
                                        outputs = base_layer_utils.mark_as_return(outputs, acd)
                                else:
                                    outputs = call_fn(cast_inputs, *args, **kwargs)

                        except errors.OperatorNotAllowedInGraphError as e:
                            raise TypeError('You are attempting to use Python control '
                                            'flow in a layer that was not declared to be '
                                            'dynamic. Pass `dynamic=True` to the class '
                                            'constructor.\nEncountered error:\n"""\n' +
                                            str(e) + '\n"""')
                    else:
                        # We will use static shape inference to return symbolic tensors
                        # matching the specifications of the layer outputs.
                        # Since `self.dynamic` is True, we will never attempt to
                        # run the underlying TF graph (which is disconnected).
                        # TODO(fchollet): consider py_func as an alternative, which
                        # would enable us to run the underlying graph if needed.
                        outputs = self._symbolic_call(inputs)

                    if outputs is None:
                        raise ValueError('A layer\'s `call` method should return a '
                                         'Tensor or a list of Tensors, not None '
                                         '(layer: ' + self.name + ').')
                    if base_layer_utils.have_all_keras_metadata(inputs):
                        if training_arg_passed_by_framework:
                            kwargs.pop('training')
                        if mask_arg_passed_by_framework:
                            kwargs.pop('mask')
                        inputs, outputs = self._set_connectivity_metadata_(
                                inputs, outputs, args, kwargs)
                    self._handle_activity_regularization(inputs, outputs)
                    self._set_mask_metadata(inputs, outputs, input_masks)
                    if hasattr(self, '_set_inputs') and not self.inputs:
                        # Subclassed network: explicitly set metadata normally set by
                        # a call to self._set_inputs().
                        # TODO(b/120997007): This should be done in Eager as well, but
                        # causes garbage collection issues because of the placeholders
                        # created on the default Keras graph.
                        self._set_inputs(inputs, outputs)
            else:
                # Eager execution on data tensors.
                with backend.name_scope(self._name_scope()):
                    self._maybe_build(inputs)
                    cast_inputs = self._maybe_cast_inputs(inputs)
                    with base_layer_utils.autocast_context_manager(
                            self._compute_dtype):
                        outputs = self._inference(cast_inputs, *args, **kwargs)
                    self._handle_activity_regularization(inputs, outputs)
                    self._set_mask_metadata(inputs, outputs, input_masks)

        return outputs
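Most of the bookkeeping above (lazy building, `training` propagation, mask handling) is transparent to user code. A minimal subclassed layer that participates in it might look like the following sketch; the layer name, noise level, and shapes are illustrative only.

import tensorflow as tf

class NoisyDense(tf.keras.layers.Layer):
    """Toy layer: a dense projection with noise that is only active in training."""

    def __init__(self, units, **kwargs):
        super(NoisyDense, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # Called lazily on the first invocation (see `_maybe_build` above).
        self.kernel = self.add_weight(
            'kernel', shape=(int(input_shape[-1]), self.units))

    def call(self, inputs, training=None):
        outputs = tf.matmul(inputs, self.kernel)
        if training:
            outputs += tf.random.normal(tf.shape(outputs), stddev=0.1)
        return outputs

layer = NoisyDense(4)
x = tf.random.normal((2, 3))
print(layer(x, training=True).shape)   # (2, 4)
print(layer(x, training=False).shape)  # (2, 4)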
Example #16
def _num_elements(losses):
  """Computes the number of elements in `losses` tensor."""
  with K.name_scope('num_elements') as scope:
    return math_ops.cast(array_ops.size(losses, name=scope), dtype=losses.dtype)
Example #17
def _model_loss(model, inputs, targets, sample_weights=None, training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: List of input arrays.
      targets: List of target arrays.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss and loss value calculated using the
     specified loss function. The total loss includes regularization losses and
     applies masking and sample weighting to the loss value.
  """
  total_loss = 0
  if len(inputs) == 1:
    if model._expects_training_arg:
      outs = model.call(inputs[0], training=training)
    else:
      outs = model.call(inputs[0])
  else:
    if model._expects_training_arg:
      outs = model.call(inputs, training=training)
    else:
      outs = model.call(inputs)
  if not isinstance(outs, list):
    outs = [outs]

  if not isinstance(targets, list):
    targets = [targets]

  loss_metrics = []
  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      if sample_weights:
        weights = sample_weights[i]
      else:
        weights = None

      # TODO(fchollet): support masking; in practice `_keras_mask` is never
      # set in this context currently.
      mask = outs[i]._keras_mask

      weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn)
      with backend.name_scope(model.output_names[i] + '_loss'):
        output_loss = weighted_masked_fn(
            targets[i], outs[i], weights, mask=mask)
      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        loss_metrics.append(backend.mean(output_loss))

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = []
    for layer in model.layers:
      if layer.losses:
        custom_losses += layer.losses

    if custom_losses:
      total_loss += sum(custom_losses)

  return outs, total_loss, loss_metrics
Example #18
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)

  outs = nest.flatten(outs)
  # `None` by default for `EagerTensors`.
  masks = [t._keras_mask for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses (stateless).
  output_losses = []
  # Used to keep track of individual output losses (stateful).
  aggregated_output_losses = []

  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
            weights *= mask

        # Reset reduction on the loss so that we can get the per sample loss
        # value. We use this to get both the stateless and stateful loss
        # values without having to compute the underlying loss function
        # twice.
        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          current_loss_reduction = loss_fn.reduction
          loss_fn.reduction = losses_utils.ReductionV2.NONE
          weighted_losses = loss_fn(targets[i], outs[i], sample_weight=weights)
          loss_fn.reduction = current_loss_reduction

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(weighted_losses)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        output_losses.append(backend.mean(output_loss))
        if output_loss_metrics is not None:
          # Compute the stateful loss value.
          if weighted_losses is not None:
            aggregated_output_loss = output_loss_metrics[i](weighted_losses)
          else:
            # Custom loss class.
            aggregated_output_loss = training_utils.call_metric_function(
                output_loss_metrics[i], targets[i], outs[i], weights=weights)
          # Keep track of the stateful output loss result.
          aggregated_output_losses.append(aggregated_output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, aggregated_output_losses, masks
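The reduction toggling above (temporarily setting `loss_fn.reduction` to `NONE` to recover per-sample values) can be reproduced by a user directly with a public loss class. A hedged sketch with made-up tensors:

import tensorflow as tf

y_true = tf.constant([[1.0], [0.0], [1.0]])
y_pred = tf.constant([[0.8], [0.1], [0.4]])

# Unreduced, per-sample loss values.
bce_none = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)
per_sample = bce_none(y_true, y_pred)

# Reducing them with SUM_OVER_BATCH_SIZE matches the default scalar loss.
scalar = tf.reduce_mean(per_sample)
print(per_sample.shape, scalar)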
Example #19
def _adjust_block(p, ip, filters, block_id=None):
  """Adjusts the input `previous path` to match the shape of the `input`.

  Used in situations where the output number of filters needs to be changed.

  Arguments:
      p: Input tensor which needs to be modified
      ip: Input tensor whose shape needs to be matched
      filters: Number of output filters to be matched
      block_id: String block_id

  Returns:
      Adjusted Keras tensor
  """
  channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
  img_dim = 2 if K.image_data_format() == 'channels_first' else -2

  ip_shape = K.int_shape(ip)

  if p is not None:
    p_shape = K.int_shape(p)

  with K.name_scope('adjust_block'):
    if p is None:
      p = ip

    elif p_shape[img_dim] != ip_shape[img_dim]:
      with K.name_scope('adjust_reduction_block_%s' % block_id):
        p = Activation('relu', name='adjust_relu_1_%s' % block_id)(p)

        p1 = AveragePooling2D(
            (1, 1),
            strides=(2, 2),
            padding='valid',
            name='adjust_avg_pool_1_%s' % block_id)(
                p)
        p1 = Conv2D(
            filters // 2, (1, 1),
            padding='same',
            use_bias=False,
            name='adjust_conv_1_%s' % block_id,
            kernel_initializer='he_normal')(
                p1)

        p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
        p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2)
        p2 = AveragePooling2D(
            (1, 1),
            strides=(2, 2),
            padding='valid',
            name='adjust_avg_pool_2_%s' % block_id)(
                p2)
        p2 = Conv2D(
            filters // 2, (1, 1),
            padding='same',
            use_bias=False,
            name='adjust_conv_2_%s' % block_id,
            kernel_initializer='he_normal')(
                p2)

        p = concatenate([p1, p2], axis=channel_dim)
        p = BatchNormalization(
            axis=channel_dim,
            momentum=0.9997,
            epsilon=1e-3,
            name='adjust_bn_%s' % block_id)(
                p)

    elif p_shape[channel_dim] != filters:
      with K.name_scope('adjust_projection_block_%s' % block_id):
        p = Activation('relu')(p)
        p = Conv2D(
            filters, (1, 1),
            strides=(1, 1),
            padding='same',
            name='adjust_conv_projection_%s' % block_id,
            use_bias=False,
            kernel_initializer='he_normal')(
                p)
        p = BatchNormalization(
            axis=channel_dim,
            momentum=0.9997,
            epsilon=1e-3,
            name='adjust_bn_%s' % block_id)(
                p)
  return p
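The reduction path of `_adjust_block` halves the spatial resolution with two one-pixel-offset average-pooling branches before concatenating them. A sketch of that trick with public layers; the input shape and filter counts are illustrative only.

import tensorflow as tf
from tensorflow.keras import layers

p = tf.keras.Input(shape=(32, 32, 16))          # previous-path tensor

# Branch 1: stride-2 average pooling followed by a 1x1 conv.
p1 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(p)
p1 = layers.Conv2D(8, (1, 1), padding='same', use_bias=False)(p1)

# Branch 2: the same, but shifted by one pixel via pad + crop.
p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
p2 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(p2)
p2 = layers.Conv2D(8, (1, 1), padding='same', use_bias=False)(p2)

out = layers.Concatenate(axis=-1)([p1, p2])     # -> (16, 16, 16)
out = layers.BatchNormalization(momentum=0.9997, epsilon=1e-3)(out)
print(tf.keras.Model(p, out).output_shape)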
Example #20
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
  # Used to keep track of the total loss value (stateless).
  # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
  #                   loss_weight_2 * output_2_loss_fn(...) +
  #                   layer losses.
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  # Allow mixed `NumPy` and `EagerTensor` input here.
  if any(
      isinstance(input_t, (np.ndarray, float, int))
      for input_t in nest.flatten(inputs)):
    inputs = nest.map_structure(ops.convert_to_tensor, inputs)

  outs = model(inputs, **kwargs)

  outs = nest.flatten(outs)
  masks = [getattr(t, '_keras_mask', None) for t in outs]
  targets = nest.flatten(targets)

  # Used to keep track of individual output losses.
  output_losses = []

  with backend.name_scope('loss'):
    loss_fns = [
        loss_fn for loss_fn in model.loss_functions if loss_fn is not None
    ]
    for i, loss_fn in enumerate(loss_fns):
      weights = sample_weights[i] if sample_weights else None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if mask is not None:
          mask = math_ops.cast(mask, outs[i].dtype)
          # Update weights with mask.
          if weights is None:
            weights = mask
          else:
            # Update dimensions of weights to match with mask if possible.
            mask, _, weights = (
                losses_utils.squeeze_or_expand_dimensions(mask, None, weights))
            weights *= mask

        weighted_losses = None
        if hasattr(loss_fn, 'reduction'):
          per_sample_losses = loss_fn.call(targets[i], outs[i])
          weighted_losses = losses_utils.compute_weighted_loss(
              per_sample_losses,
              sample_weight=weights,
              reduction=losses_utils.ReductionV2.NONE)
          loss_reduction = loss_fn.reduction

          # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
          # compile use cases.
          if loss_reduction == losses_utils.ReductionV2.AUTO:
            loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

          # Compute the stateless loss value.
          output_loss = losses_utils.reduce_weighted_loss(
              weighted_losses, reduction=loss_reduction)
          if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
            output_loss = losses_utils.scale_loss_for_distribution(output_loss)
        else:
          # Compute the stateless loss value for a custom loss class.
          # Here we assume that the class takes care of loss reduction
          # because if this class returns a vector value we cannot
          # differentiate between use case where a custom optimizer
          # expects a vector loss value vs unreduced per-sample loss value.
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
          # For custom losses we assume reduction was mean.
          output_loss = losses_utils.scale_loss_for_distribution(output_loss)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        # Keep track of the stateful output loss result.
        output_losses.append(output_loss_metrics[i](output_loss))

      total_loss += model._loss_weights_list[i] * output_loss

    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += losses_utils.scale_loss_for_distribution(
          math_ops.add_n(custom_losses))

  return outs, total_loss, output_losses, masks
Example #21
def _num_elements(losses):
    """Computes the number of elements in `losses` tensor."""
    with K.name_scope('num_elements') as scope:
        return math_ops.cast(array_ops.size(losses, name=scope),
                             dtype=losses.dtype)
Example #22
    def _calculate_mean_and_var(self, x, axes, keep_dims):

        with K.name_scope('moments'):
            # The dynamic range of fp16 is too limited to support the collection of
            # sufficient statistics. As a workaround we simply perform the operations
            # on 32-bit floats before converting the mean and variance back to fp16.
            y = math_ops.cast(
                x, dtypes.float32) if x.dtype == dtypes.float16 else x
            replica_ctx = ds.get_replica_context()
            if replica_ctx:
                # local to me
                local_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
                local_squared_sum = math_ops.reduce_sum(math_ops.square(y),
                                                        axis=axes,
                                                        keepdims=True)
                batch_size = math_ops.cast(
                    array_ops.shape_v2(y)[0], dtypes.float32)
                # TODO(b/163099951): batch the all-reduces once we sort out the ordering
                # issue for NCCL. We don't have a mechanism to launch NCCL in the same
                # order in each replica nowadays, so we limit NCCL to batch all-reduces.

                # get the sum of all replicas (converge all devices)
                y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
                                               local_sum)
                # get the sum from all replicas (converge all devices)
                y_squared_sum = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, local_squared_sum)
                # get the net batch size from all devices (converge all devices)
                global_batch_size = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, batch_size)

                # get the number of total params you are averaging (local)
                axes_vals = [(array_ops.shape_v2(y))[i]
                             for i in range(1, len(axes))]
                multiplier = math_ops.cast(math_ops.reduce_prod(axes_vals),
                                           dtypes.float32)
                multiplier = multiplier * global_batch_size

                # convert sums to mean and variance (locally)
                mean = y_sum / multiplier
                y_squared_mean = y_squared_sum / multiplier
                # var = E(x^2) - E(x)^2
                variance = y_squared_mean - math_ops.square(mean)
            else:
                # If there is only one replica, no cross-replica reduction is needed.
                # Compute true mean while keeping the dims for proper broadcasting.
                mean = math_ops.reduce_mean(y,
                                            axes,
                                            keepdims=True,
                                            name='mean')
                # sample variance, not unbiased variance
                # Note: stop_gradient does not change the gradient that gets
                #       backpropagated to the mean from the variance calculation,
                #       because that gradient is zero
                variance = math_ops.reduce_mean(math_ops.squared_difference(
                    y, mean),
                                                axes,
                                                keepdims=True,
                                                name='variance')
            if not keep_dims:
                mean = array_ops.squeeze(mean, axes)
                variance = array_ops.squeeze(variance, axes)
            if x.dtype == dtypes.float16:
                return (math_ops.cast(mean, dtypes.float16),
                        math_ops.cast(variance, dtypes.float16))
            else:
                return (mean, variance)
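For the single-replica branch, the same statistics are available through the public `tf.nn.moments` helper; a small sketch with arbitrary input values:

import tensorflow as tf

x = tf.random.normal((8, 4, 4, 3))
# Mean and (biased) sample variance over the batch and spatial axes,
# keeping dims for broadcasting, as in the reduce_mean branch above.
mean, variance = tf.nn.moments(x, axes=[0, 1, 2], keepdims=True)
print(mean.shape, variance.shape)  # (1, 1, 1, 3) (1, 1, 1, 3)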
Example #23
 def _assign_subdiv_new_value(self, variable, value, subdivisions, count):
     with K.name_scope('AssignNewValue') as scope:
         with ops.colocate_with(variable):
             update_value = array_ops.where((count + 1) % subdivisions == 0,
                                            value, variable)
             return state_ops.assign(variable, update_value, name=scope)
Example #24
 def _assign_latent_avg(self, variable, value):
     with K.name_scope('latent_avg') as scope:
         with ops.colocate_with(variable):
             return state_ops.assign(variable, value, name=scope)
Example #25
def _model_loss(model, inputs, targets, sample_weights=None, training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    if model._compute_output_and_mask_jointly:
        outs, masks = model._call_and_compute_mask(inputs, **kwargs)
        masks = generic_utils.to_list(masks)
    else:
        outs = model.call(inputs, **kwargs)
        masks = None

    outs = generic_utils.to_list(outs)
    if masks is None:
        masks = [None for _ in outs]
    targets = generic_utils.to_list(targets)

    loss_metrics = []
    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            if sample_weights:
                weights = sample_weights[i]
            else:
                weights = None
            mask = masks[i]

            weighted_masked_fn = training_utils.weighted_masked_objective(
                loss_fn)
            with backend.name_scope(model.output_names[i] + '_loss'):
                output_loss = weighted_masked_fn(targets[i],
                                                 outs[i],
                                                 weights,
                                                 mask=mask)
            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                loss_metrics.append(backend.mean(output_loss))

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = []
        for layer in model.layers:
            if layer.losses:
                custom_losses += layer.losses

        if custom_losses:
            total_loss += sum(custom_losses)

    return outs, total_loss, loss_metrics, masks
Example #26
  def apply_gradients(self,
                      grads_and_vars,
                      name=None,
                      experimental_aggregate_gradients=True):
    """Apply gradients to variables.

    This is the second part of `minimize()`. It returns an `Operation` that
    applies gradients.

    The method sums gradients from all replicas in the presence of
    `tf.distribute.Strategy` by default. You can aggregate gradients yourself by
    passing `experimental_aggregate_gradients=False`.

    Example:

    ```python
    grads = tape.gradient(loss, vars)
    grads = tf.distribute.get_replica_context().all_reduce('sum', grads)
    # Processing aggregated gradients.
    optimizer.apply_gradients(zip(grads, vars),
        experimental_aggregate_gradients=False)
    ```

    Args:
      grads_and_vars: List of (gradient, variable) pairs.
      name: Optional name for the returned operation. Defaults to the name
        passed to the `Optimizer` constructor.
      experimental_aggregate_gradients: Whether to sum gradients from different
        replicas in the presence of `tf.distribute.Strategy`. If False, it is
        the user's responsibility to aggregate the gradients. Defaults to True.

    Returns:
      An `Operation` that applies the specified gradients. The `iterations`
      will be automatically increased by 1.

    Raises:
      TypeError: If `grads_and_vars` is malformed.
      ValueError: If none of the variables have gradients.
    """
    grads_and_vars = _filter_grads(grads_and_vars)
    var_list = [v for (_, v) in grads_and_vars]

    with backend.name_scope(self._name):
      # Create iteration if necessary.
      with ops.init_scope():
        _ = self.iterations
        self._create_hypers()
        self._create_slots(var_list)

      if not grads_and_vars:
        # Distribution strategy does not support reducing an empty list of
        # gradients
        return control_flow_ops.no_op()

      if distribute_ctx.in_cross_replica_context():
        raise RuntimeError(
            "`apply_gradients()` cannot be called in cross-replica context. "
            "Use `tf.distribute.Strategy.run` to enter replica "
            "context.")

      strategy = distribute_ctx.get_strategy()
      if (not experimental_aggregate_gradients and strategy and isinstance(
          strategy.extended,
          parameter_server_strategy.ParameterServerStrategyExtended)):
        raise NotImplementedError(
            "`experimental_aggregate_gradients=False` is not supported for "
            "ParameterServerStrategy and CentralStorageStrategy")

      apply_state = self._prepare(var_list)
      if experimental_aggregate_gradients:
        reduced_grads = self._aggregate_gradients(grads_and_vars)
        var_list = [v for _, v in grads_and_vars]
        grads_and_vars = list(zip(reduced_grads, var_list))
      return distribute_ctx.get_replica_context().merge_call(
          functools.partial(self._distributed_apply, apply_state=apply_state),
          args=(grads_and_vars,),
          kwargs={
              "name": name,
          })
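Typical single-replica usage of `apply_gradients` (no distribution strategy involved) looks like the following sketch; the model, loss, and data are placeholders.

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

x = tf.random.normal((16, 4))
y = tf.random.normal((16, 1))

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))
print(optimizer.iterations.numpy())  # incremented by 1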
Example #27
def _reduction_a_cell(ip, p, filters, block_id=None):
  """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

  Arguments:
      ip: Input tensor `x`
      p: Input tensor `p`
      filters: Number of output filters
      block_id: String block_id

  Returns:
      A Keras tensor
  """
  channel_dim = 1 if K.image_data_format() == 'channels_first' else -1

  with K.name_scope('reduction_A_block_%s' % block_id):
    p = _adjust_block(p, ip, filters, block_id)

    h = Activation('relu')(ip)
    h = Conv2D(
        filters, (1, 1),
        strides=(1, 1),
        padding='same',
        name='reduction_conv_1_%s' % block_id,
        use_bias=False,
        kernel_initializer='he_normal')(
            h)
    h = BatchNormalization(
        axis=channel_dim,
        momentum=0.9997,
        epsilon=1e-3,
        name='reduction_bn_1_%s' % block_id)(
            h)

    with K.name_scope('block_1'):
      x1_1 = _separable_conv_block(
          h,
          filters, (5, 5),
          strides=(2, 2),
          block_id='reduction_left1_%s' % block_id)
      x1_2 = _separable_conv_block(
          p,
          filters, (7, 7),
          strides=(2, 2),
          block_id='reduction_1_%s' % block_id)
      x1 = add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

    with K.name_scope('block_2'):
      x2_1 = MaxPooling2D(
          (3, 3),
          strides=(2, 2),
          padding='same',
          name='reduction_left2_%s' % block_id)(
              h)
      x2_2 = _separable_conv_block(
          p,
          filters, (7, 7),
          strides=(2, 2),
          block_id='reduction_right2_%s' % block_id)
      x2 = add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

    with K.name_scope('block_3'):
      x3_1 = AveragePooling2D(
          (3, 3),
          strides=(2, 2),
          padding='same',
          name='reduction_left3_%s' % block_id)(
              h)
      x3_2 = _separable_conv_block(
          p,
          filters, (5, 5),
          strides=(2, 2),
          block_id='reduction_right3_%s' % block_id)
      x3 = add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

    with K.name_scope('block_4'):
      x4 = AveragePooling2D(
          (3, 3),
          strides=(1, 1),
          padding='same',
          name='reduction_left4_%s' % block_id)(
              x1)
      x4 = add([x2, x4])

    with K.name_scope('block_5'):
      x5_1 = _separable_conv_block(
          x1, filters, (3, 3), block_id='reduction_left4_%s' % block_id)
      x5_2 = MaxPooling2D(
          (3, 3),
          strides=(2, 2),
          padding='same',
          name='reduction_right5_%s' % block_id)(
              h)
      x5 = add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

    x = concatenate(
        [x2, x3, x4, x5],
        axis=channel_dim,
        name='reduction_concat_%s' % block_id)
    return x, ip
def remove_squeezable_dimensions(labels,
                                 predictions,
                                 expected_rank_diff=0,
                                 name=None):
    """Squeeze last dim if ranks differ from expected by exactly 1.

  In the common case where we expect shapes to match, `expected_rank_diff`
  defaults to 0, and we squeeze the last dimension of the larger rank if they
  differ by 1.

  But, for example, if `labels` contains class IDs and `predictions` contains 1
  probability per class, we expect `predictions` to have 1 more dimension than
  `labels`, so `expected_rank_diff` would be 1. In this case, we'd squeeze
  `labels` if `rank(predictions) - rank(labels) == 0`, and
  `predictions` if `rank(predictions) - rank(labels) == 2`.

  This will use static shape if available. Otherwise, it will add graph
  operations, which could result in a performance hit.

  Args:
    labels: Label values, a `Tensor` whose dimensions match `predictions`.
    predictions: Predicted values, a `Tensor` of arbitrary dimensions.
    expected_rank_diff: Expected result of `rank(predictions) - rank(labels)`.
    name: Name of the op.

  Returns:
    Tuple of `labels` and `predictions`, possibly with last dim squeezed.
  """
    with backend.name_scope(name or 'remove_squeezable_dimensions'):
        if not isinstance(predictions, ragged_tensor.RaggedTensor):
            predictions = ops.convert_to_tensor_v2_with_dispatch(predictions)
        if not isinstance(labels, ragged_tensor.RaggedTensor):
            labels = ops.convert_to_tensor_v2_with_dispatch(labels)
        predictions_shape = predictions.shape
        predictions_rank = predictions_shape.ndims
        labels_shape = labels.shape
        labels_rank = labels_shape.ndims
        if (labels_rank is not None) and (predictions_rank is not None):
            # Use static rank.
            rank_diff = predictions_rank - labels_rank
            if (rank_diff == expected_rank_diff + 1
                    and predictions_shape.dims[-1].is_compatible_with(1)):
                predictions = array_ops.squeeze(predictions, [-1])
            elif (rank_diff == expected_rank_diff - 1
                  and labels_shape.dims[-1].is_compatible_with(1)):
                labels = array_ops.squeeze(labels, [-1])
            return labels, predictions

        # Use dynamic rank.
        rank_diff = array_ops.rank(predictions) - array_ops.rank(labels)
        if (predictions_rank is
                None) or (predictions_shape.dims[-1].is_compatible_with(1)):
            predictions = control_flow_ops.cond(
                math_ops.equal(expected_rank_diff + 1, rank_diff),
                lambda: array_ops.squeeze(predictions, [-1]),
                lambda: predictions)
        if (labels_rank is
                None) or (labels_shape.dims[-1].is_compatible_with(1)):
            labels = control_flow_ops.cond(
                math_ops.equal(expected_rank_diff - 1, rank_diff),
                lambda: array_ops.squeeze(labels, [-1]), lambda: labels)
        return labels, predictions
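
A short demonstration of the squeezing rule implemented above, written against public TensorFlow ops rather than the private helper; shapes and values are made up.

```python
# With the default expected_rank_diff=0: labels of shape [3, 1] versus
# predictions of shape [3] give rank_diff == -1, so the trailing label
# dimension of size 1 gets squeezed away.
import tensorflow as tf

labels = tf.constant([[1.0], [0.0], [1.0]])   # shape [3, 1]
predictions = tf.constant([0.9, 0.2, 0.7])    # shape [3]

rank_diff = predictions.shape.rank - labels.shape.rank
if rank_diff == -1 and labels.shape[-1] == 1:
    labels = tf.squeeze(labels, axis=[-1])

print(labels.shape, predictions.shape)  # (3,) (3,)
```
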
Example #29
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  if model._compute_output_and_mask_jointly:
    outs, masks = model._call_and_compute_mask(inputs, **kwargs)
    masks = nest.flatten(masks)
  else:
    outs = model.call(inputs, **kwargs)
    masks = None

  outs = nest.flatten(outs)
  if masks is None:
    masks = [None for _ in outs]
  targets = nest.flatten(targets)

  loss_metrics = []
  aggregated_loss_metrics = []
  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      if sample_weights:
        weights = sample_weights[i]
      else:
        weights = None
      mask = masks[i]
      with backend.name_scope(model.output_names[i] + '_loss'):
        if isinstance(loss_fn, losses_module.Loss):
          if mask is not None:
            mask = math_ops.cast(mask, outs[i].dtype)
            # Update weights with mask.
            if weights is None:
              weights = mask
            else:
              # Update dimensions of weights to match with mask if possible.
              mask, _, weights = squeeze_or_expand_dimensions(
                  mask, None, weights)
              weights *= mask
          output_loss = loss_fn(targets[i], outs[i], sample_weight=weights)
        else:
          weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn)
          output_loss = weighted_masked_fn(
              targets[i], outs[i], weights, mask=mask)

      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        loss_metrics.append(backend.mean(output_loss))

        if output_loss_metrics is not None:
          # Keep track of the stateful loss result.
          aggregated_loss_metrics.append(
              training_utils.call_metric_function(
                  output_loss_metrics[i],
                  targets[i],
                  outs[i],
                  weights=weights,
                  mask=mask))

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = model.losses
    if custom_losses:
      total_loss += math_ops.add_n(custom_losses)
    model._clear_losses()

  return outs, total_loss, loss_metrics, aggregated_loss_metrics, masks
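
The mask/sample-weight combination performed inside the loop above can be reproduced with the public Keras losses API. A hedged sketch with made-up values follows.

```python
# Masked positions contribute nothing: the mask is multiplied into the
# user-supplied sample weights before the loss is evaluated, mirroring the
# `weights *= mask` branch above.
import tensorflow as tf

loss_fn = tf.keras.losses.MeanSquaredError()
y_true = tf.constant([[1.0], [2.0], [3.0]])
y_pred = tf.constant([[1.5], [2.0], [0.0]])

mask = tf.constant([1.0, 1.0, 0.0])           # e.g. produced by a Masking layer
sample_weight = tf.constant([0.5, 1.0, 1.0])  # user-provided weights

weights = sample_weight * mask
loss = loss_fn(y_true, y_pred, sample_weight=weights)
print(float(loss))
```
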
Example #30
def _eager_loss_fn(outputs, targets, loss_fn, output_name):
  with backend.name_scope(output_name + '_loss'):
    loss = loss_fn(targets, outputs)
  return loss
Example #31
def _model_loss(model, inputs, targets, sample_weights=None, training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: List of input arrays.
      targets: List of target arrays.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss and loss value calculated using the
     specified loss function. The total loss includes regularization losses and
     applies masking and sample weighting to the loss value.
  """
    total_loss = 0
    if len(inputs) == 1:
        if model._expects_training_arg:
            outs = model.call(inputs[0], training=training)
        else:
            outs = model.call(inputs[0])
    else:
        if model._expects_training_arg:
            outs = model.call(inputs, training=training)
        else:
            outs = model.call(inputs)
    if not isinstance(outs, list):
        outs = [outs]

    if not isinstance(targets, list):
        targets = [targets]

    loss_metrics = []
    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            if sample_weights:
                weights = sample_weights[i]
            else:
                weights = None

            # TODO(fchollet): support masking; in practice `_keras_mask` is never
            # set in this context currently.
            mask = outs[i]._keras_mask

            weighted_masked_fn = training_utils.weighted_masked_objective(
                loss_fn)
            with backend.name_scope(model.output_names[i] + '_loss'):
                output_loss = weighted_masked_fn(targets[i],
                                                 outs[i],
                                                 weights,
                                                 mask=mask)
            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                loss_metrics.append(backend.mean(output_loss))

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = []
        for layer in model.layers:
            if layer.losses:
                custom_losses += layer.losses

        if custom_losses:
            total_loss += sum(custom_losses)

    return outs, total_loss, loss_metrics
Example #32
 def _assign_new_value(self, variable, value):
     with K.name_scope('AssignNewValue') as scope:
         with ops.colocate_with(variable):
             return state_ops.assign(variable, value, name=scope)
  def _distributed_apply(distribution, grads_and_vars, name, apply_state):
    """`apply_gradients` using a `DistributionStrategy`."""
    reduced_grads = distribution.extended.batch_reduce_to(
        ds_reduce_util.ReduceOp.SUM, grads_and_vars)
    var_list = [v for _, v in grads_and_vars]
    grads_and_vars = zip(reduced_grads, var_list)

    def apply_grad_to_update_var(var, grad):
      """Apply gradient to variable."""
      if isinstance(var, ops.Tensor):
        raise NotImplementedError("Trying to update a Tensor ", var)

      apply_kwargs = {}
      if not isinstance(var, de.TrainableWrapper):
        if isinstance(grad, ops.IndexedSlices):
          if var.constraint is not None:
            raise RuntimeError(
                "Cannot use a constraint function on a sparse variable.")
          if "apply_state" in self._sparse_apply_args:
            apply_kwargs["apply_state"] = apply_state
          return self._resource_apply_sparse_duplicate_indices(
              grad.values, var, grad.indices, **apply_kwargs)

        if "apply_state" in self._dense_apply_args:
          apply_kwargs["apply_state"] = apply_state
        update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
        if var.constraint is not None:
          with ops.control_dependencies([update_op]):
            return var.assign(var.constraint(var))
        else:
          return update_op
      else:
        with ops.colocate_with(None, ignore_existing=True):
          _slots = [self.get_slot(var, _s) for _s in self.get_slot_names()]
          # Add the optimizer slots to restricting list.
          if var.params.restrict_policy is not None:
            var.params.restrict_policy._track_optimizer_slots(_slots)

          with ops.control_dependencies([grad]):
            _before = [var.read_value()] + [_s.read_value() for _s in _slots]
          if isinstance(grad, ops.IndexedSlices):
            if var.constraint is not None:
              raise RuntimeError(
                  "Cannot use a constraint function on a sparse variable.")
            if "apply_state" in self._sparse_apply_args:
              apply_kwargs["apply_state"] = apply_state
            with ops.control_dependencies(_before):
              _apply_op = self._resource_apply_sparse_duplicate_indices(
                  grad.values, var, grad.indices, **apply_kwargs)
            with ops.control_dependencies([_apply_op]):
              _after = control_flow_ops.group([var.update_op()] +
                                              [_s.update_op() for _s in _slots])
              return _after

          if "apply_state" in self._dense_apply_args:
            apply_kwargs["apply_state"] = apply_state
          with ops.control_dependencies(_before):
            update_op = self._resource_apply_dense(grad, var, **apply_kwargs)
          if var.constraint is not None:
            with ops.control_dependencies([update_op]):
              return var.assign(var.constraint(var))
          else:
            with ops.control_dependencies([update_op]):
              _after = control_flow_ops.group([var.update_op()] +
                                              [_s.update_op() for _s in _slots])
            return _after

    update_ops = []
    with backend.name_scope(name or self._name):
      for grad, var in grads_and_vars:
        scope_name = ("update" if ops.executing_eagerly_outside_functions() else
                      "update_" + var.op.name)
        # Colocate the update with variables to avoid unnecessary communication
        # delays. See b/136304694.
        with backend.name_scope(
            scope_name), distribution.extended.colocate_vars_with(var):
          update_ops.extend(
              distribution.extended.update(var,
                                           apply_grad_to_update_var,
                                           args=(grad,),
                                           group=False))

      any_symbolic = any(
          isinstance(i, ops.Operation) or tf_utils.is_symbolic_tensor(i)
          for i in update_ops)
      if not context.executing_eagerly() or any_symbolic:
        # If the current context is graph mode or any of the update ops are
        # symbolic then the step update should be carried out under a graph
        # context. (eager updates execute immediately)
        with ops._get_graph_from_inputs(update_ops).as_default():  # pylint: disable=protected-access
          with ops.control_dependencies(update_ops):
            return self._iterations.assign_add(1).op

      return self._iterations.assign_add(1)
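
The `_distributed_apply` above (from a dynamic-embedding optimizer wrapper) is normally reached from `apply_gradients` inside a replica context. Below is a generic sketch of a custom training step under `tf.distribute.MirroredStrategy`, not specific to that wrapper; TensorFlow 2.x is assumed, and the model, loss, and batch size are illustrative.

```python
# apply_gradients called inside strategy.run enters a replica context and
# goes through merge_call into a _distributed_apply like the one above.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
GLOBAL_BATCH_SIZE = 8

with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(4,))])
    optimizer = tf.keras.optimizers.SGD(0.1)
loss_obj = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE)

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:
        per_example = loss_obj(y, model(x, training=True))
        loss = tf.nn.compute_average_loss(
            per_example, global_batch_size=GLOBAL_BATCH_SIZE)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

x = tf.random.normal((GLOBAL_BATCH_SIZE, 4))
y = tf.random.normal((GLOBAL_BATCH_SIZE, 1))
strategy.run(train_step, args=(x, y))
```
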
def multi_gpu_model(model, gpus, cpu_merge=True, cpu_relocation=False):
  """Replicates a model on different GPUs.

  Specifically, this function implements single-machine
  multi-GPU data parallelism. It works in the following way:

  - Divide the model's input(s) into multiple sub-batches.
  - Apply a model copy on each sub-batch. Every model copy
      is executed on a dedicated GPU.
  - Concatenate the results (on CPU) into one big batch.

  E.g. if your `batch_size` is 64 and you use `gpus=2`,
  then we will divide the input into 2 sub-batches of 32 samples,
  process each sub-batch on one GPU, then return the full
  batch of 64 processed samples.

  This induces quasi-linear speedup on up to 8 GPUs.

  This function is only available with the TensorFlow backend
  for the time being.

  Args:
      model: A Keras model instance. To avoid OOM errors,
          this model could have been built on CPU, for instance
          (see usage example below).
      gpus: Integer >= 2, number of GPUs on which to create
          model replicas.
      cpu_merge: A boolean value to identify whether to force
          merging model weights under the scope of the CPU or not.
      cpu_relocation: A boolean value to identify whether to
          create the model's weights under the scope of the CPU.
          If the model is not defined under any preceding device
          scope, you can still rescue it by activating this option.

  Returns:
      A Keras `Model` instance which can be used just like the initial
      `model` argument, but which distributes its workload on multiple GPUs.

  Example 1: Training models with weights merge on CPU

  ```python
      import tensorflow as tf
      from keras.applications import Xception
      from keras.utils import multi_gpu_model
      import numpy as np

      num_samples = 1000
      height = 224
      width = 224
      num_classes = 1000

      # Instantiate the base model (or "template" model).
      # We recommend doing this under a CPU device scope,
      # so that the model's weights are hosted on CPU memory.
      # Otherwise they may end up hosted on a GPU, which would
      # complicate weight sharing.
      with tf.device('/cpu:0'):
          model = Xception(weights=None,
                           input_shape=(height, width, 3),
                           classes=num_classes)

      # Replicates the model on 8 GPUs.
      # This assumes that your machine has 8 available GPUs.
      parallel_model = multi_gpu_model(model, gpus=8)
      parallel_model.compile(loss='categorical_crossentropy',
                             optimizer='rmsprop')

      # Generate dummy data.
      x = np.random.random((num_samples, height, width, 3))
      y = np.random.random((num_samples, num_classes))

      # This `fit` call will be distributed on 8 GPUs.
      # Since the batch size is 256, each GPU will process 32 samples.
      parallel_model.fit(x, y, epochs=20, batch_size=256)

      # Save model via the template model (which shares the same weights):
      model.save('my_model.h5')
  ```

  Example 2: Training models with weights merge on CPU using cpu_relocation

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_relocation=True)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")

       model.compile(..)
       ..
  ```

  Example 3: Training models with weights merge on GPU (recommended for NV-link)

  ```python
       ..
       # Not needed to change the device scope for model definition:
       model = Xception(weights=None, ..)

       try:
           model = multi_gpu_model(model, cpu_merge=False)
           print("Training using multiple GPUs..")
       except:
           print("Training using single GPU or CPU..")
       model.compile(..)
       ..
  ```

  Raises:
    ValueError: if the `gpus` argument does not match available devices.
  """
  if isinstance(gpus, (list, tuple)):
    if len(gpus) <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `len(gpus) >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = len(gpus)
    target_gpu_ids = gpus
  else:
    if gpus <= 1:
      raise ValueError('For multi-gpu usage to be effective, '
                       'call `multi_gpu_model` with `gpus >= 2`. '
                       'Received: `gpus=%s`' % gpus)
    num_gpus = gpus
    target_gpu_ids = range(num_gpus)

  target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids]
  available_devices = _get_available_devices()
  available_devices = [
      _normalize_device_name(name) for name in available_devices
  ]
  for device in target_devices:
    if device not in available_devices:
      raise ValueError('To call `multi_gpu_model` with `gpus=%s`, '
                       'we expect the following devices to be available: %s. '
                       'However this machine only has: %s. '
                       'Try reducing `gpus`.' % (gpus, target_devices,
                                                 available_devices))

  def get_slice(data, i, parts):
    """Slice an array into `parts` slices and return slice `i`.

    Args:
      data: array to slice.
      i: index of slice to return.
      parts: number of slices to make.

    Returns:
      Slice `i` of `data`.
    """
    shape = array_ops.shape(data)
    batch_size = shape[:1]
    input_shape = shape[1:]
    step = batch_size // parts
    if i == parts - 1:
      size = batch_size - step * i
    else:
      size = step
    size = array_ops.concat([size, input_shape], axis=0)
    stride = array_ops.concat([step, input_shape * 0], axis=0)
    start = stride * i
    return array_ops.slice(data, start, size)

  # Relocate the model definition under CPU device scope if needed
  if cpu_relocation:
    from tensorflow.python.keras.models import clone_model  # pylint: disable=g-import-not-at-top
    with ops.device('/cpu:0'):
      model = clone_model(model)

  all_outputs = [[] for _ in range(len(model.outputs))]

  # Place a copy of the model on each GPU,
  # each getting a slice of the inputs.
  for i, gpu_id in enumerate(target_gpu_ids):
    with ops.device('/gpu:%d' % gpu_id):
      with backend.name_scope('replica_%d' % gpu_id):
        inputs = []
        # Retrieve a slice of the input.
        for x in model.inputs:
          input_shape = tuple(x.shape.as_list())[1:]
          slice_i = Lambda(
              get_slice,
              output_shape=input_shape,
              arguments={
                  'i': i,
                  'parts': num_gpus
              })(
                  x)
          inputs.append(slice_i)

        # Apply model on slice
        # (creating a model replica on the target device).
        outputs = model(inputs)
        if not isinstance(outputs, list):
          outputs = [outputs]

        # Save the outputs for merging back together later.
        for o, output in enumerate(outputs):
          all_outputs[o].append(output)

  # Deduplicate output names to handle Siamese networks.
  occurrences = {}
  for n in model.output_names:
    if n not in occurrences:
      occurrences[n] = 1
    else:
      occurrences[n] += 1
  conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1}
  output_names = []
  for n in model.output_names:
    if n in conflict_counter:
      conflict_counter[n] += 1
      n += '_%d' % conflict_counter[n]
    output_names.append(n)

  # Merge outputs under expected scope.
  with ops.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]):
    merged = []
    for name, outputs in zip(output_names, all_outputs):
      merged.append(concatenate(outputs, axis=0, name=name))
    return Model(model.inputs, merged)
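
`multi_gpu_model` was deprecated in TensorFlow 2.x and removed in later releases; `tf.distribute.MirroredStrategy` is the supported replacement for single-machine data parallelism. A rough modern equivalent of Example 1 from the docstring above, with sizes reduced for brevity, is sketched below.

```python
# Sketch: replicate the model over all visible GPUs with MirroredStrategy;
# the global batch passed to fit() is split across replicas automatically.
import numpy as np
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
    model = tf.keras.applications.Xception(
        weights=None, input_shape=(224, 224, 3), classes=10)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

x = np.random.random((64, 224, 224, 3)).astype('float32')
y = np.random.random((64, 10)).astype('float32')
model.fit(x, y, epochs=1, batch_size=32)
```
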
Example #35
def _model_loss(model, inputs, targets, sample_weights=None, training=False):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
  total_loss = 0
  kwargs = {}
  if model._expects_training_arg:
    kwargs['training'] = training
  if len(inputs) == 1 and not isinstance(inputs, dict):
    inputs = inputs[0]

  if model._compute_output_and_mask_jointly:
    outs, masks = model._call_and_compute_mask(inputs, **kwargs)
    masks = generic_utils.to_list(masks)
  else:
    outs = model.call(inputs, **kwargs)
    masks = None

  outs = generic_utils.to_list(outs)
  if masks is None:
    masks = [None for _ in outs]
  targets = generic_utils.to_list(targets)

  loss_metrics = []
  with backend.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      if sample_weights:
        weights = sample_weights[i]
      else:
        weights = None
      mask = masks[i]

      weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn)
      with backend.name_scope(model.output_names[i] + '_loss'):
        output_loss = weighted_masked_fn(
            targets[i], outs[i], weights, mask=mask)
      # If the number of outputs is 1 then we don't append the loss metric
      # associated with each model output. When there are multiple outputs
      # associated with a model, each output's loss is calculated and returned
      # as part of the loss_metrics.
      if len(model.outputs) > 1:
        loss_metrics.append(backend.mean(output_loss))

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = backend.mean(total_loss)
    # Add regularization losses
    custom_losses = []
    for layer in model.layers:
      if layer.losses:
        custom_losses += layer.losses

    if custom_losses:
      total_loss += sum(custom_losses)

  return outs, total_loss, loss_metrics, masks
Example #36
def _eager_loss_fn(outputs, targets, loss_fn, output_name):
    with backend.name_scope(output_name + '_loss'):
        loss = loss_fn(targets, outputs)
    return loss
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    if model._compute_output_and_mask_jointly:
        outs, masks = model._call_and_compute_mask(inputs, **kwargs)
        masks = generic_utils.to_list(masks)
    else:
        outs = model.call(inputs, **kwargs)
        masks = None

    outs = generic_utils.to_list(outs)
    if masks is None:
        masks = [None for _ in outs]
    targets = generic_utils.to_list(targets)

    loss_metrics = []
    aggregated_loss_metrics = []
    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            if sample_weights:
                weights = sample_weights[i]
            else:
                weights = None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if isinstance(loss_fn, losses_module.Loss):
                    if mask is not None:
                        mask = math_ops.cast(mask, outs[i].dtype)
                        # Update weights with mask.
                        if weights is None:
                            weights = mask
                        else:
                            # Update dimensions of weights to match with mask if possible.
                            mask, _, weights = squeeze_or_expand_dimensions(
                                mask, None, weights)
                            weights *= mask
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)
                else:
                    weighted_masked_fn = training_utils.weighted_masked_objective(
                        loss_fn)
                    output_loss = weighted_masked_fn(targets[i],
                                                     outs[i],
                                                     weights,
                                                     mask=mask)

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                loss_metrics.append(backend.mean(output_loss))

                if output_loss_metrics is not None:
                    # Keep track of the stateful loss result.
                    aggregated_loss_metrics.append(
                        training_utils.call_metric_function(
                            output_loss_metrics[i],
                            targets[i],
                            outs[i],
                            weights=weights,
                            mask=mask))

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = model.losses
        if custom_losses:
            total_loss += math_ops.add_n(custom_losses)
        model._clear_losses()

    return outs, total_loss, loss_metrics, aggregated_loss_metrics, masks
Example #38
def _adjust_block(p, ip, filters, block_id=None):
    """Adjusts the input `previous path` to match the shape of the `input`.

  Used in situations where the output number of filters needs to be changed.

  Arguments:
      p: Input tensor which needs to be modified
      ip: Input tensor whose shape needs to be matched
      filters: Number of output filters to be matched
      block_id: String block_id

  Returns:
      Adjusted Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1
    img_dim = 2 if backend.image_data_format() == 'channels_first' else -2

    ip_shape = backend.int_shape(ip)

    if p is not None:
        p_shape = backend.int_shape(p)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[img_dim] != ip_shape[img_dim]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation('relu',
                                      name='adjust_relu_1_%s' % block_id)(p)
                p1 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_1_%s' % block_id)(p)
                p1 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_1_%s' % block_id,
                                   kernel_initializer='he_normal')(p1)

                p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p)
                p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2)
                p2 = layers.AveragePooling2D(
                    (1, 1),
                    strides=(2, 2),
                    padding='valid',
                    name='adjust_avg_pool_2_%s' % block_id)(p2)
                p2 = layers.Conv2D(filters // 2, (1, 1),
                                   padding='same',
                                   use_bias=False,
                                   name='adjust_conv_2_%s' % block_id,
                                   kernel_initializer='he_normal')(p2)

                p = layers.concatenate([p1, p2], axis=channel_dim)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)

        elif p_shape[channel_dim] != filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv2D(filters, (1, 1),
                                  strides=(1, 1),
                                  padding='same',
                                  name='adjust_conv_projection_%s' % block_id,
                                  use_bias=False,
                                  kernel_initializer='he_normal')(p)
                p = layers.BatchNormalization(axis=channel_dim,
                                              momentum=0.9997,
                                              epsilon=1e-3,
                                              name='adjust_bn_%s' %
                                              block_id)(p)
    return p
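
In the `p2` path above, `ZeroPadding2D` followed by `Cropping2D` shifts the feature map by one pixel before the stride-2 average pool, so `p1` and `p2` sample complementary spatial positions. A small sketch of just that shift on a dummy 4x4 input:

```python
# p1 samples the even (row, col) positions; the pad-bottom/right +
# crop-top/left shift makes p2 sample the odd positions instead.
import tensorflow as tf
from tensorflow.keras import layers

x = tf.reshape(tf.range(16, dtype=tf.float32), (1, 4, 4, 1))

p1 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(x)

shifted = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(x)      # pad bottom/right
shifted = layers.Cropping2D(cropping=((1, 0), (1, 0)))(shifted)  # drop top row/left col
p2 = layers.AveragePooling2D((1, 1), strides=(2, 2), padding='valid')(shifted)

print(p1[0, :, :, 0].numpy())  # [[ 0.  2.] [ 8. 10.]]
print(p2[0, :, :, 0].numpy())  # [[ 5.  7.] [13. 15.]]
```
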
Example #39
 def _assign_singular_vector(self, variable, value):
     with K.name_scope('AssignSingularVector') as scope:
         with ops.colocate_with(variable):
             return state_ops.assign(variable, value, name=scope)
Example #40
def _reduction_a_cell(ip, p, filters, block_id=None):
    """Adds a Reduction cell for NASNet-A (Fig. 4 in the paper).

  Arguments:
    ip: Input tensor `x`
    p: Input tensor `p`
    filters: Number of output filters
    block_id: String block_id

  Returns:
    A Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('reduction_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          name='reduction_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        h = layers.BatchNormalization(axis=channel_dim,
                                      momentum=0.9997,
                                      epsilon=1e-3,
                                      name='reduction_bn_1_%s' % block_id)(h)
        h3 = layers.ZeroPadding2D(padding=imagenet_utils.correct_pad(h, 3),
                                  name='reduction_pad_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_left1_%s' %
                                         block_id)
            x1_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left2_%s' % block_id)(h3)
            x2_2 = _separable_conv_block(p,
                                         filters, (7, 7),
                                         strides=(2, 2),
                                         block_id='reduction_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_left3_%s' % block_id)(h3)
            x3_2 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         strides=(2, 2),
                                         block_id='reduction_right3_%s' %
                                         block_id)
            x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='reduction_left4_%s' % block_id)(x1)
            x4 = layers.add([x2, x4])

        with backend.name_scope('block_5'):
            x5_1 = _separable_conv_block(x1,
                                         filters, (3, 3),
                                         block_id='reduction_left4_%s' %
                                         block_id)
            x5_2 = layers.MaxPooling2D(
                (3, 3),
                strides=(2, 2),
                padding='valid',
                name='reduction_right5_%s' % block_id)(h3)
            x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id)

        x = layers.concatenate([x2, x3, x4, x5],
                               axis=channel_dim,
                               name='reduction_concat_%s' % block_id)
        return x, ip
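
These normal/reduction cells are the building blocks of the NASNet models shipped in `tf.keras.applications`. A quick usage sketch (random weights, illustrative class count):

```python
# Instantiating NASNetMobile assembles the _normal_a_cell/_reduction_a_cell
# blocks shown in these snippets into a full network.
import tensorflow as tf

model = tf.keras.applications.NASNetMobile(
    weights=None, input_shape=(224, 224, 3), classes=10)
x = tf.random.normal((1, 224, 224, 3))
print(model(x).shape)  # (1, 10)
```
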
Example #41
    def _subdiv_calculate_mean_and_var(self, x, axes, keep_dims):

        with K.name_scope('moments'):
            # The dynamic range of fp16 is too limited to support the collection of
            # sufficient statistics. As a workaround we simply perform the operations
            # on 32-bit floats before converting the mean and variance back to fp16
            y = math_ops.cast(
                x, dtypes.float32) if x.dtype == dtypes.float16 else x
            replica_ctx = ds.get_replica_context()

            if replica_ctx:
                # statistics local to this replica

                local_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
                local_squared_sum = math_ops.reduce_sum(math_ops.square(y),
                                                        axis=axes,
                                                        keepdims=True)
                batch_size = math_ops.cast(
                    array_ops.shape_v2(y)[0], dtypes.float32)
                # TODO(b/163099951): batch the all-reduces once we sort out the ordering
                # issue for NCCL. We don't have a mechanism to launch NCCL in the same
                # order in each replica nowadays, so we limit NCCL to batch all-reduces.
                # sum over all replicas (all-reduce across devices)
                y_sum = replica_ctx.all_reduce(reduce_util.ReduceOp.SUM,
                                               local_sum)
                # squared sum over all replicas (all-reduce across devices)
                y_squared_sum = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, local_squared_sum)
                # total batch size over all replicas (all-reduce across devices)
                input_batch_size = replica_ctx.all_reduce(
                    reduce_util.ReduceOp.SUM, batch_size)

                #tf.print(replica_ctx.replica_id_in_sync_group, replica_ctx.num_replicas_in_sync, batch_size, self.aggregated_square_sum_batch, axes)
                # number of elements averaged per example (local)
                axes_vals = [(array_ops.shape_v2(y))[i]
                             for i in range(1, len(axes))]
                multiplier_ = math_ops.cast(math_ops.reduce_prod(axes_vals),
                                            dtypes.float32)
                multiplier = multiplier_ * input_batch_size

                # convert the sums to mean and variance (locally)
                mean = y_sum / multiplier
                y_squared_mean = y_squared_sum / multiplier
                # var = E(x^2) - E(x)^2
                variance = y_squared_mean - math_ops.square(mean)
                net_sum = y_sum / multiplier_
                squared_mean = y_squared_sum / multiplier_

            else:
                # mean = math_ops.reduce_mean(y, axes, keepdims=True, name='mean')
                # # sample variance, not unbiased variance
                # # Note: stop_gradient does not change the gradient that gets
                # #       backpropagated to the mean from the variance calculation,
                # #       because that gradient is zero
                # variance = math_ops.reduce_mean(
                #     math_ops.squared_difference(y, array_ops.stop_gradient(mean)),
                #     axes,
                #     keepdims=True,
                #     name='variance')

                net_sum = math_ops.reduce_sum(y, axis=axes, keepdims=True)
                squared_mean = math_ops.reduce_sum(math_ops.square(y),
                                                   axis=axes,
                                                   keepdims=True)

                if self._support_zero_size_input():
                    # Keras assumes that batch dimension is the first dimension for Batch
                    # Normalization.
                    input_batch_size = array_ops.shape(y)[0]
                else:
                    input_batch_size = None

                # get the total number of elements you are averaging, including the batch size (local)
                axes_vals = [(array_ops.shape_v2(y))[i]
                             for i in range(1, len(axes))]
                multiplier = math_ops.cast(math_ops.reduce_prod(axes_vals),
                                           dtypes.float32)

                squared_mean = squared_mean / multiplier
                net_sum = net_sum / multiplier

                if input_batch_size is None:
                    mean, variance = nn.moments(y, axes, keep_dims=True)
                    input_batch_size = 0
                else:
                    batches_ = math_ops.cast(input_batch_size,
                                             self._param_dtype)
                    # # if you only have one replica, don't worry about it
                    # # Compute true mean while keeping the dims for proper broadcasting.
                    mean = net_sum / batches_
                    variance = squared_mean / batches_ - math_ops.square(mean)

            input_batch_size = math_ops.cast(input_batch_size, dtypes.int32)
            if not keep_dims:
                mean = array_ops.squeeze(mean, axes)
                net_sum = array_ops.squeeze(net_sum, axes)
                variance = array_ops.squeeze(variance, axes)
                squared_mean = array_ops.squeeze(squared_mean, axes)
            if x.dtype == dtypes.float16:
                return (math_ops.cast(mean, dtypes.float16),
                        math_ops.cast(net_sum, dtypes.float16),
                        math_ops.cast(variance, dtypes.float16),
                        math_ops.cast(squared_mean,
                                      dtypes.float16), input_batch_size)
            else:
                return (mean, net_sum, variance, squared_mean,
                        input_batch_size)
Example #42
 def __init__(self, lr=0.2, kd=0.1, **kwargs):
     super(PD, self).__init__(**kwargs)
     with K.name_scope(self.__class__.__name__):
         self.iterations = K.variable(0, dtype='int64', name='iterations')
         self.lr = K.variable(lr, name='lr')
         self.kd = K.variable(kd, name='kd')
Example #43
 def __init__(self, optimizer):  # pylint: disable=super-init-not-called
   self.optimizer = optimizer
   self._track_checkpointable(optimizer, name='optimizer')
   with K.name_scope(self.__class__.__name__):
     self.iterations = K.variable(0, dtype='int64', name='iterations')
Example #44
    def __init__(self, name, **kwargs):
        """Create a new Optimizer.

    This must be called by the constructors of subclasses.
    Note that Optimizer instances should not bind to a single graph,
    and so shouldn't keep Tensors as member variables. Generally
    you should be able to use the _set_hyper()/state.get_hyper()
    facility instead.

    This class is stateful and thread-compatible.

    Args:
      name: A non-empty string.  The name to use for accumulators created
        for the optimizer.
      **kwargs: keyword arguments. Allowed to be {`clipnorm`, `clipvalue`, `lr`,
        `decay`}. `clipnorm` clips gradients by norm; `clipvalue` clips
        gradients by value; `decay` is included for backward compatibility to
        allow time-inverse decay of the learning rate; `lr` is included for
        backward compatibility (use `learning_rate` instead).

    Raises:
      ValueError: If name is malformed.
      RuntimeError: If _create_slots has been overridden instead of
          _create_vars.
    """
        allowed_kwargs = {"clipnorm", "clipvalue", "lr", "decay"}
        for k in kwargs:
            if k not in allowed_kwargs:
                raise TypeError("Unexpected keyword argument "
                                "passed to optimizer: " + str(k))
            # checks that all keyword arguments are non-negative.
            if kwargs[k] < 0:
                raise ValueError("Expected {} >= 0, received: {}".format(
                    k, kwargs[k]))

        self._use_locking = True
        self._init_set_name(name)
        # in graph mode, name_scope performs uniquification, so keep scope_context.
        with backend.name_scope(self._name) as name_scope:
            self._scope_ctx = name_scope
        self._hyper = {}
        # dict: {variable name : {slot name : variable}}
        self._slots = {}
        self._slot_names = []
        self._weights = []
        self._iterations = None

        # For implementing Trackable. Stores information about how to restore
        # slot variables which have not yet been created
        # (trackable._CheckpointPosition objects).
        #  {slot_name :
        #      {_var_key(variable_to_train): [checkpoint_position, ... ], ... },
        #   ... }
        self._deferred_slot_restorations = {}

        decay = kwargs.pop("decay", 0.0)
        if decay < 0.:
            raise ValueError("decay cannot be less than 0: {}".format(decay))
        self._initial_decay = decay
        if "clipnorm" in kwargs:
            self.clipnorm = kwargs.pop("clipnorm")
        if "clipvalue" in kwargs:
            self.clipvalue = kwargs.pop("clipvalue")

        self._hypers_created = False
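
The `clipnorm`/`clipvalue`/`lr`/`decay` keywords validated above mirror the public Keras optimizer API. A brief sketch, assuming TF 2.x built-in (pre-Keras-3) optimizers; the values are illustrative.

```python
# Gradient clipping by norm or by value, plus the legacy lr/decay spellings
# kept for backward compatibility (learning_rate is preferred; newer Keras
# releases may reject the legacy names).
import tensorflow as tf

opt_norm = tf.keras.optimizers.SGD(learning_rate=0.01, clipnorm=1.0)
opt_value = tf.keras.optimizers.Adam(learning_rate=0.001, clipvalue=0.5)
opt_legacy = tf.keras.optimizers.RMSprop(lr=0.001, decay=1e-6)
```
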
Example #45
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Args:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    # TODO(psv): Dedup code here with graph mode prepare_total_loss() fn.
    # Used to keep track of the total loss value (stateless).
    # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
    #                   loss_weight_2 * output_2_loss_fn(...) +
    #                   layer losses.
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    # Allow mixed `NumPy` and `EagerTensor` input here.
    if any(
            isinstance(input_t, (np.ndarray, float, int))
            for input_t in nest.flatten(inputs)):
        inputs = nest.map_structure(ops.convert_to_tensor_v2_with_dispatch,
                                    inputs)

    outs = model(inputs, **kwargs)
    outs = nest.flatten(outs)

    if targets:
        targets = training_utils_v1.cast_if_floating_dtype_and_mismatch(
            targets, outs)
    # TODO(sallymatson/psv): check if we should do same mismatch fix for weights
    if sample_weights:
        sample_weights = [
            training_utils_v1.cast_if_floating_dtype(
                ops.convert_to_tensor_v2_with_dispatch(val))
            if val is not None else None for val in sample_weights
        ]

    masks = [getattr(t, '_keras_mask', None) for t in outs]
    targets = nest.flatten(targets)

    # Used to keep track of individual output losses.
    output_losses = []

    with backend.name_scope('loss'):
        loss_fns = [
            loss_fn for loss_fn in model.loss_functions if loss_fn is not None
        ]
        custom_losses = model.losses  # Regularization losses

        if not loss_fns and not custom_losses:
            if training:
                raise ValueError('The model cannot be trained '
                                 'because it has no loss to optimize.')
            else:
                raise ValueError('The model cannot be evaluated '
                                 'because it has no loss to compute.')

        for i, loss_fn in enumerate(loss_fns):
            weights = sample_weights[i] if sample_weights else None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if mask is not None:
                    mask = math_ops.cast(mask, outs[i].dtype)
                    # Update weights with mask.
                    if weights is None:
                        weights = mask
                    else:
                        # Update dimensions of weights to match with mask if possible.
                        weights = math_ops.cast(weights, outs[i].dtype)
                        mask, _, weights = (
                            losses_utils.squeeze_or_expand_dimensions(
                                mask, sample_weight=weights))
                        weights *= mask

                if hasattr(loss_fn, 'reduction'):
                    per_sample_losses = loss_fn.call(targets[i], outs[i])
                    weighted_losses = losses_utils.compute_weighted_loss(
                        per_sample_losses,
                        sample_weight=weights,
                        reduction=losses_utils.ReductionV2.NONE)
                    loss_reduction = loss_fn.reduction

                    # `AUTO` loss reduction defaults to `SUM_OVER_BATCH_SIZE` for all
                    # compile use cases.
                    if loss_reduction == losses_utils.ReductionV2.AUTO:
                        loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

                    # Compute the stateless loss value.
                    output_loss = losses_utils.reduce_weighted_loss(
                        weighted_losses, reduction=loss_reduction)
                else:
                    # Compute the stateless loss value for a custom loss class.
                    # Here we assume that the class takes care of loss reduction
                    # because if this class returns a vector value we cannot
                    # differentiate between use case where a custom optimizer
                    # expects a vector loss value vs unreduced per-sample loss value.
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)
                    loss_reduction = losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                # Keep track of the stateful output loss result.
                output_losses.append(output_loss_metrics[i](output_loss))

            # Scale output loss for distribution. For custom losses we assume
            # reduction was mean.
            if loss_reduction == losses_utils.ReductionV2.SUM_OVER_BATCH_SIZE:
                output_loss = losses_utils.scale_loss_for_distribution(
                    output_loss)
            total_loss += model._loss_weights_list[i] * output_loss

        # Add regularization losses
        if custom_losses:
            total_loss += losses_utils.scale_loss_for_distribution(
                math_ops.add_n(custom_losses))
    return outs, total_loss, output_losses, masks
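
The two-step computation above (per-sample losses with reduction disabled, then an explicit reduction) can be reproduced with the public losses API. A sketch with made-up values:

```python
# Per-sample weighted losses via Reduction.NONE, then SUM_OVER_BATCH_SIZE
# applied by hand (sum of weighted losses divided by the batch size).
import tensorflow as tf

y_true = tf.constant([[0.0], [1.0], [1.0], [0.0]])
y_pred = tf.constant([[0.1], [0.8], [0.4], [0.3]])
sample_weight = tf.constant([1.0, 1.0, 2.0, 0.5])

loss_fn = tf.keras.losses.BinaryCrossentropy(
    reduction=tf.keras.losses.Reduction.NONE)
per_sample = loss_fn(y_true, y_pred, sample_weight=sample_weight)  # shape [4]

total = tf.reduce_sum(per_sample) / tf.cast(tf.shape(per_sample)[0], tf.float32)
print(per_sample.numpy(), float(total))
```
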
def _model_loss(model,
                inputs,
                targets,
                output_loss_metrics=None,
                sample_weights=None,
                training=False):
    """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: Either a dictionary of inputs to the model or a list of input
        arrays.
      targets: List of target arrays.
      output_loss_metrics: List of metrics that are used to aggregate output
        loss values.
      sample_weights: Optional list of sample weight arrays.
      training: Whether the model should be run in inference or training mode.

  Returns:
     Returns the model output, total loss, loss value calculated using the
     specified loss function and masks for each output. The total loss includes
     regularization losses and applies masking and sample weighting
     to the loss value.
  """
    # Used to keep track of the total loss value (stateless).
    # eg., total_loss = loss_weight_1 * output_1_loss_fn(...) +
    #                   loss_weight_2 * output_2_loss_fn(...) +
    #                   layer losses.
    total_loss = 0
    kwargs = {}
    if model._expects_training_arg:
        kwargs['training'] = training
    if len(inputs) == 1 and not isinstance(inputs, dict):
        inputs = inputs[0]

    # Allow mixed `NumPy` and `EagerTensor` input here.
    if any(
            isinstance(input_t, (np.ndarray, float, int))
            for input_t in nest.flatten(inputs)):
        inputs = nest.map_structure(ops.convert_to_tensor, inputs)

    outs = model(inputs, **kwargs)

    outs = nest.flatten(outs)
    # `None` by default for `EagerTensors`.
    masks = [t._keras_mask for t in outs]
    targets = nest.flatten(targets)

    # Used to keep track of individual output losses (stateless).
    output_losses = []
    # Used to keep track of individual output losses (stateful).
    aggregated_output_losses = []

    with backend.name_scope('loss'):
        for i, loss_fn in enumerate(model.loss_functions):
            weights = sample_weights[i] if sample_weights else None
            mask = masks[i]
            with backend.name_scope(model.output_names[i] + '_loss'):
                if mask is not None:
                    mask = math_ops.cast(mask, outs[i].dtype)
                    # Update weights with mask.
                    if weights is None:
                        weights = mask
                    else:
                        # Update dimensions of weights to match with mask if possible.
                        mask, _, weights = (
                            losses_utils.squeeze_or_expand_dimensions(
                                mask, None, weights))
                        weights *= mask

                # Reset reduction on the loss so that we can get the per sample loss
                # value. We use this to get both the stateless and stateful loss
                # values without having to compute the underlying loss function
                # twice.
                weighted_losses = None
                if hasattr(loss_fn, 'reduction'):
                    current_loss_reduction = loss_fn.reduction
                    loss_fn.reduction = losses_utils.ReductionV2.NONE
                    weighted_losses = loss_fn(targets[i],
                                              outs[i],
                                              sample_weight=weights)
                    loss_fn.reduction = current_loss_reduction

                    # Compute the stateless loss value.
                    output_loss = losses_utils.reduce_weighted_loss(
                        weighted_losses)
                else:
                    # Compute the stateless loss value for a custom loss class.
                    # Here we assume the class takes care of loss reduction:
                    # if it returned a vector, we could not tell whether a
                    # custom optimizer expects a vector loss value or an
                    # unreduced per-sample loss value.
                    output_loss = loss_fn(targets[i],
                                          outs[i],
                                          sample_weight=weights)

            # If the number of outputs is 1 then we don't append the loss metric
            # associated with each model output. When there are multiple outputs
            # associated with a model, each output's loss is calculated and returned
            # as part of the loss_metrics.
            if len(model.outputs) > 1:
                output_losses.append(backend.mean(output_loss))
                if output_loss_metrics is not None:
                    # Compute the stateful loss value.
                    if weighted_losses is not None:
                        aggregated_output_loss = output_loss_metrics[i](
                            weighted_losses)
                    else:
                        # Custom loss class.
                        aggregated_output_loss = training_utils.call_metric_function(
                            output_loss_metrics[i],
                            targets[i],
                            outs[i],
                            weights=weights)
                    # Keep track of the stateful output loss result.
                    aggregated_output_losses.append(aggregated_output_loss)

            loss_weight = model.loss_weights_list[i]
            if total_loss is None:
                total_loss = loss_weight * output_loss
            else:
                total_loss += loss_weight * output_loss

        total_loss = backend.mean(total_loss)
        # Add regularization losses
        custom_losses = model.losses
        if custom_losses:
            total_loss += losses_utils.scale_loss_for_distribution(
                math_ops.add_n(custom_losses))
        model._clear_losses()

    return outs, total_loss, output_losses, aggregated_output_losses, masks
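The loop above hinges on temporarily switching a built-in loss's `reduction` to `NONE`, so a single forward pass yields both per-sample and reduced loss values. A minimal standalone sketch of the same idea with the public `tf.keras` API (assuming a TF 2.x-era `tf.keras.losses.Reduction`; the tensors, shapes, and weights are purely illustrative):

import tensorflow as tf

# Construct the loss with reduction disabled, instead of mutating
# `loss_fn.reduction` as the framework-internal code above does.
loss_fn = tf.keras.losses.MeanSquaredError(
    reduction=tf.keras.losses.Reduction.NONE)

y_true = tf.constant([[0.0], [1.0], [2.0]])
y_pred = tf.constant([[0.5], [1.0], [1.5]])
sample_weight = tf.constant([1.0, 0.0, 2.0])  # per-sample weights / mask

# Per-sample weighted losses of shape [3] -- the analogue of `weighted_losses`.
per_sample = loss_fn(y_true, y_pred, sample_weight=sample_weight)

# Manual reduction -- the analogue of losses_utils.reduce_weighted_loss(...).
total = tf.reduce_mean(per_sample)
print(per_sample.numpy(), total.numpy())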
Beispiel #47
0
def _normal_a_cell(ip, p, filters, block_id=None):
    """Adds a Normal cell for NASNet-A (Fig. 4 in the paper).

  Arguments:
      ip: Input tensor `x`
      p: Input tensor `p`
      filters: Number of output filters
      block_id: String block_id

  Returns:
      A Keras tensor
  """
    channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1

    with backend.name_scope('normal_A_block_%s' % block_id):
        p = _adjust_block(p, ip, filters, block_id)

        h = layers.Activation('relu')(ip)
        h = layers.Conv2D(filters, (1, 1),
                          strides=(1, 1),
                          padding='same',
                          name='normal_conv_1_%s' % block_id,
                          use_bias=False,
                          kernel_initializer='he_normal')(h)
        h = layers.BatchNormalization(axis=channel_dim,
                                      momentum=0.9997,
                                      epsilon=1e-3,
                                      name='normal_bn_1_%s' % block_id)(h)

        with backend.name_scope('block_1'):
            x1_1 = _separable_conv_block(h,
                                         filters,
                                         kernel_size=(5, 5),
                                         block_id='normal_left1_%s' % block_id)
            x1_2 = _separable_conv_block(p,
                                         filters,
                                         block_id='normal_right1_%s' %
                                         block_id)
            x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id)

        with backend.name_scope('block_2'):
            x2_1 = _separable_conv_block(p,
                                         filters, (5, 5),
                                         block_id='normal_left2_%s' % block_id)
            x2_2 = _separable_conv_block(p,
                                         filters, (3, 3),
                                         block_id='normal_right2_%s' %
                                         block_id)
            x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id)

        with backend.name_scope('block_3'):
            x3 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_left3_%s' % (block_id))(h)
            x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id)

        with backend.name_scope('block_4'):
            x4_1 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_left4_%s' % (block_id))(p)
            x4_2 = layers.AveragePooling2D(
                (3, 3),
                strides=(1, 1),
                padding='same',
                name='normal_right4_%s' % (block_id))(p)
            x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id)

        with backend.name_scope('block_5'):
            x5 = _separable_conv_block(h,
                                       filters,
                                       block_id='normal_left5_%s' % block_id)
            x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id)

        x = layers.concatenate([p, x1, x2, x3, x4, x5],
                               axis=channel_dim,
                               name='normal_concat_%s' % block_id)
    return x, ip
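Because `_normal_a_cell` returns both the new feature map and its unmodified `ip` input, cells are intended to be chained, with each call's `ip` becoming the next call's `p`. A minimal sketch of that chaining, assuming `_normal_a_cell` and suitable `x`, `p`, and `filters` are available as above (the helper name `stack_normal_cells` is illustrative):

def stack_normal_cells(x, p, filters, num_cells):
    # Repeatedly apply the normal cell; each call's input becomes the new `p`.
    for i in range(num_cells):
        x, p = _normal_a_cell(x, p, filters, block_id='stack_%d' % i)
    return x, p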
Beispiel #48
0
  def build(self, input_shape):
    with K.name_scope(self.forward_layer.name):
      self.forward_layer.build(input_shape)
    with K.name_scope(self.backward_layer.name):
      self.backward_layer.build(input_shape)
    self.built = True
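This `build` appears to come from a `Bidirectional`-style RNN wrapper: it simply builds its forward and backward sub-layers, each under its own name scope. A minimal usage sketch with the public `tf.keras` API (the layer sizes and input shape are illustrative):

import tensorflow as tf

# Wrapping an RNN layer builds one forward and one backward copy of it,
# each under its own name scope, once the input shape is known.
model = tf.keras.Sequential([
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(16), input_shape=(10, 8)),
    tf.keras.layers.Dense(1),
])
model.summary()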
Beispiel #50
0
  def add_weight(self,
                 name,
                 shape,
                 dtype=None,
                 initializer=None,
                 regularizer=None,
                 trainable=None,
                 constraint=None,
                 use_resource=None,
                 synchronization=vs.VariableSynchronization.AUTO,
                 aggregation=vs.VariableAggregation.NONE,
                 partitioner=None,
                 **kwargs):
    """Adds a new variable to the layer, or gets an existing one; returns it.

    Arguments:
      name: variable name.
      shape: variable shape.
      dtype: The type of the variable. Defaults to `self.dtype` or `float32`.
      initializer: initializer instance (callable).
      regularizer: regularizer instance (callable).
      trainable: whether the variable should be part of the layer's
        "trainable_variables" (e.g. variables, biases)
        or "non_trainable_variables" (e.g. BatchNorm mean, stddev).
        Note, if the current variable scope is marked as non-trainable
        then this parameter is ignored and any added variables are also
        marked as non-trainable. `trainable` defaults to `True` unless
        `synchronization` is set to `ON_READ`.
      constraint: constraint instance (callable).
      use_resource: Whether to use `ResourceVariable`.
      synchronization: Indicates when a distributed variable will be
        aggregated. Accepted values are constants defined in the class
        `tf.VariableSynchronization`. By default the synchronization is set to
        `AUTO` and the current `DistributionStrategy` chooses
        when to synchronize. If `synchronization` is set to `ON_READ`,
        `trainable` must not be set to `True`.
      aggregation: Indicates how a distributed variable will be aggregated.
        Accepted values are constants defined in the class
        `tf.VariableAggregation`.
      partitioner: (optional) partitioner instance (callable).  If
        provided, when the requested variable is created it will be split
        into multiple partitions according to `partitioner`.  In this case,
        an instance of `PartitionedVariable` is returned.  Available
        partitioners include `tf.compat.v1.fixed_size_partitioner` and
        `tf.compat.v1.variable_axis_size_partitioner`.  For more details, see
        the documentation of `tf.compat.v1.get_variable` and the "Variable
        Partitioners and Sharding" section of the API guide.
      **kwargs: Additional keyword arguments.

    Returns:
      The created variable.  Usually either a `Variable` or `ResourceVariable`
      instance.  If `partitioner` is not `None`, a `PartitionedVariable`
      instance is returned.

    Raises:
      RuntimeError: If called with partitioned variable regularization and
        eager execution is enabled.
      ValueError: When trainable has been set to True with synchronization
        set as `ON_READ`.
    """
    for kwarg in kwargs:
      if kwarg != 'experimental_autocast':
        raise TypeError('Unknown keyword argument:', kwarg)
    if self._keras_style:
      return super(Layer, self).add_weight(
          name=name,
          shape=shape,
          dtype=dtype,
          initializer=initializer,
          regularizer=regularizer,
          trainable=trainable and self.trainable,
          constraint=constraint,
          use_resource=use_resource,
          synchronization=vs.VariableSynchronization.AUTO,
          aggregation=vs.VariableAggregation.NONE,
          partitioner=partitioner,
          **kwargs)

    if synchronization == vs.VariableSynchronization.ON_READ:
      if trainable:
        raise ValueError(
            'Synchronization value can be set to '
            'VariableSynchronization.ON_READ only for non-trainable variables. '
            'You have specified trainable=True and '
            'synchronization=VariableSynchronization.ON_READ.')
      else:
        # Set trainable to be false when variable is to be synced on read.
        trainable = False
    elif trainable is None:
      trainable = True

    def _should_add_regularizer(variable, existing_variable_set):
      if base_layer_utils.is_split_variable(variable):
        for var in variable:
          if var in existing_variable_set:
            return False
        return True
      else:
        return variable not in existing_variable_set

    init_graph = None
    if not context.executing_eagerly():
      default_graph = ops.get_default_graph()
      if default_graph.building_function:
        with ops.init_scope():
          # Retrieve the variables from the graph into which variables
          # will be lifted; if initialization ops will be lifted into
          # the eager context, then there is nothing to retrieve, since variable
          # collections are not supported when eager execution is enabled.
          if not context.executing_eagerly():
            init_graph = ops.get_default_graph()
            existing_variables = set(tf_variables.global_variables())
      else:
        # Initialization ops will not be lifted out of the default graph.
        init_graph = default_graph
        existing_variables = set(tf_variables.global_variables())

    if dtype is None:
      dtype = self.dtype or dtypes.float32

    self._set_scope(None)
    reuse = self.built or self._reuse
    prev_len_trainable = len(self._trainable_weights)
    with vs.variable_scope(
        self._scope, reuse=reuse, auxiliary_name_scope=False) as scope:
      self._current_scope = scope
      with backend.name_scope(self._name_scope()):
        use_resource = (use_resource or
                        self._use_resource_variables or
                        scope.use_resource)
        if initializer is None:
          initializer = scope.initializer
        variable = super(Layer, self).add_weight(
            name,
            shape,
            dtype=dtypes.as_dtype(dtype),
            initializer=initializer,
            trainable=trainable and self.trainable,
            constraint=constraint,
            partitioner=partitioner,
            use_resource=use_resource,
            synchronization=synchronization,
            aggregation=aggregation,
            getter=vs.get_variable,
            **kwargs)

        if regularizer:
          if (ops.executing_eagerly_outside_functions()
              or _should_add_regularizer(variable, existing_variables)):
            self._handle_weight_regularization(name, variable, regularizer)

        if init_graph is not None:
          # Handle edge case where a custom getter has overridden `trainable`.
          # There is one known occurrence of this, in unit test
          # testBasicRNNCellNotTrainable in
          # contrib.rnn.python.kernel_tests.core_rnn_cell_test
          with init_graph.as_default():
            trainable_variables = tf_variables.trainable_variables()
          if (trainable and self.trainable and
              variable not in trainable_variables):
            # A custom getter / variable scope overrode the trainable flag.
            extra_trainable_vars = self._trainable_weights[prev_len_trainable:]
            self._trainable_weights = self._trainable_weights[
                :prev_len_trainable]
            self._non_trainable_weights += extra_trainable_vars
    return variable
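For comparison, the user-facing pattern in `tf.keras` is much simpler than this `tf.compat.v1` layer: a custom `tf.keras.layers.Layer` calls `add_weight` inside `build` once the input shape is known. A minimal sketch using only the public API (the layer name `Linear` and all sizes are illustrative, not taken from the snippet above):

import tensorflow as tf

class Linear(tf.keras.layers.Layer):
    """Toy dense layer that creates its weights lazily in build()."""

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # Kernel with an (optional) L2 regularizer; the regularization loss is
        # collected in `self.losses`, analogous to the custom losses added to
        # the total loss in the first example of this section.
        self.kernel = self.add_weight(
            name='kernel',
            shape=(int(input_shape[-1]), self.units),
            initializer='glorot_uniform',
            regularizer=tf.keras.regularizers.l2(1e-4),
            trainable=True)
        self.bias = self.add_weight(
            name='bias',
            shape=(self.units,),
            initializer='zeros',
            trainable=True)
        super().build(input_shape)

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel) + self.bias

A layer defined this way is used like any built-in layer, e.g. `Linear(4)(tf.zeros((2, 8)))`.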