def _resource_apply_dense(self, grad, var, apply_state=None):
        """Dense SGD/momentum update for `var`, preceded by clipping `var` to [-1, 1].

        This is the only change from stock Keras SGD-with-momentum: the variable
        is projected into the box *before* the gradient step, so the post-update
        value may transiently leave [-1, 1] until the next call.

        Args:
            grad: dense gradient tensor for `var`.
            var: resource variable being updated.
            apply_state: optional dict of precomputed per-(device, dtype)
                coefficients; falls back to `_fallback_apply_state` when absent.

        Returns:
            The op produced by the underlying `training_ops` kernel.
        """
        # NOTE(review): K.set_value is conventionally fed a numpy value; passing
        # the K.clip tensor here appears to rely on eager execution — confirm.
        K.set_value(var, K.clip(var, min_value=-1.0, max_value=1.0))

        var_device, var_dtype = var.device, var.dtype.base_dtype
        # Per-(device, dtype) coefficients (lr_t, momentum, ...); a missing or
        # empty apply_state falls back to recomputing them.
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        if self._momentum:
            momentum_var = self.get_slot(var, "momentum")
            return training_ops.resource_apply_keras_momentum(
                var.handle,
                momentum_var.handle,
                coefficients["lr_t"],
                grad,
                coefficients["momentum"],
                use_locking=self._use_locking,
                use_nesterov=self.nesterov)
        else:
            return training_ops.resource_apply_gradient_descent(
                var.handle,
                coefficients["lr_t"],
                grad,
                use_locking=self._use_locking)
Example #2
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Apply a dense gradient to `var`, scaling lr by any per-variable multiplier."""
        # pylint: disable=no-name-in-module,import-error
        from tensorflow.python.training import training_ops

        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        # Start from the shared base rate, then scale it for variables that
        # have a registered multiplier.
        lr_t = coefficients["lr_t"]
        if var.name in self.lr_multipliers:
            lr_t = lr_t * self.lr_multipliers[var.name]

        # Without momentum this reduces to plain gradient descent.
        if not self._momentum:
            return training_ops.resource_apply_gradient_descent(
                var.handle, lr_t, grad, use_locking=self._use_locking)

        mom_slot = self.get_slot(var, "momentum")
        return training_ops.resource_apply_keras_momentum(
            var.handle,
            mom_slot.handle,
            lr_t,
            grad,
            coefficients["momentum"],
            use_locking=self._use_locking,
            use_nesterov=self.nesterov,
        )
Example #3
 def _resource_apply_dense(self, grad, var):
   """Dense SGD update for `var`, with or without (Nesterov) momentum."""
   var_dtype = var.dtype.base_dtype
   lr_t = self._decayed_lr(var_dtype)
   # Without momentum this reduces to vanilla gradient descent.
   if not self._momentum:
     return training_ops.resource_apply_gradient_descent(
         var.handle, lr_t, grad, use_locking=self._use_locking)
   accumulator = self.get_slot(var, "momentum")
   return training_ops.resource_apply_keras_momentum(
       var.handle,
       accumulator.handle,
       lr_t,
       grad,
       self._get_hyper("momentum", var_dtype),
       use_locking=self._use_locking,
       use_nesterov=self.nesterov)
 def _resource_apply_dense(self, grad, var):
   """Apply a dense gradient step to `var` (momentum kernel or plain SGD)."""
   dtype = var.dtype.base_dtype
   lr = self._decayed_lr(dtype)
   if self._momentum:
     slot = self.get_slot(var, "momentum")
     momentum_t = self._get_hyper("momentum", dtype)
     return training_ops.resource_apply_keras_momentum(
         var.handle, slot.handle, lr, grad, momentum_t,
         use_locking=self._use_locking, use_nesterov=self.nesterov)
   else:
     return training_ops.resource_apply_gradient_descent(
         var.handle, lr, grad, use_locking=self._use_locking)
Example #5
  def _resource_apply_dense(self, grad, var, apply_state=None):
    """Dense update: per-variable rescaled lr + momentum step (always momentum)."""
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))

    # compute_lr (defined elsewhere in the optimizer) yields both a scaled
    # learning rate and a possibly transformed gradient for this variable.
    scaled_lr, grad = self.compute_lr(grad, var, coefficients)
    momentum_slot = self.get_slot(var, "momentum")

    # ============================================================
    return training_ops.resource_apply_keras_momentum(
        var.handle,
        momentum_slot.handle,
        scaled_lr,
        grad,
        self.momentum,
        use_locking=False,  # deliberately unlocked in the original
        use_nesterov=self.use_nesterov)
  def _resource_apply_dense(self, grad, var, apply_state=None):
    """Dense update for `var`: fused momentum kernel when enabled, else plain SGD."""
    var_device, var_dtype = var.device, var.dtype.base_dtype
    coefficients = ((apply_state or {}).get((var_device, var_dtype))
                    or self._fallback_apply_state(var_device, var_dtype))

    # Guard: no momentum configured -> vanilla gradient descent.
    if not self._momentum:
      return training_ops.resource_apply_gradient_descent(
          var.handle, coefficients["lr_t"], grad, use_locking=self._use_locking)

    accum = self.get_slot(var, "momentum")
    return training_ops.resource_apply_keras_momentum(
        var.handle,
        accum.handle,
        coefficients["lr_t"],
        grad,
        coefficients["momentum"],
        use_locking=self._use_locking,
        use_nesterov=self.nesterov)
Example #7
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Dense update that normalizes the gradient by a per-variable second
        moment before a fused Keras momentum step (NovoGrad-like — confirm
        against the enclosing optimizer's documentation).

        Args:
            grad: dense gradient for `var`.
            var: resource variable to update.
            apply_state: optional dict of precomputed per-(device, dtype)
                coefficients; falls back to `_fallback_apply_state` when absent.

        Returns:
            The op from `training_ops.resource_apply_keras_momentum`.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = (apply_state or {}).get(
            (var_device, var_dtype)
        ) or self._fallback_apply_state(var_device, var_dtype)
        weight_decay = self._get_hyper("weight_decay")
        grad_averaging = self._get_hyper("grad_averaging")

        # "v" holds one scalar second moment per variable: the sum of squared
        # gradient entries, seeded on step 0 and EMA-decayed by beta_2 after.
        v = self.get_slot(var, "v")
        g_2 = tf.reduce_sum(tf.square(tf.cast(grad, tf.float32)))
        v_t = tf.cond(
            tf.equal(self.iterations, 0),
            lambda: g_2,
            lambda: v * coefficients["beta_2_t"]
            + g_2 * coefficients["one_minus_beta_2_t"],
        )
        # Persist the new moment; the assigned tensor is reused below so the
        # normalization sees the post-update value.
        v_t = v.assign(v_t, use_locking=self._use_locking)

        if self.amsgrad:
            # AMSGrad option: normalize by the running max of v_t instead.
            vhat = self.get_slot(var, "vhat")
            vhat_t = vhat.assign(tf.maximum(vhat, v_t), use_locking=self._use_locking)
            grad = grad / (tf.sqrt(vhat_t) + self.epsilon)
        else:
            grad = grad / (tf.sqrt(v_t) + self.epsilon)
        # Weight decay is added to the already-normalized gradient.
        grad = tf.cond(
            tf.greater(weight_decay, 0), lambda: grad + weight_decay * var, lambda: grad
        )
        # Optional gradient averaging: scale by (1 - beta_1) except on step 0.
        grad = tf.cond(
            tf.logical_and(grad_averaging, tf.not_equal(self.iterations, 0)),
            lambda: grad * coefficients["one_minus_beta_1_t"],
            lambda: grad,
        )
        m = self.get_slot(var, "m")
        # beta_1 serves as the momentum coefficient of the fused kernel.
        return training_ops.resource_apply_keras_momentum(
            var.handle,
            m.handle,
            coefficients["lr_t"],
            grad,
            coefficients["beta_1_t"],
            use_locking=self._use_locking,
            use_nesterov=False,
        )
Example #8
    def _resource_apply_dense(self, grad, var, apply_state=None):
        """Dense update with a separate learning rate for "transpose" (deconv) vars.

        Args:
            grad: dense gradient tensor for `var`.
            var: resource variable being updated.
            apply_state: optional dict of precomputed per-(device, dtype)
                coefficients; falls back to `_fallback_apply_state` when absent.

        Returns:
            The op produced by the underlying `training_ops` kernel.
        """
        var_device, var_dtype = var.device, var.dtype.base_dtype
        coefficients = ((apply_state or {}).get((var_device, var_dtype))
                        or self._fallback_apply_state(var_device, var_dtype))

        if self._momentum:
            momentum_var = self.get_slot(var, "momentum")
            return training_ops.resource_apply_keras_momentum(
                var.handle,
                momentum_var.handle,
                coefficients["lr_t"],
                grad,
                coefficients["momentum"],
                use_locking=self._use_locking,
                use_nesterov=self.nesterov,
            )

        lr = coefficients["lr_t"]
        # Transposed-convolution variables train at their own configured rate.
        # Idiomatic membership test replaces `str(...).find(...) != -1`.
        if "transpose" in str(var.name):
            lr = constant(self._serialize_hyperparameter("learning_rate_deconv"))
        return training_ops.resource_apply_gradient_descent(
            var.handle, lr, grad, use_locking=self._use_locking)