def _apply_sparse(self, grad, var):
    """Apply a sparse gradient to `var` via the momentum training kernel.

    Args:
      grad: `IndexedSlices` carrying the sparse gradient (`values`/`indices`).
      var: Variable to update; its momentum accumulator lives in the
        "momentum" slot.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    accumulator = self.get_slot(var, "momentum")
    dtype = var.dtype.base_dtype
    # Hyperparameter tensors are cast to the variable's base dtype so the
    # kernel sees matching dtypes.
    lr_t = math_ops.cast(self._learning_rate_tensor, dtype)
    momentum_t = math_ops.cast(self._momentum_tensor, dtype)
    return training_ops.sparse_apply_momentum(
        var,
        accumulator,
        lr_t,
        grad.values,
        grad.indices,
        momentum_t,
        use_locking=self._use_locking).op
def _apply_sparse(self, grad, var):
    """Apply a sparse gradient to `var` via the momentum training kernel.

    Fix: the learning-rate and momentum tensors were previously passed to
    the kernel uncast.  For variables whose dtype differs from the
    hyperparameter tensors' dtype (e.g. float16/float64 variables) the
    kernel rejects the mismatched dtypes.  Cast both to the variable's base
    dtype, matching the convention used by the sibling apply methods.

    Args:
      grad: `IndexedSlices` carrying the sparse gradient (`values`/`indices`).
      var: Variable to update; its accumulator is the "momentum" slot.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    mom = self.get_slot(var, "momentum")
    return training_ops.sparse_apply_momentum(
        var, mom,
        math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
        grad.values, grad.indices,
        math_ops.cast(self._momentum_tensor, var.dtype.base_dtype),
        use_locking=self._use_locking).op
def _apply_sparse(self, grad, var, state):
    """Apply a sparse momentum update, reading hyperparameters from `state`.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.
      state: Optimizer state object providing `get_slot` and `get_hyper`.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    momentum_slot = state.get_slot(var, "momentum")
    dtype = var.dtype.base_dtype
    # `get_hyper` returns the hyperparameter already cast to `dtype`.
    lr_t = state.get_hyper("learning_rate", dtype)
    momentum_t = state.get_hyper("momentum", dtype)
    return training_ops.sparse_apply_momentum(
        var,
        momentum_slot,
        lr_t,
        grad.values,
        grad.indices,
        momentum_t,
        use_locking=self._use_locking,
        use_nesterov=self._use_nesterov).op
def _apply_sparse(self, grad, var):
    """Apply a sparse momentum update to `var`.

    The momentum accumulator is looked up by variable name in `self._zdic`
    rather than through the slot mechanism.  Nesterov momentum is disabled.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    dtype = var.dtype.base_dtype
    accumulator = self._zdic[var.name]
    lr_t = math_ops.cast(self._learning_rate_tensor, dtype)
    momentum_t = math_ops.cast(self._momentum_tensor, dtype)
    return training_ops.sparse_apply_momentum(
        var,
        accumulator,
        lr_t,
        grad.values,
        grad.indices,
        momentum_t,
        use_locking=self._use_locking,
        use_nesterov=False).op
def momentum_apply_sparse(self, grad, var):
    """Run a sparse momentum step using the "m" slot as the accumulator.

    `self._beta1_t` plays the role of the momentum coefficient.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    dtype = var.dtype.base_dtype
    first_moment = self.get_slot(var, "m")
    # Cast both hyperparameter tensors to the variable's base dtype.
    lr_t = math_ops.cast(self._lr_t, dtype)
    beta1_t = math_ops.cast(self._beta1_t, dtype)
    return training_ops.sparse_apply_momentum(
        var,
        first_moment,
        lr_t,
        grad.values,
        grad.indices,
        beta1_t,
        use_locking=self._use_locking,
        use_nesterov=self._use_nesterov).op
def _apply_sparse(self, grad, var):
    """Apply a sparse momentum update with per-variable hyperparameters.

    `self._params_for_var` supplies (learning_rate, momentum, use_locking,
    use_nesterov) for this particular variable; the velocity accumulator is
    kept in the "velocity" slot.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    velocity = self.get_slot(var, "velocity")
    learning_rate, momentum, locking, nesterov = self._params_for_var(var)
    dtype = var.dtype.base_dtype
    return training_ops.sparse_apply_momentum(
        var,
        velocity,
        math_ops.cast(learning_rate, dtype),
        grad.values,
        grad.indices,
        math_ops.cast(momentum, dtype),
        use_locking=locking,
        use_nesterov=nesterov).op
def _apply_sparse(self, grad, var):
    """Apply a sparse momentum update to `var`.

    The nesterov flag is re-read from the serialized hyperparameters on
    every call.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.

    Returns:
      The underlying `Operation` of the sparse-apply-momentum call.
    """
    dtype = var.dtype.base_dtype
    momentum_slot = self.get_slot(var, "momentum")
    nesterov = bool(self._serialize_hyperparameter("use_nesterov"))
    # NOTE(review): use_locking is hard-coded to False here, unlike sibling
    # variants that forward an instance flag — confirm this is intended.
    return training_ops.sparse_apply_momentum(
        var,
        momentum_slot,
        math_ops.cast(self._learning_rate_tensor, dtype),
        grad.values,
        grad.indices,
        math_ops.cast(self._momentum_tensor, dtype),
        use_locking=False,
        use_nesterov=nesterov,
    ).op
def _apply_sparse(self, grad, var, apply_state=None):
    """Apply a sparse momentum update using precomputed coefficients.

    Coefficients are looked up in `apply_state` keyed by
    (device, base dtype); when the lookup yields nothing truthy, they are
    rebuilt via `self._fallback_apply_state`.

    Args:
      grad: `IndexedSlices` with the sparse gradient.
      var: Variable to update.
      apply_state: Optional mapping of (device, dtype) -> coefficient dict.

    Returns:
      The op returned by `training_ops.sparse_apply_momentum`.
    """
    device = var.device
    dtype = var.dtype.base_dtype
    # Preserve the original truthiness fallback: a missing or falsy entry
    # both trigger the fallback path.
    cached = apply_state.get((device, dtype)) if apply_state else None
    coefficients = cached or self._fallback_apply_state(device, dtype)
    momentum_slot = self.get_slot(var, "momentum")
    return training_ops.sparse_apply_momentum(
        var,
        momentum_slot,
        coefficients["learning_rate"],
        grad.values,
        grad.indices,
        self.momentum,
        use_locking=False,
        use_nesterov=self.use_nesterov)
def _apply_sparse(self, grad, var):
    """Sparse update combining a momentum step with a direct gradient term.

    Phase 1 advances the "momentum" slot with an effective learning rate of
    nu * (1 - momentum) * lr via the momentum kernel.  Phase 2 then adds
    (nu - 1) * lr * grad directly to the variable with a scatter-add.
    NOTE(review): this two-term split resembles a quasi-hyperbolic-momentum
    style update — confirm against the scheme this optimizer implements.

    Args:
      grad: `IndexedSlices` with `values`, `indices` and `dense_shape`.
      var: Variable to update.

    Returns:
      A grouped op covering both the momentum step and the scatter-add.
    """
    momentum_buffer = self.get_slot(var, "momentum")
    # Cast all hyperparameter tensors to the variable's base dtype so the
    # kernel and the elementwise math below see matching dtypes.
    learning_rate = math_ops.cast(self._learning_rate_tensor,
                                  var.dtype.base_dtype)
    momentum = math_ops.cast(self._momentum_tensor, var.dtype.base_dtype)
    nu = math_ops.cast(self._nu_tensor, var.dtype.base_dtype)
    # Phase 1: momentum step with the rescaled learning rate.
    momentum_op = training_ops.sparse_apply_momentum(
        var, momentum_buffer,
        nu * (1.0 - momentum) * learning_rate,
        grad.values, grad.indices,
        momentum,
        use_locking=self._use_locking,
        use_nesterov=False,
    ).op
    # Phase 2 must observe the variable *after* the momentum step, hence the
    # explicit control dependency before the scatter-add.
    with ops.control_dependencies([momentum_op]):
        delta = ops.IndexedSlices(
            (nu - 1.0) * learning_rate * grad.values,
            grad.indices,
            grad.dense_shape)
        gd_op = var.scatter_add(delta, use_locking=self._use_locking)
    return control_flow_ops.group(momentum_op, gd_op)