Example 1
 def _apply_sparse(self, grad, var):
   g_acc = self.get_slot(var, "gradient_accumulator")
   gg_acc = self.get_slot(var, "gradient_squared_accumulator")
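   # Read the worker-local copy of the global step so this update does not
   # have to fetch the global step from the parameter server that holds it.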
   with ops.device(var.device):
     global_step = array_ops.identity(self._global_step_on_worker)
   return training_ops.sparse_apply_adagrad_da(
       var,
       g_acc,
       gg_acc,
       grad.values,
       grad.indices,
       math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
       math_ops.cast(self._l1_regularization_strength, var.dtype.base_dtype),
       math_ops.cast(self._l2_regularization_strength, var.dtype.base_dtype),
       global_step,
       use_locking=self._use_locking)
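For context, a minimal, hedged sketch of how this sparse path gets exercised: in TF 1.x, tf.train.AdagradDAOptimizer takes the global step in its constructor, and a gradient produced through tf.gather arrives as an IndexedSlices, which routes apply_gradients to _apply_sparse. The shapes and hyperparameters below are illustrative only, not taken from the snippet above.

# Minimal sketch (TF 1.x-style API via tf.compat.v1); values are illustrative.
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

global_step = tf.train.get_or_create_global_step()
embeddings = tf.Variable(tf.random_normal([10, 4]), name="embeddings")

# Gathering rows makes the gradient an IndexedSlices, so apply_gradients
# dispatches to _apply_sparse (shown above) instead of _apply_dense.
rows = tf.gather(embeddings, [0, 3, 7])
loss = tf.reduce_sum(tf.square(rows))

opt = tf.train.AdagradDAOptimizer(
    learning_rate=0.1,
    global_step=global_step,
    l1_regularization_strength=0.001,
    l2_regularization_strength=0.001)
train_op = opt.minimize(loss, global_step=global_step)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for _ in range(3):
    sess.run(train_op)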
Example 2
 def _apply_sparse(self, grad, var):
   g_acc = self.get_slot(var, "gradient_accumulator")
   gg_acc = self.get_slot(var, "gradient_squared_accumulator")
   # Performance optimization so that worker creates a copy of the global step
   # to avoid overloading the parameter server holding the global step.
   with ops.device(grad[0].device):
     global_step = array_ops.identity(self._global_step) + 1
   return training_ops.sparse_apply_adagrad_da(
       var,
       g_acc,
       gg_acc,
       grad.values,
       grad.indices,
       math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype),
       math_ops.cast(self._l1_regularization_strength, var.dtype.base_dtype),
       math_ops.cast(self._l2_regularization_strength, var.dtype.base_dtype),
       global_step,
       use_locking=self._use_locking)
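The two examples differ only in where the worker's copy of the global step comes from: Example 1 reads a tensor prepared ahead of time (self._global_step_on_worker), while Example 2 builds identity(global_step) + 1 inline on the gradient's device. A minimal sketch of how such a prepared copy might be created; the _prepare() hook and its exact contents are assumptions, only the attribute names are reused from the snippets above.

  def _prepare(self):
    # Sketch only: not taken from the snippets above. Build one worker-local
    # copy of the global step per step so every _apply_sparse call can read
    # it without contacting the parameter server that owns the real counter;
    # the "+ 1" mirrors the inline variant in Example 2.
    self._learning_rate_tensor = ops.convert_to_tensor(
        self._learning_rate, name="learning_rate")
    self._global_step_on_worker = array_ops.identity(self._global_step) + 1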