コード例 #1
0
    def _apply_dense(self, grad, var):
        """Build the ops that apply one INNA step to a dense variable.

        Args:
            grad: Gradient tensor associated with ``var``.
            var: Variable to update. Its "v1" slot stores the auxiliary
                variable of the algorithm.

        Returns:
            A single op grouping the update of ``var`` and the update of its
            "v1" slot.
        """
        dtype = var.dtype.base_dtype
        lr_t = math_ops.cast(self._lr_t, dtype)
        alpha_t = math_ops.cast(self._alpha_t, dtype)
        beta_t = math_ops.cast(self._beta_t, dtype)
        decay_t = math_ops.cast(self._decay_t, dtype)
        decaypower_t = math_ops.cast(self._decaypower_t, dtype)
        speed_ini_t = math_ops.cast(self._speed_ini_t, dtype)

        v = self.get_slot(var, "v1")

        # Initialise v such that the initial speed is in the direction of
        # -grad: at iteration 0 the slot value is replaced by an expression of
        # var and grad; afterwards the stored slot value is used as is.
        v_temp = cond(
            equal(num_iter(), 0),
            lambda: ((1. - alpha_t * beta_t) * var - beta_t**2 * grad +
                     beta_t * speed_ini_t * grad),
            lambda: v)

        # Step size lr * decay / (k + 1) ** decaypower, shared by both updates.
        step_t = lr_t * decay_t / math_ops.pow(
            math_ops.cast(num_iter() + 1, dtype), decaypower_t)

        # Scalar coefficients that appear in both update directions.
        var_coeff = alpha_t - 1. / beta_t
        inv_beta = 1. / beta_t

        # v <- v_temp - step * ((alpha - 1/beta) * var + 1/beta * v_temp)
        v_t = v.assign(
            v_temp - step_t * (var_coeff * var + inv_beta * v_temp))

        # var <- var - step * ((alpha - 1/beta) * var + 1/beta * v_temp
        #                      + beta * grad)
        var_update = state_ops.assign_sub(
            var,
            step_t * (var_coeff * var + inv_beta * v_temp + beta_t * grad))

        return control_flow_ops.group(var_update, v_t)
コード例 #2
0
  def _apply_dense(self, grad, var):
    """Build the update of the "v1" slot of ``var`` for a dense gradient.

    Only the beginning of the method is visible in this snippet: it stops
    after the slot assignment and shows neither the update of ``var`` nor a
    return statement.

    Args:
      grad: Gradient tensor associated with ``var``.
      var: Variable whose "v1" slot is updated.
    """
    # Cast every hyper-parameter tensor to the variable's base dtype.
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
    # NOTE(review): epsilon_t is not used in the visible part of this method.
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    decay_t = math_ops.cast(self._decay_t, var.dtype.base_dtype)
    decaypower_t = math_ops.cast(self._decaypower_t, var.dtype.base_dtype)
    speed_ini_t = math_ops.cast(self._speed_ini_t, var.dtype.base_dtype)
    

    # Auxiliary variable attached to `var`.
    v = self.get_slot(var, "v1")
    # First term of the initial value below: (1 - alpha * beta) * var.
    # Initialise v such that the initial speed is in the direction of -grad.
    # When num_iter() equals 0 the first branch is selected; otherwise the
    # stored slot value is used unchanged.
    v_temp = cond( equal(num_iter(),0) ,
      lambda : (1.-alpha_t*beta_t) * var - beta_t**2 * grad + beta_t * speed_ini_t * grad, lambda : v )
    # The bare string below is never executed as code; it spells out the
    # cond() above as a plain if/else.
    '''
    if k == 0:
        v_temp = (1.-alpha_t*beta_t) * var - beta_t**2 * grad + beta_t * speed_ini_t * grad
    else: 
        v_temp = v
    '''

    # v <- v_temp - step * ((alpha - 1/beta) * var + 1/beta * v_temp),
    # with step = lr * decay / (num_iter() + 1) ** decaypower.
    v_t = v.assign( v_temp - ( lr_t * decay_t / math_ops.pow(math_ops.cast(num_iter()+1, var.dtype.base_dtype),decaypower_t) ) * ( (alpha_t-1./beta_t) * var + 1./beta_t * v_temp ) )
コード例 #3
0
class INNAOptimizer(optimizer.Optimizer):
  """Optimizer that implements the INNA algorithm.
  See [Castera et al., 2019](https://arxiv.org/abs/1905.12278).
  """

  def __init__(self,
               lr=0.01,
               alpha=0.5,
               beta=0.1,
               decay=1.,
               decaypower=0.5,
               speed_ini=1.0,
               epsilon=1e-8,
               use_locking=False,
               name="INNA"):
    """Store the hyper-parameters of the optimizer.

    Args:
      lr: Base step size; `_apply_dense` multiplies it by `decay` and divides
        by `(iteration + 1) ** decaypower`.
      alpha: Coefficient `alpha` of the update rule.
      beta: Coefficient `beta` of the update rule.
      decay: Multiplicative factor applied to `lr` in the step size.
      decaypower: Exponent of the iteration count in the step-size schedule.
      speed_ini: Scale of the gradient term used when the auxiliary variable
        is initialised on the first iteration.
      epsilon: Stored and converted to a tensor alongside the other values.
      use_locking: Forwarded to the base-class constructor.
      name: Forwarded to the base-class constructor.
    """
    super(INNAOptimizer, self).__init__(use_locking, name)
    self._iterations = 0

    # Raw hyper-parameter values as given by the caller.
    self._lr, self._alpha, self._beta = lr, alpha, beta
    self._epsilon = epsilon
    self._decay, self._decaypower = decay, decaypower
    self._speed_ini = speed_ini

    # Tensor counterparts of the values above; filled in by _prepare().
    self._lr_t = self._alpha_t = self._beta_t = None
    self._epsilon_t = None
    self._decay_t = self._decaypower_t = None
    self._speed_ini_t = None


  def _create_slots(self, var_list):
    """Attach a "v1" slot to every variable in `var_list`.

    The slot is created through `_zeros_slot` and holds the auxiliary variable
    read back by `_apply_dense`.
    """
    for variable in var_list:
      self._zeros_slot(variable, "v1", self._name)

  def _prepare(self):
    """Create the tensor versions of the hyper-parameters.

    Every hyper-parameter is first resolved through `_call_if_callable`, and
    the resolved value (not the raw attribute) is converted to a tensor, so a
    hyper-parameter supplied as a callable is converted from the value it
    yields rather than from the callable object itself.
    """
    lr = self._call_if_callable(self._lr)
    alpha = self._call_if_callable(self._alpha)
    beta = self._call_if_callable(self._beta)
    epsilon = self._call_if_callable(self._epsilon)
    decay = self._call_if_callable(self._decay)
    decaypower = self._call_if_callable(self._decaypower)
    speed_ini = self._call_if_callable(self._speed_ini)

    self._lr_t = ops.convert_to_tensor(lr, name="lr")
    self._alpha_t = ops.convert_to_tensor(alpha, name="alpha")
    self._beta_t = ops.convert_to_tensor(beta, name="beta")
    self._epsilon_t = ops.convert_to_tensor(epsilon, name="epsilon")
    self._decay_t = ops.convert_to_tensor(decay, name="decay")
    self._decaypower_t = ops.convert_to_tensor(decaypower, name="decaypower")
    self._speed_ini_t = ops.convert_to_tensor(speed_ini, name="speed_ini")

  def _apply_dense(self, grad, var):
    lr_t = math_ops.cast(self._lr_t, var.dtype.base_dtype)
    alpha_t = math_ops.cast(self._alpha_t, var.dtype.base_dtype)
    beta_t = math_ops.cast(self._beta_t, var.dtype.base_dtype)
    epsilon_t = math_ops.cast(self._epsilon_t, var.dtype.base_dtype)
    decay_t = math_ops.cast(self._decay_t, var.dtype.base_dtype)
    decaypower_t = math_ops.cast(self._decaypower_t, var.dtype.base_dtype)
    speed_ini_t = math_ops.cast(self._speed_ini_t, var.dtype.base_dtype)
    

    v = self.get_slot(var, "v1")
    #(1.-self.alpha*self.beta)*p )
    #Initialise v such that the initial speed is in the direction of -grad
    v_temp = cond( equal(num_iter(),0) ,
      lambda : (1.-alpha_t*beta_t) * var - beta_t**2 * grad + beta_t * speed_ini_t * grad, lambda : v )
    '''
    if k == 0:
        v_temp = (1.-alpha_t*beta_t) * var - beta_t**2 * grad + beta_t * speed_ini_t * grad
    else: 
        v_temp = v
    '''

    v_t = v.assign( v_temp - ( lr_t * decay_t / math_ops.pow(math_ops.cast(num_iter()+1, var.dtype.base_dtype),decaypower_t) ) * ( (alpha_t-1./beta_t) * var + 1./beta_t * v_temp ) )
   
   '''
   # ψ_kp1 = ψ_k + γk ( (1/β - α) θ_k - 1/β ψ_k )
   # ψ_kp1 = ψ_k - γk ( (α - 1/β) θ_k + 1/β ψ_k )
   
    v = v_temp - ( lr_t * decay_t / math_ops.pow(math_ops.cast(num_iter()+1, var.dtype.base_dtype),decaypower_t) ) * ( (alpha_t-1./beta_t) * var + 1./beta_t * v_temp ) 
    
    '''
    var_update = state_ops.assign_sub( var, ( lr_t * decay_t / math_ops.pow(math_ops.cast(num_iter()+1, var.dtype.base_dtype),decaypower_t) ) * ( (alpha_t-1./beta_t) * var + 1./beta_t * v_temp + beta_t * grad ) ) #Update 'ref' by subtracting 'value