Example #1
import ngraph as ng  # Nervana Graph API used throughout these snippets


def clip_gradient_norm(grad_list, clip_norm=None):
    """
    Returns a scaling factor to apply to the gradients.

    The scaling factor is computed so that the combined L2 norm of the
    scaled gradients across all layers is at most the provided clip_norm
    value. This factor is always <= 1, so it never scales up the gradients.

    Arguments:
        grad_list (list): List of layer gradients
        clip_norm (float, optional): Target norm for the gradients. If not provided,
                                     the returned scale_factor will equal 1

    Returns:
        Computed scale factor (float)
    """
    if clip_norm is None:
        return 1
    else:
        s = None
        for param in grad_list:
            term = ng.squared_L2(param, out_axes=None)
            if s is None:
                s = term
            else:
                s = s + term

        s = ng.sqrt(s)
        return clip_norm / ng.maximum(s, clip_norm)
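
A minimal usage sketch (hypothetical; `grads` stands for a list of ngraph gradient ops and is not part of the original snippet): the returned factor is itself an ngraph op, so it is simply multiplied into each gradient before the update is applied.

# Hypothetical usage: rescale gradients so their combined L2 norm is <= 5.0.
scale = clip_gradient_norm(grads, clip_norm=5.0)
clipped_grads = [g * scale for g in grads]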
Example #2
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    # Adagrad-style update: accumulate squared gradients in `state` and shrink
    # the step by the inverse square root of that running sum.
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    state = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(state, state + ng.square(grad)),
        ng.assign(variable,
                  clip_weight_value(variable - (scale_factor * self.lrate * grad) /
                                    (ng.sqrt(state + self.epsilon)), weight_clip_value))
    ])
    return updates
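
For reference, a plain-numpy sketch of the same Adagrad-style recurrence expressed by the ops above (hypothetical scalar hyperparameters; not part of the ngraph code):

import numpy as np

def adagrad_step(w, g, state, lr=0.01, eps=1e-8):
    # Accumulate squared gradients, then shrink the step by 1/sqrt(state + eps).
    state = state + g ** 2
    w = w - lr * g / np.sqrt(state + eps)
    return w, state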
Example #3
    def __call__(self, *args, **kwargs):
        if len(self.ops) == 0:
            self.beta_1 = ng.constant(self.beta_1, dtype=np.float32)
            self.beta_2 = ng.constant(self.beta_2, dtype=np.float32)
            self.t = ng.persistent_tensor(axes=(), initial_value=0)

        # Increment the timestep and fold Adam's bias corrections into the
        # effective learning rate for this step.
        self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
        self.ell = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t) / (1 - self.beta_1 ** self.t)

        return super(Adam, self).__call__(*args, **kwargs)
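
The `self.ell` expression is the "efficient" form of Adam's bias correction: rather than dividing the moment estimates by (1 - beta_1 ** t) and (1 - beta_2 ** t) separately, both corrections are folded into the step size used at step t. A quick scalar check with hypothetical hyperparameter values:

import numpy as np

lrate, beta_1, beta_2, t = 0.001, 0.9, 0.999, 10
ell = lrate * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)  # ~1.5e-4 at t=10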
Example #4
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    # Adam update: m and v track exponential moving averages of the gradient
    # and its square; self.ell carries the bias-corrected step size.
    m = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    v = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(m, m * self.beta_1 + (1 - self.beta_1) * grad),
        ng.assign(v, v * self.beta_2 + (1 - self.beta_2) * grad * grad),
        ng.assign(variable,
                  clip_weight_value(variable - (scale_factor * self.ell * m) /
                                    (ng.sqrt(v) + self.epsilon), weight_clip_value))
    ])
    return updates
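
For reference, a plain-numpy sketch of the update these ops express, with the bias correction carried in `ell` exactly as in the previous example (hypothetical hyperparameters; not part of the original code):

import numpy as np

def adam_step(w, g, m, v, t, lr=0.001, beta_1=0.9, beta_2=0.999, eps=1e-8):
    # Exponential moving averages of the gradient and the squared gradient.
    m = beta_1 * m + (1 - beta_1) * g
    v = beta_2 * v + (1 - beta_2) * g * g
    # Bias-corrected step size folded into the learning rate.
    ell = lr * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    w = w - ell * m / (np.sqrt(v) + eps)
    return w, m, v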
Example #5
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    # RMSProp-style update with momentum and weight decay: `state` keeps a
    # decaying average of squared gradients, `velocity` accumulates the step.
    epsilon, decay = (self.epsilon, self.decay_rate)
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    state = ng.persistent_tensor(axes=variable.axes, initial_value=1.)
    velocity = ng.persistent_tensor(axes=variable.axes,
                                    initial_value=0.).named(variable.name + '_vel')
    updates = ng.sequential([
        ng.assign(state, decay * state + (1.0 - decay) * ng.square(grad)),
        ng.assign(velocity, velocity * self.momentum +
                  (self.lrate * scale_factor * grad / ng.sqrt(state + epsilon)) +
                  self.lrate * self.wdecay * variable),
        ng.assign(variable, clip_weight_value(variable - velocity, weight_clip_value))
    ])
    return updates
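
For reference, a plain-numpy sketch of this RMSProp-with-momentum recurrence (hypothetical hyperparameters, scale_factor taken as 1; not part of the original code):

import numpy as np

def rmsprop_momentum_step(w, g, state, vel, lr=0.01, decay=0.95,
                          momentum=0.9, wdecay=0.0, eps=1e-6):
    # Decaying average of squared gradients.
    state = decay * state + (1 - decay) * g ** 2
    # Momentum buffer accumulates the RMS-scaled step plus weight decay.
    vel = momentum * vel + lr * g / np.sqrt(state + eps) + lr * wdecay * w
    w = w - vel
    return w, state, vel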
Example #6
import numpy as np

import ngraph as ng
from ngraph.testing import executor  # executor helper assumed from the ngraph test utilities


def ngraph_l2_norm(np_array):
    """
    Compute the L2 norm of a numpy array using ngraph ops.

    Arguments:
      np_array (np.ndarray): Input array.

    Returns:
      The L2 norm of the flattened array, as evaluated by an ngraph executor.
    """
    # Create one named axis per dimension of the input array.
    axes = ()
    for i, l in enumerate(np_array.shape):
        axes += (ng.make_axis(length=l).named('axis%s' % i),)

    np_tensor = ng.constant(np_array, axes)
    var = ng.variable(axes, initial_value=np_tensor)
    with executor(ng.sqrt(ng.squared_L2(var))) as ex:
        return ex()
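
A quick sanity-check usage (assuming the imports above resolve in a working ngraph environment): the result should match numpy's flat L2 norm.

x = np.random.rand(3, 4).astype(np.float32)
assert np.allclose(ngraph_l2_norm(x), np.linalg.norm(x), rtol=1e-5)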