import numpy as np
import ngraph as ng
from ngraph.testing import executor


def clip_gradient_norm(grad_list, clip_norm=None):
    """
    Returns a scaling factor to apply to the gradients.

    The scaling factor is computed such that the L2 norm of the scaled
    gradients across all layers will be less than or equal to the provided
    clip_norm value. This factor is always <= 1, so it never scales up
    the gradients.

    Arguments:
        grad_list (list): List of layer gradients
        clip_norm (float, optional): Target norm for the gradients. If not
            provided, the returned scale factor will equal 1.

    Returns:
        Computed scale factor (float)
    """
    if clip_norm is None:
        return 1
    else:
        # Accumulate the squared L2 norm across all gradients, then take the root.
        s = None
        for grad in grad_list:
            term = ng.squared_L2(grad, out_axes=None)
            if s is None:
                s = term
            else:
                s = s + term
        s = ng.sqrt(s)
        # Equals 1 when the norm is already within clip_norm, < 1 otherwise.
        return clip_norm / ng.maximum(s, clip_norm)
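
# Reference check of the same clipping math in plain numpy (illustrative
# sketch, not part of the module): the factor is clip_norm / max(||g||, clip_norm),
# so the rescaled global norm is min(||g||, clip_norm) and gradients are
# never scaled up.
def _np_clip_gradient_norm(grad_list, clip_norm=None):
    if clip_norm is None:
        return 1.0
    total = np.sqrt(sum(np.sum(g ** 2) for g in grad_list))
    return clip_norm / max(total, clip_norm)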
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    # Adagrad: accumulate squared gradients elementwise across iterations.
    state = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(state, state + ng.square(grad)),
        ng.assign(variable,
                  clip_weight_value(variable - (scale_factor * self.lrate * grad)
                                    / (ng.sqrt(state + self.epsilon)),
                                    weight_clip_value))
    ])
    return updates
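
# Plain-numpy sketch of the Adagrad step above (illustrative, not part of
# the module): the accumulator only grows, so effective step sizes shrink
# for parameters that receive large or frequent gradients.
def _np_adagrad_step(variable, grad, state, lrate, epsilon, scale_factor=1.0):
    state = state + grad ** 2
    variable = variable - scale_factor * lrate * grad / np.sqrt(state + epsilon)
    return variable, state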
def __call__(self, *args, **kwargs):
    if len(self.ops) == 0:
        self.beta_1 = ng.constant(self.beta_1, dtype=np.float32)
        self.beta_2 = ng.constant(self.beta_2, dtype=np.float32)
        self.t = ng.persistent_tensor(axes=(), initial_value=0)

    # Increment the timestep and fold both bias corrections into the
    # effective learning rate for this iteration.
    self.t = ng.sequential([ng.assign(self.t, self.t + 1), self.t])
    self.ell = self.lrate * ng.sqrt(1 - self.beta_2 ** self.t) / (1 - self.beta_1 ** self.t)

    return super(Adam, self).__call__(*args, **kwargs)
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    # First (m) and second (v) moment estimates of the gradient.
    m = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    v = ng.persistent_tensor(axes=grad.axes, initial_value=0.)
    updates = ng.sequential([
        ng.assign(m, m * self.beta_1 + (1 - self.beta_1) * grad),
        ng.assign(v, v * self.beta_2 + (1 - self.beta_2) * grad * grad),
        # self.ell carries the bias-corrected learning rate computed in __call__.
        ng.assign(variable,
                  clip_weight_value(variable - (scale_factor * self.ell * m)
                                    / (ng.sqrt(v) + self.epsilon),
                                    weight_clip_value))
    ])
    return updates
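
# Plain-numpy sketch of the full Adam step above (illustrative, not part of
# the module). `ell` folds both bias corrections into the learning rate,
# matching what Adam.__call__ computes once per iteration.
def _np_adam_step(variable, grad, m, v, t, lrate, beta_1, beta_2, epsilon):
    t = t + 1
    m = beta_1 * m + (1 - beta_1) * grad
    v = beta_2 * v + (1 - beta_2) * grad * grad
    ell = lrate * np.sqrt(1 - beta_2 ** t) / (1 - beta_1 ** t)
    variable = variable - ell * m / (np.sqrt(v) + epsilon)
    return variable, m, v, t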
def variable_update(self, variable, grad, scale_factor, weight_clip_value):
    epsilon, decay = (self.epsilon, self.decay_rate)
    grad = clip_gradient_value(grad, self.gradient_clip_value)
    # Running average of squared gradients; initialized to 1 (rather than 0),
    # which keeps the denominator away from zero on early steps.
    state = ng.persistent_tensor(axes=variable.axes, initial_value=1.)
    velocity = ng.persistent_tensor(axes=variable.axes,
                                    initial_value=0.).named(variable.name + '_vel')
    updates = ng.sequential([
        ng.assign(state, decay * state + (1.0 - decay) * ng.square(grad)),
        # Momentum over the RMS-scaled gradient, plus L2 weight decay.
        ng.assign(velocity, velocity * self.momentum +
                  (self.lrate * scale_factor * grad / ng.sqrt(state + epsilon)) +
                  self.lrate * self.wdecay * variable),
        ng.assign(variable, clip_weight_value(variable - velocity, weight_clip_value))
    ])
    return updates
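
# Plain-numpy sketch of the step above (illustrative, not part of the
# module): RMS-scaled gradient combined with classical momentum and L2
# weight decay; the accumulator starts at 1 to match initial_value above.
def _np_rmsprop_step(variable, grad, state, velocity, lrate, decay,
                     momentum, wdecay, epsilon, scale_factor=1.0):
    state = decay * state + (1.0 - decay) * grad ** 2
    velocity = (momentum * velocity
                + lrate * scale_factor * grad / np.sqrt(state + epsilon)
                + lrate * wdecay * variable)
    return variable - velocity, state, velocity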
def ngraph_l2_norm(np_array):
    """
    Computes the L2 norm of a numpy array by building an ngraph constant
    over matching axes and evaluating sqrt(squared_L2) with an executor.

    Arguments:
        np_array (np.ndarray): Input array.

    Returns:
        The L2 norm of np_array.
    """
    # Build one ngraph axis per numpy dimension.
    axes = ()
    for i, l in enumerate(np_array.shape):
        axes += (ng.make_axis(length=l).named('axis%s' % i),)
    np_tensor = ng.constant(np_array, axes)
    var = ng.variable(axes, initial_value=np_tensor)
    with executor(ng.sqrt(ng.squared_L2(var))) as ex:
        return ex()
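
# Usage sketch (assumes an ngraph transformer backend is configured):
#
#     >>> ngraph_l2_norm(np.array([[3.0, 4.0]]))
#     5.0   # sqrt(3^2 + 4^2)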