def update_gradient_with_optimizer(self, x: Variable, optimizer: Optimizer):
    # Gradient clipping: keep entries whose magnitude is below the
    # threshold and clamp the rest to +/- GRADIENT_CLIPPING_THRESHOLD,
    # preserving the sign of each clipped entry.
    mask = (np.abs(x.gradient) < GRADIENT_CLIPPING_THRESHOLD).astype(int)
    contra_mask = 1 - mask
    x.gradient = (mask * x.gradient
                  + contra_mask * np.sign(x.gradient) * GRADIENT_CLIPPING_THRESHOLD)

    # Run this node's back-propagation hook, if any, so the clipped
    # gradient is pushed down before the children are visited.
    if x.back_prop is not None:
        x.back_prop()

    # Trainable nodes get a single optimizer step.
    if x.trainable:
        optimizer.update_once(x)

    # Recurse down both branches of the (binary) computation graph.
    if x.lchild is not None:
        self.update_gradient_with_optimizer(x.lchild, optimizer)
    if x.rchild is not None:
        self.update_gradient_with_optimizer(x.rchild, optimizer)
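# For reference, the masked clamp above is equivalent to NumPy's built-in
# clip; a minimal sketch, assuming the same threshold constant:
#
#   x.gradient = np.clip(x.gradient,
#                        -GRADIENT_CLIPPING_THRESHOLD,
#                        GRADIENT_CLIPPING_THRESHOLD)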
def set_and_update_gradient(self, x: Variable, gradient):
    # Seed the node's gradient (shapes must match exactly), then
    # propagate it through the rest of the graph.
    assert x.gradient.shape == gradient.shape
    x.gradient = gradient
    self.update_gradient(x)
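# A minimal usage sketch; `graph`, `loss`, and `sgd` are assumed names,
# not part of this class. After a forward pass, seed the root gradient
# with ones and walk the graph so the optimizer updates each trainable
# node (set_and_update_gradient performs the same seed-and-walk for the
# plain update_gradient path):
#
#   loss.gradient = np.ones(loss.gradient.shape)
#   graph.update_gradient_with_optimizer(loss, sgd)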