def update_gradient(self, x: Variable):
    """Run the backward step of ``x`` and then of every node beneath it.

    The graph is walked in pre-order: ``x`` first, then the subtree under
    ``x.lchild``, then the subtree under ``x.rchild``. A node whose
    ``back_prop`` is ``None`` is skipped, but its children are still visited.

    Args:
        x: Node at which the traversal starts.
    """
    backward_step = x.back_prop
    if backward_step is not None:
        backward_step()

    # Children are looked up one at a time, left before right, so the right
    # child is only read after the whole left subtree has been processed.
    for side in ("lchild", "rchild"):
        child = getattr(x, side)
        if child is not None:
            self.update_gradient(child)
def tanh(x: Variable):
    """Apply the hyperbolic tangent element-wise and return a new graph node.

    The forward value is ``np.tanh(x.value)``. The previous hand-written
    ratio of exponentials was mathematically the same function (the common
    ``exp(-M)`` factor cancels), but the exponentials overflowed for inputs
    of large magnitude and produced ``inf / inf = NaN``. ``np.tanh``
    saturates to +/-1 instead. The leftover debug ``print`` calls were
    removed.

    Args:
        x: Input node; ``x.value`` is read as the array to transform.

    Returns:
        A new ``Variable`` holding the result, with ``lchild`` set to ``x``,
        ``back_prop`` bound to its ``back_tanh`` method, and
        ``tanh_grad_parser`` carrying the values saved for the backward pass.
    """
    # The mean is not needed for the forward value any more, but it is still
    # stored under 'M' because the backward pass may look it up there.
    M = np.average(x.value)

    output_value = np.tanh(x.value)

    output = Variable(output_value, lchild=x)
    output.back_prop = output.back_tanh
    output.tanh_grad_parser = {'M': M, 'xvalue': x.value}
    return output
def update_gradient_with_optimizer(self, x: Variable, optimizer: Optimizer):
    """Clip, back-propagate and optimise ``x``, then recurse into its children.

    For each visited node, in this order:

    1. ``x.gradient`` is clipped element-wise to
       ``[-GRADIENT_CLIPPING_THRESHOLD, GRADIENT_CLIPPING_THRESHOLD]``.
    2. ``x.back_prop()`` is called when a backward step is attached.
    3. ``optimizer.update_once(x)`` is called when ``x.trainable`` is truthy.
    4. The left child, then the right child, are processed recursively.

    The previous clipping code replaced every out-of-range entry with
    ``+GRADIENT_CLIPPING_THRESHOLD``, which flipped the sign of large
    negative gradients. ``np.clip`` keeps the sign. NaN entries are left as
    NaN rather than being turned into the threshold.

    Args:
        x: Node at which the traversal starts.
        optimizer: Object whose ``update_once`` is applied to trainable nodes.
    """
    # Gradient clipping: limit the magnitude of each entry, preserving sign.
    x.gradient = np.clip(
        x.gradient,
        -GRADIENT_CLIPPING_THRESHOLD,
        GRADIENT_CLIPPING_THRESHOLD,
    )

    if x.back_prop is not None:
        # A backward step is attached to this node (presumably it is the
        # output of an operation rather than an input - confirm).
        x.back_prop()

    if x.trainable:
        optimizer.update_once(x)

    if x.lchild is not None:
        self.update_gradient_with_optimizer(x.lchild, optimizer)
    if x.rchild is not None:
        self.update_gradient_with_optimizer(x.rchild, optimizer)
def forward(self, X):
    """Max-pool ``X`` window by window and return the result as a new node.

    For every image, channel and output position the maximum of the
    corresponding input window is written to ``output.value``. The input
    (row, column) coordinates of that maximum are written to
    ``output.mapping[..., 0]`` and ``output.mapping[..., 1]``.

    The flat ``argmax`` index is now converted to (row, column) with
    ``np.unravel_index``. The previous code divided by ``clip.shape[0]`` (the
    row count) instead of the column count, which gave wrong coordinates for
    non-square windows.

    Args:
        X: Input node. ``X.shape`` is indexed as (batch, channel, rows,
            columns) and ``X.value`` is iterated image by image.

    Returns:
        A ``Variable`` with ``lchild`` set to ``X`` and the extra attributes
        ``mapping``, ``size`` and ``back_prop``.
    """
    if self.initialize:
        size = X.shape
        self.n = size[0]
        self.x = size[2]
        self.y = size[3]
        self.in_channel = size[1]
        self.x_new = int((self.x - self.kernel_size[0] + 2 * self.padding[0]) / self.stride[0] + 1)
        self.y_new = int((self.y - self.kernel_size[1] + 2 * self.padding[1]) / self.stride[1] + 1)
        # NOTE(review): the flag is set to True inside the branch it guards,
        # so it never changes here - confirm whether a one-shot
        # initialisation (``= False``) was intended.
        self.initialize = True

    # NOTE(review): padding enters the output-size formula above, but the
    # input itself is never padded below - confirm behaviour for padding > 0.

    # Generate the new matrix
    output = Variable(np.zeros((self.n, self.in_channel, self.x_new, self.y_new)), lchild=X)
    output.mapping = np.zeros((self.n, self.in_channel, self.x_new, self.y_new, 2))
    output.size = [self.n, self.in_channel, self.x_new, self.y_new]

    for image_idx, image in enumerate(X.value):
        for channel_idx in range(self.in_channel):
            for i in range(self.x_new):
                for j in range(self.y_new):
                    x_start = int(i * self.stride[0])
                    x_end = int(x_start + self.kernel_size[0])
                    y_start = int(j * self.stride[1])
                    y_end = int(y_start + self.kernel_size[1])

                    # Forward-prop
                    clip = image[channel_idx, x_start: x_end, y_start: y_end]
                    output.value[image_idx, channel_idx, i, j] = np.max(clip)

                    # Backward-prop: locate the maximum inside the window,
                    # then shift by the window origin to get input coordinates.
                    row_in_clip, col_in_clip = np.unravel_index(np.argmax(clip), clip.shape)
                    maximum_x = int(row_in_clip) + x_start
                    maximum_y = int(col_in_clip) + y_start

                    # Record the coordinates of the maximum's position in mapping
                    output.mapping[image_idx, channel_idx, i, j, 0] = maximum_x
                    output.mapping[image_idx, channel_idx, i, j, 1] = maximum_y

    # NOTE(review): this CALLS back_maxpooling2d and stores its return value,
    # whereas tanh() assigns the bound method ``output.back_tanh`` without
    # calling it - confirm whether the parentheses are intended.
    output.back_prop = output.back_maxpooling2d()
    return output