def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
    """2-fold for loop implementation."""
    m, H, W, C = dY.shape
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    for h in range(H):
        # slice boundaries in H direction
        h_start = h * stride_H
        h_end = h * stride_H + k_H
        for w in range(W):
            # slice boundaries in W direction
            w_start = w * stride_W
            w_end = w * stride_W + k_W
            # (m, k, k, C_prev)
            x_slice = x_pad[:, h_start:h_end, w_start:w_end, :]
            # (m, 1, 1, C_prev)
            dY_ = np.expand_dims(dY[:, h, w, :], axis=(1, 2))
            if self.mode == "max":
                # route the gradient only to the max entry of each window
                mask = x_slice == np.max(x_slice, axis=(1, 2), keepdims=True)
                dX_pad[:, h_start:h_end, w_start:w_end, :] += dY_ * mask
            elif self.mode == "avg":
                # distribute the gradient evenly over the window
                avg_volume = np.ones((m, k_H, k_W, C)) / (k_H * k_W)
                dX_pad[:, h_start:h_end, w_start:w_end, :] += dY_ * avg_volume
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    return dX
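The mask trick above routes each upstream gradient entirely to the arg-max position of its window (ties would split the gradient across all maxima). A minimal standalone sketch of that step on toy numbers, independent of the class state:

import numpy as np

# one sample, one channel, a single 2x2 window
x_slice = np.array([[[[1.0], [3.0]],
                     [[2.0], [0.5]]]])  # (1, 2, 2, 1)
# upstream gradient for this output position
dY_ = np.full((1, 1, 1, 1), 5.0)
mask = x_slice == np.max(x_slice, axis=(1, 2), keepdims=True)
# all 5.0 flows to the max entry: [[0. 5.] [0. 0.]]
print((dY_ * mask)[0, ..., 0])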
def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
    """4-fold for loop implementation."""
    m, H, W, C = dY.shape
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    # loop over samples
    for i in range(m):
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W
                # loop over channels
                for c in range(C):
                    # (k, k)
                    x_slice = x_pad[i, h_start:h_end, w_start:w_end, c]
                    if self.mode == "max":
                        mask = x_slice == np.max(x_slice)
                        dX_pad[i, h_start:h_end, w_start:w_end, c] += dY[i, h, w, c] * mask
                    elif self.mode == "avg":
                        avg_volume = np.ones((k_H, k_W)) / (k_H * k_W)
                        dX_pad[i, h_start:h_end, w_start:w_end, c] += dY[i, h, w, c] * avg_volume
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    return dX
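Every variant in this section ends by cropping the padded gradient back to the input's spatial size with unpad_tensor, which is defined elsewhere. A minimal sketch of a compatible helper, assuming self.padding is a symmetric (pad_H, pad_W) pair of zero-padding widths (the body shown here is an assumption, not the source's definition):

def unpad_tensor(x_pad, padding, out_hw):
    """Crop a padded NHWC tensor back to its original spatial size.

    Hypothetical helper matching the calls above; assumes `padding`
    is (pad_H, pad_W) and `out_hw` is (H_prev, W_prev).
    """
    pad_H, pad_W = padding
    H_prev, W_prev = out_hw
    return x_pad[:, pad_H:pad_H + H_prev, pad_W:pad_W + W_prev, :]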
def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
    """4-fold for loop implementation."""
    m, H, W, _ = dY.shape
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    # loop over samples
    for i in range(m):
        for h in range(H):
            # slice boundaries in H direction
            h_start = h * stride_H
            h_end = h * stride_H + k_H
            for w in range(W):
                # slice boundaries in W direction
                w_start = w * stride_W
                w_end = w * stride_W + k_W
                # (k, k, C_prev)
                x_slice = x_pad[i, h_start:h_end, w_start:w_end, :]
                # loop over output channels
                for c in range(self.out_channels):
                    # (k, k, C_prev)
                    weights = self.W[..., c]
                    # (k, k, C_prev)
                    dX_pad[i, h_start:h_end, w_start:w_end, :] += weights * dY[i, h, w, c]
                    # (k, k, C_prev)
                    self.dW[..., c] += x_slice * dY[i, h, w, c]
                    # (1,)
                    self.db[c] += dY[i, h, w, c]
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    return dX
def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
    """3-fold for loop implementation."""
    m, H, W, _ = dY.shape
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    for h in range(H):
        # slice boundaries in H direction
        h_start = h * stride_H
        h_end = h * stride_H + k_H
        for w in range(W):
            # slice boundaries in W direction
            w_start = w * stride_W
            w_end = w * stride_W + k_W
            # (m, k, k, C_prev)
            x_slice = x_pad[:, h_start:h_end, w_start:w_end, :]
            # loop over output channels
            for c in range(self.out_channels):
                # (m, k, k, C_prev)
                weights = np.repeat(self.W[np.newaxis, ..., c], repeats=m, axis=0)
                # (m, 1, 1, 1)
                dY_ = np.expand_dims(dY[:, h, w, c], axis=(1, 2, 3))
                # (m, k, k, C_prev)
                dX_pad[:, h_start:h_end, w_start:w_end, :] += weights * dY_
                # (k, k, C_prev)
                self.dW[..., c] += np.sum(x_slice * dY_, axis=0)
                # (1,)
                self.db[c] += np.sum(dY_)
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    return dX
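The np.repeat here (and in the 2-fold variant below) materializes m copies of the kernel; NumPy broadcasting gives the same products without the copy. A quick self-contained check:

import numpy as np

rng = np.random.default_rng(0)
m, k_H, k_W, C_prev = 2, 3, 3, 3
Wc = rng.standard_normal((k_H, k_W, C_prev))   # stand-in for self.W[..., c]
dY_ = rng.standard_normal((m, 1, 1, 1))

repeated = np.repeat(Wc[np.newaxis], repeats=m, axis=0) * dY_
broadcast = Wc[np.newaxis] * dY_               # (1, k, k, C_prev) broadcasts over m
assert np.allclose(repeated, broadcast)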
def _backward_compute(self, x_pad, dX_pad, dY, H_prev, W_prev):
    """2-fold for loop implementation."""
    m, H, W, _ = dY.shape
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    # (m, k, k, C_prev, C) -- loop-invariant, so computed once up front
    weights = np.repeat(np.expand_dims(self.W, 0), repeats=m, axis=0)
    for h in range(H):
        # slice boundaries in H direction
        h_start = h * stride_H
        h_end = h * stride_H + k_H
        for w in range(W):
            # slice boundaries in W direction
            w_start = w * stride_W
            w_end = w * stride_W + k_W
            # (m, 1, 1, 1, C)
            dY_ = np.expand_dims(dY[:, h, w, :], axis=(1, 2, 3))
            # (m, k, k, C_prev)
            dX_pad[:, h_start:h_end, w_start:w_end, :] += np.sum(weights * dY_, axis=4)
            # (m, k, k, C_prev, 1)
            x_slice = x_pad[:, h_start:h_end, w_start:w_end, :, np.newaxis]
            # (k, k, C_prev, C)
            self.dW += np.sum(x_slice * dY_, axis=0)
            # (C,)
            self.db += np.sum(dY_, axis=(0, 1, 2, 3))
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    return dX
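Each per-window update in the 2-fold variant is a plain tensor contraction, so the same three gradients can be written with np.einsum and cross-checked against the broadcast-and-sum formulation above. A self-contained sketch for a single (h, w) position:

import numpy as np

rng = np.random.default_rng(0)
m, k_H, k_W, C_prev, C = 2, 3, 3, 3, 4
W = rng.standard_normal((k_H, k_W, C_prev, C))
x_slice = rng.standard_normal((m, k_H, k_W, C_prev))
dy = rng.standard_normal((m, C))               # dY[:, h, w, :]

# loop-free equivalents of the three per-window updates
dx_win = np.einsum("ijkl,ml->mijk", W, dy)     # (m, k, k, C_prev)
dW_win = np.einsum("mijk,ml->ijkl", x_slice, dy)
db_win = dy.sum(axis=0)

# cross-check against the broadcast formulation used above
dY_ = dy[:, None, None, None, :]               # (m, 1, 1, 1, C)
assert np.allclose(dx_win, np.sum(W[np.newaxis] * dY_, axis=4))
assert np.allclose(dW_win, np.sum(x_slice[..., np.newaxis] * dY_, axis=0))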
def _backward_compute(self, col_matrix, dX_pad, dY, H_prev, W_prev):
    """im2col implementation."""
    m, H, W, _ = dY.shape
    # (m, H, W, C) -> (m, H x W, C)
    dY = dY.reshape((m, -1, self.out_channels))
    # (k, k, C_prev, C) -> (k x k x C_prev, C) -> (1, k x k x C_prev, C)
    weights = self.W.reshape((-1, self.out_channels))[np.newaxis, ...]
    # gradient w.r.t. the input, computed via the column matrix
    stride_H, stride_W = self.stride
    k_H, k_W = self.kernel_size
    # (m, H x W, k x k x C_prev)
    dcol_matrix = np.matmul(dY, weights.transpose(0, 2, 1))
    for h in range(H):
        # slice boundaries in H direction
        h_start = h * stride_H
        h_end = h * stride_H + k_H
        for w in range(W):
            # slice boundaries in W direction
            w_start = w * stride_W
            w_end = w * stride_W + k_W
            # row index of output position (h, w) in the row-major (H, W) flattening
            idx = h * W + w
            drow = dcol_matrix[:, idx, :].reshape((m, k_H, k_W, -1))
            dX_pad[:, h_start:h_end, w_start:w_end, :] += drow
    # slice the gradient tensor to original size
    dX = unpad_tensor(dX_pad, self.padding, (H_prev, W_prev))
    # gradients of weights & biases
    dW = np.sum(np.matmul(col_matrix.transpose(0, 2, 1), dY), axis=0)
    self.dW[:] = dW.reshape((*self.kernel_size, self.in_channels, self.out_channels))
    self.db[:] = np.sum(dY, axis=(0, 1))
    return dX
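The im2col backward above consumes the col_matrix cached by the forward pass. A minimal sketch of how such a matrix could be built, assuming the (m, H x W, k_H x k_W x C_prev) layout that the matmul and the row index h * W + w imply (the helper name and signature are illustrative):

import numpy as np

def im2col(x_pad, kernel_size, stride, out_hw):
    """Gather sliding windows of an NHWC tensor into a column matrix.

    Hypothetical helper mirroring the layout the backward pass expects:
    row h * W + w holds the flattened (k_H, k_W, C_prev) window at (h, w).
    """
    m, _, _, C_prev = x_pad.shape
    k_H, k_W = kernel_size
    stride_H, stride_W = stride
    H, W = out_hw
    col_matrix = np.empty((m, H * W, k_H * k_W * C_prev))
    for h in range(H):
        for w in range(W):
            patch = x_pad[:, h * stride_H:h * stride_H + k_H,
                          w * stride_W:w * stride_W + k_W, :]
            col_matrix[:, h * W + w, :] = patch.reshape(m, -1)
    return col_matrix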