def backward(self, dout):
    """Backpropagate through the layer and return the input gradient.

    Side effects: stores the bias gradient in ``self.db`` and the filter
    gradient in ``self.dW`` (reshaped to the shape of ``self.W``).

    Args:
        dout: Upstream gradient. Axis 1 is moved to the end and the result
            is flattened to ``FN`` columns, so it is presumably laid out as
            (N, FN, out_h, out_w) -- TODO confirm against forward().

    Returns:
        The result of ``col2im`` applied to the column-form input gradient,
        using the shape of ``self.x``.
    """
    n_filters, channels, filter_h, filter_w = self.W.shape

    # One row per output position, one column per filter.
    grad_2d = dout.transpose(0, 2, 3, 1).reshape(-1, n_filters)

    # Bias gradient: sum over all rows.
    self.db = grad_2d.sum(axis=0)

    # Filter gradient: computed in 2-D, then transposed and reshaped back
    # to the 4-D shape of self.W.
    weight_grad_2d = self.col.T.dot(grad_2d)
    self.dW = weight_grad_2d.transpose(1, 0).reshape(
        n_filters, channels, filter_h, filter_w
    )

    # Input gradient in column form, folded back by col2im.
    col_grad = grad_2d.dot(self.col_W.T)
    return col2im(
        col_grad, self.x.shape, filter_h, filter_w, self.stride, self.pad
    )
def backward(self, dout):
    """Backpropagate through the pooling layer and return the input gradient.

    Each upstream gradient value is written into a zero matrix at the
    column given by ``self.arg_max``; every other entry stays zero.

    Args:
        dout: Upstream gradient; transposed with axes (0, 2, 3, 1), so it
            must be 4-D.

    Returns:
        The result of ``fun.col2im`` applied to the scattered gradient,
        using the shape of ``self.x``.
    """
    window = self.pool_h * self.pool_w
    grad = dout.transpose(0, 2, 3, 1)

    # One row per upstream gradient element, one column per window slot.
    scattered = np.zeros((grad.size, window))
    row_idx = np.arange(self.arg_max.size)
    col_idx = self.arg_max.flatten()
    scattered[row_idx, col_idx] = grad.flatten()

    # Restore the transposed layout with the window axis appended, then
    # merge the first three axes into rows for col2im.
    scattered = scattered.reshape(grad.shape + (window,))
    dim0, dim1, dim2 = scattered.shape[:3]
    col_grad = scattered.reshape(dim0 * dim1 * dim2, -1)

    return fun.col2im(
        col_grad, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad
    )
def backward(self, grad_y):
    """Backpropagate through the pooling layer; stores ``self.grad_x``.

    Nothing is returned: the input gradient is written to ``self.grad_x``.

    Args:
        grad_y: Upstream gradient; axis 0 is taken as the batch size and the
            array is transposed with axes (0, 2, 3, 1), so it must be 4-D.
    """
    batch = grad_y.shape[0]
    x_ch, x_h, x_w, pool, pad = self.params
    y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w
    window = pool * pool

    # Reorder the axes of the output gradient.
    grad_last = grad_y.transpose(0, 2, 3, 1)
    n_values = grad_last.size

    # Build a zero matrix and place each output gradient at the position
    # where the maximum of its column was located.
    scatter = np.zeros((window, n_values))
    scatter[self.max_index.reshape(-1), np.arange(n_values)] = grad_last.reshape(-1)

    # Split the axes apart, bring the channel axis to the front, and flatten
    # to 2-D for col2im.
    scatter = (
        scatter.reshape(pool, pool, batch, y_h, y_w, y_ch)
        .transpose(5, 0, 1, 2, 3, 4)
        .reshape(y_ch * window, batch * y_h * y_w)
    )

    # Input gradient.
    self.grad_x = fn.col2im(
        scatter, (batch, x_ch, x_h, x_w), pool, pool, y_h, y_w, pool, pad
    )
def backward(self, grad_y):
    """Backpropagate through the layer; stores the three gradients on self.

    Nothing is returned. The method writes ``self.grad_w`` (filter
    gradient), ``self.grad_b`` (bias gradient) and ``self.grad_x`` (input
    gradient).

    Args:
        grad_y: Upstream gradient; axis 0 is taken as the batch size and the
            masked array is transposed with axes (0, 2, 3, 1), so it must
            be 4-D.
    """
    batch = grad_y.shape[0]
    x_ch, x_h, x_w, n_flt, flt_h, flt_w, stride, pad = self.params
    y_ch, y_h, y_w = self.y_ch, self.y_h, self.y_w

    # ReLU derivative: 0 where self.u <= 0, otherwise 1.
    relu_mask = np.where(self.u <= 0, 0, 1)
    delta = (
        (grad_y * relu_mask)
        .transpose(0, 2, 3, 1)
        .reshape(batch * y_h * y_w, y_ch)
    )

    # Filter and bias gradients.
    self.grad_w = (self.cols @ delta).T.reshape(n_flt, x_ch, flt_h, flt_w)
    self.grad_b = delta.sum(axis=0)

    # Input gradient.
    col_grad = delta @ self.w_col
    self.grad_x = fn.col2im(
        col_grad.T, (batch, x_ch, x_h, x_w), flt_h, flt_w, y_h, y_w, stride, pad
    )
def backward(self, prop):
    """Backpropagate through the layer and return the input gradient.

    Side effect: ``self.X`` is replaced by its transpose.

    Args:
        prop: Upstream gradient, a 4-D array whose first axis has length
            ``N`` and whose last axis has length ``Dnew``.

    Returns:
        The output of ``F.col2im`` with ``self.pad`` entries cropped from
        both ends of axes 1 and 2, divided by ``self.ker_size ** 2``.
        With ``self.pad == 0`` nothing is cropped.
    """
    N, _, _, Dnew = prop.shape

    ## get db
    # NOTE(review): db and dW are computed here but neither stored on self
    # nor returned in this method -- confirm whether they should be kept.
    db = np.sum(prop, (0, 1, 2)) / N

    ## get dw: row*col*D*Dnew
    prop = prop.reshape(-1, Dnew)
    # NOTE(review): this overwrites self.X with its transpose, so a second
    # call without refreshing self.X sees the transposed array -- confirm
    # that self.X is always reset before backward() runs again.
    self.X = np.transpose(self.X, (1, 0))
    dW = np.matmul(self.X, prop) / N

    ## get derivative of col
    dX_col = np.matmul(prop, self.W.T)

    ## reshape to N,rows,window
    _, window = dX_col.shape
    dX_col = dX_col.reshape(N, -1, window)
    dX_pad = F.col2im(dX_col, self.maps, self.input_shape)

    pad = self.pad
    ker_size = self.ker_size
    # `pad:-pad` turns into `0:0` when pad == 0 and would return an empty
    # array; falling back to a stop of None keeps the full extent instead.
    crop = slice(pad, -pad or None)
    return dX_pad[:, crop, crop, :] / (ker_size**2)