def backward(self, d_h_stack, optimize=True):
    """Backpropagation through time (BPTT) over the cached LSTM timestep cells.

    Args:
        d_h_stack: (N, T, H) upstream gradient for each timestep's hidden state.
        optimize: accepted for interface compatibility; not used here.

    Returns:
        d_x_stack: (N, T, D) gradient with respect to the input sequence.

    Side effects:
        Writes the summed, element-wise clipped parameter gradients into
        self.grads[0..2] and stores the gradient w.r.t. the initial hidden
        state in self.d_h_0.
    """
    Wx, Wh, bias = self.params
    N, T, H = d_h_stack.shape
    D, _ = Wx.shape

    d_Wx = np.zeros_like(Wx)
    d_Wh = np.zeros_like(Wh)
    d_bias = np.zeros_like(bias)

    d_h_prev = init.zeros(N, H)
    d_c_next = init.zeros(N, H)
    d_x_stack = init.empty(N, T, D)

    for t in reversed(range(T)):
        cell = self.timestep_cells[t]
        # Hidden-state gradient at step t: upstream slice plus the carry
        # flowing back from step t+1.
        d_h_next = d_h_stack[:, t, :] + d_h_prev
        d_x, d_c_prev, d_h_prev = cell.backward(d_c_next, d_h_next)
        # BUG FIX: carry the cell-state gradient back through time. The
        # original discarded d_c_prev, leaving d_c_next at zero for every
        # step and severing the LSTM's cell-state gradient path.
        d_c_next = d_c_prev
        d_x_stack[:, t, :] = d_x
        d_Wx += cell.grads['Wx']
        d_Wh += cell.grads['Wh']
        d_bias += cell.grads['bias']

    # Clip element-wise to [-1, 1] to tame exploding gradients.
    for d in (d_Wx, d_Wh, d_bias):
        np.clip(d, -1, 1, out=d)

    self.grads[0][...] = d_Wx
    self.grads[1][...] = d_Wh
    self.grads[2][...] = d_bias
    self.d_h_0 = d_h_prev
    return d_x_stack
def backward(self, d_h_stack, optimize=True):
    """Backpropagation through time (BPTT) over the cached RNN timestep cells.

    Args:
        d_h_stack: (N, T, H) upstream gradient for each timestep's hidden state.
        optimize: forwarded to each cell's backward.

    Returns:
        self.grads, with its 'Wx', 'Wh' and 'bias' entries updated in place
        (each clipped element-wise to [-1, 1]).
    """
    Wx, Wh, bias = self.params
    N, T, H = d_h_stack.shape
    D, _ = Wx.shape  # do not shadow H taken from d_h_stack

    d_Wx = np.zeros_like(Wx)
    d_Wh = np.zeros_like(Wh)
    d_bias = np.zeros_like(bias)
    d_h_prev = init.zeros(N, H)

    # BUG FIX: the original skipped t == 0 ("if t == 0: continue"), dropping
    # the first timestep's contribution to every parameter gradient. All
    # timesteps must participate in BPTT.
    for t in reversed(range(T)):
        cell = self.timestep_cells[t]
        # Upstream gradient at this step plus the carry from step t+1.
        d_h_next = d_h_stack[:, t, :] + d_h_prev
        grad = cell.backward(d_h_next=d_h_next, optimize=optimize)
        d_Wx += grad['Wx']
        d_Wh += grad['Wh']
        d_bias += grad['bias']
        d_h_prev[...] = grad['h_prev']

    # Clip element-wise to [-1, 1] to tame exploding gradients.
    for d in (d_Wx, d_Wh, d_bias):
        np.clip(d, -1, 1, out=d)

    self.grads['Wx'][...] = d_Wx
    self.grads['Wh'][...] = d_Wh
    self.grads['bias'][...] = d_bias
    return self.grads
def forward(self, x_seq):
    """Run the LSTM over a full input sequence.

    Args:
        x_seq: (N, T, D) input batch.

    Returns:
        (h_last, h_stack): the final hidden state (N, H) and the hidden
        states of every timestep stacked into (N, T, H).

    Side effects:
        Caches one LSTMCell per timestep (consumed by backward); when
        self.stateful, carries cell/hidden state across calls via
        self.cell_state / self.hidden_state.
    """
    N, T, D = x_seq.shape
    H = self.hidden_dim
    Wx, Wh, bias = self.params

    # Start from the carried state only when stateful and a state exists.
    c_prev = self.cell_state if (self.stateful and self.cell_state is not None) else init.zeros(N, H)
    h_prev = self.hidden_state if (self.stateful and self.hidden_state is not None) else init.zeros(N, H)

    # BUG FIX(review): reset the per-timestep cell cache on each forward
    # pass. The original appended forever, so a later backward() would index
    # stale cells from an earlier call. Confirm no caller relies on the
    # accumulation behavior.
    self.timestep_cells = []

    h_stack = init.empty(N, T, H)
    for t in range(T):
        timestep_cell = LSTMCell(N, D, H, Wx, Wh, bias)
        c_prev, h_prev = timestep_cell.forward(x_seq[:, t, :], c_prev, h_prev)
        self.timestep_cells.append(timestep_cell)
        h_stack[:, t, :] = h_prev

    self.cell_state, self.hidden_state = c_prev, h_prev
    # Note: for T == 0 this returns the initial state instead of raising
    # NameError as the original did.
    return h_prev, h_stack
def forward(self, x_sequence, h_init=None):
    """Run the vanilla RNN over a full input sequence.

    Args:
        x_sequence: (N, T, D) input batch.
        h_init: optional (N, H) initial hidden state; zeros when None.

    Returns:
        (h_last, h_stack): the final hidden state (N, H) and the hidden
        states of every timestep stacked into (N, T, H).

    Side effects:
        Caches one RNNCell per timestep for backward().
    """
    batch_size, timesteps, input_dim = x_sequence.shape
    N, T, D, H = batch_size, timesteps, input_dim, self.hidden_dim
    Wx, Wh, bias = self.params

    h_prev = init.zeros(N, H) if h_init is None else h_init

    # BUG FIX(review): clear the cell cache on each call. The original kept
    # appending, so a later backward() would read cells cached by an earlier
    # forward pass. Confirm no caller relies on the accumulation behavior.
    self.timestep_cells = []

    # (N, T, H) output buffer holding every timestep's hidden state.
    h_stack = init.empty(N, T, H)
    for t in range(T):
        timestep_cell = RNNCell(N, D, H, Wx=Wx, Wh=Wh, bias=bias)
        # One timestep: consumes the (N, D) input slice and previous hidden
        # state, producing the (N, H) hidden state for step t.
        h_prev = timestep_cell.forward(x_sequence[:, t, :], h_prev)
        self.timestep_cells.append(timestep_cell)
        h_stack[:, t, :] = h_prev

    # Note: for T == 0 this returns the initial state instead of raising
    # NameError as the original did.
    return h_prev, h_stack