예제 #1
0
    def backward(self, d_h_stack, optimize=True):
        """Backpropagate through time over the cached LSTM timestep cells.

        Walks ``self.timestep_cells`` from the last timestep to the first,
        carrying both the hidden-state and the cell-state gradients from each
        step to the one before it, and sums the per-step parameter gradients.

        Args:
            d_h_stack: Upstream gradient of the stacked hidden states. Its
                shape is unpacked as ``(N, T, H)`` and it is sliced as
                ``d_h_stack[:, t, :]`` for each timestep.
            optimize: Not used by this method; kept so the signature stays
                compatible with existing callers.

        Returns:
            The gradient with respect to the input sequence, allocated as
            ``init.empty(N, T, D)`` where ``D`` is ``Wx.shape[0]``.

        Side effects:
            Overwrites ``self.grads[0]``, ``self.grads[1]`` and
            ``self.grads[2]`` in place with the summed ``Wx``, ``Wh`` and
            ``bias`` gradients (clipped element-wise to ``[-1, 1]``) and
            stores the gradient that reaches the initial hidden state in
            ``self.d_h_0``.
        """
        Wx, Wh, bias = self.params
        N, T, H = d_h_stack.shape
        D, _ = Wx.shape

        d_Wx = np.zeros_like(Wx)
        d_Wh = np.zeros_like(Wh)
        d_bias = np.zeros_like(bias)
        # Nothing flows in from beyond the last timestep, so both recurrent
        # gradients start at zero.
        d_h_prev = init.zeros(N, H)
        d_c_next = init.zeros(N, H)

        d_x_stack = init.empty(N, T, D)
        for t in reversed(range(T)):
            cell = self.timestep_cells[t]
            # Hidden state t receives gradient from the layer above and from
            # timestep t + 1.
            d_h_next = d_h_stack[:, t, :] + d_h_prev
            d_x, d_c_prev, d_h_prev = cell.backward(d_c_next, d_h_next)
            # The gradient w.r.t. the previous cell state is the incoming
            # cell-state gradient for timestep t - 1; without this hand-over
            # every step would see a zero cell-state gradient.
            d_c_next = d_c_prev
            d_x_stack[:, t, :] = d_x
            d_Wx += cell.grads['Wx']
            d_Wh += cell.grads['Wh']
            d_bias += cell.grads['bias']

        # Element-wise gradient clipping, done in place on the local sums.
        for d in [d_Wx, d_Wh, d_bias]:
            np.clip(d, -1, 1, out=d)

        # Write into the existing gradient buffers rather than rebinding them.
        self.grads[0][...] = d_Wx
        self.grads[1][...] = d_Wh
        self.grads[2][...] = d_bias

        self.d_h_0 = d_h_prev
        return d_x_stack
예제 #2
0
    def backward(self, d_h_stack, optimize=True):
        """Accumulate BPTT parameter gradients over the cached RNN cells.

        Visits timesteps ``T - 1`` down to ``1``; the cell stored for
        timestep 0 is not visited.

        Args:
            d_h_stack: Upstream gradient of the stacked hidden states. Its
                shape is unpacked into three dimensions and it is sliced as
                ``d_h_stack[:, t, :]`` for each visited timestep.
            optimize: Forwarded unchanged to every cell's ``backward``.

        Returns:
            ``self.grads``, after its ``'Wx'``, ``'Wh'`` and ``'bias'``
            entries have been overwritten in place with the summed gradients
            clipped element-wise to ``[-1, 1]``.
        """
        Wx, Wh, bias = self.params
        batch, steps, _ = d_h_stack.shape
        # The hidden size used for the carried gradient comes from Wx.
        _, hidden = Wx.shape

        totals = {
            'Wx': np.zeros_like(Wx),
            'Wh': np.zeros_like(Wh),
            'bias': np.zeros_like(bias),
        }
        carried = init.zeros(batch, hidden)

        # NOTE(review): the walk stops before timestep 0, so that cell
        # contributes nothing to the sums -- confirm this is intended.
        for step in range(steps - 1, 0, -1):
            cell = self.timestep_cells[step]
            incoming = d_h_stack[:, step, :] + carried
            step_grad = cell.backward(d_h_next=incoming, optimize=optimize)
            for key in ('Wx', 'Wh', 'bias'):
                totals[key] += step_grad[key]
            # Copy into the existing buffer instead of rebinding the name.
            carried[...] = step_grad['h_prev']

        # Element-wise gradient clipping, done in place on the local sums.
        for total in totals.values():
            np.clip(total, -1, 1, out=total)

        for key, total in totals.items():
            self.grads[key][...] = total

        return self.grads
예제 #3
0
    def forward(self, x_seq):
        """Run the LSTM over a whole input sequence, one cell per timestep.

        Args:
            x_seq: Input sequence. Its shape is unpacked as ``(N, T, D)`` and
                it is sliced as ``x_seq[:, t, :]`` for each timestep.

        Returns:
            A tuple ``(h_last, h_stack)``: the hidden state after the final
            timestep, and the per-timestep hidden states allocated as
            ``init.empty(N, T, H)`` with ``H = self.hidden_dim``. For an
            empty sequence (``T == 0``) ``h_last`` is the initial hidden
            state.

        Side effects:
            Replaces ``self.timestep_cells`` with the cells created by this
            call and stores the final cell and hidden states in
            ``self.cell_state`` and ``self.hidden_state``.
        """
        N, T, D = x_seq.shape
        H = self.hidden_dim
        Wx, Wh, bias = self.params

        # Start from zeros unless the layer is stateful and already holds a
        # state from an earlier call.
        c_prev = init.zeros(N, H) if not self.stateful or self.cell_state is None else self.cell_state
        h_prev = init.zeros(N, H) if not self.stateful or self.hidden_state is None else self.hidden_state

        # Drop cells cached by an earlier call: a backward pass that indexes
        # this list by timestep must see this call's cells at positions
        # 0..T-1, and the list must not grow without bound across calls.
        self.timestep_cells = []

        # x_seq : N, T, D
        h_stack = init.empty(N, T, H)
        for t in range(T):
            timestep_cell = LSTMCell(N, D, H, Wx, Wh, bias)
            c_prev, h_prev = timestep_cell.forward(x_seq[:, t, :], c_prev, h_prev)
            self.timestep_cells.append(timestep_cell)
            h_stack[:, t, :] = h_prev

        # After the loop c_prev / h_prev hold the final states (or the
        # initial ones when T == 0, which previously raised
        # UnboundLocalError).
        self.cell_state, self.hidden_state = c_prev, h_prev
        h_last = h_prev
        return h_last, h_stack
예제 #4
0
    def forward(self, x_sequence, h_init=None):
        """Run the RNN over a whole input sequence, one cell per timestep.

        Args:
            x_sequence: Input sequence. Its shape is unpacked as
                ``(batch_size, timesteps, input_dim)`` and it is sliced as
                ``x_sequence[:, t, :]`` for each timestep.
            h_init: Optional initial hidden state; when ``None`` the state
                starts as ``init.zeros(N, H)`` with ``H = self.hidden_dim``.

        Returns:
            A tuple ``(h_last, h_stack)``: the hidden state after the final
            timestep, and the per-timestep hidden states allocated as
            ``init.empty(N, T, H)``. For an empty sequence
            (``timesteps == 0``) ``h_last`` is the initial hidden state.

        Side effects:
            Replaces ``self.timestep_cells`` with the cells created by this
            call.
        """
        batch_size, timesteps, input_dim = x_sequence.shape
        N, T, D, H = batch_size, timesteps, input_dim, self.hidden_dim
        Wx, Wh, bias = self.params
        h_prev = init.zeros(N, H) if h_init is None else h_init

        # Drop cells cached by an earlier call: a backward pass that indexes
        # this list by timestep must see this call's cells, and the list must
        # not grow without bound across calls.
        self.timestep_cells = []

        # N*T*D style layout
        h_stack = init.empty(N, T, H)
        for t in range(T):
            timestep_cell = RNNCell(N, D, H, Wx=Wx, Wh=Wh, bias=bias)
            # Feed this timestep's 2-D slice of the input together with the
            # previous hidden state.
            h_prev = timestep_cell.forward(x_sequence[:, t, :], h_prev)
            self.timestep_cells.append(timestep_cell)
            # Store this timestep's RNN output in the stack.
            h_stack[:, t, :] = h_prev

        # h_prev now holds the final hidden state (or the initial one when
        # T == 0, which previously raised UnboundLocalError).
        h_last = h_prev
        return h_last, h_stack