def gru_step_forward(self, x, prev_h, Wzx, Wzh, bz, Wax, War, ba):
    """
    Forward pass for a single timestep of a GRU.

    The input data has dimension D, the hidden state has dimension H, and we
    use a minibatch size of N. Note that a sigmoid() function has already
    been provided for you in this file.

    Inputs:
    - x: Input data, of shape (N, D)
    - prev_h: Previous hidden state, of shape (N, H)
    - Wzx: Input-to-gate weights for the reset and update gates (stacked), of shape (D, 2H)
    - Wzh: Hidden-to-gate weights, of shape (H, 2H)
    - bz: Gate biases, of shape (2H,)
    - Wax: Input-to-candidate weights, of shape (D, H)
    - War: Hidden-to-candidate weights, of shape (H, H)
    - ba: Candidate biases, of shape (H,)

    Returns a tuple of:
    - next_h: Next hidden state, of shape (N, H)
    - cache: Tuple of values needed for the backward pass.
    """
    next_h, cache = None, None
    #############################################################################
    # TODO: Implement the forward pass for a single timestep of a GRU.          #
    # You may want to use the numerically stable sigmoid implementation above.  #
    # First layer: x has shape (N, T, D); upper layers receive xh of shape      #
    # (N, T, H). Likewise Wx is (D, H) in the first layer and Wxh is (H, H)     #
    # in the layers above it.                                                   #
    #############################################################################
    H = prev_h.shape[1]
    # Gate pre-activations z_hat, of shape (N, 2H): columns [:H] feed the
    # reset gate, columns [H:2H] the update gate.
    z_hat = Tools.matmul(x, Wzx) + Tools.matmul(prev_h, Wzh) + bz
    # Each gate is of shape (N, H).
    r = Tools.sigmoid(z_hat[:, :H])
    z = Tools.sigmoid(z_hat[:, H:2 * H])
    # Candidate pre-activation: the reset gate r scales the previous hidden
    # state before it enters the candidate computation.
    a = Tools.matmul(x, Wax) + Tools.matmul(r * prev_h, War) + ba
    # Interpolate between the previous hidden state and the candidate
    # tanh(a), weighted by the update gate z.
    next_h = prev_h * (1. - z) + z * np.tanh(a)
    cache = (x, prev_h, Wzx, Wzh, Wax, War, z_hat, r, z, a)
    ##############################################################################
    #                              END OF YOUR CODE                              #
    ##############################################################################
    return next_h, cache
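# The step above calls Tools.matmul and the Tools.sigmoid that the docstring
# says is provided elsewhere in this file. As a point of reference, below is a
# minimal sketch of what those two helpers are assumed to look like; the
# implementations are illustrative stand-ins, not the file's actual ones.

import numpy as np

class Tools:
    """Minimal stand-in for the helper class assumed by the steps here."""

    @staticmethod
    def matmul(a, b):
        # Thin wrapper over NumPy's matrix product.
        return np.matmul(a, b)

    @staticmethod
    def sigmoid(x):
        # Numerically stable sigmoid: never exponentiates a large positive
        # number, so it cannot overflow for extreme inputs.
        out = np.empty_like(x, dtype=np.float64)
        pos = x >= 0
        out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
        ex = np.exp(x[~pos])
        out[~pos] = ex / (1.0 + ex)
        return out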
def lstm_step_forward(self, x, prev_h, prev_c, Wx, Wh, b):
    """
    Forward pass for a single timestep of an LSTM.

    The input data has dimension D, the hidden state has dimension H, and we
    use a minibatch size of N. Note that a sigmoid() function has already
    been provided for you in this file.

    Inputs:
    - x: Input data, of shape (N, D)
    - prev_h: Previous hidden state, of shape (N, H)
    - prev_c: Previous cell state, of shape (N, H)
    - Wx: Input-to-hidden weights, of shape (D, 4H)
    - Wh: Hidden-to-hidden weights, of shape (H, 4H)
    - b: Biases, of shape (4H,)

    Returns a tuple of:
    - next_h: Next hidden state, of shape (N, H)
    - next_c: Next cell state, of shape (N, H)
    - cache: Tuple of values needed for the backward pass.
    """
    next_h, next_c, cache = None, None, None
    #############################################################################
    # TODO: Implement the forward pass for a single timestep of an LSTM.        #
    # You may want to use the numerically stable sigmoid implementation above.  #
    # First layer: x has shape (N, T, D); upper layers receive xh of shape      #
    # (N, T, H). Likewise Wx is (D, H) in the first layer and Wxh is (H, H)     #
    # in the layers above it.                                                   #
    #############################################################################
    H = prev_h.shape[1]
    # Gate pre-activations z, of shape (N, 4H): input, forget, output, and
    # candidate blocks stacked along the last axis.
    z = Tools.matmul(x, Wx) + Tools.matmul(prev_h, Wh) + b
    # Each gate is of shape (N, H).
    i = Tools.sigmoid(z[:, :H])           # input gate
    f = Tools.sigmoid(z[:, H:2 * H])      # forget gate
    o = Tools.sigmoid(z[:, 2 * H:3 * H])  # output gate
    g = np.tanh(z[:, 3 * H:])             # candidate cell update
    # Forget part of the old cell state, then write the gated candidate.
    next_c = f * prev_c + i * g
    # The hidden state exposes a gated, squashed view of the cell state.
    next_h = o * np.tanh(next_c)
    cache = (x, prev_h, prev_c, Wx, Wh, i, f, o, g, next_c)
    ##############################################################################
    #                              END OF YOUR CODE                              #
    ##############################################################################
    return next_h, next_c, cache
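# As a quick sanity check on the shapes, the sketch below drives both steps
# with random data. It assumes the Tools helpers sketched above are in scope;
# since `self` is unused in either step, None is passed for it here (inside a
# class, call the methods on an instance instead).

import numpy as np

np.random.seed(0)
N, D, H = 4, 5, 6

x = np.random.randn(N, D)
prev_h = np.random.randn(N, H)
prev_c = np.random.randn(N, H)

# LSTM weights: i, f, o, g blocks stacked along the last axis.
Wx = 0.1 * np.random.randn(D, 4 * H)
Wh = 0.1 * np.random.randn(H, 4 * H)
b = np.zeros(4 * H)

next_h, next_c, _ = lstm_step_forward(None, x, prev_h, prev_c, Wx, Wh, b)
assert next_h.shape == (N, H) and next_c.shape == (N, H)

# GRU weights: gate blocks are (D, 2H)/(H, 2H), candidate weights (D, H)/(H, H).
Wzx = 0.1 * np.random.randn(D, 2 * H)
Wzh = 0.1 * np.random.randn(H, 2 * H)
bz = np.zeros(2 * H)
Wax = 0.1 * np.random.randn(D, H)
War = 0.1 * np.random.randn(H, H)
ba = np.zeros(H)

gru_h, _ = gru_step_forward(None, x, prev_h, Wzx, Wzh, bz, Wax, War, ba)
assert gru_h.shape == (N, H)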