def rnn_cell_forward(xt, a_prev, parameters): """ 实现RNN单元的单步前向传播 参数: xt -- 时间步“t”输入的数据,维度为(n_x, m) a_prev -- 时间步“t - 1”的隐藏隐藏状态,维度为(n_a, m) parameters -- 字典,包含了以下内容: Wax -- 矩阵,输入乘以权重,维度为(n_a, n_x) Waa -- 矩阵,隐藏状态乘以权重,维度为(n_a, n_a) Wya -- 矩阵,隐藏状态与输出相关的权重矩阵,维度为(n_y, n_a) ba -- 偏置,维度为(n_a, 1) by -- 偏置,隐藏状态与输出相关的偏置,维度为(n_y, 1) 返回: a_t -- 下一个隐藏状态,维度为(n_a, m) yt_pred -- 在时间步“t”的预测,维度为(n_y, m) cache -- 反向传播需要的元组,包含了(a_next, a_prev, xt, parameters) """ # 从“parameters”获取参数 Wax = parameters["Wax"] Waa = parameters["Waa"] Wya = parameters["Wya"] ba = parameters["ba"] by = parameters["by"] a_t = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba) y = rnn_utils.softmax(np.dot(Wya, a_t) + by) cache = (a_t, a_prev, xt, parameters) return a_t, y, cache
def rnn_cell_forward(xt, a_prev, parameters): """ Implements a single forward step of the RNN-cell as described in Figure (2) Vectorized over 'm' samples. Arguments: xt -- your input data at timestep "t", numpy array of shape (n_x, m). a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m) parameters -- python dictionary containing: Wax -- Weight matrix multiplying the input, numpy array of shape (n_a, n_x) Waa -- Weight matrix multiplying the hidden state, numpy array of shape (n_a, n_a) Wya -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a) ba -- Bias, numpy array of shape (n_a, 1) by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1) Returns: a_next -- next hidden state, of shape (n_a, m) yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m) cache -- tuple of values needed for the backward pass, contains (a_next, a_prev, xt, parameters) """ # Retrieve parameters from "parameters" Wax = parameters["Wax"] Waa = parameters["Waa"] Wya = parameters["Wya"] ba = parameters["ba"] by = parameters["by"] a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba) yt_pred = softmax(np.dot(Wya, a_next) + by) # store values you need for backward propagation in cache cache = (a_next, a_prev, xt, parameters) return a_next, yt_pred, cache
def forward_propagation(self, x):
    # The total number of time steps
    T = len(x)
    # During forward prop, we save all the hidden states in s, as we need them later.
    # We add one additional element for the initial hidden state, which we set to 0.
    s = np.zeros((T + 1, self.hidden_dim))
    s[-1] = np.zeros(self.hidden_dim)  # the initial hidden state lives at index -1
    # The outputs at each time step; again, we save them for later.
    o = np.zeros((T, self.word_dim))
    # For each time step:
    for t in np.arange(T):
        # Note that we are indexing U by x[t]. This is the same as multiplying U by a one-hot vector:
        # s_t = tanh(U x_t + W s_{t-1})
        # Since x_t is one-hot, we only need the column of U at the index where x_t is 1.
        s[t] = np.tanh(self.U[:, x[t]] + self.W.dot(s[t - 1]))
        # o_t = softmax(V s_t)
        o[t] = rnn_utils.softmax(self.V.dot(s[t]))
    return [o, s]
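The comment above relies on the fact that indexing a column of U is the same as multiplying U by a one-hot vector; a small check of that claim, with made-up sizes:

import numpy as np

np.random.seed(0)
word_dim, hidden_dim = 8, 4                 # hypothetical vocabulary / hidden sizes
U = np.random.randn(hidden_dim, word_dim)
word_index = 3                              # index of the current word x_t
one_hot = np.zeros(word_dim)
one_hot[word_index] = 1.0
# Selecting column word_index of U equals the full matrix-vector product U @ one_hot
assert np.allclose(U[:, word_index], U.dot(one_hot))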
def lstm_cell_forward(xt, a_prev, c_prev, parameters): """ 根据图4实现一个LSTM单元的前向传播。 参数: xt -- 在时间步“t”输入的数据,维度为(n_x, m) a_prev -- 上一个时间步“t-1”的隐藏状态,维度为(n_a, m) c_prev -- 上一个时间步“t-1”的记忆状态,维度为(n_a, m) parameters -- 字典类型的变量,包含了: Wf -- 遗忘门的权值,维度为(n_a, n_a + n_x) bf -- 遗忘门的偏置,维度为(n_a, 1) Wi -- 更新门的权值,维度为(n_a, n_a + n_x) bi -- 更新门的偏置,维度为(n_a, 1) Wc -- 第一个“tanh”的权值,维度为(n_a, n_a + n_x) bc -- 第一个“tanh”的偏置,维度为(n_a, n_a + n_x) Wo -- 输出门的权值,维度为(n_a, n_a + n_x) bo -- 输出门的偏置,维度为(n_a, 1) Wy -- 隐藏状态与输出相关的权值,维度为(n_y, n_a) by -- 隐藏状态与输出相关的偏置,维度为(n_y, 1) 返回: a_next -- 下一个隐藏状态,维度为(n_a, m) c_next -- 下一个记忆状态,维度为(n_a, m) yt_pred -- 在时间步“t”的预测,维度为(n_y, m) cache -- 包含了反向传播所需要的参数,包含了(a_next, c_next, a_prev, c_prev, xt, parameters) 注意: ft/it/ot表示遗忘/更新/输出门,cct表示候选值(c tilda),c表示记忆值。 """ # 从“parameters”中获取相关值 Wf = parameters["Wf"] bf = parameters["bf"] Wi = parameters["Wi"] bi = parameters["bi"] Wc = parameters["Wc"] bc = parameters["bc"] Wo = parameters["Wo"] bo = parameters["bo"] Wy = parameters["Wy"] by = parameters["by"] # 获取 xt 与 Wy 的维度信息 n_x, m = xt.shape n_y, n_a = Wy.shape # 1.连接 a_prev 与 xt contact = np.zeros([n_a + n_x, m]) contact[:n_a, :] = a_prev contact[n_a:, :] = xt # 2.根据公式计算ft、it、cct、c_next、ot、a_next ## 遗忘门,公式1 ft = rnn_utils.sigmoid(np.dot(Wf, contact) + bf) ## 更新门,公式2 it = rnn_utils.sigmoid(np.dot(Wi, contact) + bi) ## 更新单元,公式3 cct = np.tanh(np.dot(Wc, contact) + bc) ## 更新单元,公式4 # c_next = np.multiply(ft, c_prev) + np.multiply(it, cct) c_next = ft * c_prev + it * cct ## 输出门,公式5 ot = rnn_utils.sigmoid(np.dot(Wo, contact) + bo) ## 输出门,公式6 # a_next = np.multiply(ot, np.tan(c_next)) a_next = ot * np.tanh(c_next) # 3.计算LSTM单元的预测值 yt_pred = rnn_utils.softmax(np.dot(Wy, a_next) + by) # 保存包含了反向传播所需要的参数 cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters) return a_next, c_next, yt_pred, cache
def lstm_cell_forward(xt, a_prev, c_prev, parameters): """ 根据图4实现一个LSTM单元的前向传播。 参数: xt -- 在时间步“t”输入的数据,维度为(n_x, m) a_prev -- 上一个时间步“t-1”的隐藏状态,维度为(n_a, m) c_prev -- 上一个时间步“t-1”的记忆状态,维度为(n_a, m) parameters -- 字典类型的变量,包含了: Wf -- 遗忘门的权值,维度为(n_a, n_a + n_x) bf -- 遗忘门的偏置,维度为(n_a, 1) Wi -- 更新门的权值,维度为(n_a, n_a + n_x) bi -- 更新门的偏置,维度为(n_a, 1) Wc -- 第一个“tanh”的权值,维度为(n_a, n_a + n_x) bc -- 第一个“tanh”的偏置,维度为(n_a, n_a + n_x) Wo -- 输出门的权值,维度为(n_a, n_a + n_x) bo -- 输出门的偏置,维度为(n_a, 1) Wy -- 隐藏状态与输出相关的权值,维度为(n_y, n_a) by -- 隐藏状态与输出相关的偏置,维度为(n_y, 1) 返回: a_next -- 下一个隐藏状态,维度为(n_a, m) c_next -- 下一个记忆状态,维度为(n_a, m) yt_pred -- 在时间步“t”的预测,维度为(n_y, m) cache -- 包含了反向传播所需要的参数,包含了(a_next, c_next, a_prev, c_prev, xt, parameters) """ # 从“parameters”中获取相关值 Wf = parameters["Wf"] bf = parameters["bf"] Wi = parameters["Wi"] bi = parameters["bi"] Wc = parameters["Wc"] bc = parameters["bc"] Wo = parameters["Wo"] bo = parameters["bo"] Wy = parameters["Wy"] by = parameters["by"] contact = np.vstack((a_prev, xt)) #遗忘门 Gf = rnn_utils.sigmoid(np.dot(Wf, contact) + bf) #更新门 Gi = rnn_utils.sigmoid(np.dot(Wi, contact) + bi) #输出门 Go = rnn_utils.sigmoid(np.dot(Wo, contact) + bo) # 更新单元 tmp_ct = np.tanh(np.dot(Wc, contact) + bc) # 更新单元 ct = np.multiply(Gi, tmp_ct) + np.multiply(Gf, c_prev) #输出 a_next = np.multiply(Go, np.tanh(ct)) #计算LSTM单元的预测值 y_pre = rnn_utils.softmax(np.dot(Wy, a_next) + by) cache = (a_next, ct, a_prev, c_prev, Gf, Gi, tmp_ct, Go, xt, parameters) return a_next, ct, y_pre, cache
def lstm_cell_forward(xt, a_prev, c_prev, parameters): """ Implement a single forward step of the LSTM-cell as described in Figure (4) Arguments: xt -- your input data at timestep "t", numpy array of shape (n_x, m). a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m) c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m) parameters -- python dictionary containing: Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x) bf -- Bias of the forget gate, numpy array of shape (n_a, 1) Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x) bi -- Bias of the update gate, numpy array of shape (n_a, 1) Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x) bc -- Bias of the first "tanh", numpy array of shape (n_a, 1) Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x) bo -- Bias of the output gate, numpy array of shape (n_a, 1) Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a) by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1) Returns: a_next -- next hidden state, of shape (n_a, m) c_next -- next memory state, of shape (n_a, m) yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m) cache -- tuple of values needed for the backward pass, contains (a_next, c_next, a_prev, c_prev, xt, parameters) Note: ft/it/ot stand for the forget/update/output gates, cct stands for the candidate value (c tilde), c stands for the memory value """ # Retrieve parameters from "parameters" Wf = parameters["Wf"] bf = parameters["bf"] Wi = parameters["Wi"] bi = parameters["bi"] Wc = parameters["Wc"] bc = parameters["bc"] Wo = parameters["Wo"] bo = parameters["bo"] Wy = parameters["Wy"] by = parameters["by"] # Retrieve dimensions from shapes of xt and Wy n_x, m = xt.shape n_y, n_a = Wy.shape # Concatenate a_prev and xt concat = np.zeros((n_a + n_x, m)) concat[:n_a, :] = a_prev concat[n_a:, :] = xt # Compute values for ft, it, cct, c_next, ot, a_next using the formulas # given figure (4) ft = sigmoid(np.dot(Wf, concat) + bf) it = sigmoid(np.dot(Wi, concat) + bi) cct = np.tanh(np.dot(Wc, concat) + bc) c_next = ft * c_prev + it * cct ot = sigmoid(np.dot(Wo, concat) + bo) a_next = ot * np.tanh(c_next) yt_pred = softmax(np.dot(Wy, a_next) + by) # store values needed for backward propagation in cache cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters) return a_next, c_next, yt_pred, cache
def lstm_cell_forward(xt, a_prev, c_prev, parameters): """ Implement a single forward step of the LSTM-cell as described in Figure (4) Arguments: xt -- your input data at timestep "t", numpy array of shape (n_x, m). a_prev -- Hidden state at timestep "t-1", numpy array of shape (n_a, m) c_prev -- Memory state at timestep "t-1", numpy array of shape (n_a, m) parameters -- python dictionary containing: Wf -- Weight matrix of the forget gate, numpy array of shape (n_a, n_a + n_x) bf -- Bias of the forget gate, numpy array of shape (n_a, 1) Wi -- Weight matrix of the update gate, numpy array of shape (n_a, n_a + n_x) bi -- Bias of the update gate, numpy array of shape (n_a, 1) Wc -- Weight matrix of the first "tanh", numpy array of shape (n_a, n_a + n_x) bc -- Bias of the first "tanh", numpy array of shape (n_a, 1) Wo -- Weight matrix of the output gate, numpy array of shape (n_a, n_a + n_x) bo -- Bias of the output gate, numpy array of shape (n_a, 1) Wy -- Weight matrix relating the hidden-state to the output, numpy array of shape (n_y, n_a) by -- Bias relating the hidden-state to the output, numpy array of shape (n_y, 1) Returns: a_next -- next hidden state, of shape (n_a, m) c_next -- next memory state, of shape (n_a, m) yt_pred -- prediction at timestep "t", numpy array of shape (n_y, m) cache -- tuple of values needed for the backward pass, contains (a_next, c_next, a_prev, c_prev, xt, parameters) Note: ft/it/ot stand for the forget/update/output gates, cct stands for the candidate value (c tilde), c stands for the memory value """ # Retrieve parameters from "parameters" Wf = parameters["Wf"] bf = parameters["bf"] Wi = parameters["Wi"] bi = parameters["bi"] Wc = parameters["Wc"] bc = parameters["bc"] Wo = parameters["Wo"] bo = parameters["bo"] Wy = parameters["Wy"] by = parameters["by"] # Retrieve dimensions from shapes of xt and Wy n_x, m = xt.shape n_y, n_a = Wy.shape # Concatenate a_prev and xt concat = np.zeros((n_a + n_x, m)) concat[: n_a, :] = a_prev concat[n_a :, :] = xt # Compute values for ft, it, cct, c_next, ot, a_next using the formulas # given figure (4) ft = sigmoid(np.dot(Wf, concat) + bf) it = sigmoid(np.dot(Wi, concat) + bi) cct = np.tanh(np.dot(Wc, concat) + bc) c_next = ft * c_prev + it * cct ot = sigmoid(np.dot(Wo, concat) + bo) a_next = ot * np.tanh(c_next) yt_pred = softmax(np.dot(Wy, a_next) + by) # store values needed for backward propagation in cache cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, parameters) return a_next, c_next, yt_pred, cache