import numpy as np


def sigmoid(z):
    """Element-wise logistic function; stands in for the repo's helper."""
    return 1 / (1 + np.exp(-z))


def get_gru_states(net, x):
    """Recompute and return the internal GRU activations (hidden state,
    candidate state, reset gate, update gate) of the network's first
    recurrent node, for monitoring purposes."""
    gru = net.rnn_array[0]
    # Assemble the cell's input from the node's incoming connections.
    inp = np.concatenate(
            (net.in2hid[net.rnn_idx[0]] * x,
             net.hid2hid[net.rnn_idx[0]] * net.hidden_act),
            axis=1)[net.rnn_map[0]].reshape(net.bs, gru.input_size)
    xh = np.matmul(inp, gru.weight_xh.transpose())
    hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + gru.bias[0:1])  # reset gate
    Z_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + gru.bias[1:2])  # update gate
    H_t_tilde = np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + gru.bias[2:3])  # candidate state
    H_t = (1 - Z_t) * H_t_tilde + Z_t * gru.hx  # new hidden state
    return H_t[0, 0], H_t_tilde[0, 0], R_t[0, 0], Z_t[0, 0]
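# For reference, the GRU step recomputed above follows the convention (as in
# the d2l.ai slides linked in the GRU cell's docstring further down) where the
# update gate Z_t preserves the previous hidden state:
#
#   R_t       = sigmoid(x_t W_xr^T + h_{t-1} W_hr^T + b_r)        (reset gate)
#   Z_t       = sigmoid(x_t W_xz^T + h_{t-1} W_hz^T + b_z)        (update gate)
#   H_tilde_t = tanh(x_t W_xh^T + R_t * (h_{t-1} W_hh^T) + b_h)   (candidate)
#   H_t       = (1 - Z_t) * H_tilde_t + Z_t * H_{t-1}
#
# The three gates are packed row-wise into weight_xh / weight_hh / bias, which
# is why the code slices columns 0:1, 1:2 and 2:3 of the matmul results.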
def get_lstm_states(net, x):
    """Recompute and return the internal LSTM activations (hidden state, cell
    state, candidate cell state, and the forget/input/output gates) of the
    network's first recurrent node, for monitoring purposes."""
    lstm = net.rnn_array[0]
    inp = np.concatenate(
            (net.in2hid[net.rnn_idx[0]] * x,
             net.hid2hid[net.rnn_idx[0]] * net.hidden_act),
            axis=1)[net.rnn_map[0]].reshape(net.bs, lstm.input_size)
    xh = np.matmul(inp, lstm.weight_xh.transpose())
    hh = np.matmul(lstm.hx, lstm.weight_hh.transpose())
    F_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + lstm.bias[0:1])  # forget gate
    I_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + lstm.bias[1:2])  # input gate
    O_t = sigmoid(xh[:, 2:3] + hh[:, 2:3] + lstm.bias[2:3])  # output gate
    C_tilde = np.tanh(xh[:, 3:4] + hh[:, 3:4] + lstm.bias[3:4])  # candidate cell state
    C_t = F_t * lstm.c + I_t * C_tilde  # new cell state
    H_t = O_t * np.tanh(C_t)  # new hidden state
    return H_t[0, 0], C_t[0, 0], C_tilde[0, 0], F_t[0, 0], I_t[0, 0], O_t[0, 0]
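# Likewise, the LSTM step above implements the standard equations, with the
# four gates packed row-wise in the order (forget, input, output, candidate):
#
#   F_t       = sigmoid(x_t W_xf^T + h_{t-1} W_hf^T + b_f)   (forget gate)
#   I_t       = sigmoid(x_t W_xi^T + h_{t-1} W_hi^T + b_i)   (input gate)
#   O_t       = sigmoid(x_t W_xo^T + h_{t-1} W_ho^T + b_o)   (output gate)
#   C_tilde_t = tanh(x_t W_xc^T + h_{t-1} W_hc^T + b_c)      (candidate cell)
#   C_t       = F_t * C_{t-1} + I_t * C_tilde_t              (new cell state)
#   H_t       = O_t * tanh(C_t)                              (new hidden state)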
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :note: H_tilde is NOT computed correctly when the hidden state is larger
     than one, but it is correct when hx has shape (1, 1), which is always the
     case for our usage. Correct implementation:
     http://courses.d2l.ai/berkeley-stat-157/slides/4_9/19-RNN.pdf
    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # reset gate
    Z_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + self.bias[1:2])  # update gate
    # H_tilde = np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + self.bias[2:3]) is
    # folded into the update below to save one temporary.
    self.hx = (1 - Z_t) * np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + self.bias[2:3]) + Z_t * self.hx
    return self.hx
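# A minimal sketch of driving the GRU step above in isolation, assuming it is
# bound as GRUCell.__call__ (the class name GRUCell appears in the probe
# helpers further down). The stand-in cell is hypothetical: it carries only
# the attributes the step reads (hidden_size, hx, weight_xh, weight_hh, bias),
# with a single hidden unit as the docstring requires.
import numpy as np
from types import SimpleNamespace

rng = np.random.default_rng(0)
demo_gru = SimpleNamespace(
    hidden_size=1,
    hx=[],  # empty: the first step allocates the zero state
    weight_xh=rng.standard_normal((3, 4)),  # rows = (reset, update, candidate)
    weight_hh=rng.standard_normal((3, 1)),
    bias=rng.standard_normal(3),
)

gru_step = GRUCell.__call__  # the step defined above
for x in rng.standard_normal((10, 1, 4)):  # ten timesteps, batch of one
    h = gru_step(demo_gru, x)
print(h.shape)  # -> (1, 1)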
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    if len(self.c) == 0:  # (batch_size, hidden_size)
        self.c = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    F_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # forget gate
    I_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + self.bias[1:2])  # input gate
    O_t = sigmoid(xh[:, 2:3] + hh[:, 2:3] + self.bias[2:3])  # output gate
    # C_tilde = np.tanh(xh[:, 3:4] + hh[:, 3:4] + self.bias[3:4]) is folded
    # into the cell-state update below to save one temporary.
    self.c = F_t * self.c + I_t * np.tanh(xh[:, 3:4] + hh[:, 3:4] + self.bias[3:4])
    self.hx = O_t * np.tanh(self.c)
    return self.hx
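# The LSTM step can be exercised the same way. The class name LSTMCell is an
# assumption (only GRUCell is referenced elsewhere here); the stand-in again
# carries only the attributes the step reads, now including the cell state c,
# with the four gate rows ordered (forget, input, output, candidate).
import numpy as np
from types import SimpleNamespace

rng = np.random.default_rng(1)
demo_lstm = SimpleNamespace(
    hidden_size=1,
    hx=[],  # allocated on the first step
    c=[],   # cell state, also allocated on the first step
    weight_xh=rng.standard_normal((4, 4)),
    weight_hh=rng.standard_normal((4, 1)),
    bias=rng.standard_normal(4),
)

lstm_step = LSTMCell.__call__  # hypothetical binding of the step above
for x in rng.standard_normal((10, 1, 4)):
    h = lstm_step(demo_lstm, x)
print(h.shape)  # -> (1, 1)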
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # reset gate
    self.hx = np.tanh(xh[:, 1:2] + R_t * hh[:, 1:2] + self.bias[1:2])
    return self.hx
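# This last cell is effectively the GRU above with the update gate removed:
# the candidate state becomes the new hidden state directly,
#
#   R_t = sigmoid(x_t W_xr^T + h_{t-1} W_hr^T + b_r)       (reset gate)
#   H_t = tanh(x_t W_xh^T + R_t * (h_{t-1} W_hh^T) + b_h)
#
# so weight_xh / weight_hh / bias pack only two rows per hidden unit, and the
# code slices columns 0:1 (reset) and 1:2 (candidate).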
def get_state(gru: GRUCell, x):
    """Return the candidate hidden state H_tilde of a single-unit GRU cell
    for input x, without advancing the cell's state."""
    xh = np.matmul(x, gru.weight_xh.transpose())
    hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + gru.bias[0:1])  # reset gate
    return np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + gru.bias[2:3])[0, 0]  # candidate state
def get_state(gru: GRUCell, x):
    """Return the update-gate activation Z_t of a single-unit GRU cell for
    input x, without advancing the cell's state. (Same name as the helper
    above; the two appear to be alternative monitoring probes.)"""
    xh = np.matmul(x, gru.weight_xh.transpose())
    hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    return sigmoid(xh[:, 1:2] + hh[:, 1:2] + gru.bias[1:2])[0, 0]  # update gate
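# A minimal sketch of probing a cell with the helper directly above: read the
# update-gate activation without mutating the cell. The stand-in cell is
# hypothetical, mirroring the earlier sketches; note the probe assumes hx has
# already been allocated.
import numpy as np
from types import SimpleNamespace

rng = np.random.default_rng(2)
probe_gru = SimpleNamespace(
    hx=np.zeros((1, 1)),
    weight_xh=rng.standard_normal((3, 4)),
    weight_hh=rng.standard_normal((3, 1)),
    bias=rng.standard_normal(3),
)
x = rng.standard_normal((1, 4))
z_t = get_state(probe_gru, x)  # update-gate variant
print(0.0 < z_t < 1.0)  # -> True: sigmoid output lies in (0, 1)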