def get_gru_states(net, x):
    gru = net.rnn_array[0]
    # Gather the GRU's weighted input: scaled external input concatenated
    # with the scaled activations of the other hidden nodes
    inp = np.concatenate(
        (net.in2hid[net.rnn_idx[0]] * x,
         net.hid2hid[net.rnn_idx[0]] * net.hidden_act),
        axis=1)[net.rnn_map[0]].reshape(net.bs, gru.input_size)
    W_xh = np.matmul(inp, gru.weight_xh.transpose())
    W_hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    R_t = sigmoid(W_xh[:, 0:1] + W_hh[:, 0:1] + gru.bias[0:1])  # Reset gate
    Z_t = sigmoid(W_xh[:, 1:2] + W_hh[:, 1:2] + gru.bias[1:2])  # Update gate
    H_t_tilde = np.tanh(W_xh[:, 2:3] + R_t * W_hh[:, 2:3] + gru.bias[2:3])  # Candidate hidden state
    H_t = (1 - Z_t) * H_t_tilde + Z_t * gru.hx  # New hidden state
    return H_t[0, 0], H_t_tilde[0, 0], R_t[0, 0], Z_t[0, 0]
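All of these examples call a `sigmoid` helper that the snippets themselves do not define. A minimal sketch of what it is assumed to be, namely the standard element-wise logistic function in plain NumPy:

import numpy as np

def sigmoid(x: np.ndarray) -> np.ndarray:
    """Standard logistic function, applied element-wise."""
    return 1.0 / (1.0 + np.exp(-x))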
Example #2
def get_lstm_states(net, x):
    lstm = net.rnn_array[0]
    # Gather the LSTM's weighted input: scaled external input concatenated
    # with the scaled activations of the other hidden nodes
    inp = np.concatenate(
        (net.in2hid[net.rnn_idx[0]] * x,
         net.hid2hid[net.rnn_idx[0]] * net.hidden_act),
        axis=1)[net.rnn_map[0]].reshape(net.bs, lstm.input_size)
    xh = np.matmul(inp, lstm.weight_xh.transpose())
    hh = np.matmul(lstm.hx, lstm.weight_hh.transpose())
    F_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + lstm.bias[0:1])  # Forget gate
    I_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + lstm.bias[1:2])  # Input gate
    O_t = sigmoid(xh[:, 2:3] + hh[:, 2:3] + lstm.bias[2:3])  # Output gate
    C_tilde = np.tanh(xh[:, 3:4] + hh[:, 3:4] + lstm.bias[3:4])  # Candidate cell state
    C_t = F_t * lstm.c + I_t * C_tilde  # New cell state
    H_t = O_t * np.tanh(C_t)  # New hidden state
    return H_t[0, 0], C_t[0, 0], C_tilde[0, 0], F_t[0, 0], I_t[0, 0], O_t[0, 0]
Example #3
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :note: H_tilde is NOT computed correctly if the hidden state is larger than one, but it is correct when
           hx has shape (1, 1), which is always the case for our usage.
           Correct implementation: http://courses.d2l.ai/berkeley-stat-157/slides/4_9/19-RNN.pdf

    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # Reset gate
    Z_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + self.bias[1:2])  # Update gate
    # H_tilde = np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + self.bias[2:3])  # Inlined below to save a variable
    self.hx = (1 - Z_t) * np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + self.bias[2:3]) + Z_t * self.hx
    return self.hx
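A minimal, self-contained sketch of how a GRU step like this could be exercised for a single-unit cell. The parameter shapes (weight rows ordered R, Z, H) are assumptions read off the slicing above, and the SimpleNamespace object is a hypothetical stand-in for the repository's actual GRU cell class, not its real API:

import numpy as np
from types import SimpleNamespace

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
input_size, hidden_size = 2, 1  # the slicing above assumes hidden_size == 1
cell = SimpleNamespace(
    weight_xh=rng.standard_normal((3 * hidden_size, input_size)),  # rows: R, Z, H
    weight_hh=rng.standard_normal((3 * hidden_size, hidden_size)),
    bias=rng.standard_normal(3 * hidden_size),
    hx=np.zeros((1, hidden_size)),
)

def gru_step(cell, x):
    xh = np.matmul(x, cell.weight_xh.transpose())
    hh = np.matmul(cell.hx, cell.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + cell.bias[0:1])  # Reset gate
    Z_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + cell.bias[1:2])  # Update gate
    cell.hx = (1 - Z_t) * np.tanh(xh[:, 2:3] + R_t * hh[:, 2:3] + cell.bias[2:3]) + Z_t * cell.hx
    return cell.hx

print(gru_step(cell, rng.standard_normal((1, input_size))))  # (1, 1) hidden state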
Example #4
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    if len(self.c) == 0:  # (batch_size, hidden_size)
        self.c = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    F_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # Forget gate
    I_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + self.bias[1:2])  # Input gate
    O_t = sigmoid(xh[:, 2:3] + hh[:, 2:3] + self.bias[2:3])  # Output gate
    # C_tilde = np.tanh(xh[:, 3:4] + hh[:, 3:4] + self.bias[3:4])  # Inlined below to save a variable
    self.c = F_t * self.c + I_t * np.tanh(xh[:, 3:4] + hh[:, 3:4] + self.bias[3:4])  # New cell state
    self.hx = O_t * np.tanh(self.c)  # New hidden state
    return self.hx
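As above, a sketch of driving this LSTM step with a single-unit cell; the (4, input_size) weight layout with gate rows ordered F, I, O, C is an assumption inferred from the slicing, and SimpleNamespace again stands in for the real cell class:

import numpy as np
from types import SimpleNamespace

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(1)
input_size, hidden_size = 2, 1  # the slicing above assumes hidden_size == 1
cell = SimpleNamespace(
    weight_xh=rng.standard_normal((4 * hidden_size, input_size)),  # rows: F, I, O, C
    weight_hh=rng.standard_normal((4 * hidden_size, hidden_size)),
    bias=rng.standard_normal(4 * hidden_size),
    hx=np.zeros((1, hidden_size)),
    c=np.zeros((1, hidden_size)),
)

def lstm_step(cell, x):
    xh = np.matmul(x, cell.weight_xh.transpose())
    hh = np.matmul(cell.hx, cell.weight_hh.transpose())
    F_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + cell.bias[0:1])  # Forget gate
    I_t = sigmoid(xh[:, 1:2] + hh[:, 1:2] + cell.bias[1:2])  # Input gate
    O_t = sigmoid(xh[:, 2:3] + hh[:, 2:3] + cell.bias[2:3])  # Output gate
    cell.c = F_t * cell.c + I_t * np.tanh(xh[:, 3:4] + hh[:, 3:4] + cell.bias[3:4])
    cell.hx = O_t * np.tanh(cell.c)
    return cell.hx

print(lstm_step(cell, rng.standard_normal((1, input_size))))  # (1, 1) hidden state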
Example #5
def __call__(self, x: np.ndarray):
    """
    Forward the network by one iteration and return the updated hidden state.

    :param x: Input
    :return: Updated hidden state
    """
    if len(self.hx) == 0:  # (batch_size, hidden_size)
        self.hx = np.zeros((x.shape[0], self.hidden_size), dtype=np.float64)
    xh = np.matmul(x, self.weight_xh.transpose())
    hh = np.matmul(self.hx, self.weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + self.bias[0:1])  # Reset gate
    self.hx = np.tanh(xh[:, 1:2] + R_t * hh[:, 1:2] + self.bias[1:2])  # New hidden state
    return self.hx
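This variant keeps only the reset gate (weight rows ordered R, H), with no update gate blending in the previous state. A sketch, under the same single-unit assumptions as above, of iterating such an update over a short input sequence:

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(2)
input_size = 2
weight_xh = rng.standard_normal((2, input_size))  # rows: R, H
weight_hh = rng.standard_normal((2, 1))
bias = rng.standard_normal(2)
hx = np.zeros((1, 1))

for x in rng.standard_normal((5, 1, input_size)):  # five time steps
    xh = np.matmul(x, weight_xh.transpose())
    hh = np.matmul(hx, weight_hh.transpose())
    R_t = sigmoid(xh[:, 0:1] + hh[:, 0:1] + bias[0:1])  # Reset gate
    hx = np.tanh(xh[:, 1:2] + R_t * hh[:, 1:2] + bias[1:2])
print(hx)  # final (1, 1) hidden state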
Example #6
def get_state(gru: GRUCell, x):
    W_xh = np.matmul(x, gru.weight_xh.transpose())
    W_hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    R_t = sigmoid(W_xh[:, 0:1] + W_hh[:, 0:1] + gru.bias[0:1])  # Reset gate
    # Return the candidate hidden state H_tilde as a scalar
    return np.tanh(W_xh[:, 2:3] + R_t * W_hh[:, 2:3] + gru.bias[2:3])[0, 0]
Example #7
def get_state(gru: GRUCell, x):
    W_xh = np.matmul(x, gru.weight_xh.transpose())
    W_hh = np.matmul(gru.hx, gru.weight_hh.transpose())
    # Return the update gate Z_t as a scalar
    return sigmoid(W_xh[:, 1:2] + W_hh[:, 1:2] + gru.bias[1:2])[0, 0]