def forward(self, inputs, weights):
    """Run a single LSTM step.

    Args:
        inputs: a pair `(x, lstm_state)`. `lstm_state` holds the cell
            state `c` and the hidden state `h` concatenated along the last
            axis; it is split into two equal halves here.
        weights: a pair `(w, b)` -- kernel and bias of the dense layer
            applied to the concatenation of `x` and `h`.

    Returns:
        A pair `(new_h, new_state)` where `new_state` is the concatenation
        of the new cell state and `new_h` along the last axis.
    """
    step_input, prev_state = inputs
    kernel, bias = weights

    # The carried state packs the cell state first, then the hidden state.
    prev_cell, prev_hidden = np.split(prev_state, 2, axis=-1)

    # One dense projection of [x, h] yields all four gate pre-activations.
    dense_in = np.concatenate([step_input, prev_hidden], axis=-1)
    pre_activations = np.dot(dense_in, kernel) + bias
    input_gate, new_input, forget_gate, output_gate = np.split(
        pre_activations, 4, axis=-1
    )

    # Keep part of the old cell state, then add the gated new input.
    kept = prev_cell * backend.sigmoid(forget_gate)
    added = backend.sigmoid(input_gate) * np.tanh(new_input)
    next_cell = kept + added

    next_hidden = np.tanh(next_cell) * backend.sigmoid(output_gate)
    next_state = np.concatenate([next_cell, next_hidden], axis=-1)
    return next_hidden, next_state
def forward(self, inputs, weights):
    """Run a single GRU step.

    Args:
        inputs: a pair `(x, gru_state)` -- the step input and the state
            carried over from the previous step.
        weights: a 4-tuple `(w1, b1, w2, b2)`. `(w1, b1)` is the dense
            layer producing the update and reset gates; `(w2, b2)` is the
            dense layer producing the candidate.

    Returns:
        A pair `(new_gru_state, new_gru_state)`: the new state is returned
        both as the step output and as the state to carry forward.
    """
    step_input, prev_state = inputs
    gate_kernel, gate_bias, cand_kernel, cand_bias = weights

    # Update and reset gates come from one dense layer over [x, state];
    # the sigmoid is applied before splitting into the two gates.
    gate_in = np.concatenate([step_input, prev_state], axis=-1)
    gate_logits = np.dot(gate_in, gate_kernel) + gate_bias
    update_gate, reset_gate = np.split(
        backend.sigmoid(gate_logits), 2, axis=-1
    )

    # The candidate sees the previous state scaled by the reset gate.
    cand_in = np.concatenate([step_input, reset_gate * prev_state], axis=-1)
    candidate = np.dot(cand_in, cand_kernel) + cand_bias

    # Blend the old state and the squashed candidate via the update gate.
    next_state = (
        update_gate * prev_state + (1 - update_gate) * np.tanh(candidate)
    )
    return next_state, next_state
def Tanh(x, **unused_kwargs):
    """Return the element-wise hyperbolic tangent of `x`.

    Args:
        x: value passed straight to `np.tanh`.
        **unused_kwargs: accepted and ignored.

    Returns:
        `np.tanh(x)`.
    """
    del unused_kwargs  # Accepted in the signature only; never read.
    squashed = np.tanh(x)
    return squashed
def FastGelu(x, **unused_kwargs):
    """Return the tanh-based approximation of GELU applied to `x`.

    Computes `0.5 * x * (1 + tanh(k * x * (1 + 0.044715 * x**2)))` with
    `k = 0.7978845608`, which is sqrt(2 / pi) to ten decimal places.

    Args:
        x: value to activate; only arithmetic and `np.tanh` are applied.
        **unused_kwargs: accepted and ignored.

    Returns:
        The activated value.
    """
    del unused_kwargs  # Accepted in the signature only; never read.
    # Operand order is kept as in the original expression so that
    # floating-point results are unchanged.
    cubic_factor = 1 + 0.044715 * x * x
    tanh_arg = x * 0.7978845608 * cubic_factor
    return 0.5 * x * (1 + np.tanh(tanh_arg))