def step(self, x, states):
    """Run one recurrent step: attention over ``states[2]`` followed by an LSTM update.

    Args:
        x: input for the current timestep.
        states: indexable collection; ``states[0]`` and ``states[1]`` are the
            previous hidden and cell values, ``states[2]`` is the tensor that
            gets attention-weighted and ``states[3]`` is the projection added
            to the attention scores.
            NOTE(review): presumably ``states[2]``/``states[3]`` are 3-D
            (samples, positions, dim) -- confirm against the caller.

    Returns:
        Tuple ``(h, [h, c])``: the new hidden value as output plus the new
        hidden and cell values.
    """
    prev_h, prev_c = states[0], states[1]
    attended = states[2]
    attended_proj = states[3]

    # Attention scores: combine the projected attended tensor with the
    # projections of the current input and previous hidden value, inserting
    # a broadcastable middle axis on the two 2-D terms.
    input_proj = K.dot(x, self.W_t)
    hidden_proj = K.dot(prev_h, self.W_a)
    pre_tanh = (
        attended_proj
        + input_proj.dimshuffle((0, 'x', 1))
        + hidden_proj.dimshuffle((0, 'x', 1))
    )
    scores = K.tanh(pre_tanh).dot(self.w_e)
    attn = K.softmax(scores)

    # Weighted sum over axis 1 gives the attention summary, which is
    # concatenated with the current input to feed the LSTM gates.
    context = (attended * attn.dimshuffle((0, 1, 'x'))).sum(axis=1)
    lstm_in = K.T.concatenate([context, x], axis=1)

    gate_i = self.inner_activation(
        K.dot(lstm_in, self.W_i) + self.b_i + K.dot(prev_h, self.U_i))
    gate_f = self.inner_activation(
        K.dot(lstm_in, self.W_f) + self.b_f + K.dot(prev_h, self.U_f))
    candidate = self.activation(
        K.dot(lstm_in, self.W_c) + self.b_c + K.dot(prev_h, self.U_c))
    new_c = gate_f * prev_c + gate_i * candidate
    gate_o = self.inner_activation(
        K.dot(lstm_in, self.W_o) + self.b_o + K.dot(prev_h, self.U_o))
    new_h = gate_o * self.activation(new_c)
    return new_h, [new_h, new_c]
def step(self, x, states):
    """Compute one timestep of the attention-augmented LSTM.

    Args:
        x: current-timestep input.
        states: indexable collection read at positions 0..3: previous hidden
            value, previous cell value, the tensor to attend over, and the
            projection of that tensor used in the score computation.
            NOTE(review): the last two look like per-sequence constants --
            confirm how the caller supplies them.

    Returns:
        ``(h, [h, c])`` where ``h`` is the new hidden value and ``c`` the new
        cell value.
    """
    h_prev = states[0]
    c_prev = states[1]
    memory = states[2]
    memory_proj = states[3]

    def _pre_activation(W, b, U):
        # Shared affine form of all four gates: input term + bias + recurrent term.
        return (K.dot(merged, W) + b) + K.dot(h_prev, U)

    # Additive attention: tanh of summed projections, scored by w_e and
    # normalised with softmax.
    from_input = K.dot(x, self.W_t).dimshuffle((0, 'x', 1))
    from_hidden = K.dot(h_prev, self.W_a).dimshuffle((0, 'x', 1))
    energies = K.tanh(memory_proj + from_input + from_hidden).dot(self.w_e)
    weights = K.softmax(energies)
    summary = (memory * weights.dimshuffle((0, 1, 'x'))).sum(axis=1)

    # The gates see the attention summary concatenated with the raw input.
    merged = K.T.concatenate([summary, x], axis=1)

    in_gate = self.inner_activation(_pre_activation(self.W_i, self.b_i, self.U_i))
    forget_gate = self.inner_activation(_pre_activation(self.W_f, self.b_f, self.U_f))
    proposal = self.activation(_pre_activation(self.W_c, self.b_c, self.U_c))
    c_next = forget_gate * c_prev + in_gate * proposal
    out_gate = self.inner_activation(_pre_activation(self.W_o, self.b_o, self.U_o))
    h_next = out_gate * self.activation(c_next)
    return h_next, [h_next, c_next]
def get_output(self, train=False):
    """Build the layer output by running ``self.step`` over ``X[1]`` with ``K.rnn``.

    The layer input ``X`` is indexed as a collection: ``X[0]`` is stored as
    ``self.h_s``, ``X[1]`` as ``self.h_t`` (the sequence that is iterated
    over) and, when ``self.feed_state`` is set, ``X[2]`` as ``self.h_init``
    (used as the first initial state).

    Args:
        train: forwarded to ``self.get_input``; when false and the layer is
            stateful, the stored ``self.states`` are used as initial states.

    Returns:
        The full output sequence when ``self.return_sequences`` is true,
        otherwise only the last output.

    Side effects:
        Sets ``self.h_t``, ``self.h_s``, ``self.P_j`` (and ``self.h_init``
        when feeding a state); sets ``self.updates`` when stateful.
    """
    # input shape: (nb_samples, time (padded with zeros), input_dim)
    # NOTE(review): the shape above is the inherited comment; X is indexed
    # like a list of inputs here -- confirm the per-input shapes.
    X = self.get_input(train)
    self.h_t = X[1]
    self.h_s = X[0]
    if self.feed_state:
        self.h_init = X[2]
    self.P_j = K.dot(self.h_s, self.W_s)

    if self.stateful and not train:
        # Work on a copy: the feed_state override below must not replace
        # the persistent entry in self.states, otherwise the update pairs
        # built afterwards would target h_init instead of the stored state.
        initial_states = list(self.states)
    else:
        initial_states = self.get_initial_states(self.h_s)
    if self.feed_state:
        initial_states[0] = self.h_init

    last_output, outputs, states = K.rnn(self.step, self.h_t, initial_states)

    if self.stateful:
        # Pair every persistent state with the value it takes after this pass.
        self.updates = [
            (self.states[idx], new_state)
            for idx, new_state in enumerate(states)
        ]

    if self.return_sequences:
        return outputs
    return last_output
def get_initial_states(self, X):
    """Return all-zero initial states derived symbolically from ``X``.

    The zeros are produced from ``X`` itself (rather than from a fixed-size
    constant) so the result follows ``X``'s leading dimension.

    Args:
        X: tensor used only as a template.
            NOTE(review): presumably (samples, timesteps, dim) with
            dim == ``self.output_dim``, since the reducer below is
            (output_dim, output_dim) -- confirm.

    Returns:
        A list with ``len(self.states)`` entries, every entry being the same
        zero tensor object.
    """
    zero_template = K.zeros_like(X)
    # Collapse axis 1 so only the leading and trailing axes remain.
    collapsed = K.sum(zero_template, axis=1)
    # Multiplying by a zero matrix fixes the trailing size to output_dim.
    projector = K.zeros((self.output_dim, self.output_dim))
    zero_state = K.dot(collapsed, projector)
    return [zero_state] * len(self.states)
def get_initial_states(self, x):
    """Return zero initial states built from ``x``, optionally seeding slot 1.

    Args:
        x: tensor used as a template for the zero state.
            NOTE(review): presumably (samples, timesteps, input_dim) --
            confirm against the caller.

    Returns:
        A list with ``len(self.states)`` entries. Every entry is the same
        zero tensor, except that entry 1 is replaced by ``self.feed_layer``
        when that attribute is not ``None``.
    """
    # Zeros derived from x so the result follows x's leading dimension.
    collapsed = K.sum(K.zeros_like(x), axis=1)
    # A zero (input_dim, output_dim) matrix maps the trailing axis to output_dim.
    projector = K.zeros((self.input_dim, self.output_dim))
    zero_state = K.dot(collapsed, projector)

    seeded = [zero_state] * len(self.states)
    if self.feed_layer is None:
        return seeded
    seeded[1] = self.feed_layer
    return seeded
def get_constants(self, x):
    """Return ``x[1]`` together with its projection through ``self.W_s``.

    Args:
        x: indexable input; only element 1 is read.

    Returns:
        A two-element list ``[x[1], K.dot(x[1], self.W_s)]``.
    """
    selected = x[1]
    projected = K.dot(selected, self.W_s)
    return [selected, projected]