def call(self, _inputs, _states, training=None):
    """Run one GRU step on abstract-interpretation (AI) values.

    The flat state vector packs (center | radius | errors) thirds of width
    ``self.units`` each; the flat input packs the same thirds of width
    ``self.input_dim``.  Per the Keras RNN-cell convention, returns the new
    packed state both as the output and as the single-element state list.

    NOTE(review): relies on `AI.matmul` / `bias_add` / `activation` mutating
    the receiver in place — statement order below is significant.
    """
    units = self.units
    in_dim = self.input_dim

    # Unpack the (center | radius | errors) thirds of state and input.
    state_c = _states[0][:, :units]
    state_r = _states[0][:, units:units * 2]
    state_e = _states[0][:, units * 2:]
    in_c = _inputs[:, :in_dim]
    in_r = _inputs[:, in_dim:in_dim * 2]
    in_e = _inputs[:, in_dim * 2:]

    h_tm1 = AI(state_c, state_r, state_e, False)
    inputs = AI(in_c, in_r, in_e, False)
    # Separate AI copy of the state: the projections below mutate in place,
    # while h_tm1 must stay untouched for the final merge.
    matrix_inner = AI(state_c, state_r, state_e, False)

    if self.implementation == 1:
        raise NotImplementedError()

    # Inputs projected by all gate matrices at once.
    inputs.matmul(self.kernel)
    if self.use_bias:
        # biases: bias_z_i, bias_r_i, bias_h_i
        inputs.bias_add(self.input_bias)

    x_z = inputs[:, :units]
    x_r = inputs[:, units:2 * units]
    x_h = inputs[:, 2 * units:]

    if self.reset_after:
        # Hidden state projected by all gate matrices at once.
        matrix_inner.matmul(self.recurrent_kernel)
        if self.use_bias:
            matrix_inner.bias_add(self.recurrent_bias)
    else:
        # Hidden state projected separately for the update/reset gates;
        # the candidate projection happens after the reset gate is applied.
        matrix_inner.matmul(self.recurrent_kernel[:, :2 * units])

    recurrent_z = matrix_inner[:, :units]
    recurrent_r = matrix_inner[:, units:2 * units]

    # Update-gate pre-activation: recurrent_activation is applied inside
    # GRU_merge1, so it is deliberately NOT applied here.
    z = x_z + recurrent_z
    r = x_r + recurrent_r
    r.activation(self.recurrent_activation)

    if self.reset_after:
        recurrent_h = r * matrix_inner[:, 2 * units:]
    else:
        recurrent_h = r * h_tm1
        recurrent_h.matmul(self.recurrent_kernel[:, 2 * units:])

    # Candidate pre-activation: self.activation is applied inside GRU_merge1.
    hh = x_h + recurrent_h

    # Previous and candidate state mixed by the update gate.
    h = z.GRU_merge1(self.recurrent_activation, h_tm1, hh, self.activation)
    h_state = h.to_state()
    return h_state, [h_state]
def cell(h_tm1_c, h_tm1_r, h_tm1_e, inputs_c, inputs_r, inputs_e):
    """One GRU step on abstract-interpretation (AI) components.

    Takes the previous hidden state and the current input already split into
    their (center, radius, errors) components and returns the new hidden
    state as an AI value.  Reads GRU weights and configuration from the
    enclosing ``self`` (this function is a closure — ``self`` is free here).

    NOTE(review): relies on `AI.matmul` / `bias_add` / `activation` mutating
    the receiver in place — statement order below is significant.
    """
    units = self.units

    h_tm1 = AI(h_tm1_c, h_tm1_r, h_tm1_e, False)
    inputs = AI(inputs_c, inputs_r, inputs_e, False)
    # Separate AI copy of the state: the projections below mutate in place,
    # while h_tm1 must stay untouched for the final merge.
    matrix_inner = AI(h_tm1_c, h_tm1_r, h_tm1_e, False)

    if self.implementation == 1:
        raise NotImplementedError()

    # Inputs projected by all gate matrices at once.
    inputs.matmul(self.kernel)
    if self.use_bias:
        # biases: bias_z_i, bias_r_i, bias_h_i
        inputs.bias_add(self.input_bias)

    x_z = inputs[:, :units]
    x_r = inputs[:, units:2 * units]
    x_h = inputs[:, 2 * units:]

    if self.reset_after:
        # Hidden state projected by all gate matrices at once.
        matrix_inner.matmul(self.recurrent_kernel)
        if self.use_bias:
            matrix_inner.bias_add(self.recurrent_bias)
    else:
        # Hidden state projected separately for the update/reset gates;
        # the candidate projection happens after the reset gate is applied.
        matrix_inner.matmul(self.recurrent_kernel[:, :2 * units])

    recurrent_z = matrix_inner[:, :units]
    recurrent_r = matrix_inner[:, units:2 * units]

    # Update-gate pre-activation: recurrent_activation is applied inside
    # GRU_merge1, so it is deliberately NOT applied here.
    z = x_z + recurrent_z
    r = x_r + recurrent_r
    r.activation(self.recurrent_activation)

    if self.reset_after:
        recurrent_h = r * matrix_inner[:, 2 * units:]
    else:
        recurrent_h = r * h_tm1
        recurrent_h.matmul(self.recurrent_kernel[:, 2 * units:])

    # Candidate pre-activation: self.activation is applied inside GRU_merge1.
    hh = x_h + recurrent_h

    # Previous and candidate state mixed by the update gate.
    return z.GRU_merge1(self.recurrent_activation, h_tm1, hh, self.activation)