def __call__(self, input):
    rnn_cell_1 = GRUCell(units=self.rnn_units_1,
                         dropout=self.dropout,
                         recurrent_dropout=self.recurrent_dropout,
                         name=self.name + '_rnn_cell_1' if self.name else None)
    rnn_cell_2 = GRUCell(units=self.rnn_units_2,
                         dropout=self.dropout,
                         recurrent_dropout=self.recurrent_dropout,
                         name=self.name + '_rnn_cell_2' if self.name else None)
    # gru_cell_3 = GRUCell(units=rnn_units_3, dropout=rnn_dropout,
    #                      recurrent_dropout=rnn_recurrent_dropout,
    #                      reset_after=False)
    rnn_stack_cell = StackedRNNCells(
        cells=[rnn_cell_1, rnn_cell_2],
        name=self.name + '_stacked_rnn_cell' if self.name else None)
    rnn = RNN(cell=rnn_stack_cell,
              return_state=self.return_state,
              return_sequences=self.return_sequence,
              unroll=self.unroll,
              name=self.name)(input)
    return rnn
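# --- Hedged usage sketch (not from the original source) ----------------------
# A minimal standalone version of the pattern above: two GRUCells stacked into
# a single recurrent layer via StackedRNNCells. Unit counts and shapes are
# illustrative assumptions, not values taken from the snippet.
import numpy as np
from keras.layers import GRUCell, Input, RNN, StackedRNNCells
from keras.models import Model

seq_in = Input(shape=(10, 8))                        # (timesteps, features)
stack = StackedRNNCells([GRUCell(16), GRUCell(16)])
seq_out = RNN(stack)(seq_in)                         # last output of top cell
sketch_model = Model(seq_in, seq_out)
print(sketch_model.predict(np.zeros((2, 10, 8))).shape)   # -> (2, 16)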
def get_model(self):
    # Input text
    encoder_inputs = Input(shape=(None,))
    # Input summary
    decoder_inputs = Input(shape=(None,))
    # word embedding layer for text
    encoder_inputs_emb = Embedding(input_dim=self.num_encoder_tokens + 1,
                                   output_dim=self.embedding_dim,
                                   mask_zero=True)(encoder_inputs)
    # word embedding layer for summary
    decoder_inputs_emb = Embedding(input_dim=self.num_decoder_tokens + 1,
                                   output_dim=self.embedding_dim,
                                   mask_zero=True)(decoder_inputs)
    # Bidirectional LSTM encoder
    encoder_out = Bidirectional(LSTM(self.hidden_dim // 2,
                                     return_sequences=True,
                                     return_state=True),
                                merge_mode='concat')(encoder_inputs_emb)
    encoder_o = encoder_out[0]
    initial_h_lstm = concatenate([encoder_out[1], encoder_out[2]])
    initial_c_lstm = concatenate([encoder_out[3], encoder_out[4]])
    initial_decoder_state = Dense(self.hidden_dim, activation='tanh')(
        concatenate([initial_h_lstm, initial_c_lstm]))
    # GRU decoder + attention
    initial_attention_h = Lambda(lambda x: K.zeros_like(x)[:, 0, :])(encoder_o)
    initial_state = [initial_decoder_state, initial_attention_h]
    cell = DenseAnnotationAttention(cell=GRUCell(self.hidden_dim),
                                    units=self.hidden_dim,
                                    input_mode="concatenate",
                                    output_mode="cell_output")
    # TODO output_mode="concatenate", see TODO(3)/A
    decoder_o, decoder_h, decoder_c = RNN(cell=cell,
                                          return_sequences=True,
                                          return_state=True)(
        decoder_inputs_emb, initial_state=initial_state, constants=encoder_o)
    decoder_o = Dense(self.hidden_dim * 2)(
        concatenate([decoder_o, decoder_inputs_emb]))
    y_pred = TimeDistributed(
        Dense(self.num_decoder_tokens + 1, activation='softmax'))(decoder_o)
    model = Model([encoder_inputs, decoder_inputs], y_pred)
    return model
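# --- Hedged usage sketch (not from the original source) ----------------------
# The state-bridging trick above in isolation: Bidirectional(LSTM(...,
# return_state=True)) returns [outputs, h_fwd, c_fwd, h_bkw, c_bkw]; the
# hidden and cell states are concatenated and passed through a tanh Dense
# layer to form the decoder's initial state. Shapes here are illustrative.
import numpy as np
from keras.layers import Bidirectional, Dense, Input, LSTM, concatenate
from keras.models import Model

src = Input(shape=(None, 8))
outs = Bidirectional(LSTM(16, return_sequences=True, return_state=True),
                     merge_mode='concat')(src)
h = concatenate([outs[1], outs[3]])                  # fwd/bkw hidden states
c = concatenate([outs[2], outs[4]])                  # fwd/bkw cell states
init_state = Dense(32, activation='tanh')(concatenate([h, c]))
bridge = Model(src, init_state)
print(bridge.predict(np.zeros((2, 5, 8))).shape)     # -> (2, 32)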
from keras.layers import GRUCell, LSTMCell, SimpleRNNCell
from keras.regularizers import l1_l2


def select_cell(cell_type, hidden_dim, l1=0.0, l2=0.0):
    """Selects an RNN cell and initialises it with `hidden_dim` units."""
    if cell_type == 'vanilla':
        return SimpleRNNCell(units=hidden_dim,
                             kernel_regularizer=l1_l2(l1=l1, l2=l2),
                             recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    elif cell_type == 'gru':
        return GRUCell(units=hidden_dim,
                       kernel_regularizer=l1_l2(l1=l1, l2=l2),
                       recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    elif cell_type == 'lstm':
        return LSTMCell(units=hidden_dim,
                        kernel_regularizer=l1_l2(l1=l1, l2=l2),
                        recurrent_regularizer=l1_l2(l1=l1, l2=l2))
    else:
        raise ValueError(
            'Unknown cell type. Please select one of: vanilla, gru, or lstm.')
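# --- Hedged usage sketch (not from the original source) ----------------------
# Wrapping the returned cell in Keras's generic RNN layer; input shape and
# regularisation strength are illustrative assumptions.
import numpy as np
from keras.layers import Input, RNN
from keras.models import Model

cell_in = Input(shape=(12, 4))
cell_out = RNN(select_cell('gru', hidden_dim=32, l2=1e-4))(cell_in)
cell_model = Model(cell_in, cell_out)
print(cell_model.predict(np.zeros((1, 12, 4))).shape)    # -> (1, 32)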
def call(self, inputs):
    """Inputs should be [message, previous_state]; returns [next_state]."""
    return GRUCell.call(self, inputs[0], [inputs[1]])[0]
def build(self, input_shape):
    # input_shape is [message_shape, state_shape]; the GRU weights are built
    # against the message shape.
    GRUCell.build(self, input_shape[0])
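# --- Hedged sketch (not from the original source) -----------------------------
# The two methods above imply a GRUCell subclass used as a node-state update
# that is called like an ordinary layer on [message, previous_state]. The
# class name MessageGRU is a hypothetical stand-in for whatever class defines
# build() and call() in the snippet; shapes are illustrative.
from keras.layers import GRUCell, Input


class MessageGRU(GRUCell):
    def build(self, input_shape):
        GRUCell.build(self, input_shape[0])

    def call(self, inputs):
        return GRUCell.call(self, inputs[0], [inputs[1]])[0]


message = Input(shape=(32,))
prev_state = Input(shape=(32,))
next_state = MessageGRU(32)([message, prev_state])   # -> (None, 32) tensor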
from keras import backend as K
from keras.initializers import RandomNormal
from keras.layers import GRUCell, Layer


class GGNN(Layer):
    """Implementation of the adapted GGNN introduced in Ding et al.,
    "A Neural Multi-digraph Model for Chinese NER with Gazetteers".
    """

    def __init__(self, units, n_gaze, n_step, **kwargs):
        super(GGNN, self).__init__(**kwargs)
        self.units = units
        self.n_gaze = n_gaze
        self.n_edge = (self.n_gaze + 1) * 2
        self.n_step = n_step
        self.gru_cell = GRUCell(units=self.units)

    def build(self, input_shape):
        embed_dim = input_shape[0][-1]
        assert embed_dim == self.units
        self.alpha = self.add_weight(name=self.name + '_contribution_coefficient',
                                     shape=(self.n_edge,),
                                     initializer='ones')
        self.w = self.add_weight(name=self.name + '_w',
                                 shape=(self.n_edge, embed_dim, self.units),
                                 initializer=RandomNormal(0., 0.02))
        self.b = self.add_weight(name=self.name + '_b',
                                 shape=(self.n_edge, self.units),
                                 initializer='zeros')
        self.gru_cell.build([None, self.units * self.n_edge])
        super(GGNN, self).build(input_shape)

    def call(self, inputs, **kwargs):
        # init_state: [batch_size, n_node, embed_dim]
        # adj_matrix: [batch_size, n_edge, n_node, n_node]
        init_state, adj_matrix = inputs
        n_node = K.shape(init_state)[1]
        expand_alpha = K.expand_dims(K.expand_dims(self.alpha, axis=-1), axis=-1)
        weighted_adj_matrix = adj_matrix * K.sigmoid(expand_alpha)
        cur_state = K.identity(init_state)
        for _ in range(self.n_step):
            # per-edge-type linear transform of all node states
            h = K.dot(cur_state, self.w) + self.b  # [batch_size, n_node, n_edge, units]
            neigh_state = []
            for edge_idx in range(self.n_edge):
                # aggregate neighbour states along this edge type
                neigh_state.append(
                    K.batch_dot(weighted_adj_matrix[:, edge_idx, :, :],
                                h[:, :, edge_idx, :],
                                axes=(2, 1)))  # [batch_size, n_node, units]
            neigh_state = K.concatenate(neigh_state,
                                        axis=-1)  # [batch_size, n_node, units*n_edge]
            # flatten nodes into the batch dimension and delegate the state
            # update to GRUCell's step function
            gru_inputs = K.reshape(neigh_state, (-1, self.units * self.n_edge))
            gru_states = K.reshape(cur_state, (-1, self.units))
            gru_output, _ = self.gru_cell.call(inputs=gru_inputs,
                                               states=[gru_states])
            cur_state = K.reshape(gru_output, (-1, n_node, self.units))
        return cur_state

    def compute_output_shape(self, input_shape):
        return input_shape[0][0], input_shape[0][1], self.units

    @property
    def trainable_weights(self):
        return self._trainable_weights + self.gru_cell.trainable_weights
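# --- Hedged usage sketch (not from the original source) ----------------------
# Driving GGNN with random node states and a random binary multi-digraph
# adjacency tensor. With n_gaze=1 there are (1 + 1) * 2 = 4 edge types; all
# sizes here are illustrative assumptions.
import numpy as np
from keras.layers import Input
from keras.models import Model

units, n_gaze, n_step = 16, 1, 2
n_edge = (n_gaze + 1) * 2
nodes = Input(shape=(None, units))                   # [n_node, embed_dim]
adj = Input(shape=(n_edge, None, None))              # [n_edge, n_node, n_node]
states = GGNN(units=units, n_gaze=n_gaze, n_step=n_step)([nodes, adj])
ggnn_model = Model([nodes, adj], states)
x = np.random.rand(2, 7, units).astype('float32')
a = (np.random.rand(2, n_edge, 7, 7) > 0.5).astype('float32')
print(ggnn_model.predict([x, a]).shape)              # -> (2, 7, 16)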
def __init__(self, units,
             activation='tanh',
             recurrent_activation='hard_sigmoid',
             use_bias=True,
             kernel_initializer='glorot_uniform',
             recurrent_initializer='orthogonal',
             bias_initializer='zeros',
             kernel_regularizer=None,
             recurrent_regularizer=None,
             bias_regularizer=None,
             activity_regularizer=None,
             kernel_constraint=None,
             recurrent_constraint=None,
             bias_constraint=None,
             dropout=0.,
             recurrent_dropout=0.,
             implementation=1,
             return_sequences=False,
             return_state=False,
             go_backwards=False,
             stateful=False,
             unroll=False,
             reset_after=False,
             **kwargs):
    if implementation == 0:
        warnings.warn('`implementation=0` has been deprecated, '
                      'and now defaults to `implementation=1`. '
                      'Please update your layer call.')
    if K.backend() == 'theano' and (dropout or recurrent_dropout):
        warnings.warn(
            'RNN dropout is no longer supported with the Theano backend '
            'due to technical limitations. '
            'You can either set `dropout` and `recurrent_dropout` to 0, '
            'or use the TensorFlow backend.')
        dropout = 0.
        recurrent_dropout = 0.
    cell = GRUCell(units,
                   activation=activation,
                   recurrent_activation=recurrent_activation,
                   use_bias=use_bias,
                   kernel_initializer=kernel_initializer,
                   recurrent_initializer=recurrent_initializer,
                   bias_initializer=bias_initializer,
                   kernel_regularizer=kernel_regularizer,
                   recurrent_regularizer=recurrent_regularizer,
                   bias_regularizer=bias_regularizer,
                   kernel_constraint=kernel_constraint,
                   recurrent_constraint=recurrent_constraint,
                   bias_constraint=bias_constraint,
                   dropout=dropout,
                   recurrent_dropout=recurrent_dropout,
                   implementation=implementation,
                   reset_after=reset_after)
    super(AttGRU, self).__init__(cell,
                                 return_sequences=return_sequences,
                                 return_state=return_state,
                                 go_backwards=go_backwards,
                                 stateful=stateful,
                                 unroll=unroll,
                                 **kwargs)
    self.activity_regularizer = regularizers.get(activity_regularizer)
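# --- Hedged sketch (not from the original source) -----------------------------
# The constructor above follows the same pattern as Keras 2's built-in GRU
# layer: construct a GRUCell, then hand it to the RNN base class. A minimal
# standalone instance of that wrapping pattern (without whatever attention
# machinery AttGRU presumably adds elsewhere):
from keras.layers import GRUCell, RNN


class PlainGRU(RNN):
    def __init__(self, units, **kwargs):
        super(PlainGRU, self).__init__(GRUCell(units), **kwargs)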
mask_zero=True)(x)
y_emb = Embedding(target_max_word_idx + 1, EMBEDDING_SIZE, mask_zero=True)(y)

encoder_rnn = Bidirectional(
    GRU(RECURRENT_UNITS, return_sequences=True, return_state=True))
x_enc, h_enc_fwd_final, h_enc_bkw_final = encoder_rnn(x_emb)

# the final state of the backward GRU (closest to the start of the input
# sentence) is used to initialize the state of the decoder
initial_state_gru = Dense(RECURRENT_UNITS, activation='tanh')(h_enc_bkw_final)
initial_attention_h = Lambda(lambda x: K.zeros_like(x)[:, 0, :])(x_enc)
initial_state = [initial_state_gru, initial_attention_h]

cell = DenseAnnotationAttention(cell=GRUCell(RECURRENT_UNITS),
                                units=DENSE_ATTENTION_UNITS,
                                input_mode="concatenate",
                                output_mode="cell_output")
# TODO output_mode="concatenate", see TODO(3)/A
decoder_rnn = RNN(cell=cell, return_sequences=True, return_state=True)
h1_and_state = decoder_rnn(y_emb, initial_state=initial_state, constants=x_enc)
h1 = h1_and_state[0]


def dense_maxout(x_):
    """Implements a dense maxout layer where max is taken over _two_ units"""
    x_ = Dense(READOUT_HIDDEN_UNITS * 2)(x_)
    x_1 = x_[:, :READOUT_HIDDEN_UNITS]
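    # NOTE: the snippet is truncated here. A plausible completion, inferred
    # from the docstring (max taken over two units) and NOT recovered from
    # the original source: split the doubled projection in half and take the
    # element-wise maximum.
    x_2 = x_[:, READOUT_HIDDEN_UNITS:]
    return K.maximum(x_1, x_2)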