def build_inference_decoder(mask=False, demb=128, h=128, Vt=5000, tied=True, attn=True):
    """Assemble a standalone decoder graph for inference.

    The model takes the target-side token ids plus a previously-run
    encoder's output sequence and final (h, c) states, and returns
    unnormalised target-vocabulary logits.
    """
    # CuDNN kernels do not support masking; fall back to the plain LSTM.
    cell = LSTM if mask else CuDNNLSTM

    # Placeholders fed from an encoder that was executed separately.
    dec_in = Input(batch_shape=(None, None), dtype='float32', name='decoder_input')
    enc_seq = Input(batch_shape=(None, None, h), dtype='float32', name='encoder_outputs')
    enc_h = Input(batch_shape=(None, h), dtype='float32', name='encoder_h')
    enc_c = Input(batch_shape=(None, h), dtype='float32', name='encoder_c')

    # When masking, index 0 is reserved for padding, so the table gets one extra row.
    vocab = Vt + 1 if mask else Vt
    emb_layer = Embedding(vocab, demb, mask_zero=mask, name='decoder_emb')
    emb = emb_layer(dec_in)

    # Decoder RNN is seeded with the encoder's final states.
    dec_seq = cell(h, return_sequences=True, name='decoder_rnn')(
        emb, initial_state=[enc_h, enc_c])

    if tied:
        # Output projection shares its weights with the decoder embedding.
        logits = DenseTransposeTied(Vt, name='outputs', tied_to=emb_layer,
                                    activation='linear')(dec_seq)
    else:
        logits = Dense(Vt, activation='linear', name='outputs')(dec_seq)

    if attn:
        # Attention over the encoder sequence contributes additive logits.
        ctx = Attention(units=h, use_bias=False, name='attention')(
            [enc_seq, dec_seq])
        ctx_logits = Dense(Vt, activation='linear', use_bias=False,
                           name='context_outputs')(ctx)
        logits = Add(name='final_outputs')([logits, ctx_logits])

    return Model(inputs=[dec_in, enc_seq, enc_h, enc_c], outputs=[logits])
def build_lm_model(emb_h=128, h=128, nh=1, V=5000, maxlen=35, drop_p=0.25, tied=False, rnn_fn='lstm'):
    """Build a token-level language model: embedding -> stacked RNN -> logits.

    Returns a Model mapping a (maxlen,) id sequence to per-step
    vocabulary logits (linear activation; softmax is applied by the loss).
    """
    inp = Input((maxlen, ))
    emb_layer = Embedding(V, emb_h, mask_zero=False)
    x = emb_layer(inp)

    # Pick the (GPU-only) recurrent cell.
    if rnn_fn == 'lstm':
        cell = CuDNNLSTM
    elif rnn_fn == 'gru':
        cell = CuDNNGRU
    else:
        raise ValueError(rnn_fn)

    if drop_p > 0.:
        x = Dropout(drop_p)(x)

    # nh stacked recurrent layers, each followed by optional dropout.
    for _ in range(nh):
        x = cell(h, return_sequences=True)(x)
        if drop_p > 0.:
            x = Dropout(drop_p)(x)

    if tied:
        # Weight tying requires the embedding and hidden sizes to match.
        if emb_h != h:
            raise ValueError(
                'When using the tied flag, nhid must be equal to emsize')
        out = DenseTransposeTied(V, tied_to=emb_layer,
                                 activation='linear')(x)
    else:
        out = Dense(V, activation='linear')(x)

    return Model(inputs=[inp], outputs=[out])
def build_nmt_model(Vs, Vt, demb=128, h=128, drop_p=0.5, tied=True, mask=True,
                    attn=True, l2_ratio=1e-4, training=None, rnn_fn='lstm'):
    """Build an encoder-decoder translation model.

    Vs / Vt are the source / target vocabulary sizes. Returns a Model
    mapping [encoder_input, decoder_input] to unnormalised target
    logits, with optional attention, tied output weights and L2
    regularisation on embeddings and RNN kernels.
    """
    # CuDNN cells cannot consume masks, so masked models use plain LSTM.
    # NOTE(review): with rnn_fn='gru' and mask=True this also selects
    # LSTM rather than GRU -- looks unintended; confirm before changing.
    if rnn_fn == 'lstm':
        cell = LSTM if mask else CuDNNLSTM
    elif rnn_fn == 'gru':
        cell = LSTM if mask else CuDNNGRU
    else:
        raise ValueError(rnn_fn)

    # ----- encoder -----
    src_in = Input((None, ), dtype='float32', name='encoder_input')
    # With masking, index 0 is padding, so the table needs one extra row.
    src_vocab = Vs + 1 if mask else Vs
    src_emb_layer = Embedding(src_vocab, demb, mask_zero=mask,
                              embeddings_regularizer=l2(l2_ratio),
                              name='encoder_emb')
    src_emb = src_emb_layer(src_in)
    if drop_p > 0.:
        src_emb = Dropout(drop_p)(src_emb, training=training)

    enc_rtn = cell(h, return_sequences=True, return_state=True,
                   kernel_regularizer=l2(l2_ratio),
                   name='encoder_rnn')(src_emb)
    # First element is the output sequence; the rest are the final
    # state(s) -- one tensor for GRU, two (h, c) for LSTM.
    enc_seq, enc_states = enc_rtn[0], enc_rtn[1:]

    # ----- decoder -----
    tgt_in = Input((None, ), dtype='float32', name='decoder_input')
    tgt_vocab = Vt + 1 if mask else Vt
    tgt_emb_layer = Embedding(tgt_vocab, demb, mask_zero=mask,
                              embeddings_regularizer=l2(l2_ratio),
                              name='decoder_emb')
    tgt_emb = tgt_emb_layer(tgt_in)
    if drop_p > 0.:
        tgt_emb = Dropout(drop_p)(tgt_emb, training=training)

    dec_seq = cell(h, return_sequences=True,
                   kernel_regularizer=l2(l2_ratio),
                   name='decoder_rnn')(tgt_emb, initial_state=enc_states)
    if drop_p > 0.:
        dec_seq = Dropout(drop_p)(dec_seq, training=training)

    if tied:
        # Output projection reuses the decoder embedding weights.
        logits = DenseTransposeTied(Vt, kernel_regularizer=l2(l2_ratio),
                                    name='outputs', tied_to=tgt_emb_layer,
                                    activation='linear')(dec_seq)
    else:
        logits = Dense(Vt, activation='linear',
                       kernel_regularizer=l2(l2_ratio),
                       name='outputs')(dec_seq)

    if attn:
        # Attention context contributes additive logits.
        ctx = Attention(units=h, kernel_regularizer=l2(l2_ratio),
                        name='attention', use_bias=False)([enc_seq, dec_seq])
        if drop_p > 0.:
            ctx = Dropout(drop_p)(ctx, training=training)
        ctx_logits = Dense(Vt, activation='linear', use_bias=False,
                           name='context_outputs',
                           kernel_regularizer=l2(l2_ratio))(ctx)
        logits = Add(name='final_outputs')([logits, ctx_logits])

    return Model(inputs=[src_in, tgt_in], outputs=[logits])
def build_dialogue_model(Vs, Vt, demb=128, h=128, drop_p=0.5, tied=True, mask=True,
                         training=None, rnn_fn='lstm'):
    """Build an encoder-decoder dialogue model (no attention, no L2).

    Vs / Vt are the source / target vocabulary sizes. Returns a Model
    mapping [encoder_input, decoder_input] to unnormalised target
    logits, optionally tying the output projection to the decoder
    embedding.
    """
    # CuDNN cells cannot consume masks, so masked models use plain LSTM.
    # NOTE(review): with rnn_fn='gru' and mask=True this also selects
    # LSTM rather than GRU -- looks unintended; confirm before changing.
    if rnn_fn == 'lstm':
        cell = LSTM if mask else CuDNNLSTM
    elif rnn_fn == 'gru':
        cell = LSTM if mask else CuDNNGRU
    else:
        raise ValueError(rnn_fn)

    # ----- encoder -----
    src_in = Input((None, ), dtype='float32', name='encoder_input')
    # With masking, index 0 is padding, so the table needs one extra row.
    src_vocab = Vs + 1 if mask else Vs
    src_emb_layer = Embedding(src_vocab, demb, mask_zero=mask,
                              name='encoder_emb')
    src_emb = src_emb_layer(src_in)
    if drop_p > 0.:
        src_emb = Dropout(drop_p)(src_emb, training=training)

    enc_rtn = cell(h, return_sequences=True, return_state=True,
                   name='encoder_rnn')(src_emb)
    # Only the final state(s) are used; the output sequence is discarded
    # (one state tensor for GRU, two (h, c) for LSTM).
    enc_states = enc_rtn[1:]

    # ----- decoder -----
    tgt_in = Input((None, ), dtype='float32', name='decoder_input')
    tgt_vocab = Vt + 1 if mask else Vt
    tgt_emb_layer = Embedding(tgt_vocab, demb, mask_zero=mask,
                              name='decoder_emb')
    tgt_emb = tgt_emb_layer(tgt_in)
    if drop_p > 0.:
        tgt_emb = Dropout(drop_p)(tgt_emb, training=training)

    dec_seq = cell(h, return_sequences=True,
                   name='decoder_rnn')(tgt_emb, initial_state=enc_states)
    if drop_p > 0.:
        dec_seq = Dropout(drop_p)(dec_seq, training=training)

    if tied:
        # Output projection reuses the decoder embedding weights.
        logits = DenseTransposeTied(Vt, tied_to=tgt_emb_layer,
                                    activation='linear',
                                    name='outputs')(dec_seq)
    else:
        logits = Dense(Vt, activation='linear', name='outputs')(dec_seq)

    return Model(inputs=[src_in, tgt_in], outputs=[logits])