Example #1
0
def build_inference_decoder(mask=False,
                            demb=128,
                            h=128,
                            Vt=5000,
                            tied=True,
                            attn=True):
    """Assemble the stand-alone decoder graph used at inference time.

    The decoder consumes the encoder's full output sequence plus its final
    hidden/cell states, and emits unnormalised logits over the target
    vocabulary, optionally augmented with an additive attention term.
    """
    # Masking requires the maskable LSTM implementation; otherwise use CuDNN.
    rnn_cls = LSTM if mask else CuDNNLSTM

    # Placeholders: decoder token ids plus the encoder-side tensors.
    decoder_input = Input(batch_shape=(None, None),
                          dtype='float32',
                          name='decoder_input')
    encoder_outputs = Input(batch_shape=(None, None, h),
                            dtype='float32',
                            name='encoder_outputs')
    encoder_h = Input(batch_shape=(None, h), dtype='float32', name='encoder_h')
    encoder_c = Input(batch_shape=(None, h), dtype='float32', name='encoder_c')

    # With masking on, index 0 is reserved for padding, so the embedding
    # table needs one extra row.
    vocab_rows = Vt + 1 if mask else Vt
    decoder_emb_layer = Embedding(vocab_rows,
                                  demb,
                                  mask_zero=bool(mask),
                                  name='decoder_emb')
    embedded = decoder_emb_layer(decoder_input)

    # Run the decoder RNN, seeding it with the encoder's final states.
    decoder_rnn = rnn_cls(h, return_sequences=True, name='decoder_rnn')
    decoder_outputs = decoder_rnn(embedded,
                                  initial_state=[encoder_h, encoder_c])

    # Output projection: either tied to the (transposed) embedding matrix
    # or a free Dense layer.
    if tied:
        final_outputs = DenseTransposeTied(
            Vt, name='outputs', tied_to=decoder_emb_layer,
            activation='linear')(decoder_outputs)
    else:
        final_outputs = Dense(Vt, activation='linear',
                              name='outputs')(decoder_outputs)

    # Optionally add a context-based logit term computed via attention over
    # the encoder outputs.
    if attn:
        contexts = Attention(units=h, use_bias=False, name='attention')(
            [encoder_outputs, decoder_outputs])
        contexts_outputs = Dense(Vt,
                                 activation='linear',
                                 use_bias=False,
                                 name='context_outputs')(contexts)
        final_outputs = Add(name='final_outputs')(
            [final_outputs, contexts_outputs])

    model_inputs = [decoder_input, encoder_outputs, encoder_h, encoder_c]
    return Model(inputs=model_inputs, outputs=[final_outputs])
Example #2
0
def build_lm_model(emb_h=128,
                   h=128,
                   nh=1,
                   V=5000,
                   maxlen=35,
                   drop_p=0.25,
                   tied=False,
                   rnn_fn='lstm'):
    """Build a word-level language model: embedding -> stacked RNN -> logits.

    When ``tied`` is set, the output projection reuses the transposed
    embedding matrix, which requires ``emb_h == h``.
    """
    tokens = Input((maxlen, ))
    emb_layer = Embedding(V, emb_h, mask_zero=False)
    x = emb_layer(tokens)

    # Pick the recurrent cell; unknown names are rejected.
    rnn_classes = {'lstm': CuDNNLSTM, 'gru': CuDNNGRU}
    if rnn_fn not in rnn_classes:
        raise ValueError(rnn_fn)
    rnn = rnn_classes[rnn_fn]

    if drop_p > 0.:
        x = Dropout(drop_p)(x)

    # Stack the recurrent layers (always at least one), with dropout after
    # each layer when drop_p is positive.
    for _ in range(max(1, nh)):
        x = rnn(h, return_sequences=True)(x)
        if drop_p > 0.:
            x = Dropout(drop_p)(x)

    if tied:
        # Weight tying only makes sense when the embedding and hidden sizes
        # agree, since the output projection is the embedding transposed.
        if emb_h != h:
            raise ValueError(
                'When using the tied flag, nhid must be equal to emsize')
        logits = DenseTransposeTied(V, tied_to=emb_layer,
                                    activation='linear')(x)
    else:
        logits = Dense(V, activation='linear')(x)
    return Model(inputs=[tokens], outputs=[logits])
Example #3
0
def build_nmt_model(Vs,
                    Vt,
                    demb=128,
                    h=128,
                    drop_p=0.5,
                    tied=True,
                    mask=True,
                    attn=True,
                    l2_ratio=1e-4,
                    training=None,
                    rnn_fn='lstm'):
    """Build an encoder-decoder NMT model with optional attention, weight
    tying, masking, dropout, and L2 regularization on weights/embeddings.

    Vs/Vt are the source/target vocabulary sizes; `training` is forwarded to
    every Dropout call to pin its train/inference behavior.
    Returns an uncompiled Model mapping [encoder_input, decoder_input] to
    linear (unnormalised) logits over the target vocabulary.
    """
    if rnn_fn == 'lstm':
        rnn = LSTM if mask else CuDNNLSTM
    elif rnn_fn == 'gru':
        # NOTE(review): when mask=True this selects LSTM even though
        # rnn_fn == 'gru', so a maskable GRU is never used — looks like a
        # bug (GRU is likely intended here); confirm intent before fixing,
        # since GRU is not visibly imported in this file.
        rnn = LSTM if mask else CuDNNGRU
    else:
        raise ValueError(rnn_fn)

    # build encoder
    encoder_input = Input((None, ), dtype='float32', name='encoder_input')
    if mask:
        # Index 0 is reserved for padding, hence the extra embedding row.
        encoder_emb_layer = Embedding(Vs + 1,
                                      demb,
                                      mask_zero=True,
                                      embeddings_regularizer=l2(l2_ratio),
                                      name='encoder_emb')
    else:
        encoder_emb_layer = Embedding(Vs,
                                      demb,
                                      mask_zero=False,
                                      embeddings_regularizer=l2(l2_ratio),
                                      name='encoder_emb')

    encoder_emb = encoder_emb_layer(encoder_input)

    if drop_p > 0.:
        encoder_emb = Dropout(drop_p)(encoder_emb, training=training)

    encoder_rnn = rnn(h,
                      return_sequences=True,
                      return_state=True,
                      kernel_regularizer=l2(l2_ratio),
                      name='encoder_rnn')
    encoder_rtn = encoder_rnn(encoder_emb)
    # encoder_outputs, encoder_h, encoder_c = encoder_rnn(encoder_emb)
    # First element is the output sequence; the remainder are the final
    # state tensors (count varies by cell type), used to seed the decoder.
    encoder_outputs = encoder_rtn[0]
    encoder_states = encoder_rtn[1:]

    # build decoder
    decoder_input = Input((None, ), dtype='float32', name='decoder_input')
    if mask:
        decoder_emb_layer = Embedding(Vt + 1,
                                      demb,
                                      mask_zero=True,
                                      embeddings_regularizer=l2(l2_ratio),
                                      name='decoder_emb')
    else:
        decoder_emb_layer = Embedding(Vt,
                                      demb,
                                      mask_zero=False,
                                      embeddings_regularizer=l2(l2_ratio),
                                      name='decoder_emb')

    decoder_emb = decoder_emb_layer(decoder_input)

    if drop_p > 0.:
        decoder_emb = Dropout(drop_p)(decoder_emb, training=training)

    decoder_rnn = rnn(h,
                      return_sequences=True,
                      kernel_regularizer=l2(l2_ratio),
                      name='decoder_rnn')
    decoder_outputs = decoder_rnn(decoder_emb, initial_state=encoder_states)

    if drop_p > 0.:
        decoder_outputs = Dropout(drop_p)(decoder_outputs, training=training)

    # Output projection: tied to the decoder embedding matrix, or a free
    # Dense layer.
    if tied:
        final_outputs = DenseTransposeTied(
            Vt,
            kernel_regularizer=l2(l2_ratio),
            name='outputs',
            tied_to=decoder_emb_layer,
            activation='linear')(decoder_outputs)
    else:
        final_outputs = Dense(Vt,
                              activation='linear',
                              kernel_regularizer=l2(l2_ratio),
                              name='outputs')(decoder_outputs)

    if attn:
        # Attention over encoder outputs, queried by the decoder outputs;
        # its context vectors contribute an additive logit term.
        contexts = Attention(
            units=h,
            kernel_regularizer=l2(l2_ratio),
            name='attention',
            use_bias=False)([encoder_outputs, decoder_outputs])
        if drop_p > 0.:
            contexts = Dropout(drop_p)(contexts, training=training)

        contexts_outputs = Dense(Vt,
                                 activation='linear',
                                 use_bias=False,
                                 name='context_outputs',
                                 kernel_regularizer=l2(l2_ratio))(contexts)

        final_outputs = Add(name='final_outputs')(
            [final_outputs, contexts_outputs])

    model = Model(inputs=[encoder_input, decoder_input],
                  outputs=[final_outputs])
    return model
Example #4
0
def build_dialogue_model(Vs,
                         Vt,
                         demb=128,
                         h=128,
                         drop_p=0.5,
                         tied=True,
                         mask=True,
                         training=None,
                         rnn_fn='lstm'):
    """Build a seq2seq dialogue model (encoder-decoder without attention and
    without weight regularization).

    Vs/Vt are the source/target vocabulary sizes; `training` is forwarded to
    every Dropout call to pin its train/inference behavior.
    Returns an uncompiled Model mapping [encoder_input, decoder_input] to
    linear (unnormalised) logits over the target vocabulary.
    """
    if rnn_fn == 'lstm':
        rnn = LSTM if mask else CuDNNLSTM
    elif rnn_fn == 'gru':
        # NOTE(review): when mask=True this selects LSTM even though
        # rnn_fn == 'gru', so a maskable GRU is never used — looks like a
        # bug (GRU is likely intended here); confirm intent before fixing,
        # since GRU is not visibly imported in this file.
        rnn = LSTM if mask else CuDNNGRU
    else:
        raise ValueError(rnn_fn)

    # build encoder
    encoder_input = Input((None, ), dtype='float32', name='encoder_input')
    if mask:
        # Index 0 is reserved for padding, hence the extra embedding row.
        encoder_emb_layer = Embedding(Vs + 1,
                                      demb,
                                      mask_zero=True,
                                      name='encoder_emb')
    else:
        encoder_emb_layer = Embedding(Vs,
                                      demb,
                                      mask_zero=False,
                                      name='encoder_emb')

    encoder_emb = encoder_emb_layer(encoder_input)

    if drop_p > 0.:
        encoder_emb = Dropout(drop_p)(encoder_emb, training=training)

    encoder_rnn = rnn(h,
                      return_sequences=True,
                      return_state=True,
                      name='encoder_rnn')
    encoder_rtn = encoder_rnn(encoder_emb)
    # # encoder_outputs, encoder_h, encoder_c = encoder_rnn(encoder_emb)
    # encoder_outputs = encoder_rtn[0]
    # Only the final states are kept (no attention here); they seed the
    # decoder's initial state.
    encoder_states = encoder_rtn[1:]

    # build decoder
    decoder_input = Input((None, ), dtype='float32', name='decoder_input')
    if mask:
        decoder_emb_layer = Embedding(Vt + 1,
                                      demb,
                                      mask_zero=True,
                                      name='decoder_emb')
    else:
        decoder_emb_layer = Embedding(Vt,
                                      demb,
                                      mask_zero=False,
                                      name='decoder_emb')

    decoder_emb = decoder_emb_layer(decoder_input)

    if drop_p > 0.:
        decoder_emb = Dropout(drop_p)(decoder_emb, training=training)

    decoder_rnn = rnn(h, return_sequences=True, name='decoder_rnn')
    decoder_outputs = decoder_rnn(decoder_emb, initial_state=encoder_states)

    if drop_p > 0.:
        decoder_outputs = Dropout(drop_p)(decoder_outputs, training=training)

    # Output projection: tied to the decoder embedding matrix, or a free
    # Dense layer.
    if tied:
        final_outputs = DenseTransposeTied(Vt,
                                           tied_to=decoder_emb_layer,
                                           activation='linear',
                                           name='outputs')(decoder_outputs)
    else:
        final_outputs = Dense(Vt, activation='linear',
                              name='outputs')(decoder_outputs)

    model = Model(inputs=[encoder_input, decoder_input],
                  outputs=[final_outputs])
    return model