Example #1
    def __init__(self,
                 rnn_dim,
                 rnn_unit='gru',
                 input_shape=(0, ),
                 dropout=0.0,
                 highway=False,
                 return_sequences=False,
                 dense_dim=0):
        if rnn_unit == 'gru':
            rnn = GRU
        else:
            rnn = LSTM
        self.model = Sequential()
        self.model.add(
            Bidirectional(rnn(rnn_dim,
                              dropout=dropout,
                              recurrent_dropout=dropout,
                              return_sequences=return_sequences),
                          input_shape=input_shape))
        # self.model.add(rnn(rnn_dim,
        #                    dropout=dropout,
        #                    recurrent_dropout=dropout,
        #                    return_sequences=return_sequences,
        #                    input_shape=input_shape))
        if highway:
            if return_sequences:
                self.model.add(TimeDistributed(Highway(activation='tanh')))
            else:
                self.model.add(Highway(activation='tanh'))

        if dense_dim > 0:
            # NOTE: this projection assumes return_sequences=True; with the
            # single-vector output of the non-sequence case, TimeDistributed
            # would fail.
            self.model.add(TimeDistributed(Dense(dense_dim,
                                                 activation='relu')))
            self.model.add(TimeDistributed(Dropout(dropout)))
            self.model.add(TimeDistributed(BatchNormalization()))
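All of these snippets use the Highway layer, which shipped with Keras 1.x (and keras.legacy.layers in early 2.x) but has been removed from modern Keras. To run them on a current stack you can drop in a minimal re-implementation of the highway formula y = t * h(x) + (1 - t) * x from Srivastava et al. (2015); the sketch below is a stand-in written for this page, not the original layer:

import keras
from keras import backend as K
from keras.layers import Layer


class Highway(Layer):
    """Minimal highway layer: y = t * h(x) + (1 - t) * x."""

    def __init__(self, activation='relu', transform_bias=-2.0, **kwargs):
        super(Highway, self).__init__(**kwargs)
        self.activation = keras.activations.get(activation)
        self.transform_bias = transform_bias

    def build(self, input_shape):
        dim = int(input_shape[-1])
        self.W_h = self.add_weight(name='W_h', shape=(dim, dim),
                                   initializer='glorot_uniform')
        self.b_h = self.add_weight(name='b_h', shape=(dim,),
                                   initializer='zeros')
        self.W_t = self.add_weight(name='W_t', shape=(dim, dim),
                                   initializer='glorot_uniform')
        # bias the transform gate towards carry behaviour at initialization
        self.b_t = self.add_weight(
            name='b_t', shape=(dim,),
            initializer=keras.initializers.Constant(self.transform_bias))
        super(Highway, self).build(input_shape)

    def call(self, x):
        h = self.activation(K.dot(x, self.W_h) + self.b_h)
        t = K.sigmoid(K.dot(x, self.W_t) + self.b_t)
        return t * h + (1.0 - t) * x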
Example #2
def build_word_feature_char(vocab_size=5,
                            char_emb_dim=CHAR_EMB_DIM,
                            mode="padding",
                            cnn_encoder=True,
                            highway=True):
    # Build the feature computed by a CNN for each word in the sentence;
    # its output feeds the next RNN.
    # Expected input: every #comp_width ints encode one character.
    # mode:
    # "average": average-pool every #comp_width input embeddings, i.e. output
    #            the mean of the indexed embeddings of a character
    # "padding": convolve over every #comp_width embeddings

    # The real vocab_size for UCS is 2481, including padding, blank, unknown,
    # punctuation, and kana.
    init_width = 0.5 / char_emb_dim
    init_weight = numpy.random.uniform(low=-init_width,
                                       high=init_width,
                                       size=(vocab_size, char_emb_dim))
    init_weight[0] = 0  # zero the padding embedding (though maybe padding should not be zero)
    # print(init_weight)
    # the first layer embeds every component
    word_input = Input(shape=(MAX_WORD_LENGTH, ))
    char_embedding = \
        Embedding(input_dim=vocab_size, output_dim=char_emb_dim, weights=[init_weight], trainable=True)(word_input)
    # print("char_embedding:", char_embedding._keras_shape)
    if cnn_encoder:
        # only the "padding" mode is implemented below; with any other mode,
        # `feature` would be undefined at the Flatten call
        if mode == "padding":
            # print(char_embedding._keras_shape)
            # convolutions with kernel sizes [1, 2, 3] and 100/200/300 feature maps
            feature1 = Conv1D(filters=100, kernel_size=1,
                              activation='relu')(char_embedding)
            feature1 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 1 +
                                    1)(feature1)
            feature2 = Conv1D(filters=200, kernel_size=2,
                              activation='relu')(char_embedding)
            feature2 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 2 +
                                    1)(feature2)
            feature3 = Conv1D(filters=300, kernel_size=3,
                              activation='relu')(char_embedding)
            feature3 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 3 +
                                    1)(feature3)
            feature = concatenate([feature1, feature2, feature3])
        feature = Flatten()(feature)
        # print(feature._keras_shape)
        if highway:
            feature = Highway(activation="relu")(feature)
    else:
        feature = Flatten()(char_embedding)
    word_feature_encoder = Model(word_input, feature)
    return word_feature_encoder
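A hypothetical usage sketch for the encoder above (shape constants invented for illustration): Example #4 below applies the returned Model to every word position of a sentence via TimeDistributed, like so:

# hypothetical: 30-word sentences, MAX_WORD_LENGTH character codes per word
encoder = build_word_feature_char(vocab_size=2481)
sentence_input = Input(shape=(30, MAX_WORD_LENGTH), dtype='int32')
word_features = TimeDistributed(encoder)(sentence_input)  # (batch, 30, n_features)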
Example #3
    def __init__(self,
                 rnn,
                 rnn_dim,
                 input_dim,
                 dropout_W=0.0,
                 dropout_U=0.0,
                 cnn_border_mode='same'):
        if rnn == 'lstm':
            from keras.layers import CuDNNLSTM as RNN
        elif rnn == 'sru':
            from nea.cell import SRU as RNN
        elif rnn == 'nlstm':
            from nea.cell import NestedLSTM as RNN
        elif rnn == 'gru':
            from keras.layers import CuDNNGRU as RNN
        elif rnn == 'simple':
            from keras.layers.recurrent import SimpleRNN as RNN
        elif rnn == 'indrnn':
            from nea.cell import IndRNN as RNN
        else:
            raise ValueError('unknown rnn type: %r' % rnn)
        self.model = Sequential()
        self.model.add(
            Conv1D(filters=100,
                   kernel_size=3,
                   padding=cnn_border_mode,
                   strides=1,
                   input_shape=input_dim))
        for i in range(MC.DEPTH):
            self.model.add(
                Bidirectional(
                    RNN(
                        rnn_dim,
                        # dropout=dropout_W,
                        # recurrent_dropout=dropout_U,
                        return_sequences=True)))

        if MC.HIGHWAY:
            self.model.add(TimeDistributed(Highway(activation='tanh')))
        # self.model.add(TimeDistributed(Dense(MC.DENSE_DIM, activation='relu')))
        self.model.add(Dropout(MC.DROPOUT))
        self.model.add(Attention())
Example #4
def build_sentence_rnn(real_vocab_number,
                       word_vocab_size=10,
                       char_vocab_size=10,
                       classes=2,
                       attention=False,
                       dropout=0,
                       word=True,
                       char=False,
                       char_shape=True,
                       model="rnn",
                       cnn_encoder=True,
                       highway=None,
                       nohighway=None,
                       shape_filter=True,
                       char_filter=True):
    # Build the sentence-level RNN; the output of the build_word_feature_*
    # encoders serves as the feature of each word.
    if char_shape:
        word_feature_encoder = build_word_feature_shape(
            vocab_size=real_vocab_number,
            cnn_encoder=cnn_encoder,
            highway=highway,
            nohighway=nohighway,
            shape_filter=shape_filter,
            char_filter=char_filter)
        sentence_input = Input(shape=(MAX_SENTENCE_LENGTH,
                                      COMP_WIDTH * MAX_WORD_LENGTH),
                               dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            sentence_input)
        # print(word_feature_sequence._keras_shape)
    if word:
        sentence_word_input = Input(shape=(MAX_SENTENCE_LENGTH, ),
                                    dtype='int32')
        word_embedding_sequence = Embedding(
            input_dim=word_vocab_size,
            output_dim=WORD_DIM)(sentence_word_input)
    if char:
        word_feature_encoder = build_word_feature_char(
            vocab_size=char_vocab_size,
            cnn_encoder=cnn_encoder,
            highway=highway)
        char_input = Input(shape=(MAX_SENTENCE_LENGTH, MAX_WORD_LENGTH),
                           dtype='int32')
        word_feature_sequence = TimeDistributed(word_feature_encoder)(
            char_input)
    if char_shape and word and not char:
        word_feature_sequence = concatenate(
            [word_feature_sequence, word_embedding_sequence], axis=2)
    if word and not char_shape and not char:
        word_feature_sequence = word_embedding_sequence
    # print(word_feature_sequence._keras_shape)
    if model == "rnn":
        if attention:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=True))(word_feature_sequence)
            if highway:
                lstm_rnn = TimeDistributed(
                    Highway(activation=highway))(lstm_rnn)
            elif nohighway:
                lstm_rnn = TimeDistributed(
                    Dense(units=300, activation=nohighway))(lstm_rnn)
            lstm_rnn = AttentionWithContext()(lstm_rnn)
        else:
            lstm_rnn = Bidirectional(
                LSTM(150, dropout=dropout,
                     return_sequences=False))(word_feature_sequence)
        x = lstm_rnn
    if classes < 2:
        raise ValueError("the number of classes cannot be less than 2")
    preds = Dense(classes, activation='softmax')(x)
    if char_shape and not word and not char:
        sentence_model = Model(sentence_input, preds)
    if word and not char_shape and not char:
        sentence_model = Model(sentence_word_input, preds)
    if word and char_shape and not char:
        sentence_model = Model([sentence_input, sentence_word_input], preds)
    if char and not word and not char_shape:
        sentence_model = Model(char_input, preds)
    sentence_model.summary()
    return sentence_model
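A hypothetical call (argument values invented for illustration): with word=True and char_shape=True the model takes two input arrays, in the order fixed by Model([sentence_input, sentence_word_input], preds) above:

model = build_sentence_rnn(real_vocab_number=2481, word_vocab_size=20000,
                           classes=2, word=True, char_shape=True)
model.fit([X_char_shape, X_word], y_onehot, batch_size=32, epochs=5)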
Example #5
def build_word_feature_shape(vocab_size=5,
                             char_emb_dim=CHAR_EMB_DIM,
                             comp_width=COMP_WIDTH,
                             mode="padding",
                             cnn_encoder=True,
                             highway="linear",
                             nohighway=None,
                             shape_filter=True,
                             char_filter=True):
    # Build the feature computed by a CNN for each word in the sentence;
    # its output feeds the next RNN.
    # Expected input: every #comp_width ints encode one character.
    # mode:
    # "average": average-pool every #comp_width input embeddings, i.e. output
    #            the mean of the indexed embeddings of a character
    # "padding": convolve over every #comp_width embeddings

    # The real vocab_size for UCS is 2481, including padding, blank, unknown,
    # punctuation, and kana.
    assert shape_filter or char_filter
    init_width = 0.5 / char_emb_dim
    init_weight = numpy.random.uniform(low=-init_width,
                                       high=init_width,
                                       size=(vocab_size, char_emb_dim))
    init_weight[0] = 0  # zero the padding embedding (though maybe padding should not be zero)
    # print(init_weight)
    # the first layer embeds every component
    word_input = Input(shape=(COMP_WIDTH * MAX_WORD_LENGTH, ))
    char_embedding = \
        Embedding(input_dim=vocab_size, output_dim=char_emb_dim, weights=[init_weight], trainable=True)(word_input)
    # print("char_embedding:", char_embedding._keras_shape)
    if cnn_encoder:
        # only the "padding" mode is implemented below
        if mode == "padding":
            # print(char_embedding._keras_shape)
            # print(comp_width)
            if shape_filter and char_filter:
                filter_sizes = [50, 100, 150]
            else:
                filter_sizes = [100, 200, 300]
            if shape_filter:
                feature_s1 = Conv1D(filters=filter_sizes[0],
                                    kernel_size=1,
                                    activation='relu')(char_embedding)
                feature_s1 = MaxPooling1D(pool_size=MAX_WORD_LENGTH *
                                          COMP_WIDTH)(feature_s1)
                feature_s2 = Conv1D(filters=filter_sizes[1],
                                    kernel_size=2,
                                    activation='relu')(char_embedding)
                feature_s2 = MaxPooling1D(
                    pool_size=MAX_WORD_LENGTH * COMP_WIDTH - 1)(feature_s2)
                feature_s3 = Conv1D(filters=filter_sizes[2],
                                    kernel_size=3,
                                    activation='relu')(char_embedding)
                feature_s3 = MaxPooling1D(
                    pool_size=MAX_WORD_LENGTH * COMP_WIDTH - 2)(feature_s3)
            if char_filter:
                feature1 = Conv1D(filters=filter_sizes[0],
                                  kernel_size=1 * comp_width,
                                  strides=comp_width,
                                  activation='relu')(char_embedding)
                feature1 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 1 +
                                        1)(feature1)
                feature2 = Conv1D(filters=filter_sizes[1],
                                  kernel_size=2 * comp_width,
                                  strides=comp_width,
                                  activation='relu')(char_embedding)
                feature2 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 2 +
                                        1)(feature2)
                feature3 = Conv1D(filters=filter_sizes[2],
                                  kernel_size=3 * comp_width,
                                  strides=comp_width,
                                  activation='relu')(char_embedding)
                feature3 = MaxPooling1D(pool_size=MAX_WORD_LENGTH - 3 +
                                        1)(feature3)
            if shape_filter and char_filter:
                feature = concatenate([
                    feature_s1, feature_s2, feature_s3, feature1, feature2,
                    feature3
                ])
            elif shape_filter and not char_filter:
                feature = concatenate([feature_s1, feature_s2, feature_s3])
            elif char_filter and not shape_filter:
                feature = concatenate([feature1, feature2, feature3])
            else:
                feature = None  # unreachable: the assert above guarantees at least one filter set
        feature = Flatten()(feature)
        # print(feature._keras_shape)
        if highway:
            if isinstance(highway, str):
                feature = Highway(activation=highway)(feature)
            else:
                feature = Highway(activation='relu')(feature)
        elif nohighway:
            feature = Dense(units=600, activation=nohighway)(feature)
    else:
        feature = Flatten()(char_embedding)
    word_feature_encoder = Model(word_input, feature)
    return word_feature_encoder
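The pool sizes above implement max-over-time pooling: a 'valid' Conv1D with kernel_size=k over a length-L sequence yields L - k + 1 steps, so MaxPooling1D(pool_size=L - k + 1) leaves exactly one vector per filter. For the strided character filters the step count is (L - k*COMP_WIDTH) // COMP_WIDTH + 1, which equals MAX_WORD_LENGTH - k + 1. A small shape check with illustrative numbers:

L, k = 12, 3  # e.g. COMP_WIDTH=3, MAX_WORD_LENGTH=4
x = Input(shape=(L, 15))                                     # (batch, 12, 15)
y = Conv1D(filters=50, kernel_size=k, activation='relu')(x)  # (batch, 10, 50)
y = MaxPooling1D(pool_size=L - k + 1)(y)                     # (batch, 1, 50)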
Example #6
def get_darnn(nb_words,
              embedding_dim,
              embedding_matrix,
              max_sequence_length,
              out_size,
              projection_dim=50,
              projection_hidden=0,
              projection_dropout=0.2,
              compare_dim=288,
              compare_dropout=0.2,
              dense_dim=50,
              dense_dropout=0.2,
              lr=1e-3,
              activation='relu'):

    q1 = Input(shape=(max_sequence_length, ), name='first_sentences')
    q2 = Input(shape=(max_sequence_length, ), name='second_sentences')

    q1_exact_match = Input(shape=(max_sequence_length, ),
                           name='first_exact_match')
    q2_exact_match = Input(shape=(max_sequence_length, ),
                           name='second_exact_match')
    input_layer_3 = Input(shape=(36, ), name='meta-features', dtype="float32")

    embedding = Embedding(nb_words,
                          embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)

    # NOTE: em_embeddings, the exact-match inputs and input_layer_3 are
    # defined but never used in the network below (they are still passed
    # to Model as inputs)
    em_embeddings = Embedding(2,
                              1,
                              input_length=max_sequence_length,
                              trainable=True)

    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.1)(q1_embed)

    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.1)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))
    q1_embed = Dropout(0.1)(th(q1_embed))
    q2_embed = Dropout(0.1)(th(q2_embed))

    rnns = [
        Bidirectional(CuDNNGRU(42, return_sequences=True)) for i in range(3)
    ]

    q1_res = []
    q2_res = []

    for idx, rnn in enumerate(rnns):
        q1_seq = rnn(q1_embed)
        q1_seq = Dropout(0.15)(q1_seq)
        q2_seq = rnn(q2_embed)
        q2_seq = Dropout(0.15)(q2_seq)
        q1_aligned, q2_aligned = soft_attention_alignment(q1_seq, q2_seq)

        q1_res.append(q2_aligned)
        q1_res.append(q1_seq)

        q2_res.append(q1_aligned)
        q2_res.append(q2_seq)

        q1_embed = Concatenate()([
            q1_embed,
            q1_seq,
            q2_aligned,
        ])
        q2_embed = Concatenate()([
            q2_embed,
            q2_seq,
            q1_aligned,
        ])

    q1_res = Concatenate()(q1_res)  # NOTE: q1_res / q2_res are unused below
    q2_res = Concatenate()(q2_res)

    attn = AttentionWeightedAverage()
    q1_rep = apply_multiple(
        q1_embed,
        [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])
    q2_rep = apply_multiple(
        q2_embed,
        [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])

    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    h_all = Concatenate()([
        q1_rep,
        q2_rep,
        q_diff,
        q_multi,
    ])
    h_all = Dropout(0.35)(h_all)
    h_all = Dense(300, activation='relu')(h_all)
    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(
        inputs=[q1, q2, input_layer_3, q1_exact_match, q2_exact_match],
        outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipvalue=1.5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model
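The pairwise models on this page all depend on a soft_attention_alignment helper that is not reproduced here. A plausible sketch in the style of Parikh et al.'s decomposable attention (names and details assumed, not necessarily the author's exact code):

from keras.activations import softmax
from keras.layers import Dot, Lambda, Permute


def soft_attention_alignment(input_1, input_2):
    # dot-product attention between two sequences
    attention = Dot(axes=-1)([input_1, input_2])               # (batch, len1, len2)
    w_att_1 = Lambda(lambda x: softmax(x, axis=1))(attention)  # normalize over len1
    w_att_2 = Permute((2, 1))(
        Lambda(lambda x: softmax(x, axis=2))(attention))       # (batch, len2, len1)
    in1_aligned = Dot(axes=1)([w_att_1, input_1])              # input_1 at input_2's positions
    in2_aligned = Dot(axes=1)([w_att_2, input_2])              # input_2 at input_1's positions
    return in1_aligned, in2_aligned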
Example #7
def get_multiwindow_cnn(nb_words,
                        embedding_dim,
                        embedding_matrix,
                        max_sequence_length,
                        out_size,
                        projection_dim=50,
                        projection_hidden=0,
                        projection_dropout=0.2,
                        compare_dim=288,
                        compare_dropout=0.2,
                        dense_dim=50,
                        dense_dropout=0.2,
                        lr=1e-3,
                        activation='relu'):

    q1 = Input(shape=(max_sequence_length, ), name='first_sentences')
    q2 = Input(shape=(max_sequence_length, ), name='second_sentences')
    meta_features_input = Input(shape=(36, ), name='meta-features')

    embedding = Embedding(nb_words,
                          embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)

    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.2)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.2)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))

    q1_encoded = th(q1_embed)
    q2_encoded = th(q2_embed)

    q1_in = q1_encoded
    q2_in = q2_encoded

    nb_filters = 64

    for i in range(1, 5):
        tanh_conv = Conv1D(nb_filters, i, padding='same', activation='tanh')
        sigm_conv = Conv1D(nb_filters, i, padding='same', activation='sigmoid')
        res_conv = Conv1D(nb_filters, i, padding='same', activation='relu')
        drop = Dropout(0.1)

        q1_t = tanh_conv(q1_in)
        q1_s = sigm_conv(q1_in)
        q1_x = Multiply()([q1_t, q1_s])

        res_q1 = res_conv(q1_x)
        res_q1 = drop(res_q1)  # NOTE: res_q1 is computed but never used
        q1_encoded = Concatenate()([q1_encoded, q1_x])

        q2_t = tanh_conv(q2_in)
        q2_s = sigm_conv(q2_in)
        q2_x = Multiply()([q2_t, q2_s])

        res_q2 = res_conv(q2_x)
        res_q2 = drop(res_q2)  # NOTE: res_q2 is computed but never used
        q2_encoded = Concatenate()([q2_encoded, q2_x])

    # Align after align
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)
    q1_encoded = Concatenate()([
        q1_encoded,
        q2_aligned,
    ])
    q2_encoded = Concatenate()([
        q2_encoded,
        q1_aligned,
    ])

    attn = AttentionWeightedAverage()
    q1_rep = apply_multiple(q1_encoded, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
        attn,
    ])
    q2_rep = apply_multiple(q2_encoded, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
        attn,
    ])

    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    h_all = Concatenate()([
        q1_rep,
        q2_rep,
        q_diff,
        q_multi,
    ])
    h_all = Dropout(0.2)(h_all)
    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(inputs=[q1, q2, meta_features_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1.5),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model
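apply_multiple, substract and (from Example #9 on) time_distributed are further shared helpers whose definitions are not on this page. Implementations consistent with how they are called here might look like the following sketch (assumed, not the author's code):

from keras.layers import Concatenate, Lambda, TimeDistributed


def substract(input_1, input_2):
    # elementwise difference as a layer (the misspelled name matches the call sites)
    return Lambda(lambda x: x[0] - x[1])([input_1, input_2])


def apply_multiple(input_, layers):
    # apply several pooling layers to one tensor and concatenate the results
    return Concatenate()([layer(input_) for layer in layers])


def time_distributed(input_, layers):
    # thread a sequence tensor through a stack of layers, timestep-wise
    node = input_
    for layer in layers:
        node = TimeDistributed(layer)(node)
    return node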
Example #8
def get_dense_cnn(nb_words,
                  embedding_dim,
                  embedding_matrix,
                  max_sequence_length,
                  out_size,
                  projection_dim=50,
                  projection_hidden=0,
                  projection_dropout=0.2,
                  compare_dim=288,
                  compare_dropout=0.2,
                  dense_dim=50,
                  dense_dropout=0.2,
                  lr=1e-3,
                  activation='relu'):

    q1 = Input(shape=(max_sequence_length, ), name='first_sentences')
    q2 = Input(shape=(max_sequence_length, ), name='second_sentences')
    meta_features_input = Input(shape=(36, ), name='meta-features')

    embedding = Embedding(nb_words,
                          embedding_dim,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)

    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.2)(q1_embed)
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.2)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))

    q1_encoded = th(q1_embed)
    q2_encoded = th(q2_embed)

    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)
    q1_encoded = Concatenate()([q2_aligned, q1_encoded])
    q2_encoded = Concatenate()([q1_aligned, q2_encoded])

    cnn_init = Conv1D(42, 1, strides=1, padding='same', activation='relu')
    q1_seq = cnn_init(q1_encoded)
    q2_seq = cnn_init(q2_encoded)

    cnns = [
        Conv1D(42, 3, strides=1, padding='same', activation='relu')
        for i in range(3)
    ]
    trans = [  # NOTE: these 1x1 transition convolutions are never applied
        Conv1D(32, 1, strides=1, padding='same', activation='relu')
        for i in range(3)
    ]

    for idx, cnn in enumerate(cnns):
        q1_aligned, q2_aligned = soft_attention_alignment(q1_seq, q2_seq)
        q1_encoded = Concatenate()([q1_seq, q2_aligned, q1_encoded])
        q2_encoded = Concatenate()([q2_seq, q1_aligned, q2_encoded])
        q1_seq = cnn(q1_encoded)
        q2_seq = cnn(q2_encoded)

    attn = AttentionWeightedAverage()

    q1_rep = apply_multiple(
        q1_encoded,
        [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])
    q2_rep = apply_multiple(
        q2_encoded,
        [GlobalAvgPool1D(), GlobalMaxPool1D(), attn])

    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    h_all = Concatenate()([
        q1_rep,
        q2_rep,
        q_diff,
        q_multi,
    ])
    h_all = Dropout(0.5)(h_all)

    h_all = Dense(128, activation='relu')(h_all)
    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(inputs=[q1, q2, meta_features_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model
Example #9
def get_char_decomposable_attention(nb_words,
                                    embedding_dim,
                                    embedding_matrix,
                                    max_sequence_length,
                                    out_size,
                                    projection_dim=50,
                                    projection_hidden=0,
                                    projection_dropout=0.2,
                                    compare_dim=288,
                                    compare_dropout=0.2,
                                    dense_dim=50,
                                    dense_dropout=0.2,
                                    lr=1e-3,
                                    activation='relu'):

    q1 = Input(shape=(max_sequence_length, ), name='first_sentences')
    q2 = Input(shape=(max_sequence_length, ), name='second_sentences')
    q1_exact_match = Input(shape=(max_sequence_length, ),
                           name='first_exact_match')
    q2_exact_match = Input(shape=(max_sequence_length, ),
                           name='second_exact_match')

    input_layer_3 = Input(shape=(36, ), name='meta-features', dtype="float32")

    # input_encoded = BatchNormalization()(input_layer_3)
    # NOTE: input_encoded is built but never connected to the output below
    input_encoded = Dense(2016, activation='elu')(input_layer_3)
    input_encoded = Dropout(0.25)(input_encoded)

    embedding = Embedding(nb_words,
                          150,
                          weights=[embedding_matrix],
                          input_length=max_sequence_length,
                          trainable=False)

    em_embeddings = Embedding(2,
                              1,
                              input_length=max_sequence_length,
                              trainable=True)

    #q1_embed = Concatenate()([embedding(q1), em_embeddings(q1_exact_match)])
    q1_embed = embedding(q1)
    q1_embed = SpatialDropout1D(0.1)(q1_embed)

    #q2_embed = Concatenate()([embedding(q2), em_embeddings(q2_exact_match)])
    q2_embed = embedding(q2)
    q2_embed = SpatialDropout1D(0.1)(q2_embed)

    th = TimeDistributed(Highway(activation='relu'))
    q1_embed = th(q1_embed)
    q2_embed = th(q2_embed)

    q1_aligned, q2_aligned = soft_attention_alignment(q1_embed, q2_embed)
    q1_vec = Concatenate()([
        q1_embed, q2_aligned,
        substract(q1_embed, q2_aligned),
        Multiply()([q1_embed, q2_aligned])
    ])
    q2_vec = Concatenate()([
        q2_embed, q1_aligned,
        substract(q2_embed, q1_aligned),
        Multiply()([q2_embed, q1_aligned])
    ])

    dense_compares = [
        Dense(300, activation='elu'),
        Dropout(0.2),
        Dense(200, activation='elu'),
        Dropout(0.2),
    ]

    q1_compared = time_distributed(q1_vec, dense_compares)
    q2_compared = time_distributed(q2_vec, dense_compares)

    q1_rep = apply_multiple(
        q1_compared, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(
        q2_compared, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    h_all = Concatenate()([q1_rep, q2_rep])
    h_all = BatchNormalization()(h_all)

    h_all = Dense(256, activation='elu')(h_all)
    h_all = Dropout(0.2)(h_all)
    h_all = BatchNormalization()(h_all)

    h_all = Dense(256, activation='elu')(h_all)
    h_all = Dropout(0.2)(h_all)
    h_all = BatchNormalization()(h_all)

    out_ = Dense(3, activation='softmax')(h_all)

    model = Model(
        inputs=[q1, q2, input_layer_3, q1_exact_match, q2_exact_match],
        outputs=out_)
    model.compile(optimizer=Adam(lr=lr, decay=1e-6, clipnorm=1.5,
                                 amsgrad=True),
                  loss='categorical_crossentropy',
                  metrics=['accuracy', weighted_accuracy])
    model.summary()
    return model
Example #10
def carnn(embedding_matrix,
          config,
          compare_out_size=CARNN_COMPARE_LAYER_OUTSIZE,
          rnn_size=CARNN_RNN_SIZE,
          rnn_dropout=CARNN_AGGREATION_DROPOUT):
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    activation = 'elu'
    compare_dim = 500
    compare_dropout = 0.2
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    highway_encoder = TimeDistributed(Highway(activation='relu'))
    self_attention = SelfAttention(d_model=embedding_matrix.shape[1])

    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)

    s1_encoded = self_attention(q1, q1_encoded)
    s2_encoded = self_attention(q2, q2_encoded)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined1 = Concatenate()([
        q1_encoded,
        q2_aligned,
        interaction(q1_encoded, q2_aligned),
    ])
    q1_combined2 = Concatenate()([
        q2_aligned,
        q1_encoded,
        interaction(q1_encoded, q2_aligned),
    ])

    q2_combined1 = Concatenate()([
        q2_encoded,
        q1_aligned,
        interaction(q2_encoded, q1_aligned),
    ])
    q2_combined2 = Concatenate()([
        q1_aligned,
        q2_encoded,
        interaction(q2_encoded, q1_aligned),
    ])

    s1_combined1 = Concatenate()([
        q1_encoded,
        s1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])
    s1_combined2 = Concatenate()([
        s1_encoded,
        q1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])

    s2_combined1 = Concatenate()([
        q2_encoded,
        s2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])
    s2_combined2 = Concatenate()([
        s2_encoded,
        q2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])

    compare_layers_d = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    compare_layers_g = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    # NOTE these can be optimized
    q1_compare1 = time_distributed(q1_combined1, compare_layers_d)
    q1_compare2 = time_distributed(q1_combined2, compare_layers_d)
    q1_compare = Average()([q1_compare1, q1_compare2])

    q2_compare1 = time_distributed(q2_combined1, compare_layers_d)
    q2_compare2 = time_distributed(q2_combined2, compare_layers_d)
    q2_compare = Average()([q2_compare1, q2_compare2])

    s1_compare1 = time_distributed(s1_combined1, compare_layers_g)
    s1_compare2 = time_distributed(s1_combined2, compare_layers_g)
    s1_compare = Average()([s1_compare1, s1_compare2])

    s2_compare1 = time_distributed(s2_combined1, compare_layers_g)
    s2_compare2 = time_distributed(s2_combined2, compare_layers_g)
    s2_compare = Average()([s2_compare1, s2_compare2])

    # Aggregate
    q1_encoded = Concatenate()([q1_encoded, q1_compare, s1_compare])
    q2_encoded = Concatenate()([q2_encoded, q2_compare, s2_compare])

    aggreate_rnn = CuDNNGRU(rnn_size, return_sequences=True)
    q1_aggreated = aggreate_rnn(q1_encoded)
    q1_aggreated = Dropout(rnn_dropout)(q1_aggreated)
    q2_aggreated = aggreate_rnn(q2_encoded)
    q2_aggreated = Dropout(rnn_dropout)(q2_aggreated)

    # Pooling
    q1_rep = apply_multiple(q1_aggreated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])
    q2_rep = apply_multiple(q2_aggreated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])

    q_diff = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    q_multi = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)
    h_all1 = Concatenate()([q1_rep, q2_rep, q_diff, q_multi, feature_dense])
    h_all2 = Concatenate()([q2_rep, q1_rep, q_diff, q_multi, feature_dense])
    h_all1 = Dropout(0.5)(h_all1)
    h_all2 = Dropout(0.5)(h_all2)

    dense = Dense(256, activation='relu')

    h_all1 = dense(h_all1)
    h_all2 = dense(h_all2)
    h_all = Average()([h_all1, h_all2])
    predictions = Dense(1, activation='sigmoid')(h_all)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
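interaction is another external helper not shown on this page. Judging from its call sites (comparing an encoding with its aligned counterpart), a plausible stand-in, offered purely as a guess, is the usual difference-and-product interaction feature:

from keras import backend as K
from keras.layers import Concatenate, Lambda, Multiply


def interaction(input_1, input_2):
    # hypothetical: concatenate elementwise |difference| and product
    diff = Lambda(lambda x: K.abs(x[0] - x[1]))([input_1, input_2])
    prod = Multiply()([input_1, input_2])
    return Concatenate()([diff, prod])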
Example #11
def decom(embedding_matrix, config):
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    projection_hidden = 300
    activation = 'elu'
    projection_dropout = 0.2
    projection_dim = 300
    compare_dim = 500  # 300
    compare_dropout = 0.2
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    highway_encoder = TimeDistributed(Highway(activation='relu'))

    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         interaction(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         interaction(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    # Dense meta features
    #     meta_densed = BatchNormalization()(meta_features)
    #     meta_densed = Highway(activation='relu')(meta_densed)
    #     meta_densed = Dropout(0.2)(meta_densed)

    # Classifier
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])

    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
Example #12
def get_decomposable_attention(nb_words,
                               embedding_size,
                               embedding_matrix,
                               max_sequence_length,
                               out_size,
                               compare_dim=300,
                               compare_dropout=0.2,
                               dense_dim=256,
                               dense_dropout=0.2,
                               lr=1e-3,
                               activation='relu',
                               with_meta_features=False,
                               word_level=True):
    q1, q1_c, q2, q2_c, meta_features = get_input_layers()

    if word_level:
        q1_embedded, q2_embedded = get_word_embeddings(
            q1,
            q2,
            nb_words,
            embedding_size,
            embedding_matrix,
            max_sequence_length,
            trainable=False,
            embedding_dropout=model_config.EMBEDDING_DROPOUT)
    else:
        q1_embedded, q2_embedded = get_char_embeddings(
            q1_c,
            q2_c,
            max_sequence_length,
            model_config.CHAR_EMBEDDING_SIZE,
            feature_map_nums=model_config.CHAR_EMBEDDING_FEATURE_MAP_NUMS,
            window_sizes=model_config.CHAR_EMBEDDING_WINDOW_SIZES,
            embedding_dropout=model_config.EMBEDDING_DROPOUT)

    # Context encoder
    highway_encoder = TimeDistributed(Highway(activation='relu'))

    q1_encoded = highway_encoder(q1_embedded)
    q2_encoded = highway_encoder(q2_embedded)
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare deep views
    q1_combined = Concatenate()([
        q1_encoded,
        q2_aligned,
        interaction(q1_encoded, q2_aligned),
    ])
    q2_combined = Concatenate()([
        q2_encoded,
        q1_aligned,
        interaction(q2_encoded, q1_aligned),
    ])

    compare_layers_d = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]

    q1_compare = time_distributed(q1_combined, compare_layers_d)
    q2_compare = time_distributed(q2_combined, compare_layers_d)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])
    q2_rep = apply_multiple(q2_compare, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])

    # Dense meta features
    meta_densed = BatchNormalization()(meta_features)
    meta_densed = Highway(activation='relu')(meta_densed)
    meta_densed = Dropout(0.2)(meta_densed)

    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    q_rep = Concatenate()([q1_rep, q2_rep])

    if with_meta_features:
        h_all = Concatenate()([q_diff, q_multi, q_rep, meta_densed])
    else:
        h_all = Concatenate()([
            q_diff,
            q_multi,
            q_rep,
        ])

    h_all = Dropout(0.5)(h_all)

    dense = Dense(dense_dim, activation=activation)(h_all)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)

    dense = Dense(dense_dim, activation=activation)(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)

    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_c, q2_c, meta_features], outputs=out_)
    model.compile(optimizer=Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
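A hypothetical training call (array names and hyperparameters invented for illustration); the input order must match Model(inputs=[q1, q2, q1_c, q2_c, meta_features], ...) above:

model = get_decomposable_attention(nb_words, 300, embedding_matrix,
                                   max_sequence_length=30, out_size=1)
model.fit([q1_train, q2_train, q1_char_train, q2_char_train, meta_train],
          y_train, batch_size=256, epochs=10, validation_split=0.1)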
Example #13
def get_CARNN(nb_words,
              embedding_size,
              embedding_matrix,
              max_sequence_length,
              out_size=1,
              compare_dim=model_config.CARNN_COMPARE_LAYER_HIDDEN_SIZE,
              compare_out_size=model_config.CARNN_COMPARE_LAYER_OUTSIZE,
              compare_dropout=model_config.COMPARE_LAYER_DROPOUT,
              meta_features_dropout=model_config.META_FEATURES_DROPOUT,
              rnn_size=model_config.CARNN_RNN_SIZE,
              rnn_dropout=model_config.CARNN_AGGREATION_DROPOUT,
              with_meta_features=False,
              word_level=True,
              lr=1e-3,
              activation='relu'):

    q1, q1_c, q2, q2_c, meta_features = get_input_layers()

    if word_level:
        q1_embedded, q2_embedded = get_word_embeddings(
            q1,
            q2,
            nb_words,
            embedding_size,
            embedding_matrix,
            max_sequence_length,
            trainable=False,
            embedding_dropout=model_config.EMBEDDING_DROPOUT)
        embedding_size = model_config.WORD_EMBEDDING_SIZE
    else:
        q1_embedded, q2_embedded = get_char_embeddings(
            q1_c,
            q2_c,
            max_sequence_length,
            model_config.CHAR_EMBEDDING_SIZE,
            feature_map_nums=model_config.CHAR_EMBEDDING_FEATURE_MAP_NUMS,
            window_sizes=model_config.CHAR_EMBEDDING_WINDOW_SIZES,
            embedding_dropout=model_config.EMBEDDING_DROPOUT)
        embedding_size = model_config.CHAR_CNN_OUT_SIZE

    self_attention = SelfAttention(d_model=embedding_size)

    # Context encoder
    highway_encoder = TimeDistributed(Highway(activation='selu'))

    q1_encoded = highway_encoder(q1_embedded)
    q2_encoded = highway_encoder(q2_embedded)

    s1_encoded = self_attention(q1, q1_encoded)
    s2_encoded = self_attention(q2, q2_encoded)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare deep views
    q1_combined1 = Concatenate()([
        q1_encoded,
        q2_aligned,
        interaction(q1_encoded, q2_aligned),
    ])
    q1_combined2 = Concatenate()([
        q2_aligned,
        q1_encoded,
        interaction(q1_encoded, q2_aligned),
    ])

    q2_combined1 = Concatenate()([
        q2_encoded,
        q1_aligned,
        interaction(q2_encoded, q1_aligned),
    ])
    q2_combined2 = Concatenate()([
        q1_aligned,
        q2_encoded,
        interaction(q2_encoded, q1_aligned),
    ])

    s1_combined1 = Concatenate()([
        q1_encoded,
        s1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])
    s1_combined2 = Concatenate()([
        s1_encoded,
        q1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])

    s2_combined1 = Concatenate()([
        q2_encoded,
        s2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])
    s2_combined2 = Concatenate()([
        s2_encoded,
        q2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])

    compare_layers_d = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    compare_layers_g = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    # NOTE these can be optimized
    q1_compare1 = time_distributed(q1_combined1, compare_layers_d)
    q1_compare2 = time_distributed(q1_combined2, compare_layers_d)
    q1_compare = Average()([q1_compare1, q1_compare2])

    q2_compare1 = time_distributed(q2_combined1, compare_layers_d)
    q2_compare2 = time_distributed(q2_combined2, compare_layers_d)
    q2_compare = Average()([q2_compare1, q2_compare2])

    s1_compare1 = time_distributed(s1_combined1, compare_layers_g)
    s1_compare2 = time_distributed(s1_combined2, compare_layers_g)
    s1_compare = Average()([s1_compare1, s1_compare2])

    s2_compare1 = time_distributed(s2_combined1, compare_layers_g)
    s2_compare2 = time_distributed(s2_combined2, compare_layers_g)
    s2_compare = Average()([s2_compare1, s2_compare2])

    # Aggregate
    q1_encoded = Concatenate()([q1_encoded, q1_compare, s1_compare])
    q2_encoded = Concatenate()([q2_encoded, q2_compare, s2_compare])

    aggreate_rnn = CuDNNGRU(rnn_size, return_sequences=True)
    q1_aggreated = aggreate_rnn(q1_encoded)
    q1_aggreated = Dropout(rnn_dropout)(q1_aggreated)
    q2_aggreated = aggreate_rnn(q2_encoded)
    q2_aggreated = Dropout(rnn_dropout)(q2_aggreated)

    # Pooling
    q1_rep = apply_multiple(q1_aggreated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])
    q2_rep = apply_multiple(q2_aggreated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])

    # Dense meta features
    meta_densed = Highway(activation='relu')(meta_features)
    meta_densed = Dropout(model_config.META_FEATURES_DROPOUT)(meta_densed)

    # Classifier
    q_diff = substract(q1_rep, q2_rep)
    q_multi = Multiply()([q1_rep, q2_rep])
    if with_meta_features:
        h_all1 = Concatenate()([q1_rep, q2_rep, q_diff, q_multi, meta_densed])
        h_all2 = Concatenate()([q2_rep, q1_rep, q_diff, q_multi, meta_densed])
    else:
        h_all1 = Concatenate()([
            q1_rep,
            q2_rep,
            q_diff,
            q_multi,
        ])
        h_all2 = Concatenate()([
            q2_rep,
            q1_rep,
            q_diff,
            q_multi,
        ])

    h_all1 = Dropout(0.5)(h_all1)
    h_all2 = Dropout(0.5)(h_all2)

    dense = Dense(256, activation='relu')

    h_all1 = dense(h_all1)
    h_all2 = dense(h_all2)
    h_all = Average()([h_all1, h_all2])

    out = Dense(out_size, activation='sigmoid')(h_all)

    model = Model(inputs=[q1, q2, q1_c, q2_c, meta_features], outputs=out)
    model.compile(optimizer=Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model