Example #1
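
# Shared imports for the three model builders below: a sketch of what the
# original module presumably imports. The Keras names are standard, but
# Highway, SelfAttention, soft_attention_alignment, interaction, submult,
# time_distributed, apply_multiple, the f1 metric, and the CARNN_* constants
# are project-specific helpers assumed to be defined elsewhere; the commented
# import paths for them are hypothetical.
from keras import backend as K
from keras import optimizers
from keras.layers import (Average, BatchNormalization, Bidirectional,
                          Concatenate, CuDNNGRU, CuDNNLSTM, Dense, Dropout,
                          Embedding, GlobalAvgPool1D, GlobalMaxPool1D, Input,
                          Lambda, SpatialDropout1D, TimeDistributed)
from keras.models import Model
# from .layers import Highway, SelfAttention
# from .ops import (soft_attention_alignment, interaction, submult,
#                   time_distributed, apply_multiple)
# from .metrics import f1
# from .config import (CARNN_COMPARE_LAYER_OUTSIZE, CARNN_RNN_SIZE,
#                      CARNN_AGGREATION_DROPOUT)

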
def carnn(embedding_matrix,
          config,
          compare_out_size=CARNN_COMPARE_LAYER_OUTSIZE,
          rnn_size=CARNN_RNN_SIZE,
          rnn_dropout=CARNN_AGGREATION_DROPOUT):
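    """Build the CARNN model: highway-encoded embeddings with both
    self-attention and cross-question soft alignment, symmetric two-way
    compare branches that are averaged, CuDNNGRU aggregation, global
    pooling, and abs-diff/product distance features merged with dense
    side features into a single sigmoid output."""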
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    activation = 'elu'
    compare_dim = 500
    compare_dropout = 0.2
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    highway_encoder = TimeDistributed(Highway(activation='relu'))
    self_attention = SelfAttention(d_model=embedding_matrix.shape[1])

    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)

    # SelfAttention is a project-specific layer; it is called with the raw
    # token ids alongside the encoded sequence, presumably so it can mask
    # padding positions.
    s1_encoded = self_attention(q1, q1_encoded)
    s2_encoded = self_attention(q2, q2_encoded)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined1 = Concatenate()([
        q1_encoded,
        q2_aligned,
        interaction(q1_encoded, q2_aligned),
    ])
    q1_combined2 = Concatenate()([
        q2_aligned,
        q1_encoded,
        interaction(q1_encoded, q2_aligned),
    ])

    q2_combined1 = Concatenate()([
        q2_encoded,
        q1_aligned,
        interaction(q2_encoded, q1_aligned),
    ])
    q2_combined2 = Concatenate()([
        q1_aligned,
        q2_encoded,
        interaction(q2_encoded, q1_aligned),
    ])

    s1_combined1 = Concatenate()([
        q1_encoded,
        s1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])
    s1_combined2 = Concatenate()([
        s1_encoded,
        q1_encoded,
        interaction(q1_encoded, s1_encoded),
    ])

    s2_combined1 = Concatenate()([
        q2_encoded,
        s2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])
    s2_combined2 = Concatenate()([
        s2_encoded,
        q2_encoded,
        interaction(q2_encoded, s2_encoded),
    ])

    # Two separate compare stacks: *_d serves the cross-attention branches
    # and *_g the self-attention branches.
    compare_layers_d = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    compare_layers_g = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_out_size, activation=activation),
        Dropout(compare_dropout),
    ]

    # NOTE: each *_compare1/*_compare2 pair below differs only in the
    # concatenation order of its inputs, so this block could be refactored.
    q1_compare1 = time_distributed(q1_combined1, compare_layers_d)
    q1_compare2 = time_distributed(q1_combined2, compare_layers_d)
    q1_compare = Average()([q1_compare1, q1_compare2])

    q2_compare1 = time_distributed(q2_combined1, compare_layers_d)
    q2_compare2 = time_distributed(q2_combined2, compare_layers_d)
    q2_compare = Average()([q2_compare1, q2_compare2])

    s1_compare1 = time_distributed(s1_combined1, compare_layers_g)
    s1_compare2 = time_distributed(s1_combined2, compare_layers_g)
    s1_compare = Average()([s1_compare1, s1_compare2])

    s2_compare1 = time_distributed(s2_combined1, compare_layers_g)
    s2_compare2 = time_distributed(s2_combined2, compare_layers_g)
    s2_compare = Average()([s2_compare1, s2_compare2])

    # Aggregate
    q1_encoded = Concatenate()([q1_encoded, q1_compare, s1_compare])
    q2_encoded = Concatenate()([q2_encoded, q2_compare, s2_compare])

    aggregate_rnn = CuDNNGRU(rnn_size, return_sequences=True)
    q1_aggregated = aggregate_rnn(q1_encoded)
    q1_aggregated = Dropout(rnn_dropout)(q1_aggregated)
    q2_aggregated = aggregate_rnn(q2_encoded)
    q2_aggregated = Dropout(rnn_dropout)(q2_aggregated)

    # Pooling
    q1_rep = apply_multiple(q1_aggregated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])
    q2_rep = apply_multiple(q2_aggregated, [
        GlobalAvgPool1D(),
        GlobalMaxPool1D(),
    ])

    q_diff = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    q_multi = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)
    h_all1 = Concatenate()([q1_rep, q2_rep, q_diff, q_multi, feature_dense])
    h_all2 = Concatenate()([q2_rep, q1_rep, q_diff, q_multi, feature_dense])
    h_all1 = Dropout(0.5)(h_all1)
    h_all2 = Dropout(0.5)(h_all2)

    dense = Dense(256, activation='relu')

    h_all1 = dense(h_all1)
    h_all2 = dense(h_all2)
    h_all = Average()([h_all1, h_all2])
    predictions = Dense(1, activation='sigmoid')(h_all)
    model = Model(inputs=[q1, q2, feature_input], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
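

# Hypothetical smoke test for carnn (not part of the original listing): the
# config values, vocabulary size, and hyper-parameters below are illustrative
# assumptions, and the explicit keyword arguments sidestep the CARNN_* module
# constants. Running it requires a GPU because the model uses CuDNNGRU.
def demo_carnn():
    import numpy as np
    config = {
        'max_length': 30,
        'embed_trainable': False,
        'spatial_dropout_rate': 0.2,
        'feature_length': 10,
        'dense_dim': 128,
        'optimizer': 'adam',
        'learning_rate': 1e-3,
    }
    # Random stand-in for pretrained embeddings: 20k-word vocab, dim 300.
    embedding_matrix = np.random.normal(size=(20000, 300)).astype('float32')
    model = carnn(embedding_matrix, config,
                  compare_out_size=300, rnn_size=150, rnn_dropout=0.3)
    q1 = np.random.randint(1, 20000, size=(8, config['max_length']))
    q2 = np.random.randint(1, 20000, size=(8, config['max_length']))
    feats = np.random.normal(size=(8, 10)).astype('float32')
    # Inputs must follow the Model(inputs=[q1, q2, feature_input]) order.
    print(model.predict([q1, q2, feats]).shape)  # -> (8, 1)

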
def esim_word_char(embedding_matrix, char_embedding_matrix, config):
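    """Build an ESIM-style model over word- and char-level views: each
    view is encoded by a BiRNN, soft-aligned across the question pair,
    composed by a second BiRNN, and pooled; the two views are then merged
    with dense side features into a single sigmoid output."""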
    # NOTE: both branches use CuDNN layers, so a GPU is required either
    # way; config['gpu'] only affects whether GRU cells are chosen.
    if config['rnn'] == 'gru' and config['gpu']:
        word_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        word_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        word_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        char_compose = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = word_encode(q1_embed)
    q2_encoded = word_encode(q2_embed)

    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         submult(q2_encoded, q1_aligned)])

    # q1_combined = Dropout(self.config['dense_dropout'])(q1_combined)
    # q2_combined = Dropout(self.config['dense_dropout'])(q2_combined)

    q1_compare = word_compose(q1_combined)
    q2_compare = word_compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Distance features between the pooled word representations
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    q1_char = Input(shape=(config['char_max_length'], ),
                    dtype='int32',
                    name='q1_char_input')
    q2_char = Input(shape=(config['char_max_length'], ),
                    dtype='int32',
                    name='q2_char_input')
    char_embedding_layer = Embedding(char_embedding_matrix.shape[0],
                                     char_embedding_matrix.shape[1],
                                     trainable=config['embed_trainable'],
                                     weights=[char_embedding_matrix]
                                     # mask_zero=True
                                     )

    q1_embed_char = char_embedding_layer(q1_char)
    q2_embed_char = char_embedding_layer(q2_char)  # (bsz, char_max_length, emb_dims)
    q1_embed_char = BatchNormalization(axis=2)(q1_embed_char)
    q2_embed_char = BatchNormalization(axis=2)(q2_embed_char)
    q1_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q1_embed_char)
    q2_embed_char = SpatialDropout1D(
        config['spatial_dropout_rate'])(q2_embed_char)

    q1_encoded_char = char_encode(q1_embed_char)
    q2_encoded_char = char_encode(q2_embed_char)

    q1_aligned_char, q2_aligned_char = soft_attention_alignment(
        q1_encoded_char, q2_encoded_char)

    q1_combined_char = Concatenate()([
        q1_encoded_char, q2_aligned_char,
        submult(q1_encoded_char, q2_aligned_char)
    ])
    q2_combined_char = Concatenate()([
        q2_encoded_char, q1_aligned_char,
        submult(q2_encoded_char, q1_aligned_char)
    ])

    # q1_combined = Dropout(self.config['dense_dropout'])(q1_combined)
    # q2_combined = Dropout(self.config['dense_dropout'])(q2_combined)

    q1_compare_char = char_compose(q1_combined_char)
    q2_compare_char = char_compose(q2_combined_char)

    # Aggregate
    q1_rep_char = apply_multiple(
        q1_compare_char,
        [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep_char = apply_multiple(
        q2_compare_char,
        [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Distance features between the pooled char representations
    sub_rep_char = Lambda(lambda x: K.abs(x[0] - x[1]))(
        [q1_rep_char, q2_rep_char])
    mul_rep_char = Lambda(lambda x: x[0] * x[1])([q1_rep_char, q2_rep_char])

    # Classifier
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])
    merged_char = Concatenate()(
        [q1_rep_char, q2_rep_char, sub_rep_char, mul_rep_char])

    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense_char = BatchNormalization()(merged_char)
    dense_char = Dense(config['dense_dim'], activation='elu')(dense_char)
    feature_input = Input(shape=(config['feature_length'], ))
    feature_dense = BatchNormalization()(feature_input)
    feature_dense = Dense(config['dense_dim'],
                          activation='relu')(feature_dense)
    dense = Concatenate()([dense, dense_char, feature_dense])
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2, q1_char, q2_char, feature_input],
                  outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model


def decom(embedding_matrix, config):
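    """Build a decomposable-attention baseline: highway projection, soft
    alignment, a shared time-distributed compare stack, and pooled
    comparison vectors plus abs-diff/product features feeding a small
    dense classifier."""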
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input(shape=(config['max_length'], ), dtype='int32', name='q2_input')
    projection_hidden = 300
    activation = 'elu'
    projection_dropout = 0.2
    projection_dim = 300
    compare_dim = 500  # 300
    compare_dropout = 0.2
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # (bsz, max_length, emb_dims)
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    highway_encoder = TimeDistributed(Highway(activation='relu'))

    q1_encoded = highway_encoder(q1_embed)
    q2_encoded = highway_encoder(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         interaction(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         interaction(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([q1_rep, q2_rep])
    mul_rep = Lambda(lambda x: x[0] * x[1])([q1_rep, q2_rep])

    # Dense meta features (unused in this variant)
    #     meta_densed = BatchNormalization()(meta_features)
    #     meta_densed = Highway(activation='relu')(meta_densed)
    #     meta_densed = Dropout(0.2)(meta_densed)

    # Classifier
    merged = Concatenate()([q1_rep, q2_rep, sub_rep, mul_rep])

    dense = BatchNormalization()(merged)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = Dense(config['dense_dim'], activation='elu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
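

# Likewise, a hypothetical forward-pass sketch for decom (illustrative values
# only). decom takes just the two token-id inputs and, unlike the other two
# builders, uses no CuDNN layers, so it also runs on CPU.
def demo_decom():
    import numpy as np
    config = {
        'max_length': 30,
        'embed_trainable': False,
        'spatial_dropout_rate': 0.2,
        'dense_dim': 128,
        'dense_dropout': 0.3,
        'optimizer': 'adam',
        'learning_rate': 1e-3,
    }
    embedding_matrix = np.random.normal(size=(20000, 300)).astype('float32')
    model = decom(embedding_matrix, config)
    q1 = np.random.randint(1, 20000, size=(8, config['max_length']))
    q2 = np.random.randint(1, 20000, size=(8, config['max_length']))
    print(model.predict([q1, q2]).shape)  # -> (8, 1)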