Example #1
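None of the examples below show their imports. A plausible header for this first example is sketched here: the Keras imports are standard, while the project-local names listed in the second comment are assumptions about helpers living elsewhere in the same repository.

# Standard Keras pieces used by decomposable_attention():
from keras.layers import Input, Dense, Dropout, BatchNormalization, Concatenate
from keras.models import Model
from keras.optimizers import Adam

# Project-local helpers assumed to be importable from the repository:
# config, MAX_LEN, create_pretrained_embedding, time_distributed,
# soft_attention_alignment, submult, MyMaxPool, cross, distence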
def decomposable_attention(pretrained_embedding=config.word_embed_weights,
                           projection_dim=300, projection_hidden=0, projection_dropout=0.2,
                           compare_dim=500, compare_dropout=0.2,
                           dense_dim=300, dense_dropout=0.2,
                           lr=1e-3, activation='elu', maxlen=MAX_LEN):
    # Based on: https://arxiv.org/abs/1606.01933

    
    # Hand-crafted "magic" feature input (one column per feature in config.feats)
    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    q1_embed = embedding(q1)
    q2_embed = embedding(q2)

    # Projection
    projection_layers = []
    if projection_hidden > 0:
        projection_layers.extend([
            Dense(projection_hidden, activation=activation),
            Dropout(rate=projection_dropout),
        ])
    projection_layers.extend([
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ])
    q1_encoded = time_distributed(q1_embed, projection_layers)
    q2_encoded = time_distributed(q2_embed, projection_layers)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate (alternative: combine average and max pooling)
    # q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    # q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Aggregate with max pooling over the time axis
    q1_rep_max = MyMaxPool(axis=1)(q1_compare)
    q2_rep_max = MyMaxPool(axis=1)(q2_compare)

    # Pairwise interaction features between the pooled representations
    cro_max = cross(q1_rep_max, q2_rep_max, compare_dim)
    dist = distence(q1_rep_max, q2_rep_max)

    dense = Concatenate()([q1_rep_max, q2_rep_max, cro_max, dist])

    # NOTE: magic_dense is built above but not merged here, e.g.
    # merged = Concatenate()([q1_rep, q2_rep, magic_dense])
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr), loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    return model
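The helpers time_distributed, soft_attention_alignment and submult used above are not part of this listing. The sketch below shows what they typically look like in open Keras implementations of the decomposable-attention model; the project's own versions may differ in detail.

from keras.activations import softmax
from keras.layers import (Concatenate, Dot, Lambda, Multiply, Permute,
                          Subtract, TimeDistributed)


def unchanged_shape(input_shape):
    """Identity output_shape helper for the Lambda layers below."""
    return input_shape


def time_distributed(input_, layers):
    """Apply a list of layers to every timestep of a sequence tensor."""
    out_ = input_
    for layer in layers:
        out_ = TimeDistributed(layer)(out_)
    return out_


def soft_attention_alignment(input_1, input_2):
    """Dot-product soft alignment between two encoded sequences."""
    attention = Dot(axes=-1)([input_1, input_2])            # (batch, len1, len2)
    w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=unchanged_shape)(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2),
                                     output_shape=unchanged_shape)(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])           # summary of seq 1 at seq 2's steps
    in2_aligned = Dot(axes=1)([w_att_2, input_2])           # summary of seq 2 at seq 1's steps
    return in1_aligned, in2_aligned


def submult(input_1, input_2):
    """Element-wise difference and product features, concatenated."""
    mult = Multiply()([input_1, input_2])
    sub = Subtract()([input_1, input_2])
    return Concatenate()([sub, mult])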
Example #2
def bma_gru():

    # The embedding layer containing the word vectors
    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=True)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)
    # Model variables

    n_hidden = 128

    # Define the shared model
    x = Sequential()
    x.add(emb_layer)
    # Stacked bidirectional LSTM encoder over character embeddings
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(BatchNormalization())
    x.add(MyMaxPool(axis=1))
    shared_model = x

    x2 = Sequential()
    x2.add(emb_layer_word)
    # Bidirectional LSTM encoder over word embeddings
    x2.add(Bidirectional(LSTM(10, return_sequences=True)))
    #x2.add(Bidirectional(LSTM(n_hidden,return_sequences=True)))
    x2.add(BatchNormalization())
    x2.add(MyMaxPool(axis=1))
    shared_model2 = x2
    # The visible layer

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    left_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    right_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    w1 = Input(shape=(config.word_maxlen, ), dtype='int32')
    w2 = Input(shape=(config.word_maxlen, ), dtype='int32')

    left = shared_model(left_input)
    right = shared_model(right_input)

    left_w = shared_model2(w1)
    right_w = shared_model2(w2)

    # Pack it all up into a Manhattan Distance model
    malstm_distance = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left, right])

    malstm_distance2 = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left_w, right_w])

    cro = cross(left, right, n_hidden * 2)
    cro2 = cross(left_w, right_w, n_hidden * 2)

    # NOTE: malstm_distance (char level) and cro2 are computed above but are
    # not merged into the classifier below.
    # if config.nofeats:
    merge = concatenate([left, right, cro, malstm_distance2, magic_dense])
    # else:
    #     merge = concatenate([cro, cro2])

    # The MLP that determines the outcome
    x = Dropout(0.2)(merge)
    x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(x)

    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=[left_input, right_input, w1, w2, magic_input],
                  outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    shared_model.summary()
    return model
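MyMaxPool, used in every example, is a project-local layer rather than a stock Keras one. Since its definition is not shown here, the following is only a plausible mask-aware reconstruction; with mask_zero=True embeddings (as in bma_gru), skipping the padded timesteps during pooling is the point of using a custom layer.

import keras.backend as K
from keras.layers import Layer


class MyMaxPool(Layer):
    """Max-pool over one axis, ignoring masked timesteps when a mask is present."""

    def __init__(self, axis=1, **kwargs):
        self.axis = axis
        self.supports_masking = True
        super(MyMaxPool, self).__init__(**kwargs)

    def compute_mask(self, inputs, mask=None):
        # The pooled output is a fixed-size vector, so no mask is propagated.
        return None

    def call(self, x, mask=None):
        if mask is not None:
            mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
            # Push masked timesteps towards -inf so they never win the max.
            x = x * mask + (1.0 - mask) * (-1e9)
        return K.max(x, axis=self.axis)

    def compute_output_shape(self, input_shape):
        return tuple(s for i, s in enumerate(input_shape) if i != self.axis)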
Example #3
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.2):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    q1_w = Input(name='q1_w', shape=(maxlen,))
    q2_w = Input(name='q2_w', shape=(maxlen,))

    # Embedding
    emb_layer = create_pretrained_embedding(
        config.char_embed_weights, mask_zero=True)
    emb_layer_word = create_pretrained_embedding(
        config.word_embed_weights, mask_zero=True)
    
    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))
    
    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(LSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate (alternative: combine max and mean pooling)
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    q1_rep = MyMaxPool(axis=1)(q1_compare)
    q2_rep = MyMaxPool(axis=1)(q2_compare)

    q1_w_rep = MyMaxPool(axis=1)(q1_w_encoded)
    q2_w_rep = MyMaxPool(axis=1)(q2_w_encoded)
    
    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)

    dense = Concatenate()([q1_rep, q2_rep, cro, dist, dist2, magic_dense])
    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)


    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
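Precision, Recall and F1 passed to model.compile() above are custom metric functions from the project, not Keras built-ins. A common backend-level definition for binary labels, offered only as an assumption about what they compute, looks like this:

import keras.backend as K


def Precision(y_true, y_pred):
    """Precision on thresholded (0.5) predictions, computed batch-wise."""
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return true_positives / (predicted_positives + K.epsilon())


def Recall(y_true, y_pred):
    """Recall on thresholded (0.5) predictions, computed batch-wise."""
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())


def F1(y_true, y_pred):
    """Harmonic mean of the precision and recall metrics above."""
    p = Precision(y_true, y_pred)
    r = Recall(y_true, y_pred)
    return 2 * p * r / (p + r + K.epsilon())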
Example #4
def BMA_GRU(pretrained_embedding=config.word_embed_weights,
            maxlen=MAX_LEN,
            lstm_dim=300,
            dense_dim=300,
            dense_dropout=0.2,
            pool="max",
            mode='char+word'):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))

    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    att_flag = True
    q1_compare, q2_compare = esim_blok(q1_encoded, q2_encoded, att_flag)
    q1_compare_w, q2_compare_w = esim_blok(q1_w_encoded, q2_w_encoded,
                                           att_flag)

    # q1_rep ,q2_rep = q1_encoded,q2_encoded
    # q1_w_rep , q2_w_rep = q1_w_encoded,q2_w_encoded

    # q1_rep ,q2_rep = q1_compare,q2_compare
    # q1_w_rep , q2_w_rep = q1_compare_w,q2_compare_w

    if pool == 'max':
        q1_rep = MyMaxPool(axis=1)(q1_compare)
        q2_rep = MyMaxPool(axis=1)(q2_compare)

        q1_w_rep = MyMaxPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMaxPool(axis=1)(q2_compare_w)
    elif pool == 'mean':

        q1_rep = MyMeanPool(axis=1)(q1_compare)
        q2_rep = MyMeanPool(axis=1)(q2_compare)

        q1_w_rep = MyMeanPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMeanPool(axis=1)(q2_compare_w)
    else:
        q1_rep = Attention(maxlen)(q1_compare)
        q2_rep = Attention(maxlen)(q2_compare)

        q1_w_rep = Attention(maxlen)(q1_compare_w)
        q2_w_rep = Attention(maxlen)(q2_compare_w)

    # # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    # Classifier
    # NOTE: cro, dist and dist2 are computed here but, like magic_dense, are not
    # merged into the classifier below; magic_input nevertheless stays a model input.
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)

    if mode == "char":

        dense = Concatenate()([
            q1_rep,
            q2_rep,
        ])
    elif mode == "word":
        dense = Concatenate()([q1_w_rep, q2_w_rep])
    else:
        dense = Concatenate()([q1_rep, q2_rep, q1_w_rep, q2_w_rep])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
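All four examples build their embedding layers with create_pretrained_embedding(). A minimal sketch, assuming the weights are stored as a NumPy (vocab_size, embed_dim) array at the path held in config (the project's actual loader may differ):

import numpy as np
from keras.layers import Embedding


def create_pretrained_embedding(weights_path, trainable=False, **kwargs):
    """Embedding layer initialised from a pre-trained (vocab, dim) weight matrix."""
    embedding_matrix = np.load(weights_path)
    return Embedding(input_dim=embedding_matrix.shape[0],
                     output_dim=embedding_matrix.shape[1],
                     weights=[embedding_matrix],
                     trainable=trainable,
                     **kwargs)  # e.g. mask_zero=True/False as in the examples above

With the helpers in place, any of the models can be built and trained in the usual way, e.g. model = BMA_GRU(pool='max', mode='char+word') followed by model.fit([q1, q2, q1_w, q2_w, magic_feats], labels, ...), where the first four arrays are padded index sequences of length maxlen and magic_feats has one column per entry in config.feats.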