Example 1
def model_conv1D_(lr=0.005):

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)

    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    seq1_char = Input(shape=(config.word_maxlen, ), name='q1_c')
    seq2_char = Input(shape=(config.word_maxlen, ), name='q2_c')

    seq1_word = Input(shape=(config.word_maxlen, ), name='q1_w')
    seq2_word = Input(shape=(config.word_maxlen, ), name='q2_w')
    magic_input = Input(shape=(len(config.feats), ))

    emb1_char = emb_layer(seq1_char)
    emb2_char = emb_layer(seq2_char)

    emb1_word = emb_layer_word(seq1_word)
    emb2_word = emb_layer_word(seq2_word)

    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    match_list_char = cnn_help(emb1_char, emb2_char)
    match_list_word = cnn_help2(emb1_word, emb2_word)
    merge = concatenate([match_list_char, match_list_word, magic_dense])

    # x = Dropout(0.5)(merge)
    # x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(merge)

    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)
    #model = Model(inputs=[seq1_char, seq2_char, magic_input], outputs=pred)
    model = Model(
        inputs=[seq1_char, seq2_char, seq1_word, seq2_word, magic_input],
        outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=lr),
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
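A minimal sketch of how this five-input model might be fed during training, using random placeholder arrays in place of the real tokenized question pairs (config.word_maxlen, config.feats and model_conv1D_ come from the module above; the array contents and batch size are made up):

import numpy as np

n = 32                                    # illustrative batch of question pairs
maxlen, n_feats = config.word_maxlen, len(config.feats)

q1_c = np.random.randint(1, 100, size=(n, maxlen))   # char-id sequences
q2_c = np.random.randint(1, 100, size=(n, maxlen))   # (ids must stay below the
q1_w = np.random.randint(1, 100, size=(n, maxlen))   #  embedding vocabulary size)
q2_w = np.random.randint(1, 100, size=(n, maxlen))
feats = np.random.rand(n, n_feats)                    # hand-crafted pair features
labels = np.random.randint(0, 2, size=(n, 1))

model = model_conv1D_(lr=0.005)
model.fit([q1_c, q2_c, q1_w, q2_w, feats], labels, batch_size=8, epochs=1)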
Example 2
def MATCHSRNN(channel=2):
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))
    q1_embed = emb_layer(q1)

    q2_embed = emb_layer(q2)

    match_tensor = MatchTensor(channel=channel)([q1_embed, q2_embed])

    # The permuted tensor is built but not used below; SpatialGRU consumes the
    # raw match tensor.
    match_tensor_permute = Permute((2, 3, 1))(match_tensor)
    h_ij = SpatialGRU()(match_tensor)

    h_ij_drop = Dropout(rate=0.5)(h_ij)

    out_ = Dense(2, activation='softmax')(h_ij_drop)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
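Note that the head here is a two-unit softmax trained with binary_crossentropy, so the labels passed to fit() need two columns (one per class) rather than a single 0/1 column; q1_data, q2_data and feats below are placeholders for the real inputs:

import numpy as np
from keras.utils import to_categorical

y = np.array([0, 1, 1, 0])                     # raw 0/1 match labels
y_onehot = to_categorical(y, num_classes=2)    # shape (4, 2), one column per class
# model.fit([q1_data, q2_data, feats], y_onehot, ...)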
Example 3
def arc2(a1d_kernel_count=256,
         a1d_kernel_size=3,
         num_conv2d_layers=1,
         a2d_kernel_counts=[64],
         a2d_kernel_sizes=[[5, 5], [5, 5]],
         a2d_mpool_sizes=[[2, 2], [2, 2]]):
    emb_layer = create_pretrained_embedding(config.word_embed_weights,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    q1_w = Input(shape=(config.word_maxlen, ))
    q2_w = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))
    q1_embed = emb_layer(q1)

    q2_embed = emb_layer(q2)

    q_conv1 = Conv1D(a1d_kernel_count, a1d_kernel_size,
                     padding='same')(q1_embed)

    d_conv1 = Conv1D(a1d_kernel_count, a1d_kernel_size,
                     padding='same')(q2_embed)

    cross = Match(match_type='plus')([q_conv1, d_conv1])

    z = Reshape((config.word_maxlen, config.word_maxlen, -1))(cross)

    for i in range(num_conv2d_layers):
        z = Conv2D(filters=a2d_kernel_counts[i],
                   kernel_size=a2d_kernel_sizes[i],
                   padding='same',
                   activation='relu')(z)
        z = MaxPooling2D(pool_size=(a2d_mpool_sizes[i][0],
                                    a2d_mpool_sizes[i][1]))(z)

    #dpool = DynamicMaxPooling(self.config['dpool_size'][0], self.config['dpool_size'][1])([conv2d, dpool_index])
    pool1_flat = Flatten()(z)

    pool1_flat_drop = Dropout(rate=0.5)(pool1_flat)

    out_ = Dense(1, activation='sigmoid')(pool1_flat_drop)

    # q1_w, q2_w and magic_input do not feed into out_; presumably they are
    # kept so every model in this project accepts the same input list.
    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
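The Match layer above looks like MatchZoo's interaction layer; with match_type='plus' it forms a word-by-word interaction tensor by broadcasting one convolved sequence against the other. A rough plain-Keras reading of that idea (an assumption, not the library's actual code):

import tensorflow as tf
from keras.layers import Lambda

def plus_interaction(tensors):
    # Broadcast-add two (batch, len, dim) tensors into (batch, len1, len2, dim).
    q, d = tensors
    return tf.expand_dims(q, 2) + tf.expand_dims(d, 1)

# cross = Lambda(plus_interaction)([q_conv1, d_conv1])  # then Reshape as above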
Example 4
def fuck_my_rnn():
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    lstm_layer = Bidirectional(CuDNNLSTM(250))  #, recurrent_dropout=0.2))

    sequence_1_input = Input(shape=(config.word_maxlen, ), dtype="int32")
    embedded_sequences_1 = emb_layer(sequence_1_input)
    x1 = lstm_layer(embedded_sequences_1)

    sequence_2_input = Input(shape=(config.word_maxlen, ), dtype="int32")
    embedded_sequences_2 = emb_layer(sequence_2_input)
    y1 = lstm_layer(embedded_sequences_2)

    magic_input = Input(shape=(len(config.feats), ), dtype="float32")
    features_dense = BatchNormalization()(magic_input)
    features_dense = Dense(2, activation="relu")(features_dense)
    features_dense = Dropout(0.2)(features_dense)

    addition = add([x1, y1])
    minus_y1 = Lambda(lambda x: -x)(y1)
    merged = add([x1, minus_y1])
    merged = multiply([merged, merged])
    merged = concatenate([merged, addition])
    merged = Dropout(0.4)(merged)

    merged = concatenate([merged, features_dense])
    merged = BatchNormalization()(merged)
    merged = GaussianNoise(0.1)(merged)

    merged = Dense(300, activation="relu")(merged)
    # merged = Dropout(0.2)(merged)
    # merged = BatchNormalization()(merged)

    # Single sigmoid unit as the prediction head, matching the
    # binary_crossentropy loss below.
    out = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[sequence_1_input, sequence_2_input, magic_input],
                  outputs=out)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(),
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
Example 5
def drmm_tks(num_layer=4, hidden_sizes=[256, 128, 128, 64], topk=20):
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    mm = Dot(axes=[2, 2], normalize=True)([q1_embed, q2_embed])

    # compute term gating
    w_g = Dense(1)(q1_embed)
    g = Lambda(lambda x: softmax(x, axis=1),
               output_shape=(config.word_maxlen, ))(w_g)
    g = Reshape((config.word_maxlen, ))(g)

    # keep only the top-k match signals per query term
    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=topk, sorted=True)[0])(mm)

    for i in range(num_layer):
        mm_k = Dense(hidden_sizes[i],
                     activation='softplus',
                     kernel_initializer='he_uniform',
                     bias_initializer='zeros')(mm_k)

    mm_k_dropout = Dropout(rate=0.5)(mm_k)
    mm_reshape = mm_k_dropout  # Reshape((config.word_maxlen, ))(mm_k_dropout)

    mean = Dot(axes=[1, 1])([mm_reshape, g])

    out_ = Dense(2, activation='softmax')(mean)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
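The K.tf alias used in the top-k Lambda relies on the old multi-backend Keras running on a TensorFlow 1.x backend; under tf.keras the same selection can be written directly against TensorFlow (a sketch):

import tensorflow as tf
from tensorflow.keras.layers import Lambda

topk = 20
top_k_layer = Lambda(lambda x: tf.math.top_k(x, k=topk, sorted=True)[0])
# top_k_layer(mm) keeps the `topk` largest similarity values along the last axis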
Example 6
def test():

    emb_layer = create_pretrained_embedding(
        config.word_embed_weight, mask_zero=False)
    q1 = Input(shape=(config.word_maxlen,))
    q2 = Input(shape=(config.word_maxlen,))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1,))
    else:
        magic_input = Input(shape=(len(config.feats),))
    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    

    cross = Dot(axes=[2, 2], normalize=False)([q1_embed, q2_embed])
    cross_reshape = Reshape((config.word_maxlen, config.word_maxlen,
                             1))(cross)

    conv2d = Conv2D(256, 3, padding='same', activation='relu')

    conv1 = conv2d(cross_reshape)
    conv1 = MaxPooling2D()(conv1)
    conv1 = Conv2D(128, 3, padding='same', activation='relu')(conv1)
    pool1 = MaxPooling2D()(conv1)
    pool1_flat = Flatten()(pool1)

    pool1_flat_drop = Dropout(rate=0.5)(pool1_flat)

    out_ = Dense(128, activation='relu')(pool1_flat_drop)
    out_ = Dense(2, activation='softmax')(out_)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam', metrics=['acc'])
    model.summary()
    return model
Example 7
def decomposable_attention(pretrained_embedding=config.word_embed_weights,
                           projection_dim=300, projection_hidden=0, projection_dropout=0.2,
                           compare_dim=500, compare_dropout=0.2,
                           dense_dim=300, dense_dropout=0.2,
                           lr=1e-3, activation='elu', maxlen=MAX_LEN):
    # Based on: https://arxiv.org/abs/1606.01933

    
    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    q1_embed = embedding(q1)
    q2_embed = embedding(q2)

    # Projection
    projection_layers = []
    if projection_hidden > 0:
        projection_layers.extend([
            Dense(projection_hidden, activation=activation),
            Dropout(rate=projection_dropout),
        ])
    projection_layers.extend([
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ])
    q1_encoded = time_distributed(q1_embed, projection_layers)
    q2_encoded = time_distributed(q2_embed, projection_layers)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # # Aggregate
    # q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    # q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    q1_rep_max = MyMaxPool(axis=1)(q1_compare)
    q2_rep_max = MyMaxPool(axis=1)(q2_compare)

    cro_max = cross(q1_rep_max, q2_rep_max, compare_dim)
    dist = distence(q1_rep_max, q2_rep_max)

    #dense = cro
    dense = Concatenate()([q1_rep_max, q2_rep_max, cro_max, dist])

    #merged = Concatenate()([q1_rep, q2_rep,magic_dense])
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2,magic_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr), loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    return model
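time_distributed, soft_attention_alignment and submult are helpers that are not shown on this page; a common way to write them in Keras decomposable-attention / ESIM kernels (an assumption about this codebase, not its verified source) is:

from keras.activations import softmax
from keras.layers import (Add, Concatenate, Dot, Lambda, Multiply, Permute,
                          TimeDistributed)


def unchanged_shape(input_shape):
    # Identity output_shape helper for Lambda layers.
    return input_shape


def time_distributed(input_, layers):
    # Apply each layer to every timestep of a (batch, time, dim) tensor.
    out_ = input_
    for layer in layers:
        out_ = TimeDistributed(layer)(out_)
    return out_


def substract(input_1, input_2):
    # Element-wise difference of two tensors.
    neg_input_2 = Lambda(lambda x: -x, output_shape=unchanged_shape)(input_2)
    return Add()([input_1, neg_input_2])


def submult(input_1, input_2):
    # Concatenate element-wise difference and element-wise product.
    mult = Multiply()([input_1, input_2])
    sub = substract(input_1, input_2)
    return Concatenate()([sub, mult])


def soft_attention_alignment(input_1, input_2):
    # Soft-align two encoded sequences against each other.
    attention = Dot(axes=-1)([input_1, input_2])           # (batch, len1, len2)
    w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=unchanged_shape)(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2),
                                     output_shape=unchanged_shape)(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])          # aligned to input_2
    in2_aligned = Dot(axes=1)([w_att_2, input_2])          # aligned to input_1
    return in1_aligned, in2_aligned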
Example 8
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.2):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    q1_w = Input(name='q1_w', shape=(maxlen,))
    q2_w = Input(name='q2_w', shape=(maxlen,))

    # Embedding
    emb_layer = create_pretrained_embedding(
        config.char_embed_weights, mask_zero=True)
    emb_layer_word = create_pretrained_embedding(
        config.word_embed_weights, mask_zero=True)
    
    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))
    
    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])


    compose = Bidirectional(LSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    q1_rep = MyMaxPool(axis=1)(q1_compare)
    q2_rep = MyMaxPool(axis=1)(q2_compare)

    q1_w_rep = MyMaxPool(axis=1)(q1_w_encoded)
    q2_w_rep = MyMaxPool(axis=1)(q2_w_encoded)

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)
    #dense = cro

    dense = Concatenate()([q1_rep, q2_rep, cro, dist, dist2, magic_dense])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
Example 9
def test0(
    alm_kernel_count=64,
    alm_hidden_sizes=[256, 512],
    dm_kernel_count=32,
    dm_kernel_size=3,
    dm_q_hidden_size=32,
    dm_d_mpool=3,
    dm_hidden_sizes=[50],
):
    def xor_match(x):
        t1 = x[0]
        t2 = x[1]
        t1_shape = t1.get_shape()
        t2_shape = t2.get_shape()
        t1_expand = K.tf.stack([t1] * t2_shape[1], 2)
        t2_expand = K.tf.stack([t2] * t1_shape[1], 1)
        out_bool = K.tf.equal(t1_expand, t2_expand)
        out = K.tf.cast(out_bool, K.tf.float32)
        return out

    def hadamard_dot(x):
        x1 = x[0]
        x2 = x[1]
        out = x1 * x2
        #out = tf.matmul(x1, x2)
        #out = K.tf.einsum('ij, ijk -> jk', x1, x2)
        return out

    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))
    q1_embed = emb_layer(q1)

    q2_embed = emb_layer(q2)

    lm_xor = Lambda(xor_match)([q1, q2])

    #lm_xor_reshape = Reshape((self.config['text1_maxlen'], self.config['text2_maxlen'], 1))(lm_xor)
    #show_layer_info('Reshape', lm_xor_reshape)
    lm_conv = Conv1D(alm_kernel_count,
                     config.word_maxlen,
                     padding='same',
                     activation='tanh')(lm_xor)

    lm_conv = Dropout(0.5)(lm_conv)

    lm_feat = Reshape((-1, ))(lm_conv)

    for hidden_size in alm_hidden_sizes:
        lm_feat = Dense(hidden_size, activation='tanh')(lm_feat)

    lm_drop = Dropout(0.5)(lm_feat)

    lm_score = Dense(1)(lm_drop)

    dm_q_conv = Conv1D(dm_kernel_count,
                       dm_kernel_size,
                       padding='same',
                       activation='tanh')(q1_embed)

    dm_q_conv = Dropout(0.5)(dm_q_conv)

    dm_q_mp = MaxPooling1D(pool_size=config.word_maxlen)(dm_q_conv)

    dm_q_rep = Reshape((-1, ))(dm_q_mp)

    dm_q_rep = Dense(dm_q_hidden_size)(dm_q_rep)

    dm_q_rep = Lambda(lambda x: tf.expand_dims(x, 1))(dm_q_rep)

    dm_d_conv1 = Conv1D(dm_kernel_count,
                        dm_kernel_size,
                        padding='same',
                        activation='tanh')(q2_embed)

    dm_d_conv1 = Dropout(0.5)(dm_d_conv1)

    dm_d_mp = MaxPooling1D(pool_size=dm_d_mpool)(dm_d_conv1)

    dm_d_conv2 = Conv1D(dm_kernel_count, 1, padding='same',
                        activation='tanh')(dm_d_mp)

    dm_d_conv2 = Dropout(0.5)(dm_d_conv2)

    h_dot = Lambda(hadamard_dot)([dm_q_rep, dm_d_conv2])

    dm_feat = Reshape((-1, ))(h_dot)

    for hidden_size in dm_hidden_sizes:
        dm_feat = Dense(hidden_size)(dm_feat)

    dm_feat_drop = Dropout(0.5)(dm_feat)

    dm_score = Dense(1)(dm_feat_drop)

    out_ = Add()([lm_score, dm_score])

    out_ = Dense(2, activation='softmax')(out_)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
Example 10
def bimpm():
    print('--- Building model...')

    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    sequence_length = config.word_maxlen

    rnn_unit = 'gru'

    dropout = 0.5
    context_rnn_dim = 128
    mp_dim = 128
    highway = True
    aggregate_rnn_dim = 64
    dense_dim = 128

    # Model words input
    w1 = Input(shape=(sequence_length, ), dtype='int32')
    w2 = Input(shape=(sequence_length, ), dtype='int32')

    # Build word representation layer

    w_res1 = emb_layer(w1)
    w_res2 = emb_layer(w2)

    sequence1 = w_res1
    sequence2 = w_res2

    # Build context representation layer

    context_layer = ContextLayer(context_rnn_dim,
                                 rnn_unit=rnn_unit,
                                 dropout=dropout,
                                 highway=highway,
                                 input_shape=(
                                     sequence_length,
                                     K.int_shape(sequence1)[-1],
                                 ),
                                 return_sequences=True)

    context1 = context_layer(sequence1)  #()
    context2 = context_layer(sequence2)

    print('context1', context1)
    print('context2', context2)
    # Build matching layer
    matching_layer = MultiPerspective(mp_dim)
    matching1 = matching_layer([context1, context2])

    matching2 = matching_layer([context2, context1])
    print('matching1:', matching1)
    print('matching2:', matching2)
    matching = concatenate([matching1, matching2])

    print('matching:', matching)
    # Build aggregation layer
    aggregate_layer = ContextLayer(rnn_dim=aggregate_rnn_dim,
                                   rnn_unit=rnn_unit,
                                   dropout=dropout,
                                   highway=highway,
                                   input_shape=(
                                       sequence_length,
                                       K.int_shape(matching)[-1],
                                   ),
                                   return_sequences=False)

    #aggregate_layer=Bidirectional(GRU(256, return_sequences=True,input_dim=256, input_length=40))
    #aggregation =aggregate_layer(matching)
    #aggregation = Flatten()(matching)
    aggregation = GlobalAveragePooling1D()(matching)
    print('aggregation', aggregation)
    # Build prediction layer
    # pred = PredictLayer(dense_dim,
    #                     input_dim=K.int_shape(aggregation)[-1],
    #                     dropout=dropout)(aggregation)

    # pred = Dense(512,input_shape=(256,))(aggregation)
    # print('pred',pred)
    pred = Dense(2, activation='softmax')(aggregation)
    print('pred', pred)
    if config.feats == []:
        # Model words input
        megic_feats = Input(shape=(1, ), dtype='int32')
    else:
        megic_feats = Input(shape=(len(config.feats), ), dtype='int32')

    # Build model graph
    model = Model(inputs=[w1, w2, megic_feats], outputs=pred)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
Example 11
def bma_gru():

    # The embedding layer containing the word vectors
    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=True)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)
    # Model variables

    n_hidden = 128

    # Define the shared model
    x = Sequential()
    x.add(emb_layer)
    # # LSTM
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(BatchNormalization())
    x.add(MyMaxPool(axis=1))
    shared_model = x

    x2 = Sequential()
    x2.add(emb_layer_word)
    # # LSTM
    x2.add(Bidirectional(LSTM(10, return_sequences=True)))
    #x2.add(Bidirectional(LSTM(n_hidden,return_sequences=True)))
    x2.add(BatchNormalization())
    x2.add(MyMaxPool(axis=1))
    shared_model2 = x2
    # The visible layer

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    left_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    right_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    w1 = Input(shape=(config.word_maxlen, ), dtype='int32')
    w2 = Input(shape=(config.word_maxlen, ), dtype='int32')

    left = shared_model(left_input)
    right = shared_model(right_input)

    left_w = shared_model2(w1)
    right_w = shared_model2(w2)

    # Pack it all up into a Manhattan Distance model
    malstm_distance = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left, right])

    malstm_distance2 = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left_w, right_w])

    cro = cross(left, right, n_hidden * 2)
    cro2 = cross(left_w, right_w, n_hidden * 2)

    #if config.nofeats:
    merge = concatenate([left, right, cro, malstm_distance2,
                         magic_dense])  # , magic_dense, malstm_distance])
    # else:
    #     merge = concatenate([ cro,cro2])
    # # The MLP that determines the outcome
    x = Dropout(0.2)(merge)
    x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(x)

    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=[left_input, right_input, w1, w2, magic_input],
                  outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    shared_model.summary()
    return model
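The two Lambda layers above implement the MaLSTM similarity exp(-||a - b||_1), which squashes the L1 distance between the two sentence vectors into (0, 1]; a tiny numpy illustration with made-up values:

import numpy as np

left = np.array([0.2, 1.0, -0.5])
right = np.array([0.1, 0.4, -0.5])
similarity = np.exp(-np.sum(np.abs(left - right)))   # exp(-0.7) ~= 0.497
print(similarity)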
Example 12
def BMA_GRU(pretrained_embedding=config.word_embed_weights,
            maxlen=MAX_LEN,
            lstm_dim=300,
            dense_dim=300,
            dense_dropout=0.2,
            pool="max",
            mode='char+word'):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))

    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    att_flag = True
    q1_compare, q2_compare = esim_blok(q1_encoded, q2_encoded, att_flag)
    q1_compare_w, q2_compare_w = esim_blok(q1_w_encoded, q2_w_encoded,
                                           att_flag)

    # q1_rep ,q2_rep = q1_encoded,q2_encoded
    # q1_w_rep , q2_w_rep = q1_w_encoded,q2_w_encoded

    # q1_rep ,q2_rep = q1_compare,q2_compare
    # q1_w_rep , q2_w_rep = q1_compare_w,q2_compare_w

    if pool == 'max':
        q1_rep = MyMaxPool(axis=1)(q1_compare)
        q2_rep = MyMaxPool(axis=1)(q2_compare)

        q1_w_rep = MyMaxPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMaxPool(axis=1)(q2_compare_w)
    elif pool == 'mean':

        q1_rep = MyMeanPool(axis=1)(q1_compare)
        q2_rep = MyMeanPool(axis=1)(q2_compare)

        q1_w_rep = MyMeanPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMeanPool(axis=1)(q2_compare_w)
    else:
        q1_rep = Attention(maxlen)(q1_compare)
        q2_rep = Attention(maxlen)(q2_compare)

        q1_w_rep = Attention(maxlen)(q1_compare_w)
        q2_w_rep = Attention(maxlen)(q2_compare_w)

    # # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)
    #dense = cro

    if mode == "char":

        dense = Concatenate()([
            q1_rep,
            q2_rep,
        ])
    elif mode == "word":
        dense = Concatenate()([q1_w_rep, q2_w_rep])
    else:
        dense = Concatenate()([q1_rep, q2_rep, q1_w_rep, q2_w_rep])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
Example 13
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.5):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))
    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    bn = BatchNormalization(axis=2)
    q1_embed = bn(embedding(q1))
    q2_embed = bn(embedding(q2))

    # Encode
    encode = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    #dense = cro
    dense = Concatenate()([q1_rep, q2_rep])

    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
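apply_multiple, used in the Aggregate step above, is another helper not defined on this page; in the usual Keras ESIM kernels it is written roughly as below (again an assumption about this codebase):

from keras.layers import Concatenate


def apply_multiple(input_, layers):
    # Apply several pooling layers to one tensor and concatenate the results.
    if len(layers) < 2:
        raise ValueError('layers should contain at least two layers')
    return Concatenate()([layer(input_) for layer in layers])

# q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])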
Example 14
def dssm(lstmsize=20):
    # Embedding
    emb_layer_char = create_pretrained_embedding(config.char_embed_weights,
                                                 trainable=True,
                                                 mask_zero=False)

    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 trainable=False,
                                                 mask_zero=False)

    char_weights = np.load(config.char_embed_weights)
    word_weights = np.load(config.word_embed_weights)

    input1 = Input(shape=(config.word_maxlen, ))
    input2 = Input(shape=(config.word_maxlen, ))
    input3 = Input(shape=(len(config.feats), ))
    embed1 = emb_layer_word  # Embedding(word_weights.shape)
    lstm0 = CuDNNLSTM(lstmsize, return_sequences=True)
    lstm1 = Bidirectional(CuDNNLSTM(lstmsize))
    lstm2 = CuDNNLSTM(lstmsize)
    att1 = Attention(config.word_maxlen)
    den = Dense(64, activation='tanh')

    # att1 = Lambda(lambda x: K.max(x,axis = 1))
    v3 = embed1(input3)
    v1 = embed1(input1)
    v2 = embed1(input2)
    v11 = lstm1(v1)
    v22 = lstm1(v2)
    v1ls = lstm2(lstm0(v1))
    v2ls = lstm2(lstm0(v2))
    v1 = Concatenate(axis=1)([att1(v1), v11])
    v2 = Concatenate(axis=1)([att1(v2), v22])

    input1c = Input(shape=(config.word_maxlen, ))
    input2c = Input(shape=(config.word_maxlen, ))
    embed1c = emb_layer_char  #Embedding(char_weights.shape)
    lstm1c = Bidirectional(CuDNNLSTM(56, return_sequences=True))
    lstm2c = Bidirectional(CuDNNLSTM(56))
    att1c = Attention(config.word_maxlen)
    v1c = embed1c(input1c)
    v2c = embed1c(input2c)
    v11c = lstm1c(v1c)
    v22c = lstm1c(v2c)
    v11c = lstm2c(v11c)
    v22c = lstm2c(v22c)
    v1c = Concatenate(axis=1)([att1c(v1c), v11c])
    v2c = Concatenate(axis=1)([att1c(v2c), v22c])

    mul = Multiply()([v1, v2])
    sub = Lambda(lambda x: K.abs(x))(Subtract()([v1, v2]))
    maximum = Maximum()([Multiply()([v1, v1]), Multiply()([v2, v2])])
    mulc = Multiply()([v1c, v2c])
    subc = Lambda(lambda x: K.abs(x))(Subtract()([v1c, v2c]))
    maximumc = Maximum()([Multiply()([v1c, v1c]), Multiply()([v2c, v2c])])
    sub2 = Lambda(lambda x: K.abs(x))(Subtract()([v1ls, v2ls]))

    matchlist = Concatenate(axis=1)(
        [mul, sub, mulc, subc, maximum, maximumc, sub2])
    matchlist = Dropout(0.05)(matchlist)

    matchlist = Concatenate(axis=1)([
        Dense(32, activation='relu')(matchlist),
        Dense(48, activation='sigmoid')(matchlist)
    ])
    res = Dense(2, activation='sigmoid')(matchlist)

    model = Model(inputs=[input1c, input2c, input1, input2, input3],
                  outputs=res)
    model.compile(optimizer=Adam(lr=0.001),
                  loss="binary_crossentropy",
                  metrics=['acc'])
    model.summary()
    return model