def model_conv1D_(lr=0.005):
    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)
    seq1_char = Input(shape=(config.word_maxlen, ), name='q1_c')
    seq2_char = Input(shape=(config.word_maxlen, ), name='q2_c')
    seq1_word = Input(shape=(config.word_maxlen, ), name='q1_w')
    seq2_word = Input(shape=(config.word_maxlen, ), name='q2_w')
    magic_input = Input(shape=(len(config.feats), ))

    emb1_char = emb_layer(seq1_char)
    emb2_char = emb_layer(seq2_char)
    emb1_word = emb_layer_word(seq1_word)
    emb2_word = emb_layer_word(seq2_word)

    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    match_list_char = cnn_help(emb1_char, emb2_char)
    match_list_word = cnn_help2(emb1_word, emb2_word)
    merge = concatenate([match_list_char, match_list_word, magic_dense])

    # x = Dropout(0.5)(merge)
    # x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(merge)
    x = Dropout(0.5)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)

    # model = Model(inputs=[seq1_char, seq2_char, magic_input], outputs=pred)
    model = Model(
        inputs=[seq1_char, seq2_char, seq1_word, seq2_word, magic_input],
        outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer=Adam(lr=lr),
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
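
# ---------------------------------------------------------------------------
# The `Precision`, `Recall` and `F1` objects passed to model.compile() above are
# custom metrics defined elsewhere in this project. As a reference only, the
# functions below are a minimal sketch of what such batch-level Keras-backend
# metrics typically look like; they are an assumption, not the project's actual
# implementation, and use sketch names so they do not shadow the real ones.
def precision_sketch(y_true, y_pred):
    # true positives / predicted positives, computed per batch
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return tp / (predicted_positives + K.epsilon())


def recall_sketch(y_true, y_pred):
    # true positives / actual positives, computed per batch
    tp = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return tp / (possible_positives + K.epsilon())


def f1_sketch(y_true, y_pred):
    # harmonic mean of the two sketch metrics above
    p = precision_sketch(y_true, y_pred)
    r = recall_sketch(y_true, y_pred)
    return 2 * p * r / (p + r + K.epsilon())
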
def MATCHSRNN(channel=2):
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    match_tensor = MatchTensor(channel=channel)([q1_embed, q2_embed])
    match_tensor_permute = Permute((2, 3, 1))(match_tensor)  # currently unused
    h_ij = SpatialGRU()(match_tensor)
    h_ij_drop = Dropout(rate=0.5)(h_ij)
    out_ = Dense(2, activation='softmax')(h_ij_drop)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
def arc2(a1d_kernel_count=256,
         a1d_kernel_size=3,
         num_conv2d_layers=1,
         a2d_kernel_counts=[64],
         a2d_kernel_sizes=[[5, 5], [5, 5]],
         a2d_mpool_sizes=[[2, 2], [2, 2]]):
    emb_layer = create_pretrained_embedding(config.word_embed_weights,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    q1_w = Input(shape=(config.word_maxlen, ))
    q2_w = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    q_conv1 = Conv1D(a1d_kernel_count, a1d_kernel_size,
                     padding='same')(q1_embed)
    d_conv1 = Conv1D(a1d_kernel_count, a1d_kernel_size,
                     padding='same')(q2_embed)
    cross = Match(match_type='plus')([q_conv1, d_conv1])
    z = Reshape((config.word_maxlen, config.word_maxlen, -1))(cross)

    for i in range(num_conv2d_layers):
        z = Conv2D(filters=a2d_kernel_counts[i],
                   kernel_size=a2d_kernel_sizes[i],
                   padding='same',
                   activation='relu')(z)
        z = MaxPooling2D(pool_size=(a2d_mpool_sizes[i][0],
                                    a2d_mpool_sizes[i][1]))(z)

    # dpool = DynamicMaxPooling(self.config['dpool_size'][0],
    #                           self.config['dpool_size'][1])([conv2d, dpool_index])
    pool1_flat = Flatten()(z)
    pool1_flat_drop = Dropout(rate=0.5)(pool1_flat)
    out_ = Dense(1, activation='sigmoid')(pool1_flat_drop)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
def fuck_my_rnn():
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    lstm_layer = Bidirectional(CuDNNLSTM(250))  # , recurrent_dropout=0.2))

    sequence_1_input = Input(shape=(config.word_maxlen, ), dtype="int32")
    embedded_sequences_1 = emb_layer(sequence_1_input)
    x1 = lstm_layer(embedded_sequences_1)

    sequence_2_input = Input(shape=(config.word_maxlen, ), dtype="int32")
    embedded_sequences_2 = emb_layer(sequence_2_input)
    y1 = lstm_layer(embedded_sequences_2)

    magic_input = Input(shape=(len(config.feats), ), dtype="float32")
    features_dense = BatchNormalization()(magic_input)
    features_dense = Dense(2, activation="relu")(features_dense)
    features_dense = Dropout(0.2)(features_dense)

    addition = add([x1, y1])
    minus_y1 = Lambda(lambda x: -x)(y1)
    merged = add([x1, minus_y1])
    merged = multiply([merged, merged])
    merged = concatenate([merged, addition])
    merged = Dropout(0.4)(merged)

    merged = concatenate([merged, features_dense])
    merged = BatchNormalization()(merged)
    merged = GaussianNoise(0.1)(merged)

    merged = Dense(300, activation="relu")(merged)
    # merged = Dropout(0.2)(merged)
    # merged = BatchNormalization()(merged)
    # out = Dense(2, activation="sigmoid")(merged)
    # A single sigmoid unit matches the binary cross-entropy loss used below.
    out = Dense(1, activation="sigmoid")(merged)

    model = Model(inputs=[sequence_1_input, sequence_2_input, magic_input],
                  outputs=out)
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(),
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
def drmm_tks(num_layer=4, hidden_sizes=[256, 128, 128, 64], topk=20):
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    mm = Dot(axes=[2, 2], normalize=True)([q1_embed, q2_embed])

    # compute term gating
    w_g = Dense(1)(q1_embed)
    g = Lambda(lambda x: softmax(x, axis=1),
               output_shape=(config.word_maxlen, ))(w_g)
    g = Reshape((config.word_maxlen, ))(g)

    mm_k = Lambda(lambda x: K.tf.nn.top_k(x, k=topk, sorted=True)[0])(mm)

    for i in range(num_layer):
        mm_k = Dense(hidden_sizes[i],
                     activation='softplus',
                     kernel_initializer='he_uniform',
                     bias_initializer='zeros')(mm_k)

    mm_k_dropout = Dropout(rate=0.5)(mm_k)
    mm_reshape = mm_k_dropout  # Reshape((config.word_maxlen,))(mm_k_dropout)

    mean = Dot(axes=[1, 1])([mm_reshape, g])
    out_ = Dense(2, activation='softmax')(mean)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
def test():
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    cross = Dot(axes=[2, 2], normalize=False)([q1_embed, q2_embed])
    cross_reshape = Reshape((config.word_maxlen, config.word_maxlen,
                             1))(cross)

    conv2d = Conv2D(256, 3, padding='same', activation='relu')
    conv1 = conv2d(cross_reshape)
    conv1 = MaxPooling2D()(conv1)
    conv1 = Conv2D(128, 3, padding='same', activation='relu')(conv1)
    pool1 = MaxPooling2D()(conv1)
    pool1_flat = Flatten()(pool1)
    pool1_flat_drop = Dropout(rate=0.5)(pool1_flat)

    out_ = Dense(128, activation='relu')(pool1_flat_drop)
    out_ = Dense(2, activation='softmax')(out_)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
def decomposable_attention(pretrained_embedding=config.word_embed_weights,
                           projection_dim=300,
                           projection_hidden=0,
                           projection_dropout=0.2,
                           compare_dim=500,
                           compare_dropout=0.2,
                           dense_dim=300,
                           dense_dropout=0.2,
                           lr=1e-3,
                           activation='elu',
                           maxlen=MAX_LEN):
    # Based on: https://arxiv.org/abs/1606.01933
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))

    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    q1_embed = embedding(q1)
    q2_embed = embedding(q2)

    # Projection
    projection_layers = []
    if projection_hidden > 0:
        projection_layers.extend([
            Dense(projection_hidden, activation=activation),
            Dropout(rate=projection_dropout),
        ])
    projection_layers.extend([
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ])
    q1_encoded = time_distributed(q1_embed, projection_layers)
    q2_encoded = time_distributed(q2_embed, projection_layers)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate
    # q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    # q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q1_rep_max = MyMaxPool(axis=1)(q1_compare)
    q2_rep_max = MyMaxPool(axis=1)(q2_compare)
    cro_max = cross(q1_rep_max, q2_rep_max, compare_dim)
    dist = distence(q1_rep_max, q2_rep_max)

    # dense = cro
    dense = Concatenate()([q1_rep_max, q2_rep_max, cro_max, dist])
    # merged = Concatenate()([q1_rep, q2_rep, magic_dense])
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
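
# ---------------------------------------------------------------------------
# `time_distributed`, `soft_attention_alignment` and `submult` used by
# decomposable_attention() are helper functions defined elsewhere in the
# project. The sketches below show one common way such helpers are written for
# this kind of model; they are an assumption, not the project's code, and use
# sketch names so they do not shadow the real helpers.
def time_distributed_sketch(input_, layers):
    """Apply a list of layers to every timestep of `input_`."""
    node_ = input_
    for layer_ in layers:
        node_ = TimeDistributed(layer_)(node_)
    return node_


def soft_attention_alignment_sketch(input_1, input_2):
    """Soft-align two encoded sequences with dot-product attention."""
    attention = Dot(axes=-1)([input_1, input_2])
    w_att_1 = Lambda(lambda x: softmax(x, axis=1))(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2))(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])
    in2_aligned = Dot(axes=1)([w_att_2, input_2])
    return in1_aligned, in2_aligned


def submult_sketch(input_1, input_2):
    """Concatenate the element-wise difference and product of two tensors."""
    mult = Multiply()([input_1, input_2])
    sub = Lambda(lambda x: K.abs(x[0] - x[1]))([input_1, input_2])
    return Concatenate()([sub, mult])
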
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.2):
    # Based on arXiv:1609.06038
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=True)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)

    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))

    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)
    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(LSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    q1_rep = MyMaxPool(axis=1)(q1_compare)
    q2_rep = MyMaxPool(axis=1)(q2_compare)
    q1_w_rep = MyMaxPool(axis=1)(q1_w_encoded)
    q2_w_rep = MyMaxPool(axis=1)(q2_w_encoded)

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)
    # dense = cro
    dense = Concatenate()([q1_rep, q2_rep, cro, dist, dist2, magic_dense])
    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
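
# ---------------------------------------------------------------------------
# `cross` and `distence` used in the classifier head above are interaction
# helpers defined elsewhere in the project; their exact form is not shown in
# this file. Purely as an illustration of the kind of pairwise features they
# could produce, the hypothetical sketches below build a dense interaction of
# the two sentence vectors and a set of element-wise distance features.
def cross_sketch(x1, x2, dim):
    """Hypothetical dense interaction between two sentence vectors."""
    merged = Concatenate()([x1, x2])
    return Dense(dim, activation='relu')(merged)


def distence_sketch(x1, x2):
    """Hypothetical element-wise distance features (abs diff and product)."""
    diff = Lambda(lambda x: K.abs(x[0] - x[1]))([x1, x2])
    prod = Multiply()([x1, x2])
    return Concatenate()([diff, prod])
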
def test0(alm_kernel_count=64,
          alm_hidden_sizes=[256, 512],
          dm_kernel_count=32,
          dm_kernel_size=3,
          dm_q_hidden_size=32,
          dm_d_mpool=3,
          dm_hidden_sizes=[50]):
    def xor_match(x):
        t1 = x[0]
        t2 = x[1]
        t1_shape = t1.get_shape()
        t2_shape = t2.get_shape()
        t1_expand = K.tf.stack([t1] * t2_shape[1], 2)
        t2_expand = K.tf.stack([t2] * t1_shape[1], 1)
        out_bool = K.tf.equal(t1_expand, t2_expand)
        out = K.tf.cast(out_bool, K.tf.float32)
        return out

    def hadamard_dot(x):
        x1 = x[0]
        x2 = x[1]
        out = x1 * x2
        # out = tf.matmul(x1, x2)
        # out = K.tf.einsum('ij, ijk -> jk', x1, x2)
        return out

    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    q1 = Input(shape=(config.word_maxlen, ))
    q2 = Input(shape=(config.word_maxlen, ))
    if len(config.feats) == 0:
        magic_input = Input(shape=(1, ))
    else:
        magic_input = Input(shape=(len(config.feats), ))

    q1_embed = emb_layer(q1)
    q2_embed = emb_layer(q2)

    # Local (exact-match) model
    lm_xor = Lambda(xor_match)([q1, q2])
    # lm_xor_reshape = Reshape((self.config['text1_maxlen'],
    #                           self.config['text2_maxlen'], 1))(lm_xor)
    lm_conv = Conv1D(alm_kernel_count,
                     config.word_maxlen,
                     padding='same',
                     activation='tanh')(lm_xor)
    lm_conv = Dropout(0.5)(lm_conv)
    lm_feat = Reshape((-1, ))(lm_conv)
    for hidden_size in alm_hidden_sizes:
        lm_feat = Dense(hidden_size, activation='tanh')(lm_feat)
    lm_drop = Dropout(0.5)(lm_feat)
    lm_score = Dense(1)(lm_drop)

    # Distributed model
    dm_q_conv = Conv1D(dm_kernel_count,
                       dm_kernel_size,
                       padding='same',
                       activation='tanh')(q1_embed)
    dm_q_conv = Dropout(0.5)(dm_q_conv)
    dm_q_mp = MaxPooling1D(pool_size=config.word_maxlen)(dm_q_conv)
    dm_q_rep = Reshape((-1, ))(dm_q_mp)
    dm_q_rep = Dense(dm_q_hidden_size)(dm_q_rep)
    dm_q_rep = Lambda(lambda x: tf.expand_dims(x, 1))(dm_q_rep)

    dm_d_conv1 = Conv1D(dm_kernel_count,
                        dm_kernel_size,
                        padding='same',
                        activation='tanh')(q2_embed)
    dm_d_conv1 = Dropout(0.5)(dm_d_conv1)
    dm_d_mp = MaxPooling1D(pool_size=dm_d_mpool)(dm_d_conv1)
    dm_d_conv2 = Conv1D(dm_kernel_count, 1, padding='same',
                        activation='tanh')(dm_d_mp)
    dm_d_conv2 = Dropout(0.5)(dm_d_conv2)

    h_dot = Lambda(hadamard_dot)([dm_q_rep, dm_d_conv2])
    dm_feat = Reshape((-1, ))(h_dot)
    # project the distributed-model features through dm_hidden_sizes
    for hidden_size in dm_hidden_sizes:
        dm_feat = Dense(hidden_size)(dm_feat)
    dm_feat_drop = Dropout(0.5)(dm_feat)
    dm_score = Dense(1)(dm_feat_drop)

    out_ = Add()([lm_score, dm_score])
    out_ = Dense(2, activation='softmax')(out_)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['acc'])
    model.summary()
    return model
def bimpm():
    print('--- Building model...')
    emb_layer = create_pretrained_embedding(config.word_embed_weight,
                                            mask_zero=False)
    sequence_length = config.word_maxlen
    rnn_unit = 'gru'
    dropout = 0.5
    context_rnn_dim = 128
    mp_dim = 128
    highway = True
    aggregate_rnn_dim = 64
    dense_dim = 128

    # Model words input
    w1 = Input(shape=(sequence_length, ), dtype='int32')
    w2 = Input(shape=(sequence_length, ), dtype='int32')

    # Build word representation layer
    w_res1 = emb_layer(w1)
    w_res2 = emb_layer(w2)
    sequence1 = w_res1
    sequence2 = w_res2

    # Build context representation layer
    context_layer = ContextLayer(context_rnn_dim,
                                 rnn_unit=rnn_unit,
                                 dropout=dropout,
                                 highway=highway,
                                 input_shape=(sequence_length,
                                              K.int_shape(sequence1)[-1]),
                                 return_sequences=True)
    context1 = context_layer(sequence1)
    context2 = context_layer(sequence2)
    print('context1', context1)
    print('context2', context2)

    # Build matching layer
    matching_layer = MultiPerspective(mp_dim)
    matching1 = matching_layer([context1, context2])
    matching2 = matching_layer([context2, context1])
    print('matching1:', matching1)
    print('matching2:', matching2)
    matching = concatenate([matching1, matching2])
    print('matching:', matching)

    # Build aggregation layer
    aggregate_layer = ContextLayer(rnn_dim=aggregate_rnn_dim,
                                   rnn_unit=rnn_unit,
                                   dropout=dropout,
                                   highway=highway,
                                   input_shape=(sequence_length,
                                                K.int_shape(matching)[-1]),
                                   return_sequences=False)
    # aggregate_layer = Bidirectional(GRU(256, return_sequences=True,
    #                                     input_dim=256, input_length=40))
    # aggregation = aggregate_layer(matching)
    # aggregation = Flatten()(matching)
    aggregation = GlobalAveragePooling1D()(matching)
    print('aggregation', aggregation)

    # Build prediction layer
    # pred = PredictLayer(dense_dim,
    #                     input_dim=K.int_shape(aggregation)[-1],
    #                     dropout=dropout)(aggregation)
    # pred = Dense(512, input_shape=(256,))(aggregation)
    # print('pred', pred)
    pred = Dense(2)(aggregation)
    print('pred', pred)

    # Model features input
    if config.feats == []:
        magic_feats = Input(shape=(1, ), dtype='int32')
    else:
        magic_feats = Input(shape=(len(config.feats), ), dtype='int32')

    # Build model graph
    model = Model(inputs=[w1, w2, magic_feats], outputs=pred)

    # Compile model
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
def bma_gru():
    # The embedding layers containing the pretrained char / word vectors
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=True)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)

    # Model variables
    n_hidden = 128

    # Define the shared model (char branch)
    x = Sequential()
    x.add(emb_layer)
    # LSTM
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(BatchNormalization())
    x.add(MyMaxPool(axis=1))
    shared_model = x

    # Shared model for the word branch
    x2 = Sequential()
    x2.add(emb_layer_word)
    # LSTM
    x2.add(Bidirectional(LSTM(10, return_sequences=True)))
    # x2.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x2.add(BatchNormalization())
    x2.add(MyMaxPool(axis=1))
    shared_model2 = x2

    # The visible layer
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    left_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    right_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    w1 = Input(shape=(config.word_maxlen, ), dtype='int32')
    w2 = Input(shape=(config.word_maxlen, ), dtype='int32')

    left = shared_model(left_input)
    right = shared_model(right_input)
    left_w = shared_model2(w1)
    right_w = shared_model2(w2)

    # Pack it all up into a Manhattan distance model
    malstm_distance = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left, right])
    malstm_distance2 = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left_w, right_w])

    cro = cross(left, right, n_hidden * 2)
    cro2 = cross(left_w, right_w, n_hidden * 2)

    # if config.nofeats:
    merge = concatenate([left, right, cro, malstm_distance2, magic_dense])
    # , magic_dense, malstm_distance])
    # else:
    #     merge = concatenate([cro, cro2])

    # The MLP that determines the outcome
    x = Dropout(0.2)(merge)
    x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(x)
    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=[left_input, right_input, w1, w2, magic_input],
                  outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    shared_model.summary()
    return model
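
# ---------------------------------------------------------------------------
# `MyMaxPool` (and `MyMeanPool` used elsewhere in this file) are custom pooling
# layers defined elsewhere in the project. The class below is a minimal sketch
# of a max-pool layer of that shape, shown only for reference; it is an
# assumption, and the real layers may handle the embedding mask differently.
class MyMaxPoolSketch(Layer):
    """Max pooling over a chosen axis, consuming (and discarding) any mask."""

    def __init__(self, axis=1, **kwargs):
        self.axis = axis
        self.supports_masking = True
        super(MyMaxPoolSketch, self).__init__(**kwargs)

    def compute_mask(self, inputs, mask=None):
        # the pooled output is a fixed-size vector, so no mask is propagated
        return None

    def call(self, x, mask=None):
        return K.max(x, axis=self.axis)

    def compute_output_shape(self, input_shape):
        shape = list(input_shape)
        del shape[self.axis]
        return tuple(shape)
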
def BMA_GRU(pretrained_embedding=config.word_embed_weights,
            maxlen=MAX_LEN,
            lstm_dim=300,
            dense_dim=300,
            dense_dropout=0.2,
            pool="max",
            mode='char+word'):
    # Based on arXiv:1609.06038
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)
    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    att_flag = True
    q1_compare, q2_compare = esim_blok(q1_encoded, q2_encoded, att_flag)
    q1_compare_w, q2_compare_w = esim_blok(q1_w_encoded, q2_w_encoded,
                                           att_flag)

    # q1_rep, q2_rep = q1_encoded, q2_encoded
    # q1_w_rep, q2_w_rep = q1_w_encoded, q2_w_encoded
    # q1_rep, q2_rep = q1_compare, q2_compare
    # q1_w_rep, q2_w_rep = q1_compare_w, q2_compare_w

    if pool == 'max':
        q1_rep = MyMaxPool(axis=1)(q1_compare)
        q2_rep = MyMaxPool(axis=1)(q2_compare)
        q1_w_rep = MyMaxPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMaxPool(axis=1)(q2_compare_w)
    elif pool == 'mean':
        q1_rep = MyMeanPool(axis=1)(q1_compare)
        q2_rep = MyMeanPool(axis=1)(q2_compare)
        q1_w_rep = MyMeanPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMeanPool(axis=1)(q2_compare_w)
    else:
        q1_rep = Attention(maxlen)(q1_compare)
        q2_rep = Attention(maxlen)(q2_compare)
        q1_w_rep = Attention(maxlen)(q1_compare_w)
        q2_w_rep = Attention(maxlen)(q2_compare_w)

    # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)
    # dense = cro
    if mode == "char":
        dense = Concatenate()([q1_rep, q2_rep])
    elif mode == "word":
        dense = Concatenate()([q1_w_rep, q2_w_rep])
    else:
        dense = Concatenate()([q1_rep, q2_rep, q1_w_rep, q2_w_rep])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
# NOTE: this second `esim` definition shadows the char+word LSTM version above.
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.5):
    # Based on arXiv:1609.06038
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    bn = BatchNormalization(axis=2)
    q1_embed = bn(embedding(q1))
    q2_embed = bn(embedding(q2))

    # Encode
    encode = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare,
                            [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    # dense = cro
    dense = Concatenate()([q1_rep, q2_rep])
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
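
# ---------------------------------------------------------------------------
# `apply_multiple` used in the aggregation step above is another project
# helper. A common implementation (shown here as a sketch under that
# assumption, with a sketch name) applies several pooling layers to the same
# tensor and concatenates the results.
def apply_multiple_sketch(input_, layers):
    """Apply several layers to one tensor and concatenate their outputs."""
    if not len(layers):
        raise ValueError('layers must contain at least one layer')
    outputs = [layer(input_) for layer in layers]
    return Concatenate()(outputs) if len(outputs) > 1 else outputs[0]
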
def dssm(lstmsize=20):
    # Embedding
    emb_layer_char = create_pretrained_embedding(config.char_embed_weights,
                                                 trainable=True,
                                                 mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 trainable=False,
                                                 mask_zero=False)
    char_weights = np.load(config.char_embed_weights)
    word_weights = np.load(config.word_embed_weights)

    input1 = Input(shape=(config.word_maxlen, ))
    input2 = Input(shape=(config.word_maxlen, ))
    input3 = Input(shape=(len(config.feats), ))

    # Word-level branch
    embed1 = emb_layer_word  # Embedding(word_weights.shape)
    lstm0 = CuDNNLSTM(lstmsize, return_sequences=True)
    lstm1 = Bidirectional(CuDNNLSTM(lstmsize))
    lstm2 = CuDNNLSTM(lstmsize)
    att1 = Attention(config.word_maxlen)
    den = Dense(64, activation='tanh')
    # att1 = Lambda(lambda x: K.max(x, axis=1))

    v1 = embed1(input1)
    v2 = embed1(input2)
    v11 = lstm1(v1)
    v22 = lstm1(v2)
    v1ls = lstm2(lstm0(v1))
    v2ls = lstm2(lstm0(v2))
    v1 = Concatenate(axis=1)([att1(v1), v11])
    v2 = Concatenate(axis=1)([att1(v2), v22])

    # Char-level branch
    input1c = Input(shape=(config.word_maxlen, ))
    input2c = Input(shape=(config.word_maxlen, ))
    embed1c = emb_layer_char  # Embedding(char_weights.shape)
    lstm1c = Bidirectional(CuDNNLSTM(56, return_sequences=True))
    lstm2c = Bidirectional(CuDNNLSTM(56))
    att1c = Attention(config.word_maxlen)

    # the char-level inputs go through the char embedding
    v1c = embed1c(input1c)
    v2c = embed1c(input2c)
    v11c = lstm1c(v1c)
    v22c = lstm1c(v2c)
    v11c = lstm2c(v11c)
    v22c = lstm2c(v22c)
    v1c = Concatenate(axis=1)([att1c(v1c), v11c])
    v2c = Concatenate(axis=1)([att1c(v2c), v22c])

    # Matching features
    mul = Multiply()([v1, v2])
    sub = Lambda(lambda x: K.abs(x))(Subtract()([v1, v2]))
    maximum = Maximum()([Multiply()([v1, v1]), Multiply()([v2, v2])])
    mulc = Multiply()([v1c, v2c])
    subc = Lambda(lambda x: K.abs(x))(Subtract()([v1c, v2c]))
    maximumc = Maximum()([Multiply()([v1c, v1c]), Multiply()([v2c, v2c])])
    sub2 = Lambda(lambda x: K.abs(x))(Subtract()([v1ls, v2ls]))

    matchlist = Concatenate(axis=1)(
        [mul, sub, mulc, subc, maximum, maximumc, sub2])
    matchlist = Dropout(0.05)(matchlist)
    matchlist = Concatenate(axis=1)([
        Dense(32, activation='relu')(matchlist),
        Dense(48, activation='sigmoid')(matchlist)
    ])
    res = Dense(2, activation='sigmoid')(matchlist)

    model = Model(inputs=[input1c, input2c, input1, input2, input3],
                  outputs=res)
    model.compile(optimizer=Adam(lr=0.001),
                  loss="binary_crossentropy",
                  metrics=['acc'])
    model.summary()
    return model
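
# ---------------------------------------------------------------------------
# A minimal smoke-test sketch (not part of the original training pipeline): it
# builds one of the models defined above and fits it on random data. Shapes
# follow the conventions used throughout this file: config.word_maxlen token
# ids per question plus a dense feature vector with len(config.feats) values.
if __name__ == '__main__':
    n = 32
    q1_char = np.random.randint(1, 100, size=(n, config.word_maxlen))
    q2_char = np.random.randint(1, 100, size=(n, config.word_maxlen))
    q1_word = np.random.randint(1, 100, size=(n, config.word_maxlen))
    q2_word = np.random.randint(1, 100, size=(n, config.word_maxlen))
    feats = np.random.rand(n, max(len(config.feats), 1))
    labels = np.random.randint(0, 2, size=(n, 1))

    smoke_model = esim()  # the CuDNNLSTM variant defined above
    smoke_model.fit([q1_char, q2_char, q1_word, q2_word, feats],
                    labels,
                    batch_size=8,
                    epochs=1)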