def cnn_help2(emb1, emb2):
    """Encode two embedded sequences with one shared Conv1D and cross them.

    Both inputs pass through the same ``Conv1D`` (256 filters, kernel size 2,
    ``'same'`` padding, ReLU) and are then reduced with their own
    ``GlobalAveragePooling1D`` layer. The two pooled tensors are handed to
    the project helper ``cross`` together with the total filter count.

    Args:
        emb1: Tensor for the first sequence; must be accepted by ``Conv1D``.
        emb2: Tensor for the second sequence; must be accepted by ``Conv1D``.

    Returns:
        The value of ``cross(pooled_1, pooled_2, sum(nbfilters))``.
    """
    nbfilters = [256]

    # A single convolution instance is applied to both inputs so that the
    # two branches share weights.
    conv1 = Conv1D(filters=nbfilters[0],
                   kernel_size=2,
                   padding='same',
                   activation='relu')

    # CONV + global-average-pool, first input then second input.
    conv1a = conv1(emb1)
    glob1a = GlobalAveragePooling1D()(conv1a)
    conv1b = conv1(emb2)
    glob1b = GlobalAveragePooling1D()(conv1b)

    # NOTE(review): the abs-difference / product / sum Lambda layers and the
    # concatenate([pooled_1, pooled_2, cro]) that used to be built here never
    # reached the returned tensor, so they were removed. cnn_help returns the
    # concatenation instead of the bare cross term - confirm that returning
    # only the cross term is intended here.
    return cross(glob1a, glob1b, sum(nbfilters))
def decomposable_attention(pretrained_embedding=config.word_embed_weights,
                           projection_dim=300,
                           projection_hidden=0,
                           projection_dropout=0.2,
                           compare_dim=500,
                           compare_dropout=0.2,
                           dense_dim=300,
                           dense_dropout=0.2,
                           lr=1e-3,
                           activation='elu',
                           maxlen=MAX_LEN):
    """Build and compile a decomposable-attention sentence-pair classifier.

    Based on: https://arxiv.org/abs/1606.01933

    Args:
        pretrained_embedding: Passed to ``create_pretrained_embedding``.
        projection_dim: Units of the final (linear) projection ``Dense``.
        projection_hidden: When ``> 0``, an extra ``Dense`` + ``Dropout``
            of this width is placed in front of the final projection.
        projection_dropout: Dropout rate used in the projection stack.
        compare_dim: Units of each ``Dense`` in the compare stack; also the
            size handed to ``cross``.
        compare_dropout: Dropout rate used in the compare stack.
        dense_dim: Units of each ``Dense`` in the classifier head.
        dense_dropout: Dropout rate used in the classifier head.
        lr: Learning rate given to ``Adam``.
        activation: Activation of the hidden projection, compare and head
            ``Dense`` layers.
        maxlen: Length of the ``q1`` / ``q2`` inputs.

    Returns:
        A compiled ``Model`` with inputs ``[q1, q2, magic_input]`` and a
        single sigmoid output (binary cross-entropy loss, accuracy metric).
        ``model.summary()`` is called before returning.
    """
    # Hand-crafted feature branch. Only the Input is handed to Model(); the
    # BatchNormalization/Dense result is not concatenated into the head.
    feats_in = Input(shape=(len(config.feats),))
    feats_branch = BatchNormalization()(feats_in)
    feats_branch = Dense(64, activation='relu')(feats_branch)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    # One embedding layer shared by both questions.
    embed = create_pretrained_embedding(pretrained_embedding, mask_zero=False)
    left_emb = embed(q1)
    right_emb = embed(q2)

    # Projection stack: optional hidden stage followed by a linear stage.
    hidden_stage = []
    if projection_hidden > 0:
        hidden_stage = [
            Dense(projection_hidden, activation=activation),
            Dropout(rate=projection_dropout),
        ]
    projection_stack = hidden_stage + [
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ]
    left_enc = time_distributed(left_emb, projection_stack)
    right_enc = time_distributed(right_emb, projection_stack)

    # Soft alignment between the two projected sequences.
    left_aligned, right_aligned = soft_attention_alignment(left_enc, right_enc)

    # Each side is paired with the other side's aligned tensor.
    left_joined = Concatenate()(
        [left_enc, right_aligned, submult(left_enc, right_aligned)])
    right_joined = Concatenate()(
        [right_enc, left_aligned, submult(right_enc, left_aligned)])

    # Compare stack: two Dense + Dropout stages, shared by both sides.
    compare_stack = []
    for _ in range(2):
        compare_stack.append(Dense(compare_dim, activation=activation))
        compare_stack.append(Dropout(compare_dropout))
    left_cmp = time_distributed(left_joined, compare_stack)
    right_cmp = time_distributed(right_joined, compare_stack)

    # Aggregate over axis 1 with the project's max-pool layer.
    left_vec = MyMaxPool(axis=1)(left_cmp)
    right_vec = MyMaxPool(axis=1)(right_cmp)

    pair_cross = cross(left_vec, right_vec, compare_dim)
    pair_dist = distence(left_vec, right_vec)

    head = Concatenate()([left_vec, right_vec, pair_cross, pair_dist])

    # Classifier head: two identical BN -> Dense -> Dropout stages.
    for _ in range(2):
        head = BatchNormalization()(head)
        head = Dense(dense_dim, activation=activation)(head)
        head = Dropout(dense_dropout)(head)
    prob = Dense(1, activation='sigmoid')(head)

    model = Model(inputs=[q1, q2, feats_in], outputs=prob)
    model.compile(optimizer=Adam(lr=lr),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    return model
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.2):
    """Build and compile an ESIM-style model with char and word inputs.

    Based on arXiv:1609.06038

    Args:
        pretrained_embedding: Accepted but not read by this function; the
            embeddings come from ``config.char_embed_weights`` and
            ``config.word_embed_weights``.
        maxlen: Length of the ``q1``, ``q2``, ``q1_w`` and ``q2_w`` inputs.
        lstm_dim: Units of every ``LSTM``; ``lstm_dim * 2`` is the size
            handed to ``cross``.
        dense_dim: Units of the hidden ``Dense`` in the classifier.
        dense_dropout: Dropout rate used in the classifier.

    Returns:
        A compiled ``Model`` with inputs
        ``[q1, q2, q1_w, q2_w, magic_input]`` and one sigmoid output
        (binary cross-entropy, ``adam``, metrics ``Precision``, ``Recall``,
        ``F1``). ``model.summary()`` is called before returning.
    """
    # Hand-crafted feature branch; its output joins the classifier input.
    feats_in = Input(shape=(len(config.feats),))
    feats_branch = BatchNormalization()(feats_in)
    feats_branch = Dense(64, activation='elu')(feats_branch)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))
    q1_w = Input(name='q1_w', shape=(maxlen,))
    q2_w = Input(name='q2_w', shape=(maxlen,))

    char_embedding = create_pretrained_embedding(
        config.char_embed_weights, mask_zero=True)
    word_embedding = create_pretrained_embedding(
        config.word_embed_weights, mask_zero=True)

    def build_encoder(embedding_layer):
        # Embedding -> BatchNormalization(axis=2) -> bidirectional LSTM.
        encoder = Sequential()
        encoder.add(embedding_layer)
        encoder.add(BatchNormalization(axis=2))
        encoder.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))
        return encoder

    char_encoder = build_encoder(char_embedding)
    word_encoder = build_encoder(word_embedding)

    left_enc = char_encoder(q1)
    right_enc = char_encoder(q2)
    left_word_enc = word_encoder(q1_w)
    right_word_enc = word_encoder(q2_w)

    # Soft alignment is applied to the q1/q2 encodings only.
    left_aligned, right_aligned = soft_attention_alignment(left_enc, right_enc)

    # Compose: each side is paired with the other side's aligned tensor.
    left_joined = Concatenate()(
        [left_enc, right_aligned, submult(left_enc, right_aligned)])
    right_joined = Concatenate()(
        [right_enc, left_aligned, submult(right_enc, left_aligned)])

    composer = Bidirectional(LSTM(lstm_dim, return_sequences=True))
    left_cmp = composer(left_joined)
    right_cmp = composer(right_joined)

    # Aggregate over axis 1 with the project's max-pool layer. The q1_w/q2_w
    # branch is pooled straight from its encoder output.
    left_vec = MyMaxPool(axis=1)(left_cmp)
    right_vec = MyMaxPool(axis=1)(right_cmp)
    left_word_vec = MyMaxPool(axis=1)(left_word_enc)
    right_word_vec = MyMaxPool(axis=1)(right_word_enc)

    # Pairwise features from the project helpers.
    pair_cross = cross(left_vec, right_vec, lstm_dim * 2)
    pair_dist = distence(left_vec, right_vec)
    word_dist = distence(left_word_vec, right_word_vec)

    head = Concatenate()([
        left_vec, right_vec, pair_cross, pair_dist, word_dist, feats_branch,
    ])
    head = Dropout(dense_dropout)(head)
    head = Dense(dense_dim, activation='relu')(head)
    head = BatchNormalization()(head)
    head = Dropout(dense_dropout)(head)
    prob = Dense(1, activation='sigmoid')(head)

    model = Model(inputs=[q1, q2, q1_w, q2_w, feats_in], outputs=prob)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
# NOTE(review): the name imported here is rebound by the local ``cross``
# definition directly below, so the imported function is never used.
from Cross import cross


def cross(A, B):
    """Return every concatenation ``a + b`` for ``a`` in A and ``b`` in B.

    The result is ordered with A as the outer loop and B as the inner loop.
    """
    return [a + b for a in A for b in B]


# All nine digits and all nine row letters must be in place *before*
# ``squares`` is computed: with only eight digits there were 72 squares, and
# ``zip`` in ``grid_values`` silently dropped the last nine cells of the grid.
digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits
squares = cross(rows, cols)  # 'A1', 'A2', ..., 'I9' - 81 labels


def grid_values(grid):
    """Map each square label to the character found for it in ``grid``.

    Only characters that are a digit, ``'0'`` or ``'.'`` are kept; anything
    else in ``grid`` is ignored.

    Args:
        grid: String containing exactly 81 kept characters.

    Returns:
        ``dict`` from square label (``'A1'`` ... ``'I9'``) to its character.

    Raises:
        AssertionError: If the number of kept characters is not 81.
    """
    chars = [c for c in grid if c in digits or c in '0.']
    assert len(chars) == 81
    return dict(zip(squares, chars))


grid = "4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......"
print(grid_values(grid))

# Unfinished draft kept for reference:
# def parse_grid(grid):
#     values = dict((s, digits) for s in squares)
#     for s,d in grid_values(grid).items():
#         if d in digits and not assign(values, s,d):
#             return False
def bma_gru():
    """Build and compile a two-branch siamese LSTM model.

    Two shared ``Sequential`` encoders are created: one on
    ``config.char_embed_weights`` (two stacked bidirectional LSTMs with 128
    units) and one on ``config.word_embed_weights`` (one bidirectional LSTM
    with 10 units). Each ends in ``BatchNormalization`` and
    ``MyMaxPool(axis=1)``. All four sequence inputs have length
    ``config.word_maxlen``.

    Returns:
        A compiled ``Model`` with inputs
        ``[left_input, right_input, w1, w2, magic_input]`` and one sigmoid
        output (binary cross-entropy, ``adam``, metrics ``Precision``,
        ``Recall``, ``F1``). ``summary()`` is printed for the model and for
        the first shared encoder before returning.
    """
    char_embedding = create_pretrained_embedding(config.char_embed_weights,
                                                 mask_zero=True)
    word_embedding = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)

    n_hidden = 128

    # Shared encoder for the first input pair.
    shared_model = Sequential()
    shared_model.add(char_embedding)
    for _ in range(2):
        shared_model.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    shared_model.add(BatchNormalization())
    shared_model.add(MyMaxPool(axis=1))

    # Shared encoder for the second input pair (single, much smaller LSTM).
    shared_model2 = Sequential()
    shared_model2.add(word_embedding)
    shared_model2.add(Bidirectional(LSTM(10, return_sequences=True)))
    shared_model2.add(BatchNormalization())
    shared_model2.add(MyMaxPool(axis=1))

    # Hand-crafted feature branch.
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    # Four integer sequence inputs, created in the order they are listed.
    left_input, right_input, w1, w2 = (
        Input(shape=(config.word_maxlen, ), dtype='int32') for _ in range(4)
    )

    left = shared_model(left_input)
    right = shared_model(right_input)
    left_w = shared_model2(w1)
    right_w = shared_model2(w2)

    # exp(-L1 distance) similarity for each pair. Only the second one is fed
    # to the classifier; the first is built but not concatenated below.
    malstm_distance = Lambda(
        lambda pair: K.exp(
            -K.sum(K.abs(pair[0] - pair[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left, right])
    malstm_distance2 = Lambda(
        lambda pair: K.exp(
            -K.sum(K.abs(pair[0] - pair[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left_w, right_w])

    cro = cross(left, right, n_hidden * 2)
    # NOTE(review): this result is not used below, and the size passed is
    # n_hidden * 2 although this branch's LSTM has 10 units - confirm.
    cro2 = cross(left_w, right_w, n_hidden * 2)

    merged = concatenate([left, right, cro, malstm_distance2, magic_dense])

    # MLP head producing the prediction.
    hidden = Dropout(0.2)(merged)
    hidden = BatchNormalization()(hidden)
    hidden = Dense(300, activation='relu')(hidden)
    hidden = Dropout(0.2)(hidden)
    hidden = BatchNormalization()(hidden)
    pred = Dense(1, activation='sigmoid')(hidden)

    model = Model(
        inputs=[left_input, right_input, w1, w2, magic_input],
        outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    shared_model.summary()
    return model
def BMA_GRU(pretrained_embedding=config.word_embed_weights,
            maxlen=MAX_LEN,
            lstm_dim=300,
            dense_dim=300,
            dense_dropout=0.2,
            pool="max",
            mode='char+word'):
    """Build and compile a GRU-based sentence-pair model.

    Based on arXiv:1609.06038

    Args:
        pretrained_embedding: Accepted but not read by this function; the
            embeddings come from ``config.char_embed_weights`` and
            ``config.word_embed_weights``.
        maxlen: Length of the four sequence inputs; also the argument given
            to ``Attention`` when that pooling is selected.
        lstm_dim: Units of each ``CuDNNGRU``.
        dense_dim: Units of the hidden ``Dense`` in the classifier.
        dense_dropout: Dropout rate used in the classifier.
        pool: ``'max'`` selects ``MyMaxPool(axis=1)``, ``'mean'`` selects
            ``MyMeanPool(axis=1)``, any other value selects
            ``Attention(maxlen)``.
        mode: ``"char"`` feeds only the q1/q2 vectors to the classifier,
            ``"word"`` only the q1_w/q2_w vectors, any other value feeds
            all four.

    Returns:
        A compiled ``Model`` with inputs
        ``[q1, q2, q1_w, q2_w, magic_input]`` and one sigmoid output
        (binary cross-entropy, ``adam``, metrics ``Precision``, ``Recall``,
        ``F1``). ``model.summary()`` is called before returning.
    """
    # Hand-crafted feature branch. Only the Input is handed to Model(); the
    # BatchNormalization/Dense result is not concatenated into the head.
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    char_embedding = create_pretrained_embedding(config.char_embed_weights,
                                                 mask_zero=False)
    word_embedding = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    def build_encoder(embedding_layer):
        # Embedding -> BatchNormalization(axis=2) -> bidirectional CuDNNGRU.
        encoder = Sequential()
        encoder.add(embedding_layer)
        encoder.add(BatchNormalization(axis=2))
        encoder.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))
        return encoder

    char_encoder = build_encoder(char_embedding)
    word_encoder = build_encoder(word_embedding)

    q1_encoded = char_encoder(q1)
    q2_encoded = char_encoder(q2)
    q1_w_encoded = word_encoder(q1_w)
    q2_w_encoded = word_encoder(q2_w)

    # Both pairs go through the project's esim_blok with the flag set.
    use_attention = True
    q1_compare, q2_compare = esim_blok(q1_encoded, q2_encoded, use_attention)
    q1_compare_w, q2_compare_w = esim_blok(q1_w_encoded, q2_w_encoded,
                                           use_attention)

    def new_pool():
        # A fresh pooling layer for every tensor, selected by `pool`.
        if pool == 'max':
            return MyMaxPool(axis=1)
        if pool == 'mean':
            return MyMeanPool(axis=1)
        return Attention(maxlen)

    q1_rep = new_pool()(q1_compare)
    q2_rep = new_pool()(q2_compare)
    q1_w_rep = new_pool()(q1_compare_w)
    q2_w_rep = new_pool()(q2_compare_w)

    # These helper results are computed but not concatenated into the
    # classifier input below.
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)

    # Choose which pooled vectors feed the classifier.
    if mode == "char":
        selected = [q1_rep, q2_rep]
    elif mode == "word":
        selected = [q1_w_rep, q2_w_rep]
    else:
        selected = [q1_rep, q2_rep, q1_w_rep, q2_w_rep]

    head = Concatenate()(selected)
    head = Dropout(dense_dropout)(head)
    head = Dense(dense_dim, activation='relu')(head)
    head = BatchNormalization()(head)
    head = Dropout(dense_dropout)(head)
    prob = Dense(1, activation='sigmoid')(head)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=prob)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.5):
    """Build and compile an ESIM-style model on a single embedding.

    Based on arXiv:1609.06038

    Args:
        pretrained_embedding: Passed to ``create_pretrained_embedding``.
        maxlen: Length of the ``q1``, ``q2``, ``q1_w`` and ``q2_w`` inputs.
        lstm_dim: Units of each ``CuDNNLSTM``.
        dense_dim: Units of each hidden ``Dense`` in the classifier.
        dense_dropout: Dropout rate used in the classifier.

    Returns:
        A compiled ``Model`` with inputs
        ``[q1, q2, q1_w, q2_w, magic_input]`` and one sigmoid output
        (binary cross-entropy, ``adam``, metrics ``Precision``, ``Recall``,
        ``F1``). Only ``q1`` and ``q2`` feed the output; the remaining
        inputs are declared on the model but not used in the graph.
        ``model.summary()`` is called before returning.
    """
    # Hand-crafted feature branch; built but not concatenated into the head.
    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Shared embedding followed by a shared BatchNormalization(axis=2).
    embed = create_pretrained_embedding(pretrained_embedding, mask_zero=False)
    embed_norm = BatchNormalization(axis=2)
    left_emb = embed(q1)
    left_emb = embed_norm(left_emb)
    right_emb = embed(q2)
    right_emb = embed_norm(right_emb)

    # Shared bidirectional encoder.
    encoder = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    left_enc = encoder(left_emb)
    right_enc = encoder(right_emb)

    left_aligned, right_aligned = soft_attention_alignment(left_enc, right_enc)

    # Compose: each side is paired with the other side's aligned tensor.
    left_joined = Concatenate()(
        [left_enc, right_aligned, submult(left_enc, right_aligned)])
    right_joined = Concatenate()(
        [right_enc, left_aligned, submult(right_enc, left_aligned)])

    composer = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    left_cmp = composer(left_joined)
    right_cmp = composer(right_joined)

    # Aggregate with fresh average- and max-pool layers for each side.
    left_vec = apply_multiple(left_cmp,
                              [GlobalAvgPool1D(), GlobalMaxPool1D()])
    right_vec = apply_multiple(right_cmp,
                               [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # These helper results are computed but not concatenated into the
    # classifier input below.
    cro = cross(left_vec, right_vec, lstm_dim * 2)
    dist = distence(left_vec, right_vec)

    head = Concatenate()([left_vec, right_vec])
    head = BatchNormalization()(head)
    # Two identical Dense -> BN -> Dropout stages.
    for _ in range(2):
        head = Dense(dense_dim, activation='relu')(head)
        head = BatchNormalization()(head)
        head = Dropout(dense_dropout)(head)
    prob = Dense(1, activation='sigmoid')(head)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=prob)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[Precision, Recall, F1])
    model.summary()
    return model
def cnn_help(emb1, emb2):
    """Encode two embedded sequences with shared multi-width convolutions.

    One ``Conv1D`` is created per entry of ``nbfilters``; the entry at
    position ``i`` (0-based) uses kernel size ``i + 1``. Every convolution is
    applied to both inputs (shared weights) and each result is reduced with
    its own ``GlobalAveragePooling1D``. The pooled outputs of each input are
    concatenated, passed to the project helper ``cross``, and the three
    tensors are concatenated once more.

    Args:
        emb1: Tensor for the first sequence; must be accepted by ``Conv1D``.
        emb2: Tensor for the second sequence; must be accepted by ``Conv1D``.

    Returns:
        ``concatenate([merged_1, merged_2, cross(merged_1, merged_2, n)])``
        where ``n`` is ``sum(nbfilters)``.
    """
    # NOTE(review): 246 may be a typo for 256; it is kept unchanged because
    # altering it would change the architecture of already trained models.
    nbfilters = [256, 246, 256, 128]

    # All convolutions are instantiated before any of them is applied.
    # Append a filter count to nbfilters to add the next kernel width.
    convs = [
        Conv1D(filters=n_filters,
               kernel_size=width,
               padding='same',
               activation='relu')
        for width, n_filters in enumerate(nbfilters, start=1)
    ]

    # CONV + global-average-pool: first input, then second input, for each
    # kernel width in turn.
    pooled_a = []
    pooled_b = []
    for conv in convs:
        conv_a = conv(emb1)
        pooled_a.append(GlobalAveragePooling1D()(conv_a))
        conv_b = conv(emb2)
        pooled_b.append(GlobalAveragePooling1D()(conv_b))

    mergea = concatenate(pooled_a)
    mergeb = concatenate(pooled_b)

    # The abs-difference / product / sum Lambda layers that used to be built
    # here never reached the returned tensor, so they were removed.
    cro = cross(mergea, mergeb, sum(nbfilters))
    return concatenate([mergea, mergeb, cro])