Example #1
# Assumes the usual Keras imports (Conv1D, GlobalAveragePooling1D, Lambda,
# concatenate, backend as K) plus the project-local cross helper.
def cnn_help2(emb1, emb2):

    nbfilters = [256]
    # 1D convolutions that can iterate over the word vectors
    conv1 = Conv1D(filters=nbfilters[0],
                   kernel_size=2,
                   padding='same',
                   activation='relu')

    # Run through CONV + GAP layers
    conv1a = conv1(emb1)
    glob1a = GlobalAveragePooling1D()(conv1a)
    conv1b = conv1(emb2)
    glob1b = GlobalAveragePooling1D()(conv1b)

    mergea = glob1a
    mergeb = glob1b
    # Take the element-wise absolute difference between the two sentence
    # vectors, plus their element-wise product and sum, as extra similarity
    # measures (computed below but not included in the returned merge)
    diff = Lambda(lambda x: K.abs(x[0] - x[1]),
                  output_shape=(sum(nbfilters), ))([mergea, mergeb])
    mul = Lambda(lambda x: x[0] * x[1],
                 output_shape=(sum(nbfilters), ))([mergea, mergeb])
    add = Lambda(lambda x: x[0] + x[1],
                 output_shape=(sum(nbfilters), ))([mergea, mergeb])

    cro = cross(mergea, mergeb, sum(nbfilters))
    merge = concatenate([mergea, mergeb, cro])
    return merge
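The snippet above relies on a project-local cross(a, b, dim) helper that is not shown on this page, so its exact definition is unknown here. Purely as a hypothetical stand-in that matches the call sites (two pooled vectors plus their width in, one interaction vector out), it could look like this:

# Hypothetical sketch only -- the real cross() is project-local and not shown.
# Builds element-wise interaction features from two (batch, dim) vectors.
def cross(a, b, dim):
    diff = Lambda(lambda x: K.abs(x[0] - x[1]), output_shape=(dim, ))([a, b])
    mul = Lambda(lambda x: x[0] * x[1], output_shape=(dim, ))([a, b])
    return concatenate([diff, mul])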
Example #2
def decomposable_attention(pretrained_embedding=config.word_embed_weights,
                           projection_dim=300, projection_hidden=0, projection_dropout=0.2,
                           compare_dim=500, compare_dropout=0.2,
                           dense_dim=300, dense_dropout=0.2,
                           lr=1e-3, activation='elu', maxlen=MAX_LEN):
    # Based on: https://arxiv.org/abs/1606.01933

    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    q1_embed = embedding(q1)
    q2_embed = embedding(q2)

    # Projection
    projection_layers = []
    if projection_hidden > 0:
        projection_layers.extend([
            Dense(projection_hidden, activation=activation),
            Dropout(rate=projection_dropout),
        ])
    projection_layers.extend([
        Dense(projection_dim, activation=None),
        Dropout(rate=projection_dropout),
    ])
    q1_encoded = time_distributed(q1_embed, projection_layers)
    q2_encoded = time_distributed(q2_embed, projection_layers)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compare
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])
    compare_layers = [
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
        Dense(compare_dim, activation=activation),
        Dropout(compare_dropout),
    ]
    q1_compare = time_distributed(q1_combined, compare_layers)
    q2_compare = time_distributed(q2_combined, compare_layers)

    # Aggregate
    # q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    # q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q1_rep_max = MyMaxPool(axis=1)(q1_compare)
    q2_rep_max = MyMaxPool(axis=1)(q2_compare)

    cro_max = cross(q1_rep_max, q2_rep_max, compare_dim)
    dist = distence(q1_rep_max, q2_rep_max)

    # note: magic_dense is built above but never concatenated in this variant
    dense = Concatenate()([q1_rep_max, q2_rep_max, cro_max, dist])

    #merged = Concatenate()([q1_rep, q2_rep,magic_dense])
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation=activation)(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, magic_input], outputs=out_)
    model.compile(optimizer=Adam(lr=lr), loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    return model
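decomposable_attention calls a time_distributed helper that is not part of Keras itself. In the public Kaggle kernels this function closely mirrors, it simply wraps each layer of the list in TimeDistributed; a sketch under that assumption:

from keras.layers import TimeDistributed

def time_distributed(input_, layers):
    "Apply a list of layers to every timestep (Kaggle-kernel style)."
    out_ = input_
    for layer_ in layers:
        out_ = TimeDistributed(layer_)(out_)
    return out_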
Example #3
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.2):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats),))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen,))
    q2 = Input(name='q2', shape=(maxlen,))

    q1_w = Input(name='q1_w', shape=(maxlen,))
    q2_w = Input(name='q2_w', shape=(maxlen,))

    # Embedding
    emb_layer = create_pretrained_embedding(
        config.char_embed_weights, mask_zero=True)
    emb_layer_word = create_pretrained_embedding(
        config.word_embed_weights, mask_zero=True)
    
    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))
    
    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(LSTM(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned, submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned, submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(LSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    q1_rep = MyMaxPool(axis=1)(q1_compare)
    q2_rep = MyMaxPool(axis=1)(q2_compare)

    q1_w_rep = MyMaxPool(axis=1)(q1_w_encoded)
    q2_w_rep = MyMaxPool(axis=1)(q2_w_encoded)
    
    # Classifier
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)

    dense = Concatenate()([q1_rep, q2_rep, cro, dist, dist2, magic_dense])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)


    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam", metrics=[Precision, Recall, F1])
    model.summary()
    return model
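The ESIM model above and the decomposable-attention model in Example #2 both depend on soft_attention_alignment and submult, neither of which is defined on this page. The widely circulated Kaggle implementations that this code appears to derive from look like the following (a sketch under that assumption):

from keras.activations import softmax
from keras.layers import Concatenate, Dot, Lambda, Multiply, Permute

def unchanged_shape(input_shape):
    return input_shape

def soft_attention_alignment(input_1, input_2):
    "Align the two encoded sequences with dot-product soft attention."
    attention = Dot(axes=-1)([input_1, input_2])
    w_att_1 = Lambda(lambda x: softmax(x, axis=1),
                     output_shape=unchanged_shape)(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2),
                                     output_shape=unchanged_shape)(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])
    in2_aligned = Dot(axes=1)([w_att_2, input_2])
    return in1_aligned, in2_aligned

def submult(input_1, input_2):
    "Element-wise difference and product, concatenated."
    mult = Multiply()([input_1, input_2])
    sub = Lambda(lambda x: x[0] - x[1],
                 output_shape=unchanged_shape)([input_1, input_2])
    return Concatenate()([sub, mult])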
Example #4
from Cross import cross  # note: shadowed by the local definition below


def cross(A, B):
    "Cross product of elements in A and B"
    return [a + b for a in A for b in B]


digits = '123456789'
rows = 'ABCDEFGHI'
cols = digits
# print(cross(rows, cols))

squares = cross(rows, cols)


def grid_values(grid):
    "Convert a grid string into a dict of {square: char}, with '0' or '.' for empties."
    chars = [c for c in grid if c in digits or c in '0.']
    assert len(chars) == 81
    return dict(zip(squares, chars))


grid = "4.....8.5.3..........7......2.....6.....8.4......1.......6.3.7.5..2.....1.4......"

print(grid_values(grid))
# def parse_grid(grid):
#     values = dict((s, digits) for s in squares)
#     for s,d in grid_values(grid).items():
#         if d in digits and not assign(values, s,d):
#             return False
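A quick sanity check (a minimal sketch, reusing only the names defined in this example) confirms the 81 squares and the grid parsing:

assert len(squares) == 81          # 9 rows x 9 columns
values = grid_values(grid)
assert values['A1'] == '4'         # first character of the grid string
assert values['I9'] == '.'         # last character of the grid string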
Example #5
def bma_gru():

    # The embedding layer containing the word vectors
    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=True)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=True)
    # Model variables

    n_hidden = 128

    # Define the shared model
    x = Sequential()
    x.add(emb_layer)
    # Stacked BiLSTM
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(Bidirectional(LSTM(n_hidden, return_sequences=True)))
    x.add(BatchNormalization())
    x.add(MyMaxPool(axis=1))
    shared_model = x

    x2 = Sequential()
    x2.add(emb_layer_word)
    # BiLSTM
    x2.add(Bidirectional(LSTM(10, return_sequences=True)))
    #x2.add(Bidirectional(LSTM(n_hidden,return_sequences=True)))
    x2.add(BatchNormalization())
    x2.add(MyMaxPool(axis=1))
    shared_model2 = x2
    # The visible layer

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    left_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    right_input = Input(shape=(config.word_maxlen, ), dtype='int32')
    w1 = Input(shape=(config.word_maxlen, ), dtype='int32')
    w2 = Input(shape=(config.word_maxlen, ), dtype='int32')

    left = shared_model(left_input)
    right = shared_model(right_input)

    left_w = shared_model2(w1)
    right_w = shared_model2(w2)

    # Pack it all up into a Manhattan Distance model
    malstm_distance = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left, right])

    malstm_distance2 = Lambda(
        lambda x: K.exp(-K.sum(K.abs(x[0] - x[1]), axis=1, keepdims=True)),
        output_shape=(1, ))([left_w, right_w])

    cro = cross(left, right, n_hidden * 2)
    # note: cro2 is unused below, and shared_model2's BiLSTM is only 10 units
    # wide, so its pooled output is not n_hidden * 2 dimensional
    cro2 = cross(left_w, right_w, n_hidden * 2)

    #if config.nofeats:
    merge = concatenate([left, right, cro, malstm_distance2, magic_dense])
    # else:
    #     merge = concatenate([cro, cro2])

    # The MLP that determines the outcome
    x = Dropout(0.2)(merge)
    x = BatchNormalization()(x)
    x = Dense(300, activation='relu')(x)

    x = Dropout(0.2)(x)
    x = BatchNormalization()(x)
    pred = Dense(1, activation='sigmoid')(x)
    model = Model(inputs=[left_input, right_input, w1, w2, magic_input],
                  outputs=pred)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    shared_model.summary()
    return model
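MyMaxPool is a custom layer used in several of these examples but never defined on this page. Since it is applied on top of mask_zero=True embeddings in some models, it presumably has to respect the mask; a hypothetical implementation consistent with how it is called (MyMaxPool(axis=1) on (batch, timesteps, features) tensors) might be:

from keras import backend as K
from keras.layers import Layer

# Hypothetical sketch -- the project's real MyMaxPool may differ.
class MyMaxPool(Layer):
    def __init__(self, axis=1, **kwargs):
        self.axis = axis
        self.supports_masking = True
        super(MyMaxPool, self).__init__(**kwargs)

    def compute_mask(self, inputs, mask=None):
        return None  # the pooled axis is reduced away, so no mask remains

    def call(self, x, mask=None):
        if mask is not None:
            mask = K.expand_dims(K.cast(mask, K.floatx()), axis=-1)
            x = x - (1 - mask) * 1e9  # push padded steps towards -inf
        return K.max(x, axis=self.axis)

    def compute_output_shape(self, input_shape):
        return input_shape[:self.axis] + input_shape[self.axis + 1:]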
Example #6
def BMA_GRU(pretrained_embedding=config.word_embed_weights,
            maxlen=MAX_LEN,
            lstm_dim=300,
            dense_dim=300,
            dense_dropout=0.2,
            pool="max",
            mode='char+word'):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='elu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))

    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))

    # Embedding
    emb_layer = create_pretrained_embedding(config.char_embed_weights,
                                            mask_zero=False)
    emb_layer_word = create_pretrained_embedding(config.word_embed_weights,
                                                 mask_zero=False)

    # Encode
    encode = Sequential()
    encode.add(emb_layer)
    encode.add(BatchNormalization(axis=2))
    encode.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    encode2 = Sequential()
    encode2.add(emb_layer_word)
    encode2.add(BatchNormalization(axis=2))
    encode2.add(Bidirectional(CuDNNGRU(lstm_dim, return_sequences=True)))

    q1_encoded = encode(q1)
    q2_encoded = encode(q2)

    q1_w_encoded = encode2(q1_w)
    q2_w_encoded = encode2(q2_w)

    att_flag = True
    q1_compare, q2_compare = esim_blok(q1_encoded, q2_encoded, att_flag)
    q1_compare_w, q2_compare_w = esim_blok(q1_w_encoded, q2_w_encoded,
                                           att_flag)

    # q1_rep ,q2_rep = q1_encoded,q2_encoded
    # q1_w_rep , q2_w_rep = q1_w_encoded,q2_w_encoded

    # q1_rep ,q2_rep = q1_compare,q2_compare
    # q1_w_rep , q2_w_rep = q1_compare_w,q2_compare_w

    if pool == 'max':
        q1_rep = MyMaxPool(axis=1)(q1_compare)
        q2_rep = MyMaxPool(axis=1)(q2_compare)

        q1_w_rep = MyMaxPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMaxPool(axis=1)(q2_compare_w)
    elif pool == 'mean':

        q1_rep = MyMeanPool(axis=1)(q1_compare)
        q2_rep = MyMeanPool(axis=1)(q2_compare)

        q1_w_rep = MyMeanPool(axis=1)(q1_compare_w)
        q2_w_rep = MyMeanPool(axis=1)(q2_compare_w)
    else:
        q1_rep = Attention(maxlen)(q1_compare)
        q2_rep = Attention(maxlen)(q2_compare)

        q1_w_rep = Attention(maxlen)(q1_compare_w)
        q2_w_rep = Attention(maxlen)(q2_compare_w)

    # Aggregate (alternative pooling, unused)
    # q1_rep = apply_multiple(q1_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])
    # q2_rep = apply_multiple(q2_compare, [MyMaxPool(axis=1), MyMeanPool(axis=1)])

    # Classifier
    # note: cro, dist and dist2 are computed but not used in any mode branch
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dist2 = distence(q1_w_rep, q2_w_rep)

    if mode == "char":

        dense = Concatenate()([
            q1_rep,
            q2_rep,
        ])
    elif mode == "word":
        dense = Concatenate()([q1_w_rep, q2_w_rep])
    else:
        dense = Concatenate()([q1_rep, q2_rep, q1_w_rep, q2_w_rep])

    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
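A minimal usage sketch for BMA_GRU, with hypothetical input names: X1/X2 are char-level index matrices, X1_w/X2_w the word-level ones (all padded to MAX_LEN), X_feats has len(config.feats) columns, and y holds the 0/1 labels:

model = BMA_GRU(pool='mean', mode='char+word')
model.fit([X1, X2, X1_w, X2_w, X_feats], y,
          batch_size=64, epochs=10, validation_split=0.1)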
Example #7
def esim(pretrained_embedding=config.word_embed_weights,
         maxlen=MAX_LEN,
         lstm_dim=300,
         dense_dim=300,
         dense_dropout=0.5):

    # Based on arXiv:1609.06038

    magic_input = Input(shape=(len(config.feats), ))
    magic_dense = BatchNormalization()(magic_input)
    magic_dense = Dense(64, activation='relu')(magic_dense)

    q1 = Input(name='q1', shape=(maxlen, ))
    q2 = Input(name='q2', shape=(maxlen, ))
    # q1_w, q2_w (like magic_dense above) are defined but never wired into the
    # output in this variant; they keep the input signature consistent.
    q1_w = Input(name='q1_w', shape=(maxlen, ))
    q2_w = Input(name='q2_w', shape=(maxlen, ))
    # Embedding
    embedding = create_pretrained_embedding(pretrained_embedding,
                                            mask_zero=False)
    bn = BatchNormalization(axis=2)
    q1_embed = bn(embedding(q1))
    q2_embed = bn(embedding(q2))

    # Encode
    encode = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)

    # Attention
    q1_aligned, q2_aligned = soft_attention_alignment(q1_encoded, q2_encoded)

    # Compose
    q1_combined = Concatenate()(
        [q1_encoded, q2_aligned,
         submult(q1_encoded, q2_aligned)])
    q2_combined = Concatenate()(
        [q2_encoded, q1_aligned,
         submult(q2_encoded, q1_aligned)])

    compose = Bidirectional(CuDNNLSTM(lstm_dim, return_sequences=True))
    q1_compare = compose(q1_combined)
    q2_compare = compose(q2_combined)

    # Aggregate
    q1_rep = apply_multiple(q1_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])
    q2_rep = apply_multiple(q2_compare, [GlobalAvgPool1D(), GlobalMaxPool1D()])

    # Classifier (cro and dist are computed but unused in this variant)
    cro = cross(q1_rep, q2_rep, lstm_dim * 2)
    dist = distence(q1_rep, q2_rep)
    dense = Concatenate()([q1_rep, q2_rep])

    dense = BatchNormalization()(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    dense = Dense(dense_dim, activation='relu')(dense)
    dense = BatchNormalization()(dense)
    dense = Dropout(dense_dropout)(dense)
    out_ = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2, q1_w, q2_w, magic_input], outputs=out_)
    model.compile(loss='binary_crossentropy',
                  optimizer="adam",
                  metrics=[
                      Precision,
                      Recall,
                      F1,
                  ])
    model.summary()
    return model
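This variant aggregates with apply_multiple, another helper from the same kernel family as the soft_attention_alignment sketch after Example #3; there it simply applies each pooling layer to the same input and concatenates the results (sketch under that assumption):

def apply_multiple(input_, layers):
    "Apply several aggregation layers to one input and concatenate."
    if len(layers) < 2:
        raise ValueError('Layers list should contain more than 1 layer')
    return Concatenate()([layer(input_) for layer in layers])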
Example #8
def cnn_help(emb1, emb2):

    nbfilters = [256, 246, 256, 128]  #,64,32]
    # 1D convolutions that can iterate over the word vectors
    conv1 = Conv1D(filters=nbfilters[0],
                   kernel_size=1,
                   padding='same',
                   activation='relu')
    conv2 = Conv1D(filters=nbfilters[1],
                   kernel_size=2,
                   padding='same',
                   activation='relu')
    conv3 = Conv1D(filters=nbfilters[2],
                   kernel_size=3,
                   padding='same',
                   activation='relu')
    conv4 = Conv1D(filters=nbfilters[3],
                   kernel_size=4,
                   padding='same',
                   activation='relu')
    # conv5 = Conv1D(filters=nbfilters[4], kernel_size=5,
    #                padding='same', activation='relu')
    # conv6 = Conv1D(filters=nbfilters[5], kernel_size=6,
    #                padding='same', activation='relu')

    # Run through CONV + GAP layers
    conv1a = conv1(emb1)
    glob1a = GlobalAveragePooling1D()(conv1a)
    conv1b = conv1(emb2)
    glob1b = GlobalAveragePooling1D()(conv1b)

    conv2a = conv2(emb1)
    glob2a = GlobalAveragePooling1D()(conv2a)
    conv2b = conv2(emb2)
    glob2b = GlobalAveragePooling1D()(conv2b)

    conv3a = conv3(emb1)
    glob3a = GlobalAveragePooling1D()(conv3a)
    conv3b = conv3(emb2)
    glob3b = GlobalAveragePooling1D()(conv3b)

    conv4a = conv4(emb1)
    glob4a = GlobalAveragePooling1D()(conv4a)
    conv4b = conv4(emb2)
    glob4b = GlobalAveragePooling1D()(conv4b)

    # conv5a = conv5(emb1)
    # glob5a = GlobalAveragePooling1D()(conv5a)
    # conv5b = conv5(emb2)
    # glob5b = GlobalAveragePooling1D()(conv5b)

    # conv6a = conv6(emb1)
    # glob6a = GlobalAveragePooling1D()(conv6a)
    # conv6b = conv6(emb2)
    # glob6b = GlobalAveragePooling1D()(conv6b)

    mergea = concatenate([
        glob1a,
        glob2a,
        glob3a,
        glob4a,
    ])  # glob5a,glob6a])
    mergeb = concatenate([
        glob1b,
        glob2b,
        glob3b,
        glob4b,
    ])  # glob5b,glob6b])

    # Take the element-wise absolute difference between the two sentence
    # vectors, plus their element-wise product and sum, as extra similarity
    # measures (these feed the commented-out merge variant below)
    diff = Lambda(lambda x: K.abs(x[0] - x[1]),
                  output_shape=(sum(nbfilters), ))([mergea, mergeb])
    mul = Lambda(lambda x: x[0] * x[1],
                 output_shape=(sum(nbfilters), ))([mergea, mergeb])
    add = Lambda(lambda x: x[0] + x[1],
                 output_shape=(sum(nbfilters), ))([mergea, mergeb])

    # merge = concatenate([mergea, mergeb, diff, mul, add])
    cro = cross(mergea, mergeb, sum(nbfilters))
    merge = concatenate([mergea, mergeb, cro])
    return merge
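cnn_help (like cnn_help2 in Example #1) takes two already-embedded sequences and returns a merged feature vector, so it slots in between a shared embedding layer and a dense head. A minimal wiring sketch, assuming the embedding helper and config names used in the other examples:

maxlen = config.word_maxlen  # assumed, as in Example #5
q1 = Input(shape=(maxlen, ))
q2 = Input(shape=(maxlen, ))
embedding = create_pretrained_embedding(config.char_embed_weights,
                                        mask_zero=False)
merge = cnn_help(embedding(q1), embedding(q2))
x = BatchNormalization()(Dropout(0.2)(merge))
x = Dense(300, activation='relu')(x)
pred = Dense(1, activation='sigmoid')(x)
model = Model(inputs=[q1, q2], outputs=pred)
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])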