Example no. 1
    def init_model(self,
                   embedding_matrix,
                   seq_len,
                   num_features,
                   num_classes,
                   is_multilabel,
                   is_balanced,
                   classes_ratio,
                   filters=100,
                   emb_size=300,
                   use_step_decay=False,
                   **kwargs):
        self._use_step_decay = use_step_decay
        self._num_classes = num_classes
        self._is_multilabel = is_multilabel
        if num_classes == 2 or is_multilabel:
            loss = 'binary_crossentropy' if is_balanced or is_multilabel else binary_focal_loss(
                gamma=2, alpha=(1 - classes_ratio[1]))
            output_activation = 'sigmoid'
            if is_multilabel:
                output_units = self._num_classes
            else:
                output_units = 1
        else:
            loss = 'sparse_categorical_crossentropy'
            output_activation = 'softmax'
            output_units = num_classes

        trainable = True
        inputs = Input(name='inputs', shape=(seq_len, ))
        if embedding_matrix is None:
            x = Embedding(input_dim=num_features,
                          output_dim=emb_size,
                          input_length=seq_len,
                          trainable=trainable)(inputs)
        else:
            x = Embedding(input_dim=num_features,
                          output_dim=emb_size,
                          input_length=seq_len,
                          trainable=trainable,
                          embeddings_initializer=keras.initializers.Constant(
                              embedding_matrix))(inputs)

        # QMC
        # x = CuDNNGRU(128, return_sequences=True)(x)
        # x = Activation('tanh')(x)
        # x = SpatialDropout1D(0.4)(x)
        # x = GlobalMaxPooling1D()(x)
        #
        # x = Dense(128, activation='softplus')(x)  #
        # x = Dropout(0.5)(x)  # 0
        # x = BatchNormalization()(x)

        # DB
        x = CuDNNGRU(128, return_sequences=True)(x)
        x = GlobalMaxPooling1D()(x)

        x = Dense(128)(x)
        x = PReLU()(x)
        x = Dropout(0.35)(x)
        x = BatchNormalization()(x)

        output = Dense(output_units, activation=output_activation)(x)
        model = keras.models.Model(inputs=inputs, outputs=output)

        optimizer = optimizers.Adam()
        model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
        model.summary()
        self.is_init = True
        self._model = model
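Note that init_model above calls binary_focal_loss, which is not defined in this snippet. A minimal sketch of what such a helper could look like (an assumption following the usual focal-loss formulation, not the original implementation):

from keras import backend as K

def binary_focal_loss(gamma=2.0, alpha=0.25):
    # Hypothetical helper: returns a loss function usable with model.compile.
    def loss(y_true, y_pred):
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1.0 - eps)
        # p_t: predicted probability assigned to the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        return K.mean(-alpha_t * K.pow(1.0 - p_t, gamma) * K.log(p_t), axis=-1)
    return loss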
Example no. 2
    embedding_dim = 100
    # print(word_index)

    embedding_layer = word_embedding(Max_Sequence_Length, embedding_dim,
                                     word_index, embeddings_index)

    sequence_input = Input(shape=(Max_Sequence_Length, ), dtype=tf.int32)
    embeddings = embedding_layer(sequence_input)

    x = Dropout(0.2)(embeddings)
    x = Conv1D(FLAGS.filters,
               FLAGS.kernel_size,
               padding='valid',
               activation='relu',
               strides=1)(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(FLAGS.hidden_dims)(x)
    x = Dropout(0.2)(x)
    x = Activation('relu')(x)
    x = Dense(1)(x)

    preds = Activation('sigmoid')(x)
    model = Model(sequence_input, preds)

    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])

    print('Train...')
    model.fit(x_train,
              y_train,
Example no. 3
File: cvcv.py Project: willzzp/NLP
            score = roc_auc_score(self.y_val, y_pred)
            print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch+1, score))

################################ Learning model ##########################################
def get_model():                            # define the learning model
    inp = Input(shape=(maxlen, ))           # input layer: a column vector of length maxlen
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)    # embedding layer: maps token indices to vectors
    x = SpatialDropout1D(0.1)(x)
    """SpatialDropout1D behaves like Dropout (it randomly disconnects a fraction of the input units),
    but it drops entire 1D feature maps instead of individual neurons. When adjacent steps within a
    feature map are strongly correlated (as often happens in early convolutional layers), ordinary
    dropout fails to regularize the activations and mostly just lowers the effective learning rate.
    In that case SpatialDropout1D (or its 2D/3D variants) helps decorrelate the feature maps and
    should be used in place of plain Dropout."""
    x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    # 128 is the GRU output dimension; see http://blog.csdn.net/jiangpeng59/article/details/77646186
    # Bidirectional is the bidirectional RNN wrapper
    avg_pool = GlobalAveragePooling1D()(x)    # global average pooling over the time dimension
    max_pool = GlobalMaxPooling1D()(x)        # global max pooling over the time dimension
    conc = concatenate([avg_pool, max_pool])  # concatenate the two pooled results
    outp = Dense(6, activation="sigmoid")(conc)  # Dense is a plain fully connected layer: 6 outputs, sigmoid activation

    model = Model(inputs=inp, outputs=outp)    # assemble the model from its inputs and outputs
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
# compile the model: log loss (binary cross-entropy) as the loss function and Adam as the optimizer (Adam can be seen as a bias-corrected combination of Momentum and RMSProp)
    return model

model = get_model()
################################ Training and prediction ##########################################

batch_size = 32
epochs = 5
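The example stops right after setting batch_size and epochs; a hedged sketch of the training call that would typically follow (the RocAucEvaluation name and constructor signature for the ROC-AUC callback shown at the top of this example, and the x_train/y_train/x_val/y_val arrays, are assumptions about the rest of the original script):

# Assumed continuation: train with the ROC-AUC callback from the top of this example.
roc_auc = RocAucEvaluation(validation_data=(x_val, y_val), interval=1)
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_val, y_val),
          callbacks=[roc_auc])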
def baseline_CNN(sequences_length_for_training, embedding_dim,
                 embedding_matrix, vocab_size):

    which_model = 2

    print('Build MAIN model...')
    ngram_filters = [2, 3, 4, 5]
    conv_hidden_units = [200, 200, 200, 200]

    main_input = Input(shape=(embedding_dim, ),
                       dtype='float32',
                       name='main-input')

    main_input_embedder = Embedding(vocab_size + 1,
                                    GLOVE_EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=embedding_dim,
                                    init='uniform')
    embedded_input_main = main_input_embedder(main_input)

    convsM = []
    for n_gram, hidden_units in zip(ngram_filters, conv_hidden_units):
        conv_layer = Convolution1D(
            nb_filter=hidden_units,
            filter_length=n_gram,
            border_mode='same',
            #border_mode='valid',
            activation='tanh',
            name='Convolution-' + str(n_gram) + "gram")
        mid = conv_layer(embedded_input_main)

        # Use Flatten() instead of MaxPooling()
        #flat_M = TimeDistributed(Flatten(), name='TD-flatten-mid-'+str(n_gram)+"gram")(mid)
        #convsM.append(flat_M)

        # Use GlobalMaxPooling1D() instead of Flatten()
        pool_M = GlobalMaxPooling1D()(mid)
        convsM.append(pool_M)

    convoluted_mid = Merge(mode='concat')(convsM)
    CONV_DIM = sum(conv_hidden_units)

    ####convoluted_mid, convoluted_left, convoluted_right, CONV_DIM = main_input, left_context, right_context, 300
    #flat_mid = Flatten()(convoluted_mid)
    encode_mid = Dense(300,
                       name='dense-intermediate-mid-encoder')(convoluted_mid)

    #context_encoder_intermediate1 = LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False)
    #context_encoder = LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False)
    #context_encoder_intermediate1 = Bidirectional(LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False), name='BiLSTM-context-encoder-intermediate1', merge_mode='concat')
    #context_encoder = Bidirectional(LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False), name='BiLSTM-context-encoder', merge_mode='concat')
    ####encode_left = context_encoder(context_encoder_intermediate1(convoluted_left))

    encode_mid_drop = Dropout(0.2)(encode_mid)

    decoded = Dense(300, name='decoded')(encode_mid_drop)
    decoded_drop = Dropout(0.3, name='decoded_drop')(decoded)

    output = Dense(2, activation='sigmoid')(decoded_drop)
    model = Model(input=[main_input], output=output)
    model.layers[1].trainable = TRAINABLE_EMBEDDINGS
    model.compile(loss=w_binary_crossentropy,
                  optimizer='rmsprop',
                  metrics=['accuracy', 'recall'])
    #model.compile(loss=w_binary_crossentropy, optimizer='adadelta', metrics=['accuracy', 'recall'])

    print(model.summary(line_length=150, positions=[.46, .65, .77, 1.]))
    return model
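baseline_CNN compiles with w_binary_crossentropy, which is not reproduced here. A minimal class-weighted binary cross-entropy sketch, under the assumption that the helper simply up-weights the positive class (the weight value and the exact form are assumptions):

from keras import backend as K

POS_WEIGHT = 5.0  # assumed positive-class weight, for illustration only

def w_binary_crossentropy(y_true, y_pred):
    # Hypothetical weighted binary cross-entropy.
    eps = K.epsilon()
    y_pred = K.clip(y_pred, eps, 1.0 - eps)
    per_element = -(POS_WEIGHT * y_true * K.log(y_pred) +
                    (1.0 - y_true) * K.log(1.0 - y_pred))
    return K.mean(per_element, axis=-1)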
Example no. 5
import pandas as pd
import matplotlib.pyplot as plt

(x_train, y_train), (x_test, y_test) = mnist.load_data()

D = 28
M = 15

x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

input_ = Input(shape=(D, D))

rnn1 = Bidirectional(CuDNNLSTM(M, return_sequences=True))
x1 = rnn1(input_)
x1 = GlobalMaxPooling1D()(x1)

rnn2 = Bidirectional(CuDNNLSTM(M, return_sequences=True))

permutor = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))

x2 = permutor(input_)
x2 = rnn2(x2)
x2 = GlobalMaxPooling1D()(x2)

concatenator = Concatenate(axis=1)
x = concatenator([x1, x2])

output = Dense(10, activation='softmax')(x)

model = Model(inputs=input_, outputs=output)
# model.add(Dense(n_dense, activation='relu'))
# model.add(Dropout(dropout))
# model.add(Dense(1,activation='sigmoid'))
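The snippet above builds the row-wise/column-wise BiLSTM model over MNIST but never compiles or trains it; a plausible continuation (a sketch with assumed hyperparameters, not part of the original) would be:

# Sketch: mnist.load_data() yields integer labels, so sparse categorical cross-entropy fits.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
history = model.fit(x_train, y_train,
                    validation_data=(x_test, y_test),
                    batch_size=128,
                    epochs=5)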

input_layer = Input(shape=(max_review_lenth, ), dtype='int16', name="input")
emd_layer = Embedding(n_unique_words,
                      n_dim,
                      input_length=max_review_lenth,
                      name='Embedded_layer')(input_layer)
drop_emb_layer = SpatialDropout1D(drop_emd, name="dropemb")(emd_layer)

conv_layer_1 = (Conv1D(n_conv1,
                       filer_size,
                       activation='relu',
                       name="con1_Layer"))(drop_emb_layer)
maxpool_layer1 = GlobalMaxPooling1D()(conv_layer_1)

conv_layer_2 = (Conv1D(n_conv1,
                       filer_size,
                       activation='relu',
                       name="con2_Layer"))(drop_emb_layer)
maxpool_layer2 = GlobalMaxPooling1D()(conv_layer_2)

conv_layer_3 = (Conv1D(n_conv1,
                       filer_size,
                       activation='relu',
                       name="con3_Layer"))(drop_emb_layer)
maxpool_layer3 = GlobalMaxPooling1D()(conv_layer_3)

concat_layer = concatenate([maxpool_layer1, maxpool_layer2,
                            maxpool_layer3])  # ,name="concat_layer")
Example no. 7
def build_model(vocab_size, emb_size, hidden_size, emb_matrix, my_model_kind):
    use_Ng, use_AR, use_KenLM, use_CAR=use_config(my_model_kind)
    # --- Input Layer (as in the paper) ---
    sent_input=Input(shape=(MAX_LENGTH,))   #(b, s)
    c1=Input(shape=(C_MAXLEN,)) #(b, c)
    c2=Input(shape=(C_MAXLEN,))
    c3=Input(shape=(C_MAXLEN,))
    c4=Input(shape=(C_MAXLEN,))

    sent_E=Embedding(output_dim=emb_size, input_dim=vocab_size, input_length=MAX_LENGTH, mask_zero=True, weights=[emb_matrix], trainable=True)
    sent_emb=sent_E(sent_input)

    choices_E=Embedding(output_dim=emb_size, input_dim=vocab_size, input_length=C_MAXLEN, mask_zero=True, weights=[emb_matrix], trainable=True)
    c1_emb=choices_E(c1)    #(b, c, h)
    c2_emb=choices_E(c2)
    c3_emb=choices_E(c3)
    c4_emb=choices_E(c4)

    sent_vec=Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))(sent_emb) #(b, s, 2h)

    choices_BiGRU=Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))
    c1_gru=NonMasking()(choices_BiGRU(c1_emb))    #(b, c, 2h)
    c2_gru=NonMasking()(choices_BiGRU(c2_emb))
    c3_gru=NonMasking()(choices_BiGRU(c3_emb))
    c4_gru=NonMasking()(choices_BiGRU(c4_emb))

    c1_vec=Reshape((hidden_size*2*C_MAXLEN,))(c1_gru)    #(b, c*2h)
    c2_vec=Reshape((hidden_size*2*C_MAXLEN,))(c2_gru)
    c3_vec=Reshape((hidden_size*2*C_MAXLEN,))(c3_gru)
    c4_vec=Reshape((hidden_size*2*C_MAXLEN,))(c4_gru)

    choices_Dense=Dense(hidden_size*2)
    c1_vec=choices_Dense(c1_vec)    #(b, 2h)
    c2_vec=choices_Dense(c2_vec)
    c3_vec=choices_Dense(c3_vec)
    c4_vec=choices_Dense(c4_vec)

    # --- Multi-Perspective Aggregation Layer (as in the paper) ---
    bsize=K.int_shape(sent_vec)[0]

    # --- Part of the MPA layer: Selective Copying ---
    cloze_input=Input(shape=(MAX_LENGTH,))   #(b, s)
    P_sc = SCLayer(hidden_size*2, bsize)([NonMasking()(sent_vec), NonMasking()(cloze_input)])

    # --- Part of the MPA layer: Iterative Dilated Convolution ---
    sent_cnn = BatchNormalization(axis=2)(sent_vec)
    sent_cnn = Activation("relu")(sent_cnn)
    sent_cnn = NonMasking()(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=1)(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=3)(sent_cnn)
    #sent_cnn = BatchNormalization(axis=2)(sent_cnn)
    #sent_cnn = Activation("relu")(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=1)(sent_cnn)

    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=3)(sent_cnn)
    P_idc = GlobalMaxPooling1D()(sent_cnn)

    # --- Part of the MPA layer: Attentive Reader ---
    if use_AR==1:
        P1_ar, P2_ar, P3_ar, P4_ar=ARLayer(hidden_size*2, bsize)([NonMasking()(sent_vec), c1_vec, c2_vec, c3_vec, c4_vec])

    # --- Part of the MPA layer: N-gram Statistics ---
    if use_Ng==1:
        Ngram_1=Input(shape=(5,))   #(b, 5)
        Ngram_2=Input(shape=(5,))
        Ngram_3=Input(shape=(5,))
        Ngram_4=Input(shape=(5,))

        P1_ng = NonMasking()(Ngram_1)
        P2_ng = NonMasking()(Ngram_2)
        P3_ng = NonMasking()(Ngram_3)
        P4_ng = NonMasking()(Ngram_4)

    # Custom extension: Attentive Reader over the cloze-completed sentences
    if use_CAR==1:
        CAR_sent1=Input(shape=(MAX_LENGTH,))
        CAR_sent2=Input(shape=(MAX_LENGTH,))
        CAR_sent3=Input(shape=(MAX_LENGTH,))
        CAR_sent4=Input(shape=(MAX_LENGTH,))

        CAR_sent1_emb=sent_E(CAR_sent1)
        CAR_sent2_emb=sent_E(CAR_sent2)
        CAR_sent3_emb=sent_E(CAR_sent3)
        CAR_sent4_emb=sent_E(CAR_sent4)

        CAR_sent_GRU=Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))

        CAR_sent1_vec=NonMasking()(CAR_sent_GRU(CAR_sent1_emb)) #(b, s, 2h)
        CAR_sent2_vec=NonMasking()(CAR_sent_GRU(CAR_sent2_emb))
        CAR_sent3_vec=NonMasking()(CAR_sent_GRU(CAR_sent3_emb))
        CAR_sent4_vec=NonMasking()(CAR_sent_GRU(CAR_sent4_emb))

        P1_car, P2_car, P3_car, P4_car=CARLayer(hidden_size*2, bsize)([CAR_sent1_vec, CAR_sent2_vec, CAR_sent3_vec, CAR_sent4_vec, c1_vec, c2_vec, c3_vec, c4_vec])

    # Custom extension: KenLM score
    if use_KenLM==1:
        KenLM_1=Input(shape=(5,))   #(b, 5)
        KenLM_2=Input(shape=(5,))
        KenLM_3=Input(shape=(5,))
        KenLM_4=Input(shape=(5,))

        P1_ks = NonMasking()(KenLM_1)
        P2_ks = NonMasking()(KenLM_2)
        P3_ks = NonMasking()(KenLM_3)
        P4_ks = NonMasking()(KenLM_4)

    # --- Part of the MPA layer: final merge ---
    P =  Concatenate(axis=1)([P_sc, P_idc])     #(b, 2h+2h)

    C1_tmp=[c1_vec]
    C2_tmp=[c2_vec]
    C3_tmp=[c3_vec]
    C4_tmp=[c4_vec]

    if use_AR==1:
        C1_tmp.append(P1_ar)
        C2_tmp.append(P2_ar)
        C3_tmp.append(P3_ar)
        C4_tmp.append(P4_ar)

    if use_Ng==1:
        C1_tmp.append(P1_ng)
        C2_tmp.append(P2_ng)
        C3_tmp.append(P3_ng)
        C4_tmp.append(P4_ng)

    if use_CAR==1:
        C1_tmp.append(P1_car)
        C2_tmp.append(P2_car)
        C3_tmp.append(P3_car)
        C4_tmp.append(P4_car)

    if use_KenLM==1:
        C1_tmp.append(P1_ks)
        C2_tmp.append(P2_ks)
        C3_tmp.append(P3_ks)
        C4_tmp.append(P4_ks)

    C1 = Concatenate(axis=1)(C1_tmp)
    C2 = Concatenate(axis=1)(C2_tmp)
    C3 = Concatenate(axis=1)(C3_tmp)
    C4 = Concatenate(axis=1)(C4_tmp)

    # --- Output Layer (PointerNet, as in the paper) ---
    # output layer is done for now
    Pdim=K.int_shape(P)[-1]
    Cdim=K.int_shape(C1)[-1]

    output=PointerNet(hidden_size*2, Pdim, Cdim, bsize)([P, C1, C2, C3, C4]) #(b, 4)
    #preds = softmax(output, axis=1)   #(b, 4)
    preds=Activation('softmax')(output)

    #--------------------------
    X=[sent_input, c1, c2, c3, c4, cloze_input]
    if use_Ng==1:
        X.extend([Ngram_1, Ngram_2, Ngram_3, Ngram_4])

    if use_CAR==1:
        X.extend([CAR_sent1, CAR_sent2, CAR_sent3, CAR_sent4])

    if use_KenLM==1:
        X.extend([KenLM_1, KenLM_2, KenLM_3, KenLM_4])

    my_model=Model(X, preds)
    opt=optimizers.Adam(lr=0.001, clipnorm=math.sqrt(5))    # default: lr=0.001
    my_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

    return my_model
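build_model uses a custom NonMasking layer that does not appear in this excerpt. A common minimal implementation simply passes tensors through while discarding the mask created by mask_zero=True, so mask-unaware layers such as Reshape and Conv1D can follow; this is a sketch, not necessarily the author's version:

from keras.engine.topology import Layer

class NonMasking(Layer):
    # Hypothetical sketch of the mask-stripping layer used above.
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(NonMasking, self).__init__(**kwargs)

    def compute_mask(self, inputs, input_mask=None):
        # Returning None removes the mask for every downstream layer.
        return None

    def call(self, inputs, mask=None):
        return inputs

    def compute_output_shape(self, input_shape):
        return input_shape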
Example no. 8
                      name='word_embedding', trainable=True, mask_zero=True)(txt_input)

txt_drpot = Dropout(WDROP_RATE, name='word_dropout')(txt_embed)

# character-level input with randomized initializations
cnn_input = Input(shape=(TXT_MAXLEN, CHR_MAXLEN), name='cnn_input')

cnn_embed = TimeDistributed(Embedding(CHR_VOCAB, CEMBED_SIZE, input_length=CHR_MAXLEN,
                                      weights=[char_embedding_matrix],
                                      name='cnn_embedding', trainable=True, mask_zero=False))(cnn_input)

# 1-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns1 = TimeDistributed(Conv1D(filters=20, kernel_size=1, padding="same", strides=1), name='cnn1_cnn')(cnn_embed)
cnns1 = TimeDistributed(BatchNormalization(), name='cnn1_bnorm')(cnns1)
cnns1 = TimeDistributed(Activation('tanh'), name='cnn1_act')(cnns1)
cnns1 = TimeDistributed(GlobalMaxPooling1D(), name='cnn1_gmp')(cnns1)

# 2-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns2 = TimeDistributed(Conv1D(filters=40, kernel_size=2, padding="same", strides=1), name='cnn2_cnn')(cnn_embed)
cnns2 = TimeDistributed(BatchNormalization(), name='cnn2_bnorm')(cnns2)
cnns2 = TimeDistributed(Activation('tanh'), name='cnn2_act')(cnns2)
cnns2 = TimeDistributed(GlobalMaxPooling1D(), name='cnn2_gmp')(cnns2)

# 3-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns3 = TimeDistributed(Conv1D(filters=60, kernel_size=3, padding="same", strides=1), name='cnn3_cnn')(cnn_embed)
cnns3 = TimeDistributed(BatchNormalization(), name='cnn3_bnorm')(cnns3)
cnns3 = TimeDistributed(Activation('tanh'), name='cnn3_act')(cnns3)
cnns3 = TimeDistributed(GlobalMaxPooling1D(), name='cnn3_gmp')(cnns3)

# 4-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns4 = TimeDistributed(Conv1D(filters=80, kernel_size=4, padding="same", strides=1), name='cnn4_cnn')(cnn_embed)
Example no. 9
# Question 1 - Embeddings -> Convolutional

model_q1 = Sequential()
model_q1.add(Embedding(len(word_index) + 1,
                     300,
                     weights=[embedding_matrix],
                     input_length=40,
                     trainable=False))

model_q1.add(Convolution1D(nb_filter=num_filter,
                         filter_length=filter_length,
                         border_mode='valid',
                         activation='relu',
                         subsample_length=1))

model_q1.add(GlobalMaxPooling1D())

model_q1.add(Convolution1D(nb_filter=num_filter,
                           filter_length=filter_length,
                           border_mode='valid',
                           activation='relu',
                           subsample_length=1))

model_q1.add(GlobalMaxPooling1D())

# Question 2 - Embeddings -> Convolutional

model_q2 = Sequential()

model_q2.add(Embedding(len(word_index) + 1,
                     300,
Example no. 10
input_polarity = Input(shape=(2, ))
input_hand = Input(shape=(26, ))
input_sim = Input(shape=(1, ))
input_bleu = Input(shape=(1, ))
input_rouge = Input(shape=(3, ))
input_cider = Input(shape=(1, ))

###############################

# Define the sentence encoder #

mask = Masking(mask_value=0, input_shape=(max_seq_len, ))(input_premisse)
embed = embedding_layer(mask)
l1 = lstm1(embed)
drop1 = Dropout(0.1)(l1)
maxim = GlobalMaxPooling1D()(drop1)
att = SelfAttLayer()(drop1)
out = concatenate([maxim, att])
SentenceEncoder = Model(input_premisse, maxim, name='SentenceEncoder')

##############################

# Combining the representations #

premisse_representation = SentenceEncoder(input_premisse)
hyp_representation = SentenceEncoder(input_hyp)
concat = concatenate([premisse_representation, hyp_representation])
mul = multiply([premisse_representation, hyp_representation])
dif = subtract([premisse_representation, hyp_representation])
final_merge = concatenate([
    concat, mul, dif, input_overlap, input_refuting, input_polarity,
Example no. 11
def lstm_model(sequences_length_for_training, embedding_dim, embedding_matrix,
               vocab_size):
    GLOVE_EMBEDDING_DIM = 300

    print('Build MAIN model...')
    ngram_filters = [2, 3, 4, 5]
    conv_hidden_units = [200, 200, 200, 200]

    left_context = Input(shape=(ONE_SIDE_CONTEXT_SIZE + 1, embedding_dim),
                         dtype='float32',
                         name='left-context')
    main_input = Input(shape=(1, embedding_dim),
                       dtype='float32',
                       name='main-input')
    right_context = Input(shape=(ONE_SIDE_CONTEXT_SIZE + 1, embedding_dim),
                          dtype='float32',
                          name='right-context')

    context_embedder = TimeDistributed(
        Embedding(vocab_size + 1,
                  GLOVE_EMBEDDING_DIM,
                  input_length=embedding_dim,
                  weights=[embedding_matrix],
                  init='uniform',
                  trainable=False))
    main_input_embedder = TimeDistributed(
        Embedding(vocab_size + 1,
                  GLOVE_EMBEDDING_DIM,
                  input_length=embedding_dim,
                  weights=[embedding_matrix],
                  init='uniform',
                  trainable=False))

    embedded_input_left, embedded_input_main, embedded_input_right = context_embedder(
        left_context), main_input_embedder(main_input), context_embedder(
            right_context)

    convsL, convsM, convsR = [], [], []
    for n_gram, hidden_units in zip(ngram_filters, conv_hidden_units):
        conv_layer = Convolution1D(
            nb_filter=hidden_units,
            filter_length=n_gram,
            border_mode='same',
            # border_mode='valid',
            activation='tanh',
            name='Convolution-' + str(n_gram) + "gram")
        lef = TimeDistributed(conv_layer,
                              name='TD-convolution-left-' + str(n_gram) +
                              "gram")(embedded_input_left)
        mid = TimeDistributed(conv_layer,
                              name='TD-convolution-mid-' + str(n_gram) +
                              "gram")(embedded_input_main)
        rig = TimeDistributed(conv_layer,
                              name='TD-convolution-right-' + str(n_gram) +
                              "gram")(embedded_input_right)

        # Use GlobalMaxPooling1D() instead of Flatten()
        pool_L = TimeDistributed(GlobalMaxPooling1D(),
                                 name='TD-GlobalMaxPooling-left-' +
                                 str(n_gram) + "gram")(lef)
        pool_M = TimeDistributed(GlobalMaxPooling1D(),
                                 name='TD-GlobalMaxPooling-mid-' +
                                 str(n_gram) + "gram")(mid)
        pool_R = TimeDistributed(GlobalMaxPooling1D(),
                                 name='TD-GlobalMaxPooling-right-' +
                                 str(n_gram) + "gram")(rig)
        convsL.append(pool_L), convsM.append(pool_M), convsR.append(pool_R)

    convoluted_left, convoluted_mid, convoluted_right = Merge(
        mode='concat')(convsL), Merge(mode='concat')(convsM), Merge(
            mode='concat')(convsR)
    CONV_DIM = sum(conv_hidden_units)

    flat_mid = Flatten()(convoluted_mid)

    encode_mid = Dense(300, name='dense-intermediate-mid-encoder')(flat_mid)

    context_encoder_intermediate1 = Bidirectional(
        LSTM(600,
             input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM),
             dropout_W=0.3,
             dropout_U=0.3,
             return_sequences=True,
             stateful=False),
        name='BiLSTM-context-encoder-intermediate1',
        merge_mode='concat')
    context_encoder = Bidirectional(LSTM(600,
                                         input_shape=(ONE_SIDE_CONTEXT_SIZE,
                                                      CONV_DIM),
                                         dropout_W=0.3,
                                         dropout_U=0.3,
                                         return_sequences=True,
                                         stateful=False),
                                    name='BiLSTM-context-encoder',
                                    merge_mode='concat')

    encode_left = AttentionWithContext()(context_encoder(
        context_encoder_intermediate1(convoluted_left)))
    encode_right = AttentionWithContext()(context_encoder(
        context_encoder_intermediate1(convoluted_right)))

    encode_left_drop, encode_mid_drop, encode_right_drop = Dropout(0.3)(
        encode_left), Dropout(0.2)(encode_mid), Dropout(0.3)(encode_right)

    encoded_info = Merge(mode='concat', name='encode_info')(
        [encode_left_drop, encode_mid_drop, encode_right_drop])

    decoded = Dense(500, name='decoded')(encoded_info)
    decoded_drop = Dropout(0.3, name='decoded_drop')(decoded)

    output = Dense(1, activation='sigmoid')(decoded_drop)
    model = Model(input=[left_context, main_input, right_context],
                  output=output)
    model.layers[1].trainable = False
    # model.compile(loss=w_binary_crossentropy, optimizer='rmsprop', metrics=['accuracy', 'recall'])
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', 'recall'])

    print(model.summary())
    return model
    def __init__(
        self,
        args,
        tau,
        transformer_dropout: float = 0.05,
        embedding_dropout: float = 0.05,
        l2_reg_penalty: float = 1e-4,
        use_same_embedding=True,
        use_vanilla_transformer=True,
    ):
        self.args = args
        self.tau = tau
        self.pos_number = args.positive_number
        self.neg_number = args.negative_number
        self.query_retrieval_number = args.query_retrieval_number
        self.semantic_dim = args.semantic_dim
        self.transformer_dropout = transformer_dropout
        self.embedding_dropout = embedding_dropout

        self.query_dense = Dense(self.semantic_dim,
                                 activation='tanh',
                                 name='query_sem')
        self.query_retrieval_dense = Dense(self.semantic_dim,
                                           activation='tanh',
                                           name='query_retrieval_sem')
        self.fact_dense = Dense(self.semantic_dim,
                                activation='tanh',
                                name='fact_sem')
        self.semantic_dim_dense = Dense(self.args.semantic_dim,
                                        activation='tanh',
                                        name='semantic_dim_sem')

        self.query_conv = SeparableConv1D(self.args.embedding_dim,
                                          self.args.max_pooling_filter_length,
                                          padding="same",
                                          activation="tanh")
        self.query_max = GlobalMaxPooling1D(data_format='channels_last',
                                            name='query_max_pooling')
        self.fact_conv = SeparableConv1D(self.args.embedding_dim,
                                         self.args.max_pooling_filter_length,
                                         padding="same",
                                         activation="tanh")
        self.fact_max = GlobalMaxPooling1D(data_format='channels_last',
                                           name='fact_max_pooling')

        self.cosine_merger_layer = AutoPointerMerger(name='cosine_merger',
                                                     args=self.args)

        # prepare layers
        l2_regularizer = (regularizers.l2(l2_reg_penalty)
                          if l2_reg_penalty else None)
        if use_same_embedding:
            self.query_embedding_layer = self.fact_embedding_layer = ReusableEmbedding(
                self.args.vocab_size,
                self.args.embedding_dim,
                name='embeddings',
                # Regularization is based on paper "A Comparative Study on
                # Regularization Strategies for Embedding-based Neural Networks"
                # https://arxiv.org/pdf/1508.03721.pdf
                embeddings_regularizer=l2_regularizer)
        else:
            self.query_embedding_layer = ReusableEmbedding(
                self.args.vocab_size,
                self.args.embedding_dim,
                name='query_embeddings',
                embeddings_regularizer=l2_regularizer)
            self.fact_embedding_layer = ReusableEmbedding(
                self.args.vocab_size,
                self.args.embedding_dim,
                name='fact_embeddings',
                embeddings_regularizer=l2_regularizer)

        self.query_coord_embedding_layer = TransformerCoordinateEmbedding(
            self.args.src_seq_length,
            1 if use_vanilla_transformer else self.args.transformer_depth,
            name='query_coordinate_embedding')

        self.output_softmax_layer = Softmax(name='pos_neg_predictions')

        self.query_encoder_blocks = [
            TransformerEncoderBlock(name='query_encoder%s' % i,
                                    num_heads=self.args.num_heads,
                                    residual_dropout=self.transformer_dropout,
                                    attention_dropout=self.transformer_dropout,
                                    activation='relu',
                                    vanilla_wiring=True)
            for i in range(self.args.transformer_depth)
        ]
Example no. 13
print('Length of embedding_matrix:', embedding_matrix.shape[0])
embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            mask_zero=False,
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('Training and validation set: number of positive and negative reviews')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
dense_1 = Dense(100, activation='tanh')(embedded_sequences)
max_pooling = GlobalMaxPooling1D()(dense_1)
dense_2 = Dense(2, activation='softmax')(max_pooling)

model = Model(sequence_input, dense_2)

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['acc'])

model.summary()
model.fit(x_train,
          y_train,
          validation_data=(x_val, y_val),
          nb_epoch=10,
          batch_size=50)
    def __init__(self,
                 title_word_length,
                 content_word_length,
                 title_char_length,
                 content_char_length,
                 fs_btm_tw_cw_length,
                 fs_btm_tc_length,
                 class_num,
                 word_embedding_matrix,
                 char_embedding_matrix,
                 optimizer_name,
                 lr,
                 metrics):
        # set attributes
        self.title_word_length = title_word_length
        self.content_word_length = content_word_length
        self.title_char_length = title_char_length
        self.content_char_length = content_char_length
        self.fs_btm_tw_cw_length = fs_btm_tw_cw_length
        self.fs_btm_tc_length = fs_btm_tc_length
        self.class_num = class_num
        self.word_embedding_matrix = word_embedding_matrix
        self.char_embedding_matrix = char_embedding_matrix
        self.optimizer_name = optimizer_name
        self.lr = lr
        self.metrics = metrics
        # Placeholder for input (title and content)
        title_word_input = Input(shape=(title_word_length,), dtype='int32', name="title_word_input")
        cont_word_input = Input(shape=(content_word_length,), dtype='int32', name="content_word_input")

        title_char_input = Input(shape=(title_char_length,), dtype='int32', name="title_char_input")
        cont_char_input = Input(shape=(content_char_length,), dtype='int32', name="content_char_input")

        # Embedding layer
        with K.tf.device("/cpu:0"):
            word_embedding_layer = Embedding(len(word_embedding_matrix),
                                             256,
                                             weights=[word_embedding_matrix],
                                             trainable=True, name='word_embedding')
            title_word_emb = word_embedding_layer(title_word_input)
            cont_word_emb = word_embedding_layer(cont_word_input)

            char_embedding_layer = Embedding(len(char_embedding_matrix),
                                             256,
                                             weights=[char_embedding_matrix],
                                             trainable=True, name='char_embedding')
            title_char_emb = char_embedding_layer(title_char_input)
            cont_char_emb = char_embedding_layer(cont_char_input)

        # Create a convolution + max pooling layer
        title_content_conv = list()
        title_content_pool = list()

        for win_size in range(1, 8):
            # batch_size x doc_len x embed_size
            title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(title_word_emb))
            title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(cont_word_emb))
            title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(title_char_emb))
            title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(cont_char_emb))

        for conv_out in title_content_conv:
            title_content_pool.append(GlobalMaxPooling1D()(conv_out))

        title_content_att = list()
        for conv_out, pool_out in zip(title_content_conv, title_content_pool):
            title_content_att.append(Attention()([ conv_out, pool_out ]))

        # add btm_tw_cw features + btm_tc features
        fs_btm_tw_cw_input = Input(shape=(fs_btm_tw_cw_length,), dtype='float32', name="fs_btm_tw_cw_input")
        fs_btm_tc_input = Input(shape=(fs_btm_tc_length,), dtype='float32', name="fs_btm_tc_input")
        fs_btm_raw_features = concatenate([fs_btm_tw_cw_input, fs_btm_tc_input])
        fs_btm_emb_features = Dense(1024, activation='relu', name='fs_btm_embedding')(fs_btm_raw_features)
        fs_btm_emb_features = Dropout(0.5, name='fs_btm_embedding_dropout')(fs_btm_emb_features)

        title_content_pool_features = concatenate(title_content_pool)
        title_content_pool_features = Dense(1600, activation='relu', name='title_content_pool_embedding')(title_content_pool_features)
        title_content_pool_features = Dropout(0.1, name='title_content_pool_dropout')(title_content_pool_features)

        title_content_att_features = concatenate(title_content_att)
        title_content_att_features = Dense(1600, activation='relu', name='title_content_att_embedding')(title_content_att_features)
        title_content_att_features = Dropout(0.1, name='title_content_att_dropout')(title_content_att_features)

        title_content_features = concatenate([title_content_pool_features, title_content_att_features, fs_btm_emb_features])

        # Full connection
        title_content_features = Dense(3600, activation='relu', name='fs_embedding')(title_content_features)
        title_content_features = Dropout(0.5, name='fs_embedding_dropout')(title_content_features)

        # Prediction
        preds = Dense(class_num, activation='sigmoid', name='prediction')(title_content_features)

        self._model = Model([title_word_input,
                             cont_word_input,
                             title_char_input,
                             cont_char_input,
                             fs_btm_tw_cw_input,
                             fs_btm_tc_input], preds)
        if 'rmsprop' == optimizer_name:
            optimizer = optimizers.RMSprop(lr=lr)
        elif 'adam' == optimizer_name:
            optimizer = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        else:
            optimizer = None
        self._model.compile(loss=binary_crossentropy_sum, optimizer=optimizer, metrics=metrics)
        self._model.summary()
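The model above is compiled with binary_crossentropy_sum, which is defined elsewhere in the project; assuming the name describes the behaviour, a minimal sketch would sum the per-label binary cross-entropy instead of averaging it:

from keras import backend as K

def binary_crossentropy_sum(y_true, y_pred):
    # Assumed behaviour: sum over the label dimension rather than taking the mean.
    return K.sum(K.binary_crossentropy(y_true, y_pred), axis=-1)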
Example no. 15
def cnn_branch(n_filters, k_size, d_rate, my_input):
    conv = Conv1D(filters=n_filters, kernel_size=k_size)(my_input)
    act = Activation("relu")(conv)
    pooled = GlobalMaxPooling1D()(act)
    return Dropout(d_rate)(pooled)
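cnn_branch packs Conv1D -> ReLU -> global max pooling -> dropout into one call. A typical way to use it (a hedged sketch with assumed vocabulary size, sequence length, and filter settings) is to run several branches with different kernel sizes over the same embedded input and concatenate them:

from keras.layers import Input, Embedding, Dense, concatenate
from keras.models import Model

seq_len, vocab_size, emb_dim = 100, 20000, 128  # assumed toy dimensions

inp = Input(shape=(seq_len,))
emb = Embedding(vocab_size, emb_dim)(inp)
branches = [cnn_branch(64, k, 0.2, emb) for k in (2, 3, 5)]
merged = concatenate(branches)
out = Dense(1, activation='sigmoid')(merged)
model = Model(inp, out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])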
Example no. 16
def Model_BiLSTM_CnnDecoder(sourcevocabsize,
                            targetvocabsize,
                            source_W,
                            input_seq_lenth,
                            output_seq_lenth,
                            hidden_dim,
                            emd_dim,
                            sourcecharsize,
                            character_W,
                            input_word_length,
                            char_emd_dim,
                            sourcepossize,
                            pos_W,
                            pos_emd_dim,
                            batch_size=32,
                            loss='categorical_crossentropy',
                            optimizer='rmsprop'):

    # 0.8349149507609669--attention,lstm*2decoder

    # pos_input = Input(shape=(input_seq_lenth,), dtype='int32')
    # pos_embeding = Embedding(input_dim=sourcepossize + 1,
    #                               output_dim=pos_emd_dim,
    #                               input_length=input_seq_lenth,
    #                               mask_zero=False,
    #                               trainable=True,
    #                               weights=[pos_W])(pos_input)

    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')

    char_input = Input(shape=(
        input_seq_lenth,
        input_word_length,
    ),
                       dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth,
                                                  input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])

    char_embedding2 = TimeDistributed(char_embedding)(char_input)

    char_cnn = TimeDistributed(
        Conv1D(50, 3, activation='relu', padding='valid'))(char_embedding2)

    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)

    pos_input = Input(shape=(
        input_seq_lenth,
        3,
    ), dtype='int32')
    pos_embedding = Embedding(input_dim=sourcepossize + 1,
                              output_dim=pos_emd_dim,
                              batch_input_shape=(batch_size, input_seq_lenth,
                                                 3),
                              mask_zero=False,
                              trainable=True,
                              weights=[pos_W])
    pos_embedding2 = TimeDistributed(pos_embedding)(pos_input)
    pos_cnn = TimeDistributed(
        Conv1D(20, 2, activation='relu', padding='valid'))(pos_embedding2)
    pos_macpool = TimeDistributed(GlobalMaxPooling1D())(pos_cnn)

    word_embedding_RNN = Embedding(input_dim=sourcevocabsize + 1,
                                   output_dim=emd_dim,
                                   input_length=input_seq_lenth,
                                   mask_zero=False,
                                   trainable=False,
                                   weights=[source_W])(word_input)
    # word_embedding_RNN = Dropout(0.5)(word_embedding_RNN)

    embedding = concatenate([word_embedding_RNN, char_macpool, pos_macpool],
                            axis=-1)
    embedding = Dropout(0.5)(embedding)

    BiLSTM = Bidirectional(LSTM(int(hidden_dim / 2), return_sequences=True),
                           merge_mode='concat')(embedding)
    BiLSTM = BatchNormalization()(BiLSTM)
    # BiLSTM = Dropout(0.3)(BiLSTM)

    # decodelayer1 = LSTM(50, return_sequences=False, go_backwards=True)(concat_LC_d)#!!!!!
    # repeat_decodelayer1 = RepeatVector(input_seq_lenth)(decodelayer1)
    # concat_decoder = concatenate([concat_LC_d, repeat_decodelayer1], axis=-1)#!!!!
    # decodelayer2 = LSTM(hidden_dim, return_sequences=True)(concat_decoder)
    # decodelayer = Dropout(0.5)(decodelayer2)

    # decoderlayer1 = LSTM(50, return_sequences=True, go_backwards=False)(BiLSTM)
    decoderlayer5 = Conv1D(50, 5, activation='relu', strides=1,
                           padding='same')(BiLSTM)
    decoderlayer2 = Conv1D(50, 2, activation='relu', strides=1,
                           padding='same')(BiLSTM)
    decoderlayer3 = Conv1D(50, 3, activation='relu', strides=1,
                           padding='same')(BiLSTM)
    decoderlayer4 = Conv1D(50, 4, activation='relu', strides=1,
                           padding='same')(BiLSTM)
    # 0.8868111121100423
    decodelayer = concatenate(
        [decoderlayer2, decoderlayer3, decoderlayer4, decoderlayer5], axis=-1)
    decodelayer = BatchNormalization()(decodelayer)
    decodelayer = Dropout(0.5)(decodelayer)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(decodelayer)
    # TimeD = Dropout(0.5)(TimeD)
    model = Activation('softmax')(TimeD)  # 0.8769744561783556

    # crf = CRF(targetvocabsize + 1, sparse_target=False)
    # model = crf(TimeD)

    Models = Model([word_input, char_input, pos_input], model)

    # Models.compile(loss=my_cross_entropy_Weight, optimizer='adam', metrics=['acc'])
    Models.compile(loss=loss, optimizer='adam', metrics=['acc'])
    # Models.compile(loss=loss, optimizer='adam', metrics=['acc'], sample_weight_mode="temporal")
    # Models.compile(loss=loss, optimizer=optimizers.RMSprop(lr=0.01), metrics=['acc'])
    # Models.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
    # Models.compile(loss=crf.loss_function, optimizer=optimizers.RMSprop(lr=0.005), metrics=[crf.accuracy])

    return Models
Example no. 17
def get_test_model_full():
    """Returns a maximally complex test model,
    using all supported layer types with different parameter combination.
    """
    input_shapes = [
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4, ),
        (2, 3),
        (27, 29, 1),
        (17, 1),
        (17, 4),
        (2, 3),
        (2, 3, 4, 5),
        (2, 3, 4, 5, 6),
        (2, 3, 4, 5, 6),
        (7, 8, 9, 10),
        (7, 8, 9, 10),
        (11, 12, 13),
        (11, 12, 13),
        (14, 15),
        (14, 15),
        (16, ),
        (16, ),
    ]

    inputs = [Input(shape=s) for s in input_shapes]

    outputs = []

    outputs.append(Flatten()(inputs[4]))
    outputs.append(Flatten()(inputs[5]))
    outputs.append(Flatten()(inputs[9]))
    outputs.append(Flatten()(inputs[10]))

    for axis in [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[10], inputs[11]]))

    for axis in [-4, -3, -2, -1, 1, 2, 3, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[12], inputs[13]]))

    for axis in [-3, -2, -1, 1, 2, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[14], inputs[15]]))

    for axis in [-2, -1, 1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[16], inputs[17]]))

    for axis in [-1, 1]:
        outputs.append(Concatenate(axis=axis)([inputs[18], inputs[19]]))

    for inp in inputs[6:8]:
        for padding in ['valid', 'same', 'causal']:
            for s in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv1D(out_channels,
                                   s,
                                   padding=padding,
                                   dilation_rate=d)(inp))
        for padding_size in range(0, 5):
            outputs.append(ZeroPadding1D(padding_size)(inp))
        for crop_left in range(0, 2):
            for crop_right in range(0, 2):
                outputs.append(Cropping1D((crop_left, crop_right))(inp))
        for upsampling_factor in range(1, 5):
            outputs.append(UpSampling1D(upsampling_factor)(inp))
        for padding in ['valid', 'same']:
            for pool_factor in range(1, 6):
                for s in range(1, 4):
                    outputs.append(
                        MaxPooling1D(pool_factor, strides=s,
                                     padding=padding)(inp))
                    outputs.append(
                        AveragePooling1D(pool_factor,
                                         strides=s,
                                         padding=padding)(inp))
        outputs.append(GlobalMaxPooling1D()(inp))
        outputs.append(GlobalAveragePooling1D()(inp))

    for inp in [inputs[0], inputs[5]]:
        for padding in ['valid', 'same']:
            for h in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   padding=padding,
                                   dilation_rate=(d, 1))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            padding=padding,
                                            dilation_rate=(d, 1))(inp))
                    for sy in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (h, 1),
                                   strides=(1, sy),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (h, 1),
                                            strides=(sy, sy),
                                            padding=padding)(inp))
                for sy in range(1, 4):
                    outputs.append(
                        DepthwiseConv2D((h, 1),
                                        strides=(sy, sy),
                                        padding=padding)(inp))
                    outputs.append(
                        MaxPooling2D((h, 1), strides=(1, sy),
                                     padding=padding)(inp))
            for w in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4) if sy == 1 else [1]:
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   padding=padding,
                                   dilation_rate=(1, d))(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            padding=padding,
                                            dilation_rate=(1, d))(inp))
                    for sx in range(1, 4):
                        outputs.append(
                            Conv2D(out_channels, (1, w),
                                   strides=(sx, 1),
                                   padding=padding)(inp))
                        outputs.append(
                            SeparableConv2D(out_channels, (1, w),
                                            strides=(sx, sx),
                                            padding=padding)(inp))
                for sx in range(1, 4):
                    outputs.append(
                        DepthwiseConv2D((1, w),
                                        strides=(sy, sy),
                                        padding=padding)(inp))
                    outputs.append(
                        MaxPooling2D((1, w), strides=(1, sx),
                                     padding=padding)(inp))
    outputs.append(ZeroPadding2D(2)(inputs[0]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[0]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[0]))
    outputs.append(Cropping2D(2)(inputs[0]))
    outputs.append(Cropping2D((2, 3))(inputs[0]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[0]))
    for y in range(1, 3):
        for x in range(1, 3):
            outputs.append(UpSampling2D(size=(y, x))(inputs[0]))
    outputs.append(GlobalAveragePooling2D()(inputs[0]))
    outputs.append(GlobalMaxPooling2D()(inputs[0]))
    outputs.append(AveragePooling2D((2, 2))(inputs[0]))
    outputs.append(MaxPooling2D((2, 2))(inputs[0]))
    outputs.append(UpSampling2D((2, 2))(inputs[0]))
    outputs.append(Dropout(0.5)(inputs[0]))

    # same as axis=-1
    outputs.append(Concatenate()([inputs[1], inputs[2]]))
    outputs.append(Concatenate(axis=3)([inputs[1], inputs[2]]))
    # axis=0 does not make sense, since dimension 0 is the batch dimension
    outputs.append(Concatenate(axis=1)([inputs[1], inputs[2]]))
    outputs.append(Concatenate(axis=2)([inputs[1], inputs[2]]))

    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(center=False)(inputs[0]))
    outputs.append(BatchNormalization(scale=False)(inputs[0]))

    outputs.append(Conv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(Conv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=False)(inputs[0]))

    outputs.append(Dense(2, use_bias=True)(inputs[3]))
    outputs.append(Dense(2, use_bias=False)(inputs[3]))

    shared_conv = Conv2D(1, (1, 1),
                         padding='valid',
                         name='shared_conv',
                         activation='relu')

    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[1]))  # (1, 8, 8)
    x2 = shared_conv(up_scale_2(inputs[2]))  # (1, 8, 8)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[2]))  # (1, 8, 8)
    x = Concatenate()([x1, x2, x3])  # (3, 8, 8)
    outputs.append(x)

    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (3, 8, 8)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = Concatenate()([MaxPooling2D((2, 2))(x),
                       AveragePooling2D((2, 2))(x)])  # (6, 4, 4)
    outputs.append(x)

    x = Flatten()(x)  # (1, 1, 96)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (1, 1, 3)
    outputs.append(x)

    outputs.append(keras.layers.Add()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Subtract()([inputs[4], inputs[8]]))
    outputs.append(keras.layers.Multiply()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Average()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Maximum()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(Concatenate()([inputs[4], inputs[8], inputs[8]]))

    intermediate_input_shape = (3, )
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5)(intermediate_x)
    intermediate_model = Model(inputs=[intermediate_in],
                               outputs=[intermediate_x],
                               name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')

    x = intermediate_model(x)  # (1, 1, 5)

    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5, )))
    intermediate_model_2.add(Dense(5))
    intermediate_model_2.compile(optimizer='rmsprop',
                                 loss='categorical_crossentropy')

    x = intermediate_model_2(x)  # (1, 1, 5)

    x = Dense(3)(x)  # (1, 1, 3)

    shared_activation = Activation('tanh')

    outputs = outputs + [
        Activation('tanh')(inputs[3]),
        Activation('hard_sigmoid')(inputs[3]),
        Activation('selu')(inputs[3]),
        Activation('sigmoid')(inputs[3]),
        Activation('softplus')(inputs[3]),
        Activation('softmax')(inputs[3]),
        Activation('relu')(inputs[3]),
        LeakyReLU()(inputs[3]),
        ELU()(inputs[3]),
        PReLU()(inputs[2]),
        PReLU()(inputs[3]),
        PReLU()(inputs[4]),
        shared_activation(inputs[3]),
        Activation('linear')(inputs[4]),
        Activation('linear')(inputs[1]),
        x,
        shared_activation(x),
    ]

    print('Model has {} outputs.'.format(len(outputs)))

    model = Model(inputs=inputs, outputs=outputs, name='test_model_full')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 1
    batch_size = 1
    epochs = 10
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, epochs=epochs, batch_size=batch_size)
    return model
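get_test_model_full relies on generate_input_data and generate_output_data, which are not part of this excerpt; a minimal hedged sketch that produces random arrays of the right shapes (an assumption about those helpers) is:

import numpy as np

def generate_input_data(training_data_size, input_shapes):
    # Hypothetical helper: one random array per model input.
    return [np.random.random(size=(training_data_size,) + shape)
            for shape in input_shapes]

def generate_output_data(training_data_size, initial_data_out):
    # Hypothetical helper: random targets shaped like the model's predictions.
    return [np.random.random(size=(training_data_size,) + out.shape[1:])
            for out in initial_data_out]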
Example no. 18
def Model_Dense_Softmax(sourcevocabsize,
                        targetvocabsize,
                        source_W,
                        input_seq_lenth,
                        output_seq_lenth,
                        hidden_dim,
                        emd_dim,
                        sourcecharsize,
                        character_W,
                        input_word_length,
                        char_emd_dim,
                        sourcepossize,
                        pos_W,
                        pos_emd_dim,
                        batch_size=32,
                        loss='categorical_crossentropy',
                        optimizer='rmsprop'):

    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')

    char_input = Input(shape=(
        input_seq_lenth,
        input_word_length,
    ),
                       dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth,
                                                  input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])

    char_embedding2 = TimeDistributed(char_embedding)(char_input)

    char_cnn = TimeDistributed(Conv1D(50, 3, activation='relu',
                                      padding='same'))(char_embedding2)

    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)
    # !!!!!!!!!!!!!!
    char_macpool = Dropout(0.25)(char_macpool)

    word_embedding = Embedding(input_dim=sourcevocabsize + 1,
                               output_dim=emd_dim,
                               input_length=input_seq_lenth,
                               mask_zero=False,
                               trainable=True,
                               weights=[source_W])(word_input)

    word_embedding_dropout = Dropout(0.5)(word_embedding)

    embedding = concatenate([word_embedding_dropout, char_macpool], axis=-1)

    Dense1 = TimeDistributed(Dense(400, activation='tanh'))(embedding)
    Dense1 = Dropout(0.5)(Dense1)
    Dense2 = TimeDistributed(Dense(200, activation='tanh'))(Dense1)
    Dense2 = Dropout(0.3)(Dense2)
    Dense3 = TimeDistributed(Dense(100, activation='tanh'))(Dense2)
    Dense3 = Dropout(0.2)(Dense3)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(Dense3)

    # TimeD = Dropout(0.5)(TimeD)
    # !!!!!!!!!!!!!!!delete dropout

    model = Activation('softmax')(TimeD)

    # crflayer = CRF(targetvocabsize+1, sparse_target=False)
    # model = crflayer(TimeD)

    Models = Model([word_input, char_input], [model])

    Models.compile(loss=loss,
                   optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=['acc'])

    # Models.compile(loss=crflayer.loss_function, optimizer=optimizers.RMSprop(lr=0.001), metrics=[crflayer.accuracy])

    return Models
Example no. 19
def get_resNet_model(input_shape, output_shape):
    def resnet_v1(input_shape,
                  depth,
                  num_classes=10,
                  input_tensor=None,
                  local_conv=False):
        if (depth - 2) % 6 != 0:
            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
        # Start model definition.
        num_filters = 16
        num_res_blocks = int((depth - 2) / 6)

        if input_tensor is None:
            inputs = Input(shape=input_shape)
        else:
            inputs = input_tensor

        x = resnet_layer_naive(inputs=inputs)
        # Instantiate the stack of residual units
        for stack in range(3):
            for res_block in range(num_res_blocks):
                strides = 1
                # if stack > 0 and res_block == 0:  # first layer but not first stack
                #     strides = 2  # downsample
                y = resnet_layer_local(inputs=x,
                                       kernel_size=8,
                                       num_filters=num_filters,
                                       strides=strides)
                y = resnet_layer_local(inputs=y,
                                       kernel_size=16,
                                       num_filters=num_filters,
                                       activation=None)
                if stack > 0 and res_block == 0:  # first layer but not first stack
                    # linear projection residual shortcut connection to match
                    # changed dims
                    x = resnet_layer_naive(inputs=x,
                                           num_filters=num_filters,
                                           kernel_size=16,
                                           strides=strides,
                                           activation=None,
                                           batch_normalization=True)
                x = keras.layers.add([x, y])
                x = Activation(default_activation)(x)
            num_filters *= 2
        return x

    inputs = Input(shape=input_shape)
    xxx = inputs
    xxx = Conv1D(filters=xl_filter_num,
                 kernel_size=m_filter_num,
                 padding='same',
                 activation=None,
                 strides=1)(xxx)
    xxx = BatchNormalization()(xxx)
    xxx = Activation('relu')(xxx)
    xxx = MaxPooling1D(pool_size=2, padding='same', strides=2)(xxx)

    xxx = resnet_v1(input_shape,
                    num_classes=output_shape,
                    depth=3 * 6 + 2,
                    input_tensor=xxx,
                    local_conv=False)

    xxx = LocallyConnected1D(filters=l_filter_num,
                             kernel_size=m_filter_num,
                             padding='valid',
                             activation=default_activation,
                             strides=1)(xxx)
    xxx = BatchNormalization()(xxx)

    xxx = GlobalMaxPooling1D()(xxx)
    xxx = Dense(output_shape,
                activation='softmax',
                kernel_initializer='he_normal')(xxx)
    model = Model(inputs=inputs, outputs=xxx)
    return model
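
# A hedged usage note: get_resNet_model relies on module-level globals
# (xl_filter_num, m_filter_num, l_filter_num, default_activation) and on the
# helper builders resnet_layer_naive / resnet_layer_local defined elsewhere in
# the same file, and it returns an uncompiled model. A minimal sketch, assuming
# a 1-D signal of length 1024 with a single channel and 10 classes:
resnet_sketch = get_resNet_model(input_shape=(1024, 1), output_shape=10)
resnet_sketch.compile(optimizer='adam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
resnet_sketch.summary()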
Esempio n. 20
0
def Model_BiLSTM_X2_CRF(sourcevocabsize,
                        targetvocabsize,
                        source_W,
                        input_seq_lenth,
                        output_seq_lenth,
                        hidden_dim,
                        emd_dim,
                        sourcecharsize,
                        character_W,
                        input_word_length,
                        char_emd_dim,
                        batch_size=32,
                        loss='categorical_crossentropy',
                        optimizer='rmsprop'):

    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')

    char_input = Input(shape=(
        input_seq_lenth,
        input_word_length,
    ),
                       dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth,
                                                  input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])

    char_embedding2 = TimeDistributed(char_embedding)(char_input)

    char_cnn = TimeDistributed(Conv1D(50, 3, activation='relu',
                                      padding='same'))(char_embedding2)

    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)
    # !!!!!!!!!!!!!!
    char_macpool = Dropout(0.25)(char_macpool)

    word_embedding = Embedding(input_dim=sourcevocabsize + 1,
                               output_dim=emd_dim,
                               input_length=input_seq_lenth,
                               mask_zero=True,
                               trainable=True,
                               weights=[source_W])(word_input)

    word_embedding_dropout = Dropout(0.5)(word_embedding)

    embedding = concatenate([word_embedding_dropout, char_macpool], axis=-1)

    BiLSTM = Bidirectional(LSTM(hidden_dim, return_sequences=True),
                           merge_mode='concat')(embedding)
    BiLSTM = BatchNormalization(axis=1)(BiLSTM)
    BiLSTM_dropout = Dropout(0.5)(BiLSTM)

    BiLSTM2 = Bidirectional(LSTM(hidden_dim // 2, return_sequences=True),
                            merge_mode='concat')(BiLSTM_dropout)
    BiLSTM_dropout2 = Dropout(0.5)(BiLSTM2)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(BiLSTM_dropout2)
    # TimeD = TimeDistributed(Dense(int(hidden_dim / 2)))(BiLSTM_dropout)
    # TimeD = Dropout(0.5)(TimeD)
    # !!!!!!!!!!!!!!!delete dropout

    # model = Activation('softmax')(TimeD)

    crflayer = CRF(targetvocabsize + 1, sparse_target=False)
    model = crflayer(TimeD)  #0.8746633147782367
    # # model = crf(BiLSTM_dropout)#0.870420501714492

    Models = Model([word_input, char_input], [model])

    # Models.compile(loss=loss, optimizer='adam', metrics=['acc'])
    # Models.compile(loss=crflayer.loss_function, optimizer='adam', metrics=[crflayer.accuracy])
    Models.compile(loss=crflayer.loss_function,
                   optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=[crflayer.accuracy])

    return Models
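
# The CRF layer above is not part of core Keras; given the crflayer.loss_function
# and crflayer.accuracy attributes used in the compile call, the original file
# presumably imports it from keras-contrib (an assumption, since the import is
# not shown in this excerpt):
from keras_contrib.layers import CRF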
Esempio n. 21
0
def get_model(embedding_layer,RNN,embed_size,Feature_dic,Para_dic):

    MAX_SEQUENCE_LENGTH=Para_dic['MAX_SEQUENCE_LENGTH']
    comment_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences_raw= embedding_layer(comment_input)
    embedded_sequences = SpatialDropout1D(Para_dic['spatial_dropout'])(embedded_sequences_raw)
    
    ### RNN
    if RNN=='LSTM':
        RNN_x = Bidirectional(CuDNNLSTM(Para_dic['num_lstm'],return_sequences=True))(embedded_sequences)
    elif RNN=='GRU':
        RNN_x = Bidirectional(CuDNNGRU(Para_dic['num_lstm'],return_sequences=True))(embedded_sequences)

    Feature=[]

    ######## RNN Features
    ##### Attention
    if Feature_dic['Attention']==1:
        Feature.append(Attention(MAX_SEQUENCE_LENGTH)(RNN_x))

    if Feature_dic['RNN_maxpool']==1:
        Feature.append(GlobalMaxPooling1D()(RNN_x))

    ##### Capsule
    if Feature_dic['Capsule']==1:

        capsule = Capsule(share_weights=True)(RNN_x)
        capsule = Flatten()(capsule)
        Feature.append(capsule)

    ##### RNN_CNNN1d

    if Feature_dic['RNN_CNN_conv1d']==1:
        
        Cx = Conv1D(64, kernel_size = 2, padding = "valid", kernel_initializer = "he_uniform")(RNN_x)
        avg_pool = GlobalAveragePooling1D()(Cx)
        max_pool = GlobalMaxPooling1D()(Cx)
        Feature.append(avg_pool)
        Feature.append(max_pool)

    ######## CNN Features
    ### CNN2d
    if Feature_dic['CNN2d']==1:
        CNN2d=get_CNN2d(embedded_sequences,embed_size,MAX_SEQUENCE_LENGTH,Para_dic)
        Feature.append(CNN2d)


    ### DPCNN
    if Feature_dic['DPCNN']==1:
        DPCNN=get_DPCNN(embedded_sequences,Para_dic)
        Feature.append(DPCNN)


    ### Concatnation
    merged = Concatenate()(Feature)

    ### dense, add L1 reg to enable sparsity
    merged = Dense(Para_dic['dense_num'], \
                   activation=Para_dic['dense_act'],\
                   kernel_regularizer=regularizers.l1(Para_dic['L1_reg']))(merged)

    merged = Dropout(Para_dic['dense_dropout'])(merged)
    preds = Dense(6, activation='sigmoid')(merged)

    model = Model(inputs=[comment_input], outputs=preds)
    model.compile(loss='binary_crossentropy',
            optimizer=RMSprop(),
            metrics=['accuracy'])
    print(model.summary())
    return model
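
# A hypothetical pair of configuration dicts for get_model, listing only the
# keys its body reads; the values, the vocabulary size and embed_size are
# illustrative placeholders, and the custom Attention layer plus the
# regularizers / RMSprop imports are assumed to come from the original file.
# Enabling Capsule, CNN2d or DPCNN would require further Para_dic keys used by
# those helpers, which are not shown in this excerpt.
Para_dic = {
    'MAX_SEQUENCE_LENGTH': 150,
    'spatial_dropout': 0.2,
    'num_lstm': 128,
    'dense_num': 256,
    'dense_act': 'relu',
    'L1_reg': 1e-6,
    'dense_dropout': 0.3,
}
Feature_dic = {
    'Attention': 1,
    'RNN_maxpool': 1,
    'Capsule': 0,
    'RNN_CNN_conv1d': 1,
    'CNN2d': 0,
    'DPCNN': 0,
}
embed_size = 300
embedding_layer = Embedding(20000, embed_size,
                            input_length=Para_dic['MAX_SEQUENCE_LENGTH'])
model = get_model(embedding_layer, 'GRU', embed_size, Feature_dic, Para_dic)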
Esempio n. 22
0
# Input shape
inp = Input(shape=(maxlen, ))

# Embedding and GRU
x = Embedding(max_features, 150)(inp)
x = SpatialDropout1D(0.25)(x)
x = Bidirectional(
    LSTM(64, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(x)
x = Conv1D(32,
           kernel_size=3,
           padding='valid',
           kernel_initializer='glorot_uniform')(x)

# Pooling
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])

# Output layer
output = Dense(1, activation='sigmoid')(conc)

model = Model(inputs=inp, outputs=output)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# model.load_weights('Weights/gru5_3.h5')
model.fit(X_train, Y_train, epochs=3, batch_size=32, verbose=1)

results = model.predict(X_test, batch_size=1, verbose=1)
run_test(results, Y_test)
Esempio n. 23
0
def cnn_rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(
            CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(
            CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))
    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)
    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional (second stacked RNN pass, disabled)
    #     q1_encoded = encode2(q1_encoded)
    #     q2_encoded = encode2(q2_encoded)
    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    #     rnn_layer3_input1 = concatenate([q1_embed,q1_encoded,q1_encoded2])
    #     rnn_layer3_input2 = concatenate([q2_embed,q2_encoded,q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)
    convs1, convs2 = [], []
    for ksz in config['kernel_sizes']:
        pooling1, pooling2 = block(q1_embed, q2_embed, ksz, config['filters'])
        convs1.append(pooling1)
        convs2.append(pooling2)
    rnn_rep1 = GlobalMaxPooling1D()(q1_encoded3)
    rnn_rep2 = GlobalMaxPooling1D()(q2_encoded3)
    convs1.append(rnn_rep1)
    convs2.append(rnn_rep2)
    merged1 = concatenate(convs1, axis=-1)
    merged2 = concatenate(convs2, axis=-1)
    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])
    # merged = Concatenate()([mul_rep, sub_rep])
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep])
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)
    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
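
# A hypothetical configuration for cnn_rnn, covering exactly the keys the
# function reads; the values and the random embedding_matrix are illustrative
# placeholders, and the helpers block() and f1 used inside cnn_rnn come from
# elsewhere in the original file (the CuDNN layers also need a GPU build).
import numpy as np

embedding_matrix = np.random.rand(20000, 300).astype('float32')
config = {
    'rnn': 'gru',
    'gpu': True,
    'rnn_output_size': 64,
    'max_length': 30,
    'embed_trainable': False,
    'spatial_dropout_rate': 0.2,
    'kernel_sizes': [1, 2, 3],
    'filters': 64,
    'dense_dropout': 0.3,
    'dense_dim': 128,
    'optimizer': 'adam',
    'learning_rate': 1e-3,
}
model = cnn_rnn(embedding_matrix, config)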
Esempio n. 24
0
def get_model_rnn_cnn(embedding_matrix,
                      cell_size=80,
                      cell_type_GRU=True,
                      maxlen=180,
                      max_features=100000,
                      embed_size=300,
                      prob_dropout=0.2,
                      emb_train=False,
                      filter_size=128,
                      kernel_size=2,
                      stride=1):
    inp_pre = Input(shape=(maxlen, ), name='input_pre')
    inp_post = Input(shape=(maxlen, ), name='input_post')

    ##pre
    x1 = Embedding(max_features,
                   embed_size,
                   weights=[embedding_matrix],
                   trainable=emb_train)(inp_pre)
    x1 = SpatialDropout1D(prob_dropout)(x1)

    if cell_type_GRU:
        x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1)
    else:
        x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)

    x1 = Conv1D(filter_size,
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                kernel_initializer="he_uniform")(x1)
    avg_pool1 = GlobalAveragePooling1D()(x1)
    max_pool1 = GlobalMaxPooling1D()(x1)

    ##post
    x2 = Embedding(max_features,
                   embed_size,
                   weights=[embedding_matrix],
                   trainable=emb_train)(inp_post)
    x2 = SpatialDropout1D(prob_dropout)(x2)

    if cell_type_GRU:
        x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2)
    else:
        x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)

    x2 = Conv1D(filter_size,
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                kernel_initializer="he_uniform")(x2)
    avg_pool2 = GlobalAveragePooling1D()(x2)
    max_pool2 = GlobalMaxPooling1D()(x2)

    ##merge
    conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2])
    outp = Dense(6, activation="sigmoid")(conc)

    model = Model(inputs=[inp_pre, inp_post], outputs=outp)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['binary_crossentropy', 'accuracy'])

    return model
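
# A hedged usage sketch for the two-branch model above, with toy data chosen
# only to show the expected shapes; the CuDNN layers require a GPU build of
# TensorFlow/Keras to actually run.
import numpy as np

vocab, dim, n, maxlen = 1000, 300, 16, 180
emb = np.random.rand(vocab, dim).astype('float32')
X_pre = np.random.randint(1, vocab, size=(n, maxlen))
X_post = np.random.randint(1, vocab, size=(n, maxlen))
y = np.random.randint(0, 2, size=(n, 6))            # six sigmoid outputs

model = get_model_rnn_cnn(emb, max_features=vocab, embed_size=dim, maxlen=maxlen)
model.fit({'input_pre': X_pre, 'input_post': X_post}, y, batch_size=8, epochs=1)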
Esempio n. 25
0
def train():
    print('Generating training data')

    #pretrain_data = generate_data(None, '/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ngram-id2seq.tsv.gz', '/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ann-train-data.tsv.gz', ann_ids, 256)
    #pretrain_size = _data_size('/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ngram-id2seq.tsv.gz')/2
    train_path = './data/train.txt.gz'
    train_data = generate_data(train_path, SEQUENCE_PATH, ann_path, ann_ids,
                               batch_size)
    train_size = _data_size(train_path)
    train_ids = read_split_ids(train_path, unique=False)
    # import pdb; pdb.set_trace()
    devel_path = './data/devel.txt.gz'
    devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids,
                               batch_size // 10)
    devel_size = _data_size(devel_path)
    devel_ids = read_split_ids(devel_path, unique=False)

    test_path = './data/test.txt.gz'
    test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids,
                              batch_size // 10)
    test_size = _data_size(test_path)
    test_ids = read_split_ids(test_path, unique=False)

    #print "Making baseline predictions"
    #import baseline
    #devel_baseline = baseline.predict(devel_data['prot_ids'], blast_dict, ann_path)
    #devel_baseline_ids = go_to_ids([b[1] for b in devel_baseline], ann_ids)
    #from sklearn import metrics
    #baseline_score = metrics.precision_recall_fscore_support(devel_data['labels'], devel_baseline_ids, average='micro')
    #print 'Baseline score: ', baseline_score

    #import pdb; pdb.set_trace()
    #for ii in [3, 6, 9, 15, 27, 50]:
    #    print '### Testing window size %s' % ii
    print('Building model')
    inputs = Input(shape=(timesteps, ), name='sequence')
    input_list = [inputs]
    embedding = Embedding(vocab_size, latent_dim, mask_zero=False)(inputs)
    embedding = Dropout(0.5)(embedding)
    #embedding = Embedding(aa_embedding.shape[0], aa_embedding.shape[1], mask_zero=False, weights=[aa_embedding], trainable=True)(inputs)
    #mask = Masking()(embedding)

    convs = []

    # Stacked CNN experiments
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(embedding)
    ##maxed = GlobalMaxPooling1D()(encoded)
    ##convs.append(maxed)
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(encoded)
    ##maxed = GlobalMaxPooling1D()(encoded)
    ##convs.append(maxed)
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(encoded)
    #encoded = GlobalMaxPooling1D()(encoded)
    #convs.append(maxed)

    for i in [3, 9, 27]:
        encoded = Convolution1D(400, i, padding='valid',
                                activation='relu')(embedding)
        encoded = GlobalMaxPooling1D()(encoded)
        convs.append(encoded)

        ## LSTM attention
        #lstm = LSTM(50)(mask)
        ##convs.append(lstm)
        #
        #from attention import Attention
        #att = Attention()([encoded, lstm])
        #convs.append(att)

    if use_features:
        #feature_input = Input(shape=(len(blast_hit_ids), ), name='features')
        feature_input = Input(shape=(json_feature_matrix.shape[1], ),
                              name='features')  # For Jari's feature vectors
        dropout = Dropout(0.5)(feature_input)
        feature_encoding = Dense(300, activation='tanh')(
            dropout)  # Squeeze the feature vectors to a tiny encoding
        convs.append(feature_encoding)
        input_list.append(feature_input)
    #
    #encoded = feature_encoding
    encoded = concatenate(convs)

    predictions = Dense(len(ann_ids), activation='sigmoid',
                        name='labels')(encoded)

    model = Model(input_list, predictions)
    model.compile(optimizer=Adam(lr=0.0005),
                  loss=weighted_binary_crossentropy,
                  metrics=['accuracy'])
    print(model.summary())

    print('Training model')
    pickle.dump(ann_ids, open(os.path.join(model_dir, 'ann_ids.pkl'), 'wb'))
    pickle.dump(reverse_ann_ids,
                open(os.path.join(model_dir, 'reverse_ann_ids.pkl'), 'wb'))

    if use_features:
        # For Jari's features
        pickle.dump(json_id_map,
                    open(os.path.join(model_dir, 'json_id_map.pkl'), 'wb'))
        pickle.dump(json_vectorizer,
                    open(os.path.join(model_dir, 'json_vectorizer.pkl'), 'wb'))
        pickle.dump(
            feature_selector,
            open(os.path.join(model_dir, 'feature_selector.pkl'), 'wb'))

    es_cb = EarlyStopping(monitor='val_acc',
                          patience=10,
                          verbose=0,
                          mode='max')
    cp_cb = ModelCheckpoint(filepath=os.path.join(model_dir, 'model.hdf5'),
                            monitor='val_acc',
                            mode='max',
                            save_best_only=True,
                            verbose=0)
    ev_cb = Evaluate(devel_path, 500, reverse_ann_ids)
    # next(devel_data)
    # import pdb; pdb.set_trace()
    model.fit_generator(train_data,
                        steps_per_epoch=batch_size,
                        epochs=60,
                        validation_data=devel_data,
                        validation_steps=batch_size,
                        callbacks=[ev_cb])

    # If using our own blast features
    #pickle.dump(blast_hit_ids, open(os.path.join(model_dir, 'blast_hit_ids.pkl') ,'wb'))

    #import pdb; pdb.set_trace()

    #     print "Making predictions"
    #     from keras.models import load_model
    #     model = load_model(filepath=os.path.join(model_dir, 'model.h5'), custom_objects={"weighted_binary_crossentropy":weighted_binary_crossentropy})
    #
    #     # # Reinstantiate the data generators, otherwise they are not correctly aligned anymore
    #     # devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size)
    #     # test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size)
    #     #
    #     # devel_score = model.evaluate_generator(devel_data, devel_size)
    #     # test_score = model.evaluate_generator(test_data, test_size)
    #     # print 'Devel l/a/p/r/f: ', devel_score
    #     # print 'Test l/a/p/r/f: ', test_score
    #
    #
    #     # Reinstantiate the data generators, otherwise they are not correctly aligned anymore
    #     devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size)
    #     test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size)
    #
    #     devel_pred = model.predict_generator(devel_data, steps=batch_size)
    #     test_pred = model.predict_generator(test_data, steps=batch_size)
    #
    #     save_predictions(os.path.join(model_dir, 'devel_pred.tsv.gz'), devel_ids, devel_pred, reverse_ann_ids)
    #     save_predictions(os.path.join(model_dir, 'test_pred.tsv.gz'), test_ids, test_pred, reverse_ann_ids)
    #
    #     print 'Making CAFA target predictions'
    #
    #     cafa_id_path = '/home/sukaew/CAFA_PI/targetFiles/sequences/target.all.ids.gz'
    #     cafa_seq_path = '/home/sukaew/CAFA_PI/targetFiles/sequences/target.all.fasta.gz'
    #     cafa_data = generate_data(None, cafa_seq_path, ann_path, ann_ids, batch_size, cafa_targets=True, verbose=False)
    #     cafa_size = _data_size(cafa_id_path)
    #     cafa_ids = _get_ids(generate_data(None, cafa_seq_path, ann_path, ann_ids, batch_size, cafa_targets=True, verbose=False, endless=False))
    # #    cafa_ids = read_split_ids(cafa_id_path, unique=False)
    #     #import pdb; pdb.set_trace()
    #     cafa_pred = model.predict_generator(cafa_data, batch_size)
    #
    #     save_predictions(os.path.join(model_dir, 'cafa_targets.tsv.gz'), cafa_ids, cafa_pred, reverse_ann_ids, cafa_targets=True)
    # #import pdb; pdb.set_trace()

    print('All done.')
Esempio n. 26
0
def get_model_2rnn_cnn(embedding_matrix,
                       cell_size=80,
                       cell_type_GRU=True,
                       maxlen=180,
                       max_features=100000,
                       embed_size=300,
                       prob_dropout=0.2,
                       emb_train=False,
                       filter_size=128,
                       kernel_size=2,
                       stride=1):

    inp_pre = Input(shape=(maxlen, ), name='input_pre')
    inp_post = Input(shape=(maxlen, ), name='input_post')

    ##pre
    x1 = Embedding(max_features,
                   embed_size,
                   weights=[embedding_matrix],
                   trainable=emb_train)(inp_pre)
    x1 = SpatialDropout1D(prob_dropout)(x1)

    if cell_type_GRU:
        x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)
        x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1)
    else:
        x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)
        x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)

    x1 = Conv1D(filter_size,
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                kernel_initializer="he_uniform")(x1)
    avg_pool1 = GlobalAveragePooling1D()(x1)
    max_pool1 = GlobalMaxPooling1D()(x1)

    ##post
    x2 = Embedding(max_features,
                   embed_size,
                   weights=[embedding_matrix],
                   trainable=emb_train)(inp_post)
    x2 = SpatialDropout1D(prob_dropout)(x2)

    if cell_type_GRU:
        x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)
        x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2)
    else:
        x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)
        x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)

    x2 = Conv1D(filter_size,
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                kernel_initializer="he_uniform")(x2)
    avg_pool2 = GlobalAveragePooling1D()(x2)
    max_pool2 = GlobalMaxPooling1D()(x2)

    ##merge
    conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2])
    outp = Dense(6, activation="sigmoid")(conc)

    model = Model(inputs=[inp_pre, inp_post], outputs=outp)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['binary_crossentropy', 'accuracy'])

    return model


# def get_model_2rnn_cnn_sp(
#                           embedding_matrix, cell_size = 80, cell_type_GRU = True,
#                           maxlen = 180, max_features = 100000, embed_size = 300,
#                           prob_dropout = 0.2, emb_train = False,
#                           filter_size=128, kernel_size = 2, stride = 1
#                          ):

#     inp_pre = Input(shape=(maxlen, ), name='input_pre')
#     inp_post = Input(shape=(maxlen, ), name='input_post')

#     ##pre
#     x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre)
#     x1 = SpatialDropout1D(prob_dropout)(x1)

#     if cell_type_GRU:
#         x1_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)
#         x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_)
#     else :
#         x1_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)
#         x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_)

#     x1_ = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_)
#     avg_pool1_ = GlobalAveragePooling1D()(x1_)
#     max_pool1_ = GlobalMaxPooling1D()(x1_)

#     x1 = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1)
#     avg_pool1 = GlobalAveragePooling1D()(x1)
#     max_pool1 = GlobalMaxPooling1D()(x1)

#     ##post
#     x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post)
#     x2 = SpatialDropout1D(prob_dropout)(x2)

#     if cell_type_GRU:
#         x2_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)
#         x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_)
#     else :
#         x2_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)
#         x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2_)

#     x2_ = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_)
#     avg_pool2_ = GlobalAveragePooling1D()(x2_)
#     max_pool2_ = GlobalMaxPooling1D()(x2_)

#     x2 = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2)
#     avg_pool2 = GlobalAveragePooling1D()(x2)
#     max_pool2 = GlobalMaxPooling1D()(x2)

#     ##merge
#     conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2, avg_pool1_, max_pool1_, avg_pool2_, max_pool2_])
#     outp = Dense(6, activation="sigmoid")(conc)

#     model = Model(inputs=[inp_pre, inp_post], outputs=outp)
#     model.compile(loss='binary_crossentropy',
#                   optimizer='adam',
#                   metrics=['binary_crossentropy', 'accuracy'])

#     return model

# def get_model_dual_2rnn_cnn_sp(
#                                embedding_matrix, cell_size = 80, cell_type_GRU = True,
#                                maxlen = 180, max_features = 100000, embed_size = 300,
#                                prob_dropout = 0.2, emb_train = False,
#                                filter_size=128, kernel_size = 2, stride = 1
#                               ):

#     inp_pre = Input(shape=(maxlen, ), name='input_pre')
#     inp_post = Input(shape=(maxlen, ), name='input_post')

#     ##pre
#     x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre)
#     x1g = SpatialDropout1D(prob_dropout)(x1)
#     x1l = SpatialDropout1D(prob_dropout)(x1)

#     x1_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1g)
#     x1g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_g)
#     x1_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1l)
#     x1l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_l)

#     x1_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_g)
#     x1_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_l)
#     avg_pool1_g = GlobalAveragePooling1D()(x1_g)
#     max_pool1_g = GlobalMaxPooling1D()(x1_g)
#     avg_pool1_l = GlobalAveragePooling1D()(x1_l)
#     max_pool1_l = GlobalMaxPooling1D()(x1_l)

#     x1g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1g)
#     x1l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1l)
#     avg_pool1g = GlobalAveragePooling1D()(x1g)
#     max_pool1g = GlobalMaxPooling1D()(x1g)
#     avg_pool1l = GlobalAveragePooling1D()(x1l)
#     max_pool1l = GlobalMaxPooling1D()(x1l)

#     ##post
#     x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post)
#     x2g = SpatialDropout1D(prob_dropout)(x2)
#     x2l = SpatialDropout1D(prob_dropout)(x2)

#     x2_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2g)
#     x2g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_g)
#     x2_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2l)
#     x2l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2_l)

#     x2_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_g)
#     x2_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_l)
#     avg_pool2_g = GlobalAveragePooling1D()(x2_g)
#     max_pool2_g = GlobalMaxPooling1D()(x2_g)
#     avg_pool2_l = GlobalAveragePooling1D()(x2_l)
#     max_pool2_l = GlobalMaxPooling1D()(x2_l)

#     x2g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2g)
#     x2l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2l)
#     avg_pool2g = GlobalAveragePooling1D()(x2g)
#     max_pool2g = GlobalMaxPooling1D()(x2g)
#     avg_pool2l = GlobalAveragePooling1D()(x2l)
#     max_pool2l = GlobalMaxPooling1D()(x2l)

#     ##merge
#     conc = concatenate([avg_pool1g, max_pool1g, avg_pool1l, max_pool1l, avg_pool1_g, max_pool1_g, avg_pool1_l, max_pool1_l,
#                         avg_pool2g, max_pool2g, avg_pool2l, max_pool2l, avg_pool2_g, max_pool2_g, avg_pool2_l, max_pool2_l])
#     outp = Dense(6, activation="sigmoid")(conc)

#     model = Model(inputs=[inp_pre, inp_post], outputs=outp)
#     model.compile(loss='binary_crossentropy',
#                   optimizer='adam',
#                   metrics=['binary_crossentropy', 'accuracy'])

#     return model

# def get_model_dual_2rnn_cnn_sp_drop(
#                                embedding_matrix, cell_size = 80, cell_type_GRU = True,
#                                maxlen = 180, max_features = 100000, embed_size = 300,
#                                prob_dropout = 0.2, emb_train = False,
#                                filter_size=128, kernel_size = 2, stride = 1
#                               ):

#     inp_pre = Input(shape=(maxlen, ), name='input_pre')
#     inp_post = Input(shape=(maxlen, ), name='input_post')

#     ##pre
#     x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre)
#     x1g = SpatialDropout1D(prob_dropout)(x1)
#     x1l = SpatialDropout1D(prob_dropout)(x1)

#     x1_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1g)
#     x1g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_g)
#     x1_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1l)
#     x1l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_l)

#     x1_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_g)
#     x1_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_l)
#     avg_pool1_g = GlobalAveragePooling1D()(x1_g)
#     max_pool1_g = GlobalMaxPooling1D()(x1_g)
#     avg_pool1_l = GlobalAveragePooling1D()(x1_l)
#     max_pool1_l = GlobalMaxPooling1D()(x1_l)

#     x1g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1g)
#     x1l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1l)
#     avg_pool1g = GlobalAveragePooling1D()(x1g)
#     max_pool1g = GlobalMaxPooling1D()(x1g)
#     avg_pool1l = GlobalAveragePooling1D()(x1l)
#     max_pool1l = GlobalMaxPooling1D()(x1l)

#     ##post
#     x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post)
#     x2g = SpatialDropout1D(prob_dropout)(x2)
#     x2l = SpatialDropout1D(prob_dropout)(x2)

#     x2_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2g)
#     x2g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_g)
#     x2_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2l)
#     x2l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2_l)

#     x2_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_g)
#     x2_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_l)
#     avg_pool2_g = GlobalAveragePooling1D()(x2_g)
#     max_pool2_g = GlobalMaxPooling1D()(x2_g)
#     avg_pool2_l = GlobalAveragePooling1D()(x2_l)
#     max_pool2_l = GlobalMaxPooling1D()(x2_l)

#     x2g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2g)
#     x2l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2l)
#     avg_pool2g = GlobalAveragePooling1D()(x2g)
#     max_pool2g = GlobalMaxPooling1D()(x2g)
#     avg_pool2l = GlobalAveragePooling1D()(x2l)
#     max_pool2l = GlobalMaxPooling1D()(x2l)

#     ##merge
#     conc = concatenate([avg_pool1g, max_pool1g, avg_pool1l, max_pool1l, avg_pool1_g, max_pool1_g, avg_pool1_l, max_pool1_l,
#                         avg_pool2g, max_pool2g, avg_pool2l, max_pool2l, avg_pool2_g, max_pool2_g, avg_pool2_l, max_pool2_l])
#     conc = SpatialDropout1D(prob_dropout)(conc)
#     outp = Dense(6, activation="sigmoid")(conc)

#     model = Model(inputs=[inp_pre, inp_post], outputs=outp)
#     model.compile(loss='binary_crossentropy',
#                   optimizer='adam',
#                   metrics=['binary_crossentropy', 'accuracy'])

#     return model

# def get_model_dpcnn(
#                     embedding_matrix, cell_size = 80, cell_type_GRU = True,
#                     maxlen = 180, max_features = 100000, embed_size = 300,
#                     prob_dropout = 0.2, emb_train = False,
#                     filter_nr=128, filter_size = 2, stride = 1,
#                     max_pool_size = 3, max_pool_strides = 2, dense_nr = 256,
#                     spatial_dropout = 0.2, dense_dropout = 0.5,
#                     conv_kern_reg = regularizers.l2(0.00001), conv_bias_reg = regularizers.l2(0.00001)
#                     ):

#     comment = Input(shape=(maxlen,))
#     emb_comment = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=emb_train)(comment)
#     emb_comment = SpatialDropout1D(spatial_dropout)(emb_comment)

#     block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(emb_comment)
#     block1 = BatchNormalization()(block1)
#     block1 = PReLU()(block1)
#     block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block1)
#     block1 = BatchNormalization()(block1)
#     block1 = PReLU()(block1)

#     #we pass embedded comment through conv1d with filter size 1 because it needs to have the same shape as block output
#     #if you choose filter_nr = embed_size (300 in this case) you don't have to do this part and can add emb_comment directly to block1_output
#     resize_emb = Conv1D(filter_nr, kernel_size=1, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(emb_comment)
#     resize_emb = PReLU()(resize_emb)

#     block1_output = add([block1, resize_emb])
#     block1_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block1_output)

#     block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block1_output)
#     block2 = BatchNormalization()(block2)
#     block2 = PReLU()(block2)
#     block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block2)
#     block2 = BatchNormalization()(block2)
#     block2 = PReLU()(block2)

#     block2_output = add([block2, block1_output])
#     block2_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block2_output)

#     block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block2_output)
#     block3 = BatchNormalization()(block3)
#     block3 = PReLU()(block3)
#     block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block3)
#     block3 = BatchNormalization()(block3)
#     block3 = PReLU()(block3)

#     block3_output = add([block3, block2_output])
#     block3_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block3_output)

#     block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block3_output)
#     block4 = BatchNormalization()(block4)
#     block4 = PReLU()(block4)
#     block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block4)
#     block4 = BatchNormalization()(block4)
#     block4 = PReLU()(block4)

#     block4_output = add([block4, block3_output])
#     block4_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block4_output)

#     block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block4_output)
#     block5 = BatchNormalization()(block5)
#     block5 = PReLU()(block5)
#     block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block5)
#     block5 = BatchNormalization()(block5)
#     block5 = PReLU()(block5)

#     block5_output = add([block5, block4_output])
#     block5_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block5_output)

#     block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block5_output)
#     block6 = BatchNormalization()(block6)
#     block6 = PReLU()(block6)
#     block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block6)
#     block6 = BatchNormalization()(block6)
#     block6 = PReLU()(block6)

#     block6_output = add([block6, block5_output])
#     block6_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block6_output)

#     block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block6_output)
#     block7 = BatchNormalization()(block7)
#     block7 = PReLU()(block7)
#     block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear',
#                 kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block7)
#     block7 = BatchNormalization()(block7)
#     block7 = PReLU()(block7)

#     block7_output = add([block7, block6_output])
#     output = GlobalMaxPooling1D()(block7_output)

#     output = Dense(dense_nr, activation='linear')(output)
#     output = BatchNormalization()(output)
#     output = PReLU()(output)
#     output = Dropout(dense_dropout)(output)
#     output = Dense(6, activation='sigmoid')(output)

#     model = Model(comment, output)

#     model.compile(loss='binary_crossentropy',
#                 optimizer='adam',
#                 metrics=['accuracy'])

#     return model
Esempio n. 27
0
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('----- x_train shape:', x_train.shape)
print('----- x_test shape:', x_test.shape)

# Build the neural network model
print('========== 3.Build model...')
model = Sequential()

# input_dim=max_features is the vocabulary size, output_dim=embedding_dims=50 is the word-vector dimension, input_length=maxlen is the length of each sample
model.add(Embedding(max_features, embedding_dims, input_length=maxlen))     # output: (*, 400, 50)
model.add(Dropout(0.2))

# 1D convolution layer: the output dimension is filters, the convolution stride is strides
model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))  # output: (*, 398, 250)
# global max pooling over the time dimension
model.add(GlobalMaxPooling1D())     # output: (*, 250)

model.add(Dense(hidden_dims))       # output: (*, 250)
model.add(Dropout(0.2))
model.add(Activation('relu'))

model.add(Dense(1))
model.add(Activation('sigmoid'))

# compile the network, train it, and evaluate on the test set
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(x_test, y_test))
Esempio n. 28
0
def construct_gumbel_selector(X_ph, num_words, embedding_dims, maxlen, y=None):
    """
    Build the MEED model for selecting words. 

    """
    emb_layer = Embedding(num_words,
                          embedding_dims,
                          input_length=maxlen,
                          name='emb_gumbel')
    emb = emb_layer(X_ph)
    net = Dropout(0.2, name='dropout_gumbel')(emb)
    net = emb  # note: this overwrites the Dropout output above, so 'dropout_gumbel' is effectively skipped
    first_layer = Conv1D(100,
                         kernel_size,
                         padding='same',
                         activation='relu',
                         strides=1,
                         name='conv1_gumbel')(net)

    # global info
    net_new = GlobalMaxPooling1D(
        name='new_global_max_pooling1d_1')(first_layer)
    global_info = Dense(100, name='new_dense_1', activation='relu')(net_new)

    if y is not None:
        hy = Dense(100)(y)
        hy = Dense(100, activation='relu')(hy)
        hy = Dense(100, activation='relu')(hy)

    # local info
    net = Conv1D(100,
                 3,
                 padding='same',
                 activation='relu',
                 strides=1,
                 name='conv2_gumbel')(first_layer)
    local_info = Conv1D(100,
                        3,
                        padding='same',
                        activation='relu',
                        strides=1,
                        name='conv3_gumbel')(net)
    if y is not None:
        global_info = concatenate([global_info, hy])
        combined = Concatenate()([global_info, local_info])
    else:
        combined = Concatenate()([global_info, local_info])
    net = Dropout(0.2, name='new_dropout_2')(combined)
    net = Conv1D(100,
                 1,
                 padding='same',
                 activation='relu',
                 strides=1,
                 name='conv_last_gumbel')(net)

    logits_T = Conv1D(1,
                      1,
                      padding='same',
                      activation=None,
                      strides=1,
                      name='conv4_gumbel')(net)

    return logits_T
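
# Note that construct_gumbel_selector returns per-token logits of shape
# (batch, maxlen, 1) rather than a Model, and it reads a module-level
# kernel_size for its first convolution. Its Concatenate()([global_info,
# local_info]) call joins a 2-D tensor with a 3-D one, so it presumably relies
# on a custom Concatenate layer defined in the original file (the stock
# keras.layers.Concatenate would reject the rank mismatch). With those
# assumptions, a minimal wrapper to inspect the selector:
num_words, embedding_dims, maxlen = 20000, 50, 400   # placeholder sizes
X_ph = Input(shape=(maxlen,), dtype='int32')
logits_T = construct_gumbel_selector(X_ph, num_words, embedding_dims, maxlen)
selector = Model(X_ph, logits_T)
selector.summary()   # last layer 'conv4_gumbel' outputs (None, maxlen, 1)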
Esempio n. 29
0
# x4 = Conv1D(activation="relu", filters=100, kernel_size=5, padding="same")(x)
# x = concatenate([x1, x2, x3, x4])
# x = GlobalMaxPooling1D()(x)
# x = Dense(100, activation='relu')(x)
# output = Dense(1, activation='sigmoid')(x)
# model = Model(inputs=words_input, outputs=output)

np.random.seed(42)

model = Sequential()
model.add(Embedding(vocab_size + 1, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH, trainable=True))
model.add(Conv1D(activation="relu", filters=100, kernel_size=5, padding="valid"))
model.add(SpatialDropout1D(0.1))
model.add(BatchNormalization())
model.add(Conv1D(activation="relu", filters=100, kernel_size=5, padding="valid"))
model.add(GlobalMaxPooling1D())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


# callbacks initialization
# automatic generation of learning curves
callback_1 = TensorBoard(log_dir='../logs/logs_{}'.format(NAME), histogram_freq=0,
                         write_graph=False, write_images=False)
# stop training if val_f1 does not improve for five consecutive epochs
callback_2 = EarlyStopping(monitor='val_f1', min_delta=0, patience=5, verbose=0, mode='max')
# best model saving
callback_3 = ModelCheckpoint("models/model_{}.hdf5".format(NAME), monitor='val_f1',
                             save_best_only=True, verbose=0, mode='max')
model.compile(loss='binary_crossentropy',
              optimizer='adam',
Esempio n. 30
0
def train_cnn():
    # set parameters:
    max_features = 20000
    maxlen = 8
    batch_size = 32
    embedding_dims = 50
    nb_filter = 250
    filter_length = 3
    hidden_dims = 250
    nb_epoch = 2
    print('Loading data...')
    X_train, y_train, X_test, y_test = load_data()
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print('Pad sequences (samples x time)')
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen, value=3)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen, value=3)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)

    y_train = np.array(y_train)
    print('y_train shape', y_train.shape)
    y_test = np.array(y_test)
    print('y_test shape', y_test.shape)

    print('Build model...')
    model = Sequential()

    model.add(
        Embedding(max_features,
                  embedding_dims,
                  input_length=maxlen))
    # dropout on the embedded sequence (replaces the Keras 1 `dropout=` argument)
    model.add(Dropout(0.2))

    # we add a Conv1D, which will learn nb_filter
    # word group filters of size filter_length:
    model.add(
        Conv1D(filters=nb_filter,
               kernel_size=filter_length,
               padding='valid',
               activation='relu',
               strides=1))
    # we use max pooling:
    model.add(GlobalMaxPooling1D())

    # We add a vanilla hidden layer:
    model.add(Dense(hidden_dims))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))

    # We project onto a single unit output layer, and squash it with a sigmoid:
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train,
              y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(X_test, y_test))

    score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)

    # save the model & serialize the architecture to JSON
    model_json = model.to_json()
    with open("./dict/model_cnn.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("./dict/model_cnn_weights.h5")
    print("Saved model to disk")

    del model

    print('Test score:', score)
    print('Test accuracy:', acc)
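
# Since the example saves the architecture (JSON) and the weights (HDF5)
# separately, restoring the trained model later would look roughly like this,
# reusing the same paths as the save calls above:
from keras.models import model_from_json

with open("./dict/model_cnn.json") as json_file:
    restored = model_from_json(json_file.read())
restored.load_weights("./dict/model_cnn_weights.h5")
restored.compile(loss='binary_crossentropy',
                 optimizer='adam',
                 metrics=['accuracy'])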