def init_model(self, embedding_matrix, seq_len, num_features, num_classes,
               is_multilabel, is_balanced, classes_ratio, filters=100,
               emb_size=300, use_step_decay=False, **kwargs):
    self._use_step_decay = use_step_decay
    self._num_classes = num_classes
    self._is_multilabel = is_multilabel

    if num_classes == 2 or is_multilabel:
        loss = 'binary_crossentropy' if is_balanced or is_multilabel else binary_focal_loss(
            gamma=2, alpha=(1 - classes_ratio[1]))
        output_activation = 'sigmoid'
        if is_multilabel:
            output_units = self._num_classes
        else:
            output_units = 1
    else:
        loss = 'sparse_categorical_crossentropy'
        output_activation = 'softmax'
        output_units = num_classes

    trainable = True
    inputs = Input(name='inputs', shape=(seq_len, ))
    if embedding_matrix is None:
        x = Embedding(input_dim=num_features, output_dim=emb_size,
                      input_length=seq_len, trainable=trainable)(inputs)
    else:
        x = Embedding(input_dim=num_features, output_dim=emb_size,
                      input_length=seq_len, trainable=trainable,
                      embeddings_initializer=keras.initializers.Constant(embedding_matrix))(inputs)

    # QMC
    # x = CuDNNGRU(128, return_sequences=True)(x)
    # x = Activation('tanh')(x)
    # x = SpatialDropout1D(0.4)(x)
    # x = GlobalMaxPooling1D()(x)
    # # x = Dense(128, activation='softplus')(x)
    # # x = Dropout(0.5)(x)
    # 0
    # x = BatchNormalization()(x)

    # DB
    x = CuDNNGRU(128, return_sequences=True)(x)
    x = GlobalMaxPooling1D()(x)
    x = Dense(128)(x)
    x = PReLU()(x)
    x = Dropout(0.35)(x)
    x = BatchNormalization()(x)

    output = Dense(output_units, activation=output_activation)(x)
    model = keras.models.Model(inputs=inputs, outputs=output)
    optimizer = optimizers.Adam()
    model.compile(loss=loss, optimizer=optimizer, metrics=['accuracy'])
    model.summary()

    self.is_init = True
    self._model = model
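# The method above refers to binary_focal_loss(), which is not defined in this snippet.
# Below is a minimal, self-contained sketch (an assumption, not the original implementation)
# of a binary focal loss with the same (gamma, alpha) signature, for illustration only.
from keras import backend as K

def binary_focal_loss_sketch(gamma=2.0, alpha=0.25):
    """Return a loss fn computing -alpha_t * (1 - p_t)^gamma * log(p_t)."""
    def loss(y_true, y_pred):
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1.0 - eps)
        # p_t is the predicted probability of the true class
        p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
        alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
        return K.mean(-alpha_t * K.pow(1.0 - p_t, gamma) * K.log(p_t), axis=-1)
    return loss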
embedding_dim = 100
# print(word_index)
embedding_layer = word_embedding(Max_Sequence_Length, embedding_dim, word_index,
                                 embeddings_index)

sequence_input = Input(shape=(Max_Sequence_Length, ), dtype=tf.int32)
embeddings = embedding_layer(sequence_input)
x = Dropout(0.2)(embeddings)
x = Conv1D(FLAGS.filters, FLAGS.kernel_size, padding='valid', activation='relu',
           strides=1)(x)
x = GlobalMaxPooling1D()(x)
x = Dense(FLAGS.hidden_dims)(x)
x = Dropout(0.2)(x)
x = Activation('relu')(x)
x = Dense(1)(x)
preds = Activation('sigmoid')(x)

model = Model(sequence_input, preds)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

print('Train...')
model.fit(x_train, y_train,
score = roc_auc_score(self.y_val, y_pred)
print("\n ROC-AUC - epoch: %d - score: %.6f \n" % (epoch+1, score))


################################ Model definition ##########################################
def get_model():
    # define the model
    inp = Input(shape=(maxlen, ))  # input layer: an index vector of length maxlen
    x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)  # embedding layer: maps token indices to vectors
    x = SpatialDropout1D(0.1)(x)
    """SpatialDropout1D works like Dropout (it randomly drops input connections at a given
    rate), but it drops entire 1D feature maps rather than individual units. If adjacent
    timesteps within a feature map are strongly correlated (as often happens in early
    convolution layers), ordinary dropout does not regularize the activations and mainly
    lowers the effective learning rate; in that case SpatialDropout1D/2D(3D) helps promote
    independence between feature maps and should be used instead of plain Dropout."""
    x = Bidirectional(CuDNNGRU(128, return_sequences=True))(x)
    # 128 is the GRU output dimension; see http://blog.csdn.net/jiangpeng59/article/details/77646186
    # Bidirectional is the bidirectional-RNN wrapper
    avg_pool = GlobalAveragePooling1D()(x)    # global average pooling over the time axis
    max_pool = GlobalMaxPooling1D()(x)        # global max pooling over the time axis
    conc = concatenate([avg_pool, max_pool])  # merge the pooled features
    outp = Dense(6, activation="sigmoid")(conc)  # plain fully connected layer: 6 outputs, sigmoid activation
    model = Model(inputs=inp, outputs=outp)   # assemble the model from its inputs and outputs
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # compile the training setup: log loss (binary crossentropy) and the adam optimizer
    # (Adam can be seen as a corrected combination of Momentum and RMSProp)
    return model


model = get_model()

################################ Training and prediction ##########################################
batch_size = 32
epochs = 5
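# A minimal sketch (not part of the original snippet) of how training might proceed with the
# batch_size/epochs above. X_tra, y_tra, X_val, y_val and the RocAucEvaluation callback name
# are hypothetical stand-ins; the callback is assumed to wrap the ROC-AUC printout shown at
# the top of this fragment.
RocAuc = RocAucEvaluation(validation_data=(X_val, y_val), interval=1)  # hypothetical callback
hist = model.fit(X_tra, y_tra, batch_size=batch_size, epochs=epochs,
                 validation_data=(X_val, y_val), callbacks=[RocAuc], verbose=1)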
def baseline_CNN(sequences_length_for_training, embedding_dim, embedding_matrix,
                 vocab_size):
    which_model = 2
    print 'Build MAIN model...'
    ngram_filters = [2, 3, 4, 5]
    conv_hidden_units = [200, 200, 200, 200]

    main_input = Input(shape=(embedding_dim, ), dtype='float32', name='main-input')
    main_input_embedder = Embedding(vocab_size + 1, GLOVE_EMBEDDING_DIM,
                                    weights=[embedding_matrix],
                                    input_length=embedding_dim, init='uniform')
    embedded_input_main = main_input_embedder(main_input)

    convsM = []
    for n_gram, hidden_units in zip(ngram_filters, conv_hidden_units):
        conv_layer = Convolution1D(nb_filter=hidden_units,
                                   filter_length=n_gram,
                                   border_mode='same',
                                   # border_mode='valid',
                                   activation='tanh',
                                   name='Convolution-' + str(n_gram) + "gram")
        mid = conv_layer(embedded_input_main)
        # Use Flatten() instead of MaxPooling()
        # flat_M = TimeDistributed(Flatten(), name='TD-flatten-mid-'+str(n_gram)+"gram")(mid)
        # convsM.append(flat_M)
        # Use GlobalMaxPooling1D() instead of Flatten()
        pool_M = GlobalMaxPooling1D()(mid)
        convsM.append(pool_M)

    convoluted_mid = Merge(mode='concat')(convsM)
    CONV_DIM = sum(conv_hidden_units)
    ####convoluted_mid, convoluted_left, convoluted_right, CONV_DIM = main_input, left_context, right_context, 300
    # flat_mid = Flatten()(convoluted_mid)
    encode_mid = Dense(300, name='dense-intermediate-mid-encoder')(convoluted_mid)
    # context_encoder_intermediate1 = LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False)
    # context_encoder = LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False)
    # context_encoder_intermediate1 = Bidirectional(LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False), name='BiLSTM-context-encoder-intermediate1', merge_mode='concat')
    # context_encoder = Bidirectional(LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), consume_less='gpu', dropout_W=0.3, dropout_U=0.3, return_sequences=True, stateful=False), name='BiLSTM-context-encoder', merge_mode='concat')
    ####encode_left = context_encoder(context_encoder_intermediate1(convoluted_left))
    encode_mid_drop = Dropout(0.2)(encode_mid)

    decoded = Dense(300, name='decoded')(encode_mid_drop)
    decoded_drop = Dropout(0.3, name='decoded_drop')(decoded)
    output = Dense(2, activation='sigmoid')(decoded_drop)

    model = Model(input=[main_input], output=output)
    model.layers[1].trainable = TRAINABLE_EMBEDDINGS
    model.compile(loss=w_binary_crossentropy, optimizer='rmsprop',
                  metrics=['accuracy', 'recall'])
    # model.compile(loss=w_binary_crossentropy, optimizer='adadelta', metrics=['accuracy', 'recall'])
    print model.summary(line_length=150, positions=[.46, .65, .77, 1.])
    return model
import pandas as pd
import matplotlib.pyplot as plt
# imports needed by the snippet below (mnist, Model and the layers were not imported in the original)
from keras.datasets import mnist
from keras.models import Model
from keras.layers import (Input, Dense, Lambda, Concatenate, GlobalMaxPooling1D,
                          Bidirectional, CuDNNLSTM)
import keras.backend as K

(x_train, y_train), (x_test, y_test) = mnist.load_data()
D = 28
M = 15
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

input_ = Input(shape=(D, D))

# read each image row by row
rnn1 = Bidirectional(CuDNNLSTM(M, return_sequences=True))
x1 = rnn1(input_)
x1 = GlobalMaxPooling1D()(x1)

# read each image column by column (swap the time and feature axes first)
rnn2 = Bidirectional(CuDNNLSTM(M, return_sequences=True))
permutor = Lambda(lambda t: K.permute_dimensions(t, pattern=(0, 2, 1)))
x2 = permutor(input_)
x2 = rnn2(x2)
x2 = GlobalMaxPooling1D()(x2)

concatenator = Concatenate(axis=1)
x = concatenator([x1, x2])

output = Dense(10, activation='softmax')(x)
model = Model(inputs=input_, outputs=output)
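# The snippet above stops at the Model() call. A minimal sketch (not from the original) of
# how it could be compiled and trained on the integer MNIST labels; the epoch and batch-size
# values are illustrative assumptions.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=5, batch_size=128)
# plot the learning curves with the matplotlib import from the snippet
plt.plot(history.history['loss'], label='loss')
plt.plot(history.history['val_loss'], label='val_loss')
plt.legend()
plt.show()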
# model.add(Dense(n_dense, activation='relu'))
# model.add(Dropout(dropout))
# model.add(Dense(1, activation='sigmoid'))

input_layer = Input(shape=(max_review_lenth, ), dtype='int16', name="input")
emd_layer = Embedding(n_unique_words, n_dim, input_length=max_review_lenth,
                      name='Embedded_layer')(input_layer)
drop_emb_layer = SpatialDropout1D(drop_emd, name="dropemb")(emd_layer)

conv_layer_1 = Conv1D(n_conv1, filer_size, activation='relu', name="con1_Layer")(drop_emb_layer)
maxpool_layer1 = GlobalMaxPooling1D()(conv_layer_1)
conv_layer_2 = Conv1D(n_conv1, filer_size, activation='relu', name="con2_Layer")(drop_emb_layer)
maxpool_layer2 = GlobalMaxPooling1D()(conv_layer_2)
conv_layer_3 = Conv1D(n_conv1, filer_size, activation='relu', name="con3_Layer")(drop_emb_layer)
maxpool_layer3 = GlobalMaxPooling1D()(conv_layer_3)

concat_layer = concatenate([maxpool_layer1, maxpool_layer2, maxpool_layer3])  # ,name="concat_layer")
def build_model(vocab_size, emb_size, hidden_size, emb_matrix, my_model_kind):
    use_Ng, use_AR, use_KenLM, use_CAR = use_config(my_model_kind)

    # --- Input Layer (from the paper) ---
    sent_input = Input(shape=(MAX_LENGTH,))   # (b, s)
    c1 = Input(shape=(C_MAXLEN,))             # (b, c)
    c2 = Input(shape=(C_MAXLEN,))
    c3 = Input(shape=(C_MAXLEN,))
    c4 = Input(shape=(C_MAXLEN,))

    sent_E = Embedding(output_dim=emb_size, input_dim=vocab_size, input_length=MAX_LENGTH,
                       mask_zero=True, weights=[emb_matrix], trainable=True)
    sent_emb = sent_E(sent_input)
    choices_E = Embedding(output_dim=emb_size, input_dim=vocab_size, input_length=C_MAXLEN,
                          mask_zero=True, weights=[emb_matrix], trainable=True)
    c1_emb = choices_E(c1)  # (b, c, h)
    c2_emb = choices_E(c2)
    c3_emb = choices_E(c3)
    c4_emb = choices_E(c4)

    sent_vec = Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))(sent_emb)  # (b, s, 2h)
    choices_BiGRU = Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))
    c1_gru = NonMasking()(choices_BiGRU(c1_emb))  # (b, c, 2h)
    c2_gru = NonMasking()(choices_BiGRU(c2_emb))
    c3_gru = NonMasking()(choices_BiGRU(c3_emb))
    c4_gru = NonMasking()(choices_BiGRU(c4_emb))
    c1_vec = Reshape((hidden_size*2*C_MAXLEN,))(c1_gru)  # (b, c*2h)
    c2_vec = Reshape((hidden_size*2*C_MAXLEN,))(c2_gru)
    c3_vec = Reshape((hidden_size*2*C_MAXLEN,))(c3_gru)
    c4_vec = Reshape((hidden_size*2*C_MAXLEN,))(c4_gru)
    choices_Dense = Dense(hidden_size*2)
    c1_vec = choices_Dense(c1_vec)  # (b, 2h)
    c2_vec = choices_Dense(c2_vec)
    c3_vec = choices_Dense(c3_vec)
    c4_vec = choices_Dense(c4_vec)

    # --- Multi-Perspective Aggregation Layer (from the paper) ---
    bsize = K.int_shape(sent_vec)[0]

    # --- part of the MPA layer: Selective Copying ---
    cloze_input = Input(shape=(MAX_LENGTH,))  # (b, s)
    P_sc = SCLayer(hidden_size*2, bsize)([NonMasking()(sent_vec), NonMasking()(cloze_input)])

    # --- part of the MPA layer: Iterative Dilated Convolution ---
    sent_cnn = BatchNormalization(axis=2)(sent_vec)
    sent_cnn = Activation("relu")(sent_cnn)
    sent_cnn = NonMasking()(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=1)(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=3)(sent_cnn)
    # sent_cnn = BatchNormalization(axis=2)(sent_cnn)
    # sent_cnn = Activation("relu")(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=1)(sent_cnn)
    sent_cnn = Conv1D(hidden_size*2, kernel_size=3, dilation_rate=3)(sent_cnn)
    P_idc = GlobalMaxPooling1D()(sent_cnn)

    # --- part of the MPA layer: Attentive Reader ---
    if use_AR == 1:
        P1_ar, P2_ar, P3_ar, P4_ar = ARLayer(hidden_size*2, bsize)(
            [NonMasking()(sent_vec), c1_vec, c2_vec, c3_vec, c4_vec])

    # --- part of the MPA layer: N-gram Statistics ---
    if use_Ng == 1:
        Ngram_1 = Input(shape=(5,))  # (b, 5)
        Ngram_2 = Input(shape=(5,))
        Ngram_3 = Input(shape=(5,))
        Ngram_4 = Input(shape=(5,))
        P1_ng = NonMasking()(Ngram_1)
        P2_ng = NonMasking()(Ngram_2)
        P3_ng = NonMasking()(Ngram_3)
        P4_ng = NonMasking()(Ngram_4)

    # Custom extension: Attentive Reader over the cloze-completed sentences
    if use_CAR == 1:
        CAR_sent1 = Input(shape=(MAX_LENGTH,))
        CAR_sent2 = Input(shape=(MAX_LENGTH,))
        CAR_sent3 = Input(shape=(MAX_LENGTH,))
        CAR_sent4 = Input(shape=(MAX_LENGTH,))
        CAR_sent1_emb = sent_E(CAR_sent1)
        CAR_sent2_emb = sent_E(CAR_sent2)
        CAR_sent3_emb = sent_E(CAR_sent3)
        CAR_sent4_emb = sent_E(CAR_sent4)
        CAR_sent_GRU = Bidirectional(GRU(hidden_size, dropout=0.5, return_sequences=True))
        CAR_sent1_vec = NonMasking()(CAR_sent_GRU(CAR_sent1_emb))  # (b, s, 2h)
        CAR_sent2_vec = NonMasking()(CAR_sent_GRU(CAR_sent2_emb))
        CAR_sent3_vec = NonMasking()(CAR_sent_GRU(CAR_sent3_emb))
        CAR_sent4_vec = NonMasking()(CAR_sent_GRU(CAR_sent4_emb))
        P1_car, P2_car, P3_car, P4_car = CARLayer(hidden_size*2, bsize)(
            [CAR_sent1_vec, CAR_sent2_vec, CAR_sent3_vec, CAR_sent4_vec,
             c1_vec, c2_vec, c3_vec, c4_vec])

    # Custom extension: KenLM score
    if use_KenLM == 1:
        KenLM_1 = Input(shape=(5,))  # (b, 5)
        KenLM_2 = Input(shape=(5,))
        KenLM_3 = Input(shape=(5,))
        KenLM_4 = Input(shape=(5,))
        P1_ks = NonMasking()(KenLM_1)
        P2_ks = NonMasking()(KenLM_2)
        P3_ks = NonMasking()(KenLM_3)
        P4_ks = NonMasking()(KenLM_4)

    # --- part of the MPA layer: final merge ---
    P = Concatenate(axis=1)([P_sc, P_idc])  # (b, 2h+2h)
    C1_tmp = [c1_vec]
    C2_tmp = [c2_vec]
    C3_tmp = [c3_vec]
    C4_tmp = [c4_vec]
    if use_AR == 1:
        C1_tmp.append(P1_ar)
        C2_tmp.append(P2_ar)
        C3_tmp.append(P3_ar)
        C4_tmp.append(P4_ar)
    if use_Ng == 1:
        C1_tmp.append(P1_ng)
        C2_tmp.append(P2_ng)
        C3_tmp.append(P3_ng)
        C4_tmp.append(P4_ng)
    if use_CAR == 1:
        C1_tmp.append(P1_car)
        C2_tmp.append(P2_car)
        C3_tmp.append(P3_car)
        C4_tmp.append(P4_car)
    if use_KenLM == 1:
        C1_tmp.append(P1_ks)
        C2_tmp.append(P2_ks)
        C3_tmp.append(P3_ks)
        C4_tmp.append(P4_ks)
    C1 = Concatenate(axis=1)(C1_tmp)
    C2 = Concatenate(axis=1)(C2_tmp)
    C3 = Concatenate(axis=1)(C3_tmp)
    C4 = Concatenate(axis=1)(C4_tmp)

    # --- Output Layer (PointerNet, from the paper) ---
    # output layer more or less done
    Pdim = K.int_shape(P)[-1]
    Cdim = K.int_shape(C1)[-1]
    output = PointerNet(hidden_size*2, Pdim, Cdim, bsize)([P, C1, C2, C3, C4])  # (b, 4)
    # preds = softmax(output, axis=1)  # (b, 4)
    preds = Activation('softmax')(output)

    # --------------------------
    X = [sent_input, c1, c2, c3, c4, cloze_input]
    if use_Ng == 1:
        X.extend([Ngram_1, Ngram_2, Ngram_3, Ngram_4])
    if use_CAR == 1:
        X.extend([CAR_sent1, CAR_sent2, CAR_sent3, CAR_sent4])
    if use_KenLM == 1:
        X.extend([KenLM_1, KenLM_2, KenLM_3, KenLM_4])
    my_model = Model(X, preds)
    opt = optimizers.Adam(lr=0.001, clipnorm=math.sqrt(5))  # default: lr=0.001
    my_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    return my_model
                      name='word_embedding', trainable=True, mask_zero=True)(txt_input)
txt_drpot = Dropout(WDROP_RATE, name='word_dropout')(txt_embed)

# character-level input with randomized initializations
cnn_input = Input(shape=(TXT_MAXLEN, CHR_MAXLEN), name='cnn_input')
cnn_embed = TimeDistributed(Embedding(CHR_VOCAB, CEMBED_SIZE, input_length=CHR_MAXLEN,
                                      weights=[char_embedding_matrix],
                                      name='cnn_embedding', trainable=True,
                                      mask_zero=False))(cnn_input)

# 1-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns1 = TimeDistributed(Conv1D(filters=20, kernel_size=1, padding="same", strides=1),
                        name='cnn1_cnn')(cnn_embed)
cnns1 = TimeDistributed(BatchNormalization(), name='cnn1_bnorm')(cnns1)
cnns1 = TimeDistributed(Activation('tanh'), name='cnn1_act')(cnns1)
cnns1 = TimeDistributed(GlobalMaxPooling1D(), name='cnn1_gmp')(cnns1)

# 2-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns2 = TimeDistributed(Conv1D(filters=40, kernel_size=2, padding="same", strides=1),
                        name='cnn2_cnn')(cnn_embed)
cnns2 = TimeDistributed(BatchNormalization(), name='cnn2_bnorm')(cnns2)
cnns2 = TimeDistributed(Activation('tanh'), name='cnn2_act')(cnns2)
cnns2 = TimeDistributed(GlobalMaxPooling1D(), name='cnn2_gmp')(cnns2)

# 3-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns3 = TimeDistributed(Conv1D(filters=60, kernel_size=3, padding="same", strides=1),
                        name='cnn3_cnn')(cnn_embed)
cnns3 = TimeDistributed(BatchNormalization(), name='cnn3_bnorm')(cnns3)
cnns3 = TimeDistributed(Activation('tanh'), name='cnn3_act')(cnns3)
cnns3 = TimeDistributed(GlobalMaxPooling1D(), name='cnn3_gmp')(cnns3)

# 4-size window CNN with batch-norm & tanh activation (Kim 2015)
cnns4 = TimeDistributed(Conv1D(filters=80, kernel_size=4, padding="same", strides=1),
                        name='cnn4_cnn')(cnn_embed)
# Question 1 - Embeddings -> Convolutional
# NOTE: GlobalMaxPooling1D() removes the time axis, so stacking a second Convolution1D on
# top of it will fail at build time; an intermediate MaxPooling1D() would be needed instead.
model_q1 = Sequential()
model_q1.add(Embedding(len(word_index) + 1, 300, weights=[embedding_matrix],
                       input_length=40, trainable=False))
model_q1.add(Convolution1D(nb_filter=num_filter, filter_length=filter_length,
                           border_mode='valid', activation='relu', subsample_length=1))
model_q1.add(GlobalMaxPooling1D())
model_q1.add(Convolution1D(nb_filter=num_filter, filter_length=filter_length,
                           border_mode='valid', activation='relu', subsample_length=1))
model_q1.add(GlobalMaxPooling1D())

# Question 2 - Embeddings -> Convolutional
model_q2 = Sequential()
model_q2.add(Embedding(len(word_index) + 1, 300,
input_polarity = Input(shape=(2, ))
input_hand = Input(shape=(26, ))
input_sim = Input(shape=(1, ))
input_bleu = Input(shape=(1, ))
input_rouge = Input(shape=(3, ))
input_cider = Input(shape=(1, ))

###############################
# Define the sentence encoder #
mask = Masking(mask_value=0, input_shape=(max_seq_len, ))(input_premisse)
embed = embedding_layer(mask)
l1 = lstm1(embed)
drop1 = Dropout(0.1)(l1)
maxim = GlobalMaxPooling1D()(drop1)
att = SelfAttLayer()(drop1)
out = concatenate([maxim, att])
SentenceEncoder = Model(input_premisse, maxim, name='SentenceEncoder')

##############################
# Combining the representations #
premisse_representation = SentenceEncoder(input_premisse)
hyp_representation = SentenceEncoder(input_hyp)
concat = concatenate([premisse_representation, hyp_representation])
mul = multiply([premisse_representation, hyp_representation])
dif = subtract([premisse_representation, hyp_representation])
final_merge = concatenate([
    concat, mul, dif, input_overlap, input_refuting, input_polarity,
def lstm_model(sequences_length_for_training, embedding_dim, embedding_matrix,
               vocab_size):
    GLOVE_EMBEDDING_DIM = 300
    print 'Build MAIN model...'
    ngram_filters = [2, 3, 4, 5]
    conv_hidden_units = [200, 200, 200, 200]

    left_context = Input(shape=(ONE_SIDE_CONTEXT_SIZE + 1, embedding_dim),
                         dtype='float32', name='left-context')
    main_input = Input(shape=(1, embedding_dim), dtype='float32', name='main-input')
    right_context = Input(shape=(ONE_SIDE_CONTEXT_SIZE + 1, embedding_dim),
                          dtype='float32', name='right-context')

    context_embedder = TimeDistributed(
        Embedding(vocab_size + 1, GLOVE_EMBEDDING_DIM, input_length=embedding_dim,
                  weights=[embedding_matrix], init='uniform', trainable=False))
    main_input_embedder = TimeDistributed(
        Embedding(vocab_size + 1, GLOVE_EMBEDDING_DIM, input_length=embedding_dim,
                  weights=[embedding_matrix], init='uniform', trainable=False))
    embedded_input_left, embedded_input_main, embedded_input_right = (
        context_embedder(left_context), main_input_embedder(main_input),
        context_embedder(right_context))

    convsL, convsM, convsR = [], [], []
    for n_gram, hidden_units in zip(ngram_filters, conv_hidden_units):
        conv_layer = Convolution1D(nb_filter=hidden_units,
                                   filter_length=n_gram,
                                   border_mode='same',
                                   # border_mode='valid',
                                   activation='tanh',
                                   name='Convolution-' + str(n_gram) + "gram")
        lef = TimeDistributed(conv_layer, name='TD-convolution-left-' + str(n_gram) + "gram")(embedded_input_left)
        mid = TimeDistributed(conv_layer, name='TD-convolution-mid-' + str(n_gram) + "gram")(embedded_input_main)
        rig = TimeDistributed(conv_layer, name='TD-convolution-right-' + str(n_gram) + "gram")(embedded_input_right)
        # Use GlobalMaxPooling1D() instead of Flatten()
        pool_L = TimeDistributed(GlobalMaxPooling1D(), name='TD-GlobalMaxPooling-left-' + str(n_gram) + "gram")(lef)
        pool_M = TimeDistributed(GlobalMaxPooling1D(), name='TD-GlobalMaxPooling-mid-' + str(n_gram) + "gram")(mid)
        pool_R = TimeDistributed(GlobalMaxPooling1D(), name='TD-GlobalMaxPooling-right-' + str(n_gram) + "gram")(rig)
        convsL.append(pool_L), convsM.append(pool_M), convsR.append(pool_R)

    convoluted_left, convoluted_mid, convoluted_right = (
        Merge(mode='concat')(convsL), Merge(mode='concat')(convsM),
        Merge(mode='concat')(convsR))
    CONV_DIM = sum(conv_hidden_units)

    flat_mid = Flatten()(convoluted_mid)
    encode_mid = Dense(300, name='dense-intermediate-mid-encoder')(flat_mid)

    context_encoder_intermediate1 = Bidirectional(
        LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), dropout_W=0.3,
             dropout_U=0.3, return_sequences=True, stateful=False),
        name='BiLSTM-context-encoder-intermediate1', merge_mode='concat')
    context_encoder = Bidirectional(
        LSTM(600, input_shape=(ONE_SIDE_CONTEXT_SIZE, CONV_DIM), dropout_W=0.3,
             dropout_U=0.3, return_sequences=True, stateful=False),
        name='BiLSTM-context-encoder', merge_mode='concat')
    encode_left = AttentionWithContext()(context_encoder(
        context_encoder_intermediate1(convoluted_left)))
    encode_right = AttentionWithContext()(context_encoder(
        context_encoder_intermediate1(convoluted_right)))

    encode_left_drop, encode_mid_drop, encode_right_drop = (
        Dropout(0.3)(encode_left), Dropout(0.2)(encode_mid), Dropout(0.3)(encode_right))
    encoded_info = Merge(mode='concat', name='encode_info')(
        [encode_left_drop, encode_mid_drop, encode_right_drop])
    decoded = Dense(500, name='decoded')(encoded_info)
    decoded_drop = Dropout(0.3, name='decoded_drop')(decoded)
    output = Dense(1, activation='sigmoid')(decoded_drop)

    model = Model(input=[left_context, main_input, right_context], output=output)
    model.layers[1].trainable = False
    model.compile(loss=w_binary_crossentropy, optimizer='rmsprop',
                  metrics=['accuracy', 'recall'])
    # NOTE: this second compile() supersedes the one above, so the model is actually
    # trained with plain binary_crossentropy and the adam optimizer.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', 'recall'])
    print model.summary()
    return model
def __init__(
    self,
    args,
    tau,
    transformer_dropout: float = 0.05,
    embedding_dropout: float = 0.05,
    l2_reg_penalty: float = 1e-4,
    use_same_embedding=True,
    use_vanilla_transformer=True,
):
    self.args = args
    self.tau = tau
    self.pos_number = args.positive_number
    self.neg_number = args.negative_number
    self.query_retrieval_number = args.query_retrieval_number
    self.semantic_dim = args.semantic_dim
    self.transformer_dropout = transformer_dropout
    self.embedding_dropout = embedding_dropout

    self.query_dense = Dense(self.semantic_dim, activation='tanh', name='query_sem')
    self.query_retrieval_dense = Dense(self.semantic_dim, activation='tanh',
                                       name='query_retrieval_sem')
    self.fact_dense = Dense(self.semantic_dim, activation='tanh', name='fact_sem')
    self.semantic_dim_dense = Dense(self.args.semantic_dim, activation='tanh',
                                    name='semantic_dim_sem')
    self.query_conv = SeparableConv1D(self.args.embedding_dim,
                                      self.args.max_pooling_filter_length,
                                      padding="same", activation="tanh")
    self.query_max = GlobalMaxPooling1D(data_format='channels_last',
                                        name='query_max_pooling')
    self.fact_conv = SeparableConv1D(self.args.embedding_dim,
                                     self.args.max_pooling_filter_length,
                                     padding="same", activation="tanh")
    self.fact_max = GlobalMaxPooling1D(data_format='channels_last',
                                       name='fact_max_pooling')
    self.cosine_merger_layer = AutoPointerMerger(name='cosine_merger', args=self.args)

    # prepare layers
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty else None)
    if use_same_embedding:
        self.query_embedding_layer = self.fact_embedding_layer = ReusableEmbedding(
            self.args.vocab_size,
            self.args.embedding_dim,
            name='embeddings',
            # Regularization is based on paper "A Comparative Study on
            # Regularization Strategies for Embedding-based Neural Networks"
            # https://arxiv.org/pdf/1508.03721.pdf
            embeddings_regularizer=l2_regularizer)
    else:
        self.query_embedding_layer = ReusableEmbedding(
            self.args.vocab_size, self.args.embedding_dim,
            name='query_embeddings', embeddings_regularizer=l2_regularizer)
        self.fact_embedding_layer = ReusableEmbedding(
            self.args.vocab_size, self.args.embedding_dim,
            name='fact_embeddings', embeddings_regularizer=l2_regularizer)
    self.query_coord_embedding_layer = TransformerCoordinateEmbedding(
        self.args.src_seq_length,
        1 if use_vanilla_transformer else self.args.transformer_depth,
        name='query_coordinate_embedding')
    self.output_softmax_layer = Softmax(name='pos_neg_predictions')
    self.query_encoder_blocks = [
        TransformerEncoderBlock(name='query_encoder%s' % i,
                                num_heads=self.args.num_heads,
                                residual_dropout=self.transformer_dropout,
                                attention_dropout=self.transformer_dropout,
                                activation='relu',
                                vanilla_wiring=True)
        for i in range(self.args.transformer_depth)
    ]
print('Length of embedding_matrix:', embedding_matrix.shape[0])

embedding_layer = Embedding(len(word_index) + 1,
                            EMBEDDING_DIM,
                            weights=[embedding_matrix],
                            mask_zero=False,
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=False)

print('Training and validation set number of positive and negative reviews')
print(y_train.sum(axis=0))
print(y_val.sum(axis=0))

sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')
embedded_sequences = embedding_layer(sequence_input)
dense_1 = Dense(100, activation='tanh')(embedded_sequences)
max_pooling = GlobalMaxPooling1D()(dense_1)
dense_2 = Dense(2, activation='softmax')(max_pooling)

model = Model(sequence_input, dense_2)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])
model.summary()
model.fit(x_train, y_train, validation_data=(x_val, y_val), nb_epoch=10, batch_size=50)
def __init__(self, title_word_length, content_word_length, title_char_length,
             content_char_length, fs_btm_tw_cw_length, fs_btm_tc_length, class_num,
             word_embedding_matrix, char_embedding_matrix, optimizer_name, lr, metrics):
    # set attributes
    self.title_word_length = title_word_length
    self.content_word_length = content_word_length
    self.title_char_length = title_char_length
    self.content_char_length = content_char_length
    self.fs_btm_tw_cw_length = fs_btm_tw_cw_length
    self.fs_btm_tc_length = fs_btm_tc_length
    self.class_num = class_num
    self.word_embedding_matrix = word_embedding_matrix
    self.char_embedding_matrix = char_embedding_matrix
    self.optimizer_name = optimizer_name
    self.lr = lr
    self.metrics = metrics

    # Placeholder for input (title and content)
    title_word_input = Input(shape=(title_word_length,), dtype='int32', name="title_word_input")
    cont_word_input = Input(shape=(content_word_length,), dtype='int32', name="content_word_input")
    title_char_input = Input(shape=(title_char_length,), dtype='int32', name="title_char_input")
    cont_char_input = Input(shape=(content_char_length,), dtype='int32', name="content_char_input")

    # Embedding layer
    with K.tf.device("/cpu:0"):
        word_embedding_layer = Embedding(len(word_embedding_matrix), 256,
                                         weights=[word_embedding_matrix],
                                         trainable=True, name='word_embedding')
        title_word_emb = word_embedding_layer(title_word_input)
        cont_word_emb = word_embedding_layer(cont_word_input)
        char_embedding_layer = Embedding(len(char_embedding_matrix), 256,
                                         weights=[char_embedding_matrix],
                                         trainable=True, name='char_embedding')
        title_char_emb = char_embedding_layer(title_char_input)
        cont_char_emb = char_embedding_layer(cont_char_input)

    # Create a convolution + max pooling layer
    title_content_conv = list()
    title_content_pool = list()
    for win_size in range(1, 8):
        # batch_size x doc_len x embed_size
        title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(title_word_emb))
        title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(cont_word_emb))
        title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(title_char_emb))
        title_content_conv.append(Conv1D(100, win_size, activation='relu', padding='same')(cont_char_emb))
    for conv_out in title_content_conv:
        title_content_pool.append(GlobalMaxPooling1D()(conv_out))
    title_content_att = list()
    for conv_out, pool_out in zip(title_content_conv, title_content_pool):
        title_content_att.append(Attention()([conv_out, pool_out]))

    # add btm_tw_cw features + btm_tc features
    fs_btm_tw_cw_input = Input(shape=(fs_btm_tw_cw_length,), dtype='float32', name="fs_btm_tw_cw_input")
    fs_btm_tc_input = Input(shape=(fs_btm_tc_length,), dtype='float32', name="fs_btm_tc_input")
    fs_btm_raw_features = concatenate([fs_btm_tw_cw_input, fs_btm_tc_input])
    fs_btm_emb_features = Dense(1024, activation='relu', name='fs_btm_embedding')(fs_btm_raw_features)
    fs_btm_emb_features = Dropout(0.5, name='fs_btm_embedding_dropout')(fs_btm_emb_features)

    title_content_pool_features = concatenate(title_content_pool)
    title_content_pool_features = Dense(1600, activation='relu', name='title_content_pool_embedding')(title_content_pool_features)
    title_content_pool_features = Dropout(0.1, name='title_content_pool_dropout')(title_content_pool_features)
    title_content_att_features = concatenate(title_content_att)
    title_content_att_features = Dense(1600, activation='relu', name='title_content_att_embedding')(title_content_att_features)
    title_content_att_features = Dropout(0.1,
                                         name='title_content_att_dropout')(title_content_att_features)
    title_content_features = concatenate([title_content_pool_features,
                                          title_content_att_features,
                                          fs_btm_emb_features])

    # Full connection
    title_content_features = Dense(3600, activation='relu', name='fs_embedding')(title_content_features)
    title_content_features = Dropout(0.5, name='fs_embedding_dropout')(title_content_features)

    # Prediction
    preds = Dense(class_num, activation='sigmoid', name='prediction')(title_content_features)

    self._model = Model([title_word_input, cont_word_input,
                         title_char_input, cont_char_input,
                         fs_btm_tw_cw_input, fs_btm_tc_input], preds)
    if 'rmsprop' == optimizer_name:
        optimizer = optimizers.RMSprop(lr=lr)
    elif 'adam' == optimizer_name:
        optimizer = optimizers.Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    else:
        optimizer = None
    self._model.compile(loss=binary_crossentropy_sum, optimizer=optimizer, metrics=metrics)
    self._model.summary()
def cnn_branch(n_filters, k_size, d_rate, my_input):
    return Dropout(d_rate)(
        GlobalMaxPooling1D()(
            Activation("relu")(
                Conv1D(filters=n_filters, kernel_size=k_size)(my_input))))
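# A minimal sketch (not from the original code) of how cnn_branch() might be used: several
# branches with different kernel sizes over one embedded sequence, then concatenated. The
# vocabulary size, sequence length and layer sizes below are illustrative assumptions.
from keras.layers import Input, Embedding, Dense, concatenate
from keras.models import Model

seq_in = Input(shape=(200,), dtype='int32')
emb = Embedding(input_dim=20000, output_dim=128)(seq_in)
branches = [cnn_branch(n_filters=64, k_size=k, d_rate=0.3, my_input=emb) for k in (2, 3, 5)]
merged = concatenate(branches)
out = Dense(1, activation='sigmoid')(merged)
toy_model = Model(seq_in, out)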
def Model_BiLSTM_CnnDecoder(sourcevocabsize, targetvocabsize, source_W, input_seq_lenth,
                            output_seq_lenth, hidden_dim, emd_dim, sourcecharsize,
                            character_W, input_word_length, char_emd_dim, sourcepossize,
                            pos_W, pos_emd_dim, batch_size=32,
                            loss='categorical_crossentropy', optimizer='rmsprop'):
    # 0.8349149507609669--attention,lstm*2decoder

    # pos_input = Input(shape=(input_seq_lenth,), dtype='int32')
    # pos_embeding = Embedding(input_dim=sourcepossize + 1,
    #                          output_dim=pos_emd_dim,
    #                          input_length=input_seq_lenth,
    #                          mask_zero=False,
    #                          trainable=True,
    #                          weights=[pos_W])(pos_input)

    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')
    char_input = Input(shape=(input_seq_lenth, input_word_length, ), dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth, input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])
    char_embedding2 = TimeDistributed(char_embedding)(char_input)
    char_cnn = TimeDistributed(Conv1D(50, 3, activation='relu', border_mode='valid'))(char_embedding2)
    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)

    pos_input = Input(shape=(input_seq_lenth, 3, ), dtype='int32')
    pos_embedding = Embedding(input_dim=sourcepossize + 1,
                              output_dim=pos_emd_dim,
                              batch_input_shape=(batch_size, input_seq_lenth, 3),
                              mask_zero=False,
                              trainable=True,
                              weights=[pos_W])
    pos_embedding2 = TimeDistributed(pos_embedding)(pos_input)
    pos_cnn = TimeDistributed(Conv1D(20, 2, activation='relu', border_mode='valid'))(pos_embedding2)
    pos_macpool = TimeDistributed(GlobalMaxPooling1D())(pos_cnn)

    word_embedding_RNN = Embedding(input_dim=sourcevocabsize + 1,
                                   output_dim=emd_dim,
                                   input_length=input_seq_lenth,
                                   mask_zero=False,
                                   trainable=False,
                                   weights=[source_W])(word_input)
    # word_embedding_RNN = Dropout(0.5)(word_embedding_RNN)

    embedding = concatenate([word_embedding_RNN, char_macpool, pos_macpool], axis=-1)
    embedding = Dropout(0.5)(embedding)

    BiLSTM = Bidirectional(LSTM(int(hidden_dim / 2), return_sequences=True),
                           merge_mode='concat')(embedding)
    BiLSTM = BatchNormalization()(BiLSTM)
    # BiLSTM = Dropout(0.3)(BiLSTM)

    # decodelayer1 = LSTM(50, return_sequences=False, go_backwards=True)(concat_LC_d)  # !!!!!
    # repeat_decodelayer1 = RepeatVector(input_seq_lenth)(decodelayer1)
    # concat_decoder = concatenate([concat_LC_d, repeat_decodelayer1], axis=-1)  # !!!!
    # decodelayer2 = LSTM(hidden_dim, return_sequences=True)(concat_decoder)
    # decodelayer = Dropout(0.5)(decodelayer2)

    # decoderlayer1 = LSTM(50, return_sequences=True, go_backwards=False)(BiLSTM)
    decoderlayer5 = Conv1D(50, 5, activation='relu', strides=1, padding='same')(BiLSTM)
    decoderlayer2 = Conv1D(50, 2, activation='relu', strides=1, padding='same')(BiLSTM)
    decoderlayer3 = Conv1D(50, 3, activation='relu', strides=1, padding='same')(BiLSTM)
    decoderlayer4 = Conv1D(50, 4, activation='relu', strides=1, padding='same')(BiLSTM)
    # 0.8868111121100423
    decodelayer = concatenate([decoderlayer2, decoderlayer3, decoderlayer4, decoderlayer5],
                              axis=-1)
    decodelayer = BatchNormalization()(decodelayer)
    decodelayer = Dropout(0.5)(decodelayer)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(decodelayer)
    # TimeD = Dropout(0.5)(TimeD)
    model = Activation('softmax')(TimeD)  # 0.8769744561783556

    # crf = CRF(targetvocabsize + 1, sparse_target=False)
    # model = crf(TimeD)

    Models = Model([word_input, char_input, pos_input], model)

    # Models.compile(loss=my_cross_entropy_Weight, optimizer='adam', metrics=['acc'])
    Models.compile(loss=loss, optimizer='adam', metrics=['acc'])
    # Models.compile(loss=loss, optimizer='adam', metrics=['acc'], sample_weight_mode="temporal")
    # Models.compile(loss=loss, optimizer=optimizers.RMSprop(lr=0.01), metrics=['acc'])
    # Models.compile(loss=crf.loss_function, optimizer='adam', metrics=[crf.accuracy])
    # Models.compile(loss=crf.loss_function, optimizer=optimizers.RMSprop(lr=0.005), metrics=[crf.accuracy])

    return Models
def get_test_model_full():
    """Returns a maximally complex test model,
    using all supported layer types with different parameter combination.
    """
    input_shapes = [
        (26, 28, 3),
        (4, 4, 3),
        (4, 4, 3),
        (4, ),
        (2, 3),
        (27, 29, 1),
        (17, 1),
        (17, 4),
        (2, 3),
        (2, 3, 4, 5),
        (2, 3, 4, 5, 6),
        (2, 3, 4, 5, 6),
        (7, 8, 9, 10),
        (7, 8, 9, 10),
        (11, 12, 13),
        (11, 12, 13),
        (14, 15),
        (14, 15),
        (16, ),
        (16, ),
    ]

    inputs = [Input(shape=s) for s in input_shapes]

    outputs = []

    outputs.append(Flatten()(inputs[4]))
    outputs.append(Flatten()(inputs[5]))
    outputs.append(Flatten()(inputs[9]))
    outputs.append(Flatten()(inputs[10]))

    for axis in [-5, -4, -3, -2, -1, 1, 2, 3, 4, 5]:
        outputs.append(Concatenate(axis=axis)([inputs[10], inputs[11]]))
    for axis in [-4, -3, -2, -1, 1, 2, 3, 4]:
        outputs.append(Concatenate(axis=axis)([inputs[12], inputs[13]]))
    for axis in [-3, -2, -1, 1, 2, 3]:
        outputs.append(Concatenate(axis=axis)([inputs[14], inputs[15]]))
    for axis in [-2, -1, 1, 2]:
        outputs.append(Concatenate(axis=axis)([inputs[16], inputs[17]]))
    for axis in [-1, 1]:
        outputs.append(Concatenate(axis=axis)([inputs[18], inputs[19]]))

    for inp in inputs[6:8]:
        for padding in ['valid', 'same', 'causal']:
            for s in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(Conv1D(out_channels, s, padding=padding,
                                              dilation_rate=d)(inp))
        for padding_size in range(0, 5):
            outputs.append(ZeroPadding1D(padding_size)(inp))
        for crop_left in range(0, 2):
            for crop_right in range(0, 2):
                outputs.append(Cropping1D((crop_left, crop_right))(inp))
        for upsampling_factor in range(1, 5):
            outputs.append(UpSampling1D(upsampling_factor)(inp))
        for padding in ['valid', 'same']:
            for pool_factor in range(1, 6):
                for s in range(1, 4):
                    outputs.append(MaxPooling1D(pool_factor, strides=s, padding=padding)(inp))
                    outputs.append(AveragePooling1D(pool_factor, strides=s, padding=padding)(inp))
        outputs.append(GlobalMaxPooling1D()(inp))
        outputs.append(GlobalAveragePooling1D()(inp))

    for inp in [inputs[0], inputs[5]]:
        for padding in ['valid', 'same']:
            for h in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4):
                        outputs.append(Conv2D(out_channels, (h, 1), padding=padding,
                                              dilation_rate=(d, 1))(inp))
                        outputs.append(SeparableConv2D(out_channels, (h, 1), padding=padding,
                                                       dilation_rate=(d, 1))(inp))
                    for sy in range(1, 4):
                        outputs.append(Conv2D(out_channels, (h, 1), strides=(1, sy),
                                              padding=padding)(inp))
                        outputs.append(SeparableConv2D(out_channels, (h, 1), strides=(sy, sy),
                                                       padding=padding)(inp))
                for sy in range(1, 4):
                    outputs.append(DepthwiseConv2D((h, 1), strides=(sy, sy), padding=padding)(inp))
                    outputs.append(MaxPooling2D((h, 1), strides=(1, sy), padding=padding)(inp))
            for w in range(1, 6):
                for out_channels in [1, 2]:
                    for d in range(1, 4) if sy == 1 else [1]:
                        outputs.append(Conv2D(out_channels, (1, w), padding=padding,
                                              dilation_rate=(1, d))(inp))
                        outputs.append(SeparableConv2D(out_channels, (1, w), padding=padding,
                                                       dilation_rate=(1, d))(inp))
                    for sx in range(1, 4):
                        outputs.append(Conv2D(out_channels, (1, w), strides=(sx, 1),
                                              padding=padding)(inp))
                        outputs.append(SeparableConv2D(out_channels, (1, w), strides=(sx, sx),
                                                       padding=padding)(inp))
                for sx in range(1, 4):
                    outputs.append(DepthwiseConv2D((1, w), strides=(sy, sy), padding=padding)(inp))
                    outputs.append(MaxPooling2D((1, w), strides=(1, sx), padding=padding)(inp))

    outputs.append(ZeroPadding2D(2)(inputs[0]))
    outputs.append(ZeroPadding2D((2, 3))(inputs[0]))
    outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[0]))
    outputs.append(Cropping2D(2)(inputs[0]))
    outputs.append(Cropping2D((2,
                               3))(inputs[0]))
    outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[0]))

    for y in range(1, 3):
        for x in range(1, 3):
            outputs.append(UpSampling2D(size=(y, x))(inputs[0]))

    outputs.append(GlobalAveragePooling2D()(inputs[0]))
    outputs.append(GlobalMaxPooling2D()(inputs[0]))
    outputs.append(AveragePooling2D((2, 2))(inputs[0]))
    outputs.append(MaxPooling2D((2, 2))(inputs[0]))
    outputs.append(UpSampling2D((2, 2))(inputs[0]))
    outputs.append(Dropout(0.5)(inputs[0]))

    # same as axis=-1
    outputs.append(Concatenate()([inputs[1], inputs[2]]))
    outputs.append(Concatenate(axis=3)([inputs[1], inputs[2]]))
    # axis=0 does not make sense, since dimension 0 is the batch dimension
    outputs.append(Concatenate(axis=1)([inputs[1], inputs[2]]))
    outputs.append(Concatenate(axis=2)([inputs[1], inputs[2]]))

    outputs.append(BatchNormalization()(inputs[0]))
    outputs.append(BatchNormalization(center=False)(inputs[0]))
    outputs.append(BatchNormalization(scale=False)(inputs[0]))

    outputs.append(Conv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(Conv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(SeparableConv2D(2, (3, 3), use_bias=False)(inputs[0]))
    outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=True)(inputs[0]))
    outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=False)(inputs[0]))

    outputs.append(Dense(2, use_bias=True)(inputs[3]))
    outputs.append(Dense(2, use_bias=False)(inputs[3]))

    shared_conv = Conv2D(1, (1, 1), padding='valid', name='shared_conv', activation='relu')
    up_scale_2 = UpSampling2D((2, 2))
    x1 = shared_conv(up_scale_2(inputs[1]))  # (1, 8, 8)
    x2 = shared_conv(up_scale_2(inputs[2]))  # (1, 8, 8)
    x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[2]))  # (1, 8, 8)
    x = Concatenate()([x1, x2, x3])  # (3, 8, 8)
    outputs.append(x)

    x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x)  # (3, 8, 8)
    outputs.append(x)
    x = Dropout(0.5)(x)
    outputs.append(x)
    x = Concatenate()([MaxPooling2D((2, 2))(x), AveragePooling2D((2, 2))(x)])  # (6, 4, 4)
    outputs.append(x)

    x = Flatten()(x)  # (1, 1, 96)
    x = Dense(4, use_bias=False)(x)
    outputs.append(x)
    x = Dense(3)(x)  # (1, 1, 3)
    outputs.append(x)

    outputs.append(keras.layers.Add()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Subtract()([inputs[4], inputs[8]]))
    outputs.append(keras.layers.Multiply()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Average()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(keras.layers.Maximum()([inputs[4], inputs[8], inputs[8]]))
    outputs.append(Concatenate()([inputs[4], inputs[8], inputs[8]]))

    intermediate_input_shape = (3, )
    intermediate_in = Input(intermediate_input_shape)
    intermediate_x = intermediate_in
    intermediate_x = Dense(8)(intermediate_x)
    intermediate_x = Dense(5)(intermediate_x)
    intermediate_model = Model(inputs=[intermediate_in], outputs=[intermediate_x],
                               name='intermediate_model')
    intermediate_model.compile(loss='mse', optimizer='nadam')

    x = intermediate_model(x)  # (1, 1, 5)

    intermediate_model_2 = Sequential()
    intermediate_model_2.add(Dense(7, input_shape=(5, )))
    intermediate_model_2.add(Dense(5))
    intermediate_model_2.compile(optimizer='rmsprop', loss='categorical_crossentropy')

    x = intermediate_model_2(x)  # (1, 1, 5)

    x = Dense(3)(x)  # (1, 1, 3)

    shared_activation = Activation('tanh')

    outputs = outputs + [
        Activation('tanh')(inputs[3]),
        Activation('hard_sigmoid')(inputs[3]),
        Activation('selu')(inputs[3]),
        Activation('sigmoid')(inputs[3]),
        Activation('softplus')(inputs[3]),
        Activation('softmax')(inputs[3]),
        Activation('relu')(inputs[3]),
        LeakyReLU()(inputs[3]),
        ELU()(inputs[3]),
        PReLU()(inputs[2]),
        PReLU()(inputs[3]),
        PReLU()(inputs[4]),
        shared_activation(inputs[3]),
        Activation('linear')(inputs[4]),
        Activation('linear')(inputs[1]),
        x,
        shared_activation(x),
    ]

    print('Model has {} outputs.'.format(len(outputs)))

    model = Model(inputs=inputs, outputs=outputs, name='test_model_full')
    model.compile(loss='mse', optimizer='nadam')

    # fit to dummy data
    training_data_size = 1
    batch_size = 1
    epochs = 10
    data_in = generate_input_data(training_data_size, input_shapes)
    initial_data_out = model.predict(data_in)
    data_out = generate_output_data(training_data_size, initial_data_out)
    model.fit(data_in, data_out, epochs=epochs, batch_size=batch_size)
    return model
def Model_Dense_Softmax(sourcevocabsize, targetvocabsize, source_W, input_seq_lenth,
                        output_seq_lenth, hidden_dim, emd_dim, sourcecharsize,
                        character_W, input_word_length, char_emd_dim, sourcepossize,
                        pos_W, pos_emd_dim, batch_size=32,
                        loss='categorical_crossentropy', optimizer='rmsprop'):
    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')
    char_input = Input(shape=(input_seq_lenth, input_word_length, ), dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth, input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])
    char_embedding2 = TimeDistributed(char_embedding)(char_input)
    char_cnn = TimeDistributed(Conv1D(50, 3, activation='relu', padding='same'))(char_embedding2)
    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)  # !!!!!!!!!!!!!!
    char_macpool = Dropout(0.25)(char_macpool)

    word_embedding = Embedding(input_dim=sourcevocabsize + 1,
                               output_dim=emd_dim,
                               input_length=input_seq_lenth,
                               mask_zero=False,
                               trainable=True,
                               weights=[source_W])(word_input)
    word_embedding_dropout = Dropout(0.5)(word_embedding)

    embedding = concatenate([word_embedding_dropout, char_macpool], axis=-1)

    Dense1 = TimeDistributed(Dense(400, activation='tanh'))(embedding)
    Dense1 = Dropout(0.5)(Dense1)
    Dense2 = TimeDistributed(Dense(200, activation='tanh'))(Dense1)
    Dense2 = Dropout(0.3)(Dense2)
    Dense3 = TimeDistributed(Dense(100, activation='tanh'))(Dense2)
    Dense3 = Dropout(0.2)(Dense3)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(Dense3)
    # TimeD = Dropout(0.5)(TimeD)  # !!!!!!!!!!!!!!! delete dropout
    model = Activation('softmax')(TimeD)

    # crflayer = CRF(targetvocabsize+1, sparse_target=False)
    # model = crflayer(TimeD)

    Models = Model([word_input, char_input], [model])
    Models.compile(loss=loss, optimizer=optimizers.RMSprop(lr=0.001), metrics=['acc'])
    # Models.compile(loss=crflayer.loss_function, optimizer=optimizers.RMSprop(lr=0.001), metrics=[crflayer.accuracy])

    return Models
def get_resNet_model(input_shape, output_shape):
    def resnet_v1(input_shape, depth, num_classes=10, input_tensor=None, local_conv=False):
        if (depth - 2) % 6 != 0:
            raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
        # Start model definition.
        num_filters = 16
        num_res_blocks = int((depth - 2) / 6)

        if (input_tensor == None):
            inputs = Input(shape=input_shape)
        else:
            inputs = input_tensor
        x = resnet_layer_naive(inputs=inputs)
        # Instantiate the stack of residual units
        for stack in range(3):
            for res_block in range(num_res_blocks):
                strides = 1
                # if stack > 0 and res_block == 0:  # first layer but not first stack
                #     strides = 2  # downsample
                y = resnet_layer_local(inputs=x, kernel_size=8,
                                       num_filters=num_filters, strides=strides)
                y = resnet_layer_local(inputs=y, kernel_size=16,
                                       num_filters=num_filters, activation=None)
                if stack > 0 and res_block == 0:  # first layer but not first stack
                    # linear projection residual shortcut connection to match
                    # changed dims
                    x = resnet_layer_naive(inputs=x, num_filters=num_filters,
                                           kernel_size=16, strides=strides,
                                           activation=None, batch_normalization=True)
                x = keras.layers.add([x, y])
                x = Activation(default_activation)(x)
            num_filters *= 2
        return x

    inputs = Input(shape=input_shape)
    xxx = inputs
    xxx = Conv1D(filters=xl_filter_num, kernel_size=m_filter_num, padding='same',
                 activation=None, strides=1)(xxx)
    xxx = BatchNormalization()(xxx)
    xxx = Activation('relu')(xxx)
    xxx = MaxPooling1D(pool_size=2, padding='same', strides=2)(xxx)
    xxx = resnet_v1(input_shape, num_classes=output_shape, depth=3 * 6 + 2,
                    input_tensor=xxx, local_conv=False)
    xxx = LocallyConnected1D(filters=l_filter_num, kernel_size=m_filter_num,
                             padding='valid', activation=default_activation, strides=1)(xxx)
    xxx = BatchNormalization()(xxx)
    xxx = GlobalMaxPooling1D()(xxx)
    xxx = Dense(output_shape, activation='softmax', kernel_initializer='he_normal')(xxx)

    model = Model(inputs=inputs, outputs=xxx)
    return model
def Model_BiLSTM_X2_CRF(sourcevocabsize, targetvocabsize, source_W, input_seq_lenth,
                        output_seq_lenth, hidden_dim, emd_dim, sourcecharsize,
                        character_W, input_word_length, char_emd_dim, batch_size=32,
                        loss='categorical_crossentropy', optimizer='rmsprop'):
    word_input = Input(shape=(input_seq_lenth, ), dtype='int32')
    char_input = Input(shape=(input_seq_lenth, input_word_length, ), dtype='int32')

    char_embedding = Embedding(input_dim=sourcecharsize,
                               output_dim=char_emd_dim,
                               batch_input_shape=(batch_size, input_seq_lenth, input_word_length),
                               mask_zero=False,
                               trainable=True,
                               weights=[character_W])
    char_embedding2 = TimeDistributed(char_embedding)(char_input)
    char_cnn = TimeDistributed(Conv1D(50, 3, activation='relu', padding='same'))(char_embedding2)
    char_macpool = TimeDistributed(GlobalMaxPooling1D())(char_cnn)
    # char_macpool = Dropout(0.5)(char_macpool)  # !!!!!!!!!!!!!!
    char_macpool = Dropout(0.25)(char_macpool)

    word_embedding = Embedding(input_dim=sourcevocabsize + 1,
                               output_dim=emd_dim,
                               input_length=input_seq_lenth,
                               mask_zero=True,
                               trainable=True,
                               weights=[source_W])(word_input)
    word_embedding_dropout = Dropout(0.5)(word_embedding)

    embedding = concatenate([word_embedding_dropout, char_macpool], axis=-1)

    BiLSTM = Bidirectional(LSTM(hidden_dim, return_sequences=True), merge_mode='concat')(embedding)
    BiLSTM = BatchNormalization(axis=1)(BiLSTM)
    BiLSTM_dropout = Dropout(0.5)(BiLSTM)

    BiLSTM2 = Bidirectional(LSTM(hidden_dim // 2, return_sequences=True), merge_mode='concat')(BiLSTM_dropout)
    BiLSTM_dropout2 = Dropout(0.5)(BiLSTM2)

    TimeD = TimeDistributed(Dense(targetvocabsize + 1))(BiLSTM_dropout2)
    # TimeD = TimeDistributed(Dense(int(hidden_dim / 2)))(BiLSTM_dropout)
    # TimeD = Dropout(0.5)(TimeD)  # !!!!!!!!!!!!!!! delete dropout

    # model = Activation('softmax')(TimeD)
    crflayer = CRF(targetvocabsize + 1, sparse_target=False)
    model = crflayer(TimeD)  # 0.8746633147782367
    # model = crf(BiLSTM_dropout)  # 0.870420501714492

    Models = Model([word_input, char_input], [model])

    # Models.compile(loss=loss, optimizer='adam', metrics=['acc'])
    # Models.compile(loss=crflayer.loss_function, optimizer='adam', metrics=[crflayer.accuracy])
    Models.compile(loss=crflayer.loss_function, optimizer=optimizers.RMSprop(lr=0.001),
                   metrics=[crflayer.accuracy])

    return Models
def get_model(embedding_layer, RNN, embed_size, Feature_dic, Para_dic):
    MAX_SEQUENCE_LENGTH = Para_dic['MAX_SEQUENCE_LENGTH']
    comment_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedded_sequences_raw = embedding_layer(comment_input)
    embedded_sequences = SpatialDropout1D(Para_dic['spatial_dropout'])(embedded_sequences_raw)

    ### RNN
    if RNN == 'LSTM':
        RNN_x = Bidirectional(CuDNNLSTM(Para_dic['num_lstm'], return_sequences=True))(embedded_sequences)
    elif RNN == 'GRU':
        RNN_x = Bidirectional(CuDNNGRU(Para_dic['num_lstm'], return_sequences=True))(embedded_sequences)

    Feature = []
    ######## RNN Features
    ##### Attention
    if Feature_dic['Attention'] == 1:
        Feature.append(Attention(MAX_SEQUENCE_LENGTH)(RNN_x))
    if Feature_dic['RNN_maxpool'] == 1:
        Feature.append(GlobalMaxPooling1D()(RNN_x))
    ##### Capsule
    if Feature_dic['Capsule'] == 1:
        capsule = Capsule(share_weights=True)(RNN_x)
        capsule = Flatten()(capsule)
        Feature.append(capsule)
    ##### RNN_CNN conv1d
    if Feature_dic['RNN_CNN_conv1d'] == 1:
        Cx = Conv1D(64, kernel_size=2, padding="valid", kernel_initializer="he_uniform")(RNN_x)
        avg_pool = GlobalAveragePooling1D()(Cx)
        max_pool = GlobalMaxPooling1D()(Cx)
        Feature.append(avg_pool)
        Feature.append(max_pool)

    ######## CNN Features
    ### CNN2d
    if Feature_dic['CNN2d'] == 1:
        CNN2d = get_CNN2d(embedded_sequences, embed_size, MAX_SEQUENCE_LENGTH, Para_dic)
        Feature.append(CNN2d)
    ### DPCNN
    if Feature_dic['DPCNN'] == 1:
        DPCNN = get_DPCNN(embedded_sequences, Para_dic)
        Feature.append(DPCNN)

    ### Concatenation
    merged = Concatenate()(Feature)
    ### dense, add L1 reg to enable sparsity
    merged = Dense(Para_dic['dense_num'],
                   activation=Para_dic['dense_act'],
                   kernel_regularizer=regularizers.l1(Para_dic['L1_reg']))(merged)
    merged = Dropout(Para_dic['dense_dropout'])(merged)
    preds = Dense(6, activation='sigmoid')(merged)

    model = Model(inputs=[comment_input], outputs=preds)
    model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
    print(model.summary())
    return model
# Input shape
inp = Input(shape=(maxlen, ))

# Embedding and bidirectional LSTM
x = Embedding(max_features, 150)(inp)
x = SpatialDropout1D(0.25)(x)
x = Bidirectional(LSTM(64, return_sequences=True, dropout=0.15, recurrent_dropout=0.15))(x)
x = Conv1D(32, kernel_size=3, padding='valid', kernel_initializer='glorot_uniform')(x)

# Pooling
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])

# Output layer
output = Dense(1, activation='sigmoid')(conc)

model = Model(inputs=inp, outputs=output)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# model.load_weights('Weights/gru5_3.h5')
model.fit(X_train, Y_train, epochs=3, batch_size=32, verbose=1)
results = model.predict(X_test, batch_size=1, verbose=1)
run_test(results, Y_test)
def cnn_rnn(embedding_matrix, config):
    if config['rnn'] == 'gru' and config['gpu']:
        encode = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'], return_sequences=True))
    else:
        encode = Bidirectional(CuDNNLSTM(config['rnn_output_size'], return_sequences=True))
        encode2 = Bidirectional(CuDNNLSTM(config['rnn_output_size'] * 2, return_sequences=True))
        encode3 = Bidirectional(CuDNNGRU(config['rnn_output_size'] * 4, return_sequences=True))

    q1 = Input(shape=(config['max_length'], ), dtype='int32', name='q1_input')
    q2 = Input((config['max_length'], ), dtype='int32', name='q2_input')
    embedding_layer = Embedding(embedding_matrix.shape[0],
                                embedding_matrix.shape[1],
                                trainable=config['embed_trainable'],
                                weights=[embedding_matrix]
                                # mask_zero=True
                                )

    q1_embed = embedding_layer(q1)
    q2_embed = embedding_layer(q2)  # bsz, 1, emb_dims
    q1_embed = BatchNormalization(axis=2)(q1_embed)
    q2_embed = BatchNormalization(axis=2)(q2_embed)
    q1_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q1_embed)
    q2_embed = SpatialDropout1D(config['spatial_dropout_rate'])(q2_embed)

    q1_encoded = encode(q1_embed)
    q2_encoded = encode(q2_embed)
    q1_encoded = Dropout(0.2)(q1_encoded)
    q2_encoded = Dropout(0.2)(q2_encoded)
    # bidirectional
    # q1_encoded = encode2(q1_encoded)
    # q2_encoded = encode2(q2_encoded)

    # resnet
    rnn_layer2_input1 = concatenate([q1_embed, q1_encoded])
    rnn_layer2_input2 = concatenate([q2_embed, q2_encoded])
    q1_encoded2 = encode2(rnn_layer2_input1)
    q2_encoded2 = encode2(rnn_layer2_input2)

    # add res shortcut
    res_block1 = add([q1_encoded, q1_encoded2])
    res_block2 = add([q2_encoded, q2_encoded2])
    rnn_layer3_input1 = concatenate([q1_embed, res_block1])
    rnn_layer3_input2 = concatenate([q2_embed, res_block2])
    # rnn_layer3_input1 = concatenate([q1_embed, q1_encoded, q1_encoded2])
    # rnn_layer3_input2 = concatenate([q2_embed, q2_encoded, q2_encoded2])
    q1_encoded3 = encode3(rnn_layer3_input1)
    q2_encoded3 = encode3(rnn_layer3_input2)

    convs1, convs2 = [], []
    for ksz in config['kernel_sizes']:
        pooling1, pooling2 = block(q1_embed, q2_embed, ksz, config['filters'])
        convs1.append(pooling1)
        convs2.append(pooling2)

    rnn_rep1 = GlobalMaxPooling1D()(q1_encoded3)
    rnn_rep2 = GlobalMaxPooling1D()(q2_encoded3)
    convs1.append(rnn_rep1)
    convs2.append(rnn_rep2)
    merged1 = concatenate(convs1, axis=-1)
    merged2 = concatenate(convs2, axis=-1)

    sub_rep = Lambda(lambda x: K.abs(x[0] - x[1]))([merged1, merged2])
    mul_rep = Lambda(lambda x: x[0] * x[1])([merged1, merged2])

    # merged = Concatenate()([mul_rep, sub_rep])
    merged = Concatenate()([merged1, merged2, mul_rep, sub_rep])
    dense = Dropout(config['dense_dropout'])(merged)
    dense = BatchNormalization()(dense)
    dense = Dense(config['dense_dim'], activation='relu')(dense)
    dense = Dropout(config['dense_dropout'])(dense)
    dense = BatchNormalization()(dense)
    predictions = Dense(1, activation='sigmoid')(dense)

    model = Model(inputs=[q1, q2], outputs=predictions)
    opt = optimizers.get(config['optimizer'])
    K.set_value(opt.lr, config['learning_rate'])
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=[f1])
    return model
def get_model_rnn_cnn(embedding_matrix, cell_size=80, cell_type_GRU=True, maxlen=180,
                      max_features=100000, embed_size=300, prob_dropout=0.2,
                      emb_train=False, filter_size=128, kernel_size=2, stride=1):
    inp_pre = Input(shape=(maxlen, ), name='input_pre')
    inp_post = Input(shape=(maxlen, ), name='input_post')

    ## pre
    x1 = Embedding(max_features, embed_size, weights=[embedding_matrix],
                   trainable=emb_train)(inp_pre)
    x1 = SpatialDropout1D(prob_dropout)(x1)
    if cell_type_GRU:
        x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1)
    else:
        x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1)
    x1 = Conv1D(filter_size, kernel_size=kernel_size, strides=stride, padding="valid",
                kernel_initializer="he_uniform")(x1)
    avg_pool1 = GlobalAveragePooling1D()(x1)
    max_pool1 = GlobalMaxPooling1D()(x1)

    ## post
    x2 = Embedding(max_features, embed_size, weights=[embedding_matrix],
                   trainable=emb_train)(inp_post)
    x2 = SpatialDropout1D(prob_dropout)(x2)
    if cell_type_GRU:
        x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2)
    else:
        x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2)
    x2 = Conv1D(filter_size, kernel_size=kernel_size, strides=stride, padding="valid",
                kernel_initializer="he_uniform")(x2)
    avg_pool2 = GlobalAveragePooling1D()(x2)
    max_pool2 = GlobalMaxPooling1D()(x2)

    ## merge
    conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2])
    outp = Dense(6, activation="sigmoid")(conc)

    model = Model(inputs=[inp_pre, inp_post], outputs=outp)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['binary_crossentropy', 'accuracy'])
    return model
def train():
    print('Generating training data')
    #pretrain_data = generate_data(None, '/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ngram-id2seq.tsv.gz', '/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ann-train-data.tsv.gz', ann_ids, 256)
    #pretrain_size = _data_size('/home/hanmoe/CAFA3/ngrams/4kai/assocI-min_len5-min_freq3-top_fun5k/ngram-id2seq.tsv.gz')/2
    train_path = './data/train.txt.gz'
    train_data = generate_data(train_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size)
    train_size = _data_size(train_path)
    train_ids = read_split_ids(train_path, unique=False)
    # import pdb; pdb.set_trace()

    devel_path = './data/devel.txt.gz'
    devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size // 10)
    devel_size = _data_size(devel_path)
    devel_ids = read_split_ids(devel_path, unique=False)

    test_path = './data/test.txt.gz'
    test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size // 10)
    test_size = _data_size(test_path)
    test_ids = read_split_ids(test_path, unique=False)

    #print "Making baseline predictions"
    #import baseline
    #devel_baseline = baseline.predict(devel_data['prot_ids'], blast_dict, ann_path)
    #devel_baseline_ids = go_to_ids([b[1] for b in devel_baseline], ann_ids)
    #from sklearn import metrics
    #baseline_score = metrics.precision_recall_fscore_support(devel_data['labels'], devel_baseline_ids, average='micro')
    #print 'Baseline score: ', baseline_score
    #import pdb; pdb.set_trace()

    #for ii in [3, 6, 9, 15, 27, 50]:
    #    print '### Testing window size %s' % ii
    print('Building model')
    inputs = Input(shape=(timesteps, ), name='sequence')
    input_list = [inputs]
    embedding = Embedding(vocab_size, latent_dim, mask_zero=False)(inputs)
    embedding = Dropout(0.5)(embedding)
    #embedding = Embedding(aa_embedding.shape[0], aa_embedding.shape[1], mask_zero=False, weights=[aa_embedding], trainable=True)(inputs)
    #mask = Masking()(embedding)

    convs = []
    # Stacked CNN experiments
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(embedding)
    ##maxed = GlobalMaxPooling1D()(encoded)
    ##convs.append(maxed)
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(encoded)
    ##maxed = GlobalMaxPooling1D()(encoded)
    ##convs.append(maxed)
    #encoded = Convolution1D(50, 3, border_mode='valid', activation='linear')(encoded)
    #encoded = GlobalMaxPooling1D()(encoded)
    #convs.append(maxed)
    for i in [3, 9, 27]:
        encoded = Convolution1D(400, i, padding='valid', activation='relu')(embedding)
        encoded = GlobalMaxPooling1D()(encoded)
        convs.append(encoded)

    ## LSTM attention
    #lstm = LSTM(50)(mask)
    ##convs.append(lstm)
    #
    #from attention import Attention
    #att = Attention()([encoded, lstm])
    #convs.append(att)

    if use_features:
        #feature_input = Input(shape=(len(blast_hit_ids), ), name='features')
        feature_input = Input(shape=(json_feature_matrix.shape[1], ), name='features')  # For Jari's feature vectors
        dropout = Dropout(0.5)(feature_input)
        feature_encoding = Dense(300, activation='tanh')(dropout)  # Squeeze the feature vectors to a tiny encoding
        convs.append(feature_encoding)
        input_list.append(feature_input)
        #
        #encoded = feature_encoding

    encoded = concatenate(convs)
    predictions = Dense(len(ann_ids), activation='sigmoid', name='labels')(encoded)

    model = Model(input_list, predictions)
    model.compile(optimizer=Adam(lr=0.0005), loss=weighted_binary_crossentropy,
                  metrics=['accuracy'])
    print(model.summary())

    print('Training model')
    pickle.dump(ann_ids, open(os.path.join(model_dir, 'ann_ids.pkl'), 'wb'))
    pickle.dump(reverse_ann_ids,
open(os.path.join(model_dir, 'reverse_ann_ids.pkl'), 'wb')) if use_features: # For Jari's features pickle.dump(json_id_map, open(os.path.join(model_dir, 'json_id_map.pkl'), 'wb')) pickle.dump(json_vectorizer, open(os.path.join(model_dir, 'json_vectorizer.pkl'), 'wb')) pickle.dump( feature_selector, open(os.path.join(model_dir, 'feature_selector.pkl'), 'wb')) es_cb = EarlyStopping(monitor='val_acc', patience=10, verbose=0, mode='max') cp_cb = ModelCheckpoint(filepath=os.path.join(model_dir, 'model.hdf5'), monitor='val_acc', mode='max', save_best_only=True, verbose=0) ev_cb = Evaluate(devel_path, 500, reverse_ann_ids) # next(devel_data) # import pdb; pdb.set_trace() model.fit_generator(train_data, steps_per_epoch=batch_size, nb_epoch=60, validation_data=devel_data, validation_steps=batch_size, callbacks=[ev_cb]) # If using our own blast features #pickle.dump(blast_hit_ids, open(os.path.join(model_dir, 'blast_hit_ids.pkl') ,'wb')) #import pdb; pdb.set_trace() # print "Making predictions" # from keras.models import load_model # model = load_model(filepath=os.path.join(model_dir, 'model.h5'), custom_objects={"weighted_binary_crossentropy":weighted_binary_crossentropy}) # # # # Reinstantiate the data generators, otherwise they are not correctly aligned anymore # # devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size) # # test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size) # # # # devel_score = model.evaluate_generator(devel_data, devel_size) # # test_score = model.evaluate_generator(test_data, test_size) # # print 'Devel l/a/p/r/f: ', devel_score # # print 'Test l/a/p/r/f: ', test_score # # # # Reinstantiate the data generators, otherwise they are not correctly aligned anymore # devel_data = generate_data(devel_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size) # test_data = generate_data(test_path, SEQUENCE_PATH, ann_path, ann_ids, batch_size) # # devel_pred = model.predict_generator(devel_data, steps=batch_size) # test_pred = model.predict_generator(test_data, steps=batch_size) # # save_predictions(os.path.join(model_dir, 'devel_pred.tsv.gz'), devel_ids, devel_pred, reverse_ann_ids) # save_predictions(os.path.join(model_dir, 'test_pred.tsv.gz'), test_ids, test_pred, reverse_ann_ids) # # print 'Making CAFA target predictions' # # cafa_id_path = '/home/sukaew/CAFA_PI/targetFiles/sequences/target.all.ids.gz' # cafa_seq_path = '/home/sukaew/CAFA_PI/targetFiles/sequences/target.all.fasta.gz' # cafa_data = generate_data(None, cafa_seq_path, ann_path, ann_ids, batch_size, cafa_targets=True, verbose=False) # cafa_size = _data_size(cafa_id_path) # cafa_ids = _get_ids(generate_data(None, cafa_seq_path, ann_path, ann_ids, batch_size, cafa_targets=True, verbose=False, endless=False)) # # cafa_ids = read_split_ids(cafa_id_path, unique=False) # #import pdb; pdb.set_trace() # cafa_pred = model.predict_generator(cafa_data, batch_size) # # save_predictions(os.path.join(model_dir, 'cafa_targets.tsv.gz'), cafa_ids, cafa_pred, reverse_ann_ids, cafa_targets=True) # #import pdb; pdb.set_trace() print('All done.')
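# train() compiles the model with `weighted_binary_crossentropy`, which is defined elsewhere in
# the project. A minimal sketch of one common formulation (per-label up-weighting of positive
# annotations) is given below; the `label_weights` values are an assumption, not the project's
# actual definition.
import numpy as np
from keras import backend as K

label_weights = K.variable(np.ones(len(ann_ids), dtype='float32'))  # e.g. inverse label frequencies

def weighted_binary_crossentropy(y_true, y_pred):
    bce = K.binary_crossentropy(y_true, y_pred)          # element-wise crossentropy per label
    weights = y_true * label_weights + (1.0 - y_true)    # up-weight positives, leave negatives at 1
    return K.mean(weights * bce, axis=-1)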
def get_model_2rnn_cnn(embedding_matrix, cell_size=80, cell_type_GRU=True, maxlen=180, max_features=100000, embed_size=300, prob_dropout=0.2, emb_train=False, filter_size=128, kernel_size=2, stride=1): inp_pre = Input(shape=(maxlen, ), name='input_pre') inp_post = Input(shape=(maxlen, ), name='input_post') ##pre x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=emb_train)(inp_pre) x1 = SpatialDropout1D(prob_dropout)(x1) if cell_type_GRU: x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1) x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1) else: x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1) x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1) x1 = Conv1D(filter_size, kernel_size=kernel_size, strides=stride, padding="valid", kernel_initializer="he_uniform")(x1) avg_pool1 = GlobalAveragePooling1D()(x1) max_pool1 = GlobalMaxPooling1D()(x1) ##post x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=emb_train)(inp_post) x2 = SpatialDropout1D(prob_dropout)(x2) if cell_type_GRU: x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2) x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2) else: x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2) x2 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2) x2 = Conv1D(filter_size, kernel_size=kernel_size, strides=stride, padding="valid", kernel_initializer="he_uniform")(x2) avg_pool2 = GlobalAveragePooling1D()(x2) max_pool2 = GlobalMaxPooling1D()(x2) ##merge conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2]) outp = Dense(6, activation="sigmoid")(conc) model = Model(inputs=[inp_pre, inp_post], outputs=outp) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['binary_crossentropy', 'accuracy']) return model # def get_model_2rnn_cnn_sp( # embedding_matrix, cell_size = 80, cell_type_GRU = True, # maxlen = 180, max_features = 100000, embed_size = 300, # prob_dropout = 0.2, emb_train = False, # filter_size=128, kernel_size = 2, stride = 1 # ): # inp_pre = Input(shape=(maxlen, ), name='input_pre') # inp_post = Input(shape=(maxlen, ), name='input_post') # ##pre # x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre) # x1 = SpatialDropout1D(prob_dropout)(x1) # if cell_type_GRU: # x1_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1) # x1 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_) # else : # x1_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1) # x1 = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_) # x1_ = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_) # avg_pool1_ = GlobalAveragePooling1D()(x1_) # max_pool1_ = GlobalMaxPooling1D()(x1_) # x1 = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1) # avg_pool1 = GlobalAveragePooling1D()(x1) # max_pool1 = GlobalMaxPooling1D()(x1) # ##post # x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post) # x2 = SpatialDropout1D(prob_dropout)(x2) # if cell_type_GRU: # x2_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2) # x2 = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_) # else : # x2_ = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2) # x2 = Bidirectional(CuDNNLSTM(cell_size, 
return_sequences=True))(x2_) # x2_ = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_) # avg_pool2_ = GlobalAveragePooling1D()(x2_) # max_pool2_ = GlobalMaxPooling1D()(x2_) # x2 = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2) # avg_pool2 = GlobalAveragePooling1D()(x2) # max_pool2 = GlobalMaxPooling1D()(x2) # ##merge # conc = concatenate([avg_pool1, max_pool1, avg_pool2, max_pool2, avg_pool1_, max_pool1_, avg_pool2_, max_pool2_]) # outp = Dense(6, activation="sigmoid")(conc) # model = Model(inputs=[inp_pre, inp_post], outputs=outp) # model.compile(loss='binary_crossentropy', # optimizer='adam', # metrics=['binary_crossentropy', 'accuracy']) # return model # def get_model_dual_2rnn_cnn_sp( # embedding_matrix, cell_size = 80, cell_type_GRU = True, # maxlen = 180, max_features = 100000, embed_size = 300, # prob_dropout = 0.2, emb_train = False, # filter_size=128, kernel_size = 2, stride = 1 # ): # inp_pre = Input(shape=(maxlen, ), name='input_pre') # inp_post = Input(shape=(maxlen, ), name='input_post') # ##pre # x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre) # x1g = SpatialDropout1D(prob_dropout)(x1) # x1l = SpatialDropout1D(prob_dropout)(x1) # x1_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1g) # x1g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_g) # x1_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1l) # x1l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_l) # x1_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_g) # x1_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_l) # avg_pool1_g = GlobalAveragePooling1D()(x1_g) # max_pool1_g = GlobalMaxPooling1D()(x1_g) # avg_pool1_l = GlobalAveragePooling1D()(x1_l) # max_pool1_l = GlobalMaxPooling1D()(x1_l) # x1g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1g) # x1l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1l) # avg_pool1g = GlobalAveragePooling1D()(x1g) # max_pool1g = GlobalMaxPooling1D()(x1g) # avg_pool1l = GlobalAveragePooling1D()(x1l) # max_pool1l = GlobalMaxPooling1D()(x1l) # ##post # x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post) # x2g = SpatialDropout1D(prob_dropout)(x2) # x2l = SpatialDropout1D(prob_dropout)(x2) # x2_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2g) # x2g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_g) # x2_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2l) # x2l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2_l) # x2_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_g) # x2_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_l) # avg_pool2_g = GlobalAveragePooling1D()(x2_g) # max_pool2_g = GlobalMaxPooling1D()(x2_g) # avg_pool2_l = GlobalAveragePooling1D()(x2_l) # max_pool2_l = GlobalMaxPooling1D()(x2_l) # x2g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", 
kernel_initializer = "he_uniform")(x2g) # x2l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2l) # avg_pool2g = GlobalAveragePooling1D()(x2g) # max_pool2g = GlobalMaxPooling1D()(x2g) # avg_pool2l = GlobalAveragePooling1D()(x2l) # max_pool2l = GlobalMaxPooling1D()(x2l) # ##merge # conc = concatenate([avg_pool1g, max_pool1g, avg_pool1l, max_pool1l, avg_pool1_g, max_pool1_g, avg_pool1_l, max_pool1_l, # avg_pool2g, max_pool2g, avg_pool2l, max_pool2l, avg_pool2_g, max_pool2_g, avg_pool2_l, max_pool2_l]) # outp = Dense(6, activation="sigmoid")(conc) # model = Model(inputs=[inp_pre, inp_post], outputs=outp) # model.compile(loss='binary_crossentropy', # optimizer='adam', # metrics=['binary_crossentropy', 'accuracy']) # return model # def get_model_dual_2rnn_cnn_sp_drop( # embedding_matrix, cell_size = 80, cell_type_GRU = True, # maxlen = 180, max_features = 100000, embed_size = 300, # prob_dropout = 0.2, emb_train = False, # filter_size=128, kernel_size = 2, stride = 1 # ): # inp_pre = Input(shape=(maxlen, ), name='input_pre') # inp_post = Input(shape=(maxlen, ), name='input_post') # ##pre # x1 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_pre) # x1g = SpatialDropout1D(prob_dropout)(x1) # x1l = SpatialDropout1D(prob_dropout)(x1) # x1_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1g) # x1g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x1_g) # x1_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1l) # x1l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x1_l) # x1_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_g) # x1_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1_l) # avg_pool1_g = GlobalAveragePooling1D()(x1_g) # max_pool1_g = GlobalMaxPooling1D()(x1_g) # avg_pool1_l = GlobalAveragePooling1D()(x1_l) # max_pool1_l = GlobalMaxPooling1D()(x1_l) # x1g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1g) # x1l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x1l) # avg_pool1g = GlobalAveragePooling1D()(x1g) # max_pool1g = GlobalMaxPooling1D()(x1g) # avg_pool1l = GlobalAveragePooling1D()(x1l) # max_pool1l = GlobalMaxPooling1D()(x1l) # ##post # x2 = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable = emb_train)(inp_post) # x2g = SpatialDropout1D(prob_dropout)(x2) # x2l = SpatialDropout1D(prob_dropout)(x2) # x2_g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2g) # x2g = Bidirectional(CuDNNGRU(cell_size, return_sequences=True))(x2_g) # x2_l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2l) # x2l = Bidirectional(CuDNNLSTM(cell_size, return_sequences=True))(x2_l) # x2_g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_g) # x2_l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2_l) # avg_pool2_g = GlobalAveragePooling1D()(x2_g) # max_pool2_g = GlobalMaxPooling1D()(x2_g) # avg_pool2_l = GlobalAveragePooling1D()(x2_l) # max_pool2_l = GlobalMaxPooling1D()(x2_l) # x2g = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", 
kernel_initializer = "he_uniform")(x2g) # x2l = Conv1D(filter_size, kernel_size = kernel_size, strides=stride, padding = "valid", kernel_initializer = "he_uniform")(x2l) # avg_pool2g = GlobalAveragePooling1D()(x2g) # max_pool2g = GlobalMaxPooling1D()(x2g) # avg_pool2l = GlobalAveragePooling1D()(x2l) # max_pool2l = GlobalMaxPooling1D()(x2l) # ##merge # conc = concatenate([avg_pool1g, max_pool1g, avg_pool1l, max_pool1l, avg_pool1_g, max_pool1_g, avg_pool1_l, max_pool1_l, # avg_pool2g, max_pool2g, avg_pool2l, max_pool2l, avg_pool2_g, max_pool2_g, avg_pool2_l, max_pool2_l]) # conc = SpatialDropout1D(prob_dropout)(conc) # outp = Dense(6, activation="sigmoid")(conc) # model = Model(inputs=[inp_pre, inp_post], outputs=outp) # model.compile(loss='binary_crossentropy', # optimizer='adam', # metrics=['binary_crossentropy', 'accuracy']) # return model # def get_model_dpcnn( # embedding_matrix, cell_size = 80, cell_type_GRU = True, # maxlen = 180, max_features = 100000, embed_size = 300, # prob_dropout = 0.2, emb_train = False, # filter_nr=128, filter_size = 2, stride = 1, # max_pool_size = 3, max_pool_strides = 2, dense_nr = 256, # spatial_dropout = 0.2, dense_dropout = 0.5, # conv_kern_reg = regularizers.l2(0.00001), conv_bias_reg = regularizers.l2(0.00001) # ): # comment = Input(shape=(maxlen,)) # emb_comment = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=emb_train)(comment) # emb_comment = SpatialDropout1D(spatial_dropout)(emb_comment) # block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(emb_comment) # block1 = BatchNormalization()(block1) # block1 = PReLU()(block1) # block1 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block1) # block1 = BatchNormalization()(block1) # block1 = PReLU()(block1) # #we pass embedded comment through conv1d with filter size 1 because it needs to have the same shape as block output # #if you choose filter_nr = embed_size (300 in this case) you don't have to do this part and can add emb_comment directly to block1_output # resize_emb = Conv1D(filter_nr, kernel_size=1, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(emb_comment) # resize_emb = PReLU()(resize_emb) # block1_output = add([block1, resize_emb]) # block1_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block1_output) # block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block1_output) # block2 = BatchNormalization()(block2) # block2 = PReLU()(block2) # block2 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block2) # block2 = BatchNormalization()(block2) # block2 = PReLU()(block2) # block2_output = add([block2, block1_output]) # block2_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block2_output) # block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block2_output) # block3 = BatchNormalization()(block3) # block3 = PReLU()(block3) # block3 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, 
bias_regularizer=conv_bias_reg)(block3) # block3 = BatchNormalization()(block3) # block3 = PReLU()(block3) # block3_output = add([block3, block2_output]) # block3_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block3_output) # block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block3_output) # block4 = BatchNormalization()(block4) # block4 = PReLU()(block4) # block4 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block4) # block4 = BatchNormalization()(block4) # block4 = PReLU()(block4) # block4_output = add([block4, block3_output]) # block4_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block4_output) # block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block4_output) # block5 = BatchNormalization()(block5) # block5 = PReLU()(block5) # block5 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block5) # block5 = BatchNormalization()(block5) # block5 = PReLU()(block5) # block5_output = add([block5, block4_output]) # block5_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block5_output) # block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block5_output) # block6 = BatchNormalization()(block6) # block6 = PReLU()(block6) # block6 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block6) # block6 = BatchNormalization()(block6) # block6 = PReLU()(block6) # block6_output = add([block6, block5_output]) # block6_output = MaxPooling1D(pool_size=max_pool_size, strides=max_pool_strides)(block6_output) # block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block6_output) # block7 = BatchNormalization()(block7) # block7 = PReLU()(block7) # block7 = Conv1D(filter_nr, kernel_size=filter_size, padding='same', activation='linear', # kernel_regularizer=conv_kern_reg, bias_regularizer=conv_bias_reg)(block7) # block7 = BatchNormalization()(block7) # block7 = PReLU()(block7) # block7_output = add([block7, block6_output]) # output = GlobalMaxPooling1D()(block7_output) # output = Dense(dense_nr, activation='linear')(output) # output = BatchNormalization()(output) # output = PReLU()(output) # output = Dropout(dense_dropout)(output) # output = Dense(6, activation='sigmoid')(output) # model = Model(comment, output) # model.compile(loss='binary_crossentropy', # optimizer='adam', # metrics=['accuracy']) # return model
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('----- x_train shape:', x_train.shape)
print('----- x_test shape:', x_test.shape)

# Build the neural network model
print('========== 3.Build model...')
model = Sequential()
# input_dim=max_features is the vocabulary size, output_dim=embedding_dims=50 is the word-vector
# dimension, input_length=maxlen is the length of each sample
model.add(Embedding(max_features, embedding_dims, input_length=maxlen))  # output (*, 400, 50)
model.add(Dropout(0.2))
# 1D convolution layer: output dimension is `filters`, convolution stride is `strides`
model.add(Conv1D(filters, kernel_size, padding='valid', activation='relu', strides=1))  # output (*, 398, 250)
# global max pooling over the time dimension
model.add(GlobalMaxPooling1D())  # output (*, 250)
model.add(Dense(hidden_dims))  # output (*, 250)
model.add(Dropout(0.2))
model.add(Activation('relu'))
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Compile the network, train it, and evaluate performance on the test set
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))
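# The tokenisation step that produces the integer sequences in x_train / x_test is not shown in
# this snippet. A typical (assumed) upstream pipeline with the Keras Tokenizer would look like
# this, where train_texts and test_texts are lists of raw strings (hypothetical names):
from keras.preprocessing.text import Tokenizer

tokenizer = Tokenizer(num_words=max_features)
tokenizer.fit_on_texts(train_texts)
x_train = tokenizer.texts_to_sequences(train_texts)
x_test = tokenizer.texts_to_sequences(test_texts)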
def construct_gumbel_selector(X_ph, num_words, embedding_dims, maxlen, y=None):
    """ Build the MEED model for selecting words. """
    emb_layer = Embedding(num_words, embedding_dims, input_length=maxlen, name='emb_gumbel')
    emb = emb_layer(X_ph)
    net = Dropout(0.2, name='dropout_gumbel')(emb)
    net = emb  # NOTE: this overwrites the dropout output, so the selector runs on the raw embeddings
    # `kernel_size` is a module-level setting defined outside this function
    first_layer = Conv1D(100, kernel_size, padding='same', activation='relu', strides=1,
                         name='conv1_gumbel')(net)

    # global info
    net_new = GlobalMaxPooling1D(name='new_global_max_pooling1d_1')(first_layer)
    global_info = Dense(100, name='new_dense_1', activation='relu')(net_new)
    if y is not None:
        hy = Dense(100)(y)
        hy = Dense(100, activation='relu')(hy)
        hy = Dense(100, activation='relu')(hy)

    # local info
    net = Conv1D(100, 3, padding='same', activation='relu', strides=1, name='conv2_gumbel')(first_layer)
    local_info = Conv1D(100, 3, padding='same', activation='relu', strides=1, name='conv3_gumbel')(net)

    if y is not None:
        global_info = concatenate([global_info, hy])
    combined = Concatenate()([global_info, local_info])
    net = Dropout(0.2, name='new_dropout_2')(combined)
    net = Conv1D(100, 1, padding='same', activation='relu', strides=1, name='conv_last_gumbel')(net)
    logits_T = Conv1D(1, 1, padding='same', activation=None, strides=1, name='conv4_gumbel')(net)

    return logits_T
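# construct_gumbel_selector only returns per-position logits. In L2X-style explanation models
# these logits are turned into a differentiable word-selection mask with the Gumbel-Softmax
# (Concrete) relaxation. The layer below is a minimal single-sample sketch of that idea; the
# temperature `tau` is illustrative, and this is not necessarily the exact sampling layer used
# with the selector above.
from keras import backend as K
from keras.layers import Layer

class SampleGumbelSoftmax(Layer):
    def __init__(self, tau=0.5, **kwargs):
        super(SampleGumbelSoftmax, self).__init__(**kwargs)
        self.tau = tau

    def call(self, logits):
        logits = K.squeeze(logits, axis=-1)                       # (batch, maxlen, 1) -> (batch, maxlen)
        u = K.random_uniform(K.shape(logits), K.epsilon(), 1.0)   # uniform noise
        gumbel = -K.log(-K.log(u))                                # Gumbel(0, 1) noise
        probs = K.softmax((logits + gumbel) / self.tau)           # soft one-hot over positions
        return K.expand_dims(probs, axis=-1)                      # back to (batch, maxlen, 1)

    def compute_output_shape(self, input_shape):
        return input_shape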
# x4 = Conv1D(activation="relu", filters=100, kernel_size=5, padding="same")(x)
# x = concatenate([x1, x2, x3, x4])
# x = GlobalMaxPooling1D()(x)
# x = Dense(100, activation='relu')(x)
# output = Dense(1, activation='sigmoid')(x)
# model = Model(inputs=words_input, outputs=output)
np.random.seed(42)
model = Sequential()
model.add(Embedding(vocab_size + 1, EMBEDDING_DIM, input_length=MAX_SEQUENCE_LENGTH, trainable=True))
model.add(Conv1D(activation="relu", filters=100, kernel_size=5, padding="valid"))
model.add(SpatialDropout1D(0.1))
model.add(BatchNormalization())
model.add(Conv1D(activation="relu", filters=100, kernel_size=5, padding="valid"))
model.add(GlobalMaxPooling1D())
model.add(Dense(100, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# callbacks initialization
# automatic generation of learning curves
callback_1 = TensorBoard(log_dir='../logs/logs_{}'.format(NAME), histogram_freq=0,
                         write_graph=False, write_images=False)
# stop training if val_f1 does not improve for five consecutive epochs
callback_2 = EarlyStopping(monitor='val_f1', min_delta=0, patience=5, verbose=0, mode='max')
# save the best model
callback_3 = ModelCheckpoint("models/model_{}.hdf5".format(NAME), monitor='val_f1',
                             save_best_only=True, verbose=0, mode='max')

model.compile(loss='binary_crossentropy', optimizer='adam',
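# The callbacks above monitor 'val_f1', which is not a built-in Keras metric. One way to supply it
# (an assumption about how this project does it, not code from the source) is a callback that
# computes F1 on a held-out set at the end of each epoch and writes it into `logs`; it must be
# placed before the EarlyStopping / ModelCheckpoint callbacks so they can read the value.
import numpy as np
from keras.callbacks import Callback
from sklearn.metrics import f1_score

class F1Metric(Callback):
    def __init__(self, x_val, y_val):
        super(F1Metric, self).__init__()
        self.x_val = x_val
        self.y_val = y_val

    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        y_pred = (self.model.predict(self.x_val) > 0.5).astype(int).ravel()
        logs['val_f1'] = f1_score(self.y_val, y_pred)

# Hypothetical usage: callbacks=[F1Metric(x_val, y_val), callback_2, callback_3, callback_1]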
def trian_cnn():
    # set parameters:
    max_features = 20000
    maxlen = 8
    batch_size = 32
    embedding_dims = 50
    nb_filter = 250
    filter_length = 3
    hidden_dims = 250
    nb_epoch = 2

    print('Loading data...')
    X_train, y_train, X_test, y_test = load_data()
    print(len(X_train), 'train sequences')
    print(len(X_test), 'test sequences')

    print('Pad sequences (samples x time)')
    X_train = sequence.pad_sequences(X_train, maxlen=maxlen, value=3)
    X_test = sequence.pad_sequences(X_test, maxlen=maxlen, value=3)
    print('X_train shape:', X_train.shape)
    print('X_test shape:', X_test.shape)
    y_train = np.array(y_train)
    print('y_train shape', y_train.shape)
    y_test = np.array(y_test)
    print('y_test shape', y_test.shape)

    print('Build model...')
    model = Sequential()
    model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
    model.add(Dropout(0.2))

    # we add a Conv1D, which will learn nb_filter
    # word group filters of size filter_length:
    model.add(Conv1D(filters=nb_filter,
                     kernel_size=filter_length,
                     padding='valid',
                     activation='relu',
                     strides=1))
    # we use max pooling:
    model.add(GlobalMaxPooling1D())

    # We add a vanilla hidden layer:
    model.add(Dense(hidden_dims))
    model.add(Dropout(0.2))
    model.add(Activation('relu'))

    # We project onto a single unit output layer, and squash it with a sigmoid:
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    model.fit(X_train, y_train,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_data=(X_test, y_test))
    score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)

    # save the model: serialize the architecture to JSON
    model_json = model.to_json()
    with open("./dict/model_cnn.json", "w") as json_file:
        json_file.write(model_json)
    # serialize weights to HDF5
    model.save_weights("./dict/model_cnn_weights.h5")
    print("Saved model to disk")
    del model

    print('Test score:', score)
    print('Test accuracy:', acc)
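# Reloading the persisted model later uses the standard Keras JSON + weights round trip
# (paths mirror the ones written above):
from keras.models import model_from_json

with open("./dict/model_cnn.json") as json_file:
    restored = model_from_json(json_file.read())
restored.load_weights("./dict/model_cnn_weights.h5")
restored.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])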