def triple_classification():
    """Train a three-hidden-layer dense classifier on w2v features.

    Returns (score_dic, confusion_matrix) computed on the test split.
    """
    # Fully-connected stack: 100 -> 50 -> 10 -> label_categories.
    model = Sequential([
        Dense(100, input_dim=w2v_dim, activation='relu'),
        Dropout(0.4),
        Dense(50, activation='relu'),
        Dropout(0.4),
        Dense(10, activation='relu'),
        Dense(label_categories, activation='sigmoid'),
    ])
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train, shuffle=True, epochs=n_epochs,
              batch_size=64, validation_split=0.2, verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    # Report each metric as "name:value " (loss, accuracy, auc).
    print(" ".join(name + ":" + str(value)
                   for name, value in zip(model.metrics_names[:3],
                                          scores[:3])) + " ")
    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    return score_dic, confusion_matrix
def train_text_dense_3():
    """Fuse a 100-d text branch with a 50-d dense-feature branch and classify.

    Returns (score_dic, confusion_matrix) computed on the test split.
    """
    input_text = Input(shape=(100, ))
    input_dense = Input(shape=(50, ))

    # Text branch: 100 -> 50 -> 10 with dropout.
    branch_text = Dense(50, input_dim=100, activation='relu')(input_text)
    branch_text = Dropout(0.4)(branch_text)
    branch_text = Dense(10, activation='relu')(branch_text)

    # Dense-feature branch: 50 -> 10.
    branch_dense = Dense(10, input_dim=50, activation='relu')(input_dense)

    merged = keras.layers.concatenate([branch_text, branch_dense])
    output = Dense(label_categories, activation='sigmoid')(merged)

    model = Model(inputs=[input_text, input_dense], outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.fit([X_train_text, X_train_dense], dummy_Y_train, shuffle=True,
              validation_split=0.2, epochs=300, batch_size=64, verbose=2)

    def report(metric_scores):
        # "result => name: value name: value ... " for the three metrics.
        print('result => ' + " ".join(
            name + ": " + str(value)
            for name, value in zip(model.metrics_names[:3],
                                   metric_scores[:3])) + " ")

    scores_train = model.evaluate([X_train_text, X_train_dense],
                                  dummy_Y_train, batch_size=64)
    report(scores_train)
    scores = model.evaluate([X_test_text, X_test_dense], dummy_Y_test,
                            batch_size=64)
    report(scores)

    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))
    Y_predict = model.predict([X_test_text, X_test_dense])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text():
    """Train a small dense classifier on w2v text vectors only.

    Returns (score_dic, confusion_matrix) computed on the test split.
    """
    text_input = Input(shape=(w2v_dim,))
    hidden = Dense(50, input_dim=w2v_dim, activation='relu')(text_input)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(10, activation='relu')(hidden)
    output = Dense(label_categories, activation='sigmoid',
                   name='text_out')(hidden)

    model = Model(inputs=text_input, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train, shuffle=True, epochs=n_epochs,
              batch_size=64, validation_split=0.2, verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    # "name:value " for loss / accuracy / auc.
    print(" ".join(name + ":" + str(value)
                   for name, value in zip(model.metrics_names[:3],
                                          scores[:3])) + " ")
    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text_senti_2():
    """Two-input model (text 100-d + sentiment 2-d) with an auxiliary head.

    The text branch feeds both an auxiliary classifier ('text_out') and the
    merged final classifier ('final_out'); both heads train on the same
    labels. Returns (score_dic, confusion_matrix) for the final head.
    """
    input_text = Input(shape=(100,))
    input_senti = Input(shape=(2,))

    # Text branch: 100 -> 50 -> 10 with dropout, plus auxiliary head.
    text_branch = Dense(50, input_dim=100, activation='relu')(input_text)
    text_branch = Dropout(0.4)(text_branch)
    text_branch = Dense(10, activation='relu')(text_branch)
    text_branch = Dropout(0.4)(text_branch)
    auxi_out = Dense(label_categories, activation='sigmoid',
                     name='text_out')(text_branch)

    # Sentiment branch: 2 -> 10.
    senti_branch = Dense(10, input_dim=2, activation='sigmoid')(input_senti)
    senti_branch = Dropout(0.4)(senti_branch)

    merged = keras.layers.concatenate([text_branch, senti_branch])
    final_out = Dense(label_categories, activation='sigmoid',
                      name='final_out')(merged)

    model = Model(inputs=[input_text, input_senti],
                  outputs=[final_out, auxi_out])
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    # Both outputs are trained against the same label set.
    model.fit([X_train_text, X_train_senti], [dummy_Y_train, dummy_Y_train],
              shuffle=True, validation_split=0.2, epochs=350, batch_size=64)

    def report(metric_scores):
        # "result => name: value ... " across all seven reported metrics.
        print('result => ' + " ".join(
            name + ": " + str(value)
            for name, value in zip(model.metrics_names[:7],
                                   metric_scores[:7])) + " ")

    scores_train = model.evaluate([X_train_text, X_train_senti],
                                  [dummy_Y_train, dummy_Y_train],
                                  batch_size=64)
    report(scores_train)
    scores = model.evaluate([X_test_text, X_test_senti],
                            [dummy_Y_test, dummy_Y_test], batch_size=64)
    report(scores)

    score_dic = dict(zip(model.metrics_names[:7], scores[:7]))
    Y_predict = model.predict([X_test_text, X_test_senti])
    # Confusion matrix is computed on the final (merged) head only.
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict[0], label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text_senti_lstm():
    """Train on concatenated text (100-d), sentiment (6-d) and LSTM (128-d) features.

    A single dense stack runs over the concatenated 234-d input. Train and
    test scores are printed and also appended to the global log file `f`.
    Returns (score_dic, confusion_matrix) computed on the test split.
    """
    input_text = Input(shape=(100,))
    input_senti = Input(shape=(6,))
    input_lstm = Input(shape=(128,))
    # 100 + 6 + 128 = 234 features after concatenation.
    input_text_senti = keras.layers.concatenate(
        [input_text, input_senti, input_lstm])

    text_out = Dense(120, input_dim=234, activation='relu')(input_text_senti)
    text_out = Dropout(0.4)(text_out)
    text_out = Dense(10, activation='relu')(text_out)
    y = Dense(label_categories, activation='sigmoid', name='text_out')(text_out)

    model = Model(inputs=[input_text, input_senti, input_lstm], outputs=y)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.fit([X_train_text, X_train_senti, X_train_lstm], dummy_Y_train,
              shuffle=True, validation_split=0.2, epochs=n_epochs,
              batch_size=64)

    scores_train = model.evaluate([X_train_text, X_train_senti, X_train_lstm],
                                  dummy_Y_train, batch_size=64)
    # FIX: training scores were previously mislabelled 'test_result'
    # (copy-paste); label them 'train_result' like the sibling functions.
    print(str(n_epochs) + ' epochs, ' + str(train_chunk_number)
          + ' chunk, train_result => '
          + model.metrics_names[0] + ": " + str(scores_train[0]) + " "
          + model.metrics_names[1] + ": " + str(scores_train[1]) + " ")
    f.write(str(n_epochs) + ' epochs, ' + str(train_chunk_number)
            + ' chunk, train_result => '
            + model.metrics_names[0] + ": " + str(scores_train[0]) + " "
            + model.metrics_names[1] + ": " + str(scores_train[1]) + " \n")

    scores = model.evaluate([X_test_text, X_test_senti, X_test_lstm],
                            dummy_Y_test, batch_size=64)
    print(str(n_epochs) + ' epochs, ' + str(train_chunk_number)
          + ' chunk, test_result => '
          + model.metrics_names[0] + ": " + str(scores[0]) + " "
          + model.metrics_names[1] + ": " + str(scores[1]) + " ")
    f.write(str(n_epochs) + ' epochs, ' + str(train_chunk_number)
            + ' chunk, test_result => '
            + model.metrics_names[0] + ": " + str(scores[0]) + " "
            + model.metrics_names[1] + ": " + str(scores[1]) + " \n")

    # Returned dict keeps all three configured metrics (loss, accuracy, auc).
    score_dic = {model.metrics_names[0]: scores[0],
                 model.metrics_names[1]: scores[1],
                 model.metrics_names[2]: scores[2]}
    Y_predict = model.predict([X_test_text, X_test_senti, X_test_lstm])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text_cnn():
    """CNN over a (maxlen, w2v_dim, 1) word-embedding "image".

    Returns (score_dic, confusion_matrix) computed on the test split.
    """
    cnn_input = Input(shape=(maxlen, w2v_dim, 1))
    features = Conv2D(filters=32, kernel_size=(5, 100), activation='relu',
                      name='conv_out')(cnn_input)
    features = AveragePooling2D((4, 1), name='pool_out')(features)
    features = Flatten()(features)
    features = Dense(50, activation='relu', name='dense_50')(features)
    output = Dense(label_categories, activation='sigmoid',
                   name='final_out')(features)

    model = Model(inputs=cnn_input, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train, shuffle=True, epochs=n_epochs,
              batch_size=64, validation_split=0.2, verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    # "name:value " for loss / accuracy / auc.
    print(" ".join(name + ":" + str(value)
                   for name, value in zip(model.metrics_names[:3],
                                          scores[:3])) + " ")
    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_insertdense():
    """Classifier that concatenates text features with a pre-trained LSTM branch.

    The LSTM branch (embedding -> LSTM -> dense_50) restores its weights by
    layer name from 'sentiment_lstm.h5'; its 50-d output is concatenated with
    the 100-d text features and fed through a small dense head. Train/test
    scores are printed and appended to the global log file `f`. Returns
    (score_dic, confusion_matrix) computed on the test split.
    """
    # Load the pre-trained sentiment LSTM branch (weights matched by name).
    lstm_inputs = Input(shape=(maxlen, ))
    lstm_embedding = Embedding(output_dim=word2vec_dim, input_dim=n_symbols,
                               mask_zero=True, weights=[embedding_weights],
                               input_length=maxlen,
                               name='embedding_1')(lstm_inputs)
    lstm_lstm = LSTM(128, name='lstm_out')(lstm_embedding)
    lstm_dropout_1 = Dropout(0.5)(lstm_lstm)
    lstm_dense_50 = Dense(50, activation='relu',
                          name='dense_50')(lstm_dropout_1)
    lstm_model = Model(inputs=lstm_inputs, outputs=lstm_dense_50)
    lstm_model.load_weights(path.join(path.dirname(__file__),
                                      'sentiment_lstm.h5'), by_name=True)

    # Fully-connected head over [text features | LSTM features].
    fcnn_inputs = Input(shape=(100, ))
    fcnn_conca = keras.layers.concatenate([fcnn_inputs, lstm_dense_50])
    fcnn_dense_100 = Dense(75, activation='relu')(fcnn_conca)
    fcnn_dropout_1 = Dropout(0.4)(fcnn_dense_100)
    fcnn_dense_20 = Dense(15, activation='relu')(fcnn_dropout_1)
    fcnn_dropout_2 = Dropout(0.4)(fcnn_dense_20)
    fcnn_out = Dense(label_categories, activation='sigmoid')(fcnn_dropout_2)

    model = Model(inputs=[fcnn_inputs, lstm_inputs], outputs=fcnn_out)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.summary()

    print('fit')
    model.fit([X_train_text, X_train_lstm], dummy_Y_train, shuffle=True,
              validation_split=0.2, epochs=n_epochs, batch_size=64, verbose=1)

    print('evaluate')
    scores_train = model.evaluate([X_train_text, X_train_lstm], dummy_Y_train,
                                  batch_size=64)
    print(
        str(n_epochs) + ' epochs, ' + str(train_chunk_number)
        + ' chunk, train_result => '
        + model.metrics_names[0] + ": " + str(scores_train[0]) + " "
        + model.metrics_names[1] + ": " + str(scores_train[1]) + " ")
    f.write(
        str(n_epochs) + ' epochs, ' + str(train_chunk_number)
        + ' chunk, train_result => '
        + model.metrics_names[0] + ": " + str(scores_train[0]) + " "
        + model.metrics_names[1] + ": " + str(scores_train[1]) + " \n")

    scores = model.evaluate([X_test_text, X_test_lstm], dummy_Y_test,
                            batch_size=512)
    print(
        str(n_epochs) + ' epochs, ' + str(train_chunk_number)
        + ' chunk, test_result => '
        + model.metrics_names[0] + ": " + str(scores[0]) + " "
        + model.metrics_names[1] + ": " + str(scores[1]) + " ")
    f.write(
        str(n_epochs) + ' epochs, ' + str(train_chunk_number)
        + ' chunk, test_result => '
        + model.metrics_names[0] + ": " + str(scores[0]) + " "
        + model.metrics_names[1] + ": " + str(scores[1]) + " \n")

    # FIX: also return the third configured metric (auc) so the result dict
    # matches the sibling training functions; previously only loss and
    # accuracy were returned. Console/log output is left unchanged.
    score_dic = {
        model.metrics_names[0]: scores[0],
        model.metrics_names[1]: scores[1],
        model.metrics_names[2]: scores[2]
    }
    Y_predict = model.predict([X_test_text, X_test_lstm])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_insertlstm():
    """Classifier combining 100-d text features with a pre-trained LSTM encoder.

    The LSTM encoder restores weights by layer name from
    'smote_simJD_sentiment_lstm.h5'. Returns (score_dic, confusion_matrix)
    computed on the test split; scores are also logged to `f`.
    """
    # Pre-trained LSTM branch over (maxlen, word2vec_dim) sequences.
    lstm_inputs = Input(shape=(maxlen, word2vec_dim))
    lstm_lstm = LSTM(128, name='lstm_out')(lstm_inputs)
    lstm_model = Model(inputs=lstm_inputs, outputs=lstm_lstm)
    lstm_model.load_weights(path.join(path.dirname(__file__),
                                      'smote_simJD_sentiment_lstm.h5'),
                            by_name=True)
    print(lstm_model.get_weights())

    # Dense classification head over [text features | LSTM state].
    fcnn_inputs = Input(shape=(100, ), name='fcnn_input')
    merged = keras.layers.concatenate([fcnn_inputs, lstm_lstm])
    hidden = Dense(100, activation='relu')(merged)
    hidden = Dropout(0.4)(hidden)
    hidden = Dense(20, activation='relu')(hidden)
    fcnn_out = Dense(label_categories, activation='sigmoid')(hidden)

    model = Model(inputs=[fcnn_inputs, lstm_inputs], outputs=fcnn_out)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.summary()

    def log_scores(label, metric_scores):
        # "<epochs> epochs, <chunk> chunk, <label> => name: value ... "
        line = (str(n_epochs) + ' epochs, ' + str(train_chunk_number)
                + ' chunk, ' + label + ' => '
                + " ".join(name + ": " + str(value)
                           for name, value in zip(model.metrics_names[:3],
                                                  metric_scores[:3])) + " ")
        print(line)
        f.write(line + "\n")

    print('fit')
    model.fit([X_train_text, X_train_lstm], dummy_Y_train, shuffle=True,
              validation_split=0.2, epochs=n_epochs, batch_size=64, verbose=1)

    print('evaluate')
    scores_train = model.evaluate([X_train_text, X_train_lstm], dummy_Y_train,
                                  batch_size=64)
    log_scores('train_result', scores_train)
    scores = model.evaluate([X_test_text, X_test_lstm], dummy_Y_test,
                            batch_size=512)
    log_scores('test_result', scores)

    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))
    Y_predict = model.predict([X_test_text, X_test_lstm])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_onlyText_cnn():
    """CNN classifier over (maxlen, word2vec_dim, 1) embedded text only.

    Returns (score_dic, confusion_matrix) computed on the test split;
    scores are also logged to the global file `f`.
    """
    cnn_input = Input(shape=(maxlen, word2vec_dim, 1))
    feat = Conv2D(filters=32, kernel_size=(5, 100), activation='relu',
                  name='conv_out')(cnn_input)
    feat = AveragePooling2D((4, 1), name='pool_out')(feat)
    feat = Flatten()(feat)
    feat = Dense(50, activation='relu', name='dense_50')(feat)
    output = Dense(label_categories, activation='sigmoid')(feat)

    model = Model(inputs=cnn_input, outputs=output)
    model.compile(optimizer='adam', loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.summary()

    def log_scores(label, metric_scores):
        # "<epochs> epochs, <chunk> chunk, <label> => name: value ... "
        line = (str(n_epochs) + ' epochs, ' + str(train_chunk_number)
                + ' chunk, ' + label + ' => '
                + " ".join(name + ": " + str(value)
                           for name, value in zip(model.metrics_names[:3],
                                                  metric_scores[:3])) + " ")
        print(line)
        f.write(line + "\n")

    print('fit')
    model.fit(X_train_cnn, dummy_Y_train, shuffle=True, validation_split=0.2,
              epochs=n_epochs, batch_size=64, verbose=1)

    print('evaluate')
    scores_train = model.evaluate(X_train_cnn, dummy_Y_train, batch_size=64)
    log_scores('train_result', scores_train)
    scores = model.evaluate(X_test_cnn, dummy_Y_test, batch_size=512)
    log_scores('test_result', scores)

    score_dic = dict(zip(model.metrics_names[:3], scores[:3]))
    Y_predict = model.predict(X_test_cnn)
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix