import keras
from keras.models import Model, Sequential
from keras.layers import (Input, Dense, Dropout, Flatten, LSTM, Conv2D,
                          AveragePooling2D)


def triple_classification():
    # Plain fully connected classifier over the w2v_dim-dimensional
    # document vector.
    model = Sequential()
    model.add(Dense(100, input_dim=w2v_dim, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(0.4))
    model.add(Dense(10, activation='relu'))
    # model.add(Dropout(0.4))
    model.add(Dense(label_categories, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train,
              shuffle=True,
              epochs=800,
              batch_size=64,
              validation_split=0.2,
              verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    print(' '.join('%s: %s' % pair
                   for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    return score_dic, confusion_matrix
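# The helpers and globals referenced throughout this file (auc,
# early_stopping, conf_matrix, w2v_dim, label_categories, label_index,
# and the train/test arrays) are defined elsewhere in the project. The
# definitions below are hedged stand-ins so the snippets are runnable:
# a streaming-AUC Keras metric (the usual TF1-era pattern), a standard
# EarlyStopping callback with an assumed patience, and an argmax-based
# confusion matrix. The project's real implementations may differ.
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.callbacks import EarlyStopping


def auc(y_true, y_pred):
    # Streaming AUC from tf.metrics, wrapped as a Keras metric tensor.
    value = tf.metrics.auc(y_true, y_pred)[1]
    K.get_session().run(tf.local_variables_initializer())
    return value


# Stop once validation loss plateaus; the patience value is an assumption.
early_stopping = EarlyStopping(monitor='val_loss', patience=10)


def conf_matrix(y_true_onehot, y_pred_scores, label_index):
    # Rows are true classes, columns are argmax predictions.
    n = len(label_index)
    matrix = np.zeros((n, n), dtype=int)
    for true_row, pred_row in zip(y_true_onehot, y_pred_scores):
        matrix[np.argmax(true_row), np.argmax(pred_row)] += 1
    return matrix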
def train_text_dense_3():
    # Two-branch model: text vector (100-d) and dense features (50-d),
    # merged by concatenation before the output layer.
    input_text = Input(shape=(100, ))
    input_dense = Input(shape=(50, ))

    text_out = Dense(50, activation='relu')(input_text)
    text_out = Dropout(0.4)(text_out)
    text_out = Dense(10, activation='relu')(text_out)
    # auxi_out = Dropout(0.4)(auxi_out)
    # auxi_out = Dense(label_categories, activation='sigmoid', name='auxi_out')(auxi_out)

    lstm_out = Dense(10, activation='relu')(input_dense)
    # lstm_out = Dropout(0.4)(lstm_out)
    # lstm_out = Dense(10, activation='relu')(lstm_out)
    # lstm_out = Dropout(0.4)(lstm_out)
    # senti_out = Dense(label_categories, activation='sigmoid', name='senti_out')(senti_out)

    x = keras.layers.concatenate([text_out, lstm_out])
    # x = Dense(10, activation='relu')(x)
    y = Dense(label_categories, activation='sigmoid')(x)

    model = Model(inputs=[input_text, input_dense], outputs=y)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.fit([X_train_text, X_train_dense], dummy_Y_train,
              shuffle=True,
              validation_split=0.2,
              epochs=300,
              batch_size=256,
              verbose=2)

    # Evaluate on the training set as well, to gauge overfitting.
    scores_train = model.evaluate([X_train_text, X_train_dense],
                                  dummy_Y_train, batch_size=256)
    print('train result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores_train)))
    scores = model.evaluate([X_test_text, X_test_dense],
                            dummy_Y_test, batch_size=256)
    print('test result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    Y_predict = model.predict([X_test_text, X_test_dense])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text():
    # Deep sigmoid stack over the averaged text vector. Note the softmax
    # output paired with binary_crossentropy below: an unusual pairing;
    # categorical_crossentropy is the conventional loss for softmax.
    text_input = Input(shape=(w2v_dim, ))
    x = Dense(300, activation='sigmoid')(text_input)
    x = Dropout(0.4)(x)
    x = Dense(200, activation='sigmoid')(x)
    x = Dense(50, activation='sigmoid')(x)
    x = Dense(10, activation='sigmoid')(x)
    y = Dense(label_categories, activation='softmax', name='text_out')(x)

    model = Model(inputs=text_input, outputs=y)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train,
              shuffle=True,
              epochs=15,
              batch_size=64,
              validation_split=0.2,
              verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    print(' '.join('%s: %s' % pair
                   for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    predictions = model.predict(X_test)
    print('general_confusion_matrix =>')
    print(print_conf_matrix(conf_matrix(dummy_Y_test, predictions,
                                        label_index)))

    confusion_matrix, acc = get_w_acc_conf(train_chunk_number, label_index,
                                           dummy_Y_test, predictions)
    print('w_confusion_matrix =>')
    print(print_conf_matrix(confusion_matrix))
    print('w_acc =>', acc)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
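# print_conf_matrix and get_w_acc_conf are also defined elsewhere. A
# hedged stand-in for the former, assuming it returns a tab-separated
# string with label_index supplying the row/column labels; get_w_acc_conf
# is left out, since its weighting scheme is not recoverable from here.
def print_conf_matrix(matrix):
    header = '\t' + '\t'.join(str(label) for label in label_index)
    rows = [str(label) + '\t' + '\t'.join(str(v) for v in row)
            for label, row in zip(label_index, matrix)]
    return '\n'.join([header] + rows)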
def train_text_keywords_1():
    # Early-fusion model: concatenate the text vector (100-d) and the
    # keyword vector (256-d), then feed a shared stack of dense layers.
    input_text = Input(shape=(100, ))
    input_keywords = Input(shape=(256, ))
    input_text_senti = keras.layers.concatenate([input_text, input_keywords])

    text_out = Dense(150, activation='relu')(input_text_senti)
    text_out = Dropout(0.4)(text_out)
    text_out = Dense(50, activation='relu')(text_out)
    text_out = Dropout(0.4)(text_out)
    text_out = Dense(10, activation='relu')(text_out)
    y = Dense(label_categories, activation='sigmoid', name='text_out')(text_out)

    model = Model(inputs=[input_text, input_keywords], outputs=y)
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.fit([X_train_text, X_train_keywords], dummy_Y_train,
              shuffle=True,
              validation_split=0.2,
              epochs=400,
              batch_size=512,
              callbacks=[early_stopping])

    scores_train = model.evaluate([X_train_text, X_train_keywords],
                                  dummy_Y_train, batch_size=512)
    print('train_result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores_train)))
    scores = model.evaluate([X_test_text, X_test_keywords],
                            dummy_Y_test, batch_size=512)
    print('test_result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    Y_predict = model.predict([X_test_text, X_test_keywords])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
def train_text_cnn():
    # 1-D convolution over the word-vector sequence, expressed as a
    # Conv2D whose kernel spans the full embedding width (100-d here).
    seq_input = Input(shape=(max_count, w2v_dim, 1))
    x = Conv2D(filters=16, kernel_size=(5, w2v_dim),
               activation='tanh')(seq_input)
    x = AveragePooling2D(pool_size=(4, 1))(x)
    x = Flatten()(x)
    x = Dense(100, activation='relu')(x)
    x = Dense(10, activation='relu')(x)
    # sigmoid pairs with binary_crossentropy; a tanh output can go
    # negative, which the loss would simply clip away.
    y = Dense(label_categories, activation='sigmoid', name='text_out')(x)

    model = Model(inputs=seq_input, outputs=y)
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy', auc])
    model.summary()
    model.fit(X_train, dummy_Y_train,
              shuffle=True,
              epochs=10,
              batch_size=64,
              validation_split=0.2,
              verbose=2,
              callbacks=[early_stopping])

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    print(' '.join('%s: %s' % pair
                   for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
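# Conv2D needs a 4-D input, so the per-document word-vector sequences
# have to be padded/truncated to max_count timesteps and given a trailing
# channel axis before train_text_cnn is called. A minimal sketch of that
# preparation; `docs` (a list of (n_words, w2v_dim) arrays) is an assumed
# name, not something defined in this file. train_text_lstm below takes
# the same tensor minus the channel axis.
def to_cnn_input(docs, max_count, w2v_dim):
    out = np.zeros((len(docs), max_count, w2v_dim), dtype=np.float32)
    for i, vectors in enumerate(docs):
        rows = min(len(vectors), max_count)
        out[i, :rows] = vectors[:rows]
    return out[..., np.newaxis]  # shape: (n_docs, max_count, w2v_dim, 1)

# e.g. X_train = to_cnn_input(train_docs, max_count, w2v_dim)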
def train_text_lstm():
    # LSTM over the padded word-vector sequence.
    seq_input = Input(shape=(max_count, w2v_dim))
    x = LSTM(256)(seq_input)
    x = Dropout(0.4)(x)
    x = Dense(100, activation='tanh')(x)
    x = Dense(20, activation='tanh')(x)
    # sigmoid output, for the same reason as in train_text_cnn.
    y = Dense(label_categories, activation='sigmoid', name='text_out')(x)

    model = Model(inputs=seq_input, outputs=y)
    model.compile(loss='binary_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy', auc])
    model.fit(X_train, dummy_Y_train,
              shuffle=True,
              epochs=20,
              batch_size=64,
              validation_split=0.2,
              verbose=2)

    scores = model.evaluate(X_test, dummy_Y_test, batch_size=64)
    print(' '.join('%s: %s' % pair
                   for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    predictions = model.predict(X_test)
    confusion_matrix = conf_matrix(dummy_Y_test, predictions, label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
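# Because shorter documents are zero-padded to max_count, the LSTM above
# also consumes the padding rows. A standard refinement, sketched here as
# a suggestion rather than something this file does, is a Masking layer
# so the recurrence skips all-zero timesteps:
from keras.layers import Masking


def masked_lstm_features():
    # Drop-in replacement for the first two layers of train_text_lstm.
    seq_input = Input(shape=(max_count, w2v_dim))
    masked = Masking(mask_value=0.0)(seq_input)  # ignore zero-padded rows
    return seq_input, LSTM(256)(masked)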
def train_text_keywords_2():
    # Late-fusion model with an auxiliary classifier on the text branch.
    input_text = Input(shape=(100, ))
    input_keywords = Input(shape=(256, ))

    text_out = Dense(50, activation='relu')(input_text)

    # Auxiliary head: classifies from the text branch alone.
    auxi_out = Dropout(0.4)(text_out)
    auxi_out = Dense(10, activation='relu')(auxi_out)
    auxi_out = Dropout(0.4)(auxi_out)
    auxi_out = Dense(label_categories, activation='sigmoid',
                     name='auxi_out')(auxi_out)

    keywords_out = Dense(50, activation='sigmoid')(input_keywords)
    # keywords_out = Dropout(0.4)(keywords_out)
    # keywords_out = Dense(10, activation='relu')(keywords_out)
    # senti_out = Dense(label_categories, activation='sigmoid', name='senti_out')(senti_out)

    x = keras.layers.concatenate([text_out, keywords_out])
    x = Dense(10, activation='relu')(x)
    y = Dense(label_categories, activation='sigmoid', name='final_out')(x)

    model = Model(inputs=[input_text, input_keywords], outputs=[y, auxi_out])
    # A single loss string is applied to both outputs with equal weight.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy', auc])
    model.fit([X_train_text, X_train_keywords],
              [dummy_Y_train, dummy_Y_train],
              shuffle=True,
              validation_split=0.2,
              epochs=350,
              batch_size=64,
              callbacks=[early_stopping])

    scores_train = model.evaluate([X_train_text, X_train_keywords],
                                  [dummy_Y_train, dummy_Y_train],
                                  batch_size=512)
    print('train result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores_train)))
    scores = model.evaluate([X_test_text, X_test_keywords],
                            [dummy_Y_test, dummy_Y_test],
                            batch_size=512)
    print('test result => ' + ' '.join(
        '%s: %s' % pair
        for pair in zip(model.metrics_names, scores)))
    score_dic = dict(zip(model.metrics_names, scores))

    # The confusion matrix uses the main (final_out) head only.
    Y_predict = model.predict([X_test_text, X_test_keywords])
    confusion_matrix = conf_matrix(dummy_Y_test, Y_predict[0], label_index)
    print(confusion_matrix)
    print('round ' + str(train_chunk_number) + ' finished')
    return score_dic, confusion_matrix
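# With the single loss string above, Keras sums the two heads' losses
# with equal weight. If the auxiliary head should only regularize the
# text branch rather than pull as hard as the main head, per-output
# losses and loss_weights can be given at compile time. A sketch that
# could replace the model.compile(...) call in train_text_keywords_2;
# the 0.3 weight is an arbitrary assumption:
def compile_with_loss_weights(model):
    model.compile(optimizer='adam',
                  loss={'final_out': 'binary_crossentropy',
                        'auxi_out': 'binary_crossentropy'},
                  loss_weights={'final_out': 1.0, 'auxi_out': 0.3},
                  metrics=['accuracy', auc])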