def main():
    """Train an NB-SVM emotional-vs-rational sentence classifier and report metrics.

    Side effects: prints vocabulary size, accuracy, F1 and per-class AUC,
    and writes nbsvm_emotional_roc.png / nbsvm_rational_roc.png.
    """
    emotional_sents, rational_sents = emotional_rational()
    prep = Preprocessor()
    emotional_sents = prep.parse_sentences(emotional_sents)
    rational_sents = prep.parse_sentences(rational_sents)

    # First half of each class trains, second half tests (no shuffling).
    emo_mid = len(emotional_sents) // 2
    rat_mid = len(rational_sents) // 2
    train_pos, test_pos = emotional_sents[:emo_mid], emotional_sents[emo_mid:]
    train_neg, test_neg = rational_sents[:rat_mid], rational_sents[rat_mid:]

    # Bag-of-words features; label 1 = emotional, 0 = rational.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(train_pos + train_neg)
    y_train = np.array([1] * len(train_pos) + [0] * len(train_neg))
    X_test = vectorizer.transform(test_pos + test_neg)
    y_test = np.array([1] * len(test_pos) + [0] * len(test_neg))
    print('Vocabulary size : {}'.format(len(vectorizer.vocabulary_)))

    clf = NBSVM()
    clf.fit(X_train, y_train)
    print('Test accuracy : {}'.format(clf.score(X_test, y_test)))

    y_pred = clf.predict(X_test)
    print('F1 score : {}'.format(f1_score(y_test, y_pred, average='macro')))

    # NOTE(review): ROC is computed from hard 0/1 predictions, not scores,
    # so the curve has a single operating point — confirm this is intended.
    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=1)
    roc_auc = auc(fpr, tpr)
    print('AUC of emotionals : {}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc, 'nbsvm_emotional_roc.png')

    fpr, tpr, _ = roc_curve(y_test, y_pred, pos_label=0)
    roc_auc = auc(fpr, tpr)
    print('AUC of rationals : {}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc, 'nbsvm_rational_roc.png')
def main():
    """Train a Keras attention model with auxiliary emotional-dictionary features.

    Side effects: prints shapes/score/accuracy/classification report and
    writes roc.png, cnf.png, attention_vec.png and attention_word.txt to
    the working directory.
    """
    emotionals, rationals = emotional_rational()
    preprocessor = Preprocessor()
    emotionals = preprocessor.parse_sentences(emotionals)
    rationals = preprocessor.parse_sentences(rationals)
    # Truncate rationals so both classes contribute the same number of samples.
    rationals = rationals[:len(emotionals)]
    sentences = emotionals + rationals
    # One-hot labels: [0, 1] = emotional, [1, 0] = rational.
    Y = np.array([[0, 1]] * len(emotionals) + [[1, 0]] * len(rationals))

    # Tokenize on the 200 most frequent words and pad to a fixed length.
    max_features = 200
    tokenizer = Tokenizer(num_words=max_features, split=' ')
    tokenizer.fit_on_texts(sentences)
    X = tokenizer.texts_to_sequences(sentences)
    X = pad_sequences(X, maxlen=MAX_LEN)

    # --- Auxiliary dictionary-based emotional features ---
    dict_loader = EmotionalDict('dataset/nouns', 'dataset/verbs')
    emotional_dict = dict_loader.load()
    features_loader = AdditionalFeatures(sentences, emotional_dict)
    add_features = features_loader.emotional_features()

    # BUGFIX: the auxiliary features used to be split at a fixed index (848)
    # while X/Y were shuffled by train_test_split, so the train/test rows no
    # longer lined up with their features. Splitting all three arrays in one
    # call applies the same shuffle/split to each and keeps them aligned.
    X_train, X_test, Y_train, Y_test, x_aux_train, x_aux_test = train_test_split(
        X, Y, add_features, test_size=0.33, random_state=42)
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)

    model = build_model(x_aux_train.shape)
    epochs = 15
    batch_size = 32
    model.fit({'main_input': X_train, 'add_input': x_aux_train}, Y_train,
              epochs=epochs, batch_size=batch_size, verbose=2)
    score, acc = model.evaluate({'main_input': X_test, 'add_input': x_aux_test},
                                Y_test, verbose=2, batch_size=batch_size)
    print('score: {}'.format(score))
    print('acc: {}'.format(acc))

    Y_pred = model.predict({'main_input': X_test, 'add_input': x_aux_test},
                           batch_size=1, verbose=2)
    print(classification_report(Y_test[:, 1], np.round(Y_pred[:, 1]),
                                target_names=['rationals', 'emotionals']))
    fpr, tpr, _ = roc_curve(Y_test[:, 1], Y_pred[:, 1])
    roc_auc = auc(fpr, tpr)
    plot_roc_curve(fpr, tpr, roc_auc, 'roc.png')
    cnf_matrix = confusion_matrix(Y_test[:, 1], np.round(Y_pred[:, 1]))
    plot_confusion_matrix(cnf_matrix, ['rationals', 'emotionals'], 'cnf.png')

    # Average the attention activations over samples and visualize them.
    attention_vector = np.mean(
        get_activations(model, X_test, True, 'attention_vec')[0], axis=2).squeeze()
    attention_vector = np.mean(attention_vector, axis=0)
    import matplotlib.pyplot as plt
    import pandas as pd
    pd.DataFrame(attention_vector, columns=['attention (%)']).plot(
        kind='bar', title='Attention')
    plt.savefig('attention_vec.png')

    # NOTE(review): these indices are positions in the attention vector, yet
    # they are looked up in the (1-based) tokenizer word index — confirm this
    # mapping is intended. KeyErrors (e.g. index 0) are deliberately skipped.
    attention_vector_indices = np.argsort(attention_vector)[::-1]
    word_index_inv = {v: k for k, v in tokenizer.word_index.items()}
    with open('attention_word.txt', 'w') as f:
        for i, attention_index in enumerate(attention_vector_indices, start=1):
            try:
                print('No.{} : {}'.format(i, word_index_inv[attention_index]), file=f)
            except KeyError:
                continue