Beispiel #1
0
def main():
    emotionals, rationals = emotional_rational()

    preprocessor = Preprocessor()
    emotionals = preprocessor.parse_sentences(emotionals)
    rationals = preprocessor.parse_sentences(rationals)

    train_pos = emotionals[:len(emotionals) // 2]
    train_neg = rationals[:len(rationals) // 2]

    test_pos = emotionals[len(emotionals) // 2:]
    test_neg = rationals[len(rationals) // 2:]

    vectorizer = CountVectorizer()

    X_train = vectorizer.fit_transform(train_pos + train_neg)
    y_train = np.array([1] * len(train_pos) + [0] * len(train_neg))

    X_test = vectorizer.transform(test_pos + test_neg)
    y_test = np.array([1] * len(test_pos) + [0] * len(test_neg))

    print('Vocabulary size : {}'.format(len(vectorizer.vocabulary_)))

    nbsvm = NBSVM()
    nbsvm.fit(X_train, y_train)

    print('Test accuracy : {}'.format(nbsvm.score(X_test, y_test)))

    y_pred = nbsvm.predict(X_test)
    print('F1 score : {}'.format(f1_score(y_test, y_pred, average='macro')))

    fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=1)
    roc_auc = auc(fpr, tpr)
    print('AUC of emotionals : {}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc, 'nbsvm_emotional_roc.png')

    fpr, tpr, thresholds = roc_curve(y_test, y_pred, pos_label=0)
    roc_auc = auc(fpr, tpr)
    print('AUC of rationals : {}'.format(roc_auc))
    plot_roc_curve(fpr, tpr, roc_auc, 'nbsvm_rational_roc.png')
Beispiel #2
0
def main():
    emotionals, rationals = emotional_rational()

    preprocessor = Preprocessor()
    emotionals = preprocessor.parse_sentences(emotionals)
    rationals = preprocessor.parse_sentences(rationals)

    emotionals = emotionals[:len(emotionals)]
    rationals = rationals[:len(emotionals)]

    sentences = emotionals + rationals
    Y = np.array([[0, 1]] * len(emotionals) + [[1, 0]] * len(rationals))

    max_features = 200
    tokenizer = Tokenizer(num_words=max_features, split=' ')
    tokenizer.fit_on_texts(sentences)

    X = tokenizer.texts_to_sequences(sentences)
    X = pad_sequences(X, maxlen=MAX_LEN)

    epochs = 15

    # --- Add Features ---
    dict_loader = EmotionalDict('dataset/nouns', 'dataset/verbs')
    emotional_dict = dict_loader.load()

    features_loader = AdditionalFeatures(emotionals+rationals, emotional_dict)
    add_features = features_loader.emotional_features()
    ######################

    x_aux_train = add_features[:848]
    x_aux_test = add_features[848:]

    model = build_model(x_aux_train.shape)

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=42)
    print(X_train.shape, Y_train.shape)
    print(X_test.shape, Y_test.shape)

    batch_size = 32
    model.fit({'main_input': X_train, 'add_input': x_aux_train}, Y_train, epochs=epochs, batch_size=batch_size, verbose=2)

    score, acc = model.evaluate({'main_input': X_test, 'add_input': x_aux_test}, Y_test, verbose=2, batch_size=batch_size)

    print('score: {}'.format(score))
    print('acc: {}'.format(acc))

    Y_pred = model.predict({'main_input': X_test, 'add_input': x_aux_test}, batch_size=1, verbose=2)

    print(classification_report(Y_test[:, 1], np.round(Y_pred[:, 1]), target_names=['rationals', 'emotionals']))

    fpr, tpr, _ = roc_curve(Y_test[:, 1], Y_pred[:, 1])
    roc_auc = auc(fpr, tpr)
    plot_roc_curve(fpr, tpr, roc_auc, 'roc.png')

    cnf_matrix = confusion_matrix(Y_test[:, 1], np.round(Y_pred[:, 1]))
    plot_confusion_matrix(cnf_matrix, ['rationals', 'emotionals'], 'cnf.png')

    attention_vector = np.mean(get_activations(model, X_test, True, 'attention_vec')[0], axis=2).squeeze()
    attention_vector = np.mean(attention_vector, axis=0)

    import matplotlib.pyplot as plt
    import pandas as pd
    pd.DataFrame(attention_vector, columns=['attention (%)']).plot(kind='bar', title='Attention')
    plt.savefig('attention_vec.png')

    attention_vector_indices = np.argsort(attention_vector)[::-1]

    word_index = tokenizer.word_index
    word_index_inv = {v: k for k, v in word_index.items()}

    with open('attention_word.txt', 'w') as f:
        for i, attention_index in enumerate(attention_vector_indices, start=1):
            try:
                print('No.{} : {}'.format(i, word_index_inv[attention_index]), file=f)
            except KeyError:
                continue