# Sentiment vocabularies: each list is read from its own text file by
# spanish_tools.read_file_to_list (presumably one entry per line -- confirm
# against the helper).
# NOTE(review): these paths contain a doubled separator
# ('classifier//vocabularies'); it looks unintentional -- confirm before
# normalising, since other loads of the same directory may share it.
negative_nouns      = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/words/negative_nouns.txt')
positive_others     = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/words/positive_others.txt')
negative_others     = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/words/negative_others.txt')
# Adversative conjunctions are kept as a list of their own; they are not part
# of the merged positive_words / negative_words vocabularies built below.
adversative_conj    = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/words/adversative_conjunctions.txt')

# Merge the per-category word lists into one positive and one negative
# vocabulary. Every entry is passed through spanish_tools.remove_accents
# because comparison is made without accents to increase precision.
# Built with comprehensions so no loop variables are left behind in the
# module namespace.
positive_words = [
    spanish_tools.remove_accents(word)
    for word in (positive_adjectives + positive_adverbs + positive_verbs
                 + positive_nouns + positive_others)
]
negative_words = [
    spanish_tools.remove_accents(word)
    for word in (negative_adjectives + negative_adverbs + negative_verbs
                 + negative_nouns + negative_others)
]


# Feature list vector for Morphosyntactic Model and Classifier, read from
# morphosyntactic_feature_list.txt.
# NOTE(review): presumably the entry order defines the feature-vector layout
# the classifier was trained with -- confirm before editing the file.
feature_list = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/features/morphosyntactic_feature_list.txt')

# Feature list vector for Bigram Model and Classifier, read from
# bigram_feature_list.txt.
bigram_feature_list = spanish_tools.read_file_to_list(this_path + 'comments/classifier//vocabularies/features/bigram_feature_list.txt')

# SVM Classifiers, each restored from a previously saved model file through
# Classifier.load at import time.
# NOTE(review): the main 5-class model is loaded from a file whose name ends
# in '_backup' -- confirm this is the intended production model.
classifier_svm = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Morphosyntactic_Classifier_5_Classes_backup')
# NOTE(review): the variable is named '_3' but the file is
# 'Classifier_SVM_2_classes' -- verify which class count is correct.
classifier_svm_3 = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Classifier_SVM_2_classes')
# "Segmented" models, one per pair of adjacent labels (1/2, 2/3, 3/4, 4/5)
# judging by the file names -- presumably pairwise refiners; confirm usage.
classifier_svm_1_2 = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Classifier_SVM_segmented_1_2')
classifier_svm_2_3 = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Classifier_SVM_segmented_2_3')
classifier_svm_3_4 = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Classifier_SVM_segmented_3_4')
classifier_svm_4_5 = Classifier.load(this_path + 'comments/classifier/objects/classifiers/SVM/Classifier_SVM_segmented_4_5')

# MNB Classifiers: the 5-class bigram model (file name suggests 2500 bigram
# features -- TODO confirm).
classifier_mnb = Classifier.load(this_path + 'comments/classifier/objects/classifiers/MNB/Bigram_2500_Classifier_5_Classes')
Exemple #2
0
# Sentiment vocabularies, split by polarity and part of speech. Each list is
# read from its own text file by spanish_tools.read_file_to_list (presumably
# one entry per line -- confirm against the helper).
positive_adverbs    = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/positive_adverbs.txt')
negative_adverbs    = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/negative_adverbs.txt')
positive_verbs      = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/positive_verbs.txt')
negative_verbs      = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/negative_verbs.txt')
positive_nouns      = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/positive_nouns.txt')
negative_nouns      = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/negative_nouns.txt')
# "others": words that carry polarity but fall outside the four categories
# above, judging by the file names -- confirm with the vocabulary files.
positive_others     = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/positive_others.txt')
negative_others     = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/negative_others.txt')
# Adversative conjunctions are kept as a list of their own; they are not part
# of the merged positive_words / negative_words vocabularies built below.
adversative_conj    = spanish_tools.read_file_to_list(this_path + '/vocabularies/words/adversative_conjunctions.txt')

# Merge the per-category word lists into one positive and one negative
# vocabulary. Every entry is passed through spanish_tools.remove_accents
# because comparison is made without accents to increase precision.
# Built with comprehensions so no loop variables are left behind in the
# module namespace.
positive_words = [
    spanish_tools.remove_accents(word)
    for word in (positive_adjectives + positive_adverbs + positive_verbs
                 + positive_nouns + positive_others)
]
negative_words = [
    spanish_tools.remove_accents(word)
    for word in (negative_adjectives + negative_adverbs + negative_verbs
                 + negative_nouns + negative_others)
]


# Feature list vector for Morphosyntactic Model and Classifier, read from
# morphosyntactic_feature_list.txt.
# NOTE(review): presumably the entry order defines the feature-vector layout
# the classifier was trained with -- confirm before editing the file.
feature_list = spanish_tools.read_file_to_list(this_path + '/vocabularies/features/morphosyntactic_feature_list.txt')

# Feature list vector for Bigram Model and Classifier, read from
# bigram_feature_list.txt.
bigram_feature_list = spanish_tools.read_file_to_list(this_path + '/vocabularies/features/bigram_feature_list.txt')

# SVM Classifiers: the 5-class morphosyntactic model, restored from a saved
# model file through Classifier.load at import time.
# NOTE(review): the file name ends in '_backup' -- confirm this is the
# intended production model.
classifier_svm = Classifier.load(this_path + '/objects/classifiers/SVM/Morphosyntactic_Classifier_5_Classes_backup')

# MNB Classifiers: the 5-class bigram model (file name suggests 2500 bigram
# features -- TODO confirm).
classifier_mnb = Classifier.load(this_path + '/objects/classifiers/MNB/Bigram_2500_Classifier_5_Classes')
    # Report what was learned, evaluate it, then persist the model.
    print("Classes: " + repr(classifier.classes))

    # kfoldcv receives the classifier's class (not the trained instance), so
    # the evaluation follows whichever classifier type is in use instead of a
    # hard-coded one. The format string below consumes exactly five values,
    # so `performance` must be a 5-tuple; the labels give the expected order.
    performance = kfoldcv(type(classifier), vectors, folds=n_fold)
    print("Accuracy: %.3f\n"
          "Precision: %.3f\n"
          "Recall: %.3f\n"
          "F1: %.3f\n"
          "Stddev:%.3f" % performance)
    print()
    # Confusion matrix of the trained classifier over the same vectors.
    print("Confusion matrix:")
    print(classifier.confusion_matrix(vectors).table)

    # Persist the trained model so the "predict" mode can load it later.
    classifier.save(trained_filename)
elif options["predict"]:
    classifier = Classifier.load(trained_filename)

    print("#Author\tURL\tPrediction\tActual")
    for v in vectors:
        print("%s\t%s\t%s" % (v.name.encode('utf-8'),
                              repr(classifier.classify(v)),
                              repr(v.type)))

    # Documents whose actual class is None have nothing to score against, so
    # they are dropped before performance is calculated. The exception is a
    # set in which no document has a classification at all: that is treated
    # as a blind prediction (e.g. no rating shown on the website, so there is
    # no classification to predict) and no performance metrics are computed.
    # The sizes before and after filtering are both recorded -- presumably so
    # the code that follows can tell these cases apart.
    pre_filter_n = len(vectors)
    fvectors = [vec for vec in vectors if vec.type is not None]
    post_filter_n = len(fvectors)