Exemple #1
0
def exercise3(corpus, n, out):
    """Train a Naive Bayes author classifier on ``corpus`` through ``doExercise``.

    The feature list combines a "contains frequent word" feature (built from
    the frequent words of the corpus word lists) with the distinct-words,
    most-occurring-word, word-count and negation-count features from
    ``review_features``.

    Args:
        corpus: Reviews to process; handed to ``getWordListsFromXForm`` and
            ``doExercise`` unchanged.
        n: Forwarded as-is to ``doExercise`` (meaning defined there).
        out: Forwarded as-is to ``doExercise`` (meaning defined there).

    Returns:
        A ``(reviewAuthorXForm, features, classifier)`` tuple, where
        ``classifier`` is whatever ``doExercise`` returns.
    """
    word_lists = getWordListsFromXForm(corpus, reviewAuthorXForm)
    frequent_words = review_features.freqWordList(word_lists)
    contains_frequent = review_features.createContainsFeature(frequent_words, 'freq')

    features = [
        contains_frequent,
        review_features.distinctWordsFeature,
        review_features.mostOccurringWordFeature,
        review_features.numWordsFeature,
        review_features.numNegationsFeature,
    ]

    # binaryrms, n and out are passed straight through; doExercise decides
    # how they are used.
    trained = doExercise(
        corpus,
        reviewAuthorXForm,
        nltk.NaiveBayesClassifier.train,
        features,
        binaryrms,
        n,
        out,
    )
    return reviewAuthorXForm, features, trained
Exemple #2
0
def exercise4(corpus):
    """Train an author classifier and draw its confusion matrix over ``corpus``.

    A Naive Bayes classifier is obtained from ``doExercise`` with the same
    feature list shape as the other exercises. Every review in ``corpus`` is
    then classified, and the (predicted, actual) author pair is recorded in a
    confusion matrix that is finally drawn with ``confusion_matrix.drawMatrix``.

    Args:
        corpus: Iterable of reviews exposing ``getAuthorName()``; it is
            iterated more than once, so it must be re-iterable.

    Returns:
        A ``(reviewAuthorXForm, features, classifier)`` tuple.
    """
    word_lists = getWordListsFromXForm(corpus, reviewAuthorXForm)
    frequent_words = review_features.freqWordList(word_lists)
    contains_frequent = review_features.createContainsFeature(frequent_words, 'freq')

    features = [
        contains_frequent,
        review_features.distinctWordsFeature,
        review_features.mostOccurringWordFeature,
        review_features.numWordsFeature,
        review_features.numNegationsFeature,
    ]

    trained = doExercise(
        corpus,
        reviewAuthorXForm,
        nltk.NaiveBayesClassifier.train,
        features,
    )

    # One matrix label per distinct author name found in the corpus.
    author_names = [entry.getAuthorName() for entry in corpus]
    labels = list(set(author_names))
    matrix = confusion_matrix.initMatrix(labels)

    for entry in corpus:
        actual = entry.getAuthorName()
        # toFeatureSetDatum returns an indexable result; element 0 is what
        # the classifier is given.
        datum = toFeatureSetDatum(entry, reviewAuthorXForm, features)
        predicted = trained.classify(datum[0])
        confusion_matrix.keepScore(predicted, actual, matrix)

    confusion_matrix.drawMatrix(matrix, 30)
    return reviewAuthorXForm, features, trained