def exercise3(corpus, n, out):
    """Train an author classifier on ``corpus`` through ``doExercise``.

    The feature list is one "contains" feature built from the frequent-word
    list of the corpus (labelled ``'freq'``), followed by four fixed feature
    functions from ``review_features``.

    ``n`` and ``out`` are not interpreted here; they are forwarded unchanged
    to ``doExercise`` together with ``binaryrms``.
    NOTE(review): their meaning is defined by ``doExercise`` -- confirm there.

    Returns:
        The tuple ``(reviewAuthorXForm, features, classifier)``, where
        ``classifier`` is whatever ``doExercise`` returned.
    """
    # Frequent words are derived from the word lists of the whole corpus.
    frequent = review_features.freqWordList(
        getWordListsFromXForm(corpus, reviewAuthorXForm)
    )

    # The corpus-dependent feature comes first, then the fixed ones.
    features = [review_features.createContainsFeature(frequent, 'freq')]
    features.extend([
        review_features.distinctWordsFeature,
        review_features.mostOccurringWordFeature,
        review_features.numWordsFeature,
        review_features.numNegationsFeature,
    ])

    classifier = doExercise(
        corpus,
        reviewAuthorXForm,
        nltk.NaiveBayesClassifier.train,
        features,
        binaryrms,
        n,
        out,
    )
    return (reviewAuthorXForm, features, classifier)
def exercise4(corpus):
    """Train an author classifier and draw its confusion matrix.

    The classifier is obtained from ``doExercise`` using the same feature
    list shape as the other author exercise: a "contains" feature built from
    the corpus' frequent words (labelled ``'freq'``) plus four fixed feature
    functions from ``review_features``.

    Every review in ``corpus`` is then classified, and each predicted/actual
    author pair is tallied with ``confusion_matrix.keepScore`` before the
    matrix is drawn. ``corpus`` is iterated more than once, so it must be
    re-iterable.
    NOTE(review): the reviews scored here are the same ``corpus`` handed to
    ``doExercise`` -- confirm whether that is the intended evaluation set.

    Returns:
        The tuple ``(reviewAuthorXForm, features, classifier)``.
    """
    frequent = review_features.freqWordList(
        getWordListsFromXForm(corpus, reviewAuthorXForm)
    )
    features = [review_features.createContainsFeature(frequent, 'freq')]
    features.extend([
        review_features.distinctWordsFeature,
        review_features.mostOccurringWordFeature,
        review_features.numWordsFeature,
        review_features.numNegationsFeature,
    ])

    classifier = doExercise(
        corpus,
        reviewAuthorXForm,
        nltk.NaiveBayesClassifier.train,
        features,
    )

    # One matrix label per distinct author name found in the corpus.
    authors = list(set(review.getAuthorName() for review in corpus))
    matrix = confusion_matrix.initMatrix(authors)

    for review in corpus:
        actual = review.getAuthorName()
        datum = toFeatureSetDatum(review, reviewAuthorXForm, features)
        # Only the first element of the datum is passed to the classifier.
        predicted = classifier.classify(datum[0])
        confusion_matrix.keepScore(predicted, actual, matrix)

    # NOTE(review): 30 is passed straight to drawMatrix; presumably a
    # width/size setting -- confirm against confusion_matrix.drawMatrix.
    confusion_matrix.drawMatrix(matrix, 30)
    return (reviewAuthorXForm, features, classifier)