for fileid in movie_reviews.fileids(category)]
random.shuffle(documents)

allWords = []
for w in movie_reviews.words():
	allWords.append(w.lower())

allWords = nltk.FreqDist(allWords)
wordFeatures = list(allWords.keys())[:3000]


def findFeatures(document):
	words = set(document)
	features = {}
	for w in wordFeatures:
		features[w] = (w in words)
	return features
	 

#print((findFeatures(movie_reviews.words('neg/cv000_29416.txt'))))
featureSets = [(findFeatures(rev), category) for (rev, category) in documents ]

new_training_set = featureSets[:100]
testing_set = featureSets[100:]


cl = NaiveBayesClassifier(new_training_set)
print(cl.accuracy(testing_set))


Example #2
0
def updateNaiveBayes():
    cl = NaiveBayesClassifier(new_training_set)
    print(cl.accuracy(testing_set))