예제 #1
0
 # Finish the classifier run configured before this point: `bayes`, `validate`,
 # `method` and `finalOutput` are all bound earlier in the script.
 # NOTE(review): MIFeatureSelector is presumably mutual-information based - confirm.
 featureSelector = MIFeatureSelector()
 validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
 # Append this run's results to the combined report, prefixed with the method
 # name. read().split() splits on ANY whitespace, so `line` is really one
 # whitespace-delimited token of cleanData/output.txt, not a physical line.
 # NOTE(review): presumably regularBayes is what writes cleanData/output.txt - confirm.
 with open("cleanData/output.txt") as methodOutput:
     for line in methodOutput.read().split():
         finalOutput.write(method + "," + line + "\n")
 
 # --- MultinomialBayes run ---------------------------------------------------
 # `method` is the label prefixed to every result row written to finalOutput.
 method = "MultinomialBayes"
 print method
 # Remove any cleanData/ directory left by a previous run; the second positional
 # argument is shutil.rmtree's ignore_errors flag, so a missing directory is fine.
 shutil.rmtree("cleanData/", True) 
 bayes = Bayes('multinomial')
 # Argument meanings are defined by initalPreprocess (not visible here).
 # NOTE(review): presumably this regenerates the cleanData/ directory - confirm.
 bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)            
 validate = CrossValidate("cleanData")
 featureSelector = FeatureSelector()     # Multinomial does much worse with MIFeatureSelector()
 validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
 # Append this run's results to the combined report. read().split() splits on
 # ANY whitespace, so `line` is one whitespace-delimited token of the file.
 with open("cleanData/output.txt") as methodOutput:
     for line in methodOutput.read().split():
         finalOutput.write(method + "," + line + "\n")
         
 # --- Perceptron run -----------------------------------------------------------
 # `method` is the label prefixed to every result row written to finalOutput.
 method = "Perceptron"
 print method
 # Remove any cleanData/ directory left by the previous run; the second
 # positional argument is shutil.rmtree's ignore_errors flag.
 shutil.rmtree("cleanData/", True)
 classifier = Perceptron()
 # Argument meanings are defined by initalPreprocess (not visible here).
 classifier.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)
 validate = CrossValidate("cleanData")
 featureSelector = MIFeatureSelector()
 # NOTE(review): `accuracy` is not read anywhere in the visible code; the results
 # that get reported are taken from cleanData/output.txt below.
 accuracy = validate.regularPerceptron(classifier, featureSelector, DISPLAY_ACCURACY)
 # Append this run's results to the combined report. read().split() splits on
 # ANY whitespace, so `line` is one whitespace-delimited token of the file.
 with open("cleanData/output.txt") as methodOutput:
     for line in methodOutput.read().split():
         finalOutput.write(method + "," + line + "\n")
         
 # Final cleanup: delete the working directory, ignoring errors if it is absent.
 shutil.rmtree("cleanData/", True)
예제 #2
0
		self.docWords = {}
		self.knownClasses = {}
		self.classDocs = { docClass : set() for docClass in self.docClasses }
		allWords = set()
		for docClass in self.docClasses:
			for docPath in trainData[docClass]:
				self.knownClasses[docPath] = docClass
				self.classDocs[docClass].add(docPath)
				with open(docPath) as docFile:
					self.docWords[docPath] = set(docFile.read().split())
				allWords.update(self.docWords[docPath])
		
		info = Counter()
		for word in allWords:
			info[word] = self.mutualInformation(word)
		top = info.most_common()[0:numFeatures]
		return [ word for (word, mutualInfo) in top ]


if __name__ == "__main__":
	# Script entry point: preprocess the data with a Perceptron classifier, run
	# CrossValidate.crossFoldOnPerceptron with an MIFeatureSelector, and print
	# the value it returns as the perceptron accuracy. (Python 2 print syntax.)
	classifier = Perceptron()
	print "Preprocessing"
	# Argument meanings are defined by initalPreprocess (not visible here).
	classifier.initalPreprocess("web", 1, True)
	# Alternative preprocessing configurations, kept for experimentation:
	# classifier.initalPreprocess("web",1)
	# classifier.initalPreprocess(None,3)
	# classifier.initalPreprocess("web",3)
	# NOTE(review): presumably "cleanData" is the directory initalPreprocess
	# populates - confirm.
	validate = CrossValidate("cleanData")
	featureSelector = MIFeatureSelector()
	accuracy = validate.crossFoldOnPerceptron(classifier, featureSelector)
	print "Perceptron Accuracy: ",accuracy