featureSelector = MIFeatureSelector() validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY) with open("cleanData/output.txt") as methodOutput: for line in methodOutput.read().split(): finalOutput.write(method + "," + line + "\n") method = "MultinomialBayes" print method shutil.rmtree("cleanData/", True) bayes = Bayes('multinomial') bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False) validate = CrossValidate("cleanData") featureSelector = FeatureSelector() # Multinomial does much worse with MIFeatureSelector() validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY) with open("cleanData/output.txt") as methodOutput: for line in methodOutput.read().split(): finalOutput.write(method + "," + line + "\n") method = "Perceptron" print method shutil.rmtree("cleanData/", True) classifier = Perceptron() classifier.initalPreprocess(DR, DT, L, TEST, "custom", 1, False) validate = CrossValidate("cleanData") featureSelector = MIFeatureSelector() accuracy = validate.regularPerceptron(classifier, featureSelector, DISPLAY_ACCURACY) with open("cleanData/output.txt") as methodOutput: for line in methodOutput.read().split(): finalOutput.write(method + "," + line + "\n") shutil.rmtree("cleanData/", True)
# NOTE(review): the statements up to and including the `return` are the tail
# of a method (they use `self`, `trainData` and `numFeatures`) whose `def`
# header lies above this chunk. The original indentation was lost; re-indent
# them under that header when restoring the file layout.

# Per-document and per-class lookup tables, rebuilt from scratch on each call.
self.docWords = {}  # document path -> set of whitespace-separated tokens in that file
self.knownClasses = {}  # document path -> class label it was listed under
self.classDocs = { docClass : set() for docClass in self.docClasses }  # class label -> set of document paths
allWords = set()  # vocabulary: union of the token sets of all training documents
for docClass in self.docClasses:
    # trainData[docClass] is iterated and each item is passed to open(),
    # i.e. it yields the file paths of the documents of that class.
    for docPath in trainData[docClass]:
        self.knownClasses[docPath] = docClass
        self.classDocs[docClass].add(docPath)
        with open(docPath) as docFile:
            self.docWords[docPath] = set(docFile.read().split())
        allWords.update(self.docWords[docPath])
# Score every vocabulary word with self.mutualInformation (defined outside
# this chunk; presumably it reads the tables built above -- confirm).
info = Counter()
for word in allWords:
    info[word] = self.mutualInformation(word)
# most_common() orders entries from highest to lowest score; the slice keeps
# the first numFeatures of them.
top = info.most_common()[0:numFeatures]
# Return only the words, dropping their scores.
return [ word for (word, mutualInfo) in top ]

# Script entry point: preprocess the data, then print the accuracy returned
# by CrossValidate.crossFoldOnPerceptron for a Perceptron using
# MIFeatureSelector.
if __name__ == "__main__":
    classifier = Perceptron()
    print "Preprocessing"
    classifier.initalPreprocess("web", 1, True)
    # Alternative preprocessing calls kept by the original author:
    # classifier.initalPreprocess("web",1)
    # classifier.initalPreprocess(None,3)
    # classifier.initalPreprocess("web",3)
    validate = CrossValidate("cleanData")
    featureSelector = MIFeatureSelector()
    accuracy = validate.crossFoldOnPerceptron(classifier, featureSelector)
    print "Perceptron Accuracy: ",accuracy