#get key (class) with maximum value (probability) key = max(prob, key=prob.get) return key def testFile(self, filePath): if self.version == 'multivariant': return self.multivariantTestFile(filePath) elif self.version == 'multinomial': return self.multinomialTestFile(filePath) if __name__ == "__main__": """ NO crossfold validation """ """bayes = Bayes('multivariant') # 3rd arg = True for crossvalidation, false for regular, delete clean data or it won't take effect bayes.initalPreprocess("data/DR", "data/DT", "data/L", "data/TEST", "custom", 1, False) validate = CrossValidate("cleanData") featureSelector = FeatureSelector() validate.regularBayes(bayes, featureSelector)""" """ USING crossfold validation """ bayes = Bayes('multinomial') # 3rd arg = True for crossvalidation, false for regular, delete clean data or it won't take effect bayes.initalPreprocess("data/DR", "data/DT", "data/L", "data/TEST", None,1, True) validate = CrossValidate("cleanData") featureSelector = FeatureSelector() accuracy = validate.crossFoldOnBayes(bayes, featureSelector) print "Multivariant Bayes Accuracy: ", accuracy
# Each section below evaluates one classifier on the same DR/DT/L/TEST inputs
# and appends its results to finalOutput as "<method>,<token>" rows.
# finalOutput, DR, DT, L, TEST and DISPLAY_ACCURACY are defined outside this
# excerpt.

# --- SuperGrep ---
method = "SuperGrep"
print method
# Second positional argument is ignore_errors=True, so a missing cleanData/
# directory is not an error.
shutil.rmtree("cleanData/", True)
grep = Grep(True)
grep.initalPreprocess(DR, DT, L, TEST)
# Presumably writes cleanData/output.txt, which is read back just below --
# TODO confirm in Grep.testDirToOutput.
grep.testDirToOutput("cleanData/TEST/", "cleanData/" )
with open("cleanData/output.txt") as methodOutput:
    # read().split() splits on any whitespace, so every whitespace-separated
    # token of the output file becomes its own "<method>,<token>" row.
    for line in methodOutput.read().split():
        finalOutput.write(method + "," + line + "\n")

# --- Multivariant Bayes with mutual-information feature selection ---
method = "MultivariantBayes"
print method
# Start again from an empty cleanData/ so the previous run's files are gone.
shutil.rmtree("cleanData/", True)
bayes = Bayes('multivariant')
bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)
validate = CrossValidate("cleanData")
featureSelector = MIFeatureSelector()
validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
with open("cleanData/output.txt") as methodOutput:
    for line in methodOutput.read().split():
        finalOutput.write(method + "," + line + "\n")

# --- Multinomial Bayes with the plain feature selector ---
method = "MultinomialBayes"
print method
shutil.rmtree("cleanData/", True)
bayes = Bayes('multinomial')
bayes.initalPreprocess(DR, DT, L, TEST, "custom", 1, False)
validate = CrossValidate("cleanData")
featureSelector = FeatureSelector() # Multinomial does much worse with MIFeatureSelector()
validate.regularBayes(bayes, featureSelector, DISPLAY_ACCURACY)
# The body of this with-statement continues past the end of this excerpt.
with open("cleanData/output.txt") as methodOutput:
self.docWords = {} self.knownClasses = {} self.classDocs = { docClass : set() for docClass in self.docClasses } allWords = set() for docClass in self.docClasses: for docPath in trainData[docClass]: self.knownClasses[docPath] = docClass self.classDocs[docClass].add(docPath) with open(docPath) as docFile: self.docWords[docPath] = set(docFile.read().split()) allWords.update(self.docWords[docPath]) info = Counter() for word in allWords: info[word] = self.mutualInformation(word) top = info.most_common()[0:numFeatures] return [ word for (word, mutualInfo) in top ] if __name__ == "__main__": classifier = Perceptron() print "Preprocessing" classifier.initalPreprocess("web", 1, True) # classifier.initalPreprocess("web",1) # classifier.initalPreprocess(None,3) # classifier.initalPreprocess("web",3) validate = CrossValidate("cleanData") featureSelector = MIFeatureSelector() accuracy = validate.crossFoldOnPerceptron(classifier, featureSelector) print "Perceptron Accuracy: ",accuracy