def main(): print "Creating Train File..." Data.CreateDataFile("mails-train", "mails-train.txt") print "Initializing Train File..." trainingSet = Data.LoadFile("mails-train.txt") print "Creating Test File..." Data.CreateDataFile("mails-test", "mails-test.txt") print "Initializing Test File..." testSet = Data.LoadFile("mails-test.txt") print "Extracting Classes.." classes = Train.ExtractClasses(trainingSet) print "Training NBC..." vocabulary, prior, condprob = Train.TrainMultinomialNaiveBayes( classes, trainingSet) print "Testing Accuracy..." percentage = Test.Accuracy(classes, vocabulary, prior, condprob, testSet) print "The percentage of correct predictions is ", 100 * percentage, "percent." print "Get Random Document..." testDocument = Data.GetDocument(testSet) print "Do A Time Measurement of the Application of the NBC..." print "The time is took to do a single application of the NBC on a document is", Test.TimeMeasure( classes, vocabulary, prior, condprob, testDocument), "seconds." print "Applying NBC on Document..." topClass, score = Classify.ApplyMultinomialNaiveBayes( classes, vocabulary, prior, condprob, testDocument['document'])
def data(self): print "Creating Train File..." Data.CreateDataFile(self.trainDir, self.folder(self.trainDir) + ".txt") print "Initializing Train File..." self.trainingSet = Data.LoadFile(self.folder(self.trainDir) + ".txt") print "Creating Test File..." Data.CreateDataFile(self.testDir, self.folder(self.testDir) + ".txt") print "Initializing Test File..." self.testSet = Data.LoadFile(self.folder(self.testDir )+ ".txt") print "Extracting Classes.." self.classes = Train.ExtractClasses(self.trainingSet) print "Done."
def accuracyOnTest(self): print "Calculating Accuracy..." print "Creating Test File..." Data.CreateDataFile(self.testDir, self.folder(self.testDir) + ".txt") print "Initializing Test File..." self.testSet = Data.LoadFile(self.folder(self.testDir)+ ".txt") percentage = Test.Accuracy(self.classes, self.vocabulary, self.prior, self.condprob, self.testSet) print "The percentage of correct predictions is ",100*percentage,"percent." print "Done."
def load(self): f = askopenfile(mode='r', defaultextension=".train") if f is None: # askopenfile return `None` if dialog closed with "cancel". return self.classes, self.vocabulary, self.prior, self.condprob = pickle.load(f) datasets = [{"trainSet": "mails-train", "testSet": "mails-test"}, {"trainSet": "blogs-train", "testSet": "blogs-test"}] if self.CheckVar1.get() == 1: dataset = datasets[0] if self.CheckVar2.get() == 1: dataset = datasets[1] self.testSet = Data.LoadFile(dataset['testSet'] + ".txt") f.close() # `()` was missing. print "Loaded."