Ejemplo n.º 1
0
 def main():
     """Run the Naive Bayes mail-classification demo from start to finish.

     Steps, in order:
       1. Build and load the training data ("mails-train" -> "mails-train.txt").
       2. Build and load the test data ("mails-test" -> "mails-test.txt").
       3. Extract the classes and train the multinomial Naive Bayes model.
       4. Report the accuracy on the test set.
       5. Fetch one document from the test set, time a single classification
          of it, and classify it.

     Progress is reported on standard output. Every message is emitted through
     a single-argument ``print(...)`` call so the output is the same on
     Python 2 and the function also runs on Python 3.
     """
     print("Creating Train File...")
     Data.CreateDataFile("mails-train", "mails-train.txt")
     print("Initializing Train File...")
     trainingSet = Data.LoadFile("mails-train.txt")

     print("Creating Test File...")
     Data.CreateDataFile("mails-test", "mails-test.txt")
     print("Initializing Test File...")
     testSet = Data.LoadFile("mails-test.txt")

     print("Extracting Classes..")
     classes = Train.ExtractClasses(trainingSet)

     print("Training NBC...")
     vocabulary, prior, condprob = Train.TrainMultinomialNaiveBayes(
         classes, trainingSet)

     print("Testing Accuracy...")
     percentage = Test.Accuracy(classes, vocabulary, prior, condprob,
                                testSet)
     # The double space after "is" reproduces the original output, where the
     # literal ended in a space and print inserted another separator.
     print("The percentage of correct predictions is  %s percent."
           % (100 * percentage,))

     print("Get Random Document...")
     testDocument = Data.GetDocument(testSet)
     # Both calls below must receive the same input: the 'document' entry of
     # the fetched record. Timing the whole record would measure a different
     # input than the one that is actually classified afterwards.
     documentContent = testDocument['document']

     print("Do A Time Measurement of the Application of the NBC...")
     elapsed = Test.TimeMeasure(classes, vocabulary, prior, condprob,
                                documentContent)
     print("The time is took to do a single application of the NBC on a "
           "document is %s seconds." % (elapsed,))

     print("Applying NBC on Document...")
     # NOTE(review): topClass and score are not used after this point in the
     # visible code.
     topClass, score = Classify.ApplyMultinomialNaiveBayes(
         classes, vocabulary, prior, condprob, documentContent)
Ejemplo n.º 2
0
 def TimeMeasure(classes, vocabulary, prior, condprob, document):
     """Return how many seconds one classification of ``document`` takes.

     A timestamp is taken immediately before and immediately after a single
     call to ``Classify.ApplyMultinomialNaiveBayes``; the difference between
     the two ``time.time()`` readings is returned. The classification result
     itself is discarded.
     """
     started_at = time.time()
     Classify.ApplyMultinomialNaiveBayes(
         classes, vocabulary, prior, condprob, document)
     finished_at = time.time()
     elapsed_seconds = finished_at - started_at
     return elapsed_seconds
Ejemplo n.º 3
0
    def testSingleFile(self):
        """Classify one text file chosen by the user through a file dialog.

        Opens a file dialog, reads the selected file, splits its content on
        single spaces, normalizes the resulting words with ``Data.Normalize``
        and classifies them with the model stored on this instance
        (``self.classes``, ``self.vocabulary``, ``self.prior``,
        ``self.condprob``). The predicted class is printed to standard output.

        Returns ``None``; nothing happens if the dialog is cancelled.
        """
        f = askopenfile(mode='r', defaultextension=".txt")
        if f is None:  # askopenfile returns None if the dialog was cancelled.
            return

        # Close the handle as soon as the content is in memory, and do so even
        # if reading fails, so that an error further down cannot leak it.
        try:
            lines = f.read()
        finally:
            f.close()

        # Splitting on a single literal space; consecutive spaces therefore
        # yield empty strings, exactly as the previous regex split did.
        bagOfWords = lines.split(' ')
        singleFile = Data.Normalize(bagOfWords)

        print("Loaded.")
        print("Calculating...")
        topClass, score = Classify.ApplyMultinomialNaiveBayes(
            self.classes, self.vocabulary, self.prior, self.condprob,
            singleFile)
        print("This document belongs to %s" % (topClass,))
        print("Done.")
Ejemplo n.º 4
0
 def Accuracy(classes, vocabulary, prior, condprob, dataset):
     """Return the fraction of documents in ``dataset`` classified correctly.

     ``dataset`` is indexed as ``dataset[c][d]``: iterating it yields the true
     class ``c``, and iterating ``dataset[c]`` yields the keys ``d`` of the
     documents known to belong to that class. Each document is classified with
     ``Classify.ApplyMultinomialNaiveBayes`` and the prediction is compared
     with ``c``.

     The number of tested documents and the number of correct predictions are
     printed to standard output.

     Returns:
         A float between 0 and 1: correct predictions divided by the number of
         tested documents. Returns 0.0 when no documents were tested.
     """
     correct = 0
     totaal = 0
     for c in dataset:
         for d in dataset[c]:
             topClass, score = Classify.ApplyMultinomialNaiveBayes(
                 classes, vocabulary, prior, condprob, dataset[c][d])
             if topClass == c:
                 correct += 1
             totaal += 1

     print("The amount of total tested documents is %s" % (totaal,))
     print("The amount of correct predictions are %s" % (correct,))

     # Nothing was tested: report zero accuracy instead of dividing by zero.
     if totaal == 0:
         return 0.0
     # Force true division; with two ints Python 2 would floor the result to
     # 0 or 1 instead of returning the fraction.
     return float(correct) / totaal