Example #1: test_nhamPersisted
def test_nhamPersisted(self):
    """
    L{SQLite3Classifier} tracks, in a database, the number of ham messages
    it has been trained with.
    """
    self.classifier.train(StringIO("very nice words"), False)
    bayes = spam._SQLite3Classifier(self.path)
    self.assertEqual(bayes.nham, 1)
Example #2: test_nspamPersisted
def test_nspamPersisted(self):
    """
    L{SQLite3Classifier} tracks, in a database, the number of spam messages
    it has been trained with.
    """
    self.classifier.train(StringIO("spam words of spamfulness"), True)
    bayes = spam._SQLite3Classifier(self.path)
    self.assertEqual(bayes.nspam, 1)
Example #3: test_hamClassificationWithoutCache
def test_hamClassificationWithoutCache(self):
    """
    Like L{test_spamClassification}, but ensure no instance cache is used to
    satisfy word info lookups.
    """
    self.classifier.train(StringIO("very nice words"), False)
    classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
    self.assertTrue(classifier.score(StringIO("words, very nice")) < 0.01)
Example #4: test_spamClassificationWithoutCache
def test_spamClassificationWithoutCache(self):
    """
    Like L{test_spamClassification}, but ensure no instance cache is used to
    satisfy word info lookups.
    """
    self.classifier.train(StringIO("spam words of spamfulness"), True)
    classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
    self.assertTrue(
        classifier.score(StringIO("spamfulness words of spam")) > 0.99)
Example #5: test_hamTokenIncremented
def test_hamTokenIncremented(self):
    """
    Encountered on a subsequent ham training operation, an existing word
    info row has its spam count left alone and its ham count incremented.
    """
    self.classifier.train(StringIO("spam bad puppies"), True)
    self.classifier.train(StringIO("justice sunshine puppies"), False)
    bayes = spam._SQLite3Classifier(self.path)
    wordInfo = bayes._get(u"puppies")
    self.assertEqual((u"puppies", 1, 1), wordInfo)
Example #6: test_hamTokenRecorded
def test_hamTokenRecorded(self):
    """
    The first time a token is encountered during ham training, a row is
    inserted into the database counting it as never a spam token, once a ham
    token.
    """
    self.classifier.train(StringIO("justice sunshine puppies"), False)
    bayes = spam._SQLite3Classifier(self.path)
    wordInfo = bayes._get(u"sunshine")
    self.assertEqual((u"sunshine", 0, 1), wordInfo)
Example #7: test_spamTokenRecorded
def test_spamTokenRecorded(self):
    """
    The first time a token is encountered during spam training, a row is
    inserted into the database counting it as once a spam token, never a ham
    token.
    """
    self.classifier.train(StringIO("spam bad gross"), True)
    bayes = spam._SQLite3Classifier(self.path)
    wordInfo = bayes._get(u"spam")
    self.assertEqual((u"spam", 1, 0), wordInfo)
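The three token tests above all read back a word-info row of the form (word, spam count, ham count) via bayes._get(). The snippet below is a minimal, self-contained sketch of that kind of bookkeeping in plain sqlite3; the bayes_words table and the record_token helper are illustrative assumptions, not the actual schema or API of spam._SQLite3Classifier.

import sqlite3

# Illustrative schema only; the real _SQLite3Classifier schema is not shown here.
SCHEMA = """
CREATE TABLE IF NOT EXISTS bayes_words (
    word TEXT PRIMARY KEY,
    nspam INTEGER NOT NULL DEFAULT 0,
    nham INTEGER NOT NULL DEFAULT 0
)
"""

def record_token(conn, word, isSpam):
    # Insert the row the first time a token is seen, then bump the matching counter.
    conn.execute(
        "INSERT OR IGNORE INTO bayes_words (word, nspam, nham) VALUES (?, 0, 0)",
        (word,))
    # The column name is derived from a boolean, never from user input.
    column = "nspam" if isSpam else "nham"
    conn.execute(
        "UPDATE bayes_words SET %s = %s + 1 WHERE word = ?" % (column, column),
        (word,))

conn = sqlite3.connect(":memory:")
conn.execute(SCHEMA)
record_token(conn, u"puppies", True)    # spam training: (puppies, 1, 0)
record_token(conn, u"puppies", False)   # ham training:  (puppies, 1, 1)
print(conn.execute(
    "SELECT word, nspam, nham FROM bayes_words WHERE word = ?",
    (u"puppies",)).fetchone())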
Example #8: test_largeDocumentClassification
def test_largeDocumentClassification(self):
    """
    A document with more than 999 tokens can be successfully classified.
    """
    words = []
    for i in range(1000):
        word = "word%d" % (i,)
        words.append(word)
    document = " ".join(words)
    self.classifier.train(StringIO(document), False)

    classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
    self.assertTrue(classifier.score(StringIO(document)) < 0.01)
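The 999-token figure in the example above lines up with SQLite's historical default limit of 999 bound parameters per statement (newer builds allow far more). Whatever strategy _SQLite3Classifier actually uses is not shown here; the sketch below is only a generic illustration of the usual workaround, looking tokens up in chunks, reusing the illustrative bayes_words table from the earlier sketch.

MAX_PARAMS = 999  # default SQLITE_MAX_VARIABLE_NUMBER on older SQLite builds

def fetch_word_rows(conn, words):
    # Query in chunks so no single SELECT needs more than MAX_PARAMS bound
    # parameters, however many tokens the document contains.
    words = list(words)
    rows = []
    for start in range(0, len(words), MAX_PARAMS):
        chunk = words[start:start + MAX_PARAMS]
        placeholders = ", ".join("?" * len(chunk))
        rows.extend(conn.execute(
            "SELECT word, nspam, nham FROM bayes_words WHERE word IN (%s)"
            % (placeholders,),
            chunk))
    return rows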
Example #9: setUp
def setUp(self):
    self.path = self.mktemp()
    self.bayes = spam._SQLite3Classifier(self.path)
    self.classifier = Hammie(self.bayes, mode='r')
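For context, every snippet on this page is a method of one test case built around the setUp above. The outline below is a minimal sketch of how the pieces fit together; the class name and the import paths for StringIO, the TestCase base (self.mktemp() suggests Twisted trial), Hammie, and the spam module are assumptions that will depend on the project these examples come from.

# Sketch only; the imports and class name below are assumptions.
from io import StringIO  # StringIO.StringIO on Python 2

from twisted.trial import unittest      # assumed: provides TestCase.mktemp()
from spambayes.hammie import Hammie     # assumed: SpamBayes' Hammie wrapper

import spam  # assumed: whatever module defines _SQLite3Classifier


class SQLite3ClassifierTests(unittest.TestCase):
    def setUp(self):
        self.path = self.mktemp()
        self.bayes = spam._SQLite3Classifier(self.path)
        self.classifier = Hammie(self.bayes, mode='r')

    def test_nhamPersisted(self):
        self.classifier.train(StringIO("very nice words"), False)
        bayes = spam._SQLite3Classifier(self.path)
        self.assertEqual(bayes.nham, 1)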