Example 1
    def test_hamClassificationWithoutCache(self):
        """
        Like L{test_hamClassification}, but ensure no instance cache is used
        to satisfy word info lookups.
        """
        self.classifier.train(StringIO("very nice words"), False)
        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(classifier.score(StringIO("words, very nice")) < 0.01)
Example 2
    def test_spamClassificationWithoutCache(self):
        """
        Like L{test_spamClassification}, but ensure no instance cache is used
        to satisfy word info lookups.
        """
        self.classifier.train(StringIO("spam words of spamfulness"), True)
        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(
            classifier.score(StringIO("spamfulness words of spam")) > 0.99)
Example 3
    def test_largeDocumentClassification(self):
        """
        A document with more than 999 tokens can be successfully classified.
        """
        words = []
        for i in range(1000):
            word = "word%d" % (i, )
            words.append(word)
        document = " ".join(words)
        self.classifier.train(StringIO(document), False)

        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(classifier.score(StringIO(document)) < 0.01)
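
The 999 figure in this test presumably reflects SQLite's historical default limit of 999 bound parameters per statement (SQLITE_MAX_VARIABLE_NUMBER), which a classifier would hit if it looked up every token of a large document in a single IN (...) query. The following is a minimal sketch of how such lookups can be chunked to stay under that limit; the bayes table and its columns are assumptions for illustration, not the actual xquotient schema.

import sqlite3

# Historical default of SQLITE_MAX_VARIABLE_NUMBER.
_MAX_BOUND_PARAMETERS = 999

def lookupWordInfo(connection, words):
    """
    Fetch (word, nspam, nham) rows for the given tokens, splitting the
    lookup across several queries so that no single statement binds more
    than 999 parameters.  Table and column names are hypothetical.
    """
    rows = []
    words = list(words)
    for start in range(0, len(words), _MAX_BOUND_PARAMETERS):
        chunk = words[start:start + _MAX_BOUND_PARAMETERS]
        placeholders = ", ".join("?" * len(chunk))
        cursor = connection.execute(
            "SELECT word, nspam, nham FROM bayes WHERE word IN (%s)"
            % (placeholders,), chunk)
        rows.extend(cursor.fetchall())
    return rows

# e.g. rows = lookupWordInfo(sqlite3.connect(path), document.split())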
Example 4
    def setUp(self):
        self.path = self.mktemp()
        self.bayes = spam._SQLite3Classifier(self.path)
        self.classifier = Hammie(self.bayes, mode='r')
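
The fragments above lean on a few names from the enclosing test module. A plausible reconstruction of its imports (hypothetical; the original module header is not shown in these examples) is:

from StringIO import StringIO          # Python 2 in-memory file object
from twisted.trial import unittest     # its TestCase provides mktemp()
from spambayes.hammie import Hammie    # spambayes training/scoring wrapper
from xquotient import spam             # module under test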
Example 5
class SQLite3ClassifierTests(unittest.TestCase):
    """
    Tests for L{xquotient.spam._SQLite3Classifier}, a spambayes classifier which
    persists training data in a SQLite3 database.
    """
    def setUp(self):
        self.path = self.mktemp()
        self.bayes = spam._SQLite3Classifier(self.path)
        self.classifier = Hammie(self.bayes, mode='r')

    def test_nspam(self):
        """
        L{SQLite3Classifier} tracks, in memory, the number of spam messages it
        has been trained with.
        """
        self.classifier.train(StringIO("spam words of spamnfulness"), True)
        self.assertEqual(self.bayes.nspam, 1)

    def test_nspamPersisted(self):
        """
        L{SQLite3Classifier} tracks, in a database, the number of spam messages
        it has been trained with.
        """
        self.classifier.train(StringIO("spam words of spamfulness"), True)
        bayes = spam._SQLite3Classifier(self.path)
        self.assertEqual(bayes.nspam, 1)

    def test_spamTokenRecorded(self):
        """
        The first time a token is encountered during spam training, a row is
        inserted into the database counting it once as a spam token and never
        as a ham token.
        """
        self.classifier.train(StringIO("spam bad gross"), True)
        bayes = spam._SQLite3Classifier(self.path)
        wordInfo = bayes._get(u"spam")
        self.assertEqual((u"spam", 1, 0), wordInfo)

    def test_hamTokenRecorded(self):
        """
        The first time a token is encountered during ham training, a row is
        inserted into the database counting it never as a spam token and once
        as a ham token.
        """
        self.classifier.train(StringIO("justice sunshine puppies"), False)
        bayes = spam._SQLite3Classifier(self.path)
        wordInfo = bayes._get(u"sunshine")
        self.assertEqual((u"sunshine", 0, 1), wordInfo)

    def test_spamTokenIncremented(self):
        """
        When a token is encountered again during a subsequent spam training
        operation, its existing word info row has its spam count incremented
        and its ham count left alone.
        """
        self.classifier.train(StringIO("justice sunshine puppies"), False)
        self.classifier.train(StringIO("spam bad puppies"), True)
        bayes = spam._SQLite3Classifier(self.path)
        wordInfo = bayes._get(u"puppies")
        self.assertEqual((u"puppies", 1, 1), wordInfo)

    def test_hamTokenIncremented(self):
        """
        When a token is encountered again during a subsequent ham training
        operation, its existing word info row has its spam count left alone
        and its ham count incremented.
        """
        self.classifier.train(StringIO("spam bad puppies"), True)
        self.classifier.train(StringIO("justice sunshine puppies"), False)
        bayes = spam._SQLite3Classifier(self.path)
        wordInfo = bayes._get(u"puppies")
        self.assertEqual((u"puppies", 1, 1), wordInfo)

    def test_nham(self):
        """
        L{SQLite3Classifier} tracks, in memory, the number of ham messages it
        has been trained with.
        """
        self.classifier.train(StringIO("very nice words"), False)
        self.assertEqual(self.bayes.nham, 1)

    def test_nhamPersisted(self):
        """
        L{SQLite3Classifier} tracks, in a database, the number of ham messages
        it has been trained with.
        """
        self.classifier.train(StringIO("very nice words"), False)
        bayes = spam._SQLite3Classifier(self.path)
        self.assertEqual(bayes.nham, 1)

    def test_spamClassification(self):
        """
        L{SQLite3Classifier} can be trained with a spam message so as to later
        classify messages like that one as spam.
        """
        self.classifier.train(StringIO("spam words of spamfulness"), True)
        self.assertTrue(
            self.classifier.score(StringIO("spamfulness words of spam")) > 0.99
        )

    def test_spamClassificationWithoutCache(self):
        """
        Like L{test_spamClassification}, but ensure no instance cache is used
        to satisfy word info lookups.
        """
        self.classifier.train(StringIO("spam words of spamfulness"), True)
        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(
            classifier.score(StringIO("spamfulness words of spam")) > 0.99)

    def test_hamClassification(self):
        """
        L{SQLite3Classifier} can be trained with a ham message so as to later
        classify messages like that one as ham.
        """
        self.classifier.train(StringIO("very nice words"), False)
        self.assertTrue(
            self.classifier.score(StringIO("words, very nice")) < 0.01)

    def test_hamClassificationWithoutCache(self):
        """
        Like L{test_hamClassification}, but ensure no instance cache is used
        to satisfy word info lookups.
        """
        self.classifier.train(StringIO("very nice words"), False)
        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(classifier.score(StringIO("words, very nice")) < 0.01)

    def test_largeDocumentClassification(self):
        """
        A document with more than 999 tokens can be successfully classified.
        """
        words = []
        for i in range(1000):
            word = "word%d" % (i, )
            words.append(word)
        document = " ".join(words)
        self.classifier.train(StringIO(document), False)

        classifier = Hammie(spam._SQLite3Classifier(self.path), mode='r')
        self.assertTrue(classifier.score(StringIO(document)) < 0.01)
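
For orientation, here is a minimal sketch of the kind of object these tests exercise: a spambayes Classifier subclass that keeps word counts and the nspam/nham totals in a SQLite3 database, so that a second instance opened on the same path sees earlier training. It is written against the word-info hooks that spambayes storage backends override, not copied from xquotient.spam._SQLite3Classifier; the table layout and column names are invented for illustration.

import sqlite3

from spambayes.classifier import Classifier


class SQLite3ClassifierSketch(Classifier):
    """
    Hypothetical SQLite3-backed classifier shaped like the object the
    tests above expect: C{nspam}/C{nham} survive reopening the database
    and C{_get(word)} returns a C{(word, spamcount, hamcount)} tuple.
    """
    def __init__(self, databasePath):
        Classifier.__init__(self)
        self.db = sqlite3.connect(databasePath)
        self.db.execute(
            "CREATE TABLE IF NOT EXISTS bayes ("
            "word TEXT PRIMARY KEY, nspam INTEGER, nham INTEGER)")
        self.db.execute(
            "CREATE TABLE IF NOT EXISTS state ("
            "id INTEGER PRIMARY KEY CHECK (id = 0), "
            "nspam INTEGER, nham INTEGER)")
        self.db.execute("INSERT OR IGNORE INTO state VALUES (0, 0, 0)")
        self.db.commit()
        # Pick up whatever totals a previous session left behind.
        self.nspam, self.nham = self.db.execute(
            "SELECT nspam, nham FROM state").fetchone()

    def _get(self, word):
        # The stored row exactly as the tests compare it: a
        # (word, spamcount, hamcount) tuple, or None if never seen.
        return self.db.execute(
            "SELECT word, nspam, nham FROM bayes WHERE word = ?",
            (word,)).fetchone()

    def _wordinfoget(self, word):
        # Called by Classifier during training and scoring; build the
        # WordInfo record straight from the database, with no cache.
        row = self._get(word)
        if row is None:
            return None
        record = self.WordInfoClass()
        record.spamcount, record.hamcount = row[1], row[2]
        return record

    def _wordinfoset(self, word, record):
        self.db.execute(
            "INSERT OR REPLACE INTO bayes VALUES (?, ?, ?)",
            (word, record.spamcount, record.hamcount))

    def _wordinfodel(self, word):
        self.db.execute("DELETE FROM bayes WHERE word = ?", (word,))

    def _post_training(self):
        # Called by Classifier after each learn()/unlearn(); persist the
        # updated message totals and commit the word rows.
        self.db.execute(
            "UPDATE state SET nspam = ?, nham = ?",
            (self.nspam, self.nham))
        self.db.commit()

With such an object in place, Hammie(SQLite3ClassifierSketch(path), mode='r') could be trained and scored exactly as the examples above do.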