Ejemplo n.º 1
0
def TestSpamFilter(driver):
    """Check filtering, training and untraining of a spam test message.

    For every (store, folder) pair yielded by
    ``driver.GetWatchFolderGenerator()`` the test:

    1. creates a SPAM test message in the watch folder and checks that it
       left that folder and can be found in ``driver.folder_spam``, with the
       spam/ham counts and word info unchanged;
    2. moves it back to the watch folder and checks the ham count went up
       by one and the message reports as trained ham;
    3. moves it to the spam folder and checks the ham training was replaced
       by spam training;
    4. moves it to ``driver.folder_unsure`` and checks it still reports as
       trained spam;
    5. untrains it, checks counts and word info are back at their starting
       values, and deletes the message.

    Failures are reported by calling ``TestFailed`` (defined elsewhere;
    presumably raises -- confirm).

    Args:
        driver: test driver providing ``manager``, ``folder_spam``,
            ``folder_unsure`` and the message helper methods used below.
    """
    bayes = driver.manager.classifier_data.bayes
    nspam = bayes.nspam
    nham = bayes.nham
    # NOTE(review): copy.copy() is a shallow copy.  Unless the bayes class
    # customises copying, 'wordinfo' and 'probcache' on the copy are the same
    # objects as on 'bayes', so the comparisons at the end of the loop could
    # never fail -- confirm whether a deep copy was intended.
    original_bayes = copy.copy(driver.manager.classifier_data.bayes)
    # Function-scope import kept from the original, but done once before the
    # loop rather than on every iteration.
    import train
    # for each watch folder, create a spam message, and do the training thang
    for msf_watch, folder_watch in driver.GetWatchFolderGenerator():
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("Performing Spam test on watch folder '%s'..." % msf_watch.GetFQName())
        # Create a spam message in the Inbox - it should get immediately filtered
        msg, words = driver.CreateTestMessageInFolder(SPAM, folder_watch)
        # Wait to ensure filtering has happened.
        WaitForFilters()
        # It should no longer be in the Inbox.
        driver.CheckMessageFilteredFrom(folder_watch)
        # It should be in the "sure is spam" folder.
        spam_msg = driver.FindTestMessage(driver.folder_spam)
        if spam_msg is None:
            TestFailed("The test message vanished from the Inbox, but didn't appear in Spam")
        # Check that none of the above caused training.
        if nspam != bayes.nspam:
            TestFailed("Something caused a new spam message to appear")
        if nham != bayes.nham:
            TestFailed("Something caused a new ham message to appear")
        check_words(words, bayes, 0, 0)

        # Now move the message back to the inbox - it should get trained.
        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        if train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
            TestFailed("This new spam message should not have been trained as ham yet")
        if train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
            TestFailed("This new spam message should not have been trained as spam yet")
        spam_msg.Move(folder_watch)
        WaitForFilters()
        spam_msg = driver.FindTestMessage(folder_watch)
        if spam_msg is None:
            TestFailed("The message appears to have been filtered out of the watch folder")
        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        # From here on the message may be trained; the 'finally' below
        # untrains it if any check fails before the explicit untrain step.
        need_untrain = True
        try:
            if nspam != bayes.nspam:
                TestFailed("There were not the same number of spam messages after a re-train")
            if nham+1 != bayes.nham:
                TestFailed("There was not one more ham messages after a re-train")
            if train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should not have been trained as spam yet")
            if not train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been trained as ham now")
            # word infos should have one extra ham
            check_words(words, bayes, 0, 1)
            # Now move it back to the Spam folder.
            # This should see the message un-trained as ham, and re-trained as Spam
            spam_msg.Move(driver.folder_spam)
            WaitForFilters()
            spam_msg = driver.FindTestMessage(driver.folder_spam)
            if spam_msg is None:
                TestFailed("Could not find the message in the Spam folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            if nspam +1 != bayes.nspam:
                TestFailed("There should be one more spam now")
            if nham != bayes.nham:
                TestFailed("There should be the same number of hams again")
            if not train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been trained as spam by now")
            if train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("This new spam message should have been un-trained as ham")
            # word infos should have one extra spam, no extra ham
            check_words(words, bayes, 1, 0)
            # Move the message to another folder, and make sure we still
            # identify it correctly as having been trained.
            # Move to the "unsure" folder, just cos we know about it, and
            # we know that no special watching of this folder exists
            # (hence no WaitForFilters() after this move).
            spam_msg.Move(driver.folder_unsure)
            spam_msg = driver.FindTestMessage(driver.folder_unsure)
            if spam_msg is None:
                TestFailed("Could not find the message in the Unsure folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            if not train.been_trained_as_spam(store_msg, driver.manager.classifier_data):
                TestFailed("Message was not identified as Spam after moving")

            # word infos should still be 'spam'
            check_words(words, bayes, 1, 0)

            # Now undo the damage we did.
            was_spam = train.untrain_message(store_msg, driver.manager.classifier_data)
            if not was_spam:
                TestFailed("Untraining this message did not indicate it was spam")
            if train.been_trained_as_spam(store_msg, driver.manager.classifier_data) or \
               train.been_trained_as_ham(store_msg, driver.manager.classifier_data):
                TestFailed("Untraining this message kept it has ham/spam")
            need_untrain = False
        finally:
            # Only reached with need_untrain still set when something above
            # failed before the explicit untrain completed its checks.
            if need_untrain:
                train.untrain_message(store_msg, driver.manager.classifier_data)

        # Check all the counts are back where we started.
        if nspam != bayes.nspam:
            TestFailed("Spam count didn't get back to the same")
        if nham != bayes.nham:
            TestFailed("Ham count didn't get back to the same")
        check_words(words, bayes, 0, 0)

        if bayes.wordinfo != original_bayes.wordinfo:
            TestFailed("The bayes object's 'wordinfo' did not compare the same at the end of all this!")
        if bayes.probcache != original_bayes.probcache:
            TestFailed("The bayes object's 'probcache' did not compare the same at the end of all this!")

        # Remove the test message now that this folder's checks have passed.
        spam_msg.Delete()
    print("Created a Spam message, and saw it get filtered and trained.")
Ejemplo n.º 2
0
 def TestSpamFilter(driver):
    """Run the spam filter/train/untrain round trip in every watch folder.

    A SPAM test message is created in each folder yielded by
    ``driver.GetWatchFolderGenerator()``.  The message is then followed
    through the spam folder, back to the watch folder (ham training), to the
    spam folder again (spam training) and on to the unsure folder, with the
    bayes counts, word info and per-message trained state checked at each
    stop.  Finally the message is untrained and deleted.

    Each failed check is reported through ``TestFailed`` (defined elsewhere).
    """
    def _fail_if(condition, message):
        # Report 'message' when 'condition' is truthy.
        if condition:
            TestFailed(message)

    def _load(message):
        # Called on each freshly fetched store message before its trained
        # state is queried (presumably refreshes it from message_db --
        # confirm).  The attribute chain is looked up anew on every call.
        driver.manager.classifier_data.message_db.load_msg(message)

    bayes = driver.manager.classifier_data.bayes
    # Baseline training counts that every later check is measured against.
    spam_before, ham_before = bayes.nspam, bayes.nham
    bayes_snapshot = copy.copy(driver.manager.classifier_data.bayes)

    for watch_store, watch_folder in driver.GetWatchFolderGenerator():
        print("Performing Spam test on watch folder '%s'..." % watch_store.GetFQName())

        # Stage 1: a new spam message should be filtered out of the watch
        # folder into the spam folder without any training taking place.
        _created, words = driver.CreateTestMessageInFolder(SPAM, watch_folder)
        WaitForFilters()
        driver.CheckMessageFilteredFrom(watch_folder)
        spam_msg = driver.FindTestMessage(driver.folder_spam)
        _fail_if(spam_msg is None,
                 "The test message vanished from the Inbox, but didn't appear in Spam")
        _fail_if(spam_before != bayes.nspam,
                 "Something caused a new spam message to appear")
        _fail_if(ham_before != bayes.nham,
                 "Something caused a new ham message to appear")
        check_words(words, bayes, 0, 0)

        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        _load(store_msg)
        import train
        _fail_if(train.been_trained_as_ham(store_msg),
                 "This new spam message should not have been trained as ham yet")
        _fail_if(train.been_trained_as_spam(store_msg),
                 "This new spam message should not have been trained as spam yet")

        # Stage 2: moving it back to the watch folder should train it as ham.
        spam_msg.Move(watch_folder)
        WaitForFilters()
        spam_msg = driver.FindTestMessage(watch_folder)
        _fail_if(spam_msg is None,
                 "The message appears to have been filtered out of the watch folder")
        store_msg = driver.manager.message_store.GetMessage(spam_msg)
        _load(store_msg)

        # Cleared only once the explicit untrain and its checks have passed;
        # otherwise the 'finally' clause untrains the message.
        still_trained = True
        try:
            _fail_if(spam_before != bayes.nspam,
                     "There were not the same number of spam messages after a re-train")
            _fail_if(ham_before + 1 != bayes.nham,
                     "There was not one more ham messages after a re-train")
            _fail_if(train.been_trained_as_spam(store_msg),
                     "This new spam message should not have been trained as spam yet")
            _fail_if(not train.been_trained_as_ham(store_msg),
                     "This new spam message should have been trained as ham now")
            check_words(words, bayes, 0, 1)

            # Stage 3: back in the spam folder it should swap ham training
            # for spam training.
            spam_msg.Move(driver.folder_spam)
            WaitForFilters()
            spam_msg = driver.FindTestMessage(driver.folder_spam)
            _fail_if(spam_msg is None,
                     "Could not find the message in the Spam folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            _load(store_msg)
            _fail_if(spam_before + 1 != bayes.nspam,
                     "There should be one more spam now")
            _fail_if(ham_before != bayes.nham,
                     "There should be the same number of hams again")
            _fail_if(not train.been_trained_as_spam(store_msg),
                     "This new spam message should have been trained as spam by now")
            _fail_if(train.been_trained_as_ham(store_msg),
                     "This new spam message should have been un-trained as ham")
            check_words(words, bayes, 1, 0)

            # Stage 4: in the unsure folder (no filter wait here) it should
            # still be reported as trained spam.
            spam_msg.Move(driver.folder_unsure)
            spam_msg = driver.FindTestMessage(driver.folder_unsure)
            _fail_if(spam_msg is None,
                     "Could not find the message in the Unsure folder")
            store_msg = driver.manager.message_store.GetMessage(spam_msg)
            _load(store_msg)
            _fail_if(not train.been_trained_as_spam(store_msg),
                     "Message was not identified as Spam after moving")
            check_words(words, bayes, 1, 0)

            # Stage 5: explicitly untrain and confirm no trained state is left.
            was_spam = train.untrain_message(store_msg, driver.manager.classifier_data)
            _load(store_msg)
            _fail_if(not was_spam,
                     "Untraining this message did not indicate it was spam")
            _fail_if(train.been_trained_as_spam(store_msg) or
                     train.been_trained_as_ham(store_msg),
                     "Untraining this message kept it has ham/spam")
            still_trained = False
        finally:
            if still_trained:
                train.untrain_message(store_msg, driver.manager.classifier_data)

        # Everything should now match the state captured before the loop.
        _fail_if(spam_before != bayes.nspam,
                 "Spam count didn't get back to the same")
        _fail_if(ham_before != bayes.nham,
                 "Ham count didn't get back to the same")
        check_words(words, bayes, 0, 0)
        _fail_if(bayes.wordinfo != bayes_snapshot.wordinfo,
                 "The bayes object's 'wordinfo' did not compare the same at the end of all this!")
        _fail_if(bayes.probcache != bayes_snapshot.probcache,
                 "The bayes object's 'probcache' did not compare the same at the end of all this!")

        spam_msg.Delete()

    print("Created a Spam message, and saw it get filtered and trained.")