def mapmessages(f, mboxtype, mapdb):
    """Index the tokens of every message in mailbox *f* into *mapdb*.

    ``mapdb`` maps a token to a ``(ham, spam)`` pair; each element of the
    pair is a dict from mailbox name to the set of message-ids in which the
    token occurred.  When ``mboxtype == "ham"`` the ham dict is updated, for
    any other value the spam dict is updated.

    Messages without a ``message-id`` header still advance the progress
    counter but contribute nothing to the map.  A ``"<f>: <count>"`` progress
    line is rewritten on stdout for every message, followed by a final
    newline.
    """
    def record(token, msgid):
        # Fetch the entry, update the selected side, then store the pair
        # back under the same token.
        ham, spam = mapdb.get(token, ({}, {}))
        bucket = ham if mboxtype == "ham" else spam
        bucket.setdefault(f, set()).add(msgid)
        mapdb[token] = (ham, spam)

    for count, msg in enumerate(getmbox(f), 1):
        sys.stdout.write('\r%s: %d' % (f, count))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is None:
            continue
        for token in tokenize(msg):
            record(token, msgid)
        # With bigrams enabled the message is tokenized a second time and
        # the enhanced stream is indexed as well.
        if options["Classifier", "x-use_bigrams"]:
            for token in Classifier()._enhance_wordstream(tokenize(msg)):
                record(token, msgid)
    sys.stdout.write("\n")
Beispiel #2
0
 def setUp(self):
     """Prepare the fixture: log in on ``self.imap`` and build the filter.

     Runs the base-class setUp first, then creates ``self.filter`` around a
     new Classifier and sets the ham/spam training-folder options.
     """
     BaseIMAPFilterTest.setUp(self)
     self.imap.login(IMAP_USERNAME, IMAP_PASSWORD)
     self.filter = IMAPFilter(Classifier(), None)
     train_folders = (
         ("ham_train_folders", ("ham_to_train", )),
         ("spam_train_folders", ("spam_to_train", )),
     )
     for option_name, folders in train_folders:
         options["imap", option_name] = folders
Beispiel #3
0
def get_classifier(feed_key):
    """Return the Classifier for *feed_key*, rebuilding it on a cache miss.

    The classifier is looked up in memcache under ``"classifier_" +
    feed_key``.  On a miss a new Classifier is created with that key, its
    ``nham``/``nspam`` are seeded from the SpamCounts entity stored under
    ``SPAM_COUNT_KEY`` (when one exists), its ``wordinfo`` is filled from
    every WordInfoEntity whose ancestor is *feed_key*, and the result is
    added to memcache under ``classifier.key`` before being returned.
    """
    logging.info("Getting classifier for feed " + feed_key)
    classifier_key = "classifier_" + feed_key
    classifier = memcache.get(classifier_key)
    if classifier is not None:
        return classifier

    classifier = Classifier(classifier_key)
    logging.info("Reloading classifier " + str(classifier.key))
    totals = SpamCounts.get_by_key_name(SPAM_COUNT_KEY)
    if totals:
        classifier.nham = totals.nham
        classifier.nspam = totals.nspam

    entities = db.GqlQuery("SELECT * FROM WordInfoEntity WHERE ANCESTOR IS :1", feed_key)
    loaded = 0
    top_spam = top_ham = 0
    for entity in entities:
        spam_n = entity.spamcount
        ham_n = entity.hamcount
        record = WordInfo()
        record.spamcount = spam_n
        record.hamcount = ham_n
        classifier.wordinfo[entity.word] = record
        top_spam = max(top_spam, spam_n)
        top_ham = max(top_ham, ham_n)
        loaded += 1

    # Raise the message totals so that neither is smaller than the largest
    # per-word count that was just loaded.
    if top_spam > classifier.nspam:
        classifier.nspam = top_spam
    if top_ham > classifier.nham:
        classifier.nham = top_ham
    logging.info("Max spamcount = %s, with nspam = %s", top_spam, classifier.nspam)
    logging.info("Max hamcount = %s with nham = %s", top_ham, classifier.nham)
    logging.info("Loaded %s entities", loaded)
    memcache.add(classifier.key, classifier)
    return classifier
 def setUp(self):
     """Build the test message and the probabilities the tests compare.

     ``self.u_prob`` is the score of ``good1`` on a fresh Classifier,
     ``self.g_prob`` its score after ``learn(good1, False)``, and
     ``self.s_prob``/``self.clues`` come from scoring ``spam1`` after
     ``good1`` has been unlearned and ``spam1`` learned with ``True``.
     The ham/spam/unsure header strings are read from the options, and a
     "to" header is set on the message.
     """
     self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)
     # The order of the calls below matters: every score depends on what
     # the classifier has learned up to that point.
     bayes = Classifier()
     self.u_prob, _clues = bayes.spamprob(tokenize(good1), True)
     bayes.learn(tokenize(good1), False)
     self.g_prob, _clues = bayes.spamprob(tokenize(good1), True)
     bayes.unlearn(tokenize(good1), False)
     bayes.learn(tokenize(spam1), True)
     self.s_prob, self.clues = bayes.spamprob(tokenize(spam1), True)
     self.ham = options['Headers', 'header_ham_string']
     self.spam = options['Headers', 'header_spam_string']
     self.unsure = options['Headers', 'header_unsure_string']
     recipients = "[email protected];[email protected]"
     self.to = recipients
     self.msg["to"] = recipients
Beispiel #5
0
 def reset(self, atagger, at_config):
     """Replace ``atagger.spambayes`` with a newly constructed Classifier.

     ``at_config`` is accepted but not used in this method.
     """
     atagger.spambayes = Classifier()
Beispiel #6
0
def _classifier(autotagger):
    """Return ``autotagger.spambayes``, creating it on first use.

    If the autotagger has no ``spambayes`` attribute yet, a new Classifier
    is stored there before the attribute is returned.
    """
    if hasattr(autotagger, 'spambayes'):
        return autotagger.spambayes
    autotagger.spambayes = Classifier()
    return autotagger.spambayes
Beispiel #7
0
 def runProxy():
     """Create the SMTP proxy listener and hand control to ``Dibbler.run()``.

     A new Classifier is wrapped in an SMTPTrainer, which is passed to
     BayesSMTPProxyListener together with the host/port arguments below.
     """
     # `state` is not defined in this function; it comes from an enclosing
     # scope that is not visible here.
     trainer = SMTPTrainer(Classifier(), state)
     # NOTE(review): the meaning of each host/port argument is defined by
     # BayesSMTPProxyListener's signature -- confirm there before changing.
     BayesSMTPProxyListener('localhost', 8025, ('', 8026), trainer)
     Dibbler.run()
Beispiel #8
0
 def __init__(self, *args):
     """Initialise the TestCase and store a new Classifier on ``self.bayes``.

     All positional arguments are forwarded unchanged to
     ``unittest.TestCase.__init__``.
     """
     unittest.TestCase.__init__(self, *args)
     self.bayes = Classifier()