Beispiel #1
0
	def __init__(self, cdbfile=None):
		"""Initialise the classifier, optionally backed by a CDB file.

		When ``cdbfile`` is not None, ``self.wordinfo`` is replaced by
		``cdb.Cdb(cdbfile)``; otherwise whatever ``Classifier.__init__``
		set up is left untouched.
		"""
		# The body is indented with tabs to match the ``def`` line; mixing a
		# tab-indented header with a space-indented body is rejected by
		# Python 3 as inconsistent indentation.
		Classifier.__init__(self)
		if cdbfile is not None:
			self.wordinfo = cdb.Cdb(cdbfile)
Beispiel #2
0
    def __init__(self):
        """Initialise the classifier and restore its message counters.

        The spam and ham totals are copied from the object returned by
        ``self.get_state()``.
        """
        Classifier.__init__(self)

        # Pull the persisted counters back onto this instance.
        stored = self.get_state()
        self.nspam = stored.spam_count
        self.nham = stored.ham_count
Beispiel #3
0
    def __init__(self, *args, **kwargs):
        """Initialise the classifier on top of a ``redis_class`` connection.

        Keyword arguments are forwarded to ``self.redis_class``; positional
        arguments are accepted but not used in this method. The spam and ham
        totals are then copied from the object returned by
        ``self.get_state()``.
        """
        Classifier.__init__(self)

        # Create the client before reading state -- presumably get_state()
        # goes through self.redis; confirm against its implementation.
        self.redis = self.redis_class(**kwargs)
        stored = self.get_state()
        self.nspam = stored.spam_count
        self.nham = stored.ham_count
Beispiel #4
0
 def setUp(self):
     """Prepare a header-aware message and reference scores for the tests."""
     self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)

     classifier = Classifier()
     # Score good1 before any training has happened.
     self.u_prob, _ = classifier.spamprob(tokenize(good1), True)
     # Score good1 again after learning it with flag False, then undo that.
     classifier.learn(tokenize(good1), False)
     self.g_prob, _ = classifier.spamprob(tokenize(good1), True)
     classifier.unlearn(tokenize(good1), False)
     # Learn spam1 with flag True and keep both the score and its clues.
     classifier.learn(tokenize(spam1), True)
     self.s_prob, self.clues = classifier.spamprob(tokenize(spam1), True)

     # Cache the three header strings the tests compare against.
     for attr, option_name in (("ham", 'header_ham_string'),
                               ("spam", 'header_spam_string'),
                               ("unsure", 'header_unsure_string')):
         setattr(self, attr, options['Headers', option_name])

     self.to = "[email protected];[email protected]"
     self.msg["to"] = self.to
Beispiel #5
0
    def __init__(self, Model):
        """Initialise the classifier around a Django model class.

        ``Model`` must be the model class itself, not an instance and not a
        database name. Typical use:

            from sbayes.models import Bayes
            db = DjangoClassifier(Bayes)

        After storing the model and the state key, ``self.load()`` is called.
        """
        Classifier.__init__(self)
        self.Model = Model
        self.statekey = 'save state'
        self.load()
def mapmessages(f, mboxtype, mapdb):
    """Record which messages of mailbox ``f`` contain each token.

    ``mapdb`` maps a token to a ``(ham, spam)`` pair of dicts. Each dict maps
    a mailbox name to the set of message-ids in which the token was seen.
    Messages from ``f`` are filed under the ham dict when ``mboxtype`` is
    ``"ham"`` and under the spam dict for any other value. Messages without
    a ``message-id`` header are counted in the progress output but skipped.

    When the ``x-use_bigrams`` classifier option is set, the tokens produced
    by ``Classifier()._enhance_wordstream`` are recorded as well.

    A running ``<mailbox>: <count>`` progress line is written to stdout.
    """
    def record(token, msgid):
        # Single home for the bookkeeping that used to be duplicated for the
        # plain-token and bigram passes.
        ham, spam = mapdb.get(token, ({}, {}))
        if mboxtype == "ham":
            bucket = ham
        else:
            bucket = spam
        bucket.setdefault(f, set()).add(msgid)
        # Always store the pair back: a fresh default pair is not yet in
        # mapdb, and mapdb may not be a plain in-memory dict.
        mapdb[token] = (ham, spam)

    for count, msg in enumerate(getmbox(f), 1):
        sys.stdout.write('\r%s: %d' % (f, count))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is None:
            continue
        for token in tokenize(msg):
            record(token, msgid)
        if options["Classifier", "x-use_bigrams"]:
            for token in Classifier()._enhance_wordstream(tokenize(msg)):
                record(token, msgid)
    sys.stdout.write("\n")
Beispiel #7
0
 def setUp(self):
     """Log in to the test IMAP server and build the filter under test."""
     BaseIMAPFilterTest.setUp(self)
     self.imap.login(IMAP_USERNAME, IMAP_PASSWORD)
     # The filter gets a brand-new classifier and None as second argument.
     self.filter = IMAPFilter(Classifier(), None)
     # Point the training options at the folder names the tests use.
     options["imap", "ham_train_folders"] = ("ham_to_train", )
     options["imap", "spam_train_folders"] = ("spam_to_train", )
Beispiel #8
0
 def save_wordinfo(self, db_file):
     """Dump every word's probability through ``cdb.cdb_make``.

     Each entry of ``self.wordinfo`` becomes a ``(word, str(probability))``
     pair, where the probability comes from ``Classifier.probability``.
     The resulting list is passed to ``cdb.cdb_make`` with ``db_file``.
     """
     items = [
         (word, str(Classifier.probability(self, record)))
         for word, record in self.wordinfo.items()
     ]
     cdb.cdb_make(db_file, items)
Beispiel #9
0
def get_classifier(feed_key):
    """Return the classifier for ``feed_key``, rebuilding it on a cache miss.

    The classifier is looked up in memcache under ``"classifier_" + feed_key``.
    On a miss a new ``Classifier`` is created, its ham/spam totals are taken
    from the ``SpamCounts`` entity (when one exists), its ``wordinfo`` is
    filled from the ``WordInfoEntity`` rows under ``feed_key``, and the
    result is added to memcache under ``classifier.key``.
    """
    logging.info("Getting classifier for feed " + feed_key)
    classifier_key = "classifier_" + feed_key
    cached = memcache.get(classifier_key)
    if cached is not None:
        return cached

    classifier = Classifier(classifier_key)
    logging.info("Reloading classifier " + str(classifier.key))
    counts = SpamCounts.get_by_key_name(SPAM_COUNT_KEY)
    if counts:
        classifier.nham = counts.nham
        classifier.nspam = counts.nspam

    entities = db.GqlQuery("SELECT * FROM WordInfoEntity WHERE ANCESTOR IS :1", feed_key)
    loaded = 0
    top_spam = top_ham = 0
    for entity in entities:
        record = WordInfo()
        top_spam = max(top_spam, entity.spamcount)
        top_ham = max(top_ham, entity.hamcount)
        record.spamcount = entity.spamcount
        record.hamcount = entity.hamcount
        classifier.wordinfo[entity.word] = record
        loaded += 1

    # Raise each total to at least the largest per-word count that was loaded.
    if top_spam > classifier.nspam:
        classifier.nspam = top_spam
    if top_ham > classifier.nham:
        classifier.nham = top_ham

    logging.info("Max spamcount = %s, with nspam = %s", top_spam, classifier.nspam)
    logging.info("Max hamcount = %s with nham = %s", top_ham, classifier.nham)
    logging.info("Loaded %s entities", loaded)
    memcache.add(classifier.key, classifier)
    return classifier
Beispiel #10
0
 def reset(self, atagger, at_config):
     """Replace ``atagger.spambayes`` with a brand-new ``Classifier``.

     ``at_config`` is accepted but not used here.
     """
     fresh = Classifier()
     atagger.spambayes = fresh
Beispiel #11
0
def _classifier(autotagger):
    if not hasattr(autotagger, 'spambayes'):
        autotagger.spambayes = Classifier()
    return autotagger.spambayes
 def setUp(self):
     """Prepare a header-aware message and reference scores for the tests."""
     self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)

     # Collect a probability (and, for the last call, its clues) at three
     # points: before training, after learning good1, after learning spam1.
     bayes = Classifier()
     self.u_prob, _ = bayes.spamprob(tokenize(good1), True)
     bayes.learn(tokenize(good1), False)
     self.g_prob, _ = bayes.spamprob(tokenize(good1), True)
     # Undo the good1 training before learning spam1.
     bayes.unlearn(tokenize(good1), False)
     bayes.learn(tokenize(spam1), True)
     self.s_prob, self.clues = bayes.spamprob(tokenize(spam1), True)

     # Cache the three header strings the tests compare against.
     for attr, option_name in (("ham", 'header_ham_string'),
                               ("spam", 'header_spam_string'),
                               ("unsure", 'header_unsure_string')):
         setattr(self, attr, options['Headers', option_name])

     self.to = "[email protected];[email protected]"
     self.msg["to"] = self.to
 def save_wordinfo(self, db_file):
     """Dump every word's probability through ``cdb.cdb_make``.

     Each ``(word, record)`` pair yielded by ``self.wordinfo.iteritems()``
     becomes ``(word, str(probability))``, with the probability computed by
     ``Classifier.probability``. The list is passed to ``cdb.cdb_make``
     together with ``db_file``.
     """
     items = [
         (word, str(Classifier.probability(self, record)))
         for word, record in self.wordinfo.iteritems()
     ]
     cdb.cdb_make(db_file, items)
 def __init__(self, cdbfile=None):
     """Initialise the classifier, optionally backed by a CDB file.

     With ``cdbfile`` left as None only the base initialisation runs;
     otherwise ``self.wordinfo`` is replaced by ``cdb.Cdb(cdbfile)``.
     """
     Classifier.__init__(self)
     if cdbfile is None:
         return
     self.wordinfo = cdb.Cdb(cdbfile)
Beispiel #15
0
 def runProxy():
     """Build the SMTP proxy listener around a new classifier and run Dibbler.

     ``state`` is taken from the enclosing scope.
     """
     classifier = Classifier()
     trainer = SMTPTrainer(classifier, state)
     # The listener is constructed only for its side effects; no reference
     # to it is kept here.
     BayesSMTPProxyListener('localhost', 8025, ('', 8026), trainer)
     Dibbler.run()
Beispiel #16
0
 def __init__(self, *args):
     """Initialise the test case and give it a new ``Classifier``.

     All positional arguments are forwarded to ``unittest.TestCase``.
     """
     unittest.TestCase.__init__(self, *args)
     # Created once per test-case instance, at construction time.
     self.bayes = Classifier()