def __init__(self, cdbfile=None):
    """Initialise the base ``Classifier`` and optionally attach a CDB file.

    Args:
        cdbfile: When not ``None``, it is passed to ``cdb.Cdb`` and the
            result becomes ``self.wordinfo``.  When ``None``, ``wordinfo``
            is not touched here.
    """
    Classifier.__init__(self)
    if cdbfile is None:
        return
    self.wordinfo = cdb.Cdb(cdbfile)
def __init__(self):
    """Initialise the base classifier, then restore the stored counters.

    ``nspam`` and ``nham`` are taken from the ``spam_count`` and
    ``ham_count`` attributes of the object returned by ``self.get_state()``.
    """
    Classifier.__init__(self)
    # Set the counters from the value stored in the DB.
    stored = self.get_state()
    self.nspam, self.nham = stored.spam_count, stored.ham_count
def __init__(self, *args, **kwargs):
    """Initialise the base classifier and open the Redis connection.

    Args:
        *args: Accepted but not used by this initialiser.
        **kwargs: Forwarded unchanged to ``self.redis_class`` to build
            ``self.redis``.

    After connecting, ``nspam`` and ``nham`` are taken from the
    ``spam_count`` and ``ham_count`` attributes of ``self.get_state()``.
    """
    Classifier.__init__(self)
    self.redis = self.redis_class(**kwargs)
    # Set the counters from the value stored in Redis.
    stored = self.get_state()
    self.nspam, self.nham = stored.spam_count, stored.ham_count
def setUp(self):
    """Build the message fixture and three reference probabilities.

    * ``u_prob``: ``spamprob`` of ``good1`` on a fresh classifier.
    * ``g_prob``: ``spamprob`` of ``good1`` after learning it with flag
      ``False``.
    * ``s_prob`` / ``clues``: ``spamprob`` of ``spam1`` after unlearning
      ``good1`` and learning ``spam1`` with flag ``True``.

    The header strings are read from the ``Headers`` options section and a
    two-address ``to`` header is set on the message.
    """
    self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)

    classifier = Classifier()
    # The statement order matters: each probability depends on the
    # training state the classifier is in at that point.
    self.u_prob, _ = classifier.spamprob(tokenize(good1), True)
    classifier.learn(tokenize(good1), False)
    self.g_prob, _ = classifier.spamprob(tokenize(good1), True)
    classifier.unlearn(tokenize(good1), False)
    classifier.learn(tokenize(spam1), True)
    self.s_prob, self.clues = classifier.spamprob(tokenize(spam1), True)

    self.ham = options['Headers', 'header_ham_string']
    self.spam = options['Headers', 'header_spam_string']
    self.unsure = options['Headers', 'header_unsure_string']

    self.to = "[email protected];[email protected]"
    self.msg["to"] = self.to
def __init__(self, Model):
    """Create a classifier backed by a Django model class.

    ``Model`` must be the model class itself -- not an instance of it and
    not a database name.  Typical use::

        from sbayes.models import Bayes
        db = DjangoClassifier(Bayes)
    """
    Classifier.__init__(self)
    self.Model = Model
    self.statekey = 'save state'
    # Both attributes above are set before load() is called.
    self.load()
def _record_token(mapdb, token, f, mboxtype, msgid):
    """Add ``msgid`` to the id set kept for ``token`` under mailbox ``f``.

    ``mapdb[token]`` holds a ``(ham, spam)`` pair of dicts keyed by mailbox;
    the dict that is updated is ``ham`` when ``mboxtype == "ham"`` and
    ``spam`` otherwise.
    """
    ham, spam = mapdb.get(token, ({}, {}))
    bucket = ham if mboxtype == "ham" else spam
    msgids = bucket.get(f, set())
    msgids.add(msgid)
    bucket[f] = msgids
    # Always write the pair back so that a mapdb which only notices
    # assignments still sees the update.
    mapdb[token] = (ham, spam)


def mapmessages(f, mboxtype, mapdb):
    """Record, for every token in mailbox ``f``, which messages contain it.

    Args:
        f: Mailbox handed to ``getmbox``; also used as the key inside the
            per-token ham/spam dicts and in the progress output.
        mboxtype: ``"ham"`` files ids under the ham dict; any other value
            files them under the spam dict.
        mapdb: Mapping from token to a ``(ham, spam)`` pair of dicts,
            updated in place.

    Messages without a ``message-id`` header are skipped.  When the
    ``("Classifier", "x-use_bigrams")`` option is truthy, the tokens produced
    by ``Classifier()._enhance_wordstream`` are recorded as well.  A running
    ``"\\r<f>: <count>"`` progress line is written to stdout.
    """
    for i, msg in enumerate(getmbox(f), 1):
        sys.stdout.write('\r%s: %d' % (f, i))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is None:
            continue
        for t in tokenize(msg):
            _record_token(mapdb, t, f, mboxtype, msgid)
        if options["Classifier", "x-use_bigrams"]:
            for t in Classifier()._enhance_wordstream(tokenize(msg)):
                _record_token(mapdb, t, f, mboxtype, msgid)
    sys.stdout.write("\n")
def setUp(self):
    """Log in, build an ``IMAPFilter`` and set the training folders.

    Runs ``BaseIMAPFilterTest.setUp`` first, logs in on ``self.imap`` with
    ``IMAP_USERNAME`` / ``IMAP_PASSWORD``, creates ``self.filter`` around a
    fresh ``Classifier``, and then points the ``imap`` ham/spam training
    folder options at ``ham_to_train`` and ``spam_to_train``.
    """
    BaseIMAPFilterTest.setUp(self)
    self.imap.login(IMAP_USERNAME, IMAP_PASSWORD)
    self.filter = IMAPFilter(Classifier(), None)
    # The options are set only after the filter exists, as in the original.
    options["imap", "ham_train_folders"] = ("ham_to_train", )
    options["imap", "spam_train_folders"] = ("spam_to_train", )
def save_wordinfo(self, db_file):
    """Write every word's probability to ``db_file`` via ``cdb.cdb_make``.

    Each ``(word, record)`` pair in ``self.wordinfo`` becomes a
    ``(word, str(probability))`` entry, where the probability comes from
    ``Classifier.probability(self, record)``.
    """
    entries = [
        (word, str(Classifier.probability(self, record)))
        for word, record in self.wordinfo.items()
    ]
    cdb.cdb_make(db_file, entries)
def get_classifier(feed_key):
    """Return the classifier for ``feed_key``, rebuilding it on a cache miss.

    The classifier is looked up in memcache under ``"classifier_" + feed_key``.
    On a miss a new ``Classifier`` is created, its ``nham`` / ``nspam`` are
    seeded from the ``SpamCounts`` entity stored under ``SPAM_COUNT_KEY``
    (when one exists), and its ``wordinfo`` is filled from every
    ``WordInfoEntity`` whose ancestor is ``feed_key``.  If any single word
    has a spam or ham count above the corresponding total, that total is
    raised to match.  The rebuilt classifier is then added to memcache.
    """
    logging.info("Getting classifier for feed " + feed_key)
    classifier_key = "classifier_" + feed_key
    cached = memcache.get(classifier_key)
    if cached is not None:
        return cached

    classifier = Classifier(classifier_key)
    logging.info("Reloading classifier " + str(classifier.key))

    counts = SpamCounts.get_by_key_name(SPAM_COUNT_KEY)
    if counts:
        classifier.nham = counts.nham
        classifier.nspam = counts.nspam

    word_rows = db.GqlQuery(
        "SELECT * FROM WordInfoEntity WHERE ANCESTOR IS :1", feed_key)
    loaded = 0
    top_spam = top_ham = 0
    for row in word_rows:
        entry = WordInfo()
        top_spam = max(top_spam, row.spamcount)
        top_ham = max(top_ham, row.hamcount)
        entry.spamcount = row.spamcount
        entry.hamcount = row.hamcount
        classifier.wordinfo[row.word] = entry
        loaded += 1

    # Raise a total that is smaller than the largest per-word count.
    if top_spam > classifier.nspam:
        classifier.nspam = top_spam
    if top_ham > classifier.nham:
        classifier.nham = top_ham

    logging.info("Max spamcount = %s, with nspam = %s",
                 top_spam, classifier.nspam)
    logging.info("Max hamcount = %s with nham = %s",
                 top_ham, classifier.nham)
    logging.info("Loaded %s entities", loaded)
    memcache.add(classifier.key, classifier)
    return classifier
def reset(self, atagger, at_config):
    """Replace ``atagger.spambayes`` with a brand-new ``Classifier``.

    ``at_config`` is accepted but not used here.
    """
    fresh = Classifier()
    atagger.spambayes = fresh
def _classifier(autotagger):
    """Return ``autotagger.spambayes``, creating a ``Classifier`` if missing.

    When the attribute does not exist yet, a new ``Classifier`` is stored on
    ``autotagger`` first, so later calls return that same object.
    """
    if hasattr(autotagger, 'spambayes'):
        return autotagger.spambayes
    autotagger.spambayes = Classifier()
    return autotagger.spambayes
def setUp(self):
    """Prepare a header message plus probabilities from a scratch classifier.

    Stores on ``self``:

    * ``u_prob``: ``spamprob`` of ``good1`` before any training.
    * ``g_prob``: ``spamprob`` of ``good1`` after ``learn(..., False)`` on it.
    * ``s_prob`` and ``clues``: ``spamprob`` of ``spam1`` once ``good1`` has
      been unlearned and ``spam1`` learned with flag ``True``.
    * ``ham`` / ``spam`` / ``unsure``: header strings from the ``Headers``
      options section.
    * ``to``: the address list also written to the message's ``to`` header.
    """
    self.msg = email.message_from_string(spam1, _class=SBHeaderMessage)

    # Get a prob and some clues; the train/untrain order below is significant.
    bayes = Classifier()
    self.u_prob, _ = bayes.spamprob(tokenize(good1), True)
    bayes.learn(tokenize(good1), False)
    self.g_prob, _ = bayes.spamprob(tokenize(good1), True)
    bayes.unlearn(tokenize(good1), False)
    bayes.learn(tokenize(spam1), True)
    self.s_prob, self.clues = bayes.spamprob(tokenize(spam1), True)

    self.ham = options['Headers', 'header_ham_string']
    self.spam = options['Headers', 'header_spam_string']
    self.unsure = options['Headers', 'header_unsure_string']

    self.to = "[email protected];[email protected]"
    self.msg["to"] = self.to
def save_wordinfo(self, db_file):
    """Write every word's probability to ``db_file`` via ``cdb.cdb_make``.

    Args:
        db_file: Passed straight through to ``cdb.cdb_make`` as the
            destination.

    Each ``(word, record)`` pair in ``self.wordinfo`` is turned into a
    ``(word, str(probability))`` entry, with the probability computed by
    ``Classifier.probability(self, record)``.
    """
    # items() is used instead of iteritems(): the latter was removed from
    # dict in Python 3, while items() is available on both major versions.
    items = [
        (word, str(Classifier.probability(self, record)))
        for word, record in self.wordinfo.items()
    ]
    cdb.cdb_make(db_file, items)
def runProxy():
    """Start the SMTP proxy listener and enter the Dibbler loop.

    An ``SMTPTrainer`` wrapping a fresh ``Classifier`` and the module-level
    ``state`` is handed to a ``BayesSMTPProxyListener`` constructed with
    ``'localhost'``, ``8025`` and ``('', 8026)``; ``Dibbler.run()`` is then
    called.
    """
    BayesSMTPProxyListener(
        'localhost', 8025, ('', 8026), SMTPTrainer(Classifier(), state))
    Dibbler.run()
def __init__(self, *args):
    """Forward ``args`` to ``unittest.TestCase`` and create ``self.bayes``.

    ``self.bayes`` is a fresh ``Classifier`` built once per test-case
    instance.
    """
    unittest.TestCase.__init__(self, *args)
    classifier = Classifier()
    self.bayes = classifier