def print_message_score(msg_name, msg_fp):
    """Print the spam probability of one message and the evidence for it.

    Args:
        msg_name: Label printed in front of the overall score.
        msg_fp: Open file object that ``email.message_from_file`` parses
            the message from.

    Output goes to stdout: one ``<msg_name> <probability>`` line, followed
    by one indented ``<repr(word)> <probability>`` line for every pair in
    the evidence returned by the classifier.
    """
    msg = email.message_from_file(msg_fp)
    # Keep the database file open for as long as the classifier is in use
    # and make sure it is closed afterwards, even if scoring fails.
    with open(DB_FILE, 'rb') as db_fp:
        bayes = CdbClassifier(db_fp)
        prob, evidence = bayes.spamprob(tokenize(msg), evidence=True)
        # A single pre-formatted argument prints the same text whether
        # ``print`` is the Python 2 statement or the Python 3 function.
        print('%s %s' % (msg_name, prob))
        # Use a separate name so the per-word value does not clobber the
        # overall message score.
        for word, word_prob in evidence:
            print('  %r %s' % (word, word_prob))
def filter_message(hamdir, spamdir):
    """Read one message from stdin and deliver it to the ham or spam folder.

    The message is spooled into a temporary file obtained from
    ``maketmp(hamdir)``, prefixed with the ``DTLINE`` environment variable
    if it is set. Messages smaller than ``SIZE_LIMIT`` are scored with the
    classifier database in ``DB_FILE``; larger ones are not scored and get
    probability 0.0. The temporary file is then renamed into
    ``<spamdir>/new/`` when the score exceeds ``SPAM_CUTOFF`` and into
    ``<hamdir>/new/`` otherwise.

    Args:
        hamdir: Directory whose ``new`` subdirectory receives non-spam
            messages; also passed to ``maketmp``.
        spamdir: Directory whose ``new`` subdirectory receives spam.

    Raises:
        Whatever exception interrupts delivery, re-raised after the
        temporary file has been removed.
    """
    # Give up (exit status 1) if delivery has not finished within 24 hours.
    signal.signal(signal.SIGALRM, lambda s, f: sys.exit(1))
    signal.alarm(24 * 60 * 60)

    tmpfile, pathname, filename = maketmp(hamdir)
    try:
        tmpfile.write(os.environ.get("DTLINE", ""))  # delivered-to line
        total_size = 0  # was ``bytes``, which shadowed the builtin
        blocks = []
        while True:
            block = sys.stdin.read(BLOCK_SIZE)
            if not block:
                break
            total_size += len(block)
            if total_size < SIZE_LIMIT:
                blocks.append(block)
            else:
                # Too large to be scored: the buffered copy will never be
                # used, so release it instead of holding it until the end.
                del blocks[:]
            tmpfile.write(block)
        tmpfile.close()

        if total_size < SIZE_LIMIT:
            msgdata = ''.join(blocks)
            del blocks  # drop the pieces before parsing to limit peak memory
            msg = email.message_from_string(msgdata)
            del msgdata
            # Close the database file once scoring is finished.
            with open(DB_FILE, 'rb') as db_fp:
                prob = CdbClassifier(db_fp).spamprob(tokenize(msg))
        else:
            prob = 0.0

        if prob > SPAM_CUTOFF:
            destdir = spamdir
        else:
            destdir = hamdir
        os.rename(pathname, "%s/new/%s" % (destdir, filename))
    except:
        # Bare except is kept on purpose: it also catches the SystemExit
        # raised by the alarm handler, so a timed-out delivery does not
        # leave the temporary file behind. The exception is re-raised.
        os.unlink(pathname)
        raise
def train_messages(ham_name, spam_name):
    """Create database using messages.

    Makes sure the expanded ``RC_DIR`` directory exists, trains a fresh
    ``CdbClassifier`` on both message sources and writes the resulting
    word information to ``DB_FILE``. Progress is reported on stdout.

    Args:
        ham_name: Source of non-spam messages, handed to ``train`` with
            ``False`` as the third argument.
        spam_name: Source of spam messages, handed to ``train`` with
            ``True`` as the third argument.
    """
    rc_dir = os.path.expanduser(RC_DIR)
    if not os.path.exists(rc_dir):
        print("Creating", RC_DIR, "directory...")
        os.mkdir(rc_dir)

    bayes = CdbClassifier()
    print('Training with ham...')
    train(bayes, ham_name, False)
    print('Training with spam...')
    train(bayes, spam_name, True)

    print('Update probabilities and writing DB...')
    # The context manager closes the database file even when
    # save_wordinfo() raises, instead of leaking the handle.
    with open(DB_FILE, "wb") as db:
        bayes.save_wordinfo(db)
    print('done')
def train_messages(ham_name, spam_name):
    """Create database using messages.

    Makes sure the expanded ``RC_DIR`` directory exists, trains a fresh
    ``CdbClassifier`` on both message sources and writes the resulting
    word information to ``DB_FILE``. Progress is reported on stdout.

    Args:
        ham_name: Source of non-spam messages, handed to ``train`` with
            ``False`` as the third argument.
        spam_name: Source of spam messages, handed to ``train`` with
            ``True`` as the third argument.
    """
    # Every print below takes one pre-formatted argument so the emitted text
    # is the same under the Python 2 statement and the Python 3 function.
    rc_dir = os.path.expanduser(RC_DIR)
    if not os.path.exists(rc_dir):
        print("Creating %s directory..." % (RC_DIR,))
        os.mkdir(rc_dir)

    bayes = CdbClassifier()
    print('Training with ham...')
    train(bayes, ham_name, False)
    print('Training with spam...')
    train(bayes, spam_name, True)

    print('Update probabilities and writing DB...')
    # The context manager closes the database file even when
    # save_wordinfo() raises, instead of leaking the handle.
    with open(DB_FILE, "wb") as db:
        bayes.save_wordinfo(db)
    print('done')
def filter_message(hamdir, spamdir):
    """Read one message from stdin and deliver it to the ham or spam folder.

    The message is spooled into a temporary file obtained from
    ``maketmp(hamdir)``, prefixed with the ``DTLINE`` environment variable
    if it is set. Messages smaller than ``SIZE_LIMIT`` are scored with the
    classifier database in ``DB_FILE``; larger ones are not scored and get
    probability 0.0. The temporary file is then renamed into
    ``<spamdir>/new/`` when the score exceeds ``SPAM_CUTOFF`` and into
    ``<hamdir>/new/`` otherwise.

    Args:
        hamdir: Directory whose ``new`` subdirectory receives non-spam
            messages; also passed to ``maketmp``.
        spamdir: Directory whose ``new`` subdirectory receives spam.

    Raises:
        Whatever exception interrupts delivery, re-raised after the
        temporary file has been removed.
    """
    # Give up (exit status 1) if delivery has not finished within 24 hours.
    signal.signal(signal.SIGALRM, lambda s, f: sys.exit(1))
    signal.alarm(24 * 60 * 60)

    # write message to temporary file (must be on same partition)
    tmpfile, pathname, filename = maketmp(hamdir)
    try:
        tmpfile.write(os.environ.get("DTLINE", ""))  # delivered-to line
        total_size = 0  # was ``bytes``, which shadowed the builtin
        blocks = []
        while True:
            block = sys.stdin.read(BLOCK_SIZE)
            if not block:
                break
            total_size += len(block)
            if total_size < SIZE_LIMIT:
                blocks.append(block)
            else:
                # Too large to be scored: the buffered copy will never be
                # used, so release it instead of holding it until the end.
                del blocks[:]
            tmpfile.write(block)
        tmpfile.close()

        if total_size < SIZE_LIMIT:
            msgdata = ''.join(blocks)
            del blocks  # drop the pieces before parsing to limit peak memory
            msg = email.message_from_string(msgdata)
            del msgdata
            # Close the database file once scoring is finished.
            with open(DB_FILE, 'rb') as db_fp:
                prob = CdbClassifier(db_fp).spamprob(tokenize(msg))
        else:
            prob = 0.0

        if prob > SPAM_CUTOFF:
            destdir = spamdir
        else:
            destdir = hamdir
        os.rename(pathname, "%s/new/%s" % (destdir, filename))
    except:
        # Bare except is kept on purpose: it also catches the SystemExit
        # raised by the alarm handler, so a timed-out delivery does not
        # leave the temporary file behind. The exception is re-raised.
        os.unlink(pathname)
        raise
usage(msg) return 1 if len(args) != 1: usage() return 1 cdbname = args[0] dbname = usedb = None for opt, arg in opts: if opt in ("-h", "--help"): usage() return 0 dbname, usedb = storage.database_type(opts) store = storage.open_storage(dbname, usedb) bayes = CdbClassifier() items = [] for word in store._wordinfokeys(): record = store._wordinfoget(word) prob = store.probability(record) items.append((word, str(prob))) cdbfile = open(cdbname, "wb") cdb.cdb_make(cdbfile, items) cdbfile.close() if __name__ == "__main__": sys.exit(main(sys.argv[1:]))