def print_message_score(msg_name, msg_fp):
    """Print the spam probability of one message plus its evidence.

    msg_name -- label printed in front of the score.
    msg_fp   -- open file object the message is parsed from.

    The first output line is "<msg_name> <probability>".  Each following
    line shows the repr of one evidence word and its probability, as
    returned by the classifier's spamprob(..., evidence=True).
    """
    msg = email.message_from_file(msg_fp)
    db_fp = open(DB_FILE, 'rb')
    try:
        bayes = CdbClassifier(db_fp)
        prob, evidence = bayes.spamprob(tokenize(msg), evidence=True)
        # A single pre-formatted argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print("%s %s" % (msg_name, prob))
        for word, word_prob in evidence:
            print("   %r %s" % (word, word_prob))
    finally:
        # Release the database handle even if scoring or printing fails.
        db_fp.close()
Exemple #2
0
def filter_message(hamdir, spamdir):
    """Read one message from stdin and file it under hamdir or spamdir.

    hamdir  -- directory whose new/ subdirectory receives the message when
               its score is not above SPAM_CUTOFF; the temporary file is
               also obtained from maketmp(hamdir).
    spamdir -- directory whose new/ subdirectory receives the message when
               its score is above SPAM_CUTOFF.

    A message whose total length reaches SIZE_LIMIT is not classified and
    is filed under hamdir.  On any failure the temporary file is removed
    and the exception is re-raised.
    """
    # Exit with status 1 if the alarm fires, 24 hours from now.
    signal.signal(signal.SIGALRM, lambda s, f: sys.exit(1))
    signal.alarm(24 * 60 * 60)
    tmpfile, pathname, filename = maketmp(hamdir)
    try:
        tmpfile.write(os.environ.get("DTLINE", "")) # delivered-to line
        size = 0
        blocks = []
        while True:
            block = sys.stdin.read(BLOCK_SIZE)
            if not block:
                break
            size += len(block)
            if size < SIZE_LIMIT:
                blocks.append(block)
            elif blocks:
                # Too large to classify: the buffered copy will never be
                # used, so drop it instead of holding it until exit.
                del blocks[:]
            tmpfile.write(block)
        tmpfile.close()
        if size < SIZE_LIMIT:
            msgdata = ''.join(blocks)
            del blocks
            msg = email.message_from_string(msgdata)
            del msgdata
            db_fp = open(DB_FILE, 'rb')
            try:
                bayes = CdbClassifier(db_fp)
                prob = bayes.spamprob(tokenize(msg))
            finally:
                # Do not leave the database handle open.
                db_fp.close()
        else:
            prob = 0.0
        if prob > SPAM_CUTOFF:
            os.rename(pathname, "%s/new/%s" % (spamdir, filename))
        else:
            os.rename(pathname, "%s/new/%s" % (hamdir, filename))
    except:
        # Deliberately bare: the SystemExit raised by the alarm handler
        # must also trigger cleanup.  The exception is always re-raised.
        os.unlink(pathname)
        raise
Exemple #3
0
 def train_messages(ham_name, spam_name):
     """Create database using messages.

     ham_name  -- passed to train() with the flag False.
     spam_name -- passed to train() with the flag True.

     Creates the expanded RC_DIR directory if it does not exist, trains a
     fresh CdbClassifier, and writes its word info to DB_FILE.
     """
     rc_dir = os.path.expanduser(RC_DIR)
     if not os.path.exists(rc_dir):
         print("Creating", RC_DIR, "directory...")
         os.mkdir(rc_dir)

     bayes = CdbClassifier()
     print('Training with ham...')
     train(bayes, ham_name, False)
     print('Training with spam...')
     train(bayes, spam_name, True)

     print('Update probabilities and writing DB...')
     # The with-block closes the file even if save_wordinfo() raises.
     with open(DB_FILE, "wb") as db:
         bayes.save_wordinfo(db)
     print('done')
def train_messages(ham_name, spam_name):
    """Create database using messages.

    ham_name  -- passed to train() with the flag False.
    spam_name -- passed to train() with the flag True.

    Creates the expanded RC_DIR directory if it does not exist, trains a
    fresh CdbClassifier, and writes its word info to DB_FILE.
    """
    rc_dir = os.path.expanduser(RC_DIR)
    if not os.path.exists(rc_dir):
        # A single pre-formatted argument prints the same text whether
        # print is the Python 2 statement or the Python 3 function.
        print("Creating %s directory..." % (RC_DIR,))
        os.mkdir(rc_dir)
    bayes = CdbClassifier()
    print('Training with ham...')
    train(bayes, ham_name, False)
    print('Training with spam...')
    train(bayes, spam_name, True)
    print('Update probabilities and writing DB...')
    db = open(DB_FILE, "wb")
    try:
        bayes.save_wordinfo(db)
    finally:
        # Close the database file even if writing it fails.
        db.close()
    print('done')
def filter_message(hamdir, spamdir):
    """Read one message from stdin and file it under hamdir or spamdir.

    hamdir  -- directory whose new/ subdirectory receives the message when
               its score is not above SPAM_CUTOFF; the temporary file is
               also obtained from maketmp(hamdir).
    spamdir -- directory whose new/ subdirectory receives the message when
               its score is above SPAM_CUTOFF.

    A message whose total length reaches SIZE_LIMIT is not classified and
    is filed under hamdir.  On any failure the temporary file is removed
    and the exception is re-raised.
    """
    # Exit with status 1 if the alarm fires, 24 hours from now.
    signal.signal(signal.SIGALRM, lambda s, f: sys.exit(1))
    signal.alarm(24 * 60 * 60)

    # write message to temporary file (must be on same partition)
    tmpfile, pathname, filename = maketmp(hamdir)
    try:
        tmpfile.write(os.environ.get("DTLINE", "")) # delivered-to line
        total = 0
        blocks = []  # copy kept for the classifier; None once too large
        while 1:
            block = sys.stdin.read(BLOCK_SIZE)
            if not block:
                break
            total += len(block)
            if total < SIZE_LIMIT:
                blocks.append(block)
            else:
                # The message will not be classified, so release the
                # buffered copy instead of holding it until exit.
                blocks = None
            tmpfile.write(block)
        tmpfile.close()

        if total < SIZE_LIMIT:
            msgdata = ''.join(blocks)
            del blocks
            msg = email.message_from_string(msgdata)
            del msgdata
            db_fp = open(DB_FILE, 'rb')
            try:
                bayes = CdbClassifier(db_fp)
                prob = bayes.spamprob(tokenize(msg))
            finally:
                # Do not leave the database handle open.
                db_fp.close()
        else:
            prob = 0.0

        if prob > SPAM_CUTOFF:
            os.rename(pathname, "%s/new/%s" % (spamdir, filename))
        else:
            os.rename(pathname, "%s/new/%s" % (hamdir, filename))
    except:
        # Deliberately bare: the SystemExit raised by the alarm handler
        # must also trigger cleanup.  The exception is always re-raised.
        os.unlink(pathname)
        raise
Exemple #6
0
        usage(msg)
        return 1

    if len(args) != 1:
        usage()
        return 1
    cdbname = args[0]

    dbname = usedb = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0

    dbname, usedb = storage.database_type(opts)
    store = storage.open_storage(dbname, usedb)

    bayes = CdbClassifier()
    items = []
    for word in store._wordinfokeys():
        record = store._wordinfoget(word)
        prob = store.probability(record)
        items.append((word, str(prob)))
    cdbfile = open(cdbname, "wb")
    cdb.cdb_make(cdbfile, items)
    cdbfile.close()


if __name__ == "__main__":
    # Run main() on the command-line arguments (program name stripped)
    # and use its return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)