예제 #1
0
def drive(nsets,decision):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % \
                i for i in range(1, nsets+1)]
    hamdirs  = [get_pathname_option("TestDriver", "ham_directories") % \
                i for i in range(1, nsets+1)]

    spamfns = [(x,y,1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x,y,0) for x in hamdirs for y in os.listdir(x)]

    nham = len(hamfns)
    nspam = len(spamfns)
    cc = CostCounter.nodelay()

    allfns = {}
    for fn in spamfns+hamfns:
        allfns[fn] = None

    d = hammie.open('weaktest.db', False)

    hamtrain = 0
    spamtrain = 0
    n = 0
    for dir,name, is_spam in allfns.iterkeys():
        n += 1
        m=msgs.Msg(dir, name).guts
        if debug > 1:
            print "trained:%dH+%dS"%(hamtrain,spamtrain)
        scr=d.score(m)
        if debug > 1:
            print "score:%.3f"%scr
        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print "Spam with score %.2f"%scr
                cc.spam(scr)
            else:
                if debug > 0:
                    print "Ham with score %.2f"%scr
                cc.ham(scr)
        de = decision(scr,is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1
        if n % 100 == 0:
            print "%5d trained:%dH+%dS wrds:%d"%(
                n, hamtrain, spamtrain, len(d.bayes.wordinfo))
            print cc
    print "="*70
    print "%5d trained:%dH+%dS wrds:%d"%(
        n, hamtrain, spamtrain, len(d.bayes.wordinfo))
    print cc
예제 #2
0
def drive(nsets, decision):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets + 1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets + 1)]

    spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]

    nham = len(hamfns)
    nspam = len(spamfns)
    cc = CostCounter.nodelay()

    allfns = {}
    for fn in spamfns + hamfns:
        allfns[fn] = None

    d = hammie.open("weaktest.db", False)

    hamtrain = 0
    spamtrain = 0
    n = 0
    for dir, name, is_spam in allfns.iterkeys():
        n += 1
        m = msgs.Msg(dir, name).guts
        if debug > 1:
            print "trained:%dH+%dS" % (hamtrain, spamtrain)
        scr = d.score(m)
        if debug > 1:
            print "score:%.3f" % scr
        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print "Spam with score %.2f" % scr
                cc.spam(scr)
            else:
                if debug > 0:
                    print "Ham with score %.2f" % scr
                cc.ham(scr)
        de = decision(scr, is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1
        if n % 100 == 0:
            print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
            print cc
    print "=" * 70
    print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
    print cc