Ejemplo n.º 1
0
    def open(self, mode):
        """Open the hammie database in ``mode`` ('r' or 'c').

        Preconditions, each enforced by an assertion: no handle is
        currently held in ``self.h``, ``self.mode`` is still unset, and
        ``mode`` is one of the two accepted values.
        """
        assert not self.h, "Cannot reopen, close first."
        assert not self.mode, "Mode should be None on open, bad state."
        assert mode in ('r', 'c'), "Must give a valid mode: r, c."

        # Record the mode first; the handle is opened with the recorded value.
        self.mode = mode
        handle = hammie.open(self.dbname, self.usedb, self.mode)
        self.h = handle
Ejemplo n.º 2
0
 def open(self, mode):
     """Make sure a database handle is open in ``mode``.

     Nothing happens when a handle already exists and was opened in the
     same mode.  Otherwise an existing handle whose mode is not 'r' is
     stored first, and a new handle is obtained from ``hammie.open``.
     """
     has_handle = self.h is not None
     if has_handle and self.mode == mode:
         return
     if has_handle and self.mode != 'r':
         # Persist the current handle before it is replaced.
         self.h.store()
     self.mode = mode
     self.h = hammie.open(self.dbname, self.usedb, self.mode)
Ejemplo n.º 3
0
    def open(self, mode):
        """Open the hammie database; ``mode`` is 'r' or 'c'.

        Asserts that nothing is open yet (``self.h`` falsy), that
        ``self.mode`` has not been set, and that ``mode`` is valid, in
        that order.
        """
        valid_modes = ('r', 'c')
        assert not self.h, "Cannot reopen, close first."
        assert not self.mode, "Mode should be None on open, bad state."
        assert mode in valid_modes, "Must give a valid mode: r, c."

        self.mode = mode
        # The handle is opened with the mode that was just stored.
        self.h = hammie.open(self.dbname, self.usedb, self.mode)
Ejemplo n.º 4
0
Archivo: nway.py Proyecto: Xodarap/Eipi
def main(args):
    """Tag the message on stdin with the first database that scores it high.

    ``args`` is the command-line argument list: an optional ``-h`` followed
    by ``tag=database`` pairs.  The message is scored against each database
    in order; the first score at or above the configured
    ("Categorization", "spam_cutoff") option sets the
    X-Spambayes-Classification header to "<tag>; <score>".  When no
    database reaches the cutoff the header is set to "unsure".  The
    message is then written to stdout.

    Returns 0.  A pair without '=' raises ValueError from the unpacking.
    """
    opts, args = getopt.getopt(args, "h")

    for opt, arg in opts:
        if opt == '-h':
            help()
            return 0

    msg = mboxutils.get_message(sys.stdin)
    # Drop any classification header already present on the message.
    try:
        del msg["X-Spambayes-Classification"]
    except KeyError:
        pass
    for pair in args:
        tag, db = pair.split('=', 1)
        h = hammie.open(db, True, 'r')
        score = h.score(msg)
        if score >= Options.options["Categorization", "spam_cutoff"]:
            msg["X-Spambayes-Classification"] = "%s; %.2f" % (tag, score)
            break
    else:
        # Loop finished without a break: no database claimed the message.
        msg["X-Spambayes-Classification"] = "unsure"

    # Keep the "From " envelope line only when the input had one.
    sys.stdout.write(msg.as_string(unixfrom=(msg.get_unixfrom()
                                             is not None)))
    return 0
Ejemplo n.º 5
0
def main(args):
    """Classify the message on stdin against a list of tagged databases.

    ``args`` may start with ``-h`` (print usage and return) and is
    otherwise a sequence of ``tag=database`` pairs.  The first database
    whose score reaches the ("Categorization", "spam_cutoff") option
    labels the message; when none does, the label is "unsure".  The
    labelled message is written to stdout.  Always returns 0.
    """
    opts, args = getopt.getopt(args, "h")

    for flag, _ in opts:
        if flag == '-h':
            usage()
            return 0

    header = "X-Spambayes-Classification"
    msg = mboxutils.get_message(sys.stdin)
    try:
        del msg[header]
    except KeyError:
        pass

    # Default verdict; replaced by the first database that reaches the cutoff.
    verdict = "unsure"
    for pair in args:
        tag, db = pair.split('=', 1)
        score = hammie.open(db, True, 'r').score(msg)
        if score >= Options.options["Categorization", "spam_cutoff"]:
            verdict = "%s; %.2f" % (tag, score)
            break
    msg[header] = verdict

    has_from_line = msg.get_unixfrom() is not None
    sys.stdout.write(msg.as_string(unixfrom=has_from_line))
    return 0
Ejemplo n.º 6
0
 def __init__(self):
     """Merge the hammierc files into the options and open the database.

     The system-wide '/etc/hammierc' and the user's '~/.hammierc' are
     merged, the storage file name and database flag are read from the
     options, and the database is opened with mode 'r' as ``self.ham``.
     """
     config = Options.options
     rc_files = ['/etc/hammierc', os.path.expanduser('~/.hammierc')]
     config.mergefiles(rc_files)
     storage_file = config.hammiefilter_persistent_storage_file
     self.dbname = os.path.expanduser(storage_file)
     self.usedb = config.hammiefilter_persistent_use_database
     self.ham = hammie.open(self.dbname, self.usedb, 'r')
Ejemplo n.º 7
0
 def open(self, mode):
     """Make sure a database handle is open in ``mode``.

     A handle already open in the requested mode is left alone.  When a
     handle exists in a different mode it is stored (unless its mode is
     'r') and closed before ``hammie.open`` supplies the replacement.
     """
     has_handle = self.h is not None
     if has_handle and self.mode == mode:
         return
     if has_handle:
         if self.mode != 'r':
             # Persist the current handle before it is closed.
             self.h.store()
         self.h.close()
     self.mode = mode
     self.h = hammie.open(self.dbname, self.usedb, self.mode)
Ejemplo n.º 8
0
 def open(self):
     """(Re)open the database with mode 'r' when needed.

     The database is opened when no handle exists yet, or when the file's
     modification time is newer than the one recorded at the last open.
     """
     current_mtime = os.path.getmtime(self.dbname)
     up_to_date = self.h is not None and not self.modtime < current_mtime
     if up_to_date:
         return
     self.h = hammie.open(self.dbname, self.usedb, 'r')
     self.modtime = current_mtime
Ejemplo n.º 9
0
 def __init__(self):
     """Load the hammierc configuration and open the database as ``self.ham``.

     Options from '/etc/hammierc' and '~/.hammierc' are merged before the
     storage file name (with '~' expanded) and the database flag are read.
     The database is opened with mode 'r'.
     """
     opts = Options.options
     user_rc = os.path.expanduser('~/.hammierc')
     opts.mergefiles(['/etc/hammierc', user_rc])
     configured_name = opts.hammiefilter_persistent_storage_file
     self.dbname = os.path.expanduser(configured_name)
     self.usedb = opts.hammiefilter_persistent_use_database
     self.ham = hammie.open(self.dbname, self.usedb, 'r')
Ejemplo n.º 10
0
def drive(nsets,decision):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % \
                i for i in range(1, nsets+1)]
    hamdirs  = [get_pathname_option("TestDriver", "ham_directories") % \
                i for i in range(1, nsets+1)]

    spamfns = [(x,y,1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x,y,0) for x in hamdirs for y in os.listdir(x)]

    nham = len(hamfns)
    nspam = len(spamfns)
    cc = CostCounter.nodelay()

    allfns = {}
    for fn in spamfns+hamfns:
        allfns[fn] = None

    d = hammie.open('weaktest.db', False)

    hamtrain = 0
    spamtrain = 0
    n = 0
    for dir,name, is_spam in allfns.iterkeys():
        n += 1
        m=msgs.Msg(dir, name).guts
        if debug > 1:
            print "trained:%dH+%dS"%(hamtrain,spamtrain)
        scr=d.score(m)
        if debug > 1:
            print "score:%.3f"%scr
        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print "Spam with score %.2f"%scr
                cc.spam(scr)
            else:
                if debug > 0:
                    print "Ham with score %.2f"%scr
                cc.ham(scr)
        de = decision(scr,is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1
        if n % 100 == 0:
            print "%5d trained:%dH+%dS wrds:%d"%(
                n, hamtrain, spamtrain, len(d.bayes.wordinfo))
            print cc
    print "="*70
    print "%5d trained:%dH+%dS wrds:%d"%(
        n, hamtrain, spamtrain, len(d.bayes.wordinfo))
    print cc
Ejemplo n.º 11
0
def main():
    """Main program; parse options and go.

    Options handled here: -h (usage), -f (force), -n (train new only),
    -q (quiet; clears the module-level ``loud`` flag), -g/-s (add a ham or
    spam source), -r (remove trained messages) and -o (set an option from
    the command line).  The database selection options are interpreted by
    ``storage.database_type``; when it reports no choice, the configured
    storage settings are used.  The database is stored only when at least
    one ham or spam source was trained.
    """
    global loud
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hfqnrd:p:g:s:o:')
    except getopt.error as msg:
        usage(2, msg)
    if not opts:
        usage(2, "No options given")
    force = False
    trainnew = False
    removetrained = False
    good = []
    spam = []
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == "-f":
            force = True
        elif opt == "-n":
            trainnew = True
        elif opt == "-q":
            loud = False
        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")
    if usedb is None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage", "persistent_storage_file")
    h = hammie.open(pck, usedb, "c")
    # Must be bound before the loops: with no -g/-s sources neither loop
    # body runs, and the check below would otherwise hit an unbound name.
    save = False
    for g in good:
        if loud:
            print("Training ham (%s):" % g)
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
    for s in spam:
        if loud:
            print("Training spam (%s):" % s)
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
    if save:
        h.store()
Ejemplo n.º 12
0
class SpambayesFilter(BufferAllFilter):
    checker = hammie.open(dbf, 1, 'r')

    def filter(self, s):
        if self.reply.split()[1] == '200':
            prob = self.checker.score("%s\r\n%s" % (self.serverheaders, s))
            print "|  prob: %.5f" % prob
            if prob >= Options.options["Categorization", "spam_cutoff"]:
                print self.serverheaders
                print "text:", s[0:40], "...", s[-40:]
                return "not authorized"
        return s
Ejemplo n.º 13
0
def drive(nsets, decision):
    print options.display()

    spamdirs = [get_pathname_option("TestDriver", "spam_directories") % i for i in range(1, nsets + 1)]
    hamdirs = [get_pathname_option("TestDriver", "ham_directories") % i for i in range(1, nsets + 1)]

    spamfns = [(x, y, 1) for x in spamdirs for y in os.listdir(x)]
    hamfns = [(x, y, 0) for x in hamdirs for y in os.listdir(x)]

    nham = len(hamfns)
    nspam = len(spamfns)
    cc = CostCounter.nodelay()

    allfns = {}
    for fn in spamfns + hamfns:
        allfns[fn] = None

    d = hammie.open("weaktest.db", False)

    hamtrain = 0
    spamtrain = 0
    n = 0
    for dir, name, is_spam in allfns.iterkeys():
        n += 1
        m = msgs.Msg(dir, name).guts
        if debug > 1:
            print "trained:%dH+%dS" % (hamtrain, spamtrain)
        scr = d.score(m)
        if debug > 1:
            print "score:%.3f" % scr
        if not decision.tooearly():
            if is_spam:
                if debug > 0:
                    print "Spam with score %.2f" % scr
                cc.spam(scr)
            else:
                if debug > 0:
                    print "Ham with score %.2f" % scr
                cc.ham(scr)
        de = decision(scr, is_spam)
        if de == TRAIN_AS_SPAM:
            d.train_spam(m)
            spamtrain += 1
        elif de == TRAIN_AS_HAM:
            d.train_ham(m)
            hamtrain += 1
        if n % 100 == 0:
            print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
            print cc
    print "=" * 70
    print "%5d trained:%dH+%dS wrds:%d" % (n, hamtrain, spamtrain, len(d.bayes.wordinfo))
    print cc
Ejemplo n.º 14
0
 def check_spambayes(self, arguments,
                     cache=[]):
     """Score ``self.message`` with SpamBayes and kill, reject or report it.

     ``arguments`` holds up to two optional numbers: the REJECT threshold
     (default: ``spam_cutoff`` from the SpamBayes options) and the KILL
     threshold (default 1.00).  Returns silently when SpamBayes cannot be
     imported or the data base cannot be opened.

     ``cache`` is intentionally a shared mutable default: it keeps the
     opened data base (or None after a failed attempt) across calls.
     """
     # All arguments are optional.  First is REJECT threshold.
     if not arguments:
         try:
             from spambayes.Options import options
         except ImportError:
             return
         spam_cutoff = options.spam_cutoff
     else:
         spam_cutoff, arguments = float(arguments[0]), arguments[1:]
     assert 0.0 < spam_cutoff <= 1.0, spam_cutoff
     # Second argument is KILL threshold.
     kill_cutoff = 1.00
     if arguments:
         kill_cutoff, arguments = float(arguments[0]), arguments[1:]
     assert spam_cutoff <= kill_cutoff <= 1.0, (spam_cutoff, kill_cutoff)
     assert not arguments, arguments
     # Fetch data base, caching it on first call.
     if not cache:
         data_base_name = os.path.expanduser('~pinard/etc/nospam/hammie.db')
         try:
             from spambayes import hammie
             opened = hammie.open(data_base_name, True, 'r')
         except (ImportError, IOError):
             opened = None
         cache.append(opened)
     data_base = cache[0]
     if data_base is None:
         return
     # Evaluate spamicity and act accordingly.
     spamicity = data_base.score(self.message)
     if spamicity > kill_cutoff:
         self.checker.kill("Spambayes score is %4.2f." % spamicity)
     elif spamicity > spam_cutoff:
         self.checker.reject("Spambayes score is %4.2f." % spamicity)
     elif self.run.debug >= 2:
         self.checker.report("DEBUG: Spambayes score is %4.2f." % spamicity)
Ejemplo n.º 15
0
        elif opt == '-u':
            unknown.append(arg)
        elif opt == '-U':
            untrain_mode = 1
        elif opt == '-r':
            reverse = 1
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        usage(2, "Must specify one of -d or -D")

    save = False

    h = hammie.open(pck, usedb, mode)

    if not untrain_mode:
        for g in good:
            print "Training ham (%s):" % g
            train(h, g, False)
            save = True

        for s in spam:
            print "Training spam (%s):" % s
            train(h, s, True)
            save = True
    else:
        for g in good:
            print "Untraining ham (%s):" % g
            untrain(h, g, False)
Ejemplo n.º 16
0
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage", "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    for g in good:
        if loud:
            print "Training ham (%s):" % g
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True

    for s in spam:
        if loud:
            print "Training spam (%s):" % s
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
Ejemplo n.º 17
0
 def main():
     """Main program; parse options and go.

     Options handled here: -h (usage), -g/-s (add a ham or spam source and
     switch the database mode to 'c'), -f (filter stdin to stdout),
     -u (add a source to score), -U (untrain instead of train) and
     -r (passed to ``score`` as ``reverse``).  The database itself is
     selected by ``storage.database_type``; a missing selection is a usage
     error.  The database is stored whenever any source was trained or
     untrained.
     """
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'hd:Ufg:s:p:u:r')
     except getopt.error as msg:
         usage(2, msg)

     if not opts:
         usage(2, "No options given")

     good = []
     spam = []
     unknown = []
     reverse = 0
     untrain_mode = 0
     do_filter = False
     mode = 'r'
     for opt, arg in opts:
         if opt == '-h':
             usage(0)
         elif opt == '-g':
             good.append(arg)
             mode = 'c'
         elif opt == '-s':
             spam.append(arg)
             mode = 'c'
         elif opt == "-f":
             do_filter = True
         elif opt == '-u':
             unknown.append(arg)
         elif opt == '-U':
             untrain_mode = 1
         elif opt == '-r':
             reverse = 1

     pck, usedb = storage.database_type(opts)
     if args:
         usage(2, "Positional arguments not allowed")
     if usedb is None:
         usage(2, "Must specify one of -d or -D")

     save = False
     h = hammie.open(pck, usedb, mode)

     # Training and untraining share the same shape; only the worker
     # function and the progress label differ.
     if untrain_mode:
         apply_source, label = untrain, "Untraining"
     else:
         apply_source, label = train, "Training"

     for g in good:
         print("%s ham (%s):" % (label, g))
         apply_source(h, g, False)
         save = True

     for s in spam:
         print("%s spam (%s):" % (label, s))
         apply_source(h, s, True)
         save = True

     if save:
         h.store()

     if do_filter:
         msg = sys.stdin.read()
         filtered = h.filter(msg)
         sys.stdout.write(filtered)

     if unknown:
         spams = hams = unsures = 0
         for source in unknown:
             if len(unknown) > 1:
                 print("Scoring", source)
             # score() returns per-source (spam, ham, unsure) counts.
             s, g, u = score(h, source, reverse)
             spams += s
             hams += g
             unsures += u
         print("Total %d spam, %d ham, %d unsure" % (spams, hams, unsures))
Ejemplo n.º 18
0
Archivo: data.py Proyecto: jamii/scampy
def classifier(scam, mode='c'):
  """Open the hammie database stored at 'scams/<scam>/db'.

  ``mode`` is forwarded by keyword.  The second positional parameter of
  ``hammie.open`` selects the storage type, so passing ``mode`` there
  would never set the open mode.
  """
  # Might have to first create the folder/file
  return hammie.open('scams/%s/db' % scam, mode=mode)
Ejemplo n.º 19
0
def main():
    filters = Filters()
    duplicate = Duplicate()
    filters.add(duplicate, AppendFile("spam2.mbox"))
    filters.add(WhiteListFrom("good_emails.txt"), KEEP)
    filters.add(WhiteListSubstrings("subject", [
                   'ABCD:',
                   '[Python-announce]',
                   '[Python]',
                   '[Bioinfo]',
                   '[EuroPython]',
                   ]),
                KEEP)
    filters.add(WhiteListSubstrings("to", [
        "*****@*****.**",
        "*****@*****.**",
        ]),
                KEEP)
    names = ["john", "", "jon", "johnathan"]
    valid_emails = ([name + "@lectroid.com" for name in names] +
                    [name + "@bigboote.org" for name in names] +
                    ["*****@*****.**"])
    filters.add(IllegalDeliveredTo(valid_emails), DELETE)
    filters.add(SpamAssassin(), AppendFile("spam2.mbox"))
    filters.add(IsVirus, DELETE)
    h = hammie.open("cull.spambayes", "dbm", "r")
    filters.add(IsSpam(h, 0.90), AppendFile("spam.mbox"))
    server_configs = [("mail.example.com",
                          "*****@*****.**", "password"),
                      ("popserver.big.com", "ceo", "12345"), ]
    error_count = 0
    cumulative_log = {SPAM: 0, VIRUS: 0}
    initial_log = None
    start_time = None  # init'ed only after initial_log is created
    while 1:
        error_flag = False
        duplicate.unique.clear()  # Hack!
        for server, user, pwd in server_configs:
            try:
                log = filter_server( (server, user, pwd), filters)
            except KeyboardInterrupt:
                raw_input("Press enter to continue. ")
            except StandardError:
                raise
            except:
                error_flag = True
                traceback.print_exc()
                continue
            if VERBOSE_LEVEL > 1 and log:
                print "  ** Summary **"
                for x in (log.tests, log.actions):
                    items = x.items()
                    if items:
                        items.sort()
                        for k, v in items:
                            print "  %s: %s" % (k, v)
                        print
            cumulative_log[SPAM] += log.tests.get(SPAM, 0)
            cumulative_log[VIRUS] += log.tests.get(VIRUS, 0)
        if initial_log is None:
            initial_log = cumulative_log.copy()
            start_time = time.time()
            if VERBOSE_LEVEL:
                print "Stats: %d spams, %d virus" % (
                    initial_log[SPAM], initial_log[VIRUS])
        else:
            if VERBOSE_LEVEL:
                delta_t = time.time() - start_time
                delta_t = max(delta_t, 1)  #
                print "Stats: %d spams (%.2f/hr), %d virus (%.2f/hr)" % (
                    cumulative_log[SPAM],
                    (cumulative_log[SPAM] - initial_log[SPAM]) /
                             delta_t * 3600,
                    cumulative_log[VIRUS],
                    (cumulative_log[VIRUS] - initial_log[VIRUS]) /
                             delta_t * 3600)
        if error_flag:
            error_count += 1
        if error_count > 0:
            restart_network()
            error_count = 0
        delay = 10 * 60
        while delay:
            try:
                wait(delay)
                break
            except KeyboardInterrupt:
                print
                while 1:
                    cmd = raw_input("enter, delay, or quit? ")
                    if cmd in ("q", "quit"):
                        raise SystemExit(0)
                    elif cmd == "":
                        delay = 0
                        break
                    elif cmd.isdigit():
                        delay = int(cmd)
                        break
                    else:
                        print "Unknown command."
Ejemplo n.º 20
0
 def __init__(self):
     """Open the hammie database named by ``self.hammieFile`` with mode 'c'."""
     handle = hammie.open(self.hammieFile, mode='c')
     self.h = handle
Ejemplo n.º 21
0
def main():
    filters = Filters()

    duplicate = Duplicate()
    filters.add(duplicate, AppendFile("spam2.mbox"))

    # A list of everyone who has emailed me this year.
    # Keep their messages on the server.
    filters.add(WhiteListFrom("good_emails.txt"), KEEP)

    # My mailing lists.
    filters.add(
        WhiteListSubstrings("subject", [
            'ABCD:',
            '[Python-announce]',
            '[Python]',
            '[Bioinfo]',
            '[EuroPython]',
        ]), KEEP)

    filters.add(
        WhiteListSubstrings("to", [
            "*****@*****.**",
            "*****@*****.**",
        ]), KEEP)

    names = ["john", "", "jon", "johnathan"]
    valid_emails = ([name + "@lectroid.com" for name in names] +
                    [name + "@bigboote.org"
                     for name in names] + ["*****@*****.**"])

    filters.add(IllegalDeliveredTo(valid_emails), DELETE)
    filters.add(SpamAssassin(), AppendFile("spam2.mbox"))

    # Get rid of anything which smells like an exectuable.
    filters.add(IsVirus, DELETE)

    # Use SpamBayes to identify spam.  Make a local copy then
    # delete from the server.
    h = hammie.open("cull.spambayes", "dbm", "r")
    filters.add(IsSpam(h, 0.90), AppendFile("spam.mbox"))

    # These are my POP3 accounts.
    server_configs = [
        ("mail.example.com", "*****@*****.**", "password"),
        ("popserver.big.com", "ceo", "12345"),
    ]

    # The main culling loop.
    error_count = 0
    cumulative_log = {SPAM: 0, VIRUS: 0}
    initial_log = None
    start_time = None  # init'ed only after initial_log is created
    while 1:
        error_flag = False
        duplicate.unique.clear()  # Hack!
        for server, user, pwd in server_configs:
            try:
                log = filter_server((server, user, pwd), filters)
            except KeyboardInterrupt:
                raw_input("Press enter to continue. ")
            except StandardError:
                raise
            except:
                error_flag = True
                traceback.print_exc()
                continue

            if VERBOSE_LEVEL > 1 and log:
                print "  ** Summary **"
                for x in (log.tests, log.actions):
                    items = x.items()
                    if items:
                        items.sort()
                        for k, v in items:
                            print "  %s: %s" % (k, v)
                        print

            cumulative_log[SPAM] += log.tests.get(SPAM, 0)
            cumulative_log[VIRUS] += log.tests.get(VIRUS, 0)

        if initial_log is None:
            initial_log = cumulative_log.copy()
            start_time = time.time()
            if VERBOSE_LEVEL:
                print "Stats: %d spams, %d virus" % (initial_log[SPAM],
                                                     initial_log[VIRUS])
        else:
            if VERBOSE_LEVEL:
                delta_t = time.time() - start_time
                delta_t = max(delta_t, 1)  #

                print "Stats: %d spams (%.2f/hr), %d virus (%.2f/hr)" % (
                    cumulative_log[SPAM],
                    (cumulative_log[SPAM] - initial_log[SPAM]) / delta_t *
                    3600, cumulative_log[VIRUS],
                    (cumulative_log[VIRUS] - initial_log[VIRUS]) / delta_t *
                    3600)

        if error_flag:
            error_count += 1

        if error_count > 0:
            restart_network()
            error_count = 0

        delay = 10 * 60
        while delay:
            try:
                wait(delay)
                break
            except KeyboardInterrupt:
                print
                while 1:
                    cmd = raw_input("enter, delay, or quit? ")
                    if cmd in ("q", "quit"):
                        raise SystemExit(0)
                    elif cmd == "":
                        delay = 0
                        break
                    elif cmd.isdigit():
                        delay = int(cmd)
                        break
                    else:
                        print "Unknown command."
Ejemplo n.º 22
0
def classifier(scam, mode='c'):
    """Open the hammie database stored at 'scams/<scam>/db'.

    ``mode`` is forwarded by keyword.  The second positional parameter of
    ``hammie.open`` selects the storage type, so passing ``mode`` there
    would never set the open mode.
    """
    # Might have to first create the folder/file
    return hammie.open('scams/%s/db' % scam, mode=mode)
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage",
                                          "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    for g in good:
        if loud:
            print "Training ham (%s):" % g
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True

    for s in spam:
        if loud:
            print "Training spam (%s):" % s
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
Ejemplo n.º 24
0
#!/bin/python
"""Wrapper script for testing the performance of SpamBayes.

Run a canned mailbox through a SpamBayes ham/spam classifier.
"""

import os.path
from spambayes import hammie, mboxutils

__author__ = "[email protected] (Skip Montanaro)"
__contact__ = "[email protected] (Collin Winter)"


def bench_spambayes(ham_classifier, messages):
    """Score each message in ``messages`` once; the scores are discarded."""
    for message in messages:
        ham_classifier.score(message)


# Benchmark inputs are looked up in the directory containing this script.
# Alternative location kept for reference (a "data" subdirectory):
# data_dir = os.path.join(os.path.dirname(__file__), "data")
data_dir = os.path.dirname(__file__)
mailbox = os.path.join(data_dir, "spambayes_mailbox")
# Alternative mailbox file kept for reference:
#mailbox = os.path.join(data_dir, "small_mailbox")
ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")
# Read the whole mailbox into a list before scoring starts.
messages = list(mboxutils.getmbox(mailbox))
# Open the stored classifier as a "pickle" store with mode "r".
ham_classifier = hammie.open(ham_data, "pickle", "r")
# Runs at import time: this script has no __main__ guard.
bench_spambayes(ham_classifier, messages)
Ejemplo n.º 25
0
        elif opt == '-u':
            unknown.append(arg)
        elif opt == '-U':
            untrain_mode = 1
        elif opt == '-r':
            reverse = 1
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        usage(2, "Must specify one of -d or -D")

    save = False

    h = hammie.open(pck, usedb, mode)

    if not untrain_mode:
        for g in good:
            print "Training ham (%s):" % g
            train(h, g, False)
            save = True

        for s in spam:
            print "Training spam (%s):" % s
            train(h, s, True)
            save = True
    else:
        for g in good:
            print "Untraining ham (%s):" % g
            untrain(h, g, False)
Ejemplo n.º 26
0
 def open(self):
     mtime = os.path.getmtime(self.dbname)
     if self.h is None or self.modtime < mtime:
         self.h = hammie.open(self.dbname, self.usedb, 'r')
         self.modtime = mtime
Ejemplo n.º 27
0
Run a canned mailbox through a SpamBayes ham/spam classifier.
"""

import os.path

import perf

from spambayes import hammie, mboxutils


__author__ = "[email protected] (Skip Montanaro)"
__contact__ = "[email protected] (Collin Winter)"


def bench_spambayes(ham_classifier, messages):
    """Run ``ham_classifier.score`` over every message, ignoring the results."""
    for message in messages:
        ham_classifier.score(message)


if __name__ == "__main__":
    # Script entry point: register the SpamBayes benchmark with a perf runner.
    runner = perf.Runner()
    runner.metadata['description'] = "Run the SpamBayes benchmark."

    # Inputs live in the "data" directory next to this script.
    data_dir = os.path.join(os.path.dirname(__file__), "data")
    mailbox = os.path.join(data_dir, "spambayes_mailbox")
    ham_data = os.path.join(data_dir, "spambayes_hammie.pkl")
    # Read the whole mailbox into a list before handing it to the runner.
    messages = list(mboxutils.getmbox(mailbox))
    # Open the stored classifier as a "pickle" store with mode "r".
    ham_classifier = hammie.open(ham_data, "pickle", "r")

    # The classifier and messages are passed through to bench_spambayes.
    runner.bench_func('spambayes', bench_spambayes, ham_classifier, messages)