Example #1
0
    def onCvresults(self, *args, **kwargs):
        """Display the results of a cross-validation test with the
        current settings against the defaults.

        The test is run twice through self.timCV(): once after every
        option for which options.no_restore() is false has been reset to
        its default, and once after the values supplied in kwargs have
        been applied.  The two sets of rates are handed to self.compare()
        and the comparison text is written out inside a box.

        kwargs must contain a "how" entry (discarded) and a
        "TestToolsUI_source" entry; a missing one raises KeyError.  Any
        "<section>_<option>" entry overrides that option for the second
        run.

        The option values in force on entry are put back before the
        comparison is made, even if one of the test runs raises.
        """
        del kwargs["how"]
        self._writePreamble("CV Test Results")
        nsets = options["TestToolsUI", "n"]

        # Value of every option on entry, keyed by (section, option), so
        # that the shared options object can be put back afterwards.
        saved = {}
        try:
            # With defaults first.
            self.write("<p>Testing with defaults...</p>")
            for name in options.options(True):
                # Each name is parsed as "[section]option".
                sect, opt = name[1:].split(']', 1)
                saved[(sect, opt)] = options[(sect, opt)]
                # Ignore those that have do_not_restore as True
                # (These are predominately storage options, and at least
                # the cache directory ones may be needed later on).
                if not options.no_restore(sect, opt):
                    options.set(sect, opt, options.default(sect, opt))
            options["TestToolsUI", "source"] = kwargs["TestToolsUI_source"]
            # XXX Cache this somewhere?  If the testing data isn't changing,
            # XXX and the user is running multiple tests, then it doesn't
            # XXX make much sense to rerun the 'default's test over and over
            # XXX again.
            # (The second item returned is unused; errors.read() can be
            # printed when debugging.)
            cv_out, errors = self.timCV(nsets)
            defaults = self.rates(cv_out)

            # Now with specified settings.
            self.write("<p>Testing with selected settings...</p>")
            for name in options.options(True):
                sect, opt = name[1:].split(']', 1)
                try:
                    value = kwargs["%s_%s" % (sect, opt)]
                except KeyError:
                    # Leave as the default.
                    pass
                else:
                    options.set(sect, opt, value)
            cv_out, errors = self.timCV(nsets)
            current = self.rates(cv_out)
        finally:
            # Restore the settings.  This runs even when a test run fails
            # part-way through; otherwise the shared options object would
            # be left holding the test values.
            for name in options.options(True):
                sect, opt = name[1:].split(']', 1)
                if (sect, opt) in saved:
                    options.set(sect, opt, saved[(sect, opt)])

        # Do the comparison.
        comp, errors = self.compare(defaults, current)

        # Output the results
        # XXX This is just what you'd get from running cmp.py
        # XXX at the moment - it could be prettied up a bit.
        comp = comp.read()
        box = self._buildBox('Cross-validation test', None,
                             cgi.escape(comp).replace("\n", "<br />"))
        self.write(box)
        self._writePostamble()
Example #2
0
    def onCvresults(self, *args, **kwargs):
        """Display the results of a cross-validation test with the
        current settings against the defaults.

        Two runs of self.timCV() are made: the first with each option
        for which options.no_restore() is false set to its default, the
        second with the per-option values found in kwargs applied on
        top.  self.rates() is applied to the output of each run, the two
        results are given to self.compare(), and the comparison text is
        written to the page.

        kwargs must hold "how" (removed and ignored) and
        "TestToolsUI_source"; either being absent raises KeyError.
        Entries named "<section>_<option>" supply the selected settings.

        Whatever happens during the runs, the option values that were in
        force on entry are reinstated before the comparison is produced.
        """
        del kwargs["how"]
        self._writePreamble("CV Test Results")
        nsets = options["TestToolsUI", "n"]

        # Snapshot of the options on entry: (section, option) -> value.
        saved = {}
        try:
            # With defaults first.
            self.write("<p>Testing with defaults...</p>")
            for full_name in options.options(True):
                # Each name is parsed as "[section]option".
                sect, opt = full_name[1:].split(']', 1)
                saved[(sect, opt)] = options[(sect, opt)]
                # Ignore those that have do_not_restore as True
                # (These are predominately storage options, and at least
                # the cache directory ones may be needed later on).
                if not options.no_restore(sect, opt):
                    options.set(sect, opt, options.default(sect, opt))
            options["TestToolsUI", "source"] = kwargs["TestToolsUI_source"]
            # XXX Cache this somewhere?  If the testing data isn't changing,
            # XXX and the user is running multiple tests, then it doesn't
            # XXX make much sense to rerun the 'default's test over and over
            # XXX again.
            # (errors is not used here; print errors.read() to debug.)
            cv_out, errors = self.timCV(nsets)
            defaults = self.rates(cv_out)

            # Now with specified settings.
            self.write("<p>Testing with selected settings...</p>")
            for full_name in options.options(True):
                sect, opt = full_name[1:].split(']', 1)
                try:
                    value = kwargs["%s_%s" % (sect, opt)]
                except KeyError:
                    # Leave as the default.
                    pass
                else:
                    options.set(sect, opt, value)
            cv_out, errors = self.timCV(nsets)
            current = self.rates(cv_out)
        finally:
            # Restore the settings.  Doing this in a finally clause keeps
            # a failed run from leaving the shared options object set to
            # the values used for testing.
            for full_name in options.options(True):
                sect, opt = full_name[1:].split(']', 1)
                if (sect, opt) in saved:
                    options.set(sect, opt, saved[(sect, opt)])

        # Do the comparison.
        comp, errors = self.compare(defaults, current)

        # Output the results
        # XXX This is just what you'd get from running cmp.py
        # XXX at the moment - it could be prettied up a bit.
        comp = comp.read()
        box = self._buildBox('Cross-validation test', None,
                             cgi.escape(comp).replace("\n", "<br />"))
        self.write(box)
        self._writePostamble()
Example #3
0
def open(db_name, mode):
    """Open the dbm file db_name in the given mode.

    The backend name is taken from whichdb when the file already exists
    and the ("globals", "dbm_type") option differs from its default;
    otherwise the lower-cased option value is used.  The name is then
    looked up in open_funcs and the opener found there is called with
    (db_name, mode).

    Raises error when open_funcs has no opener for the chosen name.
    """
    sniff_file = os.path.exists(db_name) and (
        options.default("globals", "dbm_type") !=
        options["globals", "dbm_type"])
    if not sniff_file:
        dbm_type = options["globals", "dbm_type"].lower()
    else:
        dbm_type = whichdb.whichdb(db_name)
        # On win32 with Python older than 2.3, "dbhash" is swapped for
        # "db3hash".
        old_win32 = (sys.platform == "win32" and
                     sys.version_info < (2, 3))
        if old_win32 and dbm_type == "dbhash":
            dbm_type = "db3hash"
    opener = open_funcs.get(dbm_type)
    if opener is None:
        raise error("Unknown dbm type: %s" % dbm_type)
    return opener(db_name, mode)
Example #4
0
def open(db_name, mode):
    """Return the result of opening db_name in mode with a dbm backend.

    When db_name exists and the ("globals", "dbm_type") option is not at
    its default value, whichdb is asked which backend the file uses.  In
    every other case the option value, lower-cased, names the backend.
    The backend name selects an opener from open_funcs.

    Raises error if open_funcs has no entry for the backend name.
    """
    if not os.path.exists(db_name):
        file_decides = False
    else:
        file_decides = (options.default("globals", "dbm_type") !=
                        options["globals", "dbm_type"])

    if file_decides:
        # let the file tell us what db to use
        dbm_type = whichdb.whichdb(db_name)
        if dbm_type == "dbhash":
            # if we are using Windows and Python < 2.3, then we need to
            # use db3hash, not dbhash.
            if sys.platform == "win32" and sys.version_info < (2, 3):
                dbm_type = "db3hash"
    else:
        # fresh file or overridden - open with what the user specified
        dbm_type = options["globals", "dbm_type"].lower()

    handler = open_funcs.get(dbm_type)
    if handler is not None:
        return handler(db_name, mode)
    raise error("Unknown dbm type: %s" % dbm_type)
Example #5
0
def open(db_name, mode):
    """Pick a dbm backend for db_name and open the file with it.

    The backend name comes from whichdb.whichdb(db_name) if the file is
    present and the ("globals", "dbm_type") option differs from its
    default; if not, it is the lower-cased option value.  The opener
    registered under that name in open_funcs is called as
    opener(db_name, mode) and its result returned.

    Raises error when no opener is registered under the backend name.
    """
    from_file = False
    if os.path.exists(db_name):
        if (options.default("globals", "dbm_type") !=
                options["globals", "dbm_type"]):
            # let the file tell us what db to use
            from_file = True
            dbm_type = whichdb.whichdb(db_name)
            # if we are using Windows and Python < 2.3, then we need to
            # use db3hash, not dbhash.
            legacy_windows = (sys.platform == "win32" and
                              sys.version_info < (2, 3))
            if legacy_windows and dbm_type == "dbhash":
                dbm_type = "db3hash"
    if not from_file:
        # fresh file or overridden - open with what the user specified
        dbm_type = options["globals", "dbm_type"].lower()

    open_func = open_funcs.get(dbm_type)
    if open_func is None:
        raise error("Unknown dbm type: %s" % dbm_type)
    return open_func(db_name, mode)
Example #6
0
 def onCvresults(self, *args, **kwargs):
     """Display the results of a cross-validation test with the
     current settings against the defaults.

     self.timCV() is run once with each option for which
     options.no_restore() is false reset to its default, and once more
     after the "<section>_<option>" values found in kwargs have been
     applied.  The rates from both runs go to self.compare() and the
     comparison text is written out inside a box.

     kwargs must contain "how" (deleted, otherwise unused) and
     "TestToolsUI_source"; KeyError is raised if either is missing.

     The option values present on entry are put back before the
     comparison, including when a test run raises.
     """
     del kwargs["how"]
     self._writePreamble("CV Test Results")
     nsets = options["TestToolsUI", "n"]

     # Option values on entry, keyed by (section, option).
     saved = {}
     try:
         # First run: defaults.
         self.write("<p>Testing with defaults...</p>")
         for name in options.options(True):
             # Each name is parsed as "[section]option".
             sect, opt = name[1:].split(']', 1)
             saved[(sect, opt)] = options[(sect, opt)]
             if not options.no_restore(sect, opt):
                 options.set(sect, opt, options.default(sect, opt))
         options["TestToolsUI", "source"] = kwargs["TestToolsUI_source"]
         cv_out, errors = self.timCV(nsets)
         defaults = self.rates(cv_out)

         # Second run: the settings passed in kwargs.
         self.write("<p>Testing with selected settings...</p>")
         for name in options.options(True):
             sect, opt = name[1:].split(']', 1)
             try:
                 value = kwargs["%s_%s" % (sect, opt)]
             except KeyError:
                 # Not supplied: leave the option as it is.
                 pass
             else:
                 options.set(sect, opt, value)
         cv_out, errors = self.timCV(nsets)
         current = self.rates(cv_out)
     finally:
         # Put the saved values back even if a run failed, so the shared
         # options object is not left holding the test values.
         for name in options.options(True):
             sect, opt = name[1:].split(']', 1)
             if (sect, opt) in saved:
                 options.set(sect, opt, saved[(sect, opt)])

     # Compare the two runs and write out the result.
     comp, errors = self.compare(defaults, current)
     comp = comp.read()
     box = self._buildBox('Cross-validation test', None,
                          cgi.escape(comp).replace("\n", "<br />"))
     self.write(box)
     self._writePostamble()
Example #7
0
import getopt

from spambayes.Options import options, get_pathname_option
from spambayes import mboxutils, hammie, Corpus, storage

# Switch on the Corpus module's Verbose flag.
Corpus.Verbose = True

# For usage(); referenced by docstring above
program = sys.argv[0]

# Default database name.
# The default for persistent_storage_file changed from ~/.hammiedb to
# hammie.db; as a bit of a hack to counter that, an option still at its
# default is pointed back at ~/.hammiedb.  This will work unless a user
# had hammie.db as their value for persistent_storage_file.
if (options["Storage", "persistent_storage_file"] ==
        options.default("Storage", "persistent_storage_file")):
    options["Storage", "persistent_storage_file"] = os.path.join(
        "~", ".hammiedb")
DEFAULTDB = get_pathname_option("Storage", "persistent_storage_file")

# Cutoffs taken from the Categorization options: the probability at
# which a message is considered spam, and the ham cutoff.
SPAM_THRESHOLD = options["Categorization", "spam_cutoff"]
HAM_THRESHOLD = options["Categorization", "ham_cutoff"]


def train(h, msgs, is_spam):
    """Train bayes with all messages from a mailbox."""
    # NOTE(review): this excerpt appears to be cut short - h and is_spam
    # are not used in the lines visible here.
    # Turn the msgs argument into an iterable mailbox object.
    mbox = mboxutils.getmbox(msgs)
    # Count of messages iterated over so far.
    i = 0
    for msg in mbox:
        i += 1
Example #8
0
import getopt

from spambayes.Options import options, get_pathname_option
from spambayes import mboxutils, hammie, Corpus, storage

# Set the Verbose flag on the Corpus module.
Corpus.Verbose = True

# For usage(); referenced by docstring above
program = sys.argv[0]

# Default database name.
# This is a bit of a hack: the default for persistent_storage_file
# changed from ~/.hammiedb to hammie.db, so when the option still equals
# its default it is reset to ~/.hammiedb before the pathname is read.
# This will work unless a user had hammie.db as their value for
# persistent_storage_file.
if (options["Storage", "persistent_storage_file"] ==
        options.default("Storage", "persistent_storage_file")):
    options["Storage", "persistent_storage_file"] = os.path.join(
        "~", ".hammiedb")
DEFAULTDB = get_pathname_option("Storage", "persistent_storage_file")

# Probability at which a message is considered spam, and the matching
# ham cutoff, both read from the Categorization options.
SPAM_THRESHOLD = options["Categorization", "spam_cutoff"]
HAM_THRESHOLD = options["Categorization", "ham_cutoff"]


def train(h, msgs, is_spam):
    """Train bayes with all messages from a mailbox."""
    # NOTE(review): the visible excerpt looks truncated - neither h nor
    # is_spam is referenced in these lines.
    # Obtain an iterable mailbox object for the msgs argument.
    mbox = mboxutils.getmbox(msgs)
    # Number of messages iterated over so far.
    i = 0
    for msg in mbox:
        i += 1
Example #9
0
import getopt

from spambayes.Options import options
from spambayes import classifier, mboxutils, hammie, Corpus

# Switch on the Corpus module's Verbose flag.
Corpus.Verbose = True

# For usage(); referenced by docstring above
program = sys.argv[0]

# Default database name: the persistent_storage_file option with any
# leading "~" expanded.
DEFAULTDB = os.path.expanduser(
    options["Storage", "persistent_storage_file"])
# This is a bit of a hack to counter the default for
# persistent_storage_file changing from ~/.hammiedb to hammie.db: a
# value equal to the default is replaced by the expanded ~/.hammiedb.
# This will work unless a user had hammie.db as their value for
# persistent_storage_file.
if DEFAULTDB == options.default("Storage", "persistent_storage_file"):
    DEFAULTDB = os.path.expanduser(
        os.path.join("~", ".hammiedb"))

# Probability at which a message is considered spam, and the matching
# ham cutoff, both read from the Categorization options.
SPAM_THRESHOLD = options["Categorization", "spam_cutoff"]
HAM_THRESHOLD = options["Categorization", "ham_cutoff"]


def train(h, msgs, is_spam):
    """Train bayes with all messages from a mailbox."""
    mbox = mboxutils.getmbox(msgs)
    i = 0
    for msg in mbox:
        i += 1
        if i % 10 == 0:
            sys.stdout.write("\r%6d" % i)