コード例 #1
0
def main(args):
    """Parse the command line and print spam counts for the given tokens.

    ``args`` is the argument list without the program name.  The tokens
    are the positional arguments left after option parsing, or the result
    of ``tokenize(sys.stdin)`` when -t/--tokenize is given.

    Returns an exit status: 0 on success or after showing help, 1 on a
    usage error.
    """
    short_flags = "hrto:"
    long_flags = ["help", "re", "tokenize", "option="]
    try:
        opts, args = getopt.getopt(args, short_flags, long_flags)
    except getopt.GetoptError as msg:
        usage(msg)
        return 1

    use_re = False
    from_stdin = False
    for flag, value in opts:
        if flag in ("-h", "--help"):
            usage()
            return 0
        if flag in ("-r", "--re"):
            use_re = True
        elif flag in ("-t", "--tokenize"):
            from_stdin = True
        elif flag in ("-o", "--option"):
            options.set_from_cmdline(value, sys.stderr)

    # The two modes are mutually exclusive.
    if use_re and from_stdin:
        usage("-r and -t may not be used at the same time")
        return 1

    # The whole option list is handed to database_type, which picks the
    # database name and type out of it.
    dbname, usedb = database_type(opts)
    store = open_storage(dbname, usedb)

    tokens = tokenize(sys.stdin) if from_stdin else args
    if not tokens:
        usage("need tokens on cmd line or -t w/ msg on stdin")
        return 1
    print_spamcounts(tokens, store, use_re)
    return 0
コード例 #2
0
def main(profiling=False):
    """Run the filter command line against one or more mailboxes.

    Options are read from ``sys.argv``.  Each of -f/-g/-s/-t/-G/-S appends
    one HammieFilter action; when none is given, ``h.filter`` is used.
    Every action is applied, in command-line order, to every message of
    every mailbox named on the command line ("-" when none is named), and
    each message is then written to stdout.

    ``profiling`` is True only for the nested call made under cProfile by
    -P, which keeps -P from starting a second profiler.
    """
    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hvxd:p:nfgstGSo:P',
                               ['help', 'version', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--version'):
            version()
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '-f':
            actions.append(h.filter)
        elif opt == '-g':
            actions.append(h.train_ham)
        elif opt == '-s':
            actions.append(h.train_spam)
        elif opt == '-t':
            actions.append(h.filter_train)
        elif opt == '-G':
            actions.append(h.untrain_ham)
        elif opt == '-S':
            actions.append(h.untrain_spam)
        elif opt == '-P':
            if not profiling:
                try:
                    import cProfile
                except ImportError:
                    # No profiler available: carry on unprofiled.
                    pass
                else:
                    return cProfile.run("main(True)")
        elif opt == "-n":
            create_newdb = True
    # -d/-p are not handled above; the whole option list goes to
    # storage.database_type, which returns the database name and type.
    h.dbname, h.usedb = storage.database_type(opts)
    if create_newdb or not os.path.exists(h.dbname):
        h.newdb()
        print("Created new database in", h.dbname, file=sys.stderr)
        if create_newdb:
            sys.exit(0)
    if not actions:
        actions = [h.filter]
    if not args:
        args = ["-"]
    # Invariant for the whole run, so computed once.
    dash_only = args == ["-"]
    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
            # With "-" as the only source, keep the envelope line only
            # when the message had one; otherwise always emit it.
            if dash_only:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
コード例 #3
0
def main(args):
    """Write every word's probability from the token store to a CDB file.

    ``args`` is the argument list without the program name.  Exactly one
    positional argument is required: the name of the file to write.

    Returns 1 on a usage error and 0 after -h/--help; falls off the end
    (returning None) once the file has been written.
    """
    try:
        opts, args = getopt.getopt(args, "hd:p:", ["help", "database=", "pickle="])
    except getopt.GetoptError as msg:
        usage(msg)
        return 1
    # The positional-argument check comes before the help check, so
    # "-h" without an output name is reported as a usage error.
    if len(args) != 1:
        usage()
        return 1
    cdbname = args[0]
    if any(opt in ("-h", "--help") for opt, _ in opts):
        usage()
        return 0
    dbname, usedb = storage.database_type(opts)
    store = storage.open_storage(dbname, usedb)
    # NOTE(review): the instance is never used below; the call is kept in
    # case constructing it has side effects -- confirm and remove.
    bayes = CdbClassifier()
    items = [
        (word, str(store.probability(store._wordinfoget(word))))
        for word in store._wordinfokeys()
    ]
    # "with" closes the output file even when cdb_make raises.
    with open(cdbname, "wb") as cdbfile:
        cdb.cdb_make(cdbfile, items)
コード例 #4
0
ファイル: sb_server.py プロジェクト: Xodarap/Eipi
    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on.

        When self.isTest is set, a pickle database named
        '_pop3proxy_test.pickle' is used and the caches, corpora and
        trainers are not created at all.
        """
        # Python 2 print statement; the trailing comma suppresses the newline.
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        # A DBName already set on the instance (by the branch above or by
        # the driver code) wins over the configured one.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        
        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            def ensureDir(dirname):
                # Create dirname; an already-existing directory is not an
                # error, any other OSError is re-raised.
                try:
                    os.mkdir(dirname)
                except OSError, e:
                    if e.errno != errno.EEXIST:
                        raise

            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            # Python 2 map() is eager, so this runs ensureDir on all three.
            map(ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            # Expiry age: days converted to seconds.
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.  They are attached as observers only
            # after the expiry pass above has run.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
コード例 #5
0
ファイル: message.py プロジェクト: thomasvangurp/spamfilter
def database_type():
    """Return the (name, type) pair for the message-info database.

    The pair comes from storage.database_type, called with an empty
    option list and the "messageinfo_storage_file" option as the default
    name.  The storage options here may lag behind those in storage.py,
    so a type that is not listed in _storage_types is replaced by
    "pickle".
    """
    name, kind = storage.database_type(
        (), default_name=("Storage", "messageinfo_storage_file"))
    if kind in _storage_types.keys():
        return name, kind
    return name, "pickle"
コード例 #6
0
    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on.

        When self.isTest is set, a pickle database named
        '_pop3proxy_test.pickle' is used and the caches, corpora and
        trainers are not created at all.
        """
        # Python 2 print statement; the trailing comma suppresses the newline.
        print "Loading database...",
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        # A DBName already set on the instance (by the branch above or by
        # the driver code) wins over the configured one.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        # The message-info database is taken from a freshly constructed
        # Message instance.
        self.mdb = spambayes.message.Message().message_info_db

        # Load stats manager.
        self.stats = Stats.Stats(options, self.mdb)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the self-test,
        # so as not to clutter the filesystem.
        if not self.isTest:
            # Create/open the Corpuses.  Use small cache sizes to avoid hogging
            # lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            # Python 2 map() is eager, so storage.ensureDir runs on all three.
            map(storage.ensureDir, [sc, hc, uc])
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            # Expiry age: days converted to seconds.
            age = options["Storage", "cache_expiry_days"]*24*60*60
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               '[0123456789\-]*',
                                               cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              '[0123456789\-]*',
                                              cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  '[0123456789\-]*',
                                                  cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.  They are attached as observers only
            # after the expiry pass above has run.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
コード例 #7
0
ファイル: CoreUI.py プロジェクト: dbrandt/spambayes-lite
    def create_workers(self):
        """Create the classifier, stats manager, corpora and trainers.

        Uses the options that were initialised in __init__ and then
        possibly overridden by the driver code.  When ``self.is_test`` is
        set, a pickle database named '_core_server.pickle' is used and no
        caches, corpora or trainers are set up.
        """
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        # A db_name already present on the instance wins over the
        # configured one.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        self.stats = Stats.Stats(options,
                                 spambayes.message.Message().message_info_db)

        self.build_status_strings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if self.is_test:
            return

        # Create/open the Corpuses.  Use small cache sizes to avoid
        # hogging lots of memory.
        sc = get_pathname_option("Storage", "core_spam_cache")
        hc = get_pathname_option("Storage", "core_ham_cache")
        uc = get_pathname_option("Storage", "core_unknown_cache")
        for d in [sc, hc, uc]:
            storage.ensureDir(d)
        if self.gzip_cache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        # Expiry age: days converted to seconds.
        age = options["Storage", "cache_expiry_days"]*24*60*60
        # Raw string: "\-" in a plain literal is an invalid escape
        # sequence and triggers a warning on current Python 3.  The
        # pattern text itself is unchanged.
        name_pattern = r'[0123456789\-]*'
        self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                           name_pattern,
                                           cacheSize=20)
        self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                          name_pattern,
                                          cacheSize=20)
        self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                              name_pattern,
                                              cacheSize=20)

        # Given that (hopefully) users will get to the stage
        # where they do not need to do any more regular training to
        # be satisfied with spambayes' performance, we expire old
        # messages from not only the trained corpora, but the unknown
        # as well.
        self.spamCorpus.removeExpiredMessages()
        self.hamCorpus.removeExpiredMessages()
        self.unknownCorpus.removeExpiredMessages()

        # Create the Trainers.  They are attached as observers only after
        # the expiry pass above has run.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)
コード例 #8
0
 def __init__(self):
     """Set up option values and resolve the database name and type.

     If the storage-file option still holds its default value it is
     pointed at "~/.hammiedb" before the hammierc files are merged in.
     """
     opts = Options.options
     key = ("Storage", "persistent_storage_file")
     if opts[key] == opts.default(*key):
         opts[key] = "~/.hammiedb"
     rc_files = ['/etc/hammierc', os.path.expanduser('~/.hammierc')]
     opts.merge_files(rc_files)
     self.dbname, self.usedb = storage.database_type([])
     self.mode = None
     self.h = None
コード例 #9
0
 def database_type():
     """Return the (name, type) pair for the message-info database.

     The pair comes from storage.database_type, called with an empty
     option list and the "messageinfo_storage_file" option as the default
     name.  A type that is not listed in _storage_types is replaced by
     "pickle".
     """
     dn = ("Storage", "messageinfo_storage_file")
     nm, typ = storage.database_type((), default_name=dn)
     # Test membership on the mapping itself instead of copying its keys
     # into a list first (_storage_types is presumably a dict -- confirm).
     if typ not in _storage_types:
         typ = "pickle"
     return nm, typ
コード例 #10
0
 def createWorkers(self):
     """Load the classifier and message databases and build status strings.

     There aren't many workers in an IMAP State - most of the work is
     done elsewhere.  ``DBName`` and ``MDBName`` are looked up through the
     database_type helpers only when they are not already set on the
     instance.
     """
     if not hasattr(self, "DBName"):
         self.DBName, self.useDB = storage.database_type([])
     self.bayes = storage.open_storage(self.DBName, self.useDB)
     # Check the attribute that is actually assigned below.  Testing the
     # misspelt "MBDName" is always true, which overwrites a preset
     # MDBName.
     if not hasattr(self, "MDBName"):
         self.MDBName, self.useMDB = message.database_type()
     self.mdb = message.open_storage(self.MDBName, self.useMDB)
     self.stats = Stats(options, self.mdb)
     self.buildStatusStrings()
コード例 #11
0
def main():
    """Main program; parse options and go.

    Reads options from ``sys.argv``, opens the database and trains on
    every folder given with -g (ham) and -s (spam).  The database is
    stored only when at least one folder was trained on.
    """
    global loud
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hfqnrd:p:g:s:o:')
    except getopt.error as msg:
        usage(2, msg)
    if not opts:
        usage(2, "No options given")
    force = False
    trainnew = False
    removetrained = False
    good = []
    spam = []
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == "-f":
            force = True
        elif opt == "-n":
            trainnew = True
        elif opt == "-q":
            loud = False
        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    # -d/-p are not handled above; the whole option list goes to
    # storage.database_type.
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")
    if usedb is None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage",
                                  "persistent_storage_file")
    h = hammie.open(pck, usedb, "c")
    # Must be initialised: without any -g/-s (e.g. only -f) neither loop
    # body runs and the final check would hit an unbound local.
    save = False
    for g in good:
        if loud:
            print("Training ham (%s):" % g)
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
    for s in spam:
        if loud:
            print("Training spam (%s):" % s)
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True
    if save:
        h.store()
コード例 #12
0
ファイル: spam.py プロジェクト: DeaconDesperado/salmon
    def __init__(self, storage_file, config):
        """Point the options at ``storage_file`` and merge the config files.

        ``config`` is a path (``~`` is expanded) merged after
        '/etc/hammierc'.  Asserts that the "train_on_filter" option is off
        once the files have been merged.
        """
        settings = Options.options
        settings["Storage", "persistent_storage_file"] = storage_file
        rc_files = ['/etc/hammierc', os.path.expanduser(config)]
        settings.merge_files(rc_files)

        self.include_trained = Options.options["Headers", "include_trained"]
        self.dbname, self.usedb = storage.database_type([])

        self.mode = self.h = None

        train_on_filter = Options.options["Hammie", "train_on_filter"]
        assert not train_on_filter, "Cannot train_on_filter."
コード例 #13
0
 def __init__(self):
     """Set up option values, resolve the database and record its mtime."""
     opts = Options.options
     # This is a bit of a hack to counter the default for
     # persistent_storage_file changing from ~/.hammiedb to hammie.db
     # This will work unless a user:
     #   * had hammie.db as their value for persistent_storage_file, and
     #   * their config file was loaded by Options.py.
     key = ("Storage", "persistent_storage_file")
     if opts[key] == opts.default(*key):
         opts[key] = "~/.hammiedb"
     rc_files = ["/etc/hammierc", os.path.expanduser("~/.hammierc")]
     opts.merge_files(rc_files)
     self.dbname, self.usedb = storage.database_type([])
     # Modification time of the database file as it is right now.
     self.modtime = os.path.getmtime(self.dbname)
     self.h = None
コード例 #14
0
    def __init__(self, storage_file, config):
        """Configure the options for ``storage_file`` and merge ``config``.

        '/etc/hammierc' is merged first, then ``config`` with ``~``
        expanded.  Asserts that "train_on_filter" is disabled afterwards.
        """
        opts = Options.options
        storage_key = ("Storage", "persistent_storage_file")
        opts[storage_key] = storage_file
        opts.merge_files(['/etc/hammierc', os.path.expanduser(config)])

        self.include_trained = Options.options["Headers", "include_trained"]
        db_name, db_type = storage.database_type([])
        self.dbname = db_name
        self.usedb = db_type

        self.mode = None
        self.h = None

        assert not Options.options["Hammie", "train_on_filter"], \
            "Cannot train_on_filter."
コード例 #15
0
ファイル: sb_filter.py プロジェクト: Xodarap/Eipi
def main():
    """Run the filter command line against one or more mailboxes.

    Options are read from ``sys.argv``.  Each of -f/-g/-s/-t/-G/-S appends
    one HammieFilter action; when none is given, ``h.filter`` is used.
    Every action is applied, in command-line order, to every message of
    every mailbox named on the command line ("-" when none is named), and
    each message is then written to stdout.  With -n a new database is
    created and the program exits.
    """
    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                               ['help', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '-f':
            actions.append(h.filter)
        elif opt == '-g':
            actions.append(h.train_ham)
        elif opt == '-s':
            actions.append(h.train_spam)
        elif opt == '-t':
            actions.append(h.filter_train)
        elif opt == '-G':
            actions.append(h.untrain_ham)
        elif opt == '-S':
            actions.append(h.untrain_spam)
        elif opt == "-n":
            create_newdb = True
    # -d/-p are not handled above; the whole option list goes to
    # storage.database_type.
    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    if not actions:
        actions = [h.filter]

    if not args:
        args = ["-"]
    # Invariant for the whole run, so computed once.
    dash_only = args == ["-"]
    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
            # With "-" as the only source, keep the envelope line only
            # when the message had one; otherwise always emit it.
            if dash_only:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
コード例 #16
0
 def __init__(self):
     """Set up option values and resolve the database name and type."""
     cfg = Options.options
     # This is a bit of a hack to counter the default for
     # persistent_storage_file changing from ~/.hammiedb to hammie.db
     # This will work unless a user:
     #   * had hammie.db as their value for persistent_storage_file, and
     #   * their config file was loaded by Options.py.
     section, name = "Storage", "persistent_storage_file"
     still_default = cfg[section, name] == cfg.default(section, name)
     if still_default:
         cfg[section, name] = "~/.hammiedb"
     user_rc = os.path.expanduser('~/.hammierc')
     cfg.merge_files(['/etc/hammierc', user_rc])
     self.dbname, self.usedb = storage.database_type([])
     self.mode = None
     self.h = None
コード例 #17
0
ファイル: sb_pop3dnd.py プロジェクト: alexeip0/spambayes
    def createWorkers(self):
        """Load the classifier and message databases and build status strings.

        There aren't many workers in an IMAP State - most of the work is
        done elsewhere.  ``DBName`` and ``MDBName`` are looked up through
        the database_type helpers only when they are not already set on
        the instance.
        """
        # Load token and message databases.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        # Check the attribute that is actually assigned below.  Testing
        # the misspelt "MBDName" is always true, which overwrites a preset
        # MDBName.
        if not hasattr(self, "MDBName"):
            self.MDBName, self.useMDB = message.database_type()
        self.mdb = message.open_storage(self.MDBName, self.useMDB)

        # Load stats manager.
        self.stats = Stats(options, self.mdb)

        # Build status strings.
        self.buildStatusStrings()
コード例 #18
0
 def main():
     """Main program; parse options and go.

     Expects exactly one positional argument of the form IP:PORT, opens
     the database and serves an XMLHammie instance over XML-RPC on that
     address until interrupted.
     """
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:')
     except getopt.error as msg:
         usage(2, msg)

     options = Options.options
     for flag, value in opts:
         if flag == '-h':
             usage(0)
         elif flag == '-o':
             options.set_from_cmdline(value, sys.stderr)
     # -d/-p are not handled above; the whole option list goes to
     # storage.database_type.
     dbname, usedb = storage.database_type(opts)

     if len(args) != 1:
         usage(2, "IP:PORT not specified")
     host, port_text = args[0].split(":")
     address = (host, int(port_text))

     classifier = storage.open_storage(dbname, usedb)
     hammie_rpc = XMLHammie(classifier)
     handler_class = xmlrpc.server.SimpleXMLRPCRequestHandler
     server = ReusableSimpleXMLRPCServer(address, handler_class)
     server.register_instance(hammie_rpc)
     server.serve_forever()
コード例 #19
0
 def createWorkers(self):
     """Using the options that were initialised in __init__ and then
     possibly overridden by the driver code, create the Bayes object,
     the Corpuses, the Trainers and so on.

     When ``self.isTest`` is set, a pickle database named
     '_pop3proxy_test.pickle' is used and the caches, corpora and
     trainers are not created at all.
     """
     print("Loading database...", end=' ')
     if self.isTest:
         self.useDB = "pickle"
         self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
     # A DBName already present on the instance wins over the configured
     # one.
     if not hasattr(self, "DBName"):
         self.DBName, self.useDB = storage.database_type([])
     self.bayes = storage.open_storage(self.DBName, self.useDB)
     self.mdb = spambayes.message.Message().message_info_db
     self.stats = Stats.Stats(options, self.mdb)
     self.buildStatusStrings()
     # Nothing below is set up in test mode.
     if self.isTest:
         return

     sc = get_pathname_option("Storage", "spam_cache")
     hc = get_pathname_option("Storage", "ham_cache")
     uc = get_pathname_option("Storage", "unknown_cache")
     for d in [sc, hc, uc]:
         storage.ensureDir(d)
     if self.gzipCache:
         factory = GzipFileMessageFactory()
     else:
         factory = FileMessageFactory()
     # Expiry age: days converted to seconds.
     age = options["Storage", "cache_expiry_days"]*24*60*60
     # Raw string: "\-" in a plain literal is an invalid escape sequence
     # and triggers a warning on current Python 3.  The pattern text
     # itself is unchanged.
     name_pattern = r'[0123456789\-]*'
     self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                        name_pattern,
                                        cacheSize=20)
     self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                       name_pattern,
                                       cacheSize=20)
     self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                           name_pattern,
                                           cacheSize=20)
     # Expire old messages from all three corpora before the trainers
     # are attached as observers.
     self.spamCorpus.removeExpiredMessages()
     self.hamCorpus.removeExpiredMessages()
     self.unknownCorpus.removeExpiredMessages()
     self.spamTrainer = storage.SpamTrainer(self.bayes)
     self.hamTrainer = storage.HamTrainer(self.bayes)
     self.spamCorpus.addObserver(self.spamTrainer)
     self.hamCorpus.addObserver(self.hamTrainer)
コード例 #20
0
def run():
    """Command-line entry point: configure ``state`` and start the proxy.

    Options are read from ``sys.argv`` into the module-level ``state``.
    Up to two positional arguments are accepted, a server name and an
    optional port (110 when omitted); with more than two the module
    docstring is printed to stderr instead.
    """
    global state
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:')
    except getopt.error as msg:
        print(str(msg) + '\n\n' + __doc__, file=sys.stderr)
        sys.exit()

    for flag, value in opts:
        if flag == '-h':
            print(__doc__, file=sys.stderr)
            sys.exit()
        elif flag == '-b':
            state.launchUI = True
        elif flag == '-l':
            state.proxyPorts = [_addressAndPort(spec)
                                for spec in value.split(',')]
        elif flag == '-u':
            state.uiPort = int(value)
        elif flag == '-o':
            options.set_from_cmdline(value, sys.stderr)
    # -d/-p are not handled above; the whole option list goes to
    # storage.database_type.
    state.DBName, state.useDB = storage.database_type(opts)

    # Let the user know what they are using.
    version = get_current_version()
    print("%s\n" % (version.get_long_version("SpamBayes POP3 Proxy"),))

    if len(args) > 2:
        print(__doc__, file=sys.stderr)
        return

    if args:
        server_port = int(args[1]) if len(args) == 2 else 110
        state.servers = [(args[0], server_port)]
        # Default to listening on port 110 for a command-line server.
        if state.proxyPorts == []:
            state.proxyPorts = [('', 110)]
    try:
        prepare()
    except AlreadyRunningException:
        print("ERROR: The proxy is already running on this machine.", file=sys.stderr)
        print("Please stop the existing proxy and try again", file=sys.stderr)
        return
    start()
コード例 #21
0
def main():
    """Main program; parse options and go.

    Expects exactly one positional argument, the socket path handed to
    BNServer.  If the address is already in use the function returns
    quietly; otherwise it serves until idle and then removes the socket
    file.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:a:A:')
    except getopt.error as msg:
        usage(2, msg)
    if len(args) != 1:
        usage(2, "socket not specified")
    try:
        server = BNServer(args[0], BNRequest)
    except socket.error as e:
        # Read the error number from the attribute: indexing the
        # exception (e[0]) raises TypeError on Python 3.
        if e.errno != errno.EADDRINUSE:
            raise  # a real error
        # Address already in use: nothing to do.
    else:
        try:
            from spambayes import Options, storage
            options = Options.options
            for opt, arg in opts:
                if opt == '-h':
                    usage(0)
                elif opt == '-o':
                    options.set_from_cmdline(arg, sys.stderr)
                elif opt == '-a':
                    server.timeout = float(arg)
                elif opt == '-A':
                    server.number = int(arg)
            h = make_HammieFilter()
            h.dbname, h.usedb = storage.database_type(opts)
            server.hammie = h
            server.serve_until_idle()
            h.close()
        finally:
            # Best-effort removal of the socket file, whatever happened
            # while serving.
            try:
                os.unlink(args[0])
            except EnvironmentError:
                pass
コード例 #22
0
def main(profiling=False):
    """Run the filter command line against one or more mailboxes.

    Options are read from ``sys.argv``.  Each of -f/-g/-s/-t/-G/-S appends
    one HammieFilter action; when none is given, ``h.filter`` is used.
    Every action is applied, in command-line order, to every message of
    every mailbox named on the command line ("-" when none is named), and
    each message is then written to stdout.

    ``profiling`` is True only for the nested call made under cProfile by
    -P, which keeps -P from starting a second profiler.
    """
    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hvxd:p:nfgstGSo:P',
                               ['help', 'version', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--version'):
            version()
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '-f':
            actions.append(h.filter)
        elif opt == '-g':
            actions.append(h.train_ham)
        elif opt == '-s':
            actions.append(h.train_spam)
        elif opt == '-t':
            actions.append(h.filter_train)
        elif opt == '-G':
            actions.append(h.untrain_ham)
        elif opt == '-S':
            actions.append(h.untrain_spam)
        elif opt == '-P':
            if not profiling:
                try:
                    import cProfile
                except ImportError:
                    # No profiler available: carry on unprofiled.
                    pass
                else:
                    return cProfile.run("main(True)")
        elif opt == "-n":
            create_newdb = True
    # -d/-p are not handled above; the whole option list goes to
    # storage.database_type.
    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb or not os.path.exists(h.dbname):
        h.newdb()
        # Plain write() produces the same text as the Python 2 print
        # statement and also works on Python 3.
        sys.stderr.write("Created new database in %s\n" % (h.dbname,))
        if create_newdb:
            sys.exit(0)

    if not actions:
        actions = [h.filter]

    if not args:
        args = ["-"]
    # Invariant for the whole run, so computed once.
    dash_only = args == ["-"]
    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
            # With "-" as the only source, keep the envelope line only
            # when the message had one; otherwise always emit it.
            if dash_only:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
コード例 #23
0
            usage(0)
        elif opt == "-f":
            force = True
        elif opt == "-n":
            trainnew = True
        elif opt == "-q":
            loud = False
        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage", "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    for g in good:
        if loud:
            print "Training ham (%s):" % g
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
コード例 #24
0
 def main():
     """Main program; parse options and go.

     Reads options from ``sys.argv`` and then, in this order: trains (or,
     with -U, untrains) on every -g/-s folder, stores the database if
     anything was trained, filters stdin to stdout when -f is given, and
     scores every -u folder, printing the totals.
     """
     try:
         opts, args = getopt.getopt(sys.argv[1:], 'hd:Ufg:s:p:u:r')
     except getopt.error as msg:
         usage(2, msg)
     if not opts:
         usage(2, "No options given")

     good = []
     spam = []
     unknown = []
     reverse = 0
     untrain_mode = 0
     do_filter = False
     # The database is opened with mode 'r' unless a -g/-s folder is
     # given, in which case mode 'c' is used.
     mode = 'r'
     for opt, arg in opts:
         if opt == '-h':
             usage(0)
         elif opt == '-g':
             good.append(arg)
             mode = 'c'
         elif opt == '-s':
             spam.append(arg)
             mode = 'c'
         elif opt == "-f":
             do_filter = True
         elif opt == '-u':
             unknown.append(arg)
         elif opt == '-U':
             untrain_mode = 1
         elif opt == '-r':
             reverse = 1
     # -d/-p are not handled above; the whole option list goes to
     # storage.database_type.
     pck, usedb = storage.database_type(opts)
     if args:
         usage(2, "Positional arguments not allowed")
     if usedb is None:
         # The option string accepts -d and -p; there is no -D.
         usage(2, "Must specify one of -d or -p")

     save = False
     h = hammie.open(pck, usedb, mode)

     # -U switches both the handler and the progress message.
     if untrain_mode:
         verb, handler = "Untraining", untrain
     else:
         verb, handler = "Training", train
     for g in good:
         print("%s ham (%s):" % (verb, g))
         handler(h, g, False)
         save = True
     for s in spam:
         print("%s spam (%s):" % (verb, s))
         handler(h, s, True)
         save = True
     if save:
         h.store()

     if do_filter:
         msg = sys.stdin.read()
         filtered = h.filter(msg)
         sys.stdout.write(filtered)

     if unknown:
         spams = hams = unsures = 0
         # The loop variable has its own name so that unpacking the score
         # triple does not rebind it.
         for folder in unknown:
             if len(unknown) > 1:
                 print("Scoring", folder)
             s, g, u = score(h, folder, reverse)
             spams += s
             hams += g
             unsures += u
         print("Total %d spam, %d ham, %d unsure" % (spams, hams, unsures))
コード例 #25
0
            rdbname = arg
        elif opt == '-f':
            sbfname = arg
        elif opt == '-t':
            doTrain = True
        elif opt == '-c':
            doClassify = True
        elif opt == '-P':
            doPrompt = True
        elif opt == '-i':
            idxname = arg
        elif opt == '-L':
            logname = arg
        elif opt == '-W':
            pwd = arg
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    bdbname, useDBM = storage.database_type(opts)

    if not idxname:
        idxname = "%s.sbindex" % (ldbname)

    if (bdbname and ldbname and sbfname and (doTrain or doClassify)):
        run(bdbname, useDBM, ldbname, rdbname, \
            sbfname, doTrain, doClassify, pwd, idxname, logname)

        if doPrompt:
            raw_input("Press Enter to end ")
    else:
        print >> sys.stderr, __doc__
コード例 #26
0
ファイル: sb_server.py プロジェクト: ehuelsmann/spambayes
    for opt, arg in opts:
        if opt == '-h':
            print >> sys.stderr, __doc__
            sys.exit()
        elif opt == '-b':
            state.launchUI = True
        # '-p' and '-d' are handled by the storage.database_type call
        # below, in case you are wondering why they are missing.
        elif opt == '-l':
            state.proxyPorts = [_addressAndPort(a) for a in arg.split(',')]
        elif opt == '-u':
            state.uiPort = int(arg)
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)

    state.DBName, state.useDB = storage.database_type(opts)

    # Let the user know what they are using...
    v = get_current_version()
    print "%s\n" % (v.get_long_version("SpamBayes POP3 Proxy"),)

    if 0 <= len(args) <= 2:
        # Normal usage, with optional server name and port number.
        if len(args) == 1:
            state.servers = [(args[0], 110)]
        elif len(args) == 2:
            state.servers = [(args[0], int(args[1]))]

        # Default to listening on port 110 for command-line-specified servers.
        if len(args) > 0 and state.proxyPorts == []:
            state.proxyPorts = [('', 110)]
コード例 #27
0
            usage(0)
        elif opt == "-f":
            force = True
        elif opt == "-n":
            trainnew = True
        elif opt == "-q":
            loud = False
        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb == None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage",
                                          "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    for g in good:
        if loud:
            print "Training ham (%s):" % g
        train(h, g, False, force, trainnew, removetrained)
コード例 #28
0
def main():
    """Main program; parse options and go.

    Reads ``sys.argv[1:]``, expects exactly one positional argument of the
    form ``IP:PORT``, opens the storage chosen by
    ``storage.database_type(opts)`` and serves an ``XMLHammie`` wrapping it
    through ``ReusableSimpleXMLRPCServer.serve_forever()``.

    Command-line problems are reported through ``usage(2, message)``.
    NOTE(review): the code relies on ``usage`` not returning (presumably it
    calls ``sys.exit``) -- confirm against its definition.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:')
    except getopt.error as msg:
        # "as" is accepted by Python 2.6+ and Python 3; the older
        # "except X, name" spelling is a SyntaxError on Python 3.
        usage(2, msg)

    options = Options.options

    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    # '-d' and '-p' are not handled in the loop above; the whole option list
    # is handed to storage.database_type(), presumably to pick the database.
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    try:
        ip, port = args[0].split(":")
        port = int(port)
    except ValueError:
        # Missing colon, more than one colon, or a non-numeric port: report
        # it like every other command-line error instead of a traceback.
        usage(2, "Invalid IP:PORT: %r" % (args[0],))

    bayes = storage.open_storage(dbname, usedb)
    h = XMLHammie(bayes)

    server = ReusableSimpleXMLRPCServer(
        (ip, port), SimpleXMLRPCServer.SimpleXMLRPCRequestHandler)
    server.register_instance(h)
    server.serve_forever()
コード例 #29
0
    except getopt.GetoptError, msg:
        usage(msg)
        return 1
    usere = False
    tokenizestdin = False
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-r", "--re"):
            usere = True
        elif opt in ("-t", "--tokenize"):
            tokenizestdin = True
        elif opt in ('-o', '--option'):
            options.set_from_cmdline(arg, sys.stderr)
    if usere and tokenizestdin:
        usage("-r and -t may not be used at the same time")
        return 1
    dbname, usedb = database_type(opts)
    db = open_storage(dbname, usedb)
    if tokenizestdin:
        args = tokenize(sys.stdin)
    if args:
        print_spamcounts(args, db, usere)
        return 0
    else:
        usage("need tokens on cmd line or -t w/ msg on stdin")
        return 1
# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
コード例 #30
0
ファイル: sb_server.py プロジェクト: thomasvangurp/spamfilter
    for opt, arg in opts:
        if opt == '-h':
            print >> sys.stderr, __doc__
            sys.exit()
        elif opt == '-b':
            state.launchUI = True
        # '-p' and '-d' are handled by the storage.database_type call
        # below, in case you are wondering why they are missing.
        elif opt == '-l':
            state.proxyPorts = [_addressAndPort(a) for a in arg.split(',')]
        elif opt == '-u':
            state.uiPort = int(arg)
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)

    state.DBName, state.useDB = storage.database_type(opts)

    # Let the user know what they are using...
    v = get_current_version()
    print "%s\n" % (v.get_long_version("SpamBayes POP3 Proxy"), )

    if 0 <= len(args) <= 2:
        # Normal usage, with optional server name and port number.
        if len(args) == 1:
            state.servers = [(args[0], 110)]
        elif len(args) == 2:
            state.servers = [(args[0], int(args[1]))]

        # Default to listening on port 110 for command-line-specified servers.
        if len(args) > 0 and state.proxyPorts == []:
            state.proxyPorts = [('', 110)]
コード例 #31
0
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-r", "--re"):
            usere = True
        elif opt in ("-t", "--tokenize"):
            tokenizestdin = True
        elif opt in ('-o', '--option'):
            options.set_from_cmdline(arg, sys.stderr)

    if usere and tokenizestdin:
        usage("-r and -t may not be used at the same time")
        return 1

    dbname, usedb = database_type(opts)
    db = open_storage(dbname, usedb)

    if tokenizestdin:
        args = tokenize(sys.stdin)

    if args:
        print_spamcounts(args, db, usere)
        return 0
    else:
        usage("need tokens on cmd line or -t w/ msg on stdin")
        return 1


# When run as a script, hand everything after the program name to main()
# and exit with whatever status code it returns.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
コード例 #32
0
            idxname = arg

        elif opt == '-L':

            logname = arg

        elif opt == '-W':

            pwd = arg

        elif opt == '-o':

            options.set_from_cmdline(arg, sys.stderr)

    bdbname, useDBM = storage.database_type(opts)

    if not idxname:

        idxname = "%s.sbindex" % (ldbname)

    if (bdbname and ldbname and sbfname and (doTrain or doClassify)):

        run(bdbname, useDBM, ldbname, rdbname, \
            sbfname, doTrain, doClassify, pwd, idxname, logname)

        if doPrompt:

            try:

                key = input("Press Enter to end")
コード例 #33
0
 def change_db():
     """Reopen the classifier storage and build a new IMAPFilter around it.

     Uses ``opts`` and ``message_db`` from the enclosing scope.
     NOTE(review): ``classifier`` and ``imap_filter`` are bound as locals
     here and nothing is returned; only the ``Message.message_info_db``
     assignment is visible outside this function -- confirm that is intended.
     """
     storage_args = storage.database_type(opts)
     classifier = storage.open_storage(*storage_args)
     # Point the Message class at the message-info database in use.
     message.Message.message_info_db = message_db
     imap_filter = IMAPFilter(classifier, message_db)
コード例 #34
0
def main():
    """Main program; parse options and go.

    Parses ``sys.argv[1:]`` with the option string ``"hd:p:o:"``, requires a
    single positional ``IP:PORT`` argument, opens the storage returned by
    ``storage.open_storage`` and registers an ``XMLHammie`` for it on a
    ``ReusableSimpleXMLRPCServer`` before calling ``serve_forever()``.

    Bad command lines are reported via ``usage(2, message)``.
    NOTE(review): this function assumes ``usage`` terminates the process
    (presumably through ``sys.exit``) -- verify against its definition.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:p:o:")
    except getopt.error as msg:
        # The "except X, name" form is a SyntaxError on Python 3; "as" works
        # on Python 2.6+ and Python 3 alike.
        usage(2, msg)

    options = Options.options

    for opt, arg in opts:
        if opt == "-h":
            usage(0)
        elif opt == "-o":
            options.set_from_cmdline(arg, sys.stderr)
    # "-d" / "-p" are not examined here; the full option list goes to
    # storage.database_type(), presumably to select the database.
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    try:
        ip, port = args[0].split(":")
        port = int(port)
    except ValueError:
        # Covers a missing or extra colon and a non-numeric port, so the
        # user gets a usage message rather than a traceback.
        usage(2, "Invalid IP:PORT: %r" % (args[0],))

    bayes = storage.open_storage(dbname, usedb)
    h = XMLHammie(bayes)

    server = ReusableSimpleXMLRPCServer(
        (ip, port), SimpleXMLRPCServer.SimpleXMLRPCRequestHandler
    )
    server.register_instance(h)
    server.serve_forever()


コード例 #35
0
def main(args):
    """Parse *args*, train a fresh database, and optionally cull messages.

    Steps, in order:
      1. Parse options with ``getopt``; ``-h/--help`` prints usage and
         returns 0.
      2. Require both ``-g/--good`` and ``-s/--spam``.
      3. Ask ``storage.database_type(opts)`` for the database name and kind,
         remove any existing file of that name (errors ignored), and open
         the storage.
      4. Call ``train(...)``, then ``store()`` and ``close()`` the storage.
      5. If ``-c/--cullext`` or ``-C/--cull`` was given, call ``cull`` for
         the ham and spam piles.

    Args:
        args: command-line arguments, without the program name.

    Returns:
        0 on success (or after ``--help``), 1 for any command-line error,
        including malformed numeric values for ``-m``, ``-r`` or ``--ratio``.
    """
    try:
        opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:vRuC",
                                   ["help", "good=", "spam=",
                                    "database=", "pickle=", "verbose",
                                    "option=", "max=", "maxrounds=",
                                    "cullext=", "cull", "reverse",
                                    "ratio=", "unbalanced"])
    except getopt.GetoptError as msg:
        usage(msg)
        return 1
    ham = spam = dbname = usedb = cullext = None
    maxmsgs = 0
    maxrounds = MAXROUNDS
    verbose = False
    reverse = False
    sh_ratio = (1, 1)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-v", "--verbose"):
            verbose = True
        elif opt in ("-g", "--good"):
            ham = arg
        elif opt in ("-s", "--spam"):
            spam = arg
        elif opt in ("-c", "--cullext"):
            cullext = arg
        elif opt in ("-C", "--cull"):
            # Empty string (not None) so the cull step below still runs.
            cullext = ''
        elif opt in ("-m", "--max"):
            try:
                maxmsgs = int(arg)
            except ValueError:
                usage("%s requires an integer, got %r" % (opt, arg))
                return 1
        elif opt in ("-r", "--maxrounds"):
            try:
                maxrounds = int(arg)
            except ValueError:
                usage("%s requires an integer, got %r" % (opt, arg))
                return 1
        elif opt in ("-R", "--reverse"):
            reverse = True
        elif opt in ("-u", "--unbalanced"):
            sh_ratio = None
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '--ratio':
            # Only the first two ':'-separated fields are used, as before;
            # a missing field or non-integer is now a usage error rather
            # than an uncaught IndexError/ValueError.
            parts = arg.split(":")
            try:
                sh_ratio = (int(parts[0]), int(parts[1]))
            except (ValueError, IndexError):
                usage("--ratio requires two integers separated by ':', "
                      "got %r" % (arg,))
                return 1
    if ham is None or spam is None:
        usage("require both ham and spam piles")
        return 1
    dbname, usedb = storage.database_type(opts)
    # Start from scratch: drop any existing database file.  A failure to
    # remove it (e.g. it does not exist) is deliberately ignored.
    try:
        os.unlink(dbname)
    except OSError:
        pass
    store = storage.open_storage(dbname, usedb)
    tdict = {}
    train(store, ham, spam, maxmsgs, maxrounds, tdict, reverse, verbose,
          sh_ratio)
    store.store()
    store.close()
    if cullext is not None:
        cull(ham, cullext, 'ham', tdict)
        cull(spam, cullext, 'spam', tdict)
    return 0