def main(args):
    """Parse command-line arguments and print spam counts for tokens.

    Tokens come from the positional arguments or, with -t/--tokenize,
    from tokenizing standard input.  -r/--re is passed through to
    print_spamcounts; -o/--option values are applied to the global options.

    Returns 0 on success (including -h/--help) and 1 on any usage error.
    """
    try:
        parsed_opts, tokens = getopt.getopt(
            args, "hrto:", ["help", "re", "tokenize", "option="])
    except getopt.GetoptError as err:
        usage(err)
        return 1

    use_regex = False
    from_stdin = False
    for flag, value in parsed_opts:
        if flag in ("-h", "--help"):
            usage()
            return 0
        if flag in ("-r", "--re"):
            use_regex = True
        elif flag in ("-t", "--tokenize"):
            from_stdin = True
        elif flag in ("-o", "--option"):
            options.set_from_cmdline(value, sys.stderr)

    if use_regex and from_stdin:
        usage("-r and -t may not be used at the same time")
        return 1

    db_name, db_kind = database_type(parsed_opts)
    database = open_storage(db_name, db_kind)

    if from_stdin:
        tokens = tokenize(sys.stdin)

    # Guard clause: nothing to look up is a usage error.
    if not tokens:
        usage("need tokens on cmd line or -t w/ msg on stdin")
        return 1

    print_spamcounts(tokens, database, use_regex)
    return 0
def main(profiling=False):
    """Parse sys.argv and run the selected HammieFilter actions on each message.

    Args:
        profiling: True when this call is already running under cProfile
            (set by the re-entrant call made for -P), so that -P does not
            start another profiling run.

    Each of -f/-g/-s/-t/-G/-S appends one HammieFilter method to the list
    of actions; when none is given, h.filter is used.  Every message of
    every mailbox named on the command line ("-" when none is named) is
    passed to each action and then written to standard output.

    With -n a new database is created and the process exits with status 0.
    """
    # Option letter -> name of the HammieFilter method it selects.  The
    # method is looked up only when the option is actually seen.
    action_names = {
        '-f': 'filter',
        '-g': 'train_ham',
        '-s': 'train_spam',
        '-t': 'filter_train',
        '-G': 'untrain_ham',
        '-S': 'untrain_spam',
    }

    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hvxd:p:nfgstGSo:P',
                               ['help', 'version', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--version'):
            version()
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt in action_names:
            actions.append(getattr(h, action_names[opt]))
        elif opt == '-P':
            if not profiling:
                try:
                    import cProfile
                except ImportError:
                    # Profiler unavailable: carry on unprofiled.
                    pass
                else:
                    # Re-run the whole program under the profiler.
                    return cProfile.run("main(True)")
        elif opt == "-n":
            create_newdb = True

    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb or not os.path.exists(h.dbname):
        h.newdb()
        print("Created new database in", h.dbname, file=sys.stderr)
    if create_newdb:
        sys.exit(0)

    if not actions:
        actions = [h.filter]
    if not args:
        args = ["-"]

    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
                if args == ["-"]:
                    unixfrom = msg.get_unixfrom() is not None
                else:
                    unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
def main(args):
    """Export word probabilities from a storage database into a cdb file.

    Args:
        args: command-line arguments (without the program name).  Exactly
            one positional argument, the output cdb file name, is required.
            The parsed options are also handed to storage.database_type.

    Returns:
        0 after -h/--help, 1 on a usage error, and None after a successful
        export (as in the original code).
    """
    try:
        opts, args = getopt.getopt(args, "hd:p:",
                                   ["help", "database=", "pickle="])
    except getopt.GetoptError as msg:
        usage(msg)
        return 1

    # Honour an explicit help request before validating the positional
    # arguments, so that "-h" on its own is a success rather than an error.
    if any(opt in ("-h", "--help") for opt, _ in opts):
        usage()
        return 0

    if len(args) != 1:
        usage()
        return 1
    cdbname = args[0]

    dbname, usedb = storage.database_type(opts)
    store = storage.open_storage(dbname, usedb)

    # NOTE(review): the instance was never used by the original code; the
    # call is kept in case construction has side effects - confirm and drop.
    CdbClassifier()

    items = [(word, str(store.probability(store._wordinfoget(word))))
             for word in store._wordinfokeys()]

    # The context manager closes the output file even if cdb_make raises.
    with open(cdbname, "wb") as cdbfile:
        cdb.cdb_make(cdbfile, items)
def createWorkers(self): """Using the options that were initialised in __init__ and then possibly overridden by the driver code, create the Bayes object, the Corpuses, the Trainers and so on.""" print "Loading database...", if self.isTest: self.useDB = "pickle" self.DBName = '_pop3proxy_test.pickle' # This is never saved. if not hasattr(self, "DBName"): self.DBName, self.useDB = storage.database_type([]) self.bayes = storage.open_storage(self.DBName, self.useDB) self.buildStatusStrings() # Don't set up the caches and training objects when running the self-test, # so as not to clutter the filesystem. if not self.isTest: def ensureDir(dirname): try: os.mkdir(dirname) except OSError, e: if e.errno != errno.EEXIST: raise # Create/open the Corpuses. Use small cache sizes to avoid hogging # lots of memory. sc = get_pathname_option("Storage", "spam_cache") hc = get_pathname_option("Storage", "ham_cache") uc = get_pathname_option("Storage", "unknown_cache") map(ensureDir, [sc, hc, uc]) if self.gzipCache: factory = GzipFileMessageFactory() else: factory = FileMessageFactory() age = options["Storage", "cache_expiry_days"]*24*60*60 self.spamCorpus = ExpiryFileCorpus(age, factory, sc, '[0123456789\-]*', cacheSize=20) self.hamCorpus = ExpiryFileCorpus(age, factory, hc, '[0123456789\-]*', cacheSize=20) self.unknownCorpus = ExpiryFileCorpus(age, factory, uc, '[0123456789\-]*', cacheSize=20) # Given that (hopefully) users will get to the stage # where they do not need to do any more regular training to # be satisfied with spambayes' performance, we expire old # messages from not only the trained corpora, but the unknown # as well. self.spamCorpus.removeExpiredMessages() self.hamCorpus.removeExpiredMessages() self.unknownCorpus.removeExpiredMessages() # Create the Trainers. self.spamTrainer = storage.SpamTrainer(self.bayes) self.hamTrainer = storage.HamTrainer(self.bayes) self.spamCorpus.addObserver(self.spamTrainer) self.hamCorpus.addObserver(self.hamTrainer)
def database_type():
    """Return the (name, type) pair for the message-info database.

    The pair comes from storage.database_type, using the
    ("Storage", "messageinfo_storage_file") option as the default name.
    """
    default_option = ("Storage", "messageinfo_storage_file")
    name, kind = storage.database_type((), default_name=default_option)
    # The storage options here may lag behind those in storage.py, so be
    # robust: a storage method we cannot use falls back to pickle.
    if typ_is_known(kind):
        return name, kind
    return name, "pickle"


def typ_is_known(kind):
    """Return True when kind is one of the keys of _storage_types."""
    return kind in _storage_types.keys()
def createWorkers(self): """Using the options that were initialised in __init__ and then possibly overridden by the driver code, create the Bayes object, the Corpuses, the Trainers and so on.""" print "Loading database...", if self.isTest: self.useDB = "pickle" self.DBName = '_pop3proxy_test.pickle' # This is never saved. if not hasattr(self, "DBName"): self.DBName, self.useDB = storage.database_type([]) self.bayes = storage.open_storage(self.DBName, self.useDB) self.mdb = spambayes.message.Message().message_info_db # Load stats manager. self.stats = Stats.Stats(options, self.mdb) self.buildStatusStrings() # Don't set up the caches and training objects when running the self-test, # so as not to clutter the filesystem. if not self.isTest: # Create/open the Corpuses. Use small cache sizes to avoid hogging # lots of memory. sc = get_pathname_option("Storage", "spam_cache") hc = get_pathname_option("Storage", "ham_cache") uc = get_pathname_option("Storage", "unknown_cache") map(storage.ensureDir, [sc, hc, uc]) if self.gzipCache: factory = GzipFileMessageFactory() else: factory = FileMessageFactory() age = options["Storage", "cache_expiry_days"]*24*60*60 self.spamCorpus = ExpiryFileCorpus(age, factory, sc, '[0123456789\-]*', cacheSize=20) self.hamCorpus = ExpiryFileCorpus(age, factory, hc, '[0123456789\-]*', cacheSize=20) self.unknownCorpus = ExpiryFileCorpus(age, factory, uc, '[0123456789\-]*', cacheSize=20) # Given that (hopefully) users will get to the stage # where they do not need to do any more regular training to # be satisfied with spambayes' performance, we expire old # messages from not only the trained corpora, but the unknown # as well. self.spamCorpus.removeExpiredMessages() self.hamCorpus.removeExpiredMessages() self.unknownCorpus.removeExpiredMessages() # Create the Trainers. 
self.spamTrainer = storage.SpamTrainer(self.bayes) self.hamTrainer = storage.HamTrainer(self.bayes) self.spamCorpus.addObserver(self.spamTrainer) self.hamCorpus.addObserver(self.hamTrainer)
def create_workers(self):
    """Using the options that were initialised in __init__ and then
    possibly overridden by the driver code, create the Bayes object, the
    stats manager, the Corpuses, the Trainers and so on.

    Reads self.is_test and self.gzip_cache, plus self.db_name/self.use_db
    when the driver has already set db_name.
    """
    if self.is_test:
        self.use_db = "pickle"
        self.db_name = '_core_server.pickle'   # This is never saved.
    if not hasattr(self, "db_name"):
        self.db_name, self.use_db = storage.database_type([])
    self.bayes = storage.open_storage(self.db_name, self.use_db)

    # Load stats manager.
    self.stats = Stats.Stats(options,
                             spambayes.message.Message().message_info_db)

    self.build_status_strings()

    # Don't set up the caches and training objects when running the
    # self-test, so as not to clutter the filesystem.
    if not self.is_test:
        # Create/open the Corpuses.  Use small cache sizes to avoid
        # hogging lots of memory.
        sc = get_pathname_option("Storage", "core_spam_cache")
        hc = get_pathname_option("Storage", "core_ham_cache")
        uc = get_pathname_option("Storage", "core_unknown_cache")
        for d in [sc, hc, uc]:
            storage.ensureDir(d)
        if self.gzip_cache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        # cache_expiry_days converted to seconds.
        age = options["Storage", "cache_expiry_days"]*24*60*60

        # Raw string: "\-" is not a valid escape in a normal literal and
        # triggers a warning on current Python 3.  The value is unchanged.
        name_pattern = r'[0123456789\-]*'
        self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                           name_pattern, cacheSize=20)
        self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                          name_pattern, cacheSize=20)
        self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                              name_pattern, cacheSize=20)

        # Given that (hopefully) users will get to the stage
        # where they do not need to do any more regular training to
        # be satisfied with spambayes' performance, we expire old
        # messages from not only the trained corpora, but the unknown
        # as well.
        self.spamCorpus.removeExpiredMessages()
        self.hamCorpus.removeExpiredMessages()
        self.unknownCorpus.removeExpiredMessages()

        # Create the Trainers.
        self.spam_trainer = storage.SpamTrainer(self.bayes)
        self.ham_trainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spam_trainer)
        self.hamCorpus.addObserver(self.ham_trainer)
def __init__(self):
    """Set up the option defaults and work out which database to use."""
    opts = Options.options
    storage_key = ("Storage", "persistent_storage_file")
    # Only substitute ~/.hammiedb while the option still holds its default.
    if opts[storage_key] == opts.default(*storage_key):
        opts[storage_key] = "~/.hammiedb"
    rc_files = ['/etc/hammierc', os.path.expanduser('~/.hammierc')]
    opts.merge_files(rc_files)
    self.dbname, self.usedb = storage.database_type([])
    self.mode = None
    self.h = None
def database_type():
    """Return the (name, type) pair for the message-info database.

    The pair comes from storage.database_type, using the
    ("Storage", "messageinfo_storage_file") option as the default name.
    A type with no entry in _storage_types is replaced by "pickle".
    """
    dn = ("Storage", "messageinfo_storage_file")
    nm, typ = storage.database_type((), default_name=dn)
    # Direct membership test instead of copying the keys into a list first.
    # NOTE(review): assumes _storage_types is a dict-like mapping - confirm.
    if typ not in _storage_types:
        typ = "pickle"
    return nm, typ
def createWorkers(self):
    """There aren't many workers in an IMAP State - most of the work
    is done elsewhere.  We do need to load the classifier, though, and
    build the status strings.

    DBName/useDB and MDBName/useMDB are only looked up when the
    corresponding name attribute has not already been set on the instance.
    """
    # Load the token database.
    if not hasattr(self, "DBName"):
        self.DBName, self.useDB = storage.database_type([])
    self.bayes = storage.open_storage(self.DBName, self.useDB)

    # Load the message database.  The guard used to test the misspelt
    # attribute "MBDName", which never exists, so a preset MDBName was
    # always overwritten.
    if not hasattr(self, "MDBName"):
        self.MDBName, self.useMDB = message.database_type()
    self.mdb = message.open_storage(self.MDBName, self.useMDB)

    # Load stats manager.
    self.stats = Stats(options, self.mdb)

    self.buildStatusStrings()
def main():
    """Main program; parse options and go.

    Trains the database on every -g (ham) and -s (spam) argument and
    stores it if anything was trained.  -f, -n and -r are passed through
    to train(); -q clears the module-level ``loud`` flag; -o applies an
    option override.
    """
    global loud

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hfqnrd:p:g:s:o:')
    except getopt.error as msg:
        usage(2, msg)

    if not opts:
        usage(2, "No options given")

    force = False
    trainnew = False
    removetrained = False
    good = []
    spam = []
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == "-f":
            force = True
        elif opt == "-n":
            trainnew = True
        elif opt == "-q":
            loud = False
        elif opt == '-g':
            good.append(arg)
        elif opt == '-s':
            spam.append(arg)
        elif opt == "-r":
            removetrained = True
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb is None:
        # Use settings in configuration file.
        usedb = options["Storage", "persistent_use_database"]
        pck = get_pathname_option("Storage", "persistent_storage_file")

    h = hammie.open(pck, usedb, "c")

    # Must be initialised: without any -g/-s argument neither loop runs
    # and "if save" used to raise UnboundLocalError.
    save = False

    for g in good:
        if loud:
            print("Training ham (%s):" % g)
        train(h, g, False, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True

    for s in spam:
        if loud:
            print("Training spam (%s):" % s)
        train(h, s, True, force, trainnew, removetrained)
        sys.stdout.flush()
        save = True

    if save:
        h.store()
def __init__(self, storage_file, config):
    """Point the options at storage_file, merge the config files and
    record the database name and type.

    Asserts that the ("Hammie", "train_on_filter") option is not set.
    """
    opts = Options.options
    opts["Storage", "persistent_storage_file"] = storage_file
    rc_files = ['/etc/hammierc', os.path.expanduser(config)]
    opts.merge_files(rc_files)
    self.include_trained = Options.options["Headers", "include_trained"]
    self.dbname, self.usedb = storage.database_type([])
    self.mode = self.h = None
    assert not Options.options["Hammie", "train_on_filter"], \
        "Cannot train_on_filter."
def __init__(self):
    """Set up the option defaults, pick the database and note its mtime."""
    opts = Options.options
    storage_key = ("Storage", "persistent_storage_file")
    # This is a bit of a hack to counter the default for
    # persistent_storage_file changing from ~/.hammiedb to hammie.db.
    # It works unless a user:
    #   * had hammie.db as their value for persistent_storage_file, and
    #   * their config file was loaded by Options.py.
    if opts[storage_key] == opts.default(*storage_key):
        opts[storage_key] = "~/.hammiedb"
    rc_files = ["/etc/hammierc", os.path.expanduser("~/.hammierc")]
    opts.merge_files(rc_files)
    self.dbname, self.usedb = storage.database_type([])
    # Modification time of the database file as of construction.
    self.modtime = os.path.getmtime(self.dbname)
    self.h = None
def __init__(self, storage_file, config):
    """Point the options at storage_file, merge the config files and
    record the database name and type.

    Asserts that the ("Hammie", "train_on_filter") option is not set.
    """
    opts = Options.options
    opts["Storage", "persistent_storage_file"] = storage_file
    rc_files = ['/etc/hammierc', os.path.expanduser(config)]
    opts.merge_files(rc_files)
    self.include_trained = Options.options["Headers", "include_trained"]
    self.dbname, self.usedb = storage.database_type([])
    self.mode = self.h = None
    assert not Options.options["Hammie", "train_on_filter"], \
        "Cannot train_on_filter."
def main():
    """Parse sys.argv and run the selected HammieFilter actions on each
    message, writing every processed message to standard output.

    With -n a new database is created and the process exits with status 0.
    """
    # Option letter -> name of the HammieFilter method it selects; the
    # method is looked up only when the option is actually seen.
    method_for_flag = {
        '-f': 'filter',
        '-g': 'train_ham',
        '-s': 'train_spam',
        '-t': 'filter_train',
        '-G': 'untrain_ham',
        '-S': 'untrain_spam',
    }

    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                               ['help', 'examples', 'option='])
    create_newdb = False
    for flag, value in opts:
        if flag in ('-h', '--help'):
            usage(0)
        elif flag in ('-x', '--examples'):
            examples()
        elif flag in ('-o', '--option'):
            Options.options.set_from_cmdline(value, sys.stderr)
        elif flag in method_for_flag:
            actions.append(getattr(h, method_for_flag[flag]))
        elif flag == "-n":
            create_newdb = True

    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    # Defaults: filter, reading from "-".
    if actions == []:
        actions = [h.filter]
    if not args:
        args = ["-"]

    for fname in args:
        for msg in mboxutils.getmbox(fname):
            for action in actions:
                action(msg)
                if args == ["-"]:
                    unixfrom = msg.get_unixfrom() is not None
                else:
                    unixfrom = True
            sys.stdout.write(mboxutils.as_string(msg, unixfrom=unixfrom))
def __init__(self):
    """Set up the option defaults and work out which database to use."""
    opts = Options.options
    storage_key = ("Storage", "persistent_storage_file")
    # This is a bit of a hack to counter the default for
    # persistent_storage_file changing from ~/.hammiedb to hammie.db.
    # It works unless a user:
    #   * had hammie.db as their value for persistent_storage_file, and
    #   * their config file was loaded by Options.py.
    if opts[storage_key] == opts.default(*storage_key):
        opts[storage_key] = "~/.hammiedb"
    rc_files = ['/etc/hammierc', os.path.expanduser('~/.hammierc')]
    opts.merge_files(rc_files)
    self.dbname, self.usedb = storage.database_type([])
    self.mode = None
    self.h = None
def createWorkers(self):
    """There aren't many workers in an IMAP State - most of the work
    is done elsewhere.  We do need to load the classifier, though, and
    build the status strings.

    DBName/useDB and MDBName/useMDB are only looked up when the
    corresponding name attribute has not already been set on the instance.
    """
    # Load token and message databases.
    if not hasattr(self, "DBName"):
        self.DBName, self.useDB = storage.database_type([])
    self.bayes = storage.open_storage(self.DBName, self.useDB)
    # The guard used to test the misspelt attribute "MBDName", which never
    # exists, so a preset MDBName was always overwritten.
    if not hasattr(self, "MDBName"):
        self.MDBName, self.useMDB = message.database_type()
    self.mdb = message.open_storage(self.MDBName, self.useMDB)

    # Load stats manager.
    self.stats = Stats(options, self.mdb)

    # Build status strings.
    self.buildStatusStrings()
def main():
    """Parse the command line and serve an XMLHammie instance over XML-RPC.

    Exactly one positional argument of the form IP:PORT is expected; the
    server then runs forever.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:')
    except getopt.error as msg:
        usage(2, msg)

    options = Options.options
    for flag, value in opts:
        if flag == '-h':
            usage(0)
        elif flag == '-o':
            options.set_from_cmdline(value, sys.stderr)
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    host, port_text = args[0].split(":")
    address = (host, int(port_text))

    classifier = storage.open_storage(dbname, usedb)
    handler = XMLHammie(classifier)

    server = ReusableSimpleXMLRPCServer(
        address, xmlrpc.server.SimpleXMLRPCRequestHandler)
    server.register_instance(handler)
    server.serve_forever()
def createWorkers(self):
    """Using the options that were initialised in __init__ and then
    possibly overridden by the driver code, create the Bayes object, the
    stats manager, the Corpuses, the Trainers and so on.

    Reads self.isTest and self.gzipCache, plus self.DBName/self.useDB
    when the driver has already set DBName.
    """
    print("Loading database...", end=' ')
    if self.isTest:
        self.useDB = "pickle"
        self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
    if not hasattr(self, "DBName"):
        self.DBName, self.useDB = storage.database_type([])
    self.bayes = storage.open_storage(self.DBName, self.useDB)
    self.mdb = spambayes.message.Message().message_info_db

    # Load stats manager.
    self.stats = Stats.Stats(options, self.mdb)

    self.buildStatusStrings()

    # Don't set up the caches and training objects when running the
    # self-test, so as not to clutter the filesystem.
    if not self.isTest:
        # Create/open the Corpuses.  Use small cache sizes to avoid
        # hogging lots of memory.
        sc = get_pathname_option("Storage", "spam_cache")
        hc = get_pathname_option("Storage", "ham_cache")
        uc = get_pathname_option("Storage", "unknown_cache")
        for d in [sc, hc, uc]:
            storage.ensureDir(d)
        if self.gzipCache:
            factory = GzipFileMessageFactory()
        else:
            factory = FileMessageFactory()
        # cache_expiry_days converted to seconds.
        age = options["Storage", "cache_expiry_days"]*24*60*60

        # Raw string: "\-" is not a valid escape in a normal literal and
        # triggers a warning on current Python 3.  The value is unchanged.
        name_pattern = r'[0123456789\-]*'
        self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                           name_pattern, cacheSize=20)
        self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                          name_pattern, cacheSize=20)
        self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                              name_pattern, cacheSize=20)

        # Old messages are expired from the unknown corpus as well as the
        # trained ones.
        self.spamCorpus.removeExpiredMessages()
        self.hamCorpus.removeExpiredMessages()
        self.unknownCorpus.removeExpiredMessages()

        # Create the Trainers.
        self.spamTrainer = storage.SpamTrainer(self.bayes)
        self.hamTrainer = storage.HamTrainer(self.bayes)
        self.spamCorpus.addObserver(self.spamTrainer)
        self.hamCorpus.addObserver(self.hamTrainer)
def run():
    """Parse sys.argv, configure the module-level ``state`` and start the
    proxy.

    Up to two positional arguments are accepted: a server name and an
    optional port (110 when omitted).  With more than two, the module
    docstring is printed to stderr instead.
    """
    global state

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hbd:p:l:u:o:')
    except getopt.error as msg:
        print(str(msg) + '\n\n' + __doc__, file=sys.stderr)
        sys.exit()

    for flag, value in opts:
        if flag == '-h':
            print(__doc__, file=sys.stderr)
            sys.exit()
        elif flag == '-b':
            state.launchUI = True
        elif flag == '-l':
            state.proxyPorts = list(map(_addressAndPort, value.split(',')))
        elif flag == '-u':
            state.uiPort = int(value)
        elif flag == '-o':
            options.set_from_cmdline(value, sys.stderr)

    # The full option list is handed to storage.database_type to pick the
    # database name and type.
    state.DBName, state.useDB = storage.database_type(opts)

    # Let the user know what they are using...
    version_info = get_current_version()
    print("%s\n" % (version_info.get_long_version("SpamBayes POP3 Proxy"),))

    argc = len(args)
    if not 0 <= argc <= 2:
        print(__doc__, file=sys.stderr)
        return

    # Normal usage, with optional server name and port number.
    if argc == 1:
        state.servers = [(args[0], 110)]
    elif argc == 2:
        state.servers = [(args[0], int(args[1]))]

    # Default to listening on port 110 for command-line-specified servers.
    if argc > 0 and state.proxyPorts == []:
        state.proxyPorts = [('', 110)]

    try:
        prepare()
    except AlreadyRunningException:
        print("ERROR: The proxy is already running on this machine.",
              file=sys.stderr)
        print("Please stop the existing proxy and try again",
              file=sys.stderr)
        return
    start()
def main():
    """Main program; parse options and go.

    Exactly one positional argument, the socket to serve on, is required.
    If creating the server fails with EADDRINUSE the function returns
    quietly; any other socket error propagates.  Otherwise the options
    are applied, a HammieFilter is attached to the server and it serves
    until idle.  The socket path is unlinked afterwards in every case.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:a:A:')
    except getopt.error as msg:
        usage(2, msg)

    if len(args) != 1:
        usage(2, "socket not specified")

    try:
        server = BNServer(args[0], BNRequest)
    except socket.error as e:
        # Exceptions are not subscriptable on Python 3, so read the error
        # code from the errno attribute instead of e[0].
        if e.errno == errno.EADDRINUSE:
            pass   # in use, no need
        else:
            raise  # a real error
    else:
        try:
            from spambayes import Options, storage
            options = Options.options

            for opt, arg in opts:
                if opt == '-h':
                    usage(0)
                elif opt == '-o':
                    options.set_from_cmdline(arg, sys.stderr)
                elif opt == '-a':
                    server.timeout = float(arg)
                elif opt == '-A':
                    server.number = int(arg)

            h = make_HammieFilter()
            h.dbname, h.usedb = storage.database_type(opts)
            server.hammie = h

            server.serve_until_idle()
            h.close()
        finally:
            # Best-effort removal of the socket file.
            try:
                os.unlink(args[0])
            except EnvironmentError:
                pass
def main(profiling=False):
    """Parse sys.argv and run the selected HammieFilter actions on each message.

    profiling is True when this call is already running under cProfile
    (set by the re-entrant call made for -P), so that -P does not start
    another profiling run.

    Each of -f/-g/-s/-t/-G/-S appends one HammieFilter method to the list
    of actions; when none is given, h.filter is used.  Every message of
    every mailbox named on the command line ("-" when none is named) is
    passed to each action and then written to standard output.

    With -n a new database is created and the process exits with status 0.
    """
    # Option letter -> name of the HammieFilter method it selects.  The
    # method is looked up only when the option is actually seen.
    action_names = {
        '-f': 'filter',
        '-g': 'train_ham',
        '-s': 'train_spam',
        '-t': 'filter_train',
        '-G': 'untrain_ham',
        '-S': 'untrain_spam',
    }

    h = HammieFilter()
    actions = []
    opts, args = getopt.getopt(sys.argv[1:], 'hvxd:p:nfgstGSo:P',
                               ['help', 'version', 'examples', 'option='])
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-v', '--version'):
            version()
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt in action_names:
            actions.append(getattr(h, action_names[opt]))
        elif opt == '-P':
            if not profiling:
                try:
                    import cProfile
                except ImportError:
                    # Profiler unavailable: carry on unprofiled.
                    pass
                else:
                    # Re-run the whole program under the profiler.
                    return cProfile.run("main(True)")
        elif opt == "-n":
            create_newdb = True

    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb or not os.path.exists(h.dbname):
        h.newdb()
        print >> sys.stderr, "Created new database in", h.dbname
    if create_newdb:
        sys.exit(0)

    if not actions:
        actions = [h.filter]
    if not args:
        args = ["-"]

    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
                if args == ["-"]:
                    unixfrom = msg.get_unixfrom() is not None
                else:
                    unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
usage(0) elif opt == "-f": force = True elif opt == "-n": trainnew = True elif opt == "-q": loud = False elif opt == '-g': good.append(arg) elif opt == '-s': spam.append(arg) elif opt == "-r": removetrained = True elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) pck, usedb = storage.database_type(opts) if args: usage(2, "Positional arguments not allowed") if usedb == None: # Use settings in configuration file. usedb = options["Storage", "persistent_use_database"] pck = get_pathname_option("Storage", "persistent_storage_file") h = hammie.open(pck, usedb, "c") for g in good: if loud: print "Training ham (%s):" % g train(h, g, False, force, trainnew, removetrained) sys.stdout.flush()
def main():
    """Main program; parse options and go.

    -g/-s collect ham/spam sources to train on (or to untrain, with -U);
    -u collects sources to score; -f filters a message read from standard
    input; -r is passed through to score().  The database is stored only
    if something was trained or untrained.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:Ufg:s:p:u:r')
    except getopt.error as msg:
        usage(2, msg)

    if not opts:
        usage(2, "No options given")

    good = []
    spam = []
    unknown = []
    reverse = 0
    untrain_mode = 0
    do_filter = False
    mode = 'r'
    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-g':
            good.append(arg)
            mode = 'c'
        elif opt == '-s':
            spam.append(arg)
            mode = 'c'
        elif opt == "-f":
            do_filter = True
        elif opt == '-u':
            unknown.append(arg)
        elif opt == '-U':
            untrain_mode = 1
        elif opt == '-r':
            reverse = 1
    pck, usedb = storage.database_type(opts)
    if args:
        usage(2, "Positional arguments not allowed")

    if usedb is None:
        # The accepted database options are -d and -p; there is no -D.
        usage(2, "Must specify one of -d or -p")

    save = False

    h = hammie.open(pck, usedb, mode)

    # Training and untraining differ only in the verb printed and the
    # function applied to each source.
    if untrain_mode:
        verb, apply_to = "Untraining", untrain
    else:
        verb, apply_to = "Training", train
    for is_spam, label, sources in ((False, "ham", good),
                                    (True, "spam", spam)):
        for source in sources:
            print("%s %s (%s):" % (verb, label, source))
            apply_to(h, source, is_spam)
            save = True

    if save:
        h.store()

    if do_filter:
        msg = sys.stdin.read()
        filtered = h.filter(msg)
        sys.stdout.write(filtered)

    if unknown:
        spams = hams = unsures = 0
        for source in unknown:
            if len(unknown) > 1:
                print("Scoring", source)
            # Distinct names: the result used to be unpacked over the
            # loop variable itself.
            n_spam, n_ham, n_unsure = score(h, source, reverse)
            spams += n_spam
            hams += n_ham
            unsures += n_unsure
        print("Total %d spam, %d ham, %d unsure" % (spams, hams, unsures))
rdbname = arg elif opt == '-f': sbfname = arg elif opt == '-t': doTrain = True elif opt == '-c': doClassify = True elif opt == '-P': doPrompt = True elif opt == '-i': idxname = arg elif opt == '-L': logname = arg elif opt == '-W': pwd = arg elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) bdbname, useDBM = storage.database_type(opts) if not idxname: idxname = "%s.sbindex" % (ldbname) if (bdbname and ldbname and sbfname and (doTrain or doClassify)): run(bdbname, useDBM, ldbname, rdbname, \ sbfname, doTrain, doClassify, pwd, idxname, logname) if doPrompt: raw_input("Press Enter to end ") else: print >> sys.stderr, __doc__
for opt, arg in opts: if opt == '-h': print >> sys.stderr, __doc__ sys.exit() elif opt == '-b': state.launchUI = True # '-p' and '-d' are handled by the storage.database_type call # below, in case you are wondering why they are missing. elif opt == '-l': state.proxyPorts = [_addressAndPort(a) for a in arg.split(',')] elif opt == '-u': state.uiPort = int(arg) elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) state.DBName, state.useDB = storage.database_type(opts) # Let the user know what they are using... v = get_current_version() print "%s\n" % (v.get_long_version("SpamBayes POP3 Proxy"),) if 0 <= len(args) <= 2: # Normal usage, with optional server name and port number. if len(args) == 1: state.servers = [(args[0], 110)] elif len(args) == 2: state.servers = [(args[0], int(args[1]))] # Default to listening on port 110 for command-line-specified servers. if len(args) > 0 and state.proxyPorts == []: state.proxyPorts = [('', 110)]
usage(0) elif opt == "-f": force = True elif opt == "-n": trainnew = True elif opt == "-q": loud = False elif opt == '-g': good.append(arg) elif opt == '-s': spam.append(arg) elif opt == "-r": removetrained = True elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) pck, usedb = storage.database_type(opts) if args: usage(2, "Positional arguments not allowed") if usedb == None: # Use settings in configuration file. usedb = options["Storage", "persistent_use_database"] pck = get_pathname_option("Storage", "persistent_storage_file") h = hammie.open(pck, usedb, "c") for g in good: if loud: print "Training ham (%s):" % g train(h, g, False, force, trainnew, removetrained)
def main():
    """Main program; parse options and go.

    Exactly one positional argument of the form IP:PORT is expected.  An
    XMLHammie wrapping the opened storage is registered with an XML-RPC
    server on that address, which then serves forever.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:p:o:')
    # "except X as name" is accepted by Python 2.6+ and Python 3, unlike
    # the comma form used previously.
    except getopt.error as msg:
        usage(2, msg)

    options = Options.options

    for opt, arg in opts:
        if opt == '-h':
            usage(0)
        elif opt == '-o':
            options.set_from_cmdline(arg, sys.stderr)
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    ip, port = args[0].split(":")
    port = int(port)

    bayes = storage.open_storage(dbname, usedb)
    h = XMLHammie(bayes)

    server = ReusableSimpleXMLRPCServer(
        (ip, port),
        SimpleXMLRPCServer.SimpleXMLRPCRequestHandler)
    server.register_instance(h)
    server.serve_forever()
except getopt.GetoptError, msg: usage(msg) return 1 usere = False tokenizestdin = False for opt, arg in opts: if opt in ("-h", "--help"): usage() return 0 elif opt in ("-r", "--re"): usere = True elif opt in ("-t", "--tokenize"): tokenizestdin = True elif opt in ('-o', '--option'): options.set_from_cmdline(arg, sys.stderr) if usere and tokenizestdin: usage("-r and -t may not be used at the same time") return 1 dbname, usedb = database_type(opts) db = open_storage(dbname, usedb) if tokenizestdin: args = tokenize(sys.stdin) if args: print_spamcounts(args, db, usere) return 0 else: usage("need tokens on cmd line or -t w/ msg on stdin") return 1 if __name__ == "__main__": sys.exit(main(sys.argv[1:]))
for opt, arg in opts: if opt == '-h': print >> sys.stderr, __doc__ sys.exit() elif opt == '-b': state.launchUI = True # '-p' and '-d' are handled by the storage.database_type call # below, in case you are wondering why they are missing. elif opt == '-l': state.proxyPorts = [_addressAndPort(a) for a in arg.split(',')] elif opt == '-u': state.uiPort = int(arg) elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) state.DBName, state.useDB = storage.database_type(opts) # Let the user know what they are using... v = get_current_version() print "%s\n" % (v.get_long_version("SpamBayes POP3 Proxy"), ) if 0 <= len(args) <= 2: # Normal usage, with optional server name and port number. if len(args) == 1: state.servers = [(args[0], 110)] elif len(args) == 2: state.servers = [(args[0], int(args[1]))] # Default to listening on port 110 for command-line-specified servers. if len(args) > 0 and state.proxyPorts == []: state.proxyPorts = [('', 110)]
for opt, arg in opts: if opt in ("-h", "--help"): usage() return 0 elif opt in ("-r", "--re"): usere = True elif opt in ("-t", "--tokenize"): tokenizestdin = True elif opt in ('-o', '--option'): options.set_from_cmdline(arg, sys.stderr) if usere and tokenizestdin: usage("-r and -t may not be used at the same time") return 1 dbname, usedb = database_type(opts) db = open_storage(dbname, usedb) if tokenizestdin: args = tokenize(sys.stdin) if args: print_spamcounts(args, db, usere) return 0 else: usage("need tokens on cmd line or -t w/ msg on stdin") return 1 if __name__ == "__main__": sys.exit(main(sys.argv[1:]))
idxname = arg elif opt == '-L': logname = arg elif opt == '-W': pwd = arg elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) bdbname, useDBM = storage.database_type(opts) if not idxname: idxname = "%s.sbindex" % (ldbname) if (bdbname and ldbname and sbfname and (doTrain or doClassify)): run(bdbname, useDBM, ldbname, rdbname, \ sbfname, doTrain, doClassify, pwd, idxname, logname) if doPrompt: try: key = input("Press Enter to end")
def change_db():
    """Open the classifier chosen by ``opts`` and build an IMAPFilter on it.

    ``opts`` and ``message_db`` are read from the enclosing scope.
    """
    db_spec = storage.database_type(opts)
    classifier = storage.open_storage(*db_spec)
    # Share the message database with the Message class.
    message.Message.message_info_db = message_db
    imap_filter = IMAPFilter(classifier, message_db)
def main():
    """Main program; parse options and go.

    Exactly one positional argument of the form IP:PORT is expected.  An
    XMLHammie wrapping the opened storage is registered with an XML-RPC
    server on that address, which then serves forever.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hd:p:o:")
    # "except X as name" is accepted by Python 2.6+ and Python 3, unlike
    # the comma form used previously.
    except getopt.error as msg:
        usage(2, msg)

    options = Options.options

    for opt, arg in opts:
        if opt == "-h":
            usage(0)
        elif opt == "-o":
            options.set_from_cmdline(arg, sys.stderr)
    dbname, usedb = storage.database_type(opts)

    if len(args) != 1:
        usage(2, "IP:PORT not specified")

    ip, port = args[0].split(":")
    port = int(port)

    bayes = storage.open_storage(dbname, usedb)
    h = XMLHammie(bayes)

    server = ReusableSimpleXMLRPCServer(
        (ip, port),
        SimpleXMLRPCServer.SimpleXMLRPCRequestHandler)
    server.register_instance(h)
    server.serve_forever()
def main(args):
    """Parse the command line, train a fresh database and optionally cull.

    Both a ham pile (-g/--good) and a spam pile (-s/--spam) are required.
    Any existing database file is removed before training starts.

    Returns 0 on success (including -h/--help) and 1 on a usage error.
    """
    long_opts = ["help", "good=", "spam=", "database=", "pickle=",
                 "verbose", "option=", "max=", "maxrounds=", "cullext=",
                 "cull", "reverse", "ratio=", "unbalanced"]
    try:
        opts, args = getopt.getopt(args, "hg:s:d:p:o:m:r:c:vRuC", long_opts)
    except getopt.GetoptError as err:
        usage(err)
        return 1

    ham = None
    spam = None
    cullext = None
    maxmsgs = 0
    maxrounds = MAXROUNDS
    verbose = reverse = False
    sh_ratio = (1, 1)

    for flag, value in opts:
        if flag in ("-h", "--help"):
            usage()
            return 0
        if flag in ("-v", "--verbose"):
            verbose = True
        elif flag in ("-g", "--good"):
            ham = value
        elif flag in ("-s", "--spam"):
            spam = value
        elif flag in ("-c", "--cullext"):
            cullext = value
        elif flag in ("-C", "--cull"):
            cullext = ''
        elif flag in ("-m", "--max"):
            maxmsgs = int(value)
        elif flag in ("-r", "--maxrounds"):
            maxrounds = int(value)
        elif flag in ("-R", "--reverse"):
            reverse = True
        elif flag in ("-u", "--unbalanced"):
            sh_ratio = None
        elif flag in ('-o', '--option'):
            Options.options.set_from_cmdline(value, sys.stderr)
        elif flag == '--ratio':
            # Only the first two colon-separated fields are used.
            parts = value.split(":")
            sh_ratio = (int(parts[0]), int(parts[1]))

    if ham is None or spam is None:
        usage("require both ham and spam piles")
        return 1

    dbname, usedb = storage.database_type(opts)

    # Start from scratch: a missing (or unremovable) file is ignored.
    try:
        os.unlink(dbname)
    except OSError:
        pass

    store = storage.open_storage(dbname, usedb)

    tdict = {}
    train(store, ham, spam, maxmsgs, maxrounds, tdict, reverse, verbose,
          sh_ratio)

    store.store()
    store.close()

    if cullext is not None:
        for pile, label in ((ham, 'ham'), (spam, 'spam')):
            cull(pile, cullext, label, tdict)

    return 0