class State:
    """Run-time state of the POP3 proxy application.

    An instance holds the configured remote servers and local listen
    ports, the session statistics and the log file handle.  After
    prepare() has run it also holds the classifier database, the
    message-info database, the stats manager and (outside of self-test
    mode) the message corpora and their trainers.
    """

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.logFile = None
        self.bayes = None
        self.platform_mutex = None
        self.prepared = False
        self.can_stop = True
        self.init()

        # Load up the other settings from Option.py / bayescustomize.ini
        self.uiPort = options["html_ui", "port"]
        self.launchUI = options["html_ui", "launch_browser"]
        self.gzipCache = options["Storage", "cache_use_gzip"]
        self.cacheExpiryDays = options["Storage", "cache_expiry_days"]
        self.runTestServer = False
        self.isTest = False

    def init(self):
        """Load the option-driven settings and reset the statistics.

        Must not be called between prepare() and close().  Exits the
        process when the number of remote servers differs from the number
        of listen ports.
        """
        assert not self.prepared, "init after prepare, but before close"
        # Load the environment for translation.
        self.lang_manager = i18n.LanguageManager()
        # Set the system user default language.
        self.lang_manager.set_language(
            self.lang_manager.locale_default_lang())
        # Set interface to use the user language in the configuration file.
        for language in reversed(options["globals", "language"]):
            # We leave the default in there as the last option, to fall
            # back on if necessary.
            self.lang_manager.add_language(language)
        if options["globals", "verbose"]:
            print("Asked to add languages: " +
                  ", ".join(options["globals", "language"]))
            print("Set language to " +
                  str(self.lang_manager.current_langs_codes))

        # Open the log file.
        if options["globals", "verbose"]:
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        if not hasattr(self, "servers"):
            # Could have already been set via the command line.
            self.servers = []
            if options["pop3proxy", "remote_servers"]:
                for server in options["pop3proxy", "remote_servers"]:
                    server = server.strip()
                    if ':' in server:
                        server, port = server.split(':', 1)
                    else:
                        # No explicit port given: use 110.
                        port = '110'
                    self.servers.append((server, int(port)))

        if not hasattr(self, "proxyPorts"):
            # Could have already been set via the command line.
            self.proxyPorts = []
            if options["pop3proxy", "listen_ports"]:
                splitPorts = options["pop3proxy", "listen_ports"]
                # A real list (not a lazy map object) is needed because
                # the length is checked just below.
                self.proxyPorts = [_addressAndPort(p) for p in splitPorts]

        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            sys.exit()

        # Remember reported errors.
        self.reported_errors = {}

        # Set up the statistics.
        self.totalSessions = 0
        self.activeSessions = 0
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Store and close the databases, drop the corpora and trainers,
        and release the platform mutex.  Only valid after prepare()."""
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store a non-empty db.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store through this instance, not through a module-level
                # global that may be missing or refer to another State.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
        if self.mdb is not None:
            self.mdb.store()
            self.mdb.close()
            self.mdb = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self, can_stop=True):
        """Do whatever needs to be done to prepare for running.  If
        can_stop is False, then we may not let the user shut down the
        proxy - for example, running as a Windows service this should
        be the case."""
        # If we can, prevent multiple servers from running at the same time.
        assert self.platform_mutex is None, "Should not already have the mutex"
        self.platform_mutex = open_platform_mutex()

        self.can_stop = can_stop

        # Do whatever we've been asked to do...
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """After the server details have been set up, this creates string
        versions of the details, for display in the Status panel."""
        serverStrings = ["%s:%s" % (s, p) for s, p in self.servers]
        self.serversString = ', '.join(serverStrings)
        portStrings = [_addressPortStr(p) for p in self.proxyPorts]
        self.proxyPortsString = ', '.join(portStrings)

    def buildStatusStrings(self):
        """Build the status message(s) to display on the home page of the
        web interface.  The result is stored in self.warning."""
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            if db_ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif db_ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = _("Database only has %d good and %d spam - "
                             "you should consider performing additional "
                             "training.") % (nham, nspam)
        else:
            self.warning = _("Database has no training information.  "
                             "SpamBayes will classify all messages as "
                             "'unsure', ready for you to train.")
        # Add an additional warning message if the user's thresholds are
        # truly odd.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        if spam_cut < 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the spam threshold less than 0.5.")
        if ham_cut > 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the ham threshold greater than 0.5.")
        if ham_cut > spam_cut:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        # The trailing comma keeps Python 2's print statement from ending
        # the line, exactly as the original `print "...",` did.
        print("Loading database..."),
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        self.mdb = spambayes.message.Message().message_info_db

        # Load stats manager.
        self.stats = Stats.Stats(options, self.mdb)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.isTest:
            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            # An explicit loop: map() is not meant for side effects and
            # would do nothing at all where it returns a lazy iterator.
            for cache_dir in (sc, hc, uc):
                storage.ensureDir(cache_dir)
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on
            # the unrecognised "\-" escape being passed through.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)

    def getNewMessageName(self):
        """Return a name for a newly arrived message.

        The message name is the time it arrived, with a uniquifier
        appended if two arrive within one clock tick of each other.
        """
        # int() gives the same value as the old long() call on Python 2
        # and also exists on Python 3.
        messageName = "%10.10d" % int(time.time())
        if messageName == self.lastBaseMessageName:
            messageName = "%s-%d" % (messageName, self.uniquifier)
            self.uniquifier += 1
        else:
            self.lastBaseMessageName = messageName
            self.uniquifier = 2
        return messageName

    def RecordClassification(self, cls, score):
        """Record the classification in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values.

        score is the score the message received.
        """
        if cls == options["Headers", "header_ham_string"]:
            self.numHams += 1
        elif cls == options["Headers", "header_spam_string"]:
            self.numSpams += 1
        else:
            self.numUnsure += 1
        self.stats.RecordClassification(score)
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.log_file = None
        self.bayes = None
        self.mutex = None
        self.prepared = False
        self.can_stop = True
        self.plugin = None

        # Unique names for cached messages - see getNewMessageName().
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Session statistics.
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        self.servers = ""

        # Settings read from the options.
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]
        self.run_test_server = False
        self.is_test = False

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.init()

    def init(self):
        """Reset the per-run settings; called from __init__ and prepare().

        Must not be called between prepare() and close().
        """
        assert not self.prepared, "init after prepare, but before close"
        self.lang_manager = None

        # init() runs from both __init__() and prepare(); only open the log
        # when it is not already open, so the file is not truncated a
        # second time and the first handle is not silently dropped.
        if options["globals", "verbose"] and self.log_file is None:
            self.log_file = open('_core_server.log', 'wb', 0)

        # Remember reported errors.
        self.reported_errors = {}

    def close(self):
        """Store and close the classifier database, drop the corpora and
        trainers, and release the platform mutex.  Only valid after
        prepare()."""
        assert self.prepared, "closed without being prepared!"
        if self.bayes is not None:
            # Only store a non-empty db.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.prepared = False
        self.close_platform_mutex()

    def prepare(self, can_stop=True):
        """Do whatever needs to be done to prepare for running.  If
        can_stop is False, then we may not let the user shut down the
        proxy - for example, running as a Windows service this should
        be the case."""
        self.init()
        # If we can, prevent multiple servers from running at the same time.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()

        self.can_stop = can_stop

        self.create_workers()
        self.prepared = True

    def build_status_strings(self):
        """Build the status message(s) to display on the home page of the
        web interface.  The result is stored in self.warning."""
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            if db_ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif db_ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = _("Database only has %d good and %d spam - "
                             "you should consider performing additional "
                             "training.") % (nham, nspam)
        else:
            self.warning = _("Database has no training information.  "
                             "SpamBayes will classify all messages as "
                             "'unsure', ready for you to train.")
        # Extra warnings for odd threshold settings.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        if spam_cut < 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the spam threshold less than 0.5.")
        if ham_cut > 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the ham threshold greater than 0.5.")
        if ham_cut > spam_cut:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def create_workers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        self.stats = Stats.Stats(options,
                                 spambayes.message.Message().message_info_db)

        self.build_status_strings()

        # The caches and trainers are not set up in self-test mode.
        if not self.is_test:
            sc = get_pathname_option("Storage", "core_spam_cache")
            hc = get_pathname_option("Storage", "core_ham_cache")
            uc = get_pathname_option("Storage", "core_unknown_cache")
            for cache_dir in (sc, hc, uc):
                storage.ensureDir(cache_dir)
            if self.gzip_cache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on
            # the unrecognised "\-" escape being passed through.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Expire old messages from all three corpora.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spam_trainer = storage.SpamTrainer(self.bayes)
            self.ham_trainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spam_trainer)
            self.hamCorpus.addObserver(self.ham_trainer)

    def getNewMessageName(self):
        """The message name is the time it arrived with a uniquifier
        appended if two arrive within one clock tick of each other.
        """
        # int() gives the same value as the old long() call on Python 2
        # and also exists on Python 3.
        message_name = "%10.10d" % int(time.time())
        if message_name == self.last_base_message_name:
            message_name = "%s-%d" % (message_name, self.uniquifier)
            self.uniquifier += 1
        else:
            self.last_base_message_name = message_name
            self.uniquifier = 2
        return message_name

    def record_classification(self, cls, score):
        """Record the classification in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values.

        score is the score the message received.
        """
        if cls == options["Headers", "header_ham_string"]:
            self.numHams += 1
        elif cls == options["Headers", "header_spam_string"]:
            self.numSpams += 1
        else:
            self.numUnsure += 1
        self.stats.RecordClassification(score)

    def buildStatusStrings(self):
        """Return an empty string; the status text itself is built by
        build_status_strings()."""
        return ""

    def recreate_state(self):
        """Close this state if it is prepared, then return a freshly
        prepared CoreState."""
        if self.prepared:
            # Close the state (which saves if necessary).
            self.close()
        state = CoreState()
        state.prepare()
        return state

    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Implementations of a mutex or other resource which can prevent
        multiple servers starting at once.  Platform specific as no
        reasonable cross-platform solution exists (however, an old trick is
        to use a directory for a mutex, as a create/test atomic API
        generally exists).  Will set self.mutex or may throw
        AlreadyRunningException
        """
        if sys.platform.startswith("win"):
            try:
                import win32event, win32api, winerror
                try:
                    hmutex = win32event.CreateMutex(None, True, mutex_name)
                except win32event.error as details:
                    # "Access denied" is treated the same as "already
                    # running"; anything else is re-raised.
                    if details.args[0] != winerror.ERROR_ACCESS_DENIED:
                        raise
                    raise AlreadyRunningException
                # Mutex opened - now check if we actually created it.
                if win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS:
                    win32api.CloseHandle(hmutex)
                    raise AlreadyRunningException
                self.mutex = hmutex
                return
            except ImportError:
                # The win32 modules are not available: start without a mutex.
                pass
        self.mutex = None

    def close_platform_mutex(self):
        """Toss out the current mutex."""
        # close() calls this method, so it must exist on the class.
        if sys.platform.startswith("win"):
            if self.mutex is not None:
                self.mutex.Close()
        self.mutex = None
class State:
    """Run-time state of the POP3 proxy application.

    An instance holds the configured remote servers and local listen
    ports, the session statistics and the log file handle.  After
    prepare() has run it also holds the classifier database, the
    message-info database, the stats manager and (outside of self-test
    mode) the message corpora and their trainers.
    """

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.logFile = None
        self.bayes = None
        self.platform_mutex = None
        self.prepared = False
        self.can_stop = True
        self.init()

        # Load up the other settings from Option.py / bayescustomize.ini
        self.uiPort = options["html_ui", "port"]
        self.launchUI = options["html_ui", "launch_browser"]
        self.gzipCache = options["Storage", "cache_use_gzip"]
        self.cacheExpiryDays = options["Storage", "cache_expiry_days"]
        self.runTestServer = False
        self.isTest = False

    def init(self):
        """Load the option-driven settings and reset the statistics.

        Must not be called between prepare() and close().  Exits the
        process when the number of remote servers differs from the number
        of listen ports.
        """
        assert not self.prepared, "init after prepare, but before close"
        # Load the environment for translation.
        self.lang_manager = i18n.LanguageManager()
        # Set the system user default language.
        self.lang_manager.set_language(
            self.lang_manager.locale_default_lang())
        # Set interface to use the user language in the configuration file.
        for language in reversed(options["globals", "language"]):
            # We leave the default in there as the last option, to fall
            # back on if necessary.
            self.lang_manager.add_language(language)
        if options["globals", "verbose"]:
            print("Asked to add languages: " +
                  ", ".join(options["globals", "language"]))
            print("Set language to " +
                  str(self.lang_manager.current_langs_codes))

        # Open the log file.
        if options["globals", "verbose"]:
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        if not hasattr(self, "servers"):
            # Could have already been set via the command line.
            self.servers = []
            if options["pop3proxy", "remote_servers"]:
                for server in options["pop3proxy", "remote_servers"]:
                    server = server.strip()
                    if ':' in server:
                        server, port = server.split(':', 1)
                    else:
                        # No explicit port given: use 110.
                        port = '110'
                    self.servers.append((server, int(port)))

        if not hasattr(self, "proxyPorts"):
            # Could have already been set via the command line.
            self.proxyPorts = []
            if options["pop3proxy", "listen_ports"]:
                splitPorts = options["pop3proxy", "listen_ports"]
                # A real list (not a lazy map object) is needed because
                # the length is checked just below.
                self.proxyPorts = [_addressAndPort(p) for p in splitPorts]

        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            sys.exit()

        # Remember reported errors.
        self.reported_errors = {}

        # Set up the statistics.
        self.totalSessions = 0
        self.activeSessions = 0
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Store and close the databases, drop the corpora and trainers,
        and release the platform mutex.  Only valid after prepare()."""
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store a non-empty db.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store through this instance, not through a module-level
                # global that may be missing or refer to another State.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
        if self.mdb is not None:
            self.mdb.store()
            self.mdb.close()
            self.mdb = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self, can_stop=True):
        """Do whatever needs to be done to prepare for running.  If
        can_stop is False, then we may not let the user shut down the
        proxy - for example, running as a Windows service this should
        be the case."""
        # If we can, prevent multiple servers from running at the same time.
        assert self.platform_mutex is None, "Should not already have the mutex"
        self.platform_mutex = open_platform_mutex()

        self.can_stop = can_stop

        # Do whatever we've been asked to do...
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """After the server details have been set up, this creates string
        versions of the details, for display in the Status panel."""
        serverStrings = ["%s:%s" % (s, p) for s, p in self.servers]
        self.serversString = ', '.join(serverStrings)
        portStrings = [_addressPortStr(p) for p in self.proxyPorts]
        self.proxyPortsString = ', '.join(portStrings)

    def buildStatusStrings(self):
        """Build the status message(s) to display on the home page of the
        web interface.  The result is stored in self.warning."""
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            if db_ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif db_ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = _("Database only has %d good and %d spam - "
                             "you should consider performing additional "
                             "training.") % (nham, nspam)
        else:
            self.warning = _("Database has no training information.  "
                             "SpamBayes will classify all messages as "
                             "'unsure', ready for you to train.")
        # Add an additional warning message if the user's thresholds are
        # truly odd.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        if spam_cut < 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the spam threshold less than 0.5.")
        if ham_cut > 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the ham threshold greater than 0.5.")
        if ham_cut > spam_cut:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        # The trailing comma keeps Python 2's print statement from ending
        # the line, exactly as the original `print "...",` did.
        print("Loading database..."),
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)
        self.mdb = spambayes.message.Message().message_info_db

        # Load stats manager.
        self.stats = Stats.Stats(options, self.mdb)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.isTest:
            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            # An explicit loop: map() is not meant for side effects and
            # would do nothing at all where it returns a lazy iterator.
            for cache_dir in (sc, hc, uc):
                storage.ensureDir(cache_dir)
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on
            # the unrecognised "\-" escape being passed through.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)

    def getNewMessageName(self):
        """Return a name for a newly arrived message.

        The message name is the time it arrived, with a uniquifier
        appended if two arrive within one clock tick of each other.
        """
        # int() gives the same value as the old long() call on Python 2
        # and also exists on Python 3.
        messageName = "%10.10d" % int(time.time())
        if messageName == self.lastBaseMessageName:
            messageName = "%s-%d" % (messageName, self.uniquifier)
            self.uniquifier += 1
        else:
            self.lastBaseMessageName = messageName
            self.uniquifier = 2
        return messageName

    def RecordClassification(self, cls, score):
        """Record the classification in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values.

        score is the score the message received.
        """
        if cls == options["Headers", "header_ham_string"]:
            self.numHams += 1
        elif cls == options["Headers", "header_spam_string"]:
            self.numSpams += 1
        else:
            self.numUnsure += 1
        self.stats.RecordClassification(score)
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.log_file = None
        self.bayes = None
        self.mutex = None
        self.prepared = False
        self.can_stop = True
        self.plugin = None

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Set up the statistics.
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        self.servers = ""

        # Load up the other settings from Option.py / bayescustomize.ini
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]
        self.run_test_server = False
        self.is_test = False

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.init()

    def init(self):
        """Reset the per-run settings; called from __init__ and prepare().

        Must not be called between prepare() and close().
        """
        assert not self.prepared, "init after prepare, but before close"
        ## no i18n yet...
        ## # Load the environment for translation.
        ## self.lang_manager = i18n.LanguageManager()
        ## # Set the system user default language.
        ## self.lang_manager.set_language(\
        ##     self.lang_manager.locale_default_lang())
        ## # Set interface to use the user language in the configuration file.
        ## for language in reversed(options["globals", "language"]):
        ##     # We leave the default in there as the last option, to fall
        ##     # back on if necessary.
        ##     self.lang_manager.add_language(language)
        ## if options["globals", "verbose"]:
        ##     print "Asked to add languages: " + \
        ##           ", ".join(options["globals", "language"])
        ##     print "Set language to " + \
        ##           str(self.lang_manager.current_langs_codes)
        self.lang_manager = None

        # Open the log file.  init() runs from both __init__() and
        # prepare(); only open the log when it is not already open, so the
        # file is not truncated a second time and the first handle is not
        # silently dropped.
        if options["globals", "verbose"] and self.log_file is None:
            self.log_file = open('_core_server.log', 'wb', 0)

        # Remember reported errors.
        self.reported_errors = {}

    def close(self):
        """Store and close the classifier database, drop the corpora and
        trainers, and release the platform mutex.  Only valid after
        prepare()."""
        assert self.prepared, "closed without being prepared!"
        if self.bayes is not None:
            # Only store a non-empty db.
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
            spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.prepared = False
        self.close_platform_mutex()

    def prepare(self, can_stop=True):
        """Do whatever needs to be done to prepare for running.  If
        can_stop is False, then we may not let the user shut down the
        proxy - for example, running as a Windows service this should
        be the case."""
        self.init()
        # If we can, prevent multiple servers from running at the same time.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()

        self.can_stop = can_stop

        # Do whatever we've been asked to do...
        self.create_workers()
        self.prepared = True

    def build_status_strings(self):
        """Build the status message(s) to display on the home page of the
        web interface.  The result is stored in self.warning."""
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            if db_ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif db_ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = _("Database only has %d good and %d spam - "
                             "you should consider performing additional "
                             "training.") % (nham, nspam)
        else:
            self.warning = _("Database has no training information.  "
                             "SpamBayes will classify all messages as "
                             "'unsure', ready for you to train.")
        # Add an additional warning message if the user's thresholds are
        # truly odd.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        if spam_cut < 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the spam threshold less than 0.5.")
        if ham_cut > 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the ham threshold greater than 0.5.")
        if ham_cut > spam_cut:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold. "
                              "Results are unpredictable.")

    def create_workers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        self.stats = Stats.Stats(options,
                                 spambayes.message.Message().message_info_db)

        self.build_status_strings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.is_test:
            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "core_spam_cache")
            hc = get_pathname_option("Storage", "core_ham_cache")
            uc = get_pathname_option("Storage", "core_unknown_cache")
            for cache_dir in (sc, hc, uc):
                storage.ensureDir(cache_dir)
            if self.gzip_cache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on
            # the unrecognised "\-" escape being passed through.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spam_trainer = storage.SpamTrainer(self.bayes)
            self.ham_trainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spam_trainer)
            self.hamCorpus.addObserver(self.ham_trainer)

    def getNewMessageName(self):
        """The message name is the time it arrived with a uniquifier
        appended if two arrive within one clock tick of each other.
        """
        # int() gives the same value as the old long() call on Python 2
        # and also exists on Python 3.
        message_name = "%10.10d" % int(time.time())
        if message_name == self.last_base_message_name:
            message_name = "%s-%d" % (message_name, self.uniquifier)
            self.uniquifier += 1
        else:
            self.last_base_message_name = message_name
            self.uniquifier = 2
        return message_name

    def record_classification(self, cls, score):
        """Record the classification in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values.

        score is the score the message received.
        """
        if cls == options["Headers", "header_ham_string"]:
            self.numHams += 1
        elif cls == options["Headers", "header_spam_string"]:
            self.numSpams += 1
        else:
            self.numUnsure += 1
        self.stats.RecordClassification(score)

    def buildStatusStrings(self):
        """Return an empty string; the status text itself is built by
        build_status_strings()."""
        return ""

    def recreate_state(self):
        """Close this state if it is prepared, then return a freshly
        prepared CoreState."""
        if self.prepared:
            # Close the state (which saves if necessary)
            self.close()
        # And get a new one going.
        state = CoreState()
        state.prepare()
        return state

    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Implementations of a mutex or other resource which can prevent
        multiple servers starting at once.  Platform specific as no
        reasonable cross-platform solution exists (however, an old trick is
        to use a directory for a mutex, as a create/test atomic API
        generally exists).  Will set self.mutex or may throw
        AlreadyRunningException
        """
        if sys.platform.startswith("win"):
            try:
                import win32event
                import win32api
                import winerror
            except ImportError:
                # no win32all - no worries, just start
                self.mutex = None
                return
            # ideally, the mutex name could include either the username,
            # or the munged path to the INI file - this would mean we
            # would allow multiple starts so long as they weren't for
            # the same user.  However, as of now, the service version
            # is likely to start as a different user, so a single mutex
            # is best for now.
            # XXX - even if we do get clever with another mutex name, we
            # should consider still creating a non-exclusive
            # "SpamBayesServer" mutex, if for no better reason than so
            # an installer can check if we are running
            try:
                hmutex = win32event.CreateMutex(None, True, mutex_name)
            except win32event.error as details:
                # If another user has the mutex open, we get an "access
                # denied" error - this is still telling us what we need
                # to know.
                if details.args[0] != winerror.ERROR_ACCESS_DENIED:
                    raise
                raise AlreadyRunningException
            # mutex opened - now check if we actually created it.
            if win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS:
                win32api.CloseHandle(hmutex)
                raise AlreadyRunningException
            self.mutex = hmutex
            return
        self.mutex = None

    def close_platform_mutex(self):
        """Toss out the current mutex."""
        if sys.platform.startswith("win"):
            if self.mutex is not None:
                self.mutex.Close()
        self.mutex = None
class State:
    """Global state of the POP3 proxy.

    Holds the settings read from ``options``, the remote servers and local
    listen ports, the session statistics, the log file handle and, once
    ``prepare()`` has run, the classifier, the message corpora and the
    trainers.
    """

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.logFile = None
        self.bayes = None
        self.platform_mutex = None
        self.prepared = False
        self.init()

        # Load up the other settings from Option.py / bayescustomize.ini
        self.uiPort = options["html_ui", "port"]
        self.launchUI = options["html_ui", "launch_browser"]
        self.gzipCache = options["Storage", "cache_use_gzip"]
        self.cacheExpiryDays = options["Storage", "cache_expiry_days"]
        self.runTestServer = False
        self.isTest = False

    def init(self):
        """(Re)load the server/port configuration and reset the statistics.

        Must not be called while the state is prepared.  Exits the process
        via sys.exit() when the number of remote servers differs from the
        number of listen ports.
        """
        assert not self.prepared, "init after prepare, but before close"
        # Open the log file.
        if options["globals", "verbose"]:
            self.logFile = open('_pop3proxy.log', 'wb', 0)

        # Each remote server is "host" or "host:port"; the port defaults
        # to 110.
        self.servers = []
        self.proxyPorts = []
        if options["pop3proxy", "remote_servers"]:
            for server in options["pop3proxy", "remote_servers"]:
                server = server.strip()
                if ':' in server:
                    server, port = server.split(':', 1)
                else:
                    port = '110'
                self.servers.append((server, int(port)))

        if options["pop3proxy", "listen_ports"]:
            # Build a real list: its length is compared just below, which a
            # lazy map() object would not support.
            self.proxyPorts = [_addressAndPort(port) for port
                               in options["pop3proxy", "listen_ports"]]

        if len(self.servers) != len(self.proxyPorts):
            print("pop3proxy_servers & pop3proxy_ports are different lengths!")
            sys.exit()

        # Remember reported errors.
        self.reported_errors = {}

        # Set up the statistics.
        self.totalSessions = 0
        self.activeSessions = 0
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.lastBaseMessageName = ''
        self.uniquifier = 2

    def close(self):
        """Save and close the database, drop the corpora and trainers, and
        release the platform mutex.  Requires a prior prepare()."""
        assert self.prepared, "closed without being prepared!"
        self.servers = None
        if self.bayes is not None:
            # Only store a non-empty db (one holding both ham and spam).
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                # Store through self, not the module-level `state` global,
                # so the right database is saved whichever instance this is.
                self.bayes.store()
            self.bayes.close()
            self.bayes = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spamTrainer = self.hamTrainer = None

        self.prepared = False
        close_platform_mutex(self.platform_mutex)
        self.platform_mutex = None

    def prepare(self):
        """Take the platform mutex and create the worker objects."""
        # If we can, prevent multiple servers from running at the same time.
        assert self.platform_mutex is None, "Should not already have the mutex"
        self.platform_mutex = open_platform_mutex()

        # Do whatever we've been asked to do...
        self.createWorkers()
        self.prepared = True

    def buildServerStrings(self):
        """After the server details have been set up, this creates string
        versions of the details, for display in the Status panel."""
        serverStrings = ["%s:%s" % (s, p) for s, p in self.servers]
        self.serversString = ', '.join(serverStrings)
        self.proxyPortsString = ', '.join(
            [_addressPortStr(port) for port in self.proxyPorts])

    def buildStatusStrings(self):
        """Build the status message(s) to display on the home page of the
        web interface.

        Sets self.warning from the ham/spam counts of self.bayes: a balance
        warning when one class outnumbers the other by more than 5:1, a
        "train more" hint for small databases, or an empty string.
        """
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            big = small = None
            if db_ratio > 5.0:
                big = "ham"
                small = "spam"
            elif db_ratio < (1 / 5.0):
                big = "spam"
                small = "ham"
            if big is not None:
                self.warning = "Warning: you have much more %s than %s - " \
                               "SpamBayes works best with approximately even " \
                               "numbers of ham and spam." % (big, small)
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = "Database only has %d good and %d spam - you should " \
                           "consider performing additional training." % (nham, nspam)
        else:
            self.warning = "Database has no training information.  SpamBayes " \
                           "will classify all messages as 'unsure', " \
                           "ready for you to train."

    def createWorkers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        # The trailing comma is deliberate: under Python 2's print statement
        # it suppresses the newline so later output continues on this line.
        print("Loading database..."),
        if self.isTest:
            self.useDB = "pickle"
            self.DBName = '_pop3proxy_test.pickle'   # This is never saved.
        if not hasattr(self, "DBName"):
            self.DBName, self.useDB = storage.database_type([])
        self.bayes = storage.open_storage(self.DBName, self.useDB)

        self.buildStatusStrings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.isTest:
            def ensureDir(dirname):
                # Create the directory; an already-existing one is fine.
                try:
                    os.mkdir(dirname)
                except OSError as e:
                    if e.errno != errno.EEXIST:
                        raise

            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "spam_cache")
            hc = get_pathname_option("Storage", "ham_cache")
            uc = get_pathname_option("Storage", "unknown_cache")
            # An explicit loop: map() used only for side effects would do
            # nothing at all if map() were lazy.
            for cache_dir in (sc, hc, uc):
                ensureDir(cache_dir)
            if self.gzipCache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on the
            # unrecognised "\-" escape.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spamTrainer = storage.SpamTrainer(self.bayes)
            self.hamTrainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spamTrainer)
            self.hamCorpus.addObserver(self.hamTrainer)
class CoreState:
    """This keeps the global state of the module - the command-line options,
    statistics like how many mails have been classified, the handle of the
    log file, the Classifier and FileCorpus objects, and so on."""

    def __init__(self):
        """Initialises the State object that holds the state of the app.
        The default settings are read from Options.py and bayescustomize.ini
        and are then overridden by the command-line processing code in the
        __main__ code below."""
        self.log_file = None
        self.bayes = None
        self.mutex = None
        self.prepared = False
        self.can_stop = True
        self.plugin = None

        # Unique names for cached messages - see `getNewMessageName()` below.
        self.last_base_message_name = ''
        self.uniquifier = 2

        # Set up the statistics.
        self.numSpams = 0
        self.numHams = 0
        self.numUnsure = 0

        self.servers = ""

        # Load up the other settings from Option.py / bayescustomize.ini
        self.ui_port = options["html_ui", "port"]
        self.launch_ui = options["html_ui", "launch_browser"]
        self.gzip_cache = options["Storage", "cache_use_gzip"]
        self.run_test_server = False
        self.is_test = False

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.init()

    def init(self):
        """Reset the per-run settings: clear the language manager, open the
        log file when verbose, and forget previously reported errors.

        Must not be called while the state is prepared.
        """
        assert not self.prepared, "init after prepare, but before close"
        ## no i18n yet...
        ## # Load the environment for translation.
        ## self.lang_manager = i18n.LanguageManager()
        ## # Set the system user default language.
        ## self.lang_manager.set_language(\
        ##     self.lang_manager.locale_default_lang())
        ## # Set interface to use the user language in the configuration file.
        ## for language in reversed(options["globals", "language"]):
        ##     # We leave the default in there as the last option, to fall
        ##     # back on if necessary.
        ##     self.lang_manager.add_language(language)
        ## if options["globals", "verbose"]:
        ##     print "Asked to add languages: " + \
        ##           ", ".join(options["globals", "language"])
        ##     print "Set language to " + \
        ##           str(self.lang_manager.current_langs_codes)
        self.lang_manager = None

        # Open the log file.
        if options["globals", "verbose"]:
            self.log_file = open('_core_server.log', 'wb', 0)

        # Remember reported errors.
        self.reported_errors = {}

    def close(self):
        """Save and close the database, drop the corpora and trainers, and
        release the platform mutex.  Requires a prior prepare()."""
        assert self.prepared, "closed without being prepared!"
        if self.bayes is not None:
            # Only store a non-empty db (one holding both ham and spam).
            if self.bayes.nham != 0 and self.bayes.nspam != 0:
                self.bayes.store()
            self.bayes.close()
            self.bayes = None
        spambayes.message.Message().message_info_db = None

        self.spamCorpus = self.hamCorpus = self.unknownCorpus = None
        self.spam_trainer = self.ham_trainer = None

        self.prepared = False
        self.close_platform_mutex()

    def prepare(self, can_stop=True):
        """Do whatever needs to be done to prepare for running.  If
        can_stop is False, then we may not let the user shut down the
        proxy - for example, running as a Windows service this should
        be the case."""
        self.init()
        # If we can, prevent multiple servers from running at the same time.
        assert self.mutex is None, "Should not already have the mutex"
        self.open_platform_mutex()

        self.can_stop = can_stop

        # Do whatever we've been asked to do...
        self.create_workers()
        self.prepared = True

    def build_status_strings(self):
        """Build the status message(s) to display on the home page of the
        web interface.

        Sets self.warning from the ham/spam counts of self.bayes, then
        appends extra warnings when the configured cutoffs look odd.
        """
        nspam = self.bayes.nspam
        nham = self.bayes.nham
        if nspam > 10 and nham > 10:
            db_ratio = nham / float(nspam)
            if db_ratio > 5.0:
                self.warning = _("Warning: you have much more ham than "
                                 "spam - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            elif db_ratio < (1 / 5.0):
                self.warning = _("Warning: you have much more spam than "
                                 "ham - SpamBayes works best with "
                                 "approximately even numbers of ham and "
                                 "spam.")
            else:
                self.warning = ""
        elif nspam > 0 or nham > 0:
            self.warning = _("Database only has %d good and %d spam - "
                             "you should consider performing additional "
                             "training.") % (nham, nspam)
        else:
            self.warning = _("Database has no training information.  "
                             "SpamBayes will classify all messages as "
                             "'unsure', ready for you to train.")
        # Add an additional warning message if the user's thresholds are
        # truly odd.
        spam_cut = options["Categorization", "spam_cutoff"]
        ham_cut = options["Categorization", "ham_cutoff"]
        if spam_cut < 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the spam threshold less than 0.5.")
        if ham_cut > 0.5:
            self.warning += _("<br/>Warning: we do not recommend "
                              "setting the ham threshold greater than 0.5.")
        if ham_cut > spam_cut:
            self.warning += _("<br/>Warning: your ham threshold is "
                              "<b>higher</b> than your spam threshold.  "
                              "Results are unpredictable.")

    def create_workers(self):
        """Using the options that were initialised in __init__ and then
        possibly overridden by the driver code, create the Bayes object,
        the Corpuses, the Trainers and so on."""
        if self.is_test:
            self.use_db = "pickle"
            self.db_name = '_core_server.pickle'   # This is never saved.
        if not hasattr(self, "db_name"):
            self.db_name, self.use_db = storage.database_type([])
        self.bayes = storage.open_storage(self.db_name, self.use_db)

        # Load stats manager.
        self.stats = Stats.Stats(options,
                                 spambayes.message.Message().message_info_db)

        self.build_status_strings()

        # Don't set up the caches and training objects when running the
        # self-test, so as not to clutter the filesystem.
        if not self.is_test:
            # Create/open the Corpuses.  Use small cache sizes to avoid
            # hogging lots of memory.
            sc = get_pathname_option("Storage", "core_spam_cache")
            hc = get_pathname_option("Storage", "core_ham_cache")
            uc = get_pathname_option("Storage", "core_unknown_cache")
            for d in [sc, hc, uc]:
                storage.ensureDir(d)
            if self.gzip_cache:
                factory = GzipFileMessageFactory()
            else:
                factory = FileMessageFactory()
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            # Raw string: same characters as before, without relying on the
            # unrecognised "\-" escape.
            name_pattern = r'[0123456789\-]*'
            self.spamCorpus = ExpiryFileCorpus(age, factory, sc,
                                               name_pattern, cacheSize=20)
            self.hamCorpus = ExpiryFileCorpus(age, factory, hc,
                                              name_pattern, cacheSize=20)
            self.unknownCorpus = ExpiryFileCorpus(age, factory, uc,
                                                  name_pattern, cacheSize=20)

            # Given that (hopefully) users will get to the stage
            # where they do not need to do any more regular training to
            # be satisfied with spambayes' performance, we expire old
            # messages from not only the trained corpora, but the unknown
            # as well.
            self.spamCorpus.removeExpiredMessages()
            self.hamCorpus.removeExpiredMessages()
            self.unknownCorpus.removeExpiredMessages()

            # Create the Trainers.
            self.spam_trainer = storage.SpamTrainer(self.bayes)
            self.ham_trainer = storage.HamTrainer(self.bayes)
            self.spamCorpus.addObserver(self.spam_trainer)
            self.hamCorpus.addObserver(self.ham_trainer)

    def getNewMessageName(self):
        """The message name is the time it arrived with a uniquifier
        appended if two arrive within one clock tick of each other.
        """
        # int() rather than long(): the formatted text is the same on
        # Python 2, and long() does not exist on Python 3.
        message_name = "%10.10d" % int(time.time())
        if message_name == self.last_base_message_name:
            message_name = "%s-%d" % (message_name, self.uniquifier)
            self.uniquifier += 1
        else:
            self.last_base_message_name = message_name
            self.uniquifier = 2
        return message_name

    def record_classification(self, cls, score):
        """Record the classification in the session statistics.

        cls should match one of the options["Headers", "header_*_string"]
        values; anything that is neither the ham nor the spam string is
        counted as unsure.

        score is the score the message received; it is forwarded to
        self.stats.RecordClassification().
        """
        if cls == options["Headers", "header_ham_string"]:
            self.numHams += 1
        elif cls == options["Headers", "header_spam_string"]:
            self.numSpams += 1
        else:
            self.numUnsure += 1
        self.stats.RecordClassification(score)

    def buildStatusStrings(self):
        """Return an empty string.

        NOTE(review): presumably kept for callers that still use the
        camelCase name - confirm before removing.
        """
        return ""

    def recreate_state(self):
        """Close this state if it is prepared and return a fresh, prepared
        CoreState."""
        if self.prepared:
            # Close the state (which saves if necessary)
            self.close()
        # And get a new one going.
        state = CoreState()
        state.prepare()
        return state

    def open_platform_mutex(self, mutex_name="SpamBayesServer"):
        """Implementations of a mutex or other resource which can prevent
        multiple servers starting at once.  Platform specific as no
        reasonable cross-platform solution exists (however, an old trick is
        to use a directory for a mutex, as a create/test atomic API
        generally exists).
        Will set self.mutex or may throw AlreadyRunningException
        """
        if sys.platform.startswith("win"):
            try:
                import win32event
                import win32api
                import winerror
                # ideally, the mutex name could include either the username,
                # or the munged path to the INI file - this would mean we
                # would allow multiple starts so long as they weren't for
                # the same user.  However, as of now, the service version
                # is likely to start as a different user, so a single mutex
                # is best for now.
                # XXX - even if we do get clever with another mutex name, we
                # should consider still creating a non-exclusive
                # "SpamBayesServer" mutex, if for no better reason than so
                # an installer can check if we are running
                try:
                    hmutex = win32event.CreateMutex(None, True, mutex_name)
                except win32event.error as details:
                    # If another user has the mutex open, we get an "access
                    # denied" error - this is still telling us what we need
                    # to know.
                    # Use .args[0] rather than subscripting the exception:
                    # the two are the same on Python 2, and exceptions are
                    # not subscriptable on Python 3.
                    if details.args[0] != winerror.ERROR_ACCESS_DENIED:
                        raise
                    raise AlreadyRunningException
                # mutex opened - now check if we actually created it.
                if win32api.GetLastError() == winerror.ERROR_ALREADY_EXISTS:
                    win32api.CloseHandle(hmutex)
                    raise AlreadyRunningException
                self.mutex = hmutex
                return
            except ImportError:
                # no win32all - no worries, just start
                pass