def trainer(mgr, config, progress):
    """Run a training pass and, optionally, re-score the trained folders.

    mgr      -- the manager; supplies the data directory, message store,
                the live classifier data and the "filter now" config.
    config   -- configuration whose ``training`` section selects the
                folders and the ``rebuild`` / ``rescore`` options.
    progress -- progress reporter (stages, ticks, errors, status text).

    Returns None.  Nothing is trained unless both a ham and a spam folder
    list are configured, and nothing is adopted if a stop was requested.
    """
    training_opts = config.training
    rebuild = training_opts.rebuild
    rescore = training_opts.rescore

    # Both kinds of folder are required by this version of the trainer.
    if not (training_opts.ham_folder_ids and training_opts.spam_folder_ids):
        progress.error("You must specify at least one spam, and one good folder")
        return

    if not rebuild:
        classifier_data = mgr.classifier_data
    else:
        # Train into a scratch database which the manager only adopts once
        # training has completed.  A cancelled run therefore cannot leave a
        # "bad" database behind, and mail arriving during training is not
        # classified against a partially built one.
        import os
        import manager
        bayes_path = os.path.join(mgr.data_directory,
                                  "$sbtemp$default_bayes_database")
        mdb_path = os.path.join(mgr.data_directory,
                                "$sbtemp$default_message_database")
        # Ask the manager module which storage manager class to use.
        storage_class = manager.GetStorageManagerClass()
        storage = storage_class(bayes_path, mdb_path)
        classifier_data = manager.ClassifierData(storage, mgr)
        classifier_data.InitNew()

    # Up to three stages: train, save, and (optionally) score.  Re-scoring
    # is much slower than training because each message has to be saved
    # back.  Saving can be slow too, but it only gets a single tick.
    if rescore:
        stages = (("Training", 0.3), ("Saving", 0.1), ("Scoring", 0.6))
    else:
        stages = (("Training", 0.9), ("Saving", 0.1))
    progress.set_stages(stages)

    real_trainer(classifier_data, config, mgr.message_store, progress)

    if progress.stop_requested():
        return

    if rebuild:
        assert mgr.classifier_data is not classifier_data
        mgr.classifier_data.Adopt(classifier_data)
        classifier_data = mgr.classifier_data

    progress.tick()

    if rescore:
        # Point the "filter now" settings at every folder we trained on.
        filter_opts = mgr.config.filter_now
        filter_opts.only_unread = False
        filter_opts.only_unseen = False
        filter_opts.action_all = False
        filter_opts.folder_ids = (mgr.config.training.ham_folder_ids +
                                  mgr.config.training.spam_folder_ids)
        filter_opts.include_sub = (mgr.config.training.ham_include_sub or
                                   mgr.config.training.spam_include_sub)
        import filter
        filter.filterer(mgr, mgr.config, progress)

    bayes = classifier_data.bayes
    summary = "Completed training with %d spam and %d good messages" % (
        bayes.nspam, bayes.nham)
    progress.set_status(summary)
def WizardTrainer(mgr, config, progress):
    """Train a fresh, wizard-private classifier database.

    mgr      -- the manager; supplies the data directory and message store.
                Its ``classifier_data`` is swapped temporarily while the
                post-training steps run, and ``wizard_classifier_data`` is
                set to the newly trained data on the way out.
    config   -- configuration; ``training`` selects folders and ``rescore``,
                ``wizard.temp_training_names`` receives the candidate
                database file names.
    progress -- progress reporter (stages, ticks, status text).

    Returns None.
    """
    import os
    import manager
    import train

    data_dir = mgr.data_directory
    bayes_path = os.path.join(data_dir, "$sbwiz$default_bayes_database")
    mdb_path = os.path.join(data_dir, "$sbwiz$default_message_database")

    # Record each possible on-disk name (both bases, for each extension).
    config.wizard.temp_training_names = [
        base + ext
        for ext in (".pck", ".db")
        for base in (bayes_path, mdb_path)
    ]

    # Ask the manager module which storage manager class to use.
    storage_class = manager.GetStorageManagerClass()
    storage = storage_class(bayes_path, mdb_path)
    classifier_data = manager.ClassifierData(storage, mgr)
    classifier_data.InitNew()

    training_opts = config.training
    rescore = training_opts.rescore
    if rescore:
        stages = ((_("Training"), 0.3), (_("Saving"), 0.1),
                  (_("Scoring"), 0.6))
    else:
        stages = ((_("Training"), 0.9), (_("Saving"), 0.1))
    progress.set_stages(stages)

    train.real_trainer(classifier_data, config, mgr.message_store, progress)

    # XXX - hack: the classifier data should be passed in explicitly.  For
    # now it is installed on the manager for the duration of the remaining
    # steps and put back afterwards.
    saved_classifier_data = mgr.classifier_data
    mgr.classifier_data = classifier_data
    try:
        progress.tick()

        if rescore:
            # Point the "filter now" settings at every folder trained on.
            filter_opts = config.filter_now
            filter_opts.only_unread = False
            filter_opts.only_unseen = False
            filter_opts.action_all = False
            filter_opts.folder_ids = (config.training.ham_folder_ids +
                                      config.training.spam_folder_ids)
            filter_opts.include_sub = (config.training.ham_include_sub or
                                       config.training.spam_include_sub)
            import filter
            filter.filterer(mgr, config, progress)

        bayes = classifier_data.bayes
        template = _("Completed training with %d spam and %d good messages")
        progress.set_status(template % (bayes.nspam, bayes.nham))
    finally:
        mgr.wizard_classifier_data = classifier_data
        mgr.classifier_data = saved_classifier_data
def WizardTrainer(mgr, config, progress):
    """Train a fresh, wizard-private classifier database.

    mgr      -- the manager; supplies the data directory and message store.
                Its ``classifier_data`` is replaced only while the steps
                after training run; ``wizard_classifier_data`` is set to
                the new data when the function leaves.
    config   -- configuration; reads ``training`` and ``filter_now`` and
                writes ``wizard.temp_training_names``.
    progress -- progress reporter (stages, ticks, status text).

    Returns None.
    """
    import os
    import manager
    import train

    bayes_path = os.path.join(mgr.data_directory,
                              "$sbwiz$default_bayes_database")
    mdb_path = os.path.join(mgr.data_directory,
                            "$sbwiz$default_message_database")

    # Candidate file names: for each extension, the bayes name and then the
    # message-database name.
    temp_names = []
    for suffix in (".pck", ".db"):
        temp_names.extend([bayes_path + suffix, mdb_path + suffix])
    config.wizard.temp_training_names = temp_names

    # Ask the manager module which storage manager class to use.
    storage = manager.GetStorageManagerClass()(bayes_path, mdb_path)
    classifier_data = manager.ClassifierData(storage, mgr)
    classifier_data.InitNew()

    rescore = config.training.rescore
    if not rescore:
        stages = (("Training", 0.9), ("Saving", 0.1))
    else:
        stages = (("Training", 0.3), ("Saving", 0.1), ("Scoring", 0.6))
    progress.set_stages(stages)

    train.real_trainer(classifier_data, config, mgr.message_store, progress)

    # XXX - hack: the classifier data should really be passed in.  Install
    # it on the manager temporarily and restore the previous one afterwards.
    previous_data = mgr.classifier_data
    mgr.classifier_data = classifier_data
    try:
        progress.tick()

        if rescore:
            # Point the "filter now" settings at every folder trained on.
            filter_opts = config.filter_now
            filter_opts.only_unread = False
            filter_opts.only_unseen = False
            filter_opts.action_all = False
            trained_folders = (config.training.ham_folder_ids +
                               config.training.spam_folder_ids)
            filter_opts.folder_ids = trained_folders
            filter_opts.include_sub = (config.training.ham_include_sub or
                                       config.training.spam_include_sub)
            import filter
            filter.filterer(mgr, config, progress)

        bayes = classifier_data.bayes
        counts = (bayes.nspam, bayes.nham)
        progress.set_status(
            "Completed training with %d spam and %d good messages" % counts)
    finally:
        mgr.wizard_classifier_data = classifier_data
        mgr.classifier_data = previous_data
def trainer(mgr, config, progress):
    """Run a training pass and, optionally, re-score the trained folders.

    mgr      -- the manager; supplies the data directory, message store,
                live classifier data, statistics and "filter now" config.
    config   -- configuration whose ``training`` section selects the
                folders and the ``rebuild`` / ``rescore`` options.
    progress -- progress reporter (stages, ticks, errors, status text).

    Returns None.  Reports an error and does nothing when neither a ham nor
    a spam folder is configured; returns early if a stop was requested.
    """
    training_opts = config.training
    rebuild = training_opts.rebuild
    rescore = training_opts.rescore

    # At least one folder of either kind is needed.
    if not (training_opts.ham_folder_ids or training_opts.spam_folder_ids):
        progress.error(
            _("You must specify at least one spam or one good folder"))
        return

    if not rebuild:
        classifier_data = mgr.classifier_data
    else:
        # Rebuilding trains into a separate "$sbtemp$" database, which is
        # handed to the manager after training finishes.
        import os
        import manager
        bayes_path = os.path.join(mgr.data_directory,
                                  "$sbtemp$default_bayes_database")
        mdb_path = os.path.join(mgr.data_directory,
                                "$sbtemp$default_message_database")
        storage_class = manager.GetStorageManagerClass()
        storage = storage_class(bayes_path, mdb_path)
        classifier_data = manager.ClassifierData(storage, mgr)
        classifier_data.InitNew()

    if rescore:
        stages = ((_("Training"), 0.3), (_("Saving"), 0.1),
                  (_("Scoring"), 0.6))
    else:
        stages = ((_("Training"), 0.9), (_("Saving"), 0.1))
    progress.set_stages(stages)

    real_trainer(classifier_data, config, mgr.message_store, progress)

    if progress.stop_requested():
        return

    if rebuild:
        assert mgr.classifier_data is not classifier_data
        mgr.AdoptClassifierData(classifier_data)
        classifier_data = mgr.classifier_data
        # Log the current statistics, then reset them.
        session_lines = mgr.stats.GetStats(session_only=True)
        mgr.LogDebug(1, "Session:" + "\r\n".join(session_lines))
        total_lines = mgr.stats.GetStats()
        mgr.LogDebug(1, "Total:" + "\r\n".join(total_lines))
        mgr.stats.Reset()
        mgr.stats.ResetTotal(permanently=True)

    progress.tick()

    if rescore:
        # Point the "filter now" settings at every folder we trained on.
        filter_opts = mgr.config.filter_now
        filter_opts.only_unread = False
        filter_opts.only_unseen = False
        filter_opts.action_all = False
        filter_opts.folder_ids = (mgr.config.training.ham_folder_ids +
                                  mgr.config.training.spam_folder_ids)
        filter_opts.include_sub = (mgr.config.training.ham_include_sub or
                                   mgr.config.training.spam_include_sub)
        import filter
        filter.filterer(mgr, mgr.config, progress)

    bayes = classifier_data.bayes
    template = _("Completed training with %d spam and %d good messages")
    progress.set_status(template % (bayes.nspam, bayes.nham))
def WizardTrainer(mgr, config, progress):
    """Train a fresh, wizard-private classifier database.

    mgr      -- the manager; supplies the data directory and message store.
                ``classifier_data`` is swapped for the new data while the
                post-training steps run, then restored;
                ``wizard_classifier_data`` is set to the new data.
    config   -- configuration; reads ``training`` and ``filter_now`` and
                writes ``wizard.temp_training_names``.
    progress -- progress reporter (stages, ticks, status text).

    Returns None.
    """
    import os
    import manager
    import train

    join = os.path.join
    bayes_path = join(mgr.data_directory, "$sbwiz$default_bayes_database")
    mdb_path = join(mgr.data_directory, "$sbwiz$default_message_database")

    # Candidate file names, grouped by extension (bayes first, then mdb).
    pck_names = [bayes_path + ".pck", mdb_path + ".pck"]
    db_names = [bayes_path + ".db", mdb_path + ".db"]
    config.wizard.temp_training_names = pck_names + db_names

    storage_class = manager.GetStorageManagerClass()
    storage = storage_class(bayes_path, mdb_path)
    classifier_data = manager.ClassifierData(storage, mgr)
    classifier_data.InitNew()

    rescore = config.training.rescore
    if rescore:
        stages = ((_("Training"), 0.3), (_("Saving"), 0.1),
                  (_("Scoring"), 0.6))
    else:
        stages = ((_("Training"), 0.9), (_("Saving"), 0.1))
    progress.set_stages(stages)

    train.real_trainer(classifier_data, config, mgr.message_store, progress)

    # Temporarily make the new data the manager's classifier data; the
    # finally clause puts the original back whatever happens.
    original_data = mgr.classifier_data
    mgr.classifier_data = classifier_data
    try:
        progress.tick()

        if rescore:
            # Point the "filter now" settings at every folder trained on.
            filter_opts = config.filter_now
            filter_opts.only_unread = False
            filter_opts.only_unseen = False
            filter_opts.action_all = False
            filter_opts.folder_ids = (config.training.ham_folder_ids +
                                      config.training.spam_folder_ids)
            filter_opts.include_sub = (config.training.ham_include_sub or
                                       config.training.spam_include_sub)
            import filter
            filter.filterer(mgr, config, progress)

        bayes = classifier_data.bayes
        status = _("Completed training with %d spam and %d good messages") % (
            bayes.nspam, bayes.nham)
        progress.set_status(status)
    finally:
        mgr.wizard_classifier_data = classifier_data
        mgr.classifier_data = original_data
def trainer(mgr, config, progress):
    """Run a training pass and, optionally, re-score the trained folders.

    mgr      -- the manager; supplies the data directory, message store,
                live classifier data, statistics and "filter now" config.
    config   -- configuration whose ``training`` section selects the
                folders and the ``rebuild`` / ``rescore`` options.
    progress -- progress reporter (stages, ticks, errors, status text).

    Returns None.  Reports an error and does nothing when neither a ham nor
    a spam folder is configured; returns early if a stop was requested.
    """
    rebuild = config.training.rebuild
    rescore = config.training.rescore

    have_ham = config.training.ham_folder_ids
    if not have_ham and not config.training.spam_folder_ids:
        progress.error(
            _("You must specify at least one spam or one good folder"))
        return

    if rebuild:
        # Train into a scratch database which the manager only adopts once
        # training has completed.  A cancelled run therefore cannot leave a
        # "bad" database behind, and mail arriving during training is not
        # classified against a partially built one.
        import os
        import manager
        data_dir = mgr.data_directory
        bayes_path = os.path.join(data_dir, "$sbtemp$default_bayes_database")
        mdb_path = os.path.join(data_dir, "$sbtemp$default_message_database")
        # Ask the manager module which storage manager class to use.
        storage = manager.GetStorageManagerClass()(bayes_path, mdb_path)
        classifier_data = manager.ClassifierData(storage, mgr)
        classifier_data.InitNew()
    else:
        classifier_data = mgr.classifier_data

    # Up to three stages: train, save, and (optionally) score.  Re-scoring
    # is much slower than training because each message has to be saved
    # back.  Saving can be slow too, but it only gets a single tick.
    if not rescore:
        stages = ((_("Training"), 0.9), (_("Saving"), 0.1))
    else:
        stages = ((_("Training"), 0.3), (_("Saving"), 0.1),
                  (_("Scoring"), 0.6))
    progress.set_stages(stages)

    real_trainer(classifier_data, config, mgr.message_store, progress)

    if progress.stop_requested():
        return

    if rebuild:
        assert mgr.classifier_data is not classifier_data
        mgr.AdoptClassifierData(classifier_data)
        classifier_data = mgr.classifier_data
        # A rebuild also resets the statistics; write them to the log
        # first so they remain available for reference.
        session_report = "\r\n".join(mgr.stats.GetStats(session_only=True))
        mgr.LogDebug(1, "Session:" + session_report)
        total_report = "\r\n".join(mgr.stats.GetStats())
        mgr.LogDebug(1, "Total:" + total_report)
        mgr.stats.Reset()
        mgr.stats.ResetTotal(permanently=True)

    progress.tick()

    if rescore:
        # Point the "filter now" settings at every folder we trained on.
        filter_opts = mgr.config.filter_now
        filter_opts.only_unread = False
        filter_opts.only_unseen = False
        filter_opts.action_all = False
        filter_opts.folder_ids = (mgr.config.training.ham_folder_ids +
                                  mgr.config.training.spam_folder_ids)
        filter_opts.include_sub = (mgr.config.training.ham_include_sub or
                                   mgr.config.training.spam_include_sub)
        import filter
        filter.filterer(mgr, mgr.config, progress)

    bayes = classifier_data.bayes
    counts = (bayes.nspam, bayes.nham)
    progress.set_status(
        _("Completed training with %d spam and %d good messages") % counts)