def onTrain(self, file, text, which):
    """Train on an uploaded or pasted message.

    Arguments:
      file  -- uploaded content; used in preference to `text` when it is
               non-empty, after being passed through
               `self._convertToMbox`.
      text  -- pasted message text, used when `file` is empty.
      which -- label of the submit button that was pressed; the content
               is trained as spam when it equals the translated
               'Train as Spam' label, and as ham otherwise.

    Every message extracted from the content is added to the selected
    corpus, marked as trained and counted in `self.stats`.  The result is
    then saved with `self._doSave()` and a new training form is written.
    """
    self._writePreamble(_("Train"))

    # Upload or paste?  Spam or ham?
    content = file or text
    isSpam = (which == _('Train as Spam'))

    # Only uploaded files go through the mbox conversion step.
    if file:
        content = self._convertToMbox(content)

    # Normalise '\r\n' and bare '\r' line endings to '\n'.
    content = content.replace('\r\n', '\n').replace('\r', '\n')

    # Split the content into the individual messages to train on.
    messages = self._convertUploadToMessageList(content)

    # Locate the corpus to add to, caching it on self for later calls.
    if isSpam:
        desired_corpus = "spamCorpus"
    else:
        desired_corpus = "hamCorpus"
    if hasattr(self, desired_corpus):
        corpus = getattr(self, desired_corpus)
    else:
        if hasattr(self, "state"):
            # Borrow the corpus and the message namer from the state.
            corpus = getattr(self.state, desired_corpus)
            setattr(self, desired_corpus, corpus)
            self.msg_name_func = self.state.getNewMessageName
        else:
            # No state available: build a file-backed corpus from the
            # "Storage" options.
            if isSpam:
                fn = storage.get_pathname_option("Storage", "spam_cache")
            else:
                fn = storage.get_pathname_option("Storage", "ham_cache")
            storage.ensureDir(fn)
            if options["Storage", "cache_use_gzip"]:
                factory = FileCorpus.GzipFileMessageFactory()
            else:
                factory = FileCorpus.FileMessageFactory()
            # The expiry option is in days; the corpus is given seconds.
            age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            corpus = FileCorpus.ExpiryFileCorpus(age, factory, fn,
                                                 r'[0123456789\-]*',
                                                 cacheSize=20)
            setattr(self, desired_corpus, corpus)

            class UniqueNamer(object):
                """Generate keys of the form <timestamp>-<counter>."""
                count = -1

                def generate_name(self):
                    self.count += 1
                    return "%10.10d-%d" % (long(time.time()), self.count)

            Namer = UniqueNamer()
            self.msg_name_func = Namer.generate_name

    # Train on the uploaded message(s).
    self.write("<b>" + _("Training") + "...</b>\n")
    self.flush()
    for message in messages:
        key = self.msg_name_func()
        msg = corpus.makeMessage(key, message)
        msg.setId(key)
        corpus.addMessage(msg)
        msg.RememberTrained(isSpam)
        self.stats.RecordTraining(not isSpam)

    # Save, then offer a link Home and another training form.  The
    # closing anchor tag must be the complete "</a>" for valid HTML.
    self._doSave()
    self.write(_("%sOK. Return %sHome%s or train again:%s") %
               ("<p>", "<a href='home'>", "</a>", "</p>"))
    self.write(self._buildTrainBox())
    self._writePostamble()
def train_mime(self, msg_text, encoding, is_spam):
    """Train on one MIME message supplied as text.

    Arguments:
      msg_text -- the message; a byte string is decoded with `encoding`,
                  and the result is carried as UTF-8 from then on.
      encoding -- codec name used to decode a byte-string `msg_text`.
      is_spam  -- true to store the message in the spam corpus, false
                  for the ham corpus.

    The message is parsed as a `spambayes.message.SBHeaderMessage`,
    serialised again, added to the chosen corpus under a new key and
    marked as trained.
    """
    # Create the workers when the state has no classifier yet.
    if self.state.bayes is None:
        self.state.create_workers()

    # Canonical form: make sure we hold unicode, then encode as UTF-8.
    if isinstance(msg_text, str):
        msg_text = unicode(msg_text, encoding)
    if isinstance(msg_text, unicode):
        msg_text = msg_text.encode("utf-8")
    parsed = message_from_string(msg_text,
                                 _class=spambayes.message.SBHeaderMessage)

    # Name of the attribute (on self or self.state) holding the corpus.
    corpus_attr = "hamCorpus"
    if is_spam:
        corpus_attr = "spamCorpus"

    if not hasattr(self, corpus_attr):
        if hasattr(self, "state"):
            # Reuse the state's corpus and its message-naming function.
            corpus = getattr(self.state, corpus_attr)
            setattr(self, corpus_attr, corpus)
            self.msg_name_func = self.state.getNewMessageName
        else:
            # Build a file-backed corpus from the "Storage" options.
            cache_option = "ham_cache"
            if is_spam:
                cache_option = "spam_cache"
            cache_dir = storage.get_pathname_option("Storage", cache_option)
            storage.ensureDir(cache_dir)
            if options["Storage", "cache_use_gzip"]:
                msg_factory = FileCorpus.GzipFileMessageFactory()
            else:
                msg_factory = FileCorpus.FileMessageFactory()
            # Expiry is configured in days; convert to seconds.
            expiry = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            corpus = FileCorpus.ExpiryFileCorpus(expiry, msg_factory,
                                                 cache_dir,
                                                 r'[0123456789\-]*',
                                                 cacheSize=20)
            setattr(self, corpus_attr, corpus)

            # Keys look like <timestamp>-<counter>; the counter starts
            # at zero and is private to this generator.
            counter = [-1]

            def generate_name():
                counter[0] += 1
                return "%10.10d-%d" % (long(time.time()), counter[0])

            self.msg_name_func = generate_name
    else:
        corpus = getattr(self, corpus_attr)

    # Store the serialised message under a new key and mark it trained.
    name = self.msg_name_func()
    serialised = parsed.as_string()
    mime_message = unicode(serialised, "utf-8").encode("utf-8")
    stored = corpus.makeMessage(name, mime_message)
    stored.setId(name)
    corpus.addMessage(stored)
    stored.RememberTrained(is_spam)
def train_mime(self, msg_text, encoding, is_spam):
    """Add a single MIME message to the spam or ham training corpus.

    Arguments:
      msg_text -- message text; byte strings are decoded using
                  `encoding` and everything is then encoded as UTF-8.
      encoding -- codec name for decoding a byte-string `msg_text`.
      is_spam  -- selects the spam corpus when true, the ham corpus
                  when false.

    The text is parsed into a `spambayes.message.SBHeaderMessage`; its
    serialised form is what gets stored in the corpus and marked as
    trained.
    """
    state = self.state
    if state.bayes is None:
        state.create_workers()

    # Get msg_text into its canonical representation: unicode first,
    # then UTF-8 encoded bytes.
    if isinstance(msg_text, str):
        msg_text = unicode(msg_text, encoding)
    if isinstance(msg_text, unicode):
        msg_text = msg_text.encode("utf-8")
    header_msg = message_from_string(
        msg_text, _class=spambayes.message.SBHeaderMessage)

    if is_spam:
        wanted = "spamCorpus"
    else:
        wanted = "hamCorpus"

    if hasattr(self, wanted):
        # A corpus was cached on self by an earlier call.
        target = getattr(self, wanted)
    elif hasattr(self, "state"):
        # Take the corpus and the key generator from the shared state.
        target = getattr(self.state, wanted)
        setattr(self, wanted, target)
        self.msg_name_func = self.state.getNewMessageName
    else:
        # Fall back to a file-backed corpus described by the options.
        if is_spam:
            directory = storage.get_pathname_option("Storage", "spam_cache")
        else:
            directory = storage.get_pathname_option("Storage", "ham_cache")
        storage.ensureDir(directory)
        use_gzip = options["Storage", "cache_use_gzip"]
        if use_gzip:
            factory = FileCorpus.GzipFileMessageFactory()
        else:
            factory = FileCorpus.FileMessageFactory()
        # The option is in days; the corpus takes seconds.
        max_age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
        target = FileCorpus.ExpiryFileCorpus(
            max_age, factory, directory, r'[0123456789\-]*', cacheSize=20)
        setattr(self, wanted, target)

        class _KeyMaker(object):
            """Produce keys of the form <timestamp>-<counter>."""

            def __init__(self):
                self.count = -1

            def generate_name(self):
                self.count += 1
                return "%10.10d-%d" % (long(time.time()), self.count)

        self.msg_name_func = _KeyMaker().generate_name

    # Store the re-serialised message under a new key, then record that
    # it has been trained.
    key = self.msg_name_func()
    payload = unicode(header_msg.as_string(), "utf-8").encode("utf-8")
    entry = target.makeMessage(key, payload)
    entry.setId(key)
    target.addMessage(entry)
    entry.RememberTrained(is_spam)