コード例 #1
0
 def onTrain(self, file, text, which):
     """Train on an uploaded or pasted message.

     Arguments:
     file  -- content of an uploaded file.  When non-empty it takes
              precedence over `text` and is first passed through
              _convertToMbox().
     text  -- pasted message text, used only when `file` is empty.
     which -- label of the button that was submitted.  The messages
              are trained as spam only when it equals the translated
              'Train as Spam' label; otherwise they are trained as ham.

     Every message found in the content is added to the selected
     corpus, flagged through RememberTrained() and recorded in
     self.stats.  Afterwards _doSave() is called and a new training
     form is written to the page.
     """
     self._writePreamble(_("Train"))
     content = file or text
     isSpam = (which == _('Train as Spam'))
     if file:
         content = self._convertToMbox(content)
     # Normalise CRLF and bare CR line endings to LF before splitting.
     content = content.replace('\r\n', '\n').replace('\r', '\n')
     messages = self._convertUploadToMessageList(content)

     if isSpam:
         desired_corpus = "spamCorpus"
     else:
         desired_corpus = "hamCorpus"

     if hasattr(self, desired_corpus):
         # The corpus was already resolved by an earlier call.
         corpus = getattr(self, desired_corpus)
     elif hasattr(self, "state"):
         # Reuse the corpus and the message naming function kept on the
         # state object, and cache the corpus on this instance.
         corpus = getattr(self.state, desired_corpus)
         setattr(self, desired_corpus, corpus)
         self.msg_name_func = self.state.getNewMessageName
     else:
         # No state object: build a file-backed corpus from the
         # "Storage" options and cache it on this instance.
         if isSpam:
             fn = storage.get_pathname_option("Storage", "spam_cache")
         else:
             fn = storage.get_pathname_option("Storage", "ham_cache")
         storage.ensureDir(fn)
         if options["Storage", "cache_use_gzip"]:
             factory = FileCorpus.GzipFileMessageFactory()
         else:
             factory = FileCorpus.FileMessageFactory()
         # The option is expressed in days; scale it by 24*60*60.
         age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
         # Raw string: "\-" is not a valid escape in a normal literal.
         corpus = FileCorpus.ExpiryFileCorpus(age, factory, fn,
                                              r'[0123456789\-]*',
                                              cacheSize=20)
         setattr(self, desired_corpus, corpus)

         class UniqueNamer(object):
             # Starts at -1 so that the first generated suffix is 0.
             count = -1

             def generate_name(self):
                 self.count += 1
                 # int() formats exactly like long() here and is also
                 # available on Python 3.
                 return "%10.10d-%d" % (int(time.time()), self.count)

         Namer = UniqueNamer()
         self.msg_name_func = Namer.generate_name

     self.write("<b>" + _("Training") + "...</b>\n")
     self.flush()
     for message in messages:
         key = self.msg_name_func()
         msg = corpus.makeMessage(key, message)
         msg.setId(key)
         corpus.addMessage(msg)
         msg.RememberTrained(isSpam)
         # RecordTraining() is passed True for ham, hence the negation.
         self.stats.RecordTraining(not isSpam)
     self._doSave()
     # The third substitution closes the link; it must be a complete
     # "</a>" tag, otherwise the emitted HTML is malformed.
     self.write(_("%sOK. Return %sHome%s or train again:%s") %
                ("<p>", "<a href='home'>", "</a>", "</p>"))
     self.write(self._buildTrainBox())
     self._writePostamble()
コード例 #2
0
    def train_mime(self, msg_text, encoding, is_spam):
        """Parse a MIME message and add it to the spam or ham corpus.

        Arguments:
        msg_text -- the message source.  A byte string is decoded with
                    `encoding`; unicode text is then encoded as UTF-8
                    before it is parsed.
        encoding -- codec name used to decode a byte-string `msg_text`.
        is_spam  -- true to store the message in the spam corpus,
                    false to store it in the ham corpus.

        The stored message is flagged through RememberTrained(is_spam).
        """
        # Create the workers if no bayes object has been set up yet.
        if self.state.bayes is None:
            self.state.create_workers()

        # Canonical form: byte string -> unicode -> UTF-8 byte string.
        raw = msg_text
        if isinstance(raw, str):
            raw = unicode(raw, encoding)
        if isinstance(raw, unicode):
            raw = raw.encode("utf-8")
        parsed = message_from_string(
            raw, _class=spambayes.message.SBHeaderMessage)

        corpus_attr = "hamCorpus"
        if is_spam:
            corpus_attr = "spamCorpus"

        if hasattr(self, corpus_attr):
            # Already resolved and cached by an earlier call.
            corpus = getattr(self, corpus_attr)
        elif hasattr(self, "state"):
            # Take the corpus and the naming function from the state
            # object and cache the corpus on this instance.
            corpus = getattr(self.state, corpus_attr)
            setattr(self, corpus_attr, corpus)
            self.msg_name_func = self.state.getNewMessageName
        else:
            # NOTE(review): self.state is already dereferenced at the top
            # of this method, so this fallback looks unreachable here --
            # confirm before relying on it.
            cache_option = "ham_cache"
            if is_spam:
                cache_option = "spam_cache"
            cache_dir = storage.get_pathname_option("Storage", cache_option)
            storage.ensureDir(cache_dir)

            if options["Storage", "cache_use_gzip"]:
                factory = FileCorpus.GzipFileMessageFactory()
            else:
                factory = FileCorpus.FileMessageFactory()

            # The option is expressed in days; scale it by 24*60*60.
            max_age = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
            corpus = FileCorpus.ExpiryFileCorpus(
                max_age, factory, cache_dir, '[0123456789\-]*', cacheSize=20)
            setattr(self, corpus_attr, corpus)

            class UniqueNamer(object):
                # Starts at -1 so that the first generated suffix is 0.
                count = -1

                def generate_name(self):
                    self.count += 1
                    return "%10.10d-%d" % (long(time.time()), self.count)

            self.msg_name_func = UniqueNamer().generate_name

        name = self.msg_name_func()
        # Decode the serialised message as UTF-8 and encode it again
        # before handing it to the corpus.
        serialised = unicode(parsed.as_string(), "utf-8").encode("utf-8")
        stored = corpus.makeMessage(name, serialised)
        stored.setId(name)
        corpus.addMessage(stored)
        stored.RememberTrained(is_spam)
コード例 #3
0
 def train_mime(self, msg_text, encoding, is_spam):
     """Store one MIME message in the spam or ham training corpus.

     Arguments:
     msg_text -- message source; a byte string is decoded using
                 `encoding`, and unicode text is encoded as UTF-8
                 before parsing.
     encoding -- codec name applied when `msg_text` is a byte string.
     is_spam  -- selects the spam corpus when true, the ham corpus
                 when false; also passed to RememberTrained().
     """
     # Workers are created on demand when state.bayes is still None.
     if self.state.bayes is None:
         self.state.create_workers()

     # Bring the text into one representation: UTF-8 encoded bytes.
     source = msg_text
     if isinstance(source, str):
         source = unicode(source, encoding)
     if isinstance(source, unicode):
         source = source.encode("utf-8")
     mime_msg = message_from_string(
         source, _class=spambayes.message.SBHeaderMessage)

     target = "hamCorpus"
     if is_spam:
         target = "spamCorpus"

     if hasattr(self, target):
         # Cached on this instance by a previous call.
         corpus = getattr(self, target)
     elif hasattr(self, "state"):
         # Share the corpus and message naming function held by state.
         corpus = getattr(self.state, target)
         setattr(self, target, corpus)
         self.msg_name_func = self.state.getNewMessageName
     else:
         # NOTE(review): self.state was already used above, so this
         # branch appears unreachable in this method -- confirm.
         option_name = "ham_cache"
         if is_spam:
             option_name = "spam_cache"
         directory = storage.get_pathname_option("Storage", option_name)
         storage.ensureDir(directory)

         if options["Storage", "cache_use_gzip"]:
             factory = FileCorpus.GzipFileMessageFactory()
         else:
             factory = FileCorpus.FileMessageFactory()

         # cache_expiry_days is multiplied by 24*60*60 (days scaled up).
         expiry = options["Storage", "cache_expiry_days"] * 24 * 60 * 60
         corpus = FileCorpus.ExpiryFileCorpus(
             expiry, factory, directory, '[0123456789\-]*', cacheSize=20)
         setattr(self, target, corpus)

         class UniqueNamer(object):
             # The counter begins at -1, so the first name ends in "-0".
             count = -1

             def generate_name(self):
                 self.count += 1
                 return "%10.10d-%d" % (long(time.time()), self.count)

         self.msg_name_func = UniqueNamer().generate_name

     msg_id = self.msg_name_func()
     # Decode as UTF-8 and re-encode before storing the message text.
     payload = unicode(mime_msg.as_string(), "utf-8").encode("utf-8")
     entry = corpus.makeMessage(msg_id, payload)
     entry.setId(msg_id)
     corpus.addMessage(entry)
     entry.RememberTrained(is_spam)