Example #1
0
 def train(self, msg, isSpam):
     """Train the classifier on a message, as spam or not-spam.

     When the ("smtpproxy", "use_cached_message") option is true (or is
     missing from ``options``), the id is extracted from ``msg`` with
     ``extractSpambayesID`` and the cached message with that id is
     trained via ``train_cached_message``.  Otherwise ``msg`` itself is
     parsed and trained on.

     msg    -- message text; handed to ``extractSpambayesID`` or
               ``sbheadermessage_from_string``.
     isSpam -- flag passed through to the classifier's learn/unlearn
               calls and recorded with ``RememberTrained``.

     Returns None in every case.
     """
     try:
         use_cached = options["smtpproxy", "use_cached_message"]
     except KeyError:
         use_cached = True
     if use_cached:
         msg_id = self.extractSpambayesID(msg)
         if msg_id is None:
             # Parenthesised so it parses as either a print statement or a
             # print() call; the output is the same.
             print("Could not extract id")
             return
         self.train_cached_message(msg_id, isSpam)
         # The cached copy has been trained, so stop here.  Without this
         # return the code below would also train on the forwarded text,
         # counting the same message twice.
         return

     # Otherwise, train on the forwarded/bounced message.
     sb_msg = sbheadermessage_from_string(msg)
     msg_id = sb_msg.setIdFromPayload()
     sb_msg.delSBHeaders()
     if msg_id is None:
         # No id, so we don't have any reliable method of remembering
         # information about this message, so we just assume that it
         # hasn't been trained before.  We could generate some sort of
         # checksum for the message and use that as an id (this would
         # mean that we didn't need to store the id with the message)
         # but that might be a little unreliable.
         self.classifier.learn(sb_msg.asTokens(), isSpam)
         return

     # Previously trained the opposite way: undo that training first.
     if sb_msg.GetTrained() == (not isSpam):
         self.classifier.unlearn(sb_msg.asTokens(), not isSpam)
         sb_msg.RememberTrained(None)
     # Only learn when no training is currently recorded, so a message
     # already trained the requested way is not counted again.
     if sb_msg.GetTrained() is None:
         self.classifier.learn(sb_msg.asTokens(), isSpam)
         sb_msg.RememberTrained(isSpam)
Example #2
0
    def extractSpambayesID(self, data):
        """Return the id embedded in the message ``data``, or None.

        ``data`` is parsed with ``sbheadermessage_from_string`` and the id
        is searched for in three places, in order:

        1. the header named by options["Headers", "mailid_header_name"];
        2. the flattened message text (``msg.as_string()``);
        3. the payload of each part yielded by ``textparts(msg)``.

        The first non-None id found is returned; None is returned when no
        location yields one.
        """
        msg = sbheadermessage_from_string(data)

        # The nicest MUA is one that forwards the header intact.
        msg_id = msg.get(options["Headers", "mailid_header_name"])
        if msg_id is not None:
            return msg_id

        # Some MUAs will put it in the body somewhere, while others will
        # put it in an attached MIME message.
        msg_id = self._find_id_in_text(msg.as_string())
        if msg_id is not None:
            return msg_id

        # the message might be encoded
        for part in textparts(msg):
            # Decode, or take it as-is if decoding fails.
            try:
                text = part.get_payload(decode=True)
            except Exception:
                # Best-effort fallback, but let KeyboardInterrupt and
                # SystemExit propagate instead of swallowing them.
                text = part.get_payload(decode=False)
                if text is not None:
                    text = try_to_repair_damaged_base64(text)
            if text is None:
                continue
            msg_id = self._find_id_in_text(text)
            # Keep looking through the remaining parts when this one has
            # no id; returning unconditionally here would stop the search
            # at the first part.
            if msg_id is not None:
                return msg_id
        return None