Example #1
 def __init__(self, msg):
     self.debug = []
     self.digest = None
     self.predigest = None
     self.bodytext_size = 0
     self.filter = SuspectFilter(None)
     self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')
     
     # digest config
     self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
     self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
     self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
     self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
     self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
     self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
     self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
     self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
     self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
     self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
     self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
     self.REMOVE_HTML_TAGS = [
         'script',
         'style']  # strip tags (including content)
     
     self.predigest = self._make_predigest(msg)
     self.digest = self._make_hash(self.predigest)
Example #2
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'limiterfile': {
                'default': '/etc/fuglu/ratelimit.conf',
                'description': 'file based rate limits',
            },
            'backendtype': {
                'default':
                'memory',
                'description':
                'type of backend where the events are stored. memory is only recommended for low traffic standalone systems. alternatives are: redis, sqlalchemy'
            },
            'backendconfig': {
                'default':
                '',
                'description':
                'backend specific configuration. sqlalchemy: the database url, redis: hostname:port:db'
            }
        }

        self.logger = self._logger()
        self.backend_instance = None
        self.limiters = None
        self.filter = SuspectFilter(None)
Example #3
    def examine(self, suspect):
        starttime = time.time()
        if self.filter == None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if len(hits) == 0:
            return DUNNO

        #open file
        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        fh = open(ofile, 'a')
        for hit in hits:
            (fieldname, matchedvalue, arg, regex) = hit
            if arg == None or arg == '':
                arg = self.config.get(self.section, 'defaultlinetemplate')

            addvalues = dict(fieldname=fieldname,
                             matchedvalue=matchedvalue,
                             regex=regex)
            outputline = apply_template(arg, suspect, addvalues)
            fh.write(outputline)
            fh.write('\n')

        fh.close()
Example #4
    def examine(self, suspect):
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules == None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error('Archive Rules file does not exist : %s' %
                              archiverules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving"""
                )
            else:
                if arg != None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would  archive it if we weren't in debug mode)"
                    )
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")
Example #5
    def examine(self, suspect):
        starttime = time.time()
        if self.filter == None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if len(hits) == 0:
            return DUNNO

        # open file
        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        fh = open(ofile, 'a')
        for hit in hits:
            (fieldname, matchedvalue, arg, regex) = hit
            if arg == None or arg == '':
                arg = self.config.get(self.section, 'defaultlinetemplate')

            addvalues = dict(fieldname=fieldname, matchedvalue=matchedvalue, regex=regex)
            outputline = apply_template(arg, suspect, addvalues)
            fh.write(outputline)
            fh.write('\n')

        fh.close()
Example #6
    def examine(self, suspect):
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules == None or imapcopyrules == "":
            return DUNNO

        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' % imapcopyrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info("""%s: Header %s matches imap copy exception rule '%s' """ % (suspect.id, field, regex))
            else:
                if arg == None or (not arg.lower().startswith('imap')):
                    self.logger.error("Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'" % arg)
                else:
                    self.logger.info("""%s: Header %s matches imap copy rule '%s' """ % (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug("Suspect matches imap copy rule (I would copy it if we weren't in debug mode)")
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug("No imap copy rule/exception rule applies to this message")
Example #7
    def __init__(self, msg):
        self.debug = []
        self.digest = None
        self.predigest = None
        self.bodytext_size = 0
        self.filter = SuspectFilter(None)
        self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')

        # digest config
        self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
        self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
        self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
        self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
        self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
        self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
        self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
        self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
        self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
        self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
        self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
        self.REMOVE_HTML_TAGS = [
            'script',
            'style']  # strip tags (including content)

        self.predigest = self._make_predigest(msg)
        self.digest = self._make_hash(self.predigest)
Example #8
    def examine(self, suspect):
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules == None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error(
                'Archive Rules file does not exist : %s' % archiverules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving""")
            else:
                if arg != None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would  archive it if we weren't in debug mode)")
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")
Example #9
    def __init__(self):
        self.requiredvars = {
            "backendtype": {
                "default": "redis",
                "description": "Token store backend type. Allowed values are: sqlalchemy , redis",
            },
            "backendconfig": {
                "default": "",
                "description": "Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db",
            },
            "spambias": {
                "default": "0.5",
                "description": "overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam",
            },
            "minimum-token-occurence": {
                "default": "3",
                "description": "don't make assumptions on tokens seen less than this amount",
            },
            "maximum-tokens-per-message": {"default": "5000", "description": "stop tokenizing after x tokens"},
            "minimum-ham": {"default": "10", "description": "minimum known hams for classification"},
            "minimum-spam": {"default": "10", "description": "minimum known spams for classification"},
        }
        self.tokenstore = None
        self.calc_minimum = 0.00000001  # work around division by zero etc

        self.logger = self._logger()
        self.filter = SuspectFilter(None)
Example #10
    def lint_imap(self):
        #read file, check for all imap accounts
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules != '' and not os.path.exists(imapcopyrules):
            print "Imap copy rules file does not exist : %s" % imapcopyrules
            return False
        filter = SuspectFilter(imapcopyrules)

        accounts = []
        for tup in filter.patterns:
            (headername, pattern, arg) = tup
            if arg not in accounts:
                if arg == None:
                    print "Rule %s %s has no imap copy target" % (
                        headername, pattern.pattern)
                    return False
                if arg.lower() == 'no':
                    continue
                accounts.append(arg)

        for acc in accounts:
            p = urlparse(acc)
            host = p.hostname
            username = p.username
            folder = p.path[1:]
            print "Checking %s@%s/%s" % (username, host, folder)
            imap = self.imapconnect(acc, lintmode=True)
            if not imap:
                print "Lint failed for this account"
                return False

        return True
Example #11
    def examine(self, suspect):
        actionrules = self.config.get(self.section, 'actionrules')
        if actionrules == None or actionrules == "":
            return DUNNO

        if not os.path.exists(actionrules):
            self.logger.error('Action Rules file does not exist : %s' %
                              actionrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(actionrules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg == None or arg.strip() == '':
                self.logger.error("Rule match but no action defined.")
                return DUNNO

            arg = arg.strip()
            spl = arg.split(None, 1)
            actionstring = spl[0]
            message = None
            if len(spl) == 2:
                message = spl[1]
            self.logger.debug("%s: Rule match! Action override: %s" %
                              (suspect.id, arg.upper()))

            actioncode = string_to_actioncode(actionstring, self.config)
            if actioncode != None:
                return actioncode, message

            elif actionstring.upper() == 'REDIRECT':
                suspect.to_address = message.strip()
                suspect.recipients = [
                    suspect.to_address,
                ]
                # todo: should we override to_domain? probably not
                # todo: check for invalid address, multiple addresses
                # todo: document redirect action
            else:
                self.logger.error("Invalid action: %s" % arg)
                return DUNNO

        return DUNNO
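
For illustration, a hypothetical actionrules file for the plugin above could look like the lines below. The whitespace-separated layout (field name, regex, action plus optional message) is an assumption based on how the matched argument is split in examine(); the domains and addresses are made up.

    from_domain    ^known-spammer\.example$     REJECT no thanks
    to_address     ^oldalias@example\.com$      REDIRECT newalias@example.com

The first rule would return the REJECT action code with the message "no thanks"; the second would take the REDIRECT branch, which rewrites to_address and recipients.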
Example #12
    def __init__(self):
        self.requiredvars = {
            'backendtype': {
                'default':
                'redis',
                'description':
                'Token store backend type. Allowed values are: sqlalchemy , redis',
            },
            'backendconfig': {
                'default':
                '',
                'description':
                'Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db',
            },
            'spambias': {
                'default':
                '0.5',
                'description':
                'overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam',
            },
            'minimum-token-occurence': {
                'default':
                '3',
                'description':
                "don't make assumptions on tokens seen less than this amount",
            },
            'maximum-tokens-per-message': {
                'default': '5000',
                'description': 'stop tokenizing after x tokens',
            },
            'minimum-ham': {
                'default': '10',
                'description': "minimum known hams for classification",
            },
            'minimum-spam': {
                'default': '10',
                'description': "minimum known spams for classification",
            },
        }
        self.tokenstore = None
        self.calc_minimum = 0.00000001  # work around division by zero etc

        self.logger = self._logger()
        self.filter = SuspectFilter(None)
Example #13
    def examine(self, suspect):
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules == None or imapcopyrules == "":
            return DUNNO

        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' %
                                 imapcopyrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info(
                    """%s: Header %s matches imap copy exception rule '%s' """
                    % (suspect.id, field, regex))
            else:
                if arg == None or (not arg.lower().startswith('imap')):
                    self.logger.error(
                        "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"
                        % arg)

                else:
                    self.logger.info(
                        """%s: Header %s matches imap copy rule '%s' """ %
                        (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug(
                            "Suspect matches imap copy rule (I would  copy it if we weren't in debug mode)"
                        )
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug(
                "No imap copy rule/exception rule applies to this message")
Example #14
    def examine(self, suspect):
        starttime = time.time()

        filterfile = self.config.get(self.section, 'filterfile', '').strip()

        if self.filter == None:
            if filterfile != '':
                if not os.path.exists(filterfile):
                    self._logger().warning('LDA filter rules file does not exist : %s' % filterfile)
                    return DEFER
                self.filter = SuspectFilter(filterfile)

        if self.filter != None:
            match = self.filter.matches(suspect)
            if not match:
                return DUNNO

        self.boxtypemap[self.config.get(self.section, 'boxtype')](suspect)

        # For debugging, it's good to know how long each plugin took
        endtime = time.time()
        difftime = endtime - starttime
        suspect.tags['LDAPlugin.time'] = "%.4f" % difftime
Example #15
    def _initfilter(self):
        if self.filter is not None:
            return True

        filename = self.config.get(self.section, 'filterfile')
        if filename is None or filename == "":
            return False

        if not os.path.exists(filename):
            self.logger.error('Filterfile not found for skipper: %s' % filename)
            return False

        self.filter = SuspectFilter(filename)
        return True
Example #16
    def examine(self, suspect):
        actionrules = self.config.get(self.section, 'actionrules')
        if actionrules == None or actionrules == "":
            return DUNNO

        if not os.path.exists(actionrules):
            self.logger.error(
                'Action Rules file does not exist : %s' % actionrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(actionrules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg == None or arg.strip() == '':
                self.logger.error("Rule match but no action defined.")
                return DUNNO

            arg = arg.strip()
            spl = arg.split(None, 1)
            actionstring = spl[0]
            message = None
            if len(spl) == 2:
                message = spl[1]
            self.logger.debug(
                "%s: Rule match! Action override: %s" % (suspect.id, arg.upper()))

            actioncode = string_to_actioncode(actionstring, self.config)
            if actioncode != None:
                return actioncode, message

            elif actionstring.upper() == 'REDIRECT':
                suspect.to_address = message.strip()
                suspect.recipients = [suspect.to_address, ]
                # todo: should we override to_domain? probably not
                # todo: check for invalid address, multiple addresses
                # todo: document redirect action
            else:
                self.logger.error("Invalid action: %s" % arg)
                return DUNNO

        return DUNNO
Example #17
    def examine(self, suspect):
        starttime = time.time()

        filterfile = self.config.get(self.section, "filterfile", "").strip()

        if self.filter == None:
            if filterfile != "":
                if not os.path.exists(filterfile):
                    self._logger().warning("LDA filter rules file does not exist : %s" % filterfile)
                    return DEFER
                self.filter = SuspectFilter(filterfile)

        if self.filter != None:
            match = self.filter.matches(suspect)
            if not match:
                return DUNNO

        self.boxtypemap[self.config.get(self.section, "boxtype")](suspect)

        # For debugging, it's good to know how long each plugin took
        endtime = time.time()
        difftime = endtime - starttime
        suspect.tags["LDAPlugin.time"] = "%.4f" % difftime
Example #18
class HeaderwriterPlugin(ScannerPlugin):
    """
    Writes custom log based on suspect filter rules
    
    eg. if you put this into headerwriter.regex:
    From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}
    
    fuglu would write a log with fuglu-id's whose from-domain is microsoft.com,yahoo.com or gmail.com
    """
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'filterfile': {
                'default': '/etc/fuglu/headerwriter.regex',
                'description': 'Suspectfilter File',
            },
            'outputfile': {
                'default': '',
                'description': 'Output File',
            },
            'defaultlinetemplate': {
                'default':
                '${fieldname}: ${matchedvalue}',
                'description':
                'Default line output template if nothing is specified in filter config',
            }
        }
        self.filter = None

    def examine(self, suspect):
        starttime = time.time()
        if self.filter == None:
            self.filter = SuspectFilter(
                self.config.get(self.section, 'filterfile'))

        hits = self.filter.get_args(suspect, extended=True)
        if len(hits) == 0:
            return DUNNO

        #open file
        ofile = self.config.get(self.section, 'outputfile')
        if ofile.strip() == '':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO

        fh = open(ofile, 'a')
        for hit in hits:
            (fieldname, matchedvalue, arg, regex) = hit
            if arg == None or arg == '':
                arg = self.config.get(self.section, 'defaultlinetemplate')

            addvalues = dict(fieldname=fieldname,
                             matchedvalue=matchedvalue,
                             regex=regex)
            outputline = apply_template(arg, suspect, addvalues)
            fh.write(outputline)
            fh.write('\n')

        fh.close()

    def lint(self):
        filterfile = self.config.get(self.section, 'filterfile')
        if not os.path.exists(filterfile):
            print "file not found: %s" % filterfile
            return False

        if self.config.get(self.section, 'outputfile').strip() == '':
            print "No outputfile configured"
            return False

        return True
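
Putting the plugin together: a hypothetical headerwriter.regex (the rules file configured via filterfile) could contain the two rules below; the header names and regexes are made up.

    From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}
    Subject: (?i)invoice

The first rule carries its own line template as the third column; the second has none, so examine() falls back to the defaultlinetemplate (${fieldname}: ${matchedvalue}). Every hit is rendered through apply_template and appended to the configured outputfile.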
Example #19
class BayesPlugin(object):
    def __init__(self):
        self.requiredvars = {
            "backendtype": {
                "default": "redis",
                "description": "Token store backend type. Allowed values are: sqlalchemy , redis",
            },
            "backendconfig": {
                "default": "",
                "description": "Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db",
            },
            "spambias": {
                "default": "0.5",
                "description": "overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam",
            },
            "minimum-token-occurence": {
                "default": "3",
                "description": "don't make assumptions on tokens seen less than this amount",
            },
            "maximum-tokens-per-message": {"default": "5000", "description": "stop tokenizing after x tokens"},
            "minimum-ham": {"default": "10", "description": "minimum known hams for classification"},
            "minimum-spam": {"default": "10", "description": "minimum known spams for classification"},
        }
        self.tokenstore = None
        self.calc_minimum = 0.00000001  # work around division by zero etc

        self.logger = self._logger()
        self.filter = SuspectFilter(None)

    def init_backend(self):
        if self.tokenstore != None:
            return
        backendtype = self.config.get(self.section, "backendtype")
        if backendtype not in SUPPORTED_BACKENDS:
            self.logger.error("Bayes tokenstore %s not supported, maybe misspelled or missing dependency" % backendtype)

        backend = SUPPORTED_BACKENDS[backendtype](self.config.get(self.section, "backendconfig"))
        self.tokenstore = backend

    def single_token_spam_probability(self, token):
        """Compute the probability that a message containing a given token is spam
        ( "spamicity of a word" )
        """
        total_spam = self.tokenstore.get_total_spam_count()
        if total_spam < self.config.getint(self.section, "minimum-spam"):
            self.logger.warning("Not enough known spams for bayes classification")
            return 0.5

        total_ham = self.tokenstore.get_total_ham_count()
        if total_ham < self.config.getint(self.section, "minimum-ham"):
            self.logger.warning("Not enough known hams for bayes classification")
            return 0.5

        pr_s = self.config.getfloat(self.section, "spambias")  # probability that any given message is spam
        pr_h = 1 - pr_s  # probability that any given message is ham

        spam_count = self.tokenstore.get_spam_count(token)  # number of known spams containing this token
        ham_count = self.tokenstore.get_ham_count(token)  # number of known hams containing this token

        # "Dealing with rare words"
        if spam_count + ham_count < self.config.getint(self.section, "minimum-token-occurence"):
            pr_s_w = 0.5
        else:
            pr_w_s = float(spam_count) / total_spam  #  the probability that the token appears in spam messages
            pr_w_h = float(ham_count) / total_ham  #   the probability that the token appears in ham messages
            divisor = pr_w_s * pr_s + pr_w_h * pr_h
            if divisor < self.calc_minimum:
                divisor = self.calc_minimum
            pr_s_w = pr_w_s * pr_s / divisor
        # self.logger.info("Token '%s' : seen in %s spams, %s hams => spamicity= %.4f"%(token,spam_count,ham_count,pr_s_w))
        return pr_s_w

    def spam_probability(self, suspect):
        """
        :param suspect: the Suspect whose body text will be classified
        :return: the probability that the given text is spam. float value between 0.0 and 1.0
        """
        tokens = self.tokenize(suspect)
        self.logger.debug("Got %s tokens" % len(tokens))
        total = 0
        for t in tokens:
            spamicity = self.single_token_spam_probability(t)
            if spamicity < self.calc_minimum:
                spamicity = self.calc_minimum

            # make sure we get at least a very small amount
            x = 1 - spamicity
            if x < self.calc_minimum:
                x = self.calc_minimum
            n = math.log(x) - math.log(spamicity)
            total += n
        try:
            probability = 1.0 / (1 + math.pow(math.e, total))
        except OverflowError:
            return 0.0

        return round(probability, 4)

    def ngrams(self, sequence, n=3, maxnumber=None):
        sequence = list(sequence)
        count = max(0, len(sequence) - n + 1)
        if maxnumber == None:
            maxnumber = count
        return ["".join(sequence[i : i + n]) for i in range(min(count, maxnumber))]

    def tokenize(self, suspect):
        visible_texts = self.filter.get_field(suspect, "body:stripped")
        stripped = " ".join([t.strip() for t in visible_texts if t.strip() != ""])
        maxtokens = self.config.getint(self.section, "maximum-tokens-per-message")
        if maxtokens == 0:
            maxtokens = None
        tokens = self.ngrams(stripped, n=3, maxnumber=maxtokens)
        # self.logger.debug(tokens)
        return tokens
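
As a self-contained sketch of the math used in spam_probability above: the body text is cut into character trigrams, each token gets a spamicity, and the per-token log odds are summed before being converted back into a probability. The spamicity table below is made up for illustration; a real run would query the token store as single_token_spam_probability does.

import math

def ngrams(sequence, n=3):
    seq = list(sequence)
    return ["".join(seq[i:i + n]) for i in range(max(0, len(seq) - n + 1))]

# made-up spamicities for a few trigrams; unknown tokens default to the neutral 0.5
toy_spamicity = {'via': 0.9, 'iag': 0.95, 'agr': 0.9}

def combine(tokens, spamicity, minimum=0.00000001):
    total = 0.0
    for t in tokens:
        # clamp the spamicity away from 0 and 1, as the plugin does with calc_minimum
        p = min(max(spamicity.get(t, 0.5), minimum), 1 - minimum)
        total += math.log(1 - p) - math.log(p)
    try:
        return round(1.0 / (1 + math.pow(math.e, total)), 4)
    except OverflowError:
        return 0.0

print(combine(ngrams("viagra"), toy_spamicity))  # prints a value close to 1.0 (spammy)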
Example #20
class SuspectFilterTestCase(unittest.TestCase):

    """Test Header Filter"""

    def setUp(self):
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        self.assertTrue(
            'Sent to unittest domain!' in headermatches, "To_domain not found in headercheck")
        self.assertTrue('Envelope sender is [email protected]' in headermatches,
                        "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue(
            'A tag match' in headermatches, "Tag match did not work")
        self.assertTrue(
            'Globbing works' in headermatches, "header globbing failed")
        self.assertTrue(
            'body rule works' in headermatches, "decoded body rule failed")
        self.assertTrue(
            'full body rule works' in headermatches, "full body failed")
        self.assertTrue('mime rule works' in headermatches, "mime rule failed")
        self.assertFalse('this should not match in a body rule' in headermatches,
                         'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue('perl-style recipient match' in headermatches,
                        "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""

        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertTrue(field == 'to_domain')
        self.assertTrue(matchedvalue == 'unittests.fuglu.org')
        self.assertTrue(arg == 'Sent to unittest domain!')
        self.assertTrue(regex == 'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
  <head>
    <title>greetings</title>
  </head>
  <body>
    <font color="red">well met!</font>
  </body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html;
charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15
(filtered medium)"><style><!--
/* Font Definitions */
@font-face
	{font-family:"Cambria Math";
	panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
	{font-family:Calibri;
	panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{margin:0cm;
	margin-bottom:.0001pt;
	font-size:11.0pt;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
	{mso-style-priority:99;
	color:#0563C1;
	text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
	{mso-style-priority:99;
	color:#954F72;
	text-decoration:underline;}
span.E-MailFormatvorlage17
	{mso-style-type:personal-compose;
	font-family:"Calibri",sans-serif;
	color:windowtext;}
.MsoChpDefault
	{mso-style-type:export-only;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
@page WordSection1
	{size:612.0pt 792.0pt;
	margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
	{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH
link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p
class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(
                declarationtest, use_bfs=use_bfs)
            self.assertEqual(
                docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(
                wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')
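
For reference, a minimal sketch of a rule that would satisfy the assertions in test_sf_matches above; the exact headertest.regex syntax (field name, regex, argument separated by whitespace) is an assumption, only the values are taken from the test:

    to_domain    unittests\.fuglu\.org    Sent to unittest domain!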
Example #21
class FuzorDigest(object):
    def __init__(self, msg):
        self.debug = []
        self.digest = None
        self.predigest = None
        self.bodytext_size = 0
        self.filter = SuspectFilter(None)
        self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')
        
        # digest config
        self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
        self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
        self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
        self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
        self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
        self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
        self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
        self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
        self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
        self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
        self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
        self.REMOVE_HTML_TAGS = [
            'script',
            'style']  # strip tags (including content)
        
        self.predigest = self._make_predigest(msg)
        self.digest = self._make_hash(self.predigest)
    
    
    
    def _make_hash(self, predigest):
        if self.bodytext_size < self.MINIMUM_BODYTEXT_SIZE:
            return None
        predigest = predigest.strip()
        if len(predigest) < self.MINIMUM_PREDIGEST_SIZE:
            return None
        unmodified = re.sub(r'\[[A-Z0-9:]+\]', '', predigest)
        if len(unmodified) < self.MINIMUM_UNMODIFIED_CONTENT:
            return None
        
        predigest = predigest.encode('utf-8', errors='ignore')
        return hashlib.sha1(predigest).hexdigest()
    
    
    
    def _handle_text_part(self, part):
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset()
        errors = "ignore"
        if not charset:
            charset = "ascii"
        elif charset.lower().replace("_", "-") in ("quopri-codec", "quopri", "quoted-printable", "quotedprintable"):
            errors = "strict"
        
        try:
            payload = payload.decode(charset, errors)
        except (LookupError, UnicodeError, AssertionError):
            payload = payload.decode("ascii", "ignore")
        
        if self.STRIP_HTML_MARKUP:
            payload = self.filter.strip_text(
                payload,
                remove_tags=self.REMOVE_HTML_TAGS,
                use_bfs=True)
        
        if self.REPLACE_EMAIL is not None:
            payload = re.sub(r'\S{1,50}@\S{1,30}', self.REPLACE_EMAIL, payload)
        
        if self.REPLACE_URL is not None:
            payload = re.sub(r'[a-z]+:\S{1,100}', self.REPLACE_URL, payload)
        
        if self.REPLACE_LONG_WORD is not None:
            patt = r'\S{%s,}' % self.LONG_WORD_THRESHOLD
            payload = re.sub(patt, self.REPLACE_LONG_WORD, payload)
        
        if self.STRIP_WHITESPACE:
            payload = re.sub(r'\s', '', payload)
        payload = payload.strip()
        return payload
    
    
    
    def _make_predigest(self, msg):
        attachment_count = 0
        predigest = ''
        for part in msg.walk():
            if part.is_multipart():
                continue
            
            if part.get_content_maintype() == "text":
                try:
                    normalized_text_part = self._handle_text_part(part)
                    predigest += normalized_text_part
                    self.bodytext_size += len(normalized_text_part)
                except Exception as e:
                    self.logger.warn(e)
            else:
                attachment_count += 1
                if self.INCLUDE_ATTACHMENT_CONTENT:
                    predigest += "[ATTH:%s]" % hashlib.sha1(
                        part.get_payload()).hexdigest()
        
        if self.INCLUDE_ATTACHMENT_COUNT and attachment_count:
            predigest += "[ATTC:%s]" % attachment_count
        
        if self.STRIP_WHITESPACE:
            predigest = re.sub(r'\s', '', predigest)
        
        return predigest
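
A minimal usage sketch for the FuzorDigest class above. The import path is assumed from the logger name used in __init__, and sample.eml is a hypothetical message file:

import email

# assumed import path, derived from logging.getLogger('fuglu.plugins.fuzor.Digest')
from fuglu.plugins.fuzor import FuzorDigest

with open('sample.eml', 'rb') as fp:
    msg = email.message_from_bytes(fp.read())

fuzor = FuzorDigest(msg)
print(fuzor.predigest)  # normalized body text used as hash input
print(fuzor.digest)     # sha1 hexdigest, or None if the message is too small to hash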
Example #22
 def setUp(self):
     self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')
Example #23
class SuspectFilterTestCase(unittest.TestCase):
    """Test Suspectfilter"""
    def setUp(self):
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        self.assertTrue('Sent to unittest domain!' in headermatches,
                        "To_domain not found in headercheck")
        self.assertTrue(
            'Envelope sender is [email protected]' in headermatches,
            "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue('A tag match' in headermatches,
                        "Tag match did not work")
        self.assertTrue('Globbing works' in headermatches,
                        "header globbing failed")
        self.assertTrue('body rule works' in headermatches,
                        "decoded body rule failed")
        self.assertTrue('full body rule works' in headermatches,
                        "full body failed")
        self.assertTrue('mime rule works' in headermatches, "mime rule failed")
        self.assertFalse(
            'this should not match in a body rule' in headermatches,
            'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue(
            'perl-style recipient match' in headermatches,
            "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""

        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertTrue(field == 'to_domain')
        self.assertTrue(matchedvalue == 'unittests.fuglu.org')
        self.assertTrue(arg == 'Sent to unittest domain!')
        self.assertTrue(regex == 'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**',
                          TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
  <head>
    <title>greetings</title>
  </head>
  <body>
    <font color="red">well met!</font>
  </body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml"
xmlns:o=3D"urn:schemas-microsoft-com:office:office"
xmlns:w=3D"urn:schemas-microsoft-com:office:word"
xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml"
xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html;
charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15
(filtered medium)"><style><!--
/* Font Definitions */
@font-face
	{font-family:"Cambria Math";
	panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
	{font-family:Calibri;
	panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
	{margin:0cm;
	margin-bottom:.0001pt;
	font-size:11.0pt;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
	{mso-style-priority:99;
	color:#0563C1;
	text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
	{mso-style-priority:99;
	color:#954F72;
	text-decoration:underline;}
span.E-MailFormatvorlage17
	{mso-style-type:personal-compose;
	font-family:"Calibri",sans-serif;
	color:windowtext;}
.MsoChpDefault
	{mso-style-type:export-only;
	font-family:"Calibri",sans-serif;
	mso-fareast-language:EN-US;}
@page WordSection1
	{size:612.0pt 792.0pt;
	margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
	{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH
link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p
class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(declarationtest,
                                                    use_bfs=use_bfs)
            self.assertEqual(docstripped.split(),
                             ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(wordhtml,
                                                        use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')
Example #24
class RateLimitPlugin(ScannerPlugin):
    """This is a generic rolling window rate limiting plugin. It allows limiting the amount of accepted messages based on any combination of supported SuspectFilter fields.
    This means you could for example limit the number of similar subjects by sender domain to implement a simple bulk filter.

    Important notes:
        - This plugin is experimental and has not been tested in production
        - This plugin only makes sense in pre-queue mode.
        - The content filter stage is usually *not* the best place to implement rate-limiting.
          Faster options are postfix built-in rate limits or a policy access daemon
          which doesn't need to accept the full message to make a decision
        - the backends don't automatically perform global expiration of all events.
          Old entries are only cleared per event the next time the same event happens.
          Add a cron job for your backend to clear all old events from time to time.

    Supported backends:
        - memory: stores events in memory. Do not use this in production.
        - sqlalchemy: Stores events in a SQL database. Recommended for small/low-traffic setups
        - redis: stores events in a redis database. This is the fastest and therefore recommended backend.

    Configuration example for redis. Prerequisite: python redis module
        backendtype = redis
        backendconfig = localhost:6379:0

    Configuration example for mysql: Prerequisite: python sqlalchemy module. The database must exist. The table will be created automatically.
        backendtype = sqlalchemy
        backendconfig = mysql://root@localhost/fuglu

    ratelimit.conf format: (not final yet)

    Each limiter is defined by a line which must match the following format. Each limiter is evaluated in the order specified.

    limit name=**name** rate=**max**/**timeframe** fields=**fieldlist** [match=/**filter regex**/ [skip=**skiplist** ]] action=**action** message=**message**

        **name**        : a descriptive name for this filter, one word. Required to reference in skip lists
        **max**         : the maximum number of events that may occur in the specified timeframe before the configured action is applied.
                          Specify a negative value to indicate "no limit"
        **timeframe**   : Timeframe for the limit
        **fields**      : comma separated list of fields which should be used as unique values to limit
        **match** (optional): regular expression applied to the actual values. The limiter is only applied if this regular expression matches.
                              If the limiter consists of multiple input fields,
                              the regex is applied to the comma separated list of field values.
        **skip** (optional):  Comma separated list of subsequent limiter names that should be skipped if this limiter's regex matches the input values.
                              Used for overrides.
        **action**      : Action that should be performed if the limit is exceeded. ( REJECT / DEFER / ... )
        **message**     : Message returned to the connecting client


    Examples:

    # no sending limit for our newsletter
    limit name=newsletter rate=-1/1 fields=from_address match=/^newsletter@example\.com$/ skip=fromaddr,serverhelo action=DUNNO message=OK

    # max 10 messages in 30 seconds per unique sender address:
    limit name=fromaddr rate=10/30 fields=from_address action=REJECT message=Too many messages from ${from_address}

    # max 100 messages with same subject per hour per server helo
    limit name=serverhelo rate=100/3600 fields=clienthelo,subject action=REJECT message=Bulk message detected

    """
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'limiterfile': {
                'default': '/etc/fuglu/ratelimit.conf',
                'description': 'file based rate limits',
            },
            'backendtype': {
                'default':
                'memory',
                'description':
                'type of backend where the events are stored. memory is only recommended for low traffic standalone systems. alternatives are: redis, sqlalchemy'
            },
            'backendconfig': {
                'default':
                '',
                'description':
                'backend specific configuration. sqlalchemy: the database url, redis: hostname:port:db'
            }
        }

        self.logger = self._logger()
        self.backend_instance = None
        self.limiters = None
        self.filter = SuspectFilter(None)

    #TODO: make action and message optional
    def load_limiter_config(self, text):
        patt = re.compile(
            r'^limit\s+name=(?P<name>[^\s]+)\s+rate=(?P<max>\-?\d{1,10})\/(?P<time>\d{1,10})\s+fields=(?P<fieldlist>[^\s]+)(\s+match=\/(?P<matchregex>.+)\/(\s+skip=(?P<skiplist>[^\s]+))?)?\s+action=(?P<action>[^\s]+)\s+message=(?P<message>.*)$'
        )
        limiters = []
        lineno = 0
        for line in text.split('\n'):
            lineno += 1
            line = line.strip()
            if line.startswith('#') or line.strip() == '':
                continue
            match = patt.match(line)
            if match == None:
                self.logger.error('cannot parse limiter config line %s' %
                                  lineno)
                continue
            gdict = match.groupdict()
            limiter = Limiter()
            limiter.name = gdict['name']
            limiter.max = int(gdict['max'])
            limiter.timespan = int(gdict['time'])
            limiter.fields = gdict['fieldlist'].split(',')
            limiter.regex = gdict['matchregex']
            if gdict['skiplist'] != None:
                limiter.skip = gdict['skiplist'].split(',')
            action = string_to_actioncode(gdict['action'])
            if action == None:
                self.logger.error(
                    "Limiter config line %s : invalid action %s" %
                    (lineno, gdict['action']))
            limiter.action = action
            limiter.message = gdict['message']
            limiters.append(limiter)
        return limiters

    def examine(self, suspect):
        if self.limiters == None:
            filename = self.config.get(self.section, 'limiterfile')
            if not os.path.exists(filename):
                self.logger.error("Limiter config file %s not found" %
                                  filename)
                return
            limiterconfig = open(filename, 'r').read()
            limiters = self.load_limiter_config(limiterconfig)
            self.limiters = limiters
            self.logger.info("Found %s limiter configurations" %
                             (len(limiters)))

        if self.backend_instance == None:
            btype = self.config.get(self.section, 'backendtype')
            if btype not in AVAILABLE_RATELIMIT_BACKENDS:
                self.logger.error('ratelimit backend %s not available' %
                                  (btype))
                return
            self.backend_instance = AVAILABLE_RATELIMIT_BACKENDS[btype](
                self.config.get(self.section, 'backendconfig'))

        skiplist = []
        for limiter in self.limiters:
            if limiter.name in skiplist:  # check if this limiter is skipped by a previous one
                self.logger.debug('limiter %s skipped due to previous match' %
                                  limiter.name)
                continue

            #get field values
            allfieldsavailable = True
            fieldvalues = []
            for fieldname in limiter.fields:
                values = self.filter.get_field(suspect, fieldname)
                if len(values) < 1:
                    allfieldsavailable = False
                    self.logger.debug(
                        'Skipping limiter %s - field %s not available' %
                        (limiter.name, fieldname))
                    break
                fieldvalues.append(values[0])
            if not allfieldsavailable:  #rate limit can not be applied
                continue

            checkval = ','.join(fieldvalues)
            if limiter.regex != None:
                if re.match(limiter.regex, checkval):
                    if limiter.skip != None:
                        skiplist.extend(limiter.skip)
                else:  #no match, skip this limiter
                    self.logger.debug(
                        'Skipping limiter %s - regex does not match' %
                        (limiter.name))
                    continue
            #self.logger.debug("check %s"%str(limiter))
            eventname = limiter.name + checkval
            timespan = limiter.timespan
            max = limiter.max
            if max < 0:  #no limit
                continue
            event_count = self.backend_instance.check_count(
                eventname, timespan)
            self.logger.debug("Limiter event %s  count: %s" %
                              (eventname, event_count))
            if event_count > max:
                return limiter.action, apply_template(limiter.message, suspect)
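
To see what load_limiter_config extracts from a rule line, here is a standalone sketch that applies the same regular expression to one of the example lines from the docstring above; only the standard re module is needed:

import re

patt = re.compile(
    r'^limit\s+name=(?P<name>[^\s]+)\s+rate=(?P<max>\-?\d{1,10})\/(?P<time>\d{1,10})\s+'
    r'fields=(?P<fieldlist>[^\s]+)(\s+match=\/(?P<matchregex>.+)\/(\s+skip=(?P<skiplist>[^\s]+))?)?'
    r'\s+action=(?P<action>[^\s]+)\s+message=(?P<message>.*)$')

line = ('limit name=fromaddr rate=10/30 fields=from_address '
        'action=REJECT message=Too many messages from ${from_address}')
print(patt.match(line).groupdict())
# {'name': 'fromaddr', 'max': '10', 'time': '30', 'fieldlist': 'from_address',
#  'matchregex': None, 'skiplist': None, 'action': 'REJECT',
#  'message': 'Too many messages from ${from_address}'}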
Example #25
 def setUp(self):
     self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')
Example #26
 def lint_filter(self):
     filterfile = self.config.get(self.section, 'filterfile')
     filter = SuspectFilter(filterfile)
     return filter.lint()
Example #27
class IMAPCopyPlugin(ScannerPlugin):
    """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter).
The rule file works similarly to the archive plugin's. As the third column you have to provide imap account data in the form:

<protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox>

<protocol> is either imap or imaps


"""
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'imapcopyrules': {
                'default': '/etc/fuglu/imapcopy.regex',
                'description': 'IMAP copy suspectFilter File',
            },
            'storeoriginal': {
                'default':
                '1',
                'description':
                "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            }
        }
        self.filter = None
        self.logger = self._logger()

    def examine(self, suspect):
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules is None or imapcopyrules == "":
            return DUNNO

        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' %
                                 imapcopyrules)
            return DUNNO

        if self.filter is None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            if arg is not None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info(
                    """%s: Header %s matches imap copy exception rule '%s' """
                    % (suspect.id, field, regex))
            else:
                if arg is None or (not arg.lower().startswith('imap')):
                    self.logger.error(
                        "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"
                        % arg)

                else:
                    self.logger.info(
                        """%s: Header %s matches imap copy rule '%s' """ %
                        (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug(
                            "Suspect matches imap copy rule (I would  copy it if we weren't in debug mode)"
                        )
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug(
                "No imap copy rule/exception rule applies to this message")

    def imapconnect(self, imapurl, lintmode=False):
        p = urlparse(imapurl)
        scheme = p.scheme.lower()
        host = p.hostname
        port = p.port
        username = p.username
        password = p.password
        folder = p.path[1:]

        if scheme == 'imaps':
            ssl = True
        else:
            ssl = False

        if port is None:
            if ssl:
                port = imaplib.IMAP4_SSL_PORT
            else:
                port = imaplib.IMAP4_PORT
        try:
            if ssl:
                imap = imaplib.IMAP4_SSL(host=host, port=port)
            else:
                imap = imaplib.IMAP4(host=host, port=port)
        except Exception as e:
            ltype = 'IMAP'
            if ssl:
                ltype = 'IMAP-SSL'
            msg = "%s Connection to server %s failed: %s" % (ltype, host,
                                                             str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None

        try:
            imap.login(username, password)
        except Exception as e:
            msg = "Login to server %s failed: %s" % (host, str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None

        mtype, count = imap.select(folder)
        if mtype == 'NO':
            msg = "Could not select folder %s" % folder
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None
        return imap

    def storeimap(self, suspect, imapurl):
        imap = self.imapconnect(imapurl)
        if not imap:
            return
        #imap.debug=4
        p = urlparse(imapurl)
        folder = p.path[1:]

        if self.config.getboolean(self.section, 'storeoriginal'):
            src = suspect.get_original_source()
        else:
            src = suspect.get_source()

        mtype, data = imap.append(folder, None, None, src)
        if mtype != 'OK':
            self.logger.error(
                'Could not store message via IMAP. APPEND command failed: %s' % data)
        imap.logout()

    def lint(self):
        allok = (self.check_config() and self.lint_imap())
        return allok

    def lint_imap(self):
        #read file, check for all imap accounts
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules != '' and not os.path.exists(imapcopyrules):
            print("Imap copy rules file does not exist : %s" % imapcopyrules)
            return False
        sfilter = SuspectFilter(imapcopyrules)

        accounts = []
        for tup in sfilter.patterns:
            headername, pattern, arg = tup
            if arg not in accounts:
                if arg is None:
                    print("Rule %s %s has no imap copy target" %
                          (headername, pattern.pattern))
                    return False
                if arg.lower() == 'no':
                    continue
                accounts.append(arg)

        for acc in accounts:
            p = urlparse(acc)
            host = p.hostname
            username = p.username
            folder = p.path[1:]
            print("Checking %s@%s/%s" % (username, host, folder))
            imap = self.imapconnect(acc, lintmode=True)
            if not imap:
                print("Lint failed for this account")
                return False

        return True
Exemple #28
0
class FuzorDigest(object):

    def __init__(self, msg):
        self.debug = []
        self.digest = None
        self.predigest = None
        self.bodytext_size = 0
        self.filter = SuspectFilter(None)
        self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')

        # digest config
        self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
        self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
        self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
        self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
        self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
        self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
        self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
        self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
        self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
        self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
        self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
        self.REMOVE_HTML_TAGS = [
            'script',
            'style']  # strip tags (including content)

        self.predigest = self._make_predigest(msg)
        self.digest = self._make_hash(self.predigest)



    def _make_hash(self, predigest):
        if self.bodytext_size < self.MINIMUM_BODYTEXT_SIZE:
            return None
        predigest = predigest.strip()
        if isinstance(predigest, unicode):
            predigest = predigest.encode('utf-8', 'ignore')
        if len(predigest) < self.MINIMUM_PREDIGEST_SIZE:
            return None
        unmodified = re.sub(r'\[[A-Z0-9:]+\]', '', predigest)
        if len(unmodified) < self.MINIMUM_UNMODIFIED_CONTENT:
            return None
        try:
            return hashlib.sha1(predigest).hexdigest()
        except:
            return None



    def _handle_text_part(self, part):
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset()
        errors = "ignore"
        if not charset:
            charset = "ascii"
        elif (charset.lower().replace("_", "-") in ("quopri-codec",
              "quopri", "quoted-printable", "quotedprintable")):
            errors = "strict"

        try:
            payload = payload.decode(charset, errors)
        except (LookupError, UnicodeError, AssertionError):
            payload = payload.decode("ascii", "ignore")

        if self.STRIP_HTML_MARKUP:
            payload = self.filter.strip_text(
                payload,
                remove_tags=self.REMOVE_HTML_TAGS,
                use_bfs=True)

        if self.REPLACE_EMAIL is not None:
            payload = re.sub(r'\S{1,50}@\S{1,30}', self.REPLACE_EMAIL, payload)

        if self.REPLACE_URL is not None:
            payload = re.sub(r'[a-z]+:\S{1,100}', self.REPLACE_URL, payload)

        if self.REPLACE_LONG_WORD is not None:
            patt = r'\S{%s,}' % self.LONG_WORD_THRESHOLD
            payload = re.sub(patt, self.REPLACE_LONG_WORD, payload)

        if self.STRIP_WHITESPACE:
            payload = re.sub(r'\s', '', payload)
        payload = payload.strip()
        return payload



    def _make_predigest(self, msg):
        attachment_count = 0
        predigest = ''
        for part in msg.walk():
            if part.is_multipart():
                continue

            if part.get_content_maintype() == "text":
                try:
                    normalized_text_part = self._handle_text_part(part)
                    predigest += normalized_text_part
                    self.bodytext_size += len(normalized_text_part)
                except Exception as e:
                    self.logger.warn(e)
            else:
                attachment_count += 1
                if self.INCLUDE_ATTACHMENT_CONTENT:
                    predigest += "[ATTH:%s]" % hashlib.sha1(
                        part.get_payload()).hexdigest()

        if self.INCLUDE_ATTACHMENT_COUNT and attachment_count:
            predigest += "[ATTC:%s]" % attachment_count

        if self.STRIP_WHITESPACE:
            predigest = re.sub(r'\s', '', predigest)

        return predigest
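
# Minimal usage sketch (assumes `msg` is an email.message.Message object, e.g. obtained
# via email.message_from_bytes); the variable names here are illustrative only:
#   digest = FuzorDigest(msg)
#   if digest.digest is not None:
#       print(digest.digest)  # sha1 hex of the normalized body text, None if the message is too small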
Exemple #29
0
class BayesPlugin(object):
    def __init__(self):
        self.requiredvars = {
            'backendtype': {
                'default':
                'redis',
                'description':
                'Token store backend type. Allowed values are: sqlalchemy , redis',
            },
            'backendconfig': {
                'default':
                '',
                'description':
                'Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db',
            },
            'spambias': {
                'default':
                '0.5',
                'description':
                'overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam',
            },
            'minimum-token-occurence': {
                'default':
                '3',
                'description':
                "don't make assumptions on tokens seen less than this amount",
            },
            'maximum-tokens-per-message': {
                'default': '5000',
                'description': 'stop tokenizing after x tokens',
            },
            'minimum-ham': {
                'default': '10',
                'description': "minimum known hams for classification",
            },
            'minimum-spam': {
                'default': '10',
                'description': "minimum known spams for classification",
            },
        }
        self.tokenstore = None
        self.calc_minimum = 0.00000001  # work around division by zero etc

        self.logger = self._logger()
        self.filter = SuspectFilter(None)

    def init_backend(self):
        if self.tokenstore != None:
            return
        backendtype = self.config.get(self.section, 'backendtype')
        if backendtype not in SUPPORTED_BACKENDS:
            self.logger.error(
                "Bayes tokenstore %s not supported, maybe misspelled or missing dependency"
                % backendtype)

        backend = SUPPORTED_BACKENDS[backendtype](self.config.get(
            self.section, 'backendconfig'))
        self.tokenstore = backend

    def single_token_spam_probability(self, token):
        """Compute the probability that a message containing a given token is spam
        ( "spamicity of a word" )
        """
        total_spam = self.tokenstore.get_total_spam_count()
        if total_spam < self.config.getint(self.section, 'minimum-spam'):
            self.logger.warning(
                "Not enough known spams for bayes classification")
            return 0.5

        total_ham = self.tokenstore.get_total_ham_count()
        if total_ham < self.config.getint(self.section, 'minimum-ham'):
            self.logger.warning(
                "Not enough known hams for bayes classification")
            return 0.5

        pr_s = self.config.getfloat(
            self.section,
            'spambias')  # probability that any given message is spam
        pr_h = 1 - pr_s  # probability that any given message is ham

        spam_count = self.tokenstore.get_spam_count(
            token)  # number of known spams containing this token
        ham_count = self.tokenstore.get_ham_count(
            token)  # number of known hams containing this token

        # "Dealing with rare words"
        if spam_count + ham_count < self.config.getint(self.section,
                                                       'minimum-token-occurence'):
            pr_s_w = 0.5
        else:
            pr_w_s = float(
                spam_count
            ) / total_spam  #  the probability that the token appears in spam messages
            pr_w_h = float(
                ham_count
            ) / total_ham  #   the probability that the token appears in ham messages
            divisor = (pr_w_s * pr_s + pr_w_h * pr_h)
            if divisor < self.calc_minimum:
                divisor = self.calc_minimum
            pr_s_w = pr_w_s * pr_s / divisor
        #self.logger.info("Token '%s' : seen in %s spams, %s hams => spamicity= %.4f"%(token,spam_count,ham_count,pr_s_w))
        return pr_s_w

    def spam_probability(self, suspect):
        """
        :param suspect: the suspect whose body text will be classified
        :return: the probability that the given text is spam. float value between 0.0 and 1.0
        """
        tokens = self.tokenize(suspect)
        self.logger.debug("Got %s tokens" % len(tokens))
        total = 0
        for t in tokens:
            spamicity = self.single_token_spam_probability(t)
            if spamicity < self.calc_minimum:
                spamicity = self.calc_minimum

            #make sure we get at least a very small amount
            x = 1 - spamicity
            if x < self.calc_minimum:
                x = self.calc_minimum
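            # accumulate log-odds: total = sum(ln(1 - p) - ln(p)) over all tokens;
            # the final 1 / (1 + e^total) below equals the naive Bayes combination
            # prod(p) / (prod(p) + prod(1 - p)) of the per-token spamicities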
            n = math.log(x) - math.log(spamicity)
            total += n
        try:
            probability = 1.0 / (1 + math.pow(math.e, total))
        except OverflowError:
            return 0.0

        return round(probability, 4)

    def ngrams(self, sequence, n=3, maxnumber=None):
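        # character n-grams, e.g. ngrams("example", n=3) -> ['exa', 'xam', 'amp', 'mpl', 'ple']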
        sequence = list(sequence)
        count = max(0, len(sequence) - n + 1)
        if maxnumber == None:
            maxnumber = count
        return [
            "".join(sequence[i:i + n]) for i in range(min(count, maxnumber))
        ]

    def tokenize(self, suspect):
        visible_texts = self.filter.get_field(suspect, 'body:stripped')
        stripped = " ".join(
            [t.strip() for t in visible_texts if t.strip() != ''])
        maxtokens = self.config.getint(self.section,
                                       'maximum-tokens-per-message')
        if maxtokens == 0:
            maxtokens = None
        tokens = self.ngrams(stripped, n=3, maxnumber=maxtokens)
        #self.logger.debug(tokens)
        return tokens
Exemple #30
0
 def lint_filter(self):
     filterfile = self.config.get(self.section, 'actionrules')
     filter = SuspectFilter(filterfile)
     return filter.lint()
class HeaderwriterPlugin(ScannerPlugin):
    """
    Writes a custom log based on suspect filter rules.
    
    e.g. if you put this into headerwriter.regex:
    From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue}
    
    fuglu would write a log line containing the fuglu id of every message whose From domain is microsoft.com, yahoo.com or gmail.com
    """

    def __init__(self,config,section=None):
        ScannerPlugin.__init__(self,config,section)
        
        self.requiredvars={
            'filterfile':{
                'default':'/etc/fuglu/headerwriter.regex',
                'description':'Suspectfilter File',
            },
                           
            'outputfile':{
                'default':'',
                'description':'Output File',
            },
                           
            'defaultlinetemplate':{
                 'default':'${fieldname}: ${matchedvalue}',
                'description':'Default line output template if nothing is specified in filter config',                  
            }
            
        }
        self.filter=None

    def examine(self,suspect):
        starttime=time.time()
        if self.filter==None:
            self.filter=SuspectFilter(self.config.get(self.section,'filterfile'))
        
            
        hits=self.filter.get_args(suspect,extended=True)
        if len(hits)==0:
            return DUNNO
            
        #open file
        ofile=self.config.get(self.section,'outputfile')
        if ofile.strip()=='':
            self._logger().error("No output file specified for headerwriter")
            return DUNNO
            
        fh=open(ofile,'a')
        for hit in hits:
            (fieldname, matchedvalue, arg, regex)=hit
            if arg==None or arg=='':
                arg=self.config.get(self.section,'defaultlinetemplate')
            
            addvalues=dict(fieldname=fieldname,matchedvalue=matchedvalue,regex=regex)
            outputline=apply_template(arg, suspect, addvalues)
            fh.write(outputline)
            fh.write('\n')
            
        fh.close()
        
    def lint(self):
        filterfile=self.config.get(self.section,'filterfile')
        if not os.path.exists(filterfile):
            print "file not found: %s"%filterfile
            return False
        
        if self.config.get(self.section,'outputfile').strip()=='':
            print "No outputfile configured"
            return False
        
        return True  
Exemple #32
0
class IMAPCopyPlugin(ScannerPlugin):
    """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter).
The rulefile works similar to the archive plugin. As third column you have to provide imap account data in the form:

<protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox>

<protocol> is either imap or imaps


"""
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'imapcopyrules': {
                'default': '/etc/fuglu/imapcopy.regex',
                'description': 'IMAP copy suspectFilter File',
            },
            'storeoriginal': {
                'default':
                '1',
                'description':
                "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            }
        }
        self.filter = None
        self.logger = self._logger()

    def examine(self, suspect):
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules == None or imapcopyrules == "":
            return DUNNO

        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' %
                                 imapcopyrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info(
                    """%s: Header %s matches imap copy exception rule '%s' """
                    % (suspect.id, field, regex))
            else:
                if arg == None or (not arg.lower().startswith('imap')):
                    self.logger.error(
                        "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"
                        % arg)

                else:
                    self.logger.info(
                        """%s: Header %s matches imap copy rule '%s' """ %
                        (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug(
                            "Suspect matches imap copy rule (I would  copy it if we weren't in debug mode)"
                        )
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug(
                "No imap copy rule/exception rule applies to this message")

    def imapconnect(self, imapurl, lintmode=False):
        p = urlparse(imapurl)
        scheme = p.scheme.lower()
        host = p.hostname
        port = p.port
        username = p.username
        password = p.password
        folder = p.path[1:]

        if scheme == 'imaps':
            ssl = True
        else:
            ssl = False

        if port == None:
            if ssl:
                port = imaplib.IMAP4_SSL_PORT
            else:
                port = imaplib.IMAP4_PORT
        try:
            if ssl:
                imap = imaplib.IMAP4_SSL(host=host, port=port)
            else:
                imap = imaplib.IMAP4(host=host, port=port)
        except Exception as e:
            ltype = 'IMAP'
            if ssl:
                ltype = 'IMAP-SSL'
            msg = "%s Connection to server %s failed: %s" % (ltype, host,
                                                             str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None

        try:
            imap.login(username, password)
        except Exception as e:
            msg = "Login to server %s failed: %s" % (host, str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None
Exemple #33
0
class ArchivePlugin(ScannerPlugin):

    """This plugins stores a copy of the message if it matches certain criteria (Suspect Filter). 
You can use this if you want message archives for your domains or to debug problems occuring only for certain recipients.

Examples for the archive.regex filter file:

Archive messages to domain ''test.com'':

``to_domain test\.com``


Archive messages from [email protected]:


``envelope_from oli@fuglu\.org``


you can also append "yes" and "no" to the rules to create a more advanced configuration. Lets say we want to archive all messages to [email protected] and all regular messages [email protected] except the ones created by automated scripts like logwatch or daily backup messages etc.

envelope_from logwatch@.*fuglu.org   no

envelope_to sales@fuglu\.org yes

from [email protected] no

envelope_to support@fuglu\.org      yes


Note: The first rule to match in a message is the only rule that will be applied. Exclusion rules should therefore be put above generic/catch-all rules.
"""

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'archiverules': {
                'default': '/etc/fuglu/archive.regex',
                'description': 'Archiving SuspectFilter File',
            },

            'archivedir': {
                'default': '/tmp',
                'description': 'storage for archived messages',
            },
            'subdirtemplate': {
                'default': '${to_domain}',
                'description': 'subdirectory within archivedir',
            },
            'filenametemplate': {
                'default': '${id}.eml',
                'description': 'filename template for the archived messages',
            },
            'storeoriginal': {
                'default': '1',
                'description': "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            },
            'chown': {
                'default': '',
                'description': "change owner of saved messages (username or numeric id) - this only works if fuglu is running as root (which is NOT recommended)",
            },
            'chgrp': {
                'default': '',
                'description': "change group of saved messages (groupname or numeric id) - the user running fuglu must be a member of the target group for this to work",
            },
            'chmod': {
                'default': '',
                'description': "set file permissions of saved messages",
            },

        }

        self.filter = None
        self.logger = self._logger()

    def __str__(self):
        return "Archive"

    def lint(self):
        allok = (
            self.checkConfig() and self.check_deprecated() and self.lint_dirs() and self.lint_filter())
        return allok

    def check_deprecated(self):
        if self.config.has_option(self.section, 'makedomainsubdir'):
            print(
                "the config option 'makedomainsubdir' has been replaced with 'subdirtemplate' ")
            print("please update your config")
            print("makedomainsubdir=1 -> subdirtemplate=${to_domain}")
            print("makedomainsubdir=0 -> subdirtemplate=")
            return False
        return True

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'archiverules')
        filter = SuspectFilter(filterfile)
        return filter.lint()

    def lint_dirs(self):
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            print('Archivedir is not specified')
            return False

        if not os.path.isdir(archivedir):
            print("Archivedir '%s' does not exist or is not a directory" %
                  (archivedir))
            return False

        return True

    def examine(self, suspect):
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules == None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error(
                'Archive Rules file does not exist : %s' % archiverules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving""")
            else:
                if arg != None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would  archive it if we weren't in debug mode)")
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")

    def archive(self, suspect):
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            self.logger.error('Archivedir is not specified')
            return

        subdirtemplate = self.config.get(self.section, 'subdirtemplate')

        if self.config.has_option(self.section, 'makedomainsubdir') and subdirtemplate == self.requiredvars['subdirtemplate']['default']:
            self.logger.warning(
                "Archive config is using deprecated 'makedomainsubdir' config option. Emulating old behaviour. Update your config(subdirtemplate)")
            if self.config.getboolean(self.section, 'makedomainsubdir'):
                subdirtemplate = "${to_domain}"
            else:
                subdirtemplate = ""

        # the archive root dir
        startdir = os.path.abspath(archivedir)

        # relative dir within archive root
        subdir = apply_template(subdirtemplate, suspect)
        if subdir.endswith('/'):
            subdir = subdir[:-1]

        # filename without dir
        filenametemplate = self.config.get(self.section, 'filenametemplate')
        filename = apply_template(filenametemplate, suspect)
        # make sure filename can't create new folders
        filename = filename.replace('/', '_')

        # full relative filepath within archive dir
        fpath = "%s/%s" % (subdir, filename)

        # absolute final filepath
        requested_path = os.path.abspath("%s/%s" % (startdir, fpath))

        if not os.path.commonprefix([requested_path, startdir]).startswith(startdir):
            self.logger.error(
                "file path '%s' seems to be outside archivedir '%s' - storing to archivedir" % (requested_path, startdir))
            requested_path = "%s/%s" % (startdir, filename)

        finaldir = os.path.dirname(requested_path)
        if not os.path.isdir(finaldir):
            os.makedirs(finaldir, 0o755)

        if self.config.getboolean(self.section, 'storeoriginal'):
            shutil.copy(suspect.tempfile, requested_path)
        else:
            with open(requested_path, 'w') as fp:
                fp.write(suspect.get_source())

        chmod = self.config.get(self.section, 'chmod')
        chgrp = self.config.get(self.section, 'chgrp')
        chown = self.config.get(self.section, 'chown')
        if chmod or chgrp or chown:
            self.setperms(requested_path, chmod, chgrp, chown)

        self.logger.info('Message from %s to %s archived as %s' % (
            suspect.from_address, suspect.to_address, requested_path))
        return requested_path

    def setperms(self, filename, chmod, chgrp, chown):
        """Set file permissions and ownership
        :param filename The target file
        :param chmod string representing the permissions (example '640')
        :param chgrp groupname or group id of the target group. the user running fuglu must be a member of this group for this to work
        :param chown username or user id of the target user. fuglu must run as root for this to work (which is not recommended for security reasons)
        """

        # chmod
        if chmod:
            perm = int(chmod, 8)
            try:
                os.chmod(filename, perm)
            except:
                self.logger.error(
                    'could not set permission on file %s' % filename)

        # chgrp
        changetogroup = -1
        if chgrp:
            group = None
            try:
                group = grp.getgrnam(chgrp)
            except KeyError:
                pass

            try:
                group = grp.getgrgid(int(chgrp))
            except KeyError:
                pass
            except ValueError:
                pass

            if group != None:
                changetogroup = group.gr_gid
            else:
                self.logger.warn("Group %s not found" % chgrp)

        # chown
        changetouser = -1
        if chown:
            user = None
            try:
                user = pwd.getpwnam(chown)
            except KeyError:
                pass

            try:
                user = pwd.getpwuid(int(chown))
            except KeyError:
                pass
            except ValueError:
                pass

            if user != None:
                changetouser = user.pw_uid
            else:
                self.logger.warn("User %s not found" % chown)

        if changetogroup != -1 or changetouser != -1:
            try:
                os.chown(filename, changetouser, changetogroup)
            except Exception as e:
                self.logger.error(
                    "Could not change user/group of file %s : %s" % (filename, str(e)))
Exemple #34
0
class ArchivePlugin(ScannerPlugin):
    """This plugins stores a copy of the message if it matches certain criteria (Suspect Filter). 
You can use this if you want message archives for your domains or to debug problems occuring only for certain recipients.

Examples for the archive.regex filter file:

Archive messages to domain ''test.com'':

``to_domain test\.com``


Archive messages from [email protected]:


``envelope_from oli@fuglu\.org``


you can also append "yes" and "no" to the rules to create a more advanced configuration. Lets say we want to archive all messages to [email protected] and all regular messages [email protected] except the ones created by automated scripts like logwatch or daily backup messages etc.

envelope_from logwatch@.*fuglu.org   no

envelope_to sales@fuglu\.org yes

from [email protected] no

envelope_to support@fuglu\.org      yes


Note: The first rule to match in a message is the only rule that will be applied. Exclusion rules should therefore be put above generic/catch-all rules.
"""
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            'archiverules': {
                'default': '/etc/fuglu/archive.regex',
                'description': 'Archiving SuspectFilter File',
            },
            'archivedir': {
                'default': '/tmp',
                'description': 'storage for archived messages',
            },
            'subdirtemplate': {
                'default': '${to_domain}',
                'description': 'subdirectory within archivedir',
            },
            'filenametemplate': {
                'default': '${id}.eml',
                'description': 'filename template for the archived messages',
            },
            'storeoriginal': {
                'default':
                '1',
                'description':
                "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            },
            'chown': {
                'default':
                '',
                'description':
                "change owner of saved messages (username or numeric id) - this only works if fuglu is running as root (which is NOT recommended)",
            },
            'chgrp': {
                'default':
                '',
                'description':
                "change group of saved messages (groupname or numeric id) - the user running fuglu must be a member of the target group for this to work",
            },
            'chmod': {
                'default': '',
                'description': "set file permissions of saved messages",
            },
        }

        self.filter = None
        self.logger = self._logger()

    def __str__(self):
        return "Archive"

    def lint(self):
        allok = (self.checkConfig() and self.check_deprecated()
                 and self.lint_dirs() and self.lint_filter())
        return allok

    def check_deprecated(self):
        if self.config.has_option(self.section, 'makedomainsubdir'):
            print(
                "the config option 'makedomainsubdir' has been replaced with 'subdirtemplate' "
            )
            print("please update your config")
            print("makedomainsubdir=1 -> subdirtemplate=${to_domain}")
            print("makedomainsubdir=0 -> subdirtemplate=")
            return False
        return True

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'archiverules')
        filter = SuspectFilter(filterfile)
        return filter.lint()

    def lint_dirs(self):
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            print('Archivedir is not specified')
            return False

        if not os.path.isdir(archivedir):
            print("Archivedir '%s' does not exist or is not a directory" %
                  (archivedir))
            return False

        return True

    def examine(self, suspect):
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules == None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error('Archive Rules file does not exist : %s' %
                              archiverules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving"""
                )
            else:
                if arg != None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would  archive it if we weren't in debug mode)"
                    )
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")

    def archive(self, suspect):
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            self.logger.error('Archivedir is not specified')
            return

        subdirtemplate = self.config.get(self.section, 'subdirtemplate')

        if self.config.has_option(
                self.section, 'makedomainsubdir'
        ) and subdirtemplate == self.requiredvars['subdirtemplate']['default']:
            self.logger.warning(
                "Archive config is using deprecated 'makedomainsubdir' config option. Emulating old behaviour. Update your config(subdirtemplate)"
            )
            if self.config.getboolean(self.section, 'makedomainsubdir'):
                subdirtemplate = "${to_domain}"
            else:
                subdirtemplate = ""

        # the archive root dir
        startdir = os.path.abspath(archivedir)

        # relative dir within archive root
        subdir = apply_template(subdirtemplate, suspect)
        if subdir.endswith('/'):
            subdir = subdir[:-1]

        # filename without dir
        filenametemplate = self.config.get(self.section, 'filenametemplate')
        filename = apply_template(filenametemplate, suspect)
        # make sure filename can't create new folders
        filename = filename.replace('/', '_')

        # full relative filepath within archive dir
        fpath = "%s/%s" % (subdir, filename)

        # absolute final filepath
        requested_path = os.path.abspath("%s/%s" % (startdir, fpath))

        if not os.path.commonprefix([requested_path, startdir
                                     ]).startswith(startdir):
            self.logger.error(
                "file path '%s' seems to be outside archivedir '%s' - storing to archivedir"
                % (requested_path, startdir))
            requested_path = "%s/%s" % (startdir, filename)

        finaldir = os.path.dirname(requested_path)
        if not os.path.isdir(finaldir):
            os.makedirs(finaldir, 0o755)

        if self.config.getboolean(self.section, 'storeoriginal'):
            shutil.copy(suspect.tempfile, requested_path)
        else:
            with open(requested_path, 'w') as fp:
                fp.write(suspect.get_source())

        chmod = self.config.get(self.section, 'chmod')
        chgrp = self.config.get(self.section, 'chgrp')
        chown = self.config.get(self.section, 'chown')
        if chmod or chgrp or chown:
            self.setperms(requested_path, chmod, chgrp, chown)

        self.logger.info(
            'Message from %s to %s archived as %s' %
            (suspect.from_address, suspect.to_address, requested_path))
        return requested_path

    def setperms(self, filename, chmod, chgrp, chown):
        """Set file permissions and ownership
        :param filename The target file
        :param chmod string representing the permissions (example '640')
        :param chgrp groupname or group id of the target group. the user running fuglu must be a member of this group for this to work
        :param chown username or user id of the target user. fuglu must run as root for this to work (which is not recommended for security reasons)
        """

        # chmod
        if chmod:
            perm = int(chmod, 8)
            try:
                os.chmod(filename, perm)
            except:
                self.logger.error('could not set permission on file %s' %
                                  filename)

        # chgrp
        changetogroup = -1
        if chgrp:
            group = None
            try:
                group = grp.getgrnam(chgrp)
            except KeyError:
                pass

            try:
                group = grp.getgrgid(int(chgrp))
            except KeyError:
                pass
            except ValueError:
                pass

            if group != None:
                changetogroup = group.gr_gid
            else:
                self.logger.warn("Group %s not found" % chgrp)

        # chown
        changetouser = -1
        if chown:
            user = None
            try:
                user = pwd.getpwnam(chown)
            except KeyError:
                pass

            try:
                user = pwd.getpwuid(int(chown))
            except KeyError:
                pass
            except ValueError:
                pass

            if user != None:
                changetouser = user.pw_uid
            else:
                self.logger.warn("User %s not found" % chown)

        if changetogroup != -1 or changetouser != -1:
            try:
                os.chown(filename, changetouser, changetogroup)
            except Exception as e:
                self.logger.error(
                    "Could not change user/group of file %s : %s" %
                    (filename, str(e)))
Exemple #35
0
class LDAPlugin(ScannerPlugin):
    """Deliver message to maildir / mbox"""
    def __init__(self,config,section=None):
        ScannerPlugin.__init__(self,config,section)
        
        self.requiredvars={
            'path':{
                'default':'/usr/local/fuglu/deliver/${to_address}',
                'description':'Path to maildir / mbox file, supports templates',
            },
            #maybe we need to support our own locking later, for now we use python's built-ins
            #'locktype':{ 
            #    'default':'',
            #    'description':"flock, ...",
            #},
            'boxtype':{
                'default':'mbox',
                'description':"mbox, maildir",
            },
            #maybe we need to support various mbox types later, for now we use python's built-in module
            #'subtype':{
            #    'default':'',
            #    'description':"what type of mbox... ",
            #},
            'filterfile':{
                'default':'',
                'description':"only store messages which use filter...",
            },
                           
        }
        self.logger=self._logger()
        self.filter=None
        
        self.boxtypemap={
         'mbox':self.deliver_mbox,
         'maildir':self.deliver_maildir,               
        }
        
    def lint(self):
        allok=self.checkConfig()
        
        filterfile=self.config.get(self.section, 'filterfile','').strip()
        
        if filterfile!='' and not os.path.exists(filterfile):
            print('LDA filter rules file does not exist : %s' % filterfile)
            allok=False
        
        boxtype=self.config.get(self.section, 'boxtype')
        if boxtype not in self.boxtypemap:
            print "Unsupported boxtype: %s"%boxtype
            allok=False
        
        return allok

        
    def examine(self,suspect):
        starttime=time.time()
        
        filterfile=self.config.get(self.section, 'filterfile','').strip()
        
        if self.filter==None:
            if filterfile!='': 
                if not os.path.exists(filterfile):
                    self._logger().warning('LDA filter rules file does not exist : %s'%filterfile)
                    return DEFER
                self.filter=SuspectFilter(filterfile)
        
        if self.filter!=None:
            match=self.filter.matches(suspect)
            if not match:
                return DUNNO
        
        self.boxtypemap[self.config.get(self.section, 'boxtype')](suspect)
        
        # For debugging, it's good to know how long each plugin took
        endtime=time.time()
        difftime=endtime-starttime
        suspect.tags['LDAPlugin.time']="%.4f"%difftime

    def deliver_mbox(self,suspect):
        mbox_msg=mailbox.mboxMessage(suspect.get_message_rep())
        mbox_path=apply_template(self.config.get(self.section,'path'), suspect)
        mbox=mailbox.mbox( mbox_path)
        try:
            mbox.lock()
            mbox.add(mbox_msg)
            mbox.flush()
        except Exception as e:
            self.logger.error("Could not store message %s to %s: %s"%(suspect.id,mbox_path,str(e)))
        finally:
Exemple #36
0
class LDAPlugin(ScannerPlugin):
    """Deliver message to maildir / mbox"""

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)

        self.requiredvars = {
            "path": {
                "default": "/usr/local/fuglu/deliver/${to_address}",
                "description": "Path to maildir / mbox file, supports templates",
            },
            # maybe we need to support our own locking later, for now we use python's built-ins
            #'locktype':{
            #    'default':'',
            #    'description':"flock, ...",
            # },
            "boxtype": {"default": "mbox", "description": "mbox, maildir"},
            # maybe we need to support various mbox types later, for now we use python's built-in module
            #'subtype':{
            #    'default':'',
            #    'description':"what type of mbox... ",
            # },
            "filterfile": {"default": "", "description": "only store messages which use filter..."},
        }
        self.logger = self._logger()
        self.filter = None

        self.boxtypemap = {"mbox": self.deliver_mbox, "maildir": self.deliver_maildir}

    def lint(self):
        allok = self.checkConfig()

        filterfile = self.config.get(self.section, "filterfile", "").strip()

        if filterfile != "" and not os.path.exists(filterfile):
            print "LDA filter rules file does not exist : %s" % filterfile
            allok = False

        boxtype = self.config.get(self.section, "boxtype")
        if boxtype not in self.boxtypemap:
            print "Unsupported boxtype: %s" % boxtype
            allok = False

        return allok

    def examine(self, suspect):
        starttime = time.time()

        filterfile = self.config.get(self.section, "filterfile", "").strip()

        if self.filter == None:
            if filterfile != "":
                if not os.path.exists(filterfile):
                    self._logger().warning("LDA filter rules file does not exist : %s" % filterfile)
                    return DEFER
                self.filter = SuspectFilter(filterfile)

        if self.filter != None:
            match = self.filter.matches(suspect)
            if not match:
                return DUNNO

        self.boxtypemap[self.config.get(self.section, "boxtype")](suspect)

        # For debugging, it's good to know how long each plugin took
        endtime = time.time()
        difftime = endtime - starttime
        suspect.tags["LDAPlugin.time"] = "%.4f" % difftime

    def deliver_mbox(self, suspect):
        mbox_msg = mailbox.mboxMessage(suspect.get_message_rep())
        mbox_path = apply_template(self.config.get(self.section, "path"), suspect)
        mbox = mailbox.mbox(mbox_path)
        try:
            mbox.lock()
            mbox.add(mbox_msg)
            mbox.flush()
        except Exception as e:
            self.logger.error("Could not store message %s to %s: %s" % (suspect.id, mbox_path, str(e)))
        finally:
Exemple #37
0
class ActionOverridePlugin(ScannerPlugin):

    """ Override actions based on a Suspect Filter file. For example, delete all messages from a specific sender domain. """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.logger = self._logger()
        self.requiredvars = {
            'actionrules': {
                'default': '/etc/fuglu/actionrules.regex',
                'description': 'Rules file',
            }
        }
        self.filter = None

    def __str__(self):
        return "Action Override"

    def lint(self):
        allok = (self.checkConfig() and self.lint_filter())
        return allok

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'actionrules')
        filter = SuspectFilter(filterfile)
        return filter.lint()

    def examine(self, suspect):
        actionrules = self.config.get(self.section, 'actionrules')
        if actionrules == None or actionrules == "":
            return DUNNO

        if not os.path.exists(actionrules):
            self.logger.error(
                'Action Rules file does not exist : %s' % actionrules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(actionrules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg == None or arg.strip() == '':
                self.logger.error("Rule match but no action defined.")
                return DUNNO

            arg = arg.strip()
            spl = arg.split(None, 1)
            actionstring = spl[0]
            message = None
            if len(spl) == 2:
                message = spl[1]
            self.logger.debug(
                "%s: Rule match! Action override: %s" % (suspect.id, arg.upper()))

            actioncode = string_to_actioncode(actionstring, self.config)
            if actioncode != None:
                return actioncode, message

            elif actionstring.upper() == 'REDIRECT':
                suspect.to_address = message.strip()
                suspect.recipients = [suspect.to_address, ]
                # todo: should we override to_domain? probably not
                # todo: check for invalid address, multiple addresses
                # todo: document redirect action
            else:
                self.logger.error("Invalid action: %s" % arg)
                return DUNNO

        return DUNNO
Exemple #38
0
class ActionOverridePlugin(ScannerPlugin):
    """ Override actions based on a Suspect Filter file. For example, delete all messages from a specific sender domain. """
    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.logger = self._logger()
        self.requiredvars = {
            'actionrules': {
                'default': '/etc/fuglu/actionrules.regex',
                'description': 'Rules file',
            }
        }
        self.filter = None

    def __str__(self):
        return "Action Override"

    def lint(self):
        allok = self.check_config() and self.lint_filter()
        return allok

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'actionrules')
        sfilter = SuspectFilter(filterfile)
        return sfilter.lint()

    def examine(self, suspect):
        actionrules = self.config.get(self.section, 'actionrules')
        if actionrules is None or actionrules == "":
            return DUNNO

        if not os.path.exists(actionrules):
            self.logger.error('Action Rules file does not exist : %s' %
                              actionrules)
            return DUNNO

        if self.filter is None:
            self.filter = SuspectFilter(actionrules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg is None or arg.strip() == '':
                self.logger.error("Rule match but no action defined.")
                return DUNNO

            arg = arg.strip()
            spl = arg.split(None, 1)
            actionstring = spl[0]
            message = None
            if len(spl) == 2:
                message = spl[1]
            self.logger.debug("%s: Rule match! Action override: %s" %
                              (suspect.id, arg.upper()))

            actioncode = string_to_actioncode(actionstring, self.config)
            if actioncode is not None:
                return actioncode, message

            elif actionstring.upper() == 'REDIRECT':
                suspect.to_address = message.strip()
                # todo: check for invalid address, multiple addresses, set suspect.recipients instead of to_address
                # todo: document redirect action
            else:
                self.logger.error("Invalid action: %s" % arg)
                return DUNNO

        return DUNNO
Exemple #39
0
 def lint_filter(self):
     filterfile = self.config.get(self.section, 'actionrules')
     sfilter = SuspectFilter(filterfile)
     return sfilter.lint()
class IMAPCopyPlugin(ScannerPlugin):
    """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter).
The rulefile works similar to the archive plugin. As third column you have to provide imap account data in the form:

<protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox>

<protocol> is either imap or imaps


"""
    def __init__(self,config,section=None):
        ScannerPlugin.__init__(self,config,section)
        
        self.requiredvars={
            'imapcopyrules':{
                'default':'/etc/fuglu/imapcopy.regex',
                'description':'IMAP copy suspectFilter File',
            },
            'storeoriginal':{
                'default':'1',
                'description':"if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            }
        }
        self.filter=None
        self.logger=self._logger()

        
    def examine(self,suspect):
        imapcopyrules=self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules==None or imapcopyrules=="":
            return DUNNO
        
        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s'%imapcopyrules)
            return DUNNO
        
        if self.filter==None:
            self.filter=SuspectFilter(imapcopyrules)
        
        (match,info)=self.filter.matches(suspect,extended=True)
        if match:
            field,matchedvalue,arg,regex=info
            if arg!=None and arg.lower()=='no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info("""%s: Header %s matches imap copy exception rule '%s' """%(suspect.id,field,regex))
            else:
                if arg==None or (not arg.lower().startswith('imap')):
                    self.logger.error("Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"%arg)
                    
                else:
                    self.logger.info("""%s: Header %s matches imap copy rule '%s' """%(suspect.id,field,regex))
                    if suspect.get_tag('debug'):
                        suspect.debug("Suspect matches imap copy rule (I would  copy it if we weren't in debug mode)")
                    else:
                        self.storeimap(suspect,arg)
        else:
            suspect.debug("No imap copy rule/exception rule applies to this message")

    
    def imapconnect(self,imapurl,lintmode=False):
        p=urlparse(imapurl)
        scheme=p.scheme.lower()
        host=p.hostname
        port=p.port
        username=p.username
        password=p.password
        folder=p.path[1:]
        
        if scheme=='imaps':
            ssl=True
        else:
            ssl=False
        
        
        if port==None:
            if ssl:
                port=imaplib.IMAP4_SSL_PORT
            else:
                port=imaplib.IMAP4_PORT
        try:
            if ssl:
                imap=imaplib.IMAP4_SSL(host=host,port=port)
            else:
                imap=imaplib.IMAP4(host=host,port=port)
        except Exception as e:
            ltype='IMAP'
            if ssl:
                ltype='IMAP-SSL'
            msg="%s Connection to server %s failed: %s"%(ltype,host,str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None
        
        try:
            imap.login(username,password)
        except Exception as e:
            msg="Login to server %s failed: %s"%(host,str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None