def __init__(self, msg):
    """Compute a fuzzy digest (sha1 over normalized body text) for the given mail message.

    After construction, self.digest holds the hex digest or None when the
    message is too small to produce a meaningful digest.
    """
    self.debug = []
    self.digest = None  # final sha1 hexdigest, or None if message too small
    self.predigest = None  # normalized body text the digest is computed from
    self.bodytext_size = 0  # total size of normalized text parts
    self.filter = SuspectFilter(None)  # used for html stripping
    self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')

    # digest config
    self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
    self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
    self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
    self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
    self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
    self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
    self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
    self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
    self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
    self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
    self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
    self.REMOVE_HTML_TAGS = [
        'script', 'style']  # strip tags (including content)

    self.predigest = self._make_predigest(msg)
    self.digest = self._make_hash(self.predigest)
def __init__(self, config, section=None):
    """Declare required configuration variables; backend and limiters are built lazily."""
    ScannerPlugin.__init__(self, config, section)
    self.requiredvars = {
        'limiterfile': {
            'default': '/etc/fuglu/ratelimit.conf',
            'description': 'file based rate limits',
        },
        'backendtype': {
            'default': 'memory',
            'description': 'type of backend where the events are stored. memory is only recommended for low traffic standalone systems. alternatives are: redis, sqlalchemy'
        },
        'backendconfig': {
            'default': '',
            'description': 'backend specific configuration. sqlalchemy: the database url, redis: hostname:port:db'
        }
    }
    self.logger = self._logger()
    # both are initialized on first use, not here
    self.backend_instance = None
    self.limiters = None
    self.filter = SuspectFilter(None)
def examine(self, suspect):
    """Append one templated output line per suspect-filter hit to the configured file.

    Returns DUNNO when nothing matches or no output file is configured;
    otherwise writes the lines and falls through (implicit DUNNO).
    """
    # lazy filter initialization on first call
    if self.filter is None:
        self.filter = SuspectFilter(
            self.config.get(self.section, 'filterfile'))

    hits = self.filter.get_args(suspect, extended=True)
    if not hits:
        return DUNNO

    ofile = self.config.get(self.section, 'outputfile')
    if ofile.strip() == '':
        self._logger().error("No output file specified for headerwriter")
        return DUNNO

    # context manager guarantees the handle is closed even if templating or
    # a write raises (the previous version leaked the file handle then)
    with open(ofile, 'a') as fh:
        for fieldname, matchedvalue, arg, regex in hits:
            if arg is None or arg == '':
                arg = self.config.get(self.section, 'defaultlinetemplate')
            addvalues = dict(fieldname=fieldname,
                             matchedvalue=matchedvalue,
                             regex=regex)
            fh.write(apply_template(arg, suspect, addvalues))
            fh.write('\n')
def examine(self, suspect):
    """Archive the suspect if it matches a rule in the archiverules file.

    Archiving never changes the verdict: every path returns DUNNO
    (explicitly or via the implicit None return).
    """
    archiverules = self.config.get(self.section, 'archiverules')
    if archiverules == None or archiverules == "":
        return DUNNO

    if not os.path.exists(archiverules):
        self.logger.error('Archive Rules file does not exist : %s' % archiverules)
        return DUNNO

    # lazy filter initialization on first call
    if self.filter == None:
        self.filter = SuspectFilter(archiverules)

    (match, arg) = self.filter.matches(suspect)
    if match:
        # rule argument 'no' marks an explicit exception rule
        if arg != None and arg.lower() == 'no':
            suspect.debug("Suspect matches archive exception rule")
            self.logger.debug(
                """Header matches archive exception rule - not archiving""")
        else:
            # any argument other than yes/no is treated as 'yes' with a warning
            if arg != None and arg.lower() != 'yes':
                self.logger.warning(
                    "Unknown archive action '%s' assuming 'yes'" % arg)
            self.logger.debug("""Header matches archive rule""")
            if suspect.get_tag('debug'):
                suspect.debug(
                    "Suspect matches archiving rule (i would archive it if we weren't in debug mode)")
            else:
                self.archive(suspect)
    else:
        suspect.debug(
            "No archive rule/exception rule applies to this message")
def examine(self, suspect):
    """Append one templated output line per suspect-filter hit to the configured file.

    Returns DUNNO when nothing matches or no output file is configured;
    otherwise writes the lines and falls through (implicit DUNNO).
    """
    # lazy filter initialization on first call
    if self.filter is None:
        self.filter = SuspectFilter(self.config.get(self.section, 'filterfile'))

    hits = self.filter.get_args(suspect, extended=True)
    if not hits:
        return DUNNO

    ofile = self.config.get(self.section, 'outputfile')
    if ofile.strip() == '':
        self._logger().error("No output file specified for headerwriter")
        return DUNNO

    # 'with' closes the file even when templating or a write raises
    # (the previous version leaked the handle on exceptions)
    with open(ofile, 'a') as fh:
        for fieldname, matchedvalue, arg, regex in hits:
            if arg is None or arg == '':
                arg = self.config.get(self.section, 'defaultlinetemplate')
            addvalues = dict(fieldname=fieldname, matchedvalue=matchedvalue, regex=regex)
            fh.write(apply_template(arg, suspect, addvalues))
            fh.write('\n')
def examine(self, suspect):
    """Copy the suspect to an IMAP folder when it matches a rule in the imapcopyrules file."""
    imapcopyrules = self.config.get(self.section, 'imapcopyrules')
    if imapcopyrules == None or imapcopyrules == "":
        return DUNNO
    if not os.path.exists(imapcopyrules):
        self._logger().error('IMAP copy rules file does not exist : %s' % imapcopyrules)
        return DUNNO
    # lazy filter initialization on first call
    if self.filter == None:
        self.filter = SuspectFilter(imapcopyrules)
    (match, info) = self.filter.matches(suspect, extended=True)
    if match:
        field, matchedvalue, arg, regex = info
        # rule argument 'no' marks an explicit exception rule
        if arg != None and arg.lower() == 'no':
            suspect.debug("Suspect matches imap copy exception rule")
            self.logger.info("""%s: Header %s matches imap copy exception rule '%s' """ % (suspect.id, field, regex))
        else:
            # target must be of the form imap(s)://user:pass@host/folder
            if arg == None or (not arg.lower().startswith('imap')):
                self.logger.error("Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'" % arg)
            else:
                self.logger.info("""%s: Header %s matches imap copy rule '%s' """ % (suspect.id, field, regex))
                if suspect.get_tag('debug'):
                    suspect.debug("Suspect matches imap copy rule (I would copy it if we weren't in debug mode)")
                else:
                    self.storeimap(suspect, arg)
    else:
        suspect.debug("No imap copy rule/exception rule applies to this message")
def examine(self, suspect):
    """Archive the suspect if a rule in the archiverules file matches it.

    Archiving is a side effect only; the message verdict is never changed.
    """
    rulefile = self.config.get(self.section, 'archiverules')
    if rulefile is None or rulefile == "":
        return DUNNO
    if not os.path.exists(rulefile):
        self.logger.error(
            'Archive Rules file does not exist : %s' % rulefile)
        return DUNNO
    if self.filter is None:
        # build the filter once, on first use
        self.filter = SuspectFilter(rulefile)

    matched, action = self.filter.matches(suspect)
    if not matched:
        suspect.debug(
            "No archive rule/exception rule applies to this message")
        return

    if action is not None and action.lower() == 'no':
        # explicit exception rule: do not archive
        suspect.debug("Suspect matches archive exception rule")
        self.logger.debug(
            """Header matches archive exception rule - not archiving""")
        return

    if action is not None and action.lower() != 'yes':
        self.logger.warning(
            "Unknown archive action '%s' assuming 'yes'" % action)
    self.logger.debug("""Header matches archive rule""")
    if suspect.get_tag('debug'):
        suspect.debug(
            "Suspect matches archiving rule (i would archive it if we weren't in debug mode)")
    else:
        self.archive(suspect)
def __init__(self):
    """Declare configuration variables for the bayes classifier; the token store is built lazily."""
    self.requiredvars = {
        "backendtype": {
            "default": "redis",
            "description": "Token store backend type. Allowed values are: sqlalchemy , redis",
        },
        "backendconfig": {
            "default": "",
            "description": "Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db",
        },
        "spambias": {
            "default": "0.5",
            "description": "overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam",
        },
        "minimum-token-occurence": {
            "default": "3",
            "description": "don't make assumptions on tokens seen less than this amount",
        },
        "maximum-tokens-per-message": {"default": "5000", "description": "stop tokenizing after x tokens"},
        "minimum-ham": {"default": "10", "description": "minimum known hams for classification"},
        "minimum-spam": {"default": "10", "description": "minimum known spams for classification"},
    }
    self.tokenstore = None  # created by init_backend() on first use
    self.calc_minimum = 0.00000001  # work around division by zero etc
    self.logger = self._logger()
    self.filter = SuspectFilter(None)  # used for body text extraction
def lint_imap(self): #read file, check for all imap accounts imapcopyrules = self.config.get(self.section, 'imapcopyrules') if imapcopyrules != '' and not os.path.exists(imapcopyrules): print "Imap copy rules file does not exist : %s" % imapcopyrules return False filter = SuspectFilter(imapcopyrules) accounts = [] for tup in filter.patterns: (headername, pattern, arg) = tup if arg not in accounts: if arg == None: print "Rule %s %s has no imap copy target" % ( headername, pattern.pattern) return False if arg.lower() == 'no': continue accounts.append(arg) for acc in accounts: p = urlparse(acc) host = p.hostname username = p.username folder = p.path[1:] print "Checking %s@%s/%s" % (username, host, folder) imap = self.imapconnect(acc, lintmode=True) if not imap: print "Lint failed for this account" return False return True
def examine(self, suspect):
    """Apply action override rules to the suspect.

    Returns DUNNO, or an (actioncode, message) tuple when a rule demands a
    different action. A REDIRECT rule rewrites the envelope recipient.
    """
    actionrules = self.config.get(self.section, 'actionrules')
    if actionrules is None or actionrules == "":
        return DUNNO

    if not os.path.exists(actionrules):
        self.logger.error('Action Rules file does not exist : %s' %
                          actionrules)
        return DUNNO

    # lazy filter initialization on first call
    if self.filter is None:
        self.filter = SuspectFilter(actionrules)

    (match, arg) = self.filter.matches(suspect)
    if match:
        if arg is None or arg.strip() == '':
            self.logger.error("Rule match but no action defined.")
            return DUNNO

        arg = arg.strip()
        # first word is the action, optional remainder is the message/target
        spl = arg.split(None, 1)
        actionstring = spl[0]
        message = spl[1] if len(spl) == 2 else None
        self.logger.debug("%s: Rule match! Action override: %s" %
                          (suspect.id, arg.upper()))

        actioncode = string_to_actioncode(actionstring, self.config)
        if actioncode is not None:
            return actioncode, message
        elif actionstring.upper() == 'REDIRECT':
            # bugfix: a bare 'REDIRECT' without a target used to crash with
            # AttributeError on message.strip()
            if message is None or message.strip() == '':
                self.logger.error(
                    "REDIRECT action requires a target address")
                return DUNNO
            suspect.to_address = message.strip()
            suspect.recipients = [suspect.to_address, ]
            # todo: should we override to_domain? probably not
            # todo: check for invalid adress, multiple adressses
            # todo: document redirect action
        else:
            self.logger.error("Invalid action: %s" % arg)
            return DUNNO

    return DUNNO
def __init__(self):
    """Declare configuration variables for the bayes classifier; the token store is built lazily."""
    self.requiredvars = {
        'backendtype': {
            'default': 'redis',
            'description': 'Token store backend type. Allowed values are: sqlalchemy , redis',
        },
        'backendconfig': {
            'default': '',
            'description': 'Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db',
        },
        'spambias': {
            'default': '0.5',
            'description': 'overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam',
        },
        'minimum-token-occurence': {
            'default': '3',
            'description': "don't make assumptions on tokens seen less than this amount",
        },
        'maximum-tokens-per-message': {
            'default': '5000',
            'description': 'stop tokenizing after x tokens',
        },
        'minimum-ham': {
            'default': '10',
            'description': "minimum known hams for classification",
        },
        'minimum-spam': {
            'default': '10',
            'description': "minimum known spams for classification",
        },
    }
    self.tokenstore = None  # created by init_backend() on first use
    self.calc_minimum = 0.00000001  # work around division by zero etc
    self.logger = self._logger()
    self.filter = SuspectFilter(None)  # used for body text extraction
def examine(self, suspect):
    """Copy the suspect to an IMAP folder when a rule in the imapcopyrules file matches."""
    rulefile = self.config.get(self.section, 'imapcopyrules')
    if rulefile is None or rulefile == "":
        return DUNNO
    if not os.path.exists(rulefile):
        self._logger().error('IMAP copy rules file does not exist : %s' %
                             rulefile)
        return DUNNO
    if self.filter is None:
        # build the filter once, on first use
        self.filter = SuspectFilter(rulefile)

    matched, info = self.filter.matches(suspect, extended=True)
    if not matched:
        suspect.debug(
            "No imap copy rule/exception rule applies to this message")
        return

    field, matchedvalue, target, regex = info
    if target is not None and target.lower() == 'no':
        # explicit exception rule: never copy
        suspect.debug("Suspect matches imap copy exception rule")
        self.logger.info(
            """%s: Header %s matches imap copy exception rule '%s' """
            % (suspect.id, field, regex))
        return

    if target is None or (not target.lower().startswith('imap')):
        self.logger.error(
            "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"
            % target)
        return

    self.logger.info(
        """%s: Header %s matches imap copy rule '%s' """
        % (suspect.id, field, regex))
    if suspect.get_tag('debug'):
        suspect.debug(
            "Suspect matches imap copy rule (I would copy it if we weren't in debug mode)")
    else:
        self.storeimap(suspect, target)
def examine(self, suspect):
    """Deliver the suspect via the configured mailbox type, optionally gated by a filter file."""
    starttime = time.time()
    # NOTE(review): the third positional arg looks like a default value -
    # confirm this config object supports it (stock ConfigParser.get does not)
    filterfile = self.config.get(self.section, 'filterfile', '').strip()
    if self.filter == None:
        if filterfile != '':
            if not os.path.exists(filterfile):
                self._logger().warning('LDA filter rules file does not exist : %s' % filterfile)
                return DEFER
            self.filter = SuspectFilter(filterfile)
    # no filter configured means: deliver every message
    if self.filter != None:
        match = self.filter.matches(suspect)
        if not match:
            return DUNNO
    # dispatch to the delivery handler registered for the configured box type
    self.boxtypemap[self.config.get(self.section, 'boxtype')](suspect)
    # For debugging, its good to know how long each plugin took
    endtime = time.time()
    difftime = endtime - starttime
    suspect.tags['LDAPlugin.time'] = "%.4f" % difftime
def _initfilter(self):
    """Lazily build self.filter from the configured filterfile.

    Returns True when a usable filter is available, False otherwise.
    """
    if self.filter is not None:
        # already initialized on a previous call
        return True

    path = self.config.get(self.section, 'filterfile')
    if not path:
        # no filter file configured
        return False
    if not os.path.exists(path):
        self.logger.error('Filterfile not found for skipper: %s' % path)
        return False

    self.filter = SuspectFilter(path)
    return True
def examine(self, suspect):
    """Apply action override rules to the suspect.

    Returns DUNNO, or an (actioncode, message) tuple when a rule demands a
    different action. A REDIRECT rule rewrites the envelope recipient.
    """
    actionrules = self.config.get(self.section, 'actionrules')
    if actionrules is None or actionrules == "":
        return DUNNO

    if not os.path.exists(actionrules):
        self.logger.error(
            'Action Rules file does not exist : %s' % actionrules)
        return DUNNO

    # lazy filter initialization on first call
    if self.filter is None:
        self.filter = SuspectFilter(actionrules)

    (match, arg) = self.filter.matches(suspect)
    if match:
        if arg is None or arg.strip() == '':
            self.logger.error("Rule match but no action defined.")
            return DUNNO

        arg = arg.strip()
        # first word is the action, optional remainder is the message/target
        spl = arg.split(None, 1)
        actionstring = spl[0]
        message = spl[1] if len(spl) == 2 else None
        self.logger.debug(
            "%s: Rule match! Action override: %s" %
            (suspect.id, arg.upper()))

        actioncode = string_to_actioncode(actionstring, self.config)
        if actioncode is not None:
            return actioncode, message
        elif actionstring.upper() == 'REDIRECT':
            # bugfix: a bare 'REDIRECT' without a target used to crash with
            # AttributeError on message.strip()
            if message is None or message.strip() == '':
                self.logger.error(
                    "REDIRECT action requires a target address")
                return DUNNO
            suspect.to_address = message.strip()
            suspect.recipients = [suspect.to_address, ]
            # todo: should we override to_domain? probably not
            # todo: check for invalid adress, multiple adressses
            # todo: document redirect action
        else:
            self.logger.error("Invalid action: %s" % arg)
            return DUNNO

    return DUNNO
def examine(self, suspect):
    """Deliver the suspect via the configured mailbox type, optionally gated by a filter file."""
    starttime = time.time()
    # NOTE(review): the third positional arg looks like a default value -
    # confirm this config object supports it (stock ConfigParser.get does not)
    filterfile = self.config.get(self.section, "filterfile", "").strip()
    if self.filter == None:
        if filterfile != "":
            if not os.path.exists(filterfile):
                self._logger().warning("LDA filter rules file does not exist : %s" % filterfile)
                return DEFER
            self.filter = SuspectFilter(filterfile)
    # no filter configured means: deliver every message
    if self.filter != None:
        match = self.filter.matches(suspect)
        if not match:
            return DUNNO
    # dispatch to the delivery handler registered for the configured box type
    self.boxtypemap[self.config.get(self.section, "boxtype")](suspect)
    # For debugging, its good to know how long each plugin took
    endtime = time.time()
    difftime = endtime - starttime
    suspect.tags["LDAPlugin.time"] = "%.4f" % difftime
class HeaderwriterPlugin(ScannerPlugin): """ Writes custom log based on suspect filter rules eg. if you put this into headerwriter.regex: From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue} fuglu would write a log with fuglu-id's whose from-domain is microsoft.com,yahoo.com or gmail.com """ def __init__(self, config, section=None): ScannerPlugin.__init__(self, config, section) self.requiredvars = { 'filterfile': { 'default': '/etc/fuglu/headerwriter.regex', 'description': 'Suspectfilter File', }, 'outputfile': { 'default': '', 'description': 'Output File', }, 'defaultlinetemplate': { 'default': '${fieldname}: ${matchedvalue}', 'description': 'Default line output template if nothing is specified in filter config', } } self.filter = None def examine(self, suspect): starttime = time.time() if self.filter == None: self.filter = SuspectFilter( self.config.get(self.section, 'filterfile')) hits = self.filter.get_args(suspect, extended=True) if len(hits) == 0: return DUNNO #open file ofile = self.config.get(self.section, 'outputfile') if ofile.strip() == '': self._logger().error("No output file specified for headerwriter") return DUNNO fh = open(ofile, 'a') for hit in hits: (fieldname, matchedvalue, arg, regex) = hit if arg == None or arg == '': arg = self.config.get(self.section, 'defaultlinetemplate') addvalues = dict(fieldname=fieldname, matchedvalue=matchedvalue, regex=regex) outputline = apply_template(arg, suspect, addvalues) fh.write(outputline) fh.write('\n') fh.close() def lint(self): filterfile = self.config.get(self.section, 'filterfile') if not os.path.exists(filterfile): print "file not found: %s" % filterfile return False if self.config.get(self.section, 'outputfile').strip() == '': print "No outputfile configured" return False return True
class BayesPlugin(object):

    """Naive bayes spam classifier operating on character trigrams of the body text.

    Token statistics are kept in a pluggable token store backend
    (redis or sqlalchemy, see SUPPORTED_BACKENDS).
    """

    def __init__(self):
        self.requiredvars = {
            "backendtype": {
                "default": "redis",
                "description": "Token store backend type. Allowed values are: sqlalchemy , redis",
            },
            "backendconfig": {
                "default": "",
                "description": "Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db",
            },
            "spambias": {
                "default": "0.5",
                "description": "overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam",
            },
            "minimum-token-occurence": {
                "default": "3",
                "description": "don't make assumptions on tokens seen less than this amount",
            },
            "maximum-tokens-per-message": {"default": "5000", "description": "stop tokenizing after x tokens"},
            "minimum-ham": {"default": "10", "description": "minimum known hams for classification"},
            "minimum-spam": {"default": "10", "description": "minimum known spams for classification"},
        }
        self.tokenstore = None  # created by init_backend() on first use
        self.calc_minimum = 0.00000001  # work around division by zero etc
        self.logger = self._logger()
        self.filter = SuspectFilter(None)  # used for body text extraction

    def init_backend(self):
        """Instantiate the configured token store backend (idempotent)."""
        if self.tokenstore is not None:
            return
        backendtype = self.config.get(self.section, "backendtype")
        if backendtype not in SUPPORTED_BACKENDS:
            self.logger.error(
                "Bayes tokenstore %s not supported, maybe misspelled or missing dependency" % backendtype)
        backend = SUPPORTED_BACKENDS[backendtype](
            self.config.get(self.section, "backendconfig"))
        self.tokenstore = backend

    def single_token_spam_probability(self, token):
        """Compute the probability that a message containing a given token is spam
        ( "spamicity of a word" )

        Returns 0.5 (neutral) when there is not enough training data or the
        token has been seen too rarely.
        """
        total_spam = self.tokenstore.get_total_spam_count()
        if total_spam < self.config.getint(self.section, "minimum-spam"):
            self.logger.warning("Not enough known spams for bayes classification")
            return 0.5
        total_ham = self.tokenstore.get_total_ham_count()
        if total_ham < self.config.getint(self.section, "minimum-ham"):
            self.logger.warning("Not enough known hams for bayes classification")
            return 0.5
        # probability that any given message is spam / ham
        pr_s = self.config.getfloat(self.section, "spambias")
        pr_h = 1 - pr_s
        spam_count = self.tokenstore.get_spam_count(token)  # number of known spams containing this token
        ham_count = self.tokenstore.get_ham_count(token)  # number of known hams containing this token

        # "Dealing with rare words"
        # bugfix: use getint() - the plain get() returned a string, so the
        # int-vs-string comparison never did what was intended
        if spam_count + ham_count < self.config.getint(self.section, "minimum-token-occurence"):
            pr_s_w = 0.5
        else:
            pr_w_s = float(spam_count) / total_spam  # probability that the token appears in spam messages
            pr_w_h = float(ham_count) / total_ham  # probability that the token appears in ham messages
            divisor = pr_w_s * pr_s + pr_w_h * pr_h
            if divisor < self.calc_minimum:
                divisor = self.calc_minimum
            pr_s_w = pr_w_s * pr_s / divisor
        return pr_s_w

    def spam_probability(self, suspect):
        """Return the probability (0.0 - 1.0, rounded to 4 digits) that the suspect's body is spam."""
        tokens = self.tokenize(suspect)
        self.logger.debug("Got %s tokens" % len(tokens))
        total = 0
        for t in tokens:
            spamicity = self.single_token_spam_probability(t)
            # clamp both factors away from zero to keep log() defined
            if spamicity < self.calc_minimum:
                spamicity = self.calc_minimum
            x = 1 - spamicity
            if x < self.calc_minimum:
                x = self.calc_minimum
            n = math.log(x) - math.log(spamicity)
            total += n
        try:
            probability = 1.0 / (1 + math.pow(math.e, total))
        except OverflowError:
            # overwhelming ham evidence
            return 0.0
        return round(probability, 4)

    def ngrams(self, sequence, n=3, maxnumber=None):
        """Return up to maxnumber character n-grams of the given sequence."""
        sequence = list(sequence)
        count = max(0, len(sequence) - n + 1)
        if maxnumber is None:
            maxnumber = count
        return ["".join(sequence[i: i + n]) for i in range(min(count, maxnumber))]

    def tokenize(self, suspect):
        """Extract the stripped body text of the suspect and convert it to trigram tokens."""
        visible_texts = self.filter.get_field(suspect, "body:stripped")
        stripped = " ".join([t.strip() for t in visible_texts if t.strip() != ""])
        maxtokens = self.config.getint(self.section, "maximum-tokens-per-message")
        if maxtokens == 0:
            # 0 means unlimited
            maxtokens = None
        tokens = self.ngrams(stripped, n=3, maxnumber=maxtokens)
        return tokens
class SuspectFilterTestCase(unittest.TestCase):

    """Test Header Filter"""

    def setUp(self):
        # shared SuspectFilter fixture built from the test rules file
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        self.assertTrue(
            'Sent to unittest domain!' in headermatches,
            "To_domain not found in headercheck")
        self.assertTrue('Envelope sender is [email protected]' in headermatches,
                        "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue(
            'A tag match' in headermatches, "Tag match did not work")
        self.assertTrue(
            'Globbing works' in headermatches, "header globbing failed")
        self.assertTrue(
            'body rule works' in headermatches, "decoded body rule failed")
        self.assertTrue(
            'full body rule works' in headermatches, "full body failed")
        self.assertTrue('mime rule works' in headermatches, "mime rule failed")
        self.assertFalse('this should not match in a body rule' in headermatches,
                         'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue('perl-style recipient match' in headermatches,
                        "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertTrue(field == 'to_domain')
        self.assertTrue(matchedvalue == 'unittests.fuglu.org')
        self.assertTrue(arg == 'Sent to unittest domain!')
        self.assertTrue(regex == 'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(self.candidate.get_field(
            suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        # html stripping should keep text content but drop script bodies
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
<head>
<title>greetings</title>
</head>
<body>
<font color="red">well met!</font>
</body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml" xmlns:o=3D"urn:schemas-microsoft-com:office:office" xmlns:w=3D"urn:schemas-microsoft-com:office:word" xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15 (filtered medium)"><style><!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:11.0pt;
font-family:"Calibri",sans-serif;
mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:#0563C1;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:#954F72;
text-decoration:underline;}
span.E-MailFormatvorlage17
{mso-style-type:personal-compose;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-family:"Calibri",sans-serif;
mso-fareast-language:EN-US;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        # both stripper implementations (bfs and non-bfs) must agree
        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(
                declarationtest, use_bfs=use_bfs)
            self.assertEqual(
                docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(
                wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')
class FuzorDigest(object):

    """Compute a fuzzy sha1 digest over the normalized body text of a mail message.

    Messages that are too small, or whose content consists mostly of replaced
    placeholders ([EMAIL], [LINK], [LONG], ...), yield digest None so trivial
    mails don't produce meaningless matches.
    """

    def __init__(self, msg):
        self.debug = []
        self.digest = None  # final sha1 hexdigest, or None if message too small
        self.predigest = None  # normalized body text the digest is computed from
        self.bodytext_size = 0  # total size of normalized text parts
        self.filter = SuspectFilter(None)  # used for html stripping
        self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')

        # digest config
        self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
        self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
        self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
        self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
        self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
        self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
        self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
        self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
        self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
        self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
        self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
        self.REMOVE_HTML_TAGS = [
            'script', 'style']  # strip tags (including content)

        self.predigest = self._make_predigest(msg)
        self.digest = self._make_hash(self.predigest)

    def _make_hash(self, predigest):
        """Return the sha1 hexdigest of the predigest, or None when the message is too small."""
        if self.bodytext_size < self.MINIMUM_BODYTEXT_SIZE:
            return None
        predigest = predigest.strip()
        if len(predigest) < self.MINIMUM_PREDIGEST_SIZE:
            return None
        # require some real content besides the inserted [..] placeholders
        unmodified = re.sub(r'\[[A-Z0-9:]+\]', '', predigest)
        if len(unmodified) < self.MINIMUM_UNMODIFIED_CONTENT:
            return None
        predigest = predigest.encode('utf-8', errors='ignore')
        return hashlib.sha1(predigest).hexdigest()

    def _handle_text_part(self, part):
        """Decode one text mime part and normalize it (strip html, replace emails/urls/long words)."""
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset()
        errors = "ignore"
        if not charset:
            charset = "ascii"
        elif charset.lower().replace("_", "-") in ("quopri-codec", "quopri", "quoted-printable", "quotedprintable"):
            # pseudo charsets must decode strictly or not at all
            errors = "strict"
        try:
            payload = payload.decode(charset, errors)
        except (LookupError, UnicodeError, AssertionError):
            # unknown or broken charset: fall back to lossy ascii
            payload = payload.decode("ascii", "ignore")

        if self.STRIP_HTML_MARKUP:
            payload = self.filter.strip_text(
                payload, remove_tags=self.REMOVE_HTML_TAGS, use_bfs=True)
        if self.REPLACE_EMAIL is not None:
            payload = re.sub(r'\S{1,50}@\S{1,30}', self.REPLACE_EMAIL, payload)
        if self.REPLACE_URL is not None:
            payload = re.sub(r'[a-z]+:\S{1,100}', self.REPLACE_URL, payload)
        if self.REPLACE_LONG_WORD is not None:
            patt = r'\S{%s,}' % self.LONG_WORD_THRESHOLD
            payload = re.sub(patt, self.REPLACE_LONG_WORD, payload)
        if self.STRIP_WHITESPACE:
            payload = re.sub(r'\s', '', payload)
        payload = payload.strip()
        return payload

    def _make_predigest(self, msg):
        """Walk all mime parts and build the normalized pre-digest string."""
        attachment_count = 0
        predigest = ''
        for part in msg.walk():
            if part.is_multipart():
                continue

            if part.get_content_maintype() == "text":
                try:
                    normalized_text_part = self._handle_text_part(part)
                    predigest += normalized_text_part
                    self.bodytext_size += len(normalized_text_part)
                except Exception as e:
                    # best effort: skip parts we cannot decode
                    # fix: logger.warn is a deprecated alias of warning
                    self.logger.warning(e)
            else:
                attachment_count += 1
                if self.INCLUDE_ATTACHMENT_CONTENT:
                    predigest += "[ATTH:%s]" % hashlib.sha1(
                        part.get_payload()).hexdigest()

        if self.INCLUDE_ATTACHMENT_COUNT and attachment_count:
            predigest += "[ATTC:%s]" % attachment_count

        if self.STRIP_WHITESPACE:
            predigest = re.sub(r'\s', '', predigest)
        return predigest
def setUp(self):
    # build the shared SuspectFilter fixture from the test rules file
    self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')
class SuspectFilterTestCase(unittest.TestCase):

    """Test Suspectfilter"""

    def setUp(self):
        # shared SuspectFilter fixture built from the test rules file
        self.candidate = SuspectFilter(TESTDATADIR + '/headertest.regex')

    def tearDown(self):
        pass

    def test_sf_get_args(self):
        """Test SuspectFilter files"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')
        suspect.tags['testtag'] = 'testvalue'

        headermatches = self.candidate.get_args(suspect)
        self.assertTrue('Sent to unittest domain!' in headermatches,
                        "To_domain not found in headercheck")
        self.assertTrue(
            'Envelope sender is [email protected]' in headermatches,
            "Envelope Sender not matched in header chekc")
        self.assertTrue('Mime Version is 1.0' in headermatches,
                        "Standard header Mime Version not found")
        self.assertTrue('A tag match' in headermatches,
                        "Tag match did not work")
        self.assertTrue('Globbing works' in headermatches,
                        "header globbing failed")
        self.assertTrue('body rule works' in headermatches,
                        "decoded body rule failed")
        self.assertTrue('full body rule works' in headermatches,
                        "full body failed")
        self.assertTrue('mime rule works' in headermatches, "mime rule failed")
        self.assertFalse(
            'this should not match in a body rule' in headermatches,
            'decoded body rule matched raw body')

        # perl style advanced rules
        self.assertTrue('perl-style /-notation works!' in headermatches,
                        "new rule format failed: %s" % headermatches)
        self.assertTrue(
            'perl-style recipient match' in headermatches,
            "new rule format failed for to_domain: %s" % headermatches)
        self.assertFalse('this should not match' in headermatches,
                         "rule flag ignorecase was not detected")

        # TODO: raw body rules

    def test_sf_matches(self):
        """Test SuspectFilter extended matches"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        (match, info) = self.candidate.matches(suspect, extended=True)
        self.assertTrue(match, 'Match should return True')
        field, matchedvalue, arg, regex = info
        self.assertTrue(field == 'to_domain')
        self.assertTrue(matchedvalue == 'unittests.fuglu.org')
        self.assertTrue(arg == 'Sent to unittest domain!')
        self.assertTrue(regex == 'unittests\.fuglu\.org')

    def test_sf_get_field(self):
        """Test SuspectFilter field extract"""
        suspect = Suspect('*****@*****.**',
                          '*****@*****.**', TESTDATADIR + '/helloworld.eml')

        # additional field tests
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthelo')[0], 'helo1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clientip')[0], '10.0.0.1')
        self.assertEqual(
            self.candidate.get_field(suspect, 'clienthostname')[0], 'rdns1')

    def test_strip(self):
        # html stripping should keep text content but drop script bodies
        html = """foo<a href="bar">bar</a><script language="JavaScript">echo('hello world');</script>baz"""

        declarationtest = """<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="de">
<head>
<title>greetings</title>
</head>
<body>
<font color="red">well met!</font>
</body>
</html>
"""
        # word generated empty message
        wordhtml = """<html xmlns:v=3D"urn:schemas-microsoft-com:vml" xmlns:o=3D"urn:schemas-microsoft-com:office:office" xmlns:w=3D"urn:schemas-microsoft-com:office:word" xmlns:m=3D"http://schemas.microsoft.com/office/2004/12/omml" xmlns=3D"http://www.w3.org/TR/REC-html40"><head><META
HTTP-EQUIV=3D"Content-Type" CONTENT=3D"text/html; charset=3Dus-ascii"><meta name=3DGenerator content=3D"Microsoft Word 15 (filtered medium)"><style><!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:11.0pt;
font-family:"Calibri",sans-serif;
mso-fareast-language:EN-US;}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:#0563C1;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:#954F72;
text-decoration:underline;}
span.E-MailFormatvorlage17
{mso-style-type:personal-compose;
font-family:"Calibri",sans-serif;
color:windowtext;}
.MsoChpDefault
{mso-style-type:export-only;
font-family:"Calibri",sans-serif;
mso-fareast-language:EN-US;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:70.85pt 70.85pt 2.0cm 70.85pt;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext=3D"edit" spidmax=3D"1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext=3D"edit">
<o:idmap v:ext=3D"edit" data=3D"1" />
</o:shapelayout></xml><![endif]--></head><body lang=3DDE-CH link=3D"#0563C1" vlink=3D"#954F72"><div class=3DWordSection1><p class=3DMsoNormal><o:p> </o:p></p></div></body></html>"""

        # both stripper implementations (bfs and non-bfs) must agree
        for use_bfs in [True, False]:
            stripped = self.candidate.strip_text(html, use_bfs=use_bfs)
            self.assertEqual(stripped, 'foobarbaz')

            docstripped = self.candidate.strip_text(declarationtest, use_bfs=use_bfs)
            self.assertEqual(docstripped.split(), ['greetings', 'well', 'met!'])

            wordhtmstripped = self.candidate.strip_text(wordhtml, use_bfs=use_bfs)
            self.assertEqual(wordhtmstripped.strip(), '')
class RateLimitPlugin(ScannerPlugin):
    """This is a generic rolling window rate limiting plugin. It allows limiting the amount
    of accepted messages based on any combination of supported SuspectFilter fields.
    This means you could for example limit the number of similar subjects by sender domain
    to implement a simple bulk filter.

    Important notes:
        - This plugin is experimental and has not been tested in production
        - This plugin only makes sense in pre-queue mode.
        - The content filter stage is usually *not* the best place to implement rate-limiting.
          Faster options are postfix built-in rate limits or a policy access daemon which
          doesn't need to accept the full message to make a decision
        - the backends don't automatically perform global expiration of all events.
          Old entries are only cleared per event the next time the same event happens.
          Add a cron job for your backend to clear all old events from time to time.

    Supported backends:
        - memory: stores events in memory. Do not use this in production.
        - sqlalchemy: Stores events in a SQL database. Recommended for small/low-traffic setups
        - redis: stores events in a redis database. This is the fastest and therefore recommended backend.

    Configuration example for redis.
    Prerequisite: python redis module

        backendtype = redis
        backendconfig = localhost:6379:0

    Configuration example for mysql:
    Prerequisite: python sqlalchemy module. The database must exist.
    The table will be created automatically.

        backendtype = sqlalchemy
        backendconfig = mysql://root@localhost/fuglu

    ratelimit.conf format: (not final yet)

    Each limiter is defined by a line which must match the following format.
    Each limiter is evaluated in the order specified.

    limit name=**name** rate=**max**/**timeframe** fields=**fieldlist** [match=/**filter regex**/ [skip=**skiplist** ]] action=**action** message=**message**

        - **name**: a descriptive name for this filter, one word. Required to reference in skip lists
        - **max**: the maximum number of events that may occur in the specified timeframe
          before an action is limited. Specify a negative value to indicate "no limit"
        - **timeframe**: Timeframe for the limit
        - **fields**: comma separated list of fields which should be used as unique values to limit
        - **match** (optional): regular expression to apply to the actuall values. The limiter is
          only applied if this regular expression matches. If the limiter consists of multiple
          input fields, The regex will be applied to the comma separated list of field values.
        - **skip** (optional): Comma separated list of subsequent limiter names, that should be
          skipped if this limiter's regex matched the input values. Used for overrides.
        - **action**: Action that should be performed if the limit is exceeded. ( REJECT / DEFER / ... )
        - **message**: Message returned to the connecting client

    Examples:

        # no sending limit for our newsletter
        limit name=newsletter rate=-1/1 fields=from_address match=/^newsletter@example\.com$/ skip=fromaddr,serverhelo action=DUNNO message=OK

        # max 10 messages in 30 seconds per unique sender address:
        limit name=fromaddr rate=10/30 fields=from_address action=REJECT message=Too many messages from ${from_address}

        # max 100 messages with same subject per hour per server helo
        limit name=serverhelo rate=100/3600 fields=clienthelo,subject action=REJECT message=Bulk message detected
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'limiterfile': {
                'default': '/etc/fuglu/ratelimit.conf',
                'description': 'file based rate limits',
            },
            'backendtype': {
                'default': 'memory',
                'description': 'type of backend where the events are stored. memory is only recommended for low traffic standalone systems. alternatives are: redis, sqlalchemy'
            },
            'backendconfig': {
                'default': '',
                'description': 'backend specific configuration. sqlalchemy: the database url, redis: hostname:port:db'
            }
        }
        self.logger = self._logger()
        # backend and limiter config are loaded lazily on first examine()
        self.backend_instance = None
        self.limiters = None
        self.filter = SuspectFilter(None)

    # TODO: make action and message optional
    def load_limiter_config(self, text):
        """Parse the limiter rule file content and return a list of Limiter objects.

        Unparseable lines are logged and skipped. A line with an unknown action
        is logged but still appended with action=None (matches previous behaviour).
        """
        patt = re.compile(
            r'^limit\s+name=(?P<name>[^\s]+)\s+rate=(?P<max>\-?\d{1,10})\/(?P<time>\d{1,10})\s+fields=(?P<fieldlist>[^\s]+)(\s+match=\/(?P<matchregex>.+)\/(\s+skip=(?P<skiplist>[^\s]+))?)?\s+action=(?P<action>[^\s]+)\s+message=(?P<message>.*)$'
        )
        limiters = []
        lineno = 0
        for line in text.split('\n'):
            lineno += 1
            line = line.strip()
            if line.startswith('#') or line.strip() == '':
                continue
            match = patt.match(line)
            if match is None:
                self.logger.error('cannot parse limiter config line %s' % lineno)
                continue
            gdict = match.groupdict()
            limiter = Limiter()
            limiter.name = gdict['name']
            limiter.max = int(gdict['max'])
            limiter.timespan = int(gdict['time'])
            limiter.fields = gdict['fieldlist'].split(',')
            limiter.regex = gdict['matchregex']
            if gdict['skiplist'] is not None:
                limiter.skip = gdict['skiplist'].split(',')
            action = string_to_actioncode(gdict['action'])
            if action is None:
                self.logger.error(
                    "Limiter config line %s : invalid action %s" %
                    (lineno, gdict['action']))
            limiter.action = action
            limiter.message = gdict['message']
            limiters.append(limiter)
        return limiters

    def examine(self, suspect):
        """Apply all configured limiters to the suspect in order.

        Returns None when no limit is exceeded (or on config errors),
        otherwise the (action, message) tuple of the first exceeded limiter.
        """
        if self.limiters is None:
            filename = self.config.get(self.section, 'limiterfile')
            if not os.path.exists(filename):
                self.logger.error("Limiter config file %s not found" % filename)
                return
            # use a context manager so the file handle is not leaked
            # (was: open(filename, 'r').read())
            with open(filename, 'r') as fp:
                limiterconfig = fp.read()
            limiters = self.load_limiter_config(limiterconfig)
            self.limiters = limiters
            self.logger.info("Found %s limiter configurations" % (len(limiters)))

        if self.backend_instance is None:
            btype = self.config.get(self.section, 'backendtype')
            if btype not in AVAILABLE_RATELIMIT_BACKENDS:
                self.logger.error('ratelimit backend %s not available' % (btype))
                return
            self.backend_instance = AVAILABLE_RATELIMIT_BACKENDS[btype](
                self.config.get(self.section, 'backendconfig'))

        skiplist = []
        for limiter in self.limiters:
            if limiter.name in skiplist:
                # this limiter was disabled by a previously matched 'skip=' list
                self.logger.debug('limiter %s skipped due to previous match' % limiter.name)
                continue

            # collect all field values; the limiter only applies when every
            # configured field is available on this suspect
            allfieldsavailable = True
            fieldvalues = []
            for fieldname in limiter.fields:
                values = self.filter.get_field(suspect, fieldname)
                if len(values) < 1:
                    allfieldsavailable = False
                    self.logger.debug(
                        'Skipping limiter %s - field %s not available' %
                        (limiter.name, fieldname))
                    break
                fieldvalues.append(values[0])
            if not allfieldsavailable:  # rate limit can not be applied
                continue

            checkval = ','.join(fieldvalues)

            if limiter.regex is not None:
                if re.match(limiter.regex, checkval):
                    if limiter.skip is not None:
                        skiplist.extend(limiter.skip)
                else:  # no match, skip this limiter
                    self.logger.debug(
                        'Skipping limiter %s - regex does not match' % (limiter.name))
                    continue

            eventname = limiter.name + checkval
            timespan = limiter.timespan
            # renamed local from 'max' to avoid shadowing the builtin
            maxcount = limiter.max
            if maxcount < 0:  # negative rate means "no limit"
                continue
            event_count = self.backend_instance.check_count(eventname, timespan)
            self.logger.debug("Limiter event %s count: %s" % (eventname, event_count))
            if event_count > maxcount:
                return limiter.action, apply_template(limiter.message, suspect)
def lint_filter(self): filterfile = self.config.get(self.section, 'filterfile') filter = SuspectFilter(filterfile) return filter.lint()
class IMAPCopyPlugin(ScannerPlugin):
    """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter).
    The rulefile works similar to the archive plugin. As third column you have to provide imap account data in the form:

    <protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox>

    <protocol> is either imap or imaps
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'imapcopyrules': {
                'default': '/etc/fuglu/imapcopy.regex',
                'description': 'IMAP copy suspectFilter File',
            },
            'storeoriginal': {
                'default': '1',
                'description': "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            }
        }
        # rule file is loaded lazily on first examine()
        self.filter = None
        self.logger = self._logger()

    def examine(self, suspect):
        # match the suspect against the copy rules; the plugin never changes
        # the verdict (always DUNNO path), it only produces the IMAP copy side effect
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules is None or imapcopyrules == "":
            return DUNNO
        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' % imapcopyrules)
            return DUNNO
        if self.filter is None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            # a rule argument of 'no' is an exception rule: matching mail is NOT copied
            if arg is not None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info(
                    """%s: Header %s matches imap copy exception rule '%s' """ %
                    (suspect.id, field, regex))
            else:
                # anything else must be an imap(s):// target url
                if arg is None or (not arg.lower().startswith('imap')):
                    self.logger.error(
                        "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'" % arg)
                else:
                    self.logger.info(
                        """%s: Header %s matches imap copy rule '%s' """ %
                        (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug(
                            "Suspect matches imap copy rule (I would copy it if we weren't in debug mode)"
                        )
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug(
                "No imap copy rule/exception rule applies to this message")

    def imapconnect(self, imapurl, lintmode=False):
        # Parse the account url, connect, login and select the target folder.
        # Returns a ready-to-use imaplib connection, or None on any failure.
        # In lintmode errors are printed instead of logged.
        p = urlparse(imapurl)
        scheme = p.scheme.lower()
        host = p.hostname
        port = p.port
        username = p.username
        password = p.password
        folder = p.path[1:]

        if scheme == 'imaps':
            ssl = True
        else:
            ssl = False

        # fall back to the protocol default port when the url doesn't name one
        if port is None:
            if ssl:
                port = imaplib.IMAP4_SSL_PORT
            else:
                port = imaplib.IMAP4_PORT
        try:
            if ssl:
                imap = imaplib.IMAP4_SSL(host=host, port=port)
            else:
                imap = imaplib.IMAP4(host=host, port=port)
        except Exception as e:
            ltype = 'IMAP'
            if ssl:
                ltype = 'IMAP-SSL'
            msg = "%s Connection to server %s failed: %s" % (ltype, host, str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None

        try:
            imap.login(username, password)
        except Exception as e:
            msg = "Login to server %s failed: %s" % (host, str(e))
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None

        mtype, count = imap.select(folder)
        if mtype == 'NO':
            msg = "Could not select folder %s" % folder
            if lintmode:
                print(msg)
            else:
                self.logger.error(msg)
            return None
        return imap

    def storeimap(self, suspect, imapurl):
        # append the message source to the IMAP folder named in imapurl
        imap = self.imapconnect(imapurl)
        if not imap:
            return
        #imap.debug=4
        p = urlparse(imapurl)
        folder = p.path[1:]

        if self.config.getboolean(self.section, 'storeoriginal'):
            src = suspect.get_original_source()
        else:
            src = suspect.get_source()

        mtype, data = imap.append(folder, None, None, src)
        if mtype != 'OK':
            # NOTE(review): log message wording looks garbled ("Could put store") -
            # consider rewording to "Could not store" in a behavior change
            self.logger.error(
                'Could put store in IMAP. APPEND command failed: %s' % data)
        imap.logout()

    def lint(self):
        allok = (self.check_config() and self.lint_imap())
        return allok

    def lint_imap(self):
        # read the rule file, collect all configured imap targets and
        # try to connect to each one
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules != '' and not os.path.exists(imapcopyrules):
            print("Imap copy rules file does not exist : %s" % imapcopyrules)
            return False
        sfilter = SuspectFilter(imapcopyrules)

        accounts = []
        for tup in sfilter.patterns:
            headername, pattern, arg = tup
            if arg not in accounts:
                if arg is None:
                    print("Rule %s %s has no imap copy target" %
                          (headername, pattern.pattern))
                    return False
                if arg.lower() == 'no':
                    # exception rule - nothing to connect to
                    continue
                accounts.append(arg)

        for acc in accounts:
            p = urlparse(acc)
            host = p.hostname
            username = p.username
            folder = p.path[1:]
            print("Checking %s@%s/%s" % (username, host, folder))
            imap = self.imapconnect(acc, lintmode=True)
            if not imap:
                print("Lint failed for this account")
                return False
        return True
class FuzorDigest(object):
    """Computes a fuzzy digest (sha1 over a normalized 'pre-digest') of a mail message.

    The pre-digest is the concatenated, stripped and placeholder-normalized body
    text of all text parts. Messages with too little usable content yield no
    digest (self.digest stays None).
    """

    def __init__(self, msg):
        self.debug = []
        self.digest = None
        self.predigest = None
        self.bodytext_size = 0
        self.filter = SuspectFilter(None)
        self.logger = logging.getLogger('fuglu.plugins.fuzor.Digest')

        # digest config
        self.LONG_WORD_THRESHOLD = 10  # what is considered a long word
        self.REPLACE_LONG_WORD = '[LONG]'  # Replace long words in pre-digest with... None to disable
        self.REPLACE_EMAIL = '[EMAIL]'  # Replace email addrs in pre-digest with... None to disable
        self.REPLACE_URL = '[LINK]'  # Replace urls in pre-digest with... None to disable
        self.INCLUDE_ATTACHMENT_CONTENT = False  # should non-text attachment contents be included in digest (not recommended, there are better attachment hash systems)
        self.INCLUDE_ATTACHMENT_COUNT = True  # should the number of non-text-attachments be included in the digest
        self.MINIMUM_PREDIGEST_SIZE = 27  # if the predigest is smaller than this, ignore this message
        self.MINIMUM_UNMODIFIED_CONTENT = 27  # minimum unmodified content after stripping, eg. [SOMETHING] removed from the predigest (27>'von meinem Iphone gesendet')
        self.MINIMUM_BODYTEXT_SIZE = 27  # if the body text content is smaller than this, ignore this message
        self.STRIP_WHITESPACE = True  # remove all whitespace from the pre-digest
        self.STRIP_HTML_MARKUP = True  # remove html tags (but keep content)
        self.REMOVE_HTML_TAGS = ['script', 'style']  # strip tags (including content)

        self.predigest = self._make_predigest(msg)
        self.digest = self._make_hash(self.predigest)

    def _make_hash(self, predigest):
        """Return the sha1 hexdigest of the predigest, or None when the message
        does not contain enough original content to be hashed reliably."""
        if self.bodytext_size < self.MINIMUM_BODYTEXT_SIZE:
            return None
        predigest = predigest.strip()
        if isinstance(predigest, unicode):
            predigest = predigest.encode('utf-8', 'ignore')
        if len(predigest) < self.MINIMUM_PREDIGEST_SIZE:
            return None
        # content remaining after removing all [PLACEHOLDER] tokens -
        # too little means the mail was mostly links/addresses/long words
        unmodified = re.sub(r'\[[A-Z0-9:]+\]', '', predigest)
        if len(unmodified) < self.MINIMUM_UNMODIFIED_CONTENT:
            return None
        try:
            return hashlib.sha1(predigest).hexdigest()
        except Exception:
            # was a bare 'except:' - narrowed so SystemExit/KeyboardInterrupt
            # are no longer swallowed; hashing failures still yield None
            return None

    def _handle_text_part(self, part):
        """Decode a text mime part, strip markup and normalize emails/urls/long
        words to placeholders; return the normalized text."""
        payload = part.get_payload(decode=True)
        charset = part.get_content_charset()
        errors = "ignore"
        if not charset:
            charset = "ascii"
        elif (charset.lower().replace("_", "-") in
              ("quopri-codec", "quopri", "quoted-printable", "quotedprintable")):
            errors = "strict"

        try:
            payload = payload.decode(charset, errors)
        except (LookupError, UnicodeError, AssertionError):
            # unknown or broken charset declaration - fall back to ascii
            payload = payload.decode("ascii", "ignore")

        if self.STRIP_HTML_MARKUP:
            payload = self.filter.strip_text(
                payload, remove_tags=self.REMOVE_HTML_TAGS, use_bfs=True)

        if self.REPLACE_EMAIL is not None:
            payload = re.sub(r'\S{1,50}@\S{1,30}', self.REPLACE_EMAIL, payload)

        if self.REPLACE_URL is not None:
            payload = re.sub(r'[a-z]+:\S{1,100}', self.REPLACE_URL, payload)

        if self.REPLACE_LONG_WORD is not None:
            patt = r'\S{%s,}' % self.LONG_WORD_THRESHOLD
            payload = re.sub(patt, self.REPLACE_LONG_WORD, payload)

        if self.STRIP_WHITESPACE:
            payload = re.sub(r'\s', '', payload)
        payload = payload.strip()
        return payload

    def _make_predigest(self, msg):
        """Walk all mime parts of msg and build the normalized pre-digest string."""
        attachment_count = 0
        predigest = ''
        for part in msg.walk():
            if part.is_multipart():
                continue

            if part.get_content_maintype() == "text":
                try:
                    normalized_text_part = self._handle_text_part(part)
                    predigest += normalized_text_part
                    self.bodytext_size += len(normalized_text_part)
                except Exception as e:
                    # best effort: a broken part must not kill the whole digest
                    self.logger.warn(e)
            else:
                attachment_count += 1
                if self.INCLUDE_ATTACHMENT_CONTENT:
                    predigest += "[ATTH:%s]" % hashlib.sha1(
                        part.get_payload()).hexdigest()

        if self.INCLUDE_ATTACHMENT_COUNT and attachment_count:
            predigest += "[ATTC:%s]" % attachment_count

        if self.STRIP_WHITESPACE:
            predigest = re.sub(r'\s', '', predigest)
        return predigest
class BayesPlugin(object):
    """Naive bayes spam classifier operating on character 3-gram tokens
    of the suspect's stripped body text."""

    def __init__(self):
        self.requiredvars = {
            'backendtype': {
                'default': 'redis',
                'description': 'Token store backend type. Allowed values are: sqlalchemy , redis',
            },
            'backendconfig': {
                'default': '',
                'description': 'Backend configuration. Depends on backendtype, eg. sqlalchemy url, redis host:port:db',
            },
            'spambias': {
                'default': '0.5',
                'description': 'overall spam bias. 0.5=no bias. 0.8=around 80% of scanned mail traffic is spam',
            },
            'minimum-token-occurence': {
                'default': '3',
                'description': "don't make assumptions on tokens seen less than this amount",
            },
            'maximum-tokens-per-message': {
                'default': '5000',
                'description': 'stop tokenizing after x tokens',
            },
            'minimum-ham': {
                'default': '10',
                'description': "minimum known hams for classification",
            },
            'minimum-spam': {
                'default': '10',
                'description': "minimum known spams for classification",
            },
        }
        self.tokenstore = None
        self.calc_minimum = 0.00000001  # work around division by zero etc
        self.logger = self._logger()
        self.filter = SuspectFilter(None)

    def init_backend(self):
        """Lazily instantiate the configured token store backend."""
        if self.tokenstore is not None:
            return
        backendtype = self.config.get(self.section, 'backendtype')
        if backendtype not in SUPPORTED_BACKENDS:
            self.logger.error(
                "Bayes tokenstore %s not supported, maybe misspelled or missing dependency"
                % backendtype)
        backend = SUPPORTED_BACKENDS[backendtype](self.config.get(
            self.section, 'backendconfig'))
        self.tokenstore = backend

    def single_token_spam_probability(self, token):
        """Compute the probability that a message containing a given token is spam
        ( "spamicity of a word" ). Returns 0.5 (neutral) when the corpus is too small
        or the token is too rare."""
        total_spam = self.tokenstore.get_total_spam_count()
        if total_spam < self.config.getint(self.section, 'minimum-spam'):
            self.logger.warning(
                "Not enough known spams for bayes classification")
            return 0.5
        total_ham = self.tokenstore.get_total_ham_count()
        if total_ham < self.config.getint(self.section, 'minimum-ham'):
            self.logger.warning(
                "Not enough known hams for bayes classification")
            return 0.5
        pr_s = self.config.getfloat(
            self.section, 'spambias')  # probability that any given message is spam
        pr_h = 1 - pr_s  # probability that any given message is ham
        spam_count = self.tokenstore.get_spam_count(
            token)  # number of known spams containing this token
        ham_count = self.tokenstore.get_ham_count(
            token)  # number of known hams containing this token

        # "Dealing with rare words"
        # BUGFIX: was config.get() which returns a *string*; comparing an int
        # against a string made this threshold meaningless - use getint()
        if spam_count + ham_count < self.config.getint(self.section, 'minimum-token-occurence'):
            pr_s_w = 0.5
        else:
            pr_w_s = float(
                spam_count
            ) / total_spam  # the probability that the token appears in spam messages
            pr_w_h = float(
                ham_count
            ) / total_ham  # the probability that the token appears in ham messages
            divisor = (pr_w_s * pr_s + pr_w_h * pr_h)
            if divisor < self.calc_minimum:
                divisor = self.calc_minimum
            pr_s_w = pr_w_s * pr_s / divisor
        return pr_s_w

    def spam_probability(self, suspect):
        """Return the probability (float, 0.0 - 1.0, rounded to 4 digits)
        that the given suspect's body text is spam.

        :param suspect: the Suspect whose body text should be classified
        """
        tokens = self.tokenize(suspect)
        self.logger.debug("Got %s tokens" % len(tokens))
        total = 0
        for t in tokens:
            spamicity = self.single_token_spam_probability(t)
            # clamp both factors away from zero so the logs stay finite
            if spamicity < self.calc_minimum:
                spamicity = self.calc_minimum
            x = 1 - spamicity
            if x < self.calc_minimum:
                x = self.calc_minimum
            n = math.log(x) - math.log(spamicity)
            total += n
        try:
            probability = 1.0 / (1 + math.pow(math.e, total))
        except OverflowError:
            return 0.0
        return round(probability, 4)

    def ngrams(self, sequence, n=3, maxnumber=None):
        """Return up to maxnumber character n-grams of sequence
        (all of them when maxnumber is None)."""
        sequence = list(sequence)
        count = max(0, len(sequence) - n + 1)
        if maxnumber is None:
            maxnumber = count
        return [
            "".join(sequence[i:i + n]) for i in range(min(count, maxnumber))
        ]

    def tokenize(self, suspect):
        """Turn the suspect's stripped, whitespace-joined body text into 3-gram tokens."""
        visible_texts = self.filter.get_field(suspect, 'body:stripped')
        stripped = " ".join(
            [t.strip() for t in visible_texts if t.strip() != ''])
        maxtokens = self.config.getint(self.section,
                                       'maximum-tokens-per-message')
        if maxtokens == 0:  # 0 means unlimited
            maxtokens = None
        tokens = self.ngrams(stripped, n=3, maxnumber=maxtokens)
        return tokens
def lint_filter(self): filterfile = self.config.get(self.section, 'actionrules') filter = SuspectFilter(filterfile) return filter.lint()
class HeaderwriterPlugin(ScannerPlugin): """ Writes custom log based on suspect filter rules eg. if you put this into headerwriter.regex: From: (microsoft\.com|yahoo\.com|gmail\.com) ${id} claims to be from ${matchedvalue} fuglu would write a log with fuglu-id's whose from-domain is microsoft.com,yahoo.com or gmail.com """ def __init__(self,config,section=None): ScannerPlugin.__init__(self,config,section) self.requiredvars={ 'filterfile':{ 'default':'/etc/fuglu/headerwriter.regex', 'description':'Suspectfilter File', }, 'outputfile':{ 'default':'', 'description':'Output File', }, 'defaultlinetemplate':{ 'default':'${fieldname}: ${matchedvalue}', 'description':'Default line output template if nothing is specified in filter config', } } self.filter=None def examine(self,suspect): starttime=time.time() if self.filter==None: self.filter=SuspectFilter(self.config.get(self.section,'filterfile')) hits=self.filter.get_args(suspect,extended=True) if len(hits)==0: return DUNNO #open file ofile=self.config.get(self.section,'outputfile') if ofile.strip()=='': self._logger().error("No output file specified for headerwriter") return DUNNO fh=open(ofile,'a') for hit in hits: (fieldname, matchedvalue, arg, regex)=hit if arg==None or arg=='': arg=self.config.get(self.section,'defaultlinetemplate') addvalues=dict(fieldname=fieldname,matchedvalue=matchedvalue,regex=regex) outputline=apply_template(arg, suspect, addvalues) fh.write(outputline) fh.write('\n') fh.close() def lint(self): filterfile=self.config.get(self.section,'filterfile') if not os.path.exists(filterfile): print "file not found: %s"%filterfile return False if self.config.get(self.section,'outputfile').strip()=='': print "No outputfile configured" return False return True
class IMAPCopyPlugin(ScannerPlugin):
    """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter).
    The rulefile works similar to the archive plugin. As third column you have to provide imap account data in the form:

    <protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox>

    <protocol> is either imap or imaps
    """
    # NOTE(review): python 2 variant of this plugin (except/print syntax)

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'imapcopyrules': {
                'default': '/etc/fuglu/imapcopy.regex',
                'description': 'IMAP copy suspectFilter File',
            },
            'storeoriginal': {
                'default': '1',
                'description': "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            }
        }
        # rule file is loaded lazily on first examine()
        self.filter = None
        self.logger = self._logger()

    def examine(self, suspect):
        # match the suspect against the copy rules; verdict is always DUNNO,
        # the plugin only produces the IMAP copy side effect
        imapcopyrules = self.config.get(self.section, 'imapcopyrules')
        if imapcopyrules == None or imapcopyrules == "":
            return DUNNO
        if not os.path.exists(imapcopyrules):
            self._logger().error('IMAP copy rules file does not exist : %s' % imapcopyrules)
            return DUNNO
        if self.filter == None:
            self.filter = SuspectFilter(imapcopyrules)

        (match, info) = self.filter.matches(suspect, extended=True)
        if match:
            field, matchedvalue, arg, regex = info
            # a rule argument of 'no' is an exception rule: matching mail is NOT copied
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches imap copy exception rule")
                self.logger.info(
                    """%s: Header %s matches imap copy exception rule '%s' """ %
                    (suspect.id, field, regex))
            else:
                # anything else must be an imap(s):// target url
                if arg == None or (not arg.lower().startswith('imap')):
                    self.logger.error(
                        "Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'" % arg)
                else:
                    self.logger.info(
                        """%s: Header %s matches imap copy rule '%s' """ %
                        (suspect.id, field, regex))
                    if suspect.get_tag('debug'):
                        suspect.debug(
                            "Suspect matches imap copy rule (I would copy it if we weren't in debug mode)"
                        )
                    else:
                        self.storeimap(suspect, arg)
        else:
            suspect.debug(
                "No imap copy rule/exception rule applies to this message")

    def imapconnect(self, imapurl, lintmode=False):
        # Parse the account url, connect and login.
        # Returns None on any failure; in lintmode errors are printed instead of logged.
        # NOTE(review): folder selection and the final 'return imap' are not
        # visible in this copy - the method appears truncated here; verify
        # against the full variant of this plugin.
        p = urlparse(imapurl)
        scheme = p.scheme.lower()
        host = p.hostname
        port = p.port
        username = p.username
        password = p.password
        folder = p.path[1:]

        if scheme == 'imaps':
            ssl = True
        else:
            ssl = False

        # fall back to the protocol default port when the url doesn't name one
        if port == None:
            if ssl:
                port = imaplib.IMAP4_SSL_PORT
            else:
                port = imaplib.IMAP4_PORT
        try:
            if ssl:
                imap = imaplib.IMAP4_SSL(host=host, port=port)
            else:
                imap = imaplib.IMAP4(host=host, port=port)
        except Exception, e:
            ltype = 'IMAP'
            if ssl:
                ltype = 'IMAP-SSL'
            msg = "%s Connection to server %s failed: %s" % (ltype, host, str(e))
            if lintmode:
                print msg
            else:
                self.logger.error(msg)
            return None

        try:
            imap.login(username, password)
        except Exception, e:
            msg = "Login to server %s failed: %s" % (host, str(e))
            if lintmode:
                print msg
            else:
                self.logger.error(msg)
            return None
class ArchivePlugin(ScannerPlugin):
    """This plugins stores a copy of the message if it matches certain criteria (Suspect Filter).
    You can use this if you want message archives for your domains or to debug problems occuring only for certain recipients.

    Examples for the archive.regex filter file:

    Archive messages to domain ''test.com'':

    ``to_domain test\.com``

    Archive messages from [email protected]:

    ``envelope_from oli@fuglu\.org``

    you can also append "yes" and "no" to the rules to create a more advanced configuration.
    Lets say we want to archive all messages to [email protected] and all regular messages
    [email protected] except the ones created by automated scripts like logwatch or daily
    backup messages etc.

    envelope_from logwatch@.*fuglu.org no
    envelope_to sales@fuglu\.org yes
    from [email protected] no
    envelope_to support@fuglu\.org yes

    Note: The first rule to match in a message is the only rule that will be applied.
    Exclusion rules should therefore be put above generic/catch-all rules.
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'archiverules': {
                'default': '/etc/fuglu/archive.regex',
                'description': 'Archiving SuspectFilter File',
            },
            'archivedir': {
                'default': '/tmp',
                'description': 'storage for archived messages',
            },
            'subdirtemplate': {
                'default': '${to_domain}',
                'description': 'subdirectory within archivedir',
            },
            'filenametemplate': {
                'default': '${id}.eml',
                'description': 'filename template for the archived messages',
            },
            'storeoriginal': {
                'default': '1',
                'description': "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            },
            'chown': {
                'default': '',
                'description': "change owner of saved messages (username or numeric id) - this only works if fuglu is running as root (which is NOT recommended)",
            },
            'chgrp': {
                'default': '',
                'description': "change group of saved messages (groupname or numeric id) - the user running fuglu must be a member of the target group for this to work",
            },
            'chmod': {
                'default': '',
                'description': "set file permissions of saved messages",
            },
        }
        # rule file is loaded lazily on first examine()
        self.filter = None
        self.logger = self._logger()

    def __str__(self):
        return "Archive"

    def lint(self):
        # run all lint checks; short-circuits on first failure
        allok = (
            self.checkConfig() and self.check_deprecated() and self.lint_dirs() and self.lint_filter())
        return allok

    def check_deprecated(self):
        # fail lint when the removed 'makedomainsubdir' option is still configured
        if self.config.has_option(self.section, 'makedomainsubdir'):
            print(
                "the config option 'makedomainsubdir' has been replaced with 'subdirtemplate' ")
            print("please update your config")
            print("makedomainsubdir=1 -> subdirtemplate=${to_domain}")
            print("makedomainsubdir=0 -> subdirtemplate=")
            return False
        return True

    def lint_filter(self):
        # verify the archive rules file loads and compiles
        # NOTE(review): local name 'filter' shadows the builtin
        filterfile = self.config.get(self.section, 'archiverules')
        filter = SuspectFilter(filterfile)
        return filter.lint()

    def lint_dirs(self):
        # verify archivedir is configured and exists
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            print('Archivedir is not specified')
            return False
        if not os.path.isdir(archivedir):
            print("Archivedir '%s' does not exist or is not a directory" %
                  (archivedir))
            return False
        return True

    def examine(self, suspect):
        # match the suspect against the archive rules and archive on match;
        # verdict is always DUNNO, archiving is a side effect only
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules == None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error(
                'Archive Rules file does not exist : %s' % archiverules)
            return DUNNO

        if self.filter == None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            # a rule argument of 'no' is an exception rule: matching mail is NOT archived
            if arg != None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving""")
            else:
                if arg != None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would archive it if we weren't in debug mode)")
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")

    def archive(self, suspect):
        """Store the suspect's message source below archivedir and return the final path
        (or None when archivedir is not configured)."""
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            self.logger.error('Archivedir is not specified')
            return

        subdirtemplate = self.config.get(self.section, 'subdirtemplate')
        # emulate the removed 'makedomainsubdir' option if it is still present
        # and subdirtemplate was left at its default
        if self.config.has_option(self.section, 'makedomainsubdir') and subdirtemplate == self.requiredvars['subdirtemplate']['default']:
            self.logger.warning(
                "Archive config is using deprecated 'makedomainsubdir' config option. Emulating old behaviour. Update your config(subdirtemplate)")
            if self.config.getboolean(self.section, 'makedomainsubdir'):
                subdirtemplate = "${to_domain}"
            else:
                subdirtemplate = ""

        # the archive root dir
        startdir = os.path.abspath(archivedir)

        # relative dir within archive root
        subdir = apply_template(subdirtemplate, suspect)
        if subdir.endswith('/'):
            subdir = subdir[:-1]

        # filename without dir
        filenametemplate = self.config.get(self.section, 'filenametemplate')
        filename = apply_template(filenametemplate, suspect)
        # make sure filename can't create new folders
        filename = filename.replace('/', '_')

        # full relative filepath within archive dir
        fpath = "%s/%s" % (subdir, filename)

        # absolute final filepath
        requested_path = os.path.abspath("%s/%s" % (startdir, fpath))
        # path traversal guard: templated paths must stay inside archivedir
        if not os.path.commonprefix([requested_path, startdir]).startswith(startdir):
            self.logger.error(
                "file path '%s' seems to be outside archivedir '%s' - storing to archivedir"
                % (requested_path, startdir))
            requested_path = "%s/%s" % (startdir, filename)

        finaldir = os.path.dirname(requested_path)
        if not os.path.isdir(finaldir):
            os.makedirs(finaldir, 0o755)

        if self.config.getboolean(self.section, 'storeoriginal'):
            shutil.copy(suspect.tempfile, requested_path)
        else:
            with open(requested_path, 'w') as fp:
                fp.write(suspect.get_source())

        # optionally adjust ownership/permissions of the stored file
        chmod = self.config.get(self.section, 'chmod')
        chgrp = self.config.get(self.section, 'chgrp')
        chown = self.config.get(self.section, 'chown')
        if chmod or chgrp or chown:
            self.setperms(requested_path, chmod, chgrp, chown)

        self.logger.info('Message from %s to %s archived as %s' % (
            suspect.from_address, suspect.to_address, requested_path))
        return requested_path

    def setperms(self, filename, chmod, chgrp, chown):
        """Set file permissions and ownership

        :param filename The target file
        :param chmod string representing the permissions (example '640')
        :param chgrp groupname or group id of the target group. the user running fuglu must be a member of this group for this to work
        :param chown username or user id of the target user. fuglu must run as root for this to work (which is not recommended for security reasons)
        """
        # chmod
        if chmod:
            perm = int(chmod, 8)
            try:
                os.chmod(filename, perm)
            except:
                self.logger.error(
                    'could not set permission on file %s' % filename)

        # chgrp - accept a group name or a numeric gid
        changetogroup = -1
        if chgrp:
            group = None
            try:
                group = grp.getgrnam(chgrp)
            except KeyError:
                pass
            try:
                group = grp.getgrgid(int(chgrp))
            except KeyError:
                pass
            except ValueError:
                pass
            if group != None:
                changetogroup = group.gr_gid
            else:
                self.logger.warn("Group %s not found" % chgrp)

        # chown - accept a user name or a numeric uid
        changetouser = -1
        if chown:
            user = None
            try:
                user = pwd.getpwnam(chown)
            except KeyError:
                pass
            try:
                user = pwd.getpwuid(int(chown))
            except KeyError:
                pass
            except ValueError:
                pass
            if user != None:
                changetouser = user.pw_uid
            else:
                self.logger.warn("User %s not found" % chown)

        if changetogroup != -1 or changetouser != -1:
            try:
                os.chown(filename, changetouser, changetogroup)
            except Exception as e:
                self.logger.error(
                    "Could not change user/group of file %s : %s" %
                    (filename, str(e)))
class ArchivePlugin(ScannerPlugin):

    r"""This plugins stores a copy of the message if it matches certain criteria (Suspect Filter).
    You can use this if you want message archives for your domains or to debug problems occuring only for certain recipients.

    Examples for the archive.regex filter file:

    Archive messages to domain ''test.com'':

    ``to_domain test\.com``


    Archive messages from [email protected]:

    ``envelope_from oli@fuglu\.org``


    you can also append "yes" and "no" to the rules to create a more advanced configuration.
    Lets say we want to archive all messages to [email protected] and all regular messages
    [email protected] except the ones created by automated scripts like logwatch or daily
    backup messages etc.

    envelope_from logwatch@.*fuglu.org no
    envelope_to sales@fuglu\.org yes
    from [email protected] no
    envelope_to support@fuglu\.org yes

    Note: The first rule to match in a message is the only rule that will be applied.
    Exclusion rules should therefore be put above generic/catch-all rules.
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'archiverules': {
                'default': '/etc/fuglu/archive.regex',
                'description': 'Archiving SuspectFilter File',
            },
            'archivedir': {
                'default': '/tmp',
                'description': 'storage for archived messages',
            },
            'subdirtemplate': {
                'default': '${to_domain}',
                'description': 'subdirectory within archivedir',
            },
            'filenametemplate': {
                'default': '${id}.eml',
                'description': 'filename template for the archived messages',
            },
            'storeoriginal': {
                'default': '1',
                'description': "if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers",
            },
            'chown': {
                'default': '',
                'description': "change owner of saved messages (username or numeric id) - this only works if fuglu is running as root (which is NOT recommended)",
            },
            'chgrp': {
                'default': '',
                'description': "change group of saved messages (groupname or numeric id) - the user running fuglu must be a member of the target group for this to work",
            },
            'chmod': {
                'default': '',
                'description': "set file permissions of saved messages",
            },
        }
        # SuspectFilter is lazy-loaded on first examine()
        self.filter = None
        self.logger = self._logger()

    def __str__(self):
        return "Archive"

    def lint(self):
        allok = (self.checkConfig() and self.check_deprecated()
                 and self.lint_dirs() and self.lint_filter())
        return allok

    def check_deprecated(self):
        """Warn about the removed 'makedomainsubdir' option; returns False if it is still configured."""
        if self.config.has_option(self.section, 'makedomainsubdir'):
            print(
                "the config option 'makedomainsubdir' has been replaced with 'subdirtemplate' ")
            print("please update your config")
            print("makedomainsubdir=1 -> subdirtemplate=${to_domain}")
            print("makedomainsubdir=0 -> subdirtemplate=")
            return False
        return True

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'archiverules')
        # renamed local: don't shadow the builtin 'filter'
        sfilter = SuspectFilter(filterfile)
        return sfilter.lint()

    def lint_dirs(self):
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            print('Archivedir is not specified')
            return False
        if not os.path.isdir(archivedir):
            print("Archivedir '%s' does not exist or is not a directory" %
                  (archivedir))
            return False
        return True

    def examine(self, suspect):
        archiverules = self.config.get(self.section, 'archiverules')
        if archiverules is None or archiverules == "":
            return DUNNO

        if not os.path.exists(archiverules):
            self.logger.error('Archive Rules file does not exist : %s' %
                              archiverules)
            return DUNNO

        if self.filter is None:
            self.filter = SuspectFilter(archiverules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg is not None and arg.lower() == 'no':
                suspect.debug("Suspect matches archive exception rule")
                self.logger.debug(
                    """Header matches archive exception rule - not archiving""")
            else:
                # any argument other than yes/no is accepted but warned about
                if arg is not None and arg.lower() != 'yes':
                    self.logger.warning(
                        "Unknown archive action '%s' assuming 'yes'" % arg)
                self.logger.debug("""Header matches archive rule""")
                if suspect.get_tag('debug'):
                    suspect.debug(
                        "Suspect matches archiving rule (i would archive it if we weren't in debug mode)")
                else:
                    self.archive(suspect)
        else:
            suspect.debug(
                "No archive rule/exception rule applies to this message")

    def archive(self, suspect):
        """Store a copy of the suspect's message below archivedir.

        Returns the absolute path of the archived file, or None if archivedir
        is not configured.
        """
        archivedir = self.config.get(self.section, 'archivedir')
        if archivedir == "":
            self.logger.error('Archivedir is not specified')
            return

        subdirtemplate = self.config.get(self.section, 'subdirtemplate')

        # deprecated makedomainsubdir emulation: only applies when
        # subdirtemplate is still at its default value
        if self.config.has_option(self.section, 'makedomainsubdir') and subdirtemplate == self.requiredvars['subdirtemplate']['default']:
            self.logger.warning(
                "Archive config is using deprecated 'makedomainsubdir' config option. Emulating old behaviour. Update your config(subdirtemplate)")
            if self.config.getboolean(self.section, 'makedomainsubdir'):
                subdirtemplate = "${to_domain}"
            else:
                subdirtemplate = ""

        # the archive root dir
        startdir = os.path.abspath(archivedir)
        # relative dir within archive root
        subdir = apply_template(subdirtemplate, suspect)
        if subdir.endswith('/'):
            subdir = subdir[:-1]
        # filename without dir
        filenametemplate = self.config.get(self.section, 'filenametemplate')
        filename = apply_template(filenametemplate, suspect)
        # make sure filename can't create new folders
        filename = filename.replace('/', '_')
        # full relative filepath within archive dir
        fpath = "%s/%s" % (subdir, filename)
        # absolute final filepath
        requested_path = os.path.abspath("%s/%s" % (startdir, fpath))

        # containment check: a plain commonprefix/startswith test is bypassable
        # ('/tmp/archive2' shares the string prefix '/tmp/archive'), so require
        # the path to be startdir itself or a true child of it
        if requested_path != startdir and not requested_path.startswith(startdir + os.sep):
            self.logger.error(
                "file path '%s' seems to be outside archivedir '%s' - storing to archivedir" % (requested_path, startdir))
            requested_path = "%s/%s" % (startdir, filename)

        finaldir = os.path.dirname(requested_path)
        if not os.path.isdir(finaldir):
            os.makedirs(finaldir, 0o755)

        if self.config.getboolean(self.section, 'storeoriginal'):
            # copy the unmodified message as received by fuglu
            shutil.copy(suspect.tempfile, requested_path)
        else:
            # store the message possibly altered by previous plugins
            with open(requested_path, 'w') as fp:
                fp.write(suspect.get_source())

        chmod = self.config.get(self.section, 'chmod')
        chgrp = self.config.get(self.section, 'chgrp')
        chown = self.config.get(self.section, 'chown')
        if chmod or chgrp or chown:
            self.setperms(requested_path, chmod, chgrp, chown)

        self.logger.info('Message from %s to %s archived as %s' % (
            suspect.from_address, suspect.to_address, requested_path))
        return requested_path

    def setperms(self, filename, chmod, chgrp, chown):
        """Set file permissions and ownership of an archived message.

        :param filename: the target file
        :param chmod: string representing the permissions (example '640')
        :param chgrp: groupname or group id of the target group.
            the user running fuglu must be a member of this group for this to work
        :param chown: username or user id of the target user.
            fuglu must run as root for this to work (which is not recommended for security reasons)
        """
        # chmod: the permission string is octal, e.g. '640'
        if chmod:
            perm = int(chmod, 8)
            try:
                os.chmod(filename, perm)
            except OSError:  # was a bare except: - never swallow SystemExit/KeyboardInterrupt
                self.logger.error(
                    'could not set permission on file %s' % filename)

        # chgrp: resolve by group name first, then fall back to numeric gid
        changetogroup = -1
        if chgrp:
            group = None
            try:
                group = grp.getgrnam(chgrp)
            except KeyError:
                pass
            try:
                group = grp.getgrgid(int(chgrp))
            except (KeyError, ValueError):
                pass
            if group is not None:
                changetogroup = group.gr_gid
            else:
                self.logger.warning("Group %s not found" % chgrp)

        # chown: resolve by user name first, then fall back to numeric uid
        changetouser = -1
        if chown:
            user = None
            try:
                user = pwd.getpwnam(chown)
            except KeyError:
                pass
            try:
                user = pwd.getpwuid(int(chown))
            except (KeyError, ValueError):
                pass
            if user is not None:
                changetouser = user.pw_uid
            else:
                self.logger.warning("User %s not found" % chown)

        # -1 tells os.chown to leave uid/gid unchanged
        if changetogroup != -1 or changetouser != -1:
            try:
                os.chown(filename, changetouser, changetogroup)
            except Exception as e:
                self.logger.error(
                    "Could not change user/group of file %s : %s" % (filename, str(e)))
class LDAPlugin(ScannerPlugin):

    """Deliver message to maildir / mbox"""

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            'path': {
                'default': '/usr/local/fuglu/deliver/${to_address}',
                'description': 'Path to maildir / mbox file, supports templates',
            },
            # maybe we need to support our own locking later, for now we use python's built-ins
            # 'locktype':{
            #     'default':'',
            #     'description':"flock, ...",
            # },
            'boxtype': {
                'default': 'mbox',
                'description': "mbox, maildir",
            },
            # maybe we need to support various mbox types later, for now we use python's built-in module
            # 'subtype':{
            #     'default':'',
            #     'description':"what type of mbox... ",
            # },
            'filterfile': {
                'default': '',
                'description': "only store messages which use filter...",
            },
        }
        self.logger = self._logger()
        # SuspectFilter is lazy-loaded on first examine()
        self.filter = None
        # dispatch table: 'boxtype' config value -> delivery method
        # NOTE(review): deliver_maildir is referenced here but its definition
        # was truncated in this source chunk - confirm against upstream.
        self.boxtypemap = {
            'mbox': self.deliver_mbox,
            'maildir': self.deliver_maildir,
        }

    def lint(self):
        allok = self.checkConfig()
        filterfile = self.config.get(self.section, 'filterfile', '').strip()
        if filterfile != '' and not os.path.exists(filterfile):
            # print as function call: valid in py2 and py3
            print('LDA filter rules file does not exist : %s' % filterfile)
            allok = False
        boxtype = self.config.get(self.section, 'boxtype')
        if boxtype not in self.boxtypemap:
            print("Unsupported boxtype: %s" % boxtype)
            allok = False
        return allok

    def examine(self, suspect):
        starttime = time.time()
        filterfile = self.config.get(self.section, 'filterfile', '').strip()
        # lazy-load the optional filter on first use
        if self.filter is None:
            if filterfile != '':
                if not os.path.exists(filterfile):
                    self.logger.warning(
                        'LDA filter rules file does not exist : %s' % filterfile)
                    return DEFER
                self.filter = SuspectFilter(filterfile)

        # if a filter is configured, only deliver matching messages
        if self.filter is not None:
            match = self.filter.matches(suspect)
            if not match:
                return DUNNO

        self.boxtypemap[self.config.get(self.section, 'boxtype')](suspect)

        # For debugging, its good to know how long each plugin took
        endtime = time.time()
        difftime = endtime - starttime
        suspect.tags['LDAPlugin.time'] = "%.4f" % difftime

    def deliver_mbox(self, suspect):
        """Append the suspect's message to the configured mbox file."""
        mbox_msg = mailbox.mboxMessage(suspect.get_message_rep())
        mbox_path = apply_template(self.config.get(self.section, 'path'), suspect)
        mbox = mailbox.mbox(mbox_path)
        try:
            mbox.lock()
            mbox.add(mbox_msg)
            mbox.flush()
        except Exception as e:
            self.logger.error("Could not store message %s to %s: %s" % (suspect.id, mbox_path, str(e)))
        finally:
            # NOTE(review): source was truncated after 'finally:' - releasing
            # the mailbox lock is the only sensible cleanup; confirm upstream.
            mbox.unlock()
class LDAPlugin(ScannerPlugin):
    """Deliver message to maildir / mbox"""

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.requiredvars = {
            "path": {
                "default": "/usr/local/fuglu/deliver/${to_address}",
                "description": "Path to maildir / mbox file, supports templates",
            },
            # maybe we need to support our own locking later, for now we use python's built-ins
            #'locktype':{
            #    'default':'',
            #    'description':"flock, ...",
            # },
            "boxtype": {"default": "mbox", "description": "mbox, maildir"},
            # maybe we need to support various mbox types later, for now we use python's built-in module
            #'subtype':{
            #    'default':'',
            #    'description':"what type of mbox... ",
            # },
            "filterfile": {"default": "", "description": "only store messages which use filter..."},
        }
        self.logger = self._logger()
        # SuspectFilter is lazy-loaded on first examine()
        self.filter = None
        # dispatch table: 'boxtype' config value -> delivery method
        # NOTE(review): deliver_maildir is referenced here but its definition
        # was truncated in this source chunk - confirm against upstream.
        self.boxtypemap = {"mbox": self.deliver_mbox, "maildir": self.deliver_maildir}

    def lint(self):
        allok = self.checkConfig()
        filterfile = self.config.get(self.section, "filterfile", "").strip()
        if filterfile != "" and not os.path.exists(filterfile):
            # print as function call: valid in py2 and py3
            print("LDA filter rules file does not exist : %s" % filterfile)
            allok = False
        boxtype = self.config.get(self.section, "boxtype")
        if boxtype not in self.boxtypemap:
            print("Unsupported boxtype: %s" % boxtype)
            allok = False
        return allok

    def examine(self, suspect):
        starttime = time.time()
        filterfile = self.config.get(self.section, "filterfile", "").strip()
        # lazy-load the optional filter on first use
        if self.filter is None:
            if filterfile != "":
                if not os.path.exists(filterfile):
                    self.logger.warning("LDA filter rules file does not exist : %s" % filterfile)
                    return DEFER
                self.filter = SuspectFilter(filterfile)

        # if a filter is configured, only deliver matching messages
        if self.filter is not None:
            match = self.filter.matches(suspect)
            if not match:
                return DUNNO

        self.boxtypemap[self.config.get(self.section, "boxtype")](suspect)

        # For debugging, its good to know how long each plugin took
        endtime = time.time()
        difftime = endtime - starttime
        suspect.tags["LDAPlugin.time"] = "%.4f" % difftime

    def deliver_mbox(self, suspect):
        """Append the suspect's message to the configured mbox file."""
        mbox_msg = mailbox.mboxMessage(suspect.get_message_rep())
        mbox_path = apply_template(self.config.get(self.section, "path"), suspect)
        mbox = mailbox.mbox(mbox_path)
        try:
            mbox.lock()
            mbox.add(mbox_msg)
            mbox.flush()
        except Exception as e:
            self.logger.error("Could not store message %s to %s: %s" % (suspect.id, mbox_path, str(e)))
        finally:
            # NOTE(review): source was truncated after 'finally:' - releasing
            # the mailbox lock is the only sensible cleanup; confirm upstream.
            mbox.unlock()
class ActionOverridePlugin(ScannerPlugin):

    """ Override actions based on a Suspect Filter file.
    For example, delete all messages from a specific sender domain.
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.logger = self._logger()
        self.requiredvars = {
            'actionrules': {
                'default': '/etc/fuglu/actionrules.regex',
                'description': 'Rules file',
            }
        }
        # SuspectFilter is lazy-loaded on first examine()
        self.filter = None

    def __str__(self):
        return "Action Override"

    def lint(self):
        allok = (self.checkConfig() and self.lint_filter())
        return allok

    def lint_filter(self):
        filterfile = self.config.get(self.section, 'actionrules')
        # renamed local: don't shadow the builtin 'filter'
        sfilter = SuspectFilter(filterfile)
        return sfilter.lint()

    def examine(self, suspect):
        actionrules = self.config.get(self.section, 'actionrules')
        if actionrules is None or actionrules == "":
            return DUNNO

        if not os.path.exists(actionrules):
            self.logger.error(
                'Action Rules file does not exist : %s' % actionrules)
            return DUNNO

        if self.filter is None:
            self.filter = SuspectFilter(actionrules)

        (match, arg) = self.filter.matches(suspect)
        if match:
            if arg is None or arg.strip() == '':
                self.logger.error("Rule match but no action defined.")
                return DUNNO

            arg = arg.strip()
            # first word is the action, optional remainder is the message/argument
            spl = arg.split(None, 1)
            actionstring = spl[0]
            message = None
            if len(spl) == 2:
                message = spl[1]
            self.logger.debug(
                "%s: Rule match! Action override: %s" % (suspect.id, arg.upper()))

            actioncode = string_to_actioncode(actionstring, self.config)
            if actioncode is not None:
                return actioncode, message
            elif actionstring.upper() == 'REDIRECT':
                suspect.to_address = message.strip()
                suspect.recipients = [suspect.to_address, ]
                # todo: should we override to_domain? probably not
                # todo: check for invalid adress, multiple adressses
                # todo: document redirect action
            else:
                self.logger.error("Invalid action: %s" % arg)
                return DUNNO

        return DUNNO
class ActionOverridePlugin(ScannerPlugin):
    """ Override actions based on a Suspect Filter file.
    For example, delete all messages from a specific sender domain.
    """

    def __init__(self, config, section=None):
        ScannerPlugin.__init__(self, config, section)
        self.logger = self._logger()
        self.requiredvars = {
            'actionrules': {
                'default': '/etc/fuglu/actionrules.regex',
                'description': 'Rules file',
            }
        }
        # rules are compiled into a SuspectFilter on first examine()
        self.filter = None

    def __str__(self):
        return "Action Override"

    def lint(self):
        return self.check_config() and self.lint_filter()

    def lint_filter(self):
        """Instantiate the configured rules file and lint it."""
        rules_path = self.config.get(self.section, 'actionrules')
        return SuspectFilter(rules_path).lint()

    def examine(self, suspect):
        """Apply the first matching override rule to the suspect, if any."""
        rules_path = self.config.get(self.section, 'actionrules')
        if rules_path is None or rules_path == "":
            return DUNNO
        if not os.path.exists(rules_path):
            self.logger.error('Action Rules file does not exist : %s' % rules_path)
            return DUNNO
        if self.filter is None:
            self.filter = SuspectFilter(rules_path)

        match, arg = self.filter.matches(suspect)
        if not match:
            return DUNNO

        if arg is None or arg.strip() == '':
            self.logger.error("Rule match but no action defined.")
            return DUNNO

        arg = arg.strip()
        # first word is the action, optional remainder is the message/argument
        parts = arg.split(None, 1)
        action_word = parts[0]
        message = parts[1] if len(parts) == 2 else None
        self.logger.debug("%s: Rule match! Action override: %s" % (suspect.id, arg.upper()))

        actioncode = string_to_actioncode(action_word, self.config)
        if actioncode is not None:
            return actioncode, message
        if action_word.upper() == 'REDIRECT':
            suspect.to_address = message.strip()
            # todo: check for invalid adress, multiple adressses, set suspect.recipients instead of to_address
            # todo: document redirect action
        else:
            self.logger.error("Invalid action: %s" % arg)
            return DUNNO
        return DUNNO
def lint_filter(self):
    """Build a SuspectFilter from the configured 'actionrules' file and lint it."""
    rules_path = self.config.get(self.section, 'actionrules')
    return SuspectFilter(rules_path).lint()
class IMAPCopyPlugin(ScannerPlugin): """This plugins stores a copy of the message to an IMAP mailbox if it matches certain criteria (Suspect Filter). The rulefile works similar to the archive plugin. As third column you have to provide imap account data in the form: <protocol>://<username>:<password>@<servernameorip>[:port]/<mailbox> <protocol> is either imap or imaps """ def __init__(self,config,section=None): ScannerPlugin.__init__(self,config,section) self.requiredvars={ 'imapcopyrules':{ 'default':'/etc/fuglu/imapcopy.regex', 'description':'IMAP copy suspectFilter File', }, 'storeoriginal':{ 'default':'1', 'description':"if true/1/yes: store original message\nif false/0/no: store message probably altered by previous plugins, eg with spamassassin headers", } } self.filter=None self.logger=self._logger() def examine(self,suspect): imapcopyrules=self.config.get(self.section, 'imapcopyrules') if imapcopyrules==None or imapcopyrules=="": return DUNNO if not os.path.exists(imapcopyrules): self._logger().error('IMAP copy rules file does not exist : %s'%imapcopyrules) return DUNNO if self.filter==None: self.filter=SuspectFilter(imapcopyrules) (match,info)=self.filter.matches(suspect,extended=True) if match: field,matchedvalue,arg,regex=info if arg!=None and arg.lower()=='no': suspect.debug("Suspect matches imap copy exception rule") self.logger.info("""%s: Header %s matches imap copy exception rule '%s' """%(suspect.id,field,regex)) else: if arg==None or (not arg.lower().startswith('imap')): self.logger.error("Unknown target format '%s' should be 'imap(s)://user:pass@host/folder'"%arg) else: self.logger.info("""%s: Header %s matches imap copy rule '%s' """%(suspect.id,field,regex)) if suspect.get_tag('debug'): suspect.debug("Suspect matches imap copy rule (I would copy it if we weren't in debug mode)") else: self.storeimap(suspect,arg) else: suspect.debug("No imap copy rule/exception rule applies to this message") def imapconnect(self,imapurl,lintmode=False): 
p=urlparse(imapurl) scheme=p.scheme.lower() host=p.hostname port=p.port username=p.username password=p.password folder=p.path[1:] if scheme=='imaps': ssl=True else: ssl=False if port==None: if ssl: port=imaplib.IMAP4_SSL_PORT else: port=imaplib.IMAP4_PORT try: if ssl: imap=imaplib.IMAP4_SSL(host=host,port=port) else: imap=imaplib.IMAP4(host=host,port=port) except Exception,e: ltype='IMAP' if ssl: ltype='IMAP-SSL' msg="%s Connection to server %s failed: %s"%(ltype,host,str(e)) if lintmode: print msg else: self.logger.error(msg) return None try: imap.login(username,password) except Exception,e: msg="Login to server %s failed: %s"%(host,str(e)) if lintmode: print msg else: self.logger.error(msg) return None