def pushIntoDB(mailFields, key, msgMailRequest, exeSql):
    """Insert a newly seen spam into the ``spam`` table.

    The row is written with ``totalCounter`` set to '1'.  The mail values are
    handed to the cursor as query parameters instead of being concatenated
    into the SQL text, so quotes or backslashes in mail content cannot
    change the statement.

    Args:
        mailFields: dict providing 'to', 'from', 'subject', 'text', 'html'
            and 'spam_id'.
        key: forwarded to ShivaConfig.errorHandling when the insert fails.
        msgMailRequest: forwarded to ShivaConfig.errorHandling as well.
        exeSql: cursor-like object.  Assumed to accept the DB-API call
            ``execute(query, params)`` with ``%s`` placeholders (MySQLdb
            style) -- TODO confirm against the code that creates it.

    Returns:
        None in every case; a DB error is logged and reported through
        ShivaConfig.errorHandling rather than raised.
    """
    logging.critical("[!] --------- Inside ShivaNewSpam module -----------------")

    insert_spam = (
        "INSERT INTO spam (`to`, `from`, `subject`, `textMessage`, "
        "`htmlMessage`, `totalCounter`, `id`) "
        "VALUES (%s, %s, %s, %s, %s, '1', %s)"
    )
    # str() keeps the same value conversion the concatenated query used.
    columns = ('to', 'from', 'subject', 'text', 'html', 'spam_id')
    params = tuple(str(mailFields[name]) for name in columns)

    try:
        exeSql.execute(insert_spam, params)
    except mdb.Error as e:
        logging.critical("[-] Error (ShivaNewSpam - insert_spam) - %d: %s",
                         e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def pushIntoDB(mailFields, key, msgMailRequest, exeSql):
    """Store a new spam as one row of the ``spam`` table.

    ``totalCounter`` starts at '1'.  All mail-derived values travel as bound
    parameters, never as part of the SQL string, which removes the
    injection risk of the former string-concatenated statement.

    Args:
        mailFields: dict with the keys 'to', 'from', 'subject', 'text',
            'html' and 'spam_id'.
        key: passed to ShivaConfig.errorHandling if the insert fails.
        msgMailRequest: passed to ShivaConfig.errorHandling if the insert
            fails.
        exeSql: cursor-like object; presumably a MySQLdb cursor whose
            ``execute(query, params)`` understands ``%s`` placeholders --
            verify against the caller.

    Returns:
        None, whether or not the insert succeeded.
    """
    logging.critical(
        "[!] --------- Inside ShivaNewSpam module -----------------")

    insert_spam = (
        "INSERT INTO spam (`to`, `from`, `subject`, `textMessage`, "
        "`htmlMessage`, `totalCounter`, `id`) "
        "VALUES (%s, %s, %s, %s, %s, '1', %s)"
    )
    # Same str() conversion as before, just bound instead of concatenated.
    params = (
        str(mailFields['to']),
        str(mailFields['from']),
        str(mailFields['subject']),
        str(mailFields['text']),
        str(mailFields['html']),
        str(mailFields['spam_id']),
    )

    try:
        exeSql.execute(insert_spam, params)
    except mdb.Error as e:
        logging.critical("[-] Error (ShivaNewSpam - insert_spam) - %d: %s",
                         e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def pushIntoDB(mailFields, key, msgMailRequest, exeSql):
    """Look up whether an already-known spam has a date row for its date.

    Runs a SELECT joining ``sdate`` and ``sdate_spam`` for the given spam id
    and date.  The values are bound as query parameters rather than being
    concatenated into the SQL text.

    Args:
        mailFields: dict providing 'spam_id' and 'date'.
        key: forwarded to ShivaConfig.errorHandling when the query fails.
        msgMailRequest: forwarded to ShivaConfig.errorHandling as well.
        exeSql: cursor-like object offering ``execute`` and ``fetchall``.
            Assumed to accept ``execute(query, params)`` with ``%s``
            placeholders (MySQLdb style) -- TODO confirm.

    Returns:
        None in every case.
    """
    logging.critical("Inside ShivaOldSpam")

    # > Update "lastSeen" for spam whose copy has been received again on the same date
    # date = 0 (received first copy of old spam today - update firstseen and lastseen for today)
    # date = 1 (received next copy of old spam today - update lastseen for today)
    # NOTE(review): nothing visible in this block reads `date` afterwards;
    # presumably the insert/update logic that uses it lives elsewhere.
    date = 0

    checkDate = (
        "SELECT sdate.date FROM sdate "
        "JOIN sdate_spam ON (sdate.id = sdate_spam.date_id) "
        "WHERE sdate_spam.spam_id = %s AND sdate.date = %s"
    )
    params = (str(mailFields['spam_id']), str(mailFields['date']))

    try:
        exeSql.execute(checkDate, params)
        if exeSql.fetchall():
            date = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaOldSpam - retriving date from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def pushIntoDB(mailFields, key, msgMailRequest, exeSql):
    """Check whether an old spam already has a date row for its date.

    Only the date lookup is implemented in this block; the values are bound
    as query parameters instead of being concatenated into the SQL text.

    Args:
        mailFields: dict providing 'spam_id' and 'date'.
        key: forwarded to ShivaConfig.errorHandling when the query fails.
        msgMailRequest: forwarded to ShivaConfig.errorHandling as well.
        exeSql: cursor-like object offering ``execute`` and ``fetchall``;
            presumably a MySQLdb cursor accepting ``execute(query, params)``
            with ``%s`` placeholders -- verify against the caller.

    Returns:
        None in every case.
    """
    # Design notes carried over from the original author (the steps beyond
    # the date check are not implemented in this block):
    # > Same "from" + "subject" + "date" combo might come with different md5s.
    #   We shall not lose any attachment.
    # > In such a case, we shall just save the attachment and increase the
    #   counters accordingly.
    # Old combo
    # Step 1: check if new attachment(s) has/have come with old combo
    # Step 2: if yes, save attachment(s), retrieve spamPot.spam.totalCounter
    #         for combo and increase it by 1
    # Step 3: if no, don't save attachment(s), retrieve
    #         spamPot.spam.totalCounter for combo and increase it by 1
    # Step 4: put relay test cases:
    #   : if individual_relayed_today < 10 && totalRelayed_today < 100
    #     -> relay, retrieve individual_relayed_today & totalRelayed and
    #        increment by 1
    #   : if (individual_relayed_today = 10 && totalRelayed_today < 100) or
    #     (individual_relayed_today < 10 && totalRelayed_today = 100)
    #     -> don't relay
    # Step 5: remove message from queue
    logging.critical("Inside ShivaOldSpam")

    # > Update "lastSeen" for spam whose copy has been received again on the same date
    # date = 0 (received first copy of old spam today - update firstseen and lastseen for today)
    # date = 1 (received next copy of old spam today - update lastseen for today)
    # NOTE(review): nothing visible in this block reads `date` afterwards;
    # presumably the follow-up insert/update logic lives elsewhere.
    date = 0

    checkDate = (
        "SELECT sdate.date FROM sdate "
        "JOIN sdate_spam ON (sdate.id = sdate_spam.date_id) "
        "WHERE sdate_spam.spam_id = %s AND sdate.date = %s"
    )
    params = (str(mailFields['spam_id']), str(mailFields['date']))

    try:
        exeSql.execute(checkDate, params)
        if exeSql.fetchall():
            date = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaOldSpam - retriving date from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def relay(mailFields, key, msgMailRequest, exeSql):
    """
    It does the decision making part - which spam to relay, which attachment
    to save, client notifications etc.

    The part visible in this block only determines whether a row with the
    spam's id already exists in the ``spam`` table.  The id is bound as a
    query parameter instead of being concatenated into the SQL text.

    Args:
        mailFields: dict providing 'spam_id'.
        key: forwarded to ShivaConfig.errorHandling when the query fails.
        msgMailRequest: forwarded to ShivaConfig.errorHandling as well.
        exeSql: cursor-like object offering ``execute`` and ``fetchall``.
            Assumed to accept ``execute(query, params)`` with ``%s``
            placeholders (MySQLdb style) -- TODO confirm.

    Returns:
        None in every case.
    """
    # status = 0 (new combo): no row with this spam id is in the DB yet
    # status = 1 (old combo): a row with this spam id already exists
    # NOTE(review): nothing visible in this block reads `status` afterwards;
    # presumably the dispatching logic that uses it lives elsewhere.
    status = 0

    checkData = "SELECT spam.id FROM spam WHERE spam.id = %s"

    try:
        exeSql.execute(checkData, (str(mailFields['spam_id']),))
        if exeSql.fetchall():
            status = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaMailRelayer - retriving combos from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def relay(mailFields, key, msgMailRequest, exeSql):
    """
    It does the decision making part - which spam to relay, which attachment
    to save, client notifications etc.

    What this block actually performs is the existence check: it asks the
    ``spam`` table for a row whose id equals the spam's id, with the id
    passed as a bound parameter rather than spliced into the SQL string.

    Args:
        mailFields: dict providing 'spam_id'.
        key: passed to ShivaConfig.errorHandling if the query fails.
        msgMailRequest: passed to ShivaConfig.errorHandling if the query
            fails.
        exeSql: cursor-like object with ``execute`` and ``fetchall``;
            presumably a MySQLdb cursor accepting ``execute(query, params)``
            with ``%s`` placeholders -- verify against the caller.

    Returns:
        None in every case.
    """
    checkData = "SELECT spam.id FROM spam WHERE spam.id = %s"
    params = (str(mailFields['spam_id']),)

    # status = 0 (new combo): the spam id is not in the DB
    # status = 1 (old combo): the spam id is already there
    # NOTE(review): `status` is not read again inside this block; the code
    # that uses it is presumably located elsewhere.
    status = 0
    try:
        exeSql.execute(checkData, params)
        rows = exeSql.fetchall()
        if rows:
            status = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaMailRelayer - retriving combos from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
def pushIntoDB(mailFields, key, msgMailRequest, exeSql):
    """Find out whether an old spam was already recorded for its date.

    Issues one SELECT over ``sdate`` joined with ``sdate_spam``.  Spam id and
    date are bound as parameters, so they can never be interpreted as SQL.

    Args:
        mailFields: dict providing 'spam_id' and 'date'.
        key: passed to ShivaConfig.errorHandling if the query fails.
        msgMailRequest: passed to ShivaConfig.errorHandling if the query
            fails.
        exeSql: cursor-like object with ``execute`` and ``fetchall``.
            Assumed to accept ``execute(query, params)`` with ``%s``
            placeholders (MySQLdb style) -- TODO confirm.

    Returns:
        None in every case.
    """
    logging.critical("Inside ShivaOldSpam")

    checkDate = (
        "SELECT sdate.date FROM sdate "
        "JOIN sdate_spam ON (sdate.id = sdate_spam.date_id) "
        "WHERE sdate_spam.spam_id = %s AND sdate.date = %s"
    )
    params = tuple(str(mailFields[name]) for name in ('spam_id', 'date'))

    # > Update "lastSeen" for spam whose copy has been received again on the same date
    # date = 0 (received first copy of old spam today - update firstseen and lastseen for today)
    # date = 1 (received next copy of old spam today - update lastseen for today)
    # NOTE(review): `date` is not read again inside this block; the logic
    # that acts on it is presumably located elsewhere.
    date = 0
    try:
        exeSql.execute(checkDate, params)
        rows = exeSql.fetchall()
        if rows:
            date = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaOldSpam - retriving date from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
except mdb.Error, e: logging.critical("[-] Error (ShivaOldSpam - retriving date from DB) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None if date == 0: logging.critical("Seems new date for old spam") insert_sdate = "INSERT INTO sdate (`date`, `firstSeen`, `lastSeen`, `todaysCounter`) VALUES('"+str(mailFields['date'])+"', '"+str(mailFields['firstSeen'])+"', '"+str(mailFields['lastSeen'])+"', '1')" try: exeSql.execute(insert_sdate) except mdb.Error, e: logging.critical("[-] Error (ShivaOldSpam: insert_sdate) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None insert_sdate_spam = "INSERT INTO sdate_spam (`spam_id`, `date_id`) VALUES('"+str(mailFields['spam_id'])+"', '"+str(exeSql.lastrowid)+"')" try: exeSql.execute(insert_sdate_spam) except mdb.Error, e: logging.critical("[-] Error (ShivaOldSpam: insert_sdate_spam) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None else: logging.critical("Old spam & already seen today") update_date = "UPDATE sdate JOIN sdate_spam ON (sdate.id = sdate_spam.date_id) SET sdate.lastSeen = '"+str(mailFields['lastSeen'])+"', sdate.todaysCounter = sdate.todaysCounter + 1 WHERE sdate_spam.spam_id = '"+ str(mailFields['spam_id'])+"' AND sdate.date = '"+ str(mailFields['date'])+"'"
def relay(mailFields, key, msgMailRequest, exeSql):
    """
    It does the decision making part - which spam to relay, which attachment
    to save, client notifications etc.

    The part implemented in this block retrieves the existing entry (if
    any) for the spam under analysis: it selects from ``spam`` by the
    spam's id.  The id is bound as a query parameter instead of being
    concatenated into the SQL text.

    Args:
        mailFields: dict providing 'spam_id'.
        key: forwarded to ShivaConfig.errorHandling when the query fails.
        msgMailRequest: forwarded to ShivaConfig.errorHandling as well.
        exeSql: cursor-like object offering ``execute`` and ``fetchall``.
            Assumed to accept ``execute(query, params)`` with ``%s``
            placeholders (MySQLdb style) -- TODO confirm.

    Returns:
        None in every case.
    """
    # status = 0 (new combo): no row with this spam id is in the DB, so it
    #            is to be considered new
    # status = 1 (old combo): a row with this spam id is already there
    # NOTE(review): nothing visible in this block reads `status` afterwards;
    # presumably the dispatching logic that uses it lives elsewhere.
    status = 0

    # Retrieve the existing entry from the DB to check it against the spam
    # under analysis.
    checkData = "SELECT spam.id FROM spam WHERE spam.id = %s"

    try:
        exeSql.execute(checkData, (str(mailFields['spam_id']),))
        if exeSql.fetchall():
            status = 1
    except mdb.Error as e:
        logging.critical(
            "[-] Error (ShivaMailRelayer - retriving combos from DB) - %d: %s",
            e.args[0], e.args[1])
        ShivaConfig.errorHandling(key, msgMailRequest)
        return None
except mdb.Error, e: logging.critical("[-] Error (ShivaNewSpam - insert_spam) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None insert_sdate = "INSERT INTO sdate (`date`, `firstSeen`, `lastSeen`, `todaysCounter`) VALUES('" + str( mailFields['date']) + "', '" + str( mailFields['firstSeen']) + "', '" + str( mailFields['lastSeen']) + "', '1')" try: exeSql.execute(insert_sdate) except mdb.Error, e: logging.critical("[-] Error (ShivaNewSpam: insert_sdate) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None insert_sdate_spam = "INSERT INTO sdate_spam (`spam_id`, `date_id`) VALUES('" + str( mailFields['spam_id']) + "', '" + str(exeSql.lastrowid) + "')" try: exeSql.execute(insert_sdate_spam) except mdb.Error, e: logging.critical( "[-] Error (ShivaNewSpam: insert_sdate_spam) - %d: %s" % (e.args[0], e.args[1])) ShivaConfig.errorHandling(key, msgMailRequest) return None insert_ip = "INSERT INTO ip (`date`, `sourceIP`) VALUES('" + str(
def main(key, msgMailRequest, exeSql):
    """
    This function gets called from queueFilter.filter()

    Parses the queued spam file named by ``key`` into the module-level
    ``mailFields`` dict and passes the result to ShivaMailRelayer.relay.

    Args:
        key: file name below ShivaConfig.queuePath; its last two
            '-'-separated parts are stored as sourceIP and sensorID.
        msgMailRequest: request object; its 'subject' entry is consulted
            and it is forwarded to ShivaConfig.errorHandling and the
            relayer.
        exeSql: forwarded untouched to ShivaMailRelayer.relay.

    Returns:
        None in every case.  Any parsing failure is logged, reported via
        ShivaConfig.errorHandling and ends processing of this mail.
    """
    # Rebound on every call; kept global as in the original design
    # (presumably writePartsRecurse fills it -- verify).
    global mailFields
    mailFields = {
        'headers': '', 'to': '', 'from': '', 'subject': '', 'date': '',
        'firstSeen': '', 'lastSeen': '', 'firstRelayed': '',
        'lastRelayed': '', 'sourceIP': '', 'sensorID': '', 'text': '',
        'html': '', 'inlineFileName': [], 'inlineFile': [],
        'inlineFileMd5': [], 'attachmentFileName': [], 'attachmentFile': [],
        'attachmentFileMd5': [], 'links': [], 'spam_id': '',
    }

    def abort(message, *field_names):
        """Log the failure (plus the named fields) and report the mail."""
        logging.critical(message % key)
        for name in field_names:
            logging.critical(name + ": %s", mailFields[name])
        ShivaConfig.errorHandling(key, msgMailRequest)

    try:
        # `with` closes both handles even when parsing raises.
        with open(ShivaConfig.queuePath + key, "rb") as mailFile:
            msg = email.Parser.Parser().parse(mailFile)

        ## Extracting whole header - not individual fields - Nov, 2012
        with open(ShivaConfig.queuePath + key) as f:
            msgmsg = email.message_from_file(f)
        hh = email.parser.HeaderParser().parsestr(msgmsg.as_string())
        # Every item is a tuple; its str() form is stored, one per line.
        headerString = ''.join(str(h) + '\n' for h in hh.items())
        mailFields['headers'] = str(headerString).replace("'", "")
    except IOError:
        abort("[-] Error (Module ShivaMailParser.py) - could not open|write file %s \n")
        return None

    try:
        try:
            mailFields['to'] = msg['to'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'to' field %s", 'to')
            return None

        try:
            from_field = msg['from']
            if from_field is not None:
                regex_from = r'''([\w\-\.+]+@\w[\w\-]+\.+[\w\-]+)'''  # Just looking for mail id
                sender = re.findall(regex_from, from_field)[0]
                mailFields['from'] = sender.replace("'", "").replace('"', '')
            else:
                logging.critical(
                    "[-] Info ShivaMailParser.py - From field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'from' field %s", 'from')
            return None

        try:
            # Seen cases of unicode. Function returns the encoding type if any
            subject, encoding = decode_header(msg.get('subject'))[0]
            if encoding is None:
                mailFields['subject'] = subject
            else:
                mailFields['subject'] = subject.decode(encoding)
                # Need to encode('utf-8') else won't be able to push into DB
                mailFields['subject'] = mailFields['subject'].encode('utf-8')

            if msgMailRequest['subject'] is not None:
                mailFields['subject'] = mailFields['subject'].replace("'", "")
                mailFields['subject'] = mailFields['subject'].replace('"', '')
            else:
                logging.critical(
                    "[-] Info ShivaMailParser.py - Subject field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'subject' field %s", 'subject')
            return None

        try:
            name_parts = key.split("-")
            mailFields['sourceIP'] = name_parts[-2]
            mailFields['sensorID'] = name_parts[-1]
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'sourceIP and sensorID' field %s")
            return None

        try:
            writePartsRecurse(msg)
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in writePartsRecurse function %s")
            return None

        # remove single quotes from various fields, they break lamson server,
        # this could be done in writePartsRecurse function itself
        try:
            if mailFields['text'] is not None:
                mailFields['text'] = mailFields['text'].replace("'", "")
            if mailFields['html'] is not None:
                mailFields['html'] = mailFields['html'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in 'text' and 'html' field %s", 'text', 'html')
            return None

        try:
            mailFields['links'] = ShivaLinkParser.parser(mailFields['html'])
            mailFields['links'].extend(
                ShivaLinkParser.parser(mailFields['text']))
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'links' field %s", 'links')
            return None

        # Timestamping when spam is parsed by our code; not the original
        # time stamping.  One clock reading keeps date and timestamps
        # consistent with each other, even around midnight.
        now = datetime.datetime.now()
        mailFields['date'] = now.date()
        mailFields['firstSeen'] = now
        mailFields['lastSeen'] = now
        mailFields['firstRelayed'] = now
        mailFields['lastRelayed'] = now

        spam_id = str(mailFields['from']) + str(mailFields['subject'])
        mailFields['spam_id'] = hashlib.md5(spam_id).hexdigest()
    except Exception:
        abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing file %s")
        return None

    # Call to ShivaMailRelayer.relay function which determines whether spam is new or old
    ShivaMailRelayer.relay(mailFields, key, msgMailRequest, exeSql)
    return None
def main(key, msgMailRequest, exeSql):
    """
    This function gets called from queueFilter.filter()

    Reads the spam file named by ``key`` from the queue, fills the
    module-level ``mailFields`` dict from it and hands the dict to
    ShivaMailRelayer.relay.

    Args:
        key: name of the spam file retrieved from the queue; its last two
            '-'-separated parts are stored as sourceIP and sensorID.
        msgMailRequest: request object; its 'subject' entry is consulted
            and it is forwarded to ShivaConfig.errorHandling and the
            relayer.
        exeSql: forwarded untouched to ShivaMailRelayer.relay.

    Returns:
        None in every case.  Any parsing failure is logged, reported via
        ShivaConfig.errorHandling and ends processing of this mail.
    """
    # Rebound on every call; kept global as in the original design
    # (presumably writePartsRecurse fills it -- verify).
    global mailFields
    mailFields = {
        'headers': '', 'to': '', 'from': '', 'subject': '', 'date': '',
        'firstSeen': '', 'lastSeen': '', 'firstRelayed': '',
        'lastRelayed': '', 'sourceIP': '', 'sensorID': '', 'text': '',
        'html': '', 'inlineFileName': [], 'inlineFile': [],
        'inlineFileMd5': [], 'attachmentFileName': [], 'attachmentFile': [],
        'attachmentFileMd5': [], 'links': [], 'spam_id': '',
    }

    def abort(message, *field_names):
        """Log the failure (plus the named fields) and report the mail."""
        logging.critical(message % key)
        for name in field_names:
            logging.critical(name + ": %s", mailFields[name])
        ShivaConfig.errorHandling(key, msgMailRequest)

    try:
        # `with` closes both handles even when parsing raises.
        with open(ShivaConfig.queuePath + key, "rb") as mailFile:
            msg = email.Parser.Parser().parse(mailFile)

        ## Extracting whole header - not individual fields - Nov, 2012
        with open(ShivaConfig.queuePath + key) as f:
            msgmsg = email.message_from_file(f)
        hh = email.parser.HeaderParser().parsestr(msgmsg.as_string())
        # h is a tuple value. Converting to string to add a string to it
        headerString = ''.join(str(h) + '\n' for h in hh.items())
        mailFields['headers'] = str(headerString).replace("'", "")
    except IOError:
        abort("[-] Error (Module ShivaMailParser.py) - could not open|write file %s \n")
        return None

    try:
        # filling up basic fields of dictionary, rest get their values only
        # after reverse parsing of multipart spam mail
        try:
            # "to" field - considered it won't be in unicode, else write
            # code to handle it
            mailFields['to'] = msg['to'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'to' field %s", 'to')
            return None

        try:
            # "from" field - could be normal or unicode, but we are
            # stripping off everything but just the mail id.  decode_header
            # is not used here: it failed for a case where the output was
            # just a double quote.
            from_field = msg['from']
            if from_field is not None:  # Seen cases where "from" field had value "none"
                regex_from = r'''([\w\-\.+]+@\w[\w\-]+\.+[\w\-]+)'''  # Just looking for mail id
                sender = re.findall(regex_from, from_field)[0]
                mailFields['from'] = sender.replace("'", "").replace('"', '')
            else:
                logging.critical("[-] Info ShivaMailParser.py - From field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'from' field %s", 'from')
            return None

        try:
            # "subject" field - could be normal or unicode
            # Seen cases of unicode. Function returns the encoding type if any
            subject, encoding = decode_header(msg.get('subject'))[0]
            if encoding is None:
                mailFields['subject'] = subject
            else:
                mailFields['subject'] = subject.decode(encoding)
                # Need to encode('utf-8') else won't be able to push into DB
                mailFields['subject'] = mailFields['subject'].encode('utf-8')

            if msgMailRequest['subject'] is not None:
                mailFields['subject'] = mailFields['subject'].replace("'", "")
                mailFields['subject'] = mailFields['subject'].replace('"', '')
            else:
                logging.critical("[-] Info ShivaMailParser.py - Subject field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'subject' field %s", 'subject')
            return None

        try:
            # The file names are generated in a way that the last two fields
            # of the file name indicate sourceIP and sensorID
            name_parts = key.split("-")
            mailFields['sourceIP'] = name_parts[-2]
            mailFields['sensorID'] = name_parts[-1]
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'sourceIP and sensorID' field %s")
            return None

        try:
            # call function to obtain rest of the fields - it handles
            # multipart mails as well
            writePartsRecurse(msg)
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in writePartsRecurse function %s")
            return None

        # remove single quotes from various fields, they break lamson server,
        # this could be done in writePartsRecurse function itself
        try:
            if mailFields['text'] is not None:
                mailFields['text'] = mailFields['text'].replace("'", "")
            if mailFields['html'] is not None:
                mailFields['html'] = mailFields['html'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in 'text' and 'html' field %s", 'text', 'html')
            return None

        # parse different parts of spam (text, html) and hunt for URLs
        try:
            mailFields['links'] = ShivaLinkParser.parser(mailFields['html'])
            mailFields['links'].extend(ShivaLinkParser.parser(mailFields['text']))
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'links' field %s", 'links')
            return None

        # Timestamping when spam is parsed by our code; not the original
        # time stamping.  One clock reading keeps date and timestamps
        # consistent with each other, even around midnight.
        now = datetime.datetime.now()
        mailFields['date'] = now.date()
        mailFields['firstSeen'] = now
        mailFields['lastSeen'] = now
        mailFields['firstRelayed'] = now
        mailFields['lastRelayed'] = now

        # Md5 of the combination of "from" and "subject" to differentiate
        # amongst spams (the date is not part of the hash).
        spam_id = str(mailFields['from']) + str(mailFields['subject'])
        mailFields['spam_id'] = hashlib.md5(spam_id).hexdigest()
    except Exception:
        abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing file %s")
        return None

    # Call to ShivaMailRelayer.relay function which determines whether spam is new or old
    ShivaMailRelayer.relay(mailFields, key, msgMailRequest, exeSql)
    return None
def main(key, msgMailRequest, exeSql):
    """
    This function gets called from queueFilter.filter()

    Builds the module-level ``mailFields`` dict from the queued spam file
    named by ``key`` and then calls ShivaMailRelayer.relay with it.

    Args:
        key: file name below ShivaConfig.queuePath; its last two
            '-'-separated parts are stored as sourceIP and sensorID.
        msgMailRequest: request object; its 'subject' entry is consulted
            and it is forwarded to ShivaConfig.errorHandling and the
            relayer.
        exeSql: forwarded untouched to ShivaMailRelayer.relay.

    Returns:
        None in every case.  Any parsing failure is logged, reported via
        ShivaConfig.errorHandling and ends processing of this mail.
    """
    # Rebound on every call; kept global as in the original design
    # (presumably writePartsRecurse fills it -- verify).
    global mailFields
    mailFields = {
        'headers': '', 'to': '', 'from': '', 'subject': '', 'date': '',
        'firstSeen': '', 'lastSeen': '', 'firstRelayed': '',
        'lastRelayed': '', 'sourceIP': '', 'sensorID': '', 'text': '',
        'html': '', 'inlineFileName': [], 'inlineFile': [],
        'inlineFileMd5': [], 'attachmentFileName': [], 'attachmentFile': [],
        'attachmentFileMd5': [], 'links': [], 'spam_id': '',
    }

    def abort(message, *field_names):
        """Log the failure (plus the named fields) and report the mail."""
        logging.critical(message % key)
        for name in field_names:
            logging.critical(name + ": %s", mailFields[name])
        ShivaConfig.errorHandling(key, msgMailRequest)

    try:
        # `with` closes both handles even when parsing raises.
        with open(ShivaConfig.queuePath + key, "rb") as mailFile:
            msg = email.Parser.Parser().parse(mailFile)

        ## Extracting whole header - not individual fields - Nov, 2012
        with open(ShivaConfig.queuePath + key) as f:
            msgmsg = email.message_from_file(f)
        hh = email.parser.HeaderParser().parsestr(msgmsg.as_string())
        # h is a tuple value. Converting to string to add a string to it
        headerString = ''.join(str(h) + '\n' for h in hh.items())
        mailFields['headers'] = str(headerString).replace("'", "")
    except IOError:
        abort("[-] Error (Module ShivaMailParser.py) - could not open|write file %s \n")
        return None

    try:
        try:
            mailFields['to'] = msg['to'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'to' field %s", 'to')
            return None

        try:
            from_field = msg['from']
            if from_field is not None:
                regex_from = r'''([\w\-\.+]+@\w[\w\-]+\.+[\w\-]+)'''  # Just looking for mail id
                sender = re.findall(regex_from, from_field)[0]
                mailFields['from'] = sender.replace("'", "").replace('"', '')
            else:
                logging.critical("[-] Info ShivaMailParser.py - From field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'from' field %s", 'from')
            return None

        try:
            # Seen cases of unicode. Function returns the encoding type if any
            subject, encoding = decode_header(msg.get('subject'))[0]
            if encoding is None:
                mailFields['subject'] = subject
            else:
                mailFields['subject'] = subject.decode(encoding)
                # Need to encode('utf-8') else won't be able to push into DB
                mailFields['subject'] = mailFields['subject'].encode('utf-8')

            if msgMailRequest['subject'] is not None:
                mailFields['subject'] = mailFields['subject'].replace("'", "")
                mailFields['subject'] = mailFields['subject'].replace('"', '')
            else:
                logging.critical("[-] Info ShivaMailParser.py - Subject field has value None")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'subject' field %s", 'subject')
            return None

        try:
            name_parts = key.split("-")
            mailFields['sourceIP'] = name_parts[-2]
            mailFields['sensorID'] = name_parts[-1]
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'sourceIP and sensorID' field %s")
            return None

        try:
            writePartsRecurse(msg)
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in writePartsRecurse function %s")
            return None

        # remove single quotes from various fields, they break lamson server,
        # this could be done in writePartsRecurse function itself
        try:
            if mailFields['text'] is not None:
                mailFields['text'] = mailFields['text'].replace("'", "")
            if mailFields['html'] is not None:
                mailFields['html'] = mailFields['html'].replace("'", "")
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in 'text' and 'html' field %s", 'text', 'html')
            return None

        try:
            mailFields['links'] = ShivaLinkParser.parser(mailFields['html'])
            mailFields['links'].extend(ShivaLinkParser.parser(mailFields['text']))
        except Exception:
            abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing 'links' field %s", 'links')
            return None

        # Timestamping when spam is parsed by our code; not the original
        # time stamping.  One clock reading keeps date and timestamps
        # consistent with each other, even around midnight.
        now = datetime.datetime.now()
        mailFields['date'] = now.date()
        mailFields['firstSeen'] = now
        mailFields['lastSeen'] = now
        mailFields['firstRelayed'] = now
        mailFields['lastRelayed'] = now

        spam_id = str(mailFields['from']) + str(mailFields['subject'])
        mailFields['spam_id'] = hashlib.md5(spam_id).hexdigest()
    except Exception:
        abort("[-] Error (Module ShivaMailParser.py) - some issue in parsing file %s")
        return None

    # Call to ShivaMailRelayer.relay function which determines whether spam is new or old
    ShivaMailRelayer.relay(mailFields, key, msgMailRequest, exeSql)
    return None