class Mail(object):
    """A single mail message read from disk.

    Holds the headers, text content, URLs and attachments extracted by
    parse(), plus the list of analysis tasks created by analyze().
    """

    def __init__(self, path):
        """Initialize
        @param path: path to a file mail.msg
        """
        self.path = path
        self.msg_id = None
        self.msg_ori = None
        self.date = None
        self.sender = None
        self.sender_ip = None
        self.subject = None
        self.receiver = []
        self.cc = None
        self.content = None
        self.content_length = None
        self.status = 0
        self.urls = []
        self.attachments = []
        self.tasks = []
        self.safebrowsing = None
        self.dbmx = DatabaseMX()

    def get_msg_id(self):
        """Get msg_id
        @return: self.msg_id
        """
        return self.msg_id

    def get_status(self):
        """Get status
        @return: self.status
        """
        return self.status

    def get_path(self):
        """Get path
        @return: self.path
        """
        return self.path

    def get_safebrowsing(self):
        """Get safebrowsing
        @return: self.safebrowsing
        """
        return self.safebrowsing

    def get_tasks(self):
        """Get tasks
        @return: self.tasks
        """
        return self.tasks

    def count_urls(self):
        """Count URLs
        @return: number of entries in self.urls
        """
        return len(self.urls)

    def count_attachments(self):
        """Count attachments
        @return: number of entries in self.attachments
        """
        return len(self.attachments)

    def get_urls(self, content):
        """Get URLs from content of mail
        @param content: text to search; bytes are accepted as well, and
                        None yields an empty list
        @return: List URLs
        """
        if content is None:
            return []
        # On Python 3 a decoded payload is bytes while the pattern is str.
        # Every character the pattern can match is ASCII, so a latin-1
        # decode never fails and never alters a matched URL. On Python 2
        # bytes is str, so this branch is skipped.
        if isinstance(content, bytes) and not isinstance(content, str):
            content = content.decode("latin-1")
        # Raw strings keep the escaped parentheses as regex escapes instead
        # of (invalid) string escapes; the pattern itself is unchanged.
        url_pattern = (r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|"
                       r"(?:%[0-9a-fA-F][0-9a-fA-F]))+")
        return re.findall(url_pattern, content)

    def is_exist(self):
        """Check if this mail exist in database
        @return: True/False
        """
        return self.dbmx.mail_exist(self.msg_id)

    def process_urls(self, urls):
        """Process URLs
        @param urls: list of url need to be analyzed
        @return: False if a task could not be created, True otherwise
        """
        safebrowsing = SafeBrowsing()
        result = safebrowsing.lookup(urls)
        # Anything other than exactly True is kept as the lookup result
        if result is not True:
            self.safebrowsing = result

        for url in urls:
            # Check if this url is exists in our database
            # Should be check it again if we already check this url 1 day ago
            if self.dbmx.url_exist(url):
                continue

            request = Request()
            task_id = request.create_url(url)
            if task_id is False:
                return False

            # Okay, add it to task list
            for _id in task_id:
                self.tasks.append({
                    "task_id": _id,
                    "malscore": None,
                    "url": url,
                    "date_checked": None
                })

        # Explicit success value, matching process_attachments()
        return True

    def process_attachments(self, attachments):
        """Process attachments
        @param attachments: list of [filename, payload] need to be analyzed
        @return: False if a task could not be created, True otherwise
        """
        for attachment in attachments:
            filename = attachment[0]
            payload = attachment[1]

            # Calculate a SHA-256 hash from payload
            sha256 = hashlib.sha256(payload).hexdigest()

            # Check if hash of file is exists in our database
            if self.dbmx.attachment_exist(sha256):
                continue

            request = Request()
            task_id = request.create_file(filename, payload)
            if task_id is False:
                return False

            # Okay, add it to task list
            for _id in task_id:
                self.tasks.append({
                    "task_id": _id,
                    "malscore": None,
                    "attachment": filename,
                    "sha256": sha256,
                    "date_checked": None
                })

        return True

    def parse(self):
        """Parse mail
        Read a message file from self.path and get informations, urls
        and/or attachments in mail.

        self.content is reset on every call; self.urls and self.attachments
        are appended to, so they accumulate across repeated calls.
        """
        # Close the file as soon as the message has been read
        with open(self.path) as msg_file:
            message = email.message_from_file(msg_file)

        self.msg_id = message['message-id']
        self.msg_ori = message.as_string()
        self.date = message['date']
        self.sender = message['from']
        self.sender_ip = message['x-originating-ip']
        self.subject = message['subject']
        self.receiver = message.get_all('to', [])
        self.cc = message.get_all('cc', [])
        self.content = []
        self.content_length = message['content-length']

        for part in message.walk():
            ctype = part.get_content_maintype()

            if ctype == 'multipart':
                # Nothing to do
                continue

            if ctype == 'text':
                content = part.get_payload(decode=True)
                self.content.append(content)
                self.urls.extend(self.get_urls(content))

            if ctype in ['image', 'application', 'audio', 'video', 'font']:
                filename = part.get_filename()
                payload = part.get_payload(decode=True)
                # We can work with an array [filename, payload], so we don't
                # have to stored this file to hard disk
                self.attachments.append([filename, payload])

    def analyze(self):
        """Analyze mail
        After parse mail, foreach url and attachment in it, analyze and add
        it to database
        @return: False if processing urls or attachments failed, else True
        """
        # We should remove unchecking-task if process have any error
        if self.process_urls(self.urls) is False:
            log.error("%s:Skip this mail", self.msg_id)
            return False

        if self.process_attachments(self.attachments) is False:
            log.error("%s:Skip this mail", self.msg_id)
            return False

        return True