def __init__(self, mypath, nfile):
    """Load one e-mail file, parse it and run the analysis.

    The file ``os.path.join(mypath, nfile)`` is parsed with ``mailparser``:
    ``.msg`` files go through ``parse_from_file_msg`` and ``.eml`` files
    through ``parse_from_file``. The extension check is case-insensitive.
    The parsed mail is stored in ``self.onemail`` and the result of
    ``self.analyse_email()`` in ``self.retval``.

    Args:
        mypath: Directory that contains the mail file.
        nfile: File name of the mail inside ``mypath``.

    Raises:
        SystemExit: With code 1 (after printing a message) when the file
            does not exist, has an unsupported extension, or parses to a
            mail without headers.
    """
    self.mypath = mypath
    self.nfile = nfile
    self.onemail = None
    self.retval = None

    filepath = os.path.join(mypath, nfile)
    if not os.path.isfile(filepath):
        print("File not found!")
        # ``exit`` is only injected by the ``site`` module; raising
        # SystemExit directly has the same effect and always works.
        raise SystemExit(1)

    # The dot is escaped so that only a real ".msg"/".eml" suffix matches.
    match_obj = re.search(r"\.(msg|eml)$", nfile, re.I)
    if not match_obj:
        print("Unsupported file type!")
        raise SystemExit(1)

    # The match is case-insensitive, so normalise before comparing;
    # otherwise "MAIL.MSG" would match the pattern but hit no branch.
    if match_obj.group(1).lower() == "msg":
        self.onemail = mailparser.parse_from_file_msg(filepath)
    else:
        self.onemail = mailparser.parse_from_file(filepath)

    if not self.onemail.headers:
        print("Broken file!")
        raise SystemExit(1)

    self.retval = self.analyse_email()
def main():
    """Command line entry point: parse one mail and print the requested parts.

    The mail is read from a file (optionally in Outlook msg format), from a
    string, or from stdin, depending on the parsed arguments. Each output
    flag that is set prints the matching part of the parsed mail through
    ``safe_print``.

    Raises:
        MailParserOutlookError: If Outlook format is requested together
            with stdin input.
    """
    args = get_args().parse_args()

    # Choose the loader that matches the input source.
    if args.file:
        load = (
            mailparser.parse_from_file_msg
            if args.outlook
            else mailparser.parse_from_file
        )
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # (argument flag, parser attribute) pairs, printed in this order.
    plain_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for flag, attribute in plain_outputs:
        if getattr(args, flag):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
def main():
    """Command line entry point: parse one mail and print the requested parts.

    The mail is read from a file (optionally in Outlook msg format) or from
    a string, depending on the parsed arguments. Each output flag that is
    set prints the matching part of the parsed mail through ``safe_print``.
    """
    args = get_args().parse_args()

    # Choose the loader that matches the input source.
    if args.file:
        load = (
            mailparser.parse_from_file_msg
            if args.outlook
            else mailparser.parse_from_file
        )
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)

    if args.json:
        # Round-trip through json to pretty-print without ASCII escaping.
        decoded = json.loads(parser.parsed_mail_json)
        safe_print(json.dumps(decoded, ensure_ascii=False, indent=4))

    # (argument flag, parser attribute) pairs, printed in this order.
    plain_outputs = (
        ("body", "body"),
        ("headers", "headers"),
        ("to", "to_"),
        ("from_", "from_"),
        ("subject", "subject"),
        ("receiveds", "receiveds"),
    )
    for flag, attribute in plain_outputs:
        if getattr(args, flag):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_category:
            safe_print(category)

    if args.anomalies:
        for anomaly in parser.anomalies:
            safe_print(anomaly)

    if args.senderip:
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments_list, args.attachments_hash)

    if args.mail_hash:
        print_mail_fingerprints(parser.body.encode("utf-8"))
def test_parse_from_file_msg(self):
    """Parse an Outlook msg sample and check its main fields.

    Sample mail from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0

    The mails used for the unit tests were picked at random on VirusTotal
    and were therefore already publicly available, so using them cannot
    be considered a privacy violation.
    """
    parsed = mailparser.parse_from_file_msg(mail_outlook_1)
    mail = parsed.mail

    # Attachments: the key is present and holds five entries.
    self.assertIn("attachments", mail)
    attachments = mail["attachments"]
    self.assertEqual(len(attachments), 5)

    # Sender: second element of the first "from" entry.
    self.assertIn("from", mail)
    sender_address = mail["from"][0][1]
    self.assertEqual(sender_address, "*****@*****.**")

    self.assertIn("subject", mail)
def main():
    """Command line entry point: parse one mail and print the requested parts.

    The mail is read from a file (optionally in Outlook msg format), from a
    string, or from stdin, depending on the parsed arguments. Each output
    flag that is set prints the matching part of the parsed mail through
    ``safe_print``. Debug messages go to a logger configured with the
    requested log level.

    Raises:
        MailParserOutlookError: If Outlook format is requested together
            with stdin input.
    """
    args = get_args().parse_args()
    log = custom_log(level=args.log_level)

    # Choose the loader that matches the input source.
    if args.file:
        if not args.outlook:
            parser = mailparser.parse_from_file(args.file)
        else:
            log.debug("Analysis Outlook mail")
            parser = mailparser.parse_from_file_msg(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # (argument flag, parser attribute) pairs, printed in this order.
    plain_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for flag, attribute in plain_outputs:
        if getattr(args, flag):
            safe_print(getattr(parser, attribute))

    if args.defects:
        log.debug("Printing defects")
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        log.debug("Printing sender IP")
        address = parser.get_server_ipaddress(args.senderip)
        safe_print(address if address else "Not Found")

    if args.attachments or args.attachments_hash:
        log.debug("Printing attachments details")
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        log.debug("Printing also mail fingerprints")
        print_mail_fingerprints(parser.body.encode("utf-8"))
def __parse(self, filepath):
    """Parse the file at ``filepath`` with ``parse_from_file_msg``.

    Returns whatever ``parse_from_file_msg`` returns for that path.
    """
    parsed = parse_from_file_msg(filepath)
    return parsed
def _email_parse_function(self, event, *args, **kwargs):
    """Function: Extract message headers and body parts from an email message (.eml or .msg).
    Any attachments found are added to the Incident as Artifacts if 'utilities_parse_email_attachments' is set to True

    The email comes either from the ``base64content`` input (decoded and
    parsed as a string) or from an incident/task attachment or artifact,
    which is downloaded to a temporary file and parsed by file extension
    (``.msg`` as Outlook message, anything else as a raw email).

    This is a generator: it yields ``StatusMessage`` objects while working,
    then one ``FunctionResult`` carrying the ``ResultPayload.done(...)``
    value. Any exception raised inside is answered by yielding a
    ``FunctionError``. The temporary directory is removed on exit.
    """
    try:
        log = logging.getLogger(__name__)

        # Set variables
        parsed_email = path_tmp_file = path_tmp_dir = reason = results = None

        # The REST client is created lazily: the base64content path only
        # needs it when attachments have to be posted as artifacts.
        res_client = None

        # Get the function inputs:
        fn_inputs = validate_fields(["incident_id"], kwargs)

        # Instantiate ResultPayload
        rp = ResultPayload(CONFIG_DATA_SECTION, **kwargs)

        # If its just base64content as input, use parse_from_string
        if fn_inputs.get("base64content"):
            yield StatusMessage("Processing provided base64content")
            parsed_email = mailparser.parse_from_string(
                b_to_s(base64.b64decode(fn_inputs.get("base64content"))))
            yield StatusMessage("Provided base64content processed")

        else:
            # Validate that either: (incident_id AND attachment_id OR artifact_id) OR (task_id AND attachment_id) is defined
            if not (fn_inputs.get("incident_id") and (fn_inputs.get("attachment_id") or fn_inputs.get("artifact_id"))) and \
                    not (fn_inputs.get("task_id") and fn_inputs.get("attachment_id")):
                raise FunctionError(
                    "You must define either: (incident_id AND attachment_id OR artifact_id) OR (task_id AND attachment_id)"
                )

            # Instantiate new Resilient API object
            res_client = self.rest_client()

            # Get attachment metadata
            attachment_metadata = get_file_attachment_metadata(
                res_client=res_client,
                incident_id=fn_inputs.get("incident_id"),
                artifact_id=fn_inputs.get("artifact_id"),
                task_id=fn_inputs.get("task_id"),
                attachment_id=fn_inputs.get("attachment_id"))

            # Get attachment content
            attachment_contents = get_file_attachment(
                res_client=res_client,
                incident_id=fn_inputs.get("incident_id"),
                artifact_id=fn_inputs.get("artifact_id"),
                task_id=fn_inputs.get("task_id"),
                attachment_id=fn_inputs.get("attachment_id"))

            # Write the attachment_contents to a temp file
            path_tmp_file, path_tmp_dir = write_to_tmp_file(
                attachment_contents,
                tmp_file_name=attachment_metadata.get("name"))

            # Get the file_extension
            file_extension = os.path.splitext(path_tmp_file)[1]

            if file_extension == ".msg":
                yield StatusMessage("Processing MSG File")
                try:
                    parsed_email = mailparser.parse_from_file_msg(
                        path_tmp_file)
                    yield StatusMessage("MSG File processed")
                except Exception as err:
                    reason = u"Could not parse {0} MSG File".format(
                        attachment_metadata.get("name"))
                    yield StatusMessage(reason)
                    results = rp.done(success=False,
                                      content=None,
                                      reason=reason)
                    log.error(err)

            else:
                yield StatusMessage("Processing Raw Email File")
                try:
                    parsed_email = mailparser.parse_from_file(
                        path_tmp_file)
                    yield StatusMessage("Raw Email File processed")
                except Exception as err:
                    reason = u"Could not parse {0} Email File".format(
                        attachment_metadata.get("name"))
                    yield StatusMessage(reason)
                    results = rp.done(success=False,
                                      content=None,
                                      reason=reason)
                    log.error(err)

        if parsed_email is not None:
            if not parsed_email.mail:
                reason = u"Raw email in unsupported format. Failed to parse {0}".format(
                    u"provided base64content" if fn_inputs.get("base64content")
                    else attachment_metadata.get("name"))
                yield StatusMessage(reason)
                results = rp.done(success=False,
                                  content=None,
                                  reason=reason)

            else:
                # Load all parsed email attributes into a Python Dict.
                # No ``encoding`` keyword: json.loads() rejects it on
                # Python 3.9+, and UTF-8 is already the default.
                parsed_email_dict = json.loads(parsed_email.mail_json)
                parsed_email_dict["plain_body"] = parsed_email.text_plain_json
                parsed_email_dict["html_body"] = parsed_email.text_html_json
                yield StatusMessage("Email parsed")

                # If the input 'utilities_parse_email_attachments' is true and some attachments were found
                if fn_inputs.get("utilities_parse_email_attachments") and \
                        parsed_email_dict.get("attachments"):

                    yield StatusMessage(
                        "Attachments found in email message")
                    attachments_found = parsed_email_dict.get("attachments")

                    # The base64content path has not created a client yet.
                    if res_client is None:
                        res_client = self.rest_client()

                    # Loop attachments found
                    for attachment in attachments_found:

                        yield StatusMessage(
                            u"Attempting to add {0} to Incident: {1}".format(
                                attachment.get("filename"),
                                fn_inputs.get("incident_id")))

                        # Write the attachment.payload to a temp file
                        path_tmp_file, path_tmp_dir = write_to_tmp_file(
                            data=s_to_b(attachment.get("payload")),
                            tmp_file_name=attachment.get("filename"),
                            path_tmp_dir=path_tmp_dir)

                        artifact_description = u"This email attachment was found in the parsed email message from: '{0}'".format(
                            u"provided base64content" if fn_inputs.get("base64content")
                            else attachment_metadata.get("name"))

                        # POST the artifact to Resilient as an 'Email Attachment' Artifact
                        res_client.post_artifact_file(
                            uri=ARTIFACT_URI.format(
                                fn_inputs.get("incident_id")),
                            artifact_type=EMAIL_ATTACHMENT_ARTIFACT_ID,
                            artifact_filepath=path_tmp_file,
                            description=artifact_description,
                            value=attachment.get("filename"),
                            mimetype=attachment.get("mail_content_type"))

                results = rp.done(True, parsed_email_dict)

        else:
            # NOTE: also reached after a parse failure above, in which case
            # this replaces the more specific reason set in the except block.
            reason = u"Raw email in unsupported format. Failed to parse {0}".format(
                u"provided base64content" if fn_inputs.get("base64content")
                else attachment_metadata.get("name"))
            yield StatusMessage(reason)
            results = rp.done(success=False, content=None, reason=reason)

        log.info("Done")

        yield FunctionResult(results)
    except Exception:
        yield FunctionError()

    finally:
        # Remove the tmp directory
        if path_tmp_dir and os.path.isdir(path_tmp_dir):
            shutil.rmtree(path_tmp_dir)
def export_messages(
    path_attachments: pathlib.Path,
    path_export: pathlib.Path,
    image_as_jpg: bool = False,
):
    """
    Export every message found in a folder, together with an OCR transcript.

    Each entry of ``path_attachments`` is parsed as an Outlook .msg file.
    For every message a folder ``path_export/<number>/<timestamp>`` is
    created, where ``<number>`` is cut out of the sender address (the
    sender's fax number, per the original comment) and ``<timestamp>`` out
    of the first attachment's file name (the part between ``FAX_`` and the
    next dot). If that folder already exists, ``_(0)``, ``_(1)``, ... is
    appended until a free name is found. The folder receives:

    * ``mail.json`` - the parsed mail without "attachments" and "date"
    * the attachments written by the parser
    * optionally a .jpg copy of the first attachment
    * ``ocr_result.txt`` - tesseract output (language "deu") for the
      first attachment

    Parameters:
        path_attachments - pathlib.Path
            Path to folder containing all messages to be read.
        path_export - pathlib.Path
            Path to folder to save exports in
        image_as_jpg - bool
            If true, image is additionally exported as .jpg file,
            otherwise only the original attachment is kept

    Returns:
        None. All results are written to disk.

    Raises:
        AssertionError: if either folder does not exist, the input folder
            is empty, or the first sender entry is not a 2-tuple.
    """
    assert path_attachments.exists()
    assert path_export.exists()

    ocr_result_name = "ocr_result.txt"

    filenames = list(path_attachments.iterdir())
    assert len(filenames) > 0

    for filename in tqdm(filenames):
        msg = mailparser.parse_from_file_msg(filename)
        msg_dict = msg.mail_partial
        assert len(msg_dict["from"][0]) == 2

        # get fax number of sender
        number = msg_dict["from"][0][1].split("/")[1].split('"')[0]

        # one folder per sender number; create it if it doesn't exist
        path_praxis = path_export.joinpath(number)
        path_praxis.mkdir(exist_ok=True)

        name_tif = msg_dict["attachments"][0]["filename"]
        timestamp = name_tif.split("FAX_")[1].split(".")[0]

        # find a folder name that is not taken yet
        msg_export_path = path_praxis.joinpath(timestamp)
        i = 0
        _name = msg_export_path.name
        while msg_export_path.exists():
            msg_export_path = msg_export_path.with_name(f"{_name}_({i})")
            # Must increment: the former ``i = +1`` pinned the counter at 1
            # and looped forever once "_(0)" and "_(1)" both existed.
            i += 1
        msg_export_path.mkdir()

        # save json
        del msg_dict["attachments"]
        del msg_dict["date"]
        with open(msg_export_path.joinpath("mail.json"), "w") as f:
            json.dump(msg_dict, f)

        # write attachments next to the json
        msg.write_attachments(msg_export_path)

        # read image and run tesseract; the context manager closes the
        # underlying file handle once OCR is done
        path_to_current_tif = msg_export_path.joinpath(name_tif)
        with Image.open(path_to_current_tif) as img:
            if image_as_jpg:
                img.save(path_to_current_tif.with_suffix(".jpg"))
            msg_txt = pytesseract.image_to_string(img, lang="deu")

        # save text
        with open(msg_export_path.joinpath(ocr_result_name),
                  "w",
                  encoding="utf8") as f:
            f.write(msg_txt)
def process_mail(content, filetype, parent_id):
    """
    Single mail task.

    Parses one mail, hands it to ``MethMail`` and schedules follow-up tasks.

    Args:
        content: When ``filetype`` is None, the raw mail text (it is
            encoded to UTF-8, stored via ``store_mail`` and parsed from
            bytes). Otherwise the path of the mail file to parse.
        filetype: None for a mail received as text, ".msg" for an Outlook
            message file, anything else for a file parsed as a regular
            mail file.
        parent_id: Passed through to ``MethMail`` as ``parent_id``.

    Returns:
        A status string: a parsing error message, the error reported for
        an ignored mail, or the number of Cortex queries dispatched.

    Raises:
        Exception: With the reported error when processing failed and the
            mail is not flagged as ignored.
    """
    # IF PARSE FAILS IGNORE
    if filetype is None:
        try:
            content = content.encode("utf_8")
            filepath = store_mail(content)
            msg = mailparser.parse_from_bytes(content)
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from mail server: {}".format(e)

    elif filetype == ".msg":
        try:
            msg = mailparser.parse_from_file_msg(content)
            filepath = content
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from msg attachment: {}".format(e)

    else:
        try:
            msg = mailparser.parse_from_file(content)
            filepath = content
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from eml attachment: {}".format(e)

    info, _, cortex_api = get_info(mail=False)
    methmail = MethMail(
        msg,
        info=info,
        cortex_api=cortex_api,
        mail_filepath=filepath,
        parent_id=parent_id,
    )
    subtasks = methmail.process_mail()

    # Errors must be raised
    if not subtasks["ignore"] and subtasks["error"]:
        logging.error(subtasks["error"])
        raise Exception(subtasks["error"])

    # ignored are ok
    elif subtasks["ignore"]:
        return subtasks["error"]

    # Count what is actually dispatched: ``len(subtasks)`` would only be
    # the number of keys in the result dict, not the number of queries.
    dispatched = 0
    if subtasks["tasks"]:
        for (ioc, ioc_type, object_id, is_mail) in subtasks["tasks"]:
            check_cortex.apply_async(args=[
                ioc, ioc_type, object_id, is_mail,
                info.cortex_expiration_days
            ])
            dispatched += 1

    # Child mails (attachments) are processed as tasks of their own.
    if subtasks["childs"] and subtasks["id"]:
        for child_path, child_ext in subtasks["childs"]:
            process_mail.apply_async(
                args=[child_path, child_ext, subtasks["id"]])

    return "{} query run on cortex".format(dispatched)