def _msg_to_eml(self, msg_bytes: bytes) -> EmailMessage:
    """Turn the raw bytes of a msg into an eml.

    The bytes are opened with ``openMsg``; the resulting object is split
    into its message, body and attachments by
    ``self._extract_msg_objects`` and those three parts are handed to
    ``self._build_eml``, whose result is returned.
    """
    # The parsed msg object is where the original raw header is kept.
    parsed_msg = openMsg(msg_bytes)
    header_part, body_part, attachment_part = self._extract_msg_objects(parsed_msg)
    return self._build_eml(header_part, body_part, attachment_part)
def main():
    """Command-line entry point: dump the RTF body of a .msg file.

    Reads the options from ``parse_arguments()``, configures logging, opens
    the msg at ``msg_path`` and logs (at debug level) how many attachments
    ``get_attachments`` reports. Then, depending on the options:

    * ``extract_raw`` with ``outfile``: the raw RTF bytes are written to
      that file;
    * ``extract_raw`` without ``outfile``: the decoded raw RTF is printed;
    * otherwise the RTF is run through ``DeEncapsulator`` and the resulting
      HTML (when ``content_type`` is ``'html'``) or text is printed.
    """
    cli = parse_arguments()
    set_logging(cli.verbose, cli.debug)

    with extract_msg.openMsg(cli.msg_path) as msg:
        try:
            found = get_attachments(msg)
        except KeyError:
            # A KeyError here is treated the same as "no attachments".
            found = None
            log.debug("Msg does not have attachments embedded. Likely you used a low quality eml -> msg converter for testing and it provided somewhat broken msg files. Or at least that's when this pops off the most for me.")

        if found is not None:
            log.debug("{0} attachments found in msg.".format(len(found)))
        else:
            log.debug("No attachments found in msg.")

        rtf_bytes = msg.rtfBody

        if cli.extract_raw:
            # Raw mode: emit the RTF untouched, to a file or to stdout.
            if not cli.outfile:
                print(rtf_bytes.decode())
                return
            with open(cli.outfile, 'wb') as sink:
                sink.write(rtf_bytes)
            return

        # Default mode: de-encapsulate and print whichever flavour came out.
        decoder = DeEncapsulator(rtf_bytes.decode())
        decoder.deencapsulate()
        print(decoder.html if decoder.content_type == 'html' else decoder.text)
def parse_msg(msg=None) -> List[Dict]:
    """Parse an Outlook ``.msg`` message, including messages embedded in it.

    Args:
        msg: Either a string, which is opened with ``extract_msg.openMsg``,
            or an already opened message object. A message opened here is
            closed again before returning; a message object supplied by
            the caller is left open.

    Returns:
        A list of dicts, one per message. Messages found as attachments are
        parsed recursively and appear before the message that contains
        them. Each dict has the keys ``to``, ``from``, ``recipients``,
        ``emails_in_body``, ``subject``, ``body``, ``date``, ``messageID``,
        ``inReplyTo`` and ``attachments``. If anything goes wrong the error
        is logged and an empty list is returned; no exception propagates
        from the parsing itself.
    """
    source = msg  # what the caller passed; keeps the failure log readable
    opened_here = False
    try:
        if isinstance(msg, str):
            msg = extract_msg.openMsg(msg)
            opened_here = True

        messages = []
        attachments = []
        for attachment in msg.attachments:
            if isinstance(attachment.data, extract_msg.message.Message):
                # Embedded message: flatten its results into ours.
                messages += parse_msg(attachment.data)
            elif attachment.cid is not None:
                # Only attachments that carry a content id are recorded.
                # NOTE(review): attachments without a cid are skipped
                # entirely - confirm that this is intended.
                attachments.append({
                    "filename": attachment.longFilename,
                    "size": len(attachment.data),
                })

        messages.append({
            # Sets are used to drop duplicate addresses.
            "to": list(set(parse_email_addresses(msg.to))),
            "from": parse_email_address(msg.sender),
            "recipients": list({parse_email_address(rec.email) for rec in msg.recipients}),
            "emails_in_body": list(set(parse_email_addresses(msg.body))),
            "subject": msg.subject if msg.subject is not None else "",
            "body": remove_email_address(msg.body),
            "date": parse_datetime(msg.date),
            "messageID": msg.messageId if msg.messageId is not None else "",
            "inReplyTo": msg.inReplyTo if msg.inReplyTo is not None else "",
            "attachments": attachments,
        })
        return messages
    except Exception as e:
        # Deliberate catch-all: callers get [] instead of an exception.
        logger.error("Failed to parse .msg file: %s. Exception: %s", source, e)
        return []
    finally:
        # Release the underlying file only when this call opened it.
        if opened_here:
            msg.close()
def run(config: config_loader.Config) -> None:
    """Parse an RTF document taken from a file or from an Outlook ``.msg``.

    * If ``config.cli_args.file`` is set and exists, that file is parsed
      with ``Rtf_Parser``.
    * Otherwise, if ``config.cli_args.msg`` is set, the msg is opened, every
      attachment is written into ``config.html``, the compressed RTF body is
      decompressed, saved under ``config.email_rtf`` with an ``.rtf``
      suffix, and parsed with ``Rtf_Parser``.
    * If neither input is usable nothing is parsed and the function returns
      without doing anything.

    When something was parsed and ``config.cli_args.de_encapsulate_html`` is
    set, ``de_encapsulate`` is called with the parser and an ``.html`` path
    inside ``config.html`` named after the input.

    Args:
        config: Loaded configuration; ``cli_args``, ``html`` and
            ``email_rtf`` are read from it.
    """
    import os  # local import: only needed to sanitise attachment names

    cli_args = config.cli_args
    rp = None
    file_name = None

    if cli_args.file and cli_args.file.exists():
        file_name = cli_args.file.name
        with open(cli_args.file, mode="rb") as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()
    elif cli_args.msg:
        file_name = cli_args.msg.name
        # Context manager so the msg file is closed once we have its data.
        with em.openMsg(f"{cli_args.msg}") as msg:
            for attachment in msg.attachments:
                # The attachment name comes from the (untrusted) msg: keep
                # only its final component so it cannot escape config.html.
                safe_name = os.path.basename(
                    f"{attachment.longFilename}".replace("\\", "/")
                )
                with open(config.html / safe_name, mode="wb") as att_file:
                    att_file.write(attachment.data)
            decompressed_rtf = cr.decompress(msg.compressedRtf)
        with open((config.email_rtf / cli_args.msg.name).with_suffix(".rtf"), mode="wb") as email_rtf:
            email_rtf.write(decompressed_rtf)
        with io.BytesIO(decompressed_rtf) as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()

    # Nothing was parsed (no usable input): there is nothing to de-encapsulate.
    if rp is not None and cli_args.de_encapsulate_html:
        de_encapsulate(rp, (config.html / file_name).with_suffix(".html"))
def get_text_with_msg(self) -> str:
    """Open the file at ``self.path`` with ``extract_msg.openMsg`` and return the result.

    NOTE(review): the annotation promises ``str`` but the value returned is
    whatever ``openMsg`` produces, passed through unchanged - confirm what
    callers expect.
    """
    opened = extract_msg.openMsg(self.path)
    return opened
# Extract the sender's name, e-mail address and first phone number from
# 'test.msg'. Every field starts out empty and stays empty when the
# corresponding piece of information cannot be found.
contact = ''
contactName = ''
contactEmail = ''
contactPhone = ''
contactMob = ''
contactAddress = ''
contactFax = ''
# NOTE(review): 'emailbody' (lower-case b) is only ever initialised; the
# parsed body is stored in 'emailBody'. Both names are kept in case later
# code refers to either - confirm and drop the unused one.
emailbody = ' '
emailBody = ''
signature = ''
address = ''
temp = ''
addressNo = 0

# reads .msg file
msg = extract_msg.openMsg('test.msg')
contact = msg.sender or ''

# split name and email address (expected form: "Name <address>")
splitContact = contact.split('<')

# removes any whitespace to match with text in body
nameMatch = re.search(r"(^(\w+)(\-|\s)(\w+))", splitContact[0])
# Fall back to the whole display name when it is not "word word"/"word-word".
contactName = nameMatch[0].strip() if nameMatch else splitContact[0].strip()

# The address is only available when the sender contained a '<'.
contactEmail = splitContact[1].replace('>', '') if len(splitContact) > 1 else ''

# Everything after the first occurrence of the sender's name in the body is
# treated as the signature. str.partition rejects an empty separator, so
# without a name the whole text is kept as the body.
bodyText = msg.body or ''
if contactName:
    emailBody, temp, signature = bodyText.partition(contactName)
else:
    emailBody = bodyText

# Find Telephone number , grabs first number found . Wont grab second telephone number e.g. FAX
phoneMatch = re.search(
    r"(\+\d{2}\s\d{1}\s\d{4}\s\d{4})|(\d{2}\s\d{4}\s\d{4})|(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]??\d{4})",
    signature,
)
contactPhone = phoneMatch[0] if phoneMatch else ''
print(contactPhone)