Example #1
0
 def _msg_to_eml(self, msg_bytes: bytes) -> EmailMessage:
     """Build an eml (``EmailMessage``) out of the raw bytes of a msg.

     The bytes are handed to ``openMsg``; the resulting object is split
     into message, body and attachments by ``self._extract_msg_objects``
     and those three parts are reassembled by ``self._build_eml``.
     """
     # NOTE: per the original author, the msg object is where the original
     # raw header is stored.
     parsed_msg = openMsg(msg_bytes)
     parts = self._extract_msg_objects(parsed_msg)
     message, body, attachments = parts
     return self._build_eml(message, body, attachments)
Example #2
0
def main():
    """Command-line entry point: dump or de-encapsulate a msg's RTF body.

    Parses the CLI arguments, configures logging and opens the msg at
    ``args.msg_path``. Then either:

    * writes the raw RTF body to ``args.outfile`` (or prints it when no
      outfile is given) if ``args.extract_raw`` is set, or
    * de-encapsulates the RTF and prints the resulting HTML or plain text.

    If the msg has no RTF body an error is logged and nothing is printed.
    """
    args = parse_arguments()
    set_logging(args.verbose, args.debug)
    with extract_msg.openMsg(args.msg_path) as msg:
        # Attachments are only inspected for debug logging; a KeyError from
        # the lookup is treated as "no attachments" rather than a failure.
        attachments = None
        try:
            attachments = get_attachments(msg)
        except KeyError:
            log.debug("Msg does not have attachments embedded. Likely you used a low quality eml -> msg converter for testing and it provided somewhat broken msg files. Or at least that's when this pops off the most for me.")
        if attachments is None:
            log.debug("No attachments found in msg.")
        else:
            log.debug("%d attachments found in msg.", len(attachments))

        raw_rtf = msg.rtfBody
        if raw_rtf is None:
            # Without this guard the code below fails with an opaque
            # AttributeError/TypeError on None.
            log.error("Msg does not contain an RTF body; nothing to extract.")
            return

        if args.extract_raw:
            if args.outfile:
                with open(args.outfile, 'wb') as fp:
                    fp.write(raw_rtf)
            else:
                print(raw_rtf.decode())
        else:
            rtf_obj = DeEncapsulator(raw_rtf.decode())
            rtf_obj.deencapsulate()
            if rtf_obj.content_type == 'html':
                print(rtf_obj.html)
            else:
                print(rtf_obj.text)
def parse_msg(msg=None) -> List[Dict]:
    """Parse a .msg message, and any messages attached to it, into dicts.

    Args:
        msg: Either a path string, which is opened with
            ``extract_msg.openMsg``, or an already opened message object.

    Returns:
        A list with one dict per message. Dicts for attached messages come
        first and the dict for ``msg`` itself is last. Each dict has the keys
        ``to``, ``from``, ``recipients``, ``emails_in_body``, ``subject``,
        ``body``, ``date``, ``messageID``, ``inReplyTo`` and ``attachments``.
        On any failure the error is logged and an empty list is returned.
    """
    # Only a message opened by this call is closed by this call; a message
    # object passed in by the caller stays under the caller's control.
    opened_here = None
    try:
        messages = []
        if isinstance(msg, str):
            opened_here = extract_msg.openMsg(msg)
            msg = opened_here

        attachments = []

        for attachment in msg.attachments:
            if isinstance(attachment.data, extract_msg.message.Message):
                # Embedded message: parse it recursively and keep its results.
                messages += parse_msg(attachment.data)
            elif attachment.cid is not None:
                # Only attachments that carry a content id are listed.
                attachments.append({
                    "filename": attachment.longFilename,
                    "size": len(attachment.data),
                })

        # Sets are used to drop duplicate addresses; the resulting order is
        # therefore not guaranteed.
        parsed_message = {
            "to": list(set(parse_email_addresses(msg.to))),
            "from": parse_email_address(msg.sender),
            "recipients": list({parse_email_address(rec.email)
                                for rec in msg.recipients}),
            "emails_in_body": list(set(parse_email_addresses(msg.body))),
            "subject": msg.subject if msg.subject is not None else "",
            "body": remove_email_address(msg.body),
            "date": parse_datetime(msg.date),
            "messageID": msg.messageId if msg.messageId is not None else "",
            "inReplyTo": msg.inReplyTo if msg.inReplyTo is not None else "",
            "attachments": attachments,
        }

        messages.append(parsed_message)
        return messages
    except Exception as e:
        logger.error(f"Failed to parse .msg file: {msg}. Exception: {e}")
        return []
    finally:
        if opened_here is not None:
            # Release the file handle; a failure to close must not break the
            # "never raises" behaviour of this function.
            try:
                opened_here.close()
            except Exception as close_error:
                logger.warning(f"Failed to close .msg file: {close_error}")
Example #4
0
def run(config: config_loader.Config) -> None:
    """Parse the RTF given on the command line and optionally emit HTML.

    Input is taken from ``config.cli_args.file`` when it is set and exists,
    otherwise from ``config.cli_args.msg``. For a msg, every attachment is
    written into ``config.html``, the compressed RTF body is decompressed and
    saved under ``config.email_rtf`` with a ``.rtf`` suffix, and the
    decompressed bytes are parsed. When ``de_encapsulate_html`` is set, the
    parsed result is passed to ``de_encapsulate`` together with an ``.html``
    path under ``config.html``.

    If neither input is usable the function returns without doing anything.
    """
    cli_args = config.cli_args
    if cli_args.file and cli_args.file.exists():
        file_name = cli_args.file.name
        with open(cli_args.file, mode="rb") as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()
    elif cli_args.msg:
        file_name = cli_args.msg.name
        # Everything needed from the msg is read inside the context manager
        # so the handle is released as soon as possible.
        with em.openMsg(f"{cli_args.msg}") as msg:
            for attachment in msg.attachments:
                with open(config.html / f"{attachment.longFilename}", mode="wb") as att_file:
                    att_file.write(attachment.data)
            decompressed_rtf = cr.decompress(msg.compressedRtf)
        with open((config.email_rtf / cli_args.msg.name).with_suffix(".rtf"), mode="wb") as email_rtf:
            email_rtf.write(decompressed_rtf)
        with io.BytesIO(decompressed_rtf) as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()
    else:
        # Nothing was parsed, so there is nothing to de-encapsulate; without
        # this guard `rp` and `file_name` would be unbound below.
        return
    if cli_args.de_encapsulate_html:
        de_encapsulate(rp, (config.html / file_name).with_suffix(".html"))
 def get_text_with_msg(self) -> str:
     """Open the msg file at ``self.path`` via ``extract_msg.openMsg``.

     NOTE(review): the annotation says ``str``, but the value returned is
     whatever ``openMsg`` produces, passed through unchanged. Confirm which
     of the two callers actually expect.
     """
     opened_msg = extract_msg.openMsg(self.path)
     return opened_msg
Example #6
0
# Placeholders for the fields extracted from the message.
contact = ''
contactName = ''
contactEmail = ''
contactPhone = ''
contactMob = ''
contactAddress = ''
contactFax = ''
# NOTE(review): `emailbody` (all lowercase) is not reassigned in the visible
# script; the parsed body ends up in `emailBody` below. Kept as-is in case
# later code relies on it.
emailbody = ' '
signature = ''
address = ''
temp = ''
addressNo = 0


# Read the .msg file. Sender and body are copied out inside the context
# manager so the file handle is released right away.
with extract_msg.openMsg('test.msg') as msg:
    contact = msg.sender or ''
    messageBody = msg.body or ''

# Split the sender into display name and email address.
splitContact = contact.split('<')
# Keep only the leading "word word" / "word-word" part and strip whitespace
# so the name matches the text in the body.
nameMatch = re.search(r"(^(\w+)(\-|\s)(\w+))", splitContact[0])
contactName = nameMatch[0].strip() if nameMatch else splitContact[0].strip()
# A sender without "<address>" simply yields an empty email.
contactEmail = splitContact[1].replace('>', '') if len(splitContact) > 1 else ''

# Everything after the first occurrence of the sender's name in the body is
# treated as the signature. str.partition rejects an empty separator, so an
# empty name leaves the whole text in emailBody.
if contactName:
    emailBody, temp, signature = messageBody.partition(contactName)
else:
    emailBody, temp, signature = messageBody, '', ''

# Find the telephone number: grabs the first number found in the signature,
# so a second number (e.g. FAX) is not picked up.
phoneMatch = re.search(r"(\+\d{2}\s\d{1}\s\d{4}\s\d{4})|(\d{2}\s\d{4}\s\d{4})|(\d{3}[-\.\s]??\d{3}[-\.\s]??\d{4}|\(\d{3}\)\s*\d{3}[-\.\s]??\d{4}|\d{3}[-\.\s]??\d{4})", signature)
contactPhone = phoneMatch[0] if phoneMatch else ''
print(contactPhone)