Ejemplo n.º 1
0
 def test_decompress(self):
     """
     Check ``decompress`` against two fixed payloads.

     The first payload carries the ``LZFu`` marker, the second one the
     ``MELA`` marker (the raw, i.e. not compressed, variant).
     """
     # Each entry is (payload handed to decompress, expected result).
     cases = (
         ('-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00'
          'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0',
          '{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n'),
         # raw decompression
         ('.\x00\x00\x00"\x00\x00\x00MELA \xdf\x12\xce{\\rtf1\\ansi\\an'
          'sicpg1252\\pard test}',
          '{\\rtf1\\ansi\\ansicpg1252\\pard test}'),
     )
     for payload, expected in cases:
         self.assertEqual(decompress(payload), expected)
Ejemplo n.º 2
0
 def test_decompress(self):
     """
     Verify the output of ``decompress`` for an ``LZFu`` payload and for
     a raw ``MELA`` payload.
     """
     lzfu_payload = (
         '-\x00\x00\x00+\x00\x00\x00LZFu\xf1\xc5\xc7\xa7\x03\x00\n\x00'
         'rcpg125B2\n\xf3 hel\t\x00 bw\x05\xb0ld}\n\x80\x0f\xa0')
     lzfu_expected = '{\\rtf1\\ansi\\ansicpg1252\\pard hello world}\r\n'
     self.assertEqual(decompress(lzfu_payload), lzfu_expected)

     # The MELA variant stores the RTF text without compression.
     raw_payload = (
         '.\x00\x00\x00"\x00\x00\x00MELA \xdf\x12\xce{\\rtf1\\ansi\\an'
         'sicpg1252\\pard test}')
     raw_expected = '{\\rtf1\\ansi\\ansicpg1252\\pard test}'
     self.assertEqual(decompress(raw_payload), raw_expected)
Ejemplo n.º 3
0
 def email_message(self, policy=email.policy.default):
     """
     Build an ``email`` message from the properties stored in ``self.mfs``.

     The headers are parsed from ``PidTagHeader``; the plain-text, HTML and
     compressed-RTF bodies (whichever are present) are added as
     alternatives, and every entry of ``self.attachments`` is added as an
     ``application/octet-stream`` attachment.

     :param policy: email policy handed to ``message_from_string``.
     :return: the assembled message object.
     """
     codepage = self.mfs.get('PidTagCodepage')
     # Without a stored codepage the original code left ``charset`` unbound
     # and crashed as soon as a plain-text body was present. Fall back to
     # UTF-8, which is also the default charset of the email package.
     charset = str(Codepage(codepage)) if codepage else 'utf-8'

     email_message = message_from_string(self.mfs.get('PidTagHeader'),
                                         policy=policy)
     # Drop whatever payload the header text produced; bodies are rebuilt
     # from the dedicated properties below.
     email_message.clear_content()

     plain_body = self.mfs.get('PidTagBody')
     if plain_body:
         email_message.add_alternative(plain_body,
                                       charset=charset,
                                       subtype='plain')

     html_body = self.mfs.get('PidTagBodyHtml')
     if html_body:
         email_message.add_alternative(html_body.encode('utf-8'),
                                       maintype='text',
                                       subtype='html')

     rtf_body = self.mfs.get('PidTagRtfCompressed')
     if rtf_body:
         email_message.add_alternative(compressed_rtf.decompress(rtf_body),
                                       maintype='application',
                                       subtype='rtf')

     for attachment in self.attachments:
         with attachment.open() as fh:
             email_message.add_attachment(fh.read(),
                                          maintype='application',
                                          subtype='octet-stream',
                                          filename=attachment.filename)
     return email_message
Ejemplo n.º 4
0
def processFile(filepath, filename, prefix='', _canceler=canceler.FAKE):
    """Process one message file inside a working directory made for it.

    The message is opened with ``extract_msg``, its attachments are saved,
    and its body is written out: the HTML body when there is one,
    otherwise the decompressed RTF body, which is then handed to
    ``callNode``. A formatted header is passed to ``addHeader``, followed
    by ``embedImages`` and ``callToPdf``.

    :param filepath: path passed to ``extract_msg.Message``.
    :param filename: name passed to ``mkdir``; the result becomes the
        working directory for the duration of the call.
    :param prefix: prefix passed to ``extract_msg.Message``.
    :param _canceler: not used by this function body.
    """
    messa = extract_msg.Message(filepath, prefix,
                                Attachment)  # Create Message object.
    header = getHeader(messa)  # Gets the header.
    formatted = getFormattedHeader(header)
    # Assemble the header paragraph from the configured header fields.
    headerString = ('<p class=MsoNormal>' +
                    ''.join(formatted[u] for u in headerVals) +
                    '</p><br>')
    ofilename = filename
    filename = mkdir(filename)
    os.chdir(filename)
    try:
        messa.save_attachments(True)  # Saves the attachments
        if messa.htmlBody is not None:  # Has html body?
            with open('output.html', 'wb') as o:
                o.write(messa.htmlBody)
        else:
            # Read contents, decompress them, and store them in a variable.
            rtfContents = compressed_rtf.decompress(messa.compressedRtf)
            with open('out.rtf', 'wb') as rtfFile:
                rtfFile.write(rtfContents)
            callNode(ofilename, filepath, filename)
            if not debug:
                os.remove('out.rtf')
        addHeader(headerString)
        embedImages()
        callToPdf(filename)
        if not debug:
            os.remove('output.html')
    finally:
        # Always move back to the parent directory, even when a step above
        # fails; otherwise the process would stay inside the message
        # directory for every later call.
        os.chdir('..')
Ejemplo n.º 5
0
    def load_message_stream(self, entry, is_top_level, doc):
        """Build ``self._MIME_MSG`` from a message entry and return it.

        Properties are parsed from the ``__properties_version1.0`` stream.
        Headers come from the transport headers when present, otherwise
        from the common-header helper. The ``BODY`` property becomes the
        message content; without it the decompressed ``RTF_COMPRESSED``
        property is attached as an ``.rtf`` file and a placeholder text is
        used as content. Finally every ``__attach_version1.0_#`` child
        stream is processed as an attachment.
        """
        # Parse the property stream of this entry.
        props = self.__parse_properties(entry['__properties_version1.0'],
                                        is_top_level, entry, doc)

        # Start from an empty MIME message.
        self._MIME_MSG = email.message.EmailMessage()

        # Prefer the raw transport headers; otherwise fall back to the
        # common headers (that helper hands back the property mapping).
        if 'TRANSPORT_MESSAGE_HEADERS' in props:
            self.__add_transport_headers(props)
        else:
            props = self.__add_common_headers(props)

        if 'BODY' not in props:
            # No plain-text body is available: use the rich text version.
            doc.rtf_attachments += 1
            fn = "messagebody_{}.rtf".format(doc.rtf_attachments)

            placeholder = (
                "<no plain text message body --- see attachment {}>".format(
                    fn))
            self._MIME_MSG.set_content(placeholder, cte='quoted-printable')

            # Decompress the value to Rich Text Format.
            from compressed_rtf import decompress
            rtf = decompress(props['RTF_COMPRESSED'])

            # Add the RTF file as an attachment.
            self._MIME_MSG.add_attachment(rtf,
                                          maintype="text",
                                          subtype="rtf",
                                          filename=fn)
        else:
            # Plain-text body taken from the BODY field.
            body = props['BODY']
            if isinstance(body, str):
                self._MIME_MSG.set_content(body, cte='quoted-printable')
            else:
                self._MIME_MSG.set_content(body,
                                           maintype="text",
                                           subtype="plain",
                                           cte='8bit')

        # # Copy over string values of remaining properties as headers
        # # so we don't lose any information.
        # for k, v in props.items():
        #   if k == 'RTF_COMPRESSED': continue # not interested, save output
        #   msg[k] = str(v)

        # Add attachments.
        for stream in entry:
            if stream.name.startswith("__attach_version1.0_#"):
                self.__process_attachment(stream, doc)

        return self._MIME_MSG
Ejemplo n.º 6
0
def save_data(in_filename, out_dir, titles, data, meta_data, compressed_rtfs):
    """Save found data to `out_dir`.

    :param in_filename: name used as prefix for the decompressed RTF files.
    :param out_dir: target directory; created when it does not exist.
    :param titles: attachment titles as UTF-8 encoded bytes.
    :param data: attachment payloads, indexed like ``titles``.
    :param meta_data: attachment meta payloads, indexed like ``titles``.
    :param compressed_rtfs: compressed RTF payloads; falsy entries are
        skipped.
    """
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(out_dir, exist_ok=True)

    # decode compressed RTF
    for index, compressed_rtf_data in enumerate(compressed_rtfs):
        if not compressed_rtf_data:
            continue
        filename = '{}_data_{}.rtf'.format(in_filename, index)
        print('Saving decompressed RTF data to: {}'.format(filename))
        out_data = compressed_rtf.decompress(compressed_rtf_data)
        with open(os.path.join(out_dir, filename), 'wb') as out_file:
            out_file.write(out_data)

    # save attachments and meta files
    for index, title in enumerate(titles):
        # Titles come from the parsed input, so keep only the final path
        # component: a title such as "../x" must not escape out_dir.
        filename = os.path.basename(title.decode('utf-8'))
        if filename in ('', '.', '..'):
            filename = 'attachment_{}'.format(index)
        print('Saving attachment to: {}'.format(filename))
        with open(os.path.join(out_dir, filename), 'wb') as out_file:
            out_file.write(data[index])

        meta_filename = '{}_meta_{}.raw'.format(filename, index)
        print('Saving attachment meta file to: {}'.format(meta_filename))
        with open(os.path.join(out_dir, meta_filename), 'wb') as out_file:
            out_file.write(meta_data[index])
Ejemplo n.º 7
0
 def test_hither_and_thither_long(self):
     """
     Round-trip an RTF string of more than 4096 characters through
     ``compress`` and ``decompress``.
     """
     pieces = ['{\\rtf1\\ansi\\ansicpg1252\\pard hello world']
     size = len(pieces[0])
     # Pad with filler text until the 4096 character mark is reached.
     while size < 4096:
         pieces.append("testtest")
         size += len("testtest")
     pieces.append("}")
     data = ''.join(pieces)
     packed = compress(data, compressed=True)
     self.assertEqual(decompress(packed), data)
Ejemplo n.º 8
0
 def rtfbody(self):
     """
     Return the decompressed RTF body.

     Returns ``None`` when ``self._rtfbody`` is empty. When the
     ``compressed_rtf`` package cannot be imported, a warning is logged
     and the stored (still compressed) value is returned unchanged.
     """
     if not self._rtfbody:
         return None
     try:
         from compressed_rtf import decompress
         # A single NUL byte is appended to the stored data before
         # decompression.
         return decompress(self._rtfbody + b'\x00')
     except ImportError:
         logger.warning("Returning compressed RTF. Install compressed_rtf to decompress")
         return self._rtfbody
Ejemplo n.º 9
0
 def rtfbody(self):
     """
     Give back the RTF body in decompressed form.

     ``None`` is returned when no RTF body is stored. If importing
     ``compressed_rtf`` fails, the compressed data is returned as-is
     after a warning has been logged.
     """
     if not self._rtfbody:
         # Nothing stored, nothing to decompress.
         return None
     try:
         from compressed_rtf import decompress
         padded = self._rtfbody + b'\x00'
         return decompress(padded)
     except ImportError:
         logger.warning("Returning compressed RTF. Install compressed_rtf to decompress")
         return self._rtfbody
Ejemplo n.º 10
0
def run(config: config_loader.Config) -> None:
    """Parse the RTF given on the command line and optionally de-encapsulate it.

    When ``cli_args.file`` is set and exists, that file is parsed
    directly. Otherwise, when ``cli_args.msg`` is set, the message is
    opened, its attachments are written below ``config.html``, its
    compressed RTF body is decompressed, saved below ``config.email_rtf``
    and parsed. When ``cli_args.de_encapsulate_html`` is set, the parser
    result is handed to ``de_encapsulate``.

    :param config: loaded configuration carrying ``cli_args`` and the
        ``html`` / ``email_rtf`` output locations.
    """
    cli_args = config.cli_args
    # Stay None when no input could be parsed, so the final step can tell.
    rp = None
    file_name = None

    if cli_args.file and cli_args.file.exists():
        file_name = cli_args.file.name
        with open(cli_args.file, mode="rb") as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()
    elif cli_args.msg:
        file_name = cli_args.msg.name
        msg = em.openMsg(f"{cli_args.msg}")
        for attachment in msg.attachments:
            with open(config.html / f"{attachment.longFilename}", mode="wb") as att_file:
                att_file.write(attachment.data)
        decompressed_rtf = cr.decompress(msg.compressedRtf)
        with open((config.email_rtf / file_name).with_suffix(".rtf"), mode="wb") as email_rtf:
            email_rtf.write(decompressed_rtf)
        with io.BytesIO(decompressed_rtf) as rtf_file:
            rp = Rtf_Parser(rtf_file=rtf_file)
            rp.parse_file()

    # Without a parsed input there is nothing to de-encapsulate; the
    # original code raised UnboundLocalError here in that case.
    if cli_args.de_encapsulate_html and rp is not None:
        de_encapsulate(rp, (config.html / file_name).with_suffix(".html"))
Ejemplo n.º 11
0
 def test_hither_and_thither(self):
     """
     Round-trip a short RTF fragment through ``compress`` and
     ``decompress`` and expect the input back.
     """
     original = '{\\rtf1\\ansi\\mac\\deff0\\deftab720'
     packed = compress(original, compressed=True)
     self.assertEqual(decompress(packed), original)
Ejemplo n.º 12
0
    def _set_properties(self):
        """Copy commonly needed message properties onto this instance.

        Values are read from ``self._message.properties`` and from the
        parsed transport headers; for several fields one source acts as
        fallback for the other. The body prefers HTML over plain text and
        falls back to the decompressed RTF body when both are empty and
        ``compressed_rtf`` can be imported.
        """
        props = self._message.properties

        # Expose generally required properties as plain attributes.
        self.subject = props.get("Subject")

        raw_header = props.get("TransportMessageHeaders")
        self.header = parse_email_headers(raw_header, True)
        self.header_dict = parse_email_headers(raw_header) or {}
        headers = self.header_dict

        self.created_date = props.get("CreationTime")
        self.received_date = props.get("ReceiptTime")

        # Each "a or b" below takes b only when a is missing or empty.
        self.sent_date = props.get("DeliverTime") or headers.get("Date")
        self.sender = (headers.get("From")
                       or props.get("SenderRepresentingSmtpAddress"))
        self.reply_to = (headers.get("Reply-To")
                         or props.get("ReplyRecipientNames"))

        self.message_id = props.get("InternetMessageId")

        self.to = (headers.get("TO")
                   or props.get("DisplayTo")
                   or props.get("ReceivedRepresentingSmtpAddress"))

        # NOTE: control-character cleanup of the CC value is disabled:
        # if cc_address:
        #     cc_address = [CONTROL_CHARS.sub(" ", cc_add) for cc_add in cc_address.split(",")]
        self.cc = headers.get("CC")
        self.bcc = headers.get("BCC")

        # Prefer HTML over plain text.
        body_key = "Html" if "Html" in props else "Body"
        self.body = props.get(body_key)

        if self.body or "RtfCompressed" not in props:
            return

        # Last resort: the compressed RTF body, if the optional
        # dependency is installed.
        try:
            import compressed_rtf
        except ImportError:
            return
        self.body = compressed_rtf.decompress(props['RtfCompressed'])
Ejemplo n.º 13
0
 def test_hither_and_thither(self):
     """
     Compress a short RTF fragment, decompress the result and check that
     the original text comes back.
     """
     source_text = '{\\rtf1\\ansi\\mac\\deff0\\deftab720'
     round_tripped = decompress(compress(source_text, compressed=True))
     self.assertEqual(round_tripped, source_text)
Ejemplo n.º 14
0
def load_message_stream(entry: CompoundFileEntity, is_top_level: bool,
                        doc: CompoundFileReader):
    """Build an ``email.message.EmailMessage`` from one message entry.

    Properties are parsed from the ``__properties_version1.0`` stream.
    Headers are taken from ``TRANSPORT_MESSAGE_HEADERS`` when present,
    otherwise a few common headers are built from metadata properties
    (which are removed from ``props`` as they are consumed). The ``BODY``
    property becomes the content; without it the decompressed
    ``RTF_COMPRESSED`` property is attached as an ``.rtf`` file. Child
    streams named ``__attach_version1.0_#...`` are processed as
    attachments.

    :param entry: the message storage entry.
    :param is_top_level: passed through to ``parse_properties``.
    :param doc: the containing document; its ``rtf_attachments`` counter
        is incremented when an RTF body is attached.
    :return: the assembled message.
    """
    # Load stream data.
    props = parse_properties(entry["__properties_version1.0"], is_top_level,
                             entry, doc)

    # Construct the MIME message....
    msg = email.message.EmailMessage()

    # Add the raw headers, if known.
    if "TRANSPORT_MESSAGE_HEADERS" in props:
        # Get the string holding all of the headers.
        headers = props["TRANSPORT_MESSAGE_HEADERS"]
        if isinstance(headers, bytes):
            headers = headers.decode("utf-8")

        # Remove content-type header because the body we can get this
        # way is just the plain-text portion of the email and whatever
        # Content-Type header was in the original is not valid for
        # reconstructing it this way.
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub is ``count``, so a positional re.I silently
        # disabled case-insensitive matching. The pattern is anchored to
        # the start of a line so that only a real header is removed.
        headers = re.sub(r"^Content-Type: .*(\n\s.*)*\n", "", headers,
                         flags=re.I | re.M)

        # Parse them.
        headers = email.parser.HeaderParser(
            policy=email.policy.default).parsestr(headers)

        # Copy them into the message object.
        for header, value in headers.items():
            msg[header] = value

    else:
        # Construct common headers from metadata.

        if "MESSAGE_DELIVERY_TIME" in props:
            msg["Date"] = formatdate(
                props["MESSAGE_DELIVERY_TIME"].timestamp())
            del props["MESSAGE_DELIVERY_TIME"]

        if "SENDER_NAME" in props:
            if "SENT_REPRESENTING_NAME" in props:
                representing = props["SENT_REPRESENTING_NAME"]
                # Mention the represented sender only when it differs.
                if representing and props["SENDER_NAME"] != representing:
                    props["SENDER_NAME"] += " (" + representing + ")"
                del props["SENT_REPRESENTING_NAME"]
            if props["SENDER_NAME"]:
                msg["From"] = formataddr((props["SENDER_NAME"], ""))
            del props["SENDER_NAME"]

        # Remaining one-to-one mappings; empty values produce no header.
        for prop_name, header_name in (("DISPLAY_TO", "To"),
                                       ("DISPLAY_CC", "CC"),
                                       ("DISPLAY_BCC", "BCC"),
                                       ("SUBJECT", "Subject")):
            if prop_name in props:
                if props[prop_name]:
                    msg[header_name] = props[prop_name]
                del props[prop_name]

    # Add the plain-text body from the BODY field.
    if "BODY" in props:
        body = props["BODY"]
        if isinstance(body, str):
            msg.set_content(body, cte="quoted-printable")
        else:
            msg.set_content(body, maintype="text", subtype="plain", cte="8bit")

    # Plain-text is not available. Use the rich text version.
    else:
        doc.rtf_attachments += 1
        fn = f"messagebody_{doc.rtf_attachments}.rtf"

        msg.set_content(
            f"<no plain text message body --- see attachment {fn}>",
            cte="quoted-printable",
        )

        # Decompress the value to Rich Text Format.
        rtf = props["RTF_COMPRESSED"]
        rtf = compressed_rtf.decompress(rtf)

        # Add RTF file as an attachment.
        msg.add_attachment(rtf, maintype="text", subtype="rtf", filename=fn)

    # # Copy over string values of remaining properties as headers
    # # so we don't lose any information.
    # for k, v in props.items():
    #   if k == 'RTF_COMPRESSED': continue # not interested, save output
    #   msg[k] = str(v)

    # Add attachments.
    for stream in entry:
        if stream.name.startswith("__attach_version1.0_#"):
            process_attachment(msg, stream, doc)

    return msg
Ejemplo n.º 15
0
 def rtfBody(self):
     """
     Return the result of decompressing ``self.compressedRtf``.
     """
     decompressed = decompress(self.compressedRtf)
     return decompressed
Ejemplo n.º 16
0
    def _set_properties(self):
        """Expose frequently used message properties as attributes.

        Values come from ``self._message.properties`` and from the parsed
        transport headers, with one acting as fallback for the other on
        several fields. The body prefers HTML over plain text, is decoded
        when it is a bytes object, and falls back to the decompressed RTF
        body when it is empty and ``compressed_rtf`` can be imported.
        """
        values = self._message.properties

        # Generally required properties, for easy access on the instance.
        self.subject = values.get("Subject")

        transport_header = values.get("TransportMessageHeaders")
        self.header = parse_email_headers(transport_header, True)
        self.header_dict = parse_email_headers(transport_header) or {}
        parsed = self.header_dict

        self.created_date = values.get("CreationTime")
        self.received_date = values.get("ReceiptTime")

        # "a or b" picks b only when a is missing or empty.
        self.sent_date = values.get("DeliverTime") or parsed.get("Date")
        self.sender = (parsed.get("From")
                       or values.get("SenderRepresentingSmtpAddress"))
        self.reply_to = (parsed.get("Reply-To")
                         or values.get("ReplyRecipientNames"))

        self.message_id = values.get("InternetMessageId")

        self.to = (parsed.get("TO")
                   or values.get("DisplayTo")
                   or values.get("ReceivedRepresentingSmtpAddress"))

        # NOTE: control-character cleanup of the CC value is disabled:
        # if cc_address:
        #     cc_address = [CONTROL_CHARS.sub(" ", cc_add) for cc_add in cc_address.split(",")]
        self.cc = parsed.get("CC")
        self.bcc = parsed.get("BCC")

        # Prefer HTML over plain text.
        self.body = values.get("Html" if "Html" in values else "Body")

        # Trying to decode body if is bytes obj. This is not the way to go. Quick-fix only.
        # See IMAP specs. Use charset-normalizer, cchardet or chardet as last resort.
        if isinstance(self.body, bytes):
            self.body = self.body.decode("utf-8", "ignore")

        if self.body or "RtfCompressed" not in values:
            return

        # Last resort: the compressed RTF body, when the optional
        # dependency is available.
        try:
            import compressed_rtf
        except ImportError:
            return
        self.body = compressed_rtf.decompress(values["RtfCompressed"])
Ejemplo n.º 17
0
def load_message_stream(entry, is_top_level, doc):
    """Build an ``email.message.EmailMessage`` from one message entry.

    Properties are parsed from the ``__properties_version1.0`` stream.
    Headers are taken from ``TRANSPORT_MESSAGE_HEADERS`` when present,
    otherwise common headers are built from whichever metadata
    properties exist (they are removed from ``props`` as they are
    consumed). The ``BODY`` property becomes the content; without it the
    decompressed ``RTF_COMPRESSED`` property is attached as an ``.rtf``
    file. Child streams named ``__attach_version1.0_#...`` are processed
    as attachments.

    :param entry: the message storage entry.
    :param is_top_level: passed through to ``parse_properties``.
    :param doc: the containing document; its ``rtf_attachments`` counter
        is incremented when an RTF body is attached.
    :return: the assembled message.
    """
    # Load stream data.
    props = parse_properties(entry['__properties_version1.0'], is_top_level,
                             entry, doc)

    # Construct the MIME message....
    msg = email.message.EmailMessage()

    # Add the raw headers, if known.
    if 'TRANSPORT_MESSAGE_HEADERS' in props:
        # Get the string holding all of the headers.
        headers = props['TRANSPORT_MESSAGE_HEADERS']
        if isinstance(headers, bytes):
            headers = headers.decode("utf-8")

        # Remove content-type header because the body we can get this
        # way is just the plain-text portion of the email and whatever
        # Content-Type header was in the original is not valid for
        # reconstructing it this way.
        # The flags must be passed by keyword: the fourth positional
        # argument of re.sub is ``count``, so a positional re.I silently
        # disabled case-insensitive matching. A raw string avoids the
        # invalid "\s" escape, and the pattern is anchored to the start
        # of a line so that only a real header is removed.
        headers = re.sub(r"^Content-Type: .*(\n\s.*)*\n", "", headers,
                         flags=re.I | re.M)

        # Parse them.
        headers = email.parser.HeaderParser(policy=email.policy.default)\
          .parsestr(headers)

        # Copy them into the message object.
        for header, value in headers.items():
            msg[header] = value

    else:
        # Construct common headers from metadata. Every property is
        # optional: a missing one used to abort with KeyError.

        if 'MESSAGE_DELIVERY_TIME' in props:
            msg['Date'] = formatdate(
                props['MESSAGE_DELIVERY_TIME'].timestamp())
            del props['MESSAGE_DELIVERY_TIME']

        if 'SENDER_NAME' in props:
            if 'SENT_REPRESENTING_NAME' in props:
                representing = props['SENT_REPRESENTING_NAME']
                # Mention the represented sender only when it differs.
                if representing and props['SENDER_NAME'] != representing:
                    props['SENDER_NAME'] += " (" + representing + ")"
                del props['SENT_REPRESENTING_NAME']
            if props['SENDER_NAME']:
                msg['From'] = formataddr((props['SENDER_NAME'], ""))
            del props['SENDER_NAME']

        # Remaining one-to-one mappings; empty values produce no header.
        for prop_name, header_name in (('DISPLAY_TO', 'To'),
                                       ('DISPLAY_CC', 'CC'),
                                       ('DISPLAY_BCC', 'BCC'),
                                       ('SUBJECT', 'Subject')):
            if prop_name in props:
                if props[prop_name]:
                    msg[header_name] = props[prop_name]
                del props[prop_name]

    # Add the plain-text body from the BODY field.
    if 'BODY' in props:
        body = props['BODY']
        if isinstance(body, str):
            msg.set_content(body, cte='quoted-printable')
        else:
            msg.set_content(body, maintype="text", subtype="plain", cte='8bit')

    # Plain-text is not available. Use the rich text version.
    else:
        doc.rtf_attachments += 1
        fn = "messagebody_{}.rtf".format(doc.rtf_attachments)

        msg.set_content(
            "<no plain text message body --- see attachment {}>".format(fn),
            cte='quoted-printable')

        # Decompress the value to Rich Text Format.
        import compressed_rtf
        rtf = props['RTF_COMPRESSED']
        rtf = compressed_rtf.decompress(rtf)

        # Add RTF file as an attachment.
        msg.add_attachment(rtf, maintype="text", subtype="rtf", filename=fn)

    # # Copy over string values of remaining properties as headers
    # # so we don't lose any information.
    # for k, v in props.items():
    #   if k == 'RTF_COMPRESSED': continue # not interested, save output
    #   msg[k] = str(v)

    # Add attachments.
    for stream in entry:
        if stream.name.startswith("__attach_version1.0_#"):
            process_attachment(msg, stream, doc)

    return msg
Ejemplo n.º 18
0
 def rtfBody(self):
     """
     Decompress ``self.compressedRtf`` with ``compressed_rtf`` and return
     the result.
     """
     rtf_body = compressed_rtf.decompress(self.compressedRtf)
     return rtf_body