def validate(msg):
    """
    Build a report describing whether ``msg`` can be opened as an MSG file.

    :param msg: the input to check. It is handed to ``olefile.isOleFile``
        and, when that succeeds, to ``Message``.
    :return: a dict with an ``'input'`` entry (full class name, whether the
        input has a length, and that length or ``None``), an ``'olefile'``
        entry (whether olefile accepts the input) and, only when the input
        is a valid OLE file, a ``'message'`` entry recording whether
        ``Message`` could be initialized plus the ``validate_msg`` result.
    """
    # Evaluate once; the answer is needed for two of the report fields.
    input_has_len = has_len(msg)
    validation_dict = {
        'input': {
            'class': get_full_class_name(msg), # Get the full name of the class
            'has_len': input_has_len, # Does the input have a __len__ attribute?
            'len': len(msg) if input_has_len else None, # If input has __len__, put the value here
        },
        'olefile': {
            'valid': olefile.isOleFile(msg),
        },
    }
    if not validation_dict['olefile']['valid']:
        return validation_dict

    validation_dict['message'] = {
        'initializes': False,
    }
    try:
        msg_instance = Message(msg)
    except NotImplementedError:
        # Should we have a special procedure for handling it if we get "not implemented"?
        pass
    except Exception:
        # Best effort: any ordinary failure simply leaves 'initializes' as
        # False. KeyboardInterrupt and SystemExit are deliberately not caught.
        pass
    else:
        validation_dict['message']['initializes'] = True
        validation_dict['message']['msg'] = validate_msg(msg_instance)
    return validation_dict
Example #2
0
 def _extract_msg_objects(self, msg_obj: MsgObj):
     """Collect the pieces of a msg that are needed to build an eml.

     Returns a ``(message, body, attachments)`` tuple: the header stream
     parsed into an email message, a dict of body variants keyed by
     ``'text'``, ``'html'`` and ``'rtf'`` (only those that are present),
     and the attachments of ``msg_obj``. When a missing html or plain
     text body is recovered from the RTF body, ``self.encapsulated_body``
     is set to the matching content type.
     """
     header_text = msg_obj._getStringStream('__substg1.0_007D')
     message = email.message_from_string(header_text, policy=policy.default)

     body = {}

     # Plain text body.
     if msg_obj.body is not None:
         body['text'] = {
             "obj": msg_obj.body,
             "subtype": 'plain',
             "charset": "utf-8",
             "cte": "base64",
         }

     # HTML body; the charset comes from property 3FDE0003 when present,
     # otherwise from the string encoding of the msg.
     if msg_obj.htmlBody is not None:
         try:
             html_codec = codepage2codec(
                 msg_obj.mainProperties['3FDE0003'].value)
         except KeyError:
             html_codec = msg_obj.stringEncoding
         # NOTE(review): decode() uses the default codec, not html_codec --
         # confirm this is intended.
         body['html'] = {
             'obj': msg_obj.htmlBody.decode(),
             "subtype": 'html',
             "charset": html_codec,
             "cte": "base64",
         }

     # RTF body, which may also encapsulate an html or plain text body.
     if msg_obj.rtfBody is not None:
         body['rtf'] = {
             "obj": msg_obj.rtfBody.decode(),
             "subtype": 'rtf',
             "charset": 'ascii',
             "cte": "base64",
         }
         try:
             decapsulator = DeEncapsulator(msg_obj.rtfBody)
             decapsulator.deencapsulate()
             html_missing = msg_obj.htmlBody is None
             if decapsulator.content_type == "html" and html_missing:
                 self.encapsulated_body = 'text/html'
                 body['html'] = {
                     "obj": decapsulator.html,
                     "subtype": 'html',
                     "charset": decapsulator.text_codec,
                     "cte": "base64",
                 }
             elif decapsulator.content_type == "text" and msg_obj.body is None:
                 self.encapsulated_body = 'text/plain'
                 body['text'] = {
                     "obj": decapsulator.plain_text,
                     "subtype": 'plain',
                     "charset": decapsulator.text_codec,
                 }
         except NotEncapsulatedRtf:
             logger.debug("RTF body in Msg object is not encapsualted.")
         except MalformedEncapsulatedRtf:
             logger.info(
                 "RTF body in Msg object contains encapsulated content, but it is malformed and can't be converted."
             )

     return message, body, msg_obj.attachments
Example #3
0
    def _get_parts_outlook(self, data):
        """
        Yield the parts of an Outlook message given as raw ``data``.

        Yields, in order: the header parts produced by
        ``self._get_headparts``, the text and html bodies of the message,
        and then one result per attachment. Attachments of type ``'msg'``
        contribute their own text and html bodies under a numbered
        ``MSG<n>`` name; attachments whose data is not a buffer are
        skipped with a warning.
        """
        def as_bytes(value):
            # Text is encoded with this unit's codec; bytes pass through.
            if isinstance(value, bytes):
                return value
            return value.encode(self.codec)

        def body_results(name, msg):
            if msg.body:
                yield UnpackResult(F'{name}.TXT', as_bytes(msg.body))
            if msg.htmlBody:
                yield UnpackResult(F'{name}.HTM', as_bytes(msg.htmlBody))

        embedded = 0

        with Message(bytes(data)) as outlook:
            yield from self._get_headparts(outlook.header.items())
            yield from body_results('BODY', outlook)
            for attachment in outlook.attachments:
                if attachment.type == 'msg':
                    # Embedded message: emit its bodies instead of raw data.
                    embedded += 1
                    yield from body_results(F'MSG{embedded:d}',
                                            attachment.data)
                elif not isbuffer(attachment.data):
                    self.log_warn(
                        F'unknown attachment of type {attachment.type}, please report this!'
                    )
                else:
                    yield UnpackResult(
                        attachment.longFilename or attachment.shortFilename,
                        attachment.data)
0
def main():
    """
    Command line entry point.

    Parses ``sys.argv`` with ``utils.get_command_args`` and then runs one of
    three modes:

    * ``args.dev``: hands over to ``extract_msg.dev.main``.
    * ``args.validate``: validates every given file, pretty-prints the
      results and writes them to a timestamped JSON file in the current
      directory.
    * otherwise: opens every given file as a ``Message`` and either prints
      its body (``args.dump_stdout``) or saves it into the output directory.

    Errors raised while processing a single file are printed with their
    traceback and do not stop the remaining files from being processed.
    """
    args = utils.get_command_args(sys.argv[1:])
    level = logging.INFO if args.verbose else logging.WARNING
    # Store this just in case the paths that have been given are relative.
    # ``os.getcwdu`` only exists on Python 2, so fall back to ``os.getcwd``
    # (which already returns text on Python 3).
    currentdir = getattr(os, 'getcwdu', os.getcwd)()
    if args.out_path:
        if not os.path.exists(args.out_path):
            os.makedirs(args.out_path)
        out = args.out_path
    else:
        out = currentdir
    if args.dev:
        import extract_msg.dev
        extract_msg.dev.main(args, sys.argv[1:])
    elif args.validate:
        import json
        import pprint
        import time

        from extract_msg import validation

        val_results = {x[0]: validation.validate(x[0]) for x in args.msgs}
        filename = 'validation {}.json'.format(int(time.time()))
        print('Validation Results:')
        pprint.pprint(val_results)
        print('These results have been saved to {}'.format(filename))
        with open(filename, 'w') as fil:
            fil.write(json.dumps(val_results))
        utils.get_input('Press enter to exit...')
    else:
        if not args.dump_stdout:
            utils.setup_logging(args.config_path, level, args.log,
                                args.file_logging)
        for x in args.msgs:
            try:
                with Message(x[0]) as msg:
                    # Right here we should still be in the path in currentdir
                    if args.dump_stdout:
                        print(msg.body)
                    else:
                        os.chdir(out)
                        msg.save(toJson=args.json,
                                 useFileName=args.use_filename,
                                 ContentId=args.cid)
            except Exception:
                print("Error with file '" + x[0] + "': " +
                      traceback.format_exc())
            # Go back so the next (possibly relative) input path resolves.
            os.chdir(currentdir)
Example #5
0
def openMsg(path,
            prefix='',
            attachmentClass=None,
            filename=None,
            delayAttachments=False,
            strict=True):
    """
    Function to automatically open an MSG file and detect what type it is.

    :param path: path to the msg file in the system or is the raw msg file.
    :param prefix: used for extracting embeded msg files
        inside the main one. Do not set manually unless
        you know what you are doing.
    :param attachmentClass: optional, the class the Message object
        will use for attachments. You probably should
        not change this value unless you know what you
        are doing.
    :param filename: optional, the filename to be used by default when saving.
    :param delayAttachments: optional, delays the initialization of attachments
        until the user attempts to retrieve them. Allows MSG files with bad
        attachments to be initialized so the other data can be retrieved.
    :param strict: optional, if `True` this function will raise an exception
        when it cannot identify what MSGFile derivitive to use. Otherwise, it
        will log the error and return a basic MSGFile instance.

    :return: a Contact for class types starting with ``IPM.Contact``, a
        Message for class types starting with ``IPM.Note``, or (only when
        not strict) the basic MSGFile instance.
    :raises UnrecognizedMSGTypeError: if the class type is not recognized
        and :param strict: is `True`.
    """
    from extract_msg.attachment import Attachment
    from extract_msg.contact import Contact
    from extract_msg.message import Message
    from extract_msg.msg import MSGFile

    attachmentClass = Attachment if attachmentClass is None else attachmentClass

    # Open a basic instance first, only to find out the class type.
    msg = MSGFile(path, prefix, attachmentClass, filename)
    classType = msg.classType
    if classType.startswith('IPM.Contact'):
        # The probe is no longer needed once a specialised instance is made.
        msg.close()
        return Contact(path, prefix, attachmentClass, filename)
    if classType.startswith('IPM.Note'):
        msg.close()
        return Message(path, prefix, attachmentClass, filename,
                       delayAttachments)

    error = (
        'Could not recognize msg class type "{}". It is recommended you report this to the developers.'
        .format(classType))
    if strict:
        # Nothing is returned on this path, so do not leave the file open.
        msg.close()
        raise UnrecognizedMSGTypeError(error)
    logger.error(error)
    return msg
Example #6
0
    def _get_parts_outlook(self, data):
        """
        Return the parts of an Outlook message given as raw ``data``.

        The result is a list of ``EmailPart`` items: the text body and the
        html body (each only when non-empty, and without a name), followed
        by one item per attachment named after its long or short filename.
        """
        def as_bytes(value):
            # Text is encoded with this unit's codec; bytes pass through.
            if isinstance(value, bytes):
                return value
            return value.encode(self.codec)

        with Message(bytes(data)) as outlook:
            collected = []
            if outlook.body:
                collected.append(EmailPart(None, as_bytes(outlook.body)))
            if outlook.htmlBody:
                collected.append(EmailPart(None, as_bytes(outlook.htmlBody)))
            collected.extend(
                EmailPart(item.longFilename or item.shortFilename, item.data)
                for item in outlook.attachments)
            return collected
Example #7
0
def main(args, argv):
    """
    Please only run this from the command line. Attempting to use this
    otherwise is likely to fail.

    For every given file this saves the message into the output directory,
    logs any exception raised while doing so, runs the developer
    ``Message`` class on the file, and finally prints where the log was
    saved.

    :param args: is the class instance returned by
        `extract_msg.utils.get_command_args`.
    :param argv: is the list of arguments that were the input to the
        aforementioned function.
    """
    setup_dev_logger(args.config_path, args.log)
    # Store this just in case the paths that have been given are relative.
    # ``os.getcwdu`` only exists on Python 2, so fall back to ``os.getcwd``
    # (which already returns text on Python 3).
    currentdir = getattr(os, 'getcwdu', os.getcwd)()
    if args.out_path:
        if not os.path.exists(args.out_path):
            os.makedirs(args.out_path)
        out = args.out_path
    else:
        out = currentdir
    logger.log(5, 'ARGV: {}'.format(argv))
    for x in args.msgs:
        logger.log(5,
                   '---- RUNNING DEVELOPER MODE ON FILE {} ----'.format(x[0]))
        logger.log(5, 'EXCEPTION CHECK:')
        try:
            with Message(x[0]) as msg:
                # Right here we should still be in the path in currentdir
                os.chdir(out)
                msg.save(toJson=args.json,
                         useFileName=args.use_filename,
                         ContentId=args.cid)
        except Exception as e:
            logger.exception(e)
        else:
            logger.log(5, 'No exceptions raised.')
        logger.log(5, 'DEVELOPER CLASS OUTPUT:')
        # Go back so the (possibly relative) input path resolves again.
        os.chdir(currentdir)
        dev_classes.Message(x[0])
        logger.log(5, '---- END OF DEVELOPER LOG ----')
        # Report the file of the last root handler that writes to one.
        # Handlers without a ``baseFilename`` leave the value untouched.
        logpath = None
        for handler in logging.root.handlers:
            logpath = getattr(handler, 'baseFilename', logpath)
        print('Logging complete. Log has been saved to {}'.format(logpath))
Example #8
0
        import pprint
        import time

        from extract_msg import validation

        val_results = {x[0]: validation.validate(x[0]) for x in args.msgs}
        filename = 'validation {}.json'.format(int(time.time()))
        print('Validation Results:')
        pprint.pprint(val_results)
        print('These results have been saved to {}'.format(filename))
        with open(filename, 'w') as fil:
            fil.write(json.dumps(val_results))
        utils.get_input('Press enter to exit...')
    else:
        utils.setup_logging(args.config_path, level, args.log,
                            args.file_logging)
        for x in args.msgs:
            try:
                with Message(x[0]) as msg:
                    # Right here we should still be in the path in currentdir
                    os.chdir(out)
                    msg.save(toJson=args.json,
                             useFileName=args.use_filename,
                             ContentId=args.cid,
                             html=args.html,
                             rtf=args.html)
            except Exception as e:
                print("Error with file '" + x[0] + "': " +
                      traceback.format_exc())
            os.chdir(currentdir)