Example #1
0
    def __init__(self, path, prefix='', filename=None):
        """
        Open the msg file and eagerly load its main data.

        :param path: path to the msg file in the system or is the raw msg file.
        :param prefix: used for extracting embedded msg files
            inside the main one. Do not set manually unless
            you know what you are doing. Either a string-like value whose
            components are separated by slashes or backslashes, or an
            iterable of string components.
        :param filename: optional, the filename to store on the instance.
            When a prefix is given, this value is replaced by the content
            of the '__substg1.0_3001' stream.
        :raises TypeError: if the prefix is neither a string nor an
            iterable of strings.
        """
        logger.log(5, 'prefix: {}'.format(prefix))
        self.__path = path
        olefile.OleFileIO.__init__(self, path)
        prefixl = []
        tmp_condition = prefix != ''
        if tmp_condition:
            try:
                prefix = inputToString(prefix, 'utf-8')
            except Exception:
                # Not convertible as a whole (e.g. a list of components);
                # the join below takes care of that case.
                pass
            # Only join when the prefix is not already one string: joining a
            # string would insert a '/' between each of its characters.
            if not isinstance(prefix, (type(''), type(u''))):
                try:
                    prefix = '/'.join(prefix)
                except TypeError:
                    raise TypeError('Invalid prefix type: ' + str(type(prefix)) +
                                    '\n(This was probably caused by you setting it manually).')
            prefix = prefix.replace('\\', '/')
            g = prefix.split('/')
            if g[-1] == '':
                g.pop()
            prefixl = g
            # The stored prefix always ends with a single trailing '/'.
            if prefix[-1] != '/':
                prefix += '/'
        self.__prefix = prefix
        self.__prefixList = prefixl

        if tmp_condition:
            # Looked up one level above the last prefix component, with
            # prefix handling disabled for this call.
            filename = self._getStringStream(prefixl[:-1] + ['__substg1.0_3001'], prefix=False)
        if filename is not None:
            self.filename = filename
        else:
            logger.log(5, ':param path: has __len__ attribute?: {}'.format(has_len(path)))
            if has_len(path):
                if len(path) < 1536:
                    self.filename = path
                    logger.log(5, ':param path: length is {}; Using :param path: as file path'.format(len(path)))
                else:
                    logger.log(5, ':param path: length is {}; Using :param path: as raw msg stream'.format(len(path)))
                    self.filename = None
            else:
                self.filename = None

        # Bare attribute accesses: presumably properties that load and cache
        # their data on first access -- TODO confirm against the class.
        self.mainProperties
        recipientDirs = []

        # Collect the distinct '__recip*' entries found directly below the
        # prefix. NOTE(review): recipientDirs is not read again in this method.
        for dir_ in self.listDir():
            if dir_[len(self.__prefixList)].startswith('__recip') and\
                    dir_[len(self.__prefixList)] not in recipientDirs:
                recipientDirs.append(dir_[len(self.__prefixList)])

        self.recipients
        self.attachments
        self.date
Example #2
0
 def body(self):
     """
     Returns the message body, if it exists.

     The value is read from the '__substg1.0_1000' stream on first access
     and kept in self._body for later calls. A non-empty body is converted
     with inputToString; if it contains a CRLF sequence, the stored line
     ending is switched to CRLF.
     """
     try:
         return self._body
     except AttributeError:
         # First access: nothing cached yet, so load the stream now.
         self._body = self._getStringStream('__substg1.0_1000')
         if not self._body:
             return self._body
         self._body = inputToString(self._body, 'utf-8')
         # A CRLF pair necessarily contains a newline, so one test suffices.
         if '\r\n' in self._body:
             self.__crlf = '\r\n'
         return self._body
    def save(self,
             contentId=False,
             json=False,
             useFileName=False,
             raw=False,
             customPath=None,
             customFilename=None
             ):  #, html = False, rtf = False, allowFallback = False):
        """
        Saves the attachment and returns the file name that was chosen.

        Order of priority for the file name:
            1. customFilename (when not None and not empty)
            2. self.cid if contentId is set
            3. self.longFilename
            4. self.shortFilename
            5. 'UnknownFilename ' followed by 5 random characters and '.bin'

        When the attachment type is "data" the raw data is written to that
        file; otherwise the call is handed over to saveEmbededMessage.

        :param contentId: prefer the content id as file name.
        :param json: passed on to saveEmbededMessage.
        :param useFileName: passed on to saveEmbededMessage.
        :param raw: passed on to saveEmbededMessage.
        :param customPath: optional directory prepended to the file name; a
            '/' is appended if it does not already end with a separator.
        :param customFilename: optional file name overriding all others.
        :return: the file name, including customPath when one was given.
        """
        # Check if the user has specified a custom filename
        filename = None
        if customFilename is not None and customFilename != '':
            filename = customFilename
        else:
            # If not...
            # Check if user wants to save the file under the Content-id
            if contentId:
                filename = self.cid
            # If filename is None at this point, use long filename as first preference
            if filename is None:
                filename = self.longFilename
            # Otherwise use the short filename
            if filename is None:
                filename = self.shortFilename
            # Otherwise just make something up!
            if filename is None:
                filename = 'UnknownFilename ' + \
                           ''.join(random.choice(string.ascii_uppercase + string.digits)
                                   for _ in range(5)) + '.bin'

        # Someone managed to have a null character here, so let's get rid of that
        filename = prepareFilename(
            inputToString(filename, self.msg.stringEncoding))

        if customPath is not None and customPath != '':
            # Append a separator only when the path does not already end with
            # one (the previous "!= '/' or != '\\'" test was always true).
            if customPath[-1] not in ('/', '\\'):
                customPath += '/'
            filename = customPath + filename

        if self.__type == "data":
            with open(filename, 'wb') as f:
                f.write(self.__data)
        else:
            self.saveEmbededMessage(
                contentId, json, useFileName, raw, customPath,
                customFilename)  #, html, rtf, allowFallback)
        return filename
Example #4
0
    def __init__(self,
                 path,
                 prefix='',
                 attachmentClass=Attachment,
                 filename=None,
                 overrideEncoding=None):
        """
        :param path: path to the msg file in the system or is the raw msg file.
        :param prefix: used for extracting embedded msg files
            inside the main one. Do not set manually unless
            you know what you are doing.
        :param attachmentClass: optional, the class the MSGFile object
            will use for attachments. You probably should
            not change this value unless you know what you
            are doing.
        :param filename: optional, the filename to be used by default when saving.
            When a prefix is given, this value is replaced by the content
            of the '__substg1.0_3001' stream.
        :param overrideEncoding: optional, an encoding to use instead of the one
            specified by the msg file. Do not report encoding errors caused by this.
        :raises LookupError: if overrideEncoding is not a known codec name.
        :raises InvalidFileFormat: if the file is not an OLE2 structured
            storage file.
        :raises TypeError: if the prefix is neither string-like nor an
            iterable of strings.
        """
        # WARNING DO NOT MANUALLY MODIFY PREFIX. Let the program set it.
        self.__path = path
        self.__attachmentClass = attachmentClass
        if overrideEncoding is not None:
            # Validate the codec name up front; lookup raises if it is unknown.
            codecs.lookup(overrideEncoding)
            logger.warning(
                'You have chosen to override the string encoding. Do not report encoding erros caused by this.'
            )
            self.__stringEncoding = overrideEncoding
        self.__overrideEncoding = overrideEncoding

        try:
            olefile.OleFileIO.__init__(self, path)
        except IOError as e:  # py2 and py3 compatible
            logger.error(e)
            if str(e) == 'not an OLE2 structured storage file':
                raise InvalidFileFormat(e)
            else:
                raise

        prefixl = []
        tmp_condition = prefix != ''
        if tmp_condition:
            try:
                prefix = inputToString(prefix, 'utf-8')
            except Exception:
                # Not convertible as a whole; treat it as an iterable of
                # path components instead.
                try:
                    prefix = '/'.join(prefix)
                except TypeError:
                    raise TypeError(
                        'Invalid prefix type: ' + str(type(prefix)) +
                        '\n(This was probably caused by you setting it manually).'
                    )
            prefix = prefix.replace('\\', '/')
            g = prefix.split('/')
            if g[-1] == '':
                g.pop()
            prefixl = g
            # The stored prefix always ends with a single trailing '/'.
            if prefix[-1] != '/':
                prefix += '/'
        self.__prefix = prefix
        self.__prefixList = prefixl
        if tmp_condition:
            # Looked up one level above the last prefix component, with
            # prefix handling disabled for this call.
            filename = self._getStringStream(prefixl[:-1] +
                                             ['__substg1.0_3001'],
                                             prefix=False)
        if filename is not None:
            self.filename = filename
        elif has_len(path) and len(path) < 1536:
            # Short enough to be used as the file name; presumably longer
            # values are raw msg data rather than a path -- TODO confirm.
            self.filename = path
        else:
            self.filename = None
Example #5
0
    def __init__(self,
                 path,
                 prefix='',
                 attachmentClass=Attachment,
                 filename=None):
        """
        :param path: path to the msg file in the system or is the raw msg file.
        :param prefix: used for extracting embedded msg files
            inside the main one. Do not set manually unless
            you know what you are doing.
        :param attachmentClass: optional, the class the Message object
            will use for attachments. You probably should
            not change this value unless you know what you
            are doing.
        :param filename: optional, the filename to be used by default when saving.
            When a prefix is given, this value is replaced by the content
            of the '__substg1.0_3001' stream.
        :raises InvalidFileFormat: if the file is not an OLE2 structured
            storage file.
        :raises TypeError: if the prefix is neither string-like nor an
            iterable of strings.
        """
        # WARNING DO NOT MANUALLY MODIFY PREFIX. Let the program set it.
        self.__path = path
        self.__attachmentClass = attachmentClass

        try:
            olefile.OleFileIO.__init__(self, path)
        except IOError as e:  # py2 and py3 compatible
            logger.error(e)
            if str(e) == 'not an OLE2 structured storage file':
                raise InvalidFileFormat(e)
            else:
                raise

        prefixl = []
        tmp_condition = prefix != ''
        if tmp_condition:
            try:
                prefix = inputToString(prefix, 'utf-8')
            except Exception:
                # Not convertible as a whole; treat it as an iterable of
                # path components instead.
                try:
                    prefix = '/'.join(prefix)
                except TypeError:
                    raise TypeError(
                        'Invalid prefix type: ' + str(type(prefix)) +
                        '\n(This was probably caused by you setting it manually).'
                    )
            prefix = prefix.replace('\\', '/')
            g = prefix.split("/")
            if g[-1] == '':
                g.pop()
            prefixl = g
            # The stored prefix always ends with a single trailing '/'.
            if prefix[-1] != '/':
                prefix += '/'
        self.__prefix = prefix
        self.__prefixList = prefixl
        if tmp_condition:
            # Looked up one level above the last prefix component, with
            # prefix handling disabled for this call.
            filename = self._getStringStream(prefixl[:-1] +
                                             ['__substg1.0_3001'],
                                             prefix=False)
        if filename is not None:
            self.filename = filename
        elif has_len(path) and len(path) < 1536:
            # Short enough to be used as the file name; presumably longer
            # values are raw msg data rather than a path -- TODO confirm.
            self.filename = path
        else:
            self.filename = None

        # Initialize properties in the order that is least likely to cause bugs.
        # TODO have each function check for initialization of needed data so these
        # lines will be unnecessary.
        self.mainProperties
        self.header
        self.recipients
        self.attachments
        self.to
        self.cc
        self.sender
        self.date
        self.__crlf = '\n'  # This variable keeps track of what the new line character should be
        self.body
Example #6
0
    def save(self,
             toJson=False,
             useFileName=False,
             raw=False,
             ContentId=False,
             customPath=None,
             customFilename=None,
             html=False,
             rtf=False):
        """
        Saves the message body and attachments found in the message. Setting toJson
        to true will output the message body as JSON-formatted text. The body and
        attachments are stored in a folder. Setting useFileName to true will mean that
        the filename is used as the name of the folder; otherwise, the message's date
        and subject are used as the folder name.
        Here is the absolute order of priority for the name of the folder:
            1. customFilename
            2. self.filename if useFileName
            3. {date} {subject}

        The working directory is changed to the folder while saving and is
        always restored afterwards. If saving fails, self.saveRaw() is called
        and the original exception is re-raised.

        :param toJson: write 'message.json' instead of 'message.txt'.
        :param useFileName: name the folder after self.filename.
        :param raw: passed on to each attachment's save().
        :param ContentId: passed on to each attachment's save().
        :param customPath: optional directory in which the folder is created; a
            '/' is appended if it does not already end with a separator.
        :param customFilename: optional folder name overriding all others.
        :param html: passed on to each attachment's save(); HTML output of the
            body itself is not implemented here.
        :param rtf: passed on to each attachment's save(); RTF output of the
            body itself is not implemented here.
        :raises ValueError: if useFileName is set but self.filename is None.
        :raises Exception: if the folder cannot be created and addNumToDir
            does not provide an alternative name.
        """
        crlf = inputToBytes(self.__crlf, 'utf-8')

        if customFilename is not None and customFilename != '':
            dirName = customFilename
        elif useFileName:
            if self.filename is None:
                # This exception used to be built without being raised, which
                # left dirName unbound further down.
                raise ValueError(
                    'Filename must be specified, or path must have been an actual path, to save using filename'
                )
            # strip out the extension
            dirName = self.filename.split('/').pop().split('.')[0]
        else:
            # Create a directory based on the date and subject of the message
            d = self.parsedDate
            if d is not None:
                dirName = '{0:02d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(
                    *d)
            else:
                dirName = 'UnknownDate'

            if self.subject is None:
                subject = '[No subject]'
            else:
                # Drop the characters that are not allowed in file names
                subject = ''.join(i for i in self.subject
                                  if i not in r'\/:*?"<>|')

            dirName = dirName + ' ' + subject

        if customPath is not None and customPath != '':
            # Append a separator only when the path does not already end with
            # one (the previous "!= '/' or != '\\'" test was always true).
            if customPath[-1] not in ('/', '\\'):
                customPath += '/'
            dirName = customPath + dirName
        try:
            os.makedirs(dirName)
        except Exception:
            # Creation failed; ask addNumToDir for an alternative name.
            newDirName = addNumToDir(dirName)
            if newDirName is not None:
                dirName = newDirName
            else:
                raise Exception(
                    "Failed to create directory '%s'. Does it already exist?" %
                    dirName)

        # os.getcwdu only exists on Python 2; fall back to os.getcwd elsewhere.
        oldDir = getattr(os, 'getcwdu', os.getcwd)()
        try:
            os.chdir(dirName)
            attachmentNames = []
            # Save the attachments
            for attachment in self.attachments:
                attachmentNames.append(
                    attachment.save(ContentId,
                                    toJson,
                                    useFileName,
                                    raw,
                                    html=html,
                                    rtf=rtf))

            # Save the message body
            # TODO: HTML and RTF output of the body are not implemented yet;
            # the body is always written as JSON or plain text.
            fext = 'json' if toJson else 'txt'

            with open('message.' + fext, 'wb') as f:
                if toJson:
                    emailObj = {
                        'from': inputToString(self.sender, 'utf-8'),
                        'to': inputToString(self.to, 'utf-8'),
                        'cc': inputToString(self.cc, 'utf-8'),
                        'subject': inputToString(self.subject, 'utf-8'),
                        'date': inputToString(self.date, 'utf-8'),
                        'attachments': attachmentNames,
                        'body': decode_utf7(self.body)
                    }

                    f.write(inputToBytes(json.dumps(emailObj), 'utf-8'))
                else:
                    f.write(b'From: ' +
                            inputToBytes(self.sender, 'utf-8') + crlf)
                    f.write(b'To: ' + inputToBytes(self.to, 'utf-8') +
                            crlf)
                    f.write(b'CC: ' + inputToBytes(self.cc, 'utf-8') +
                            crlf)
                    f.write(b'Subject: ' +
                            inputToBytes(self.subject, 'utf-8') + crlf)
                    f.write(b'Date: ' + inputToBytes(self.date, 'utf-8') +
                            crlf)
                    f.write(b'-----------------' + crlf + crlf)
                    f.write(inputToBytes(self.body, 'utf-8'))

        except Exception:
            self.saveRaw()
            raise

        finally:
            # Return to previous directory
            os.chdir(oldDir)
Example #7
0
    def save(self,
             toJson=False,
             useFileName=False,
             raw=False,
             ContentId=False,
             customPath=None,
             customFilename=None
             ):  #, html = False, rtf = False, allowFallback = False):
        """
        Saves the message body and attachments found in the message. The body and
        attachments are stored in a folder. Setting useFileName to true will mean that
        the filename is used as the name of the folder; otherwise, the message's date
        and subject are used as the folder name.
        Here is the absolute order of priority for the name of the folder:
            1. customFilename
            2. self.filename if useFileName
            3. {date} {subject}

        The working directory is changed to the folder while saving and is
        always restored afterwards. If saving fails, self.saveRaw() is called
        and the original exception is re-raised.

        :param toJson: write 'message.json' instead of 'message.txt'.
        :param useFileName: name the folder after self.filename.
        :param raw: passed on to each attachment's save(); cannot be combined
            with toJson.
        :param ContentId: passed on to each attachment's save().
        :param customPath: optional directory in which the folder is created; a
            '/' is appended if it does not already end with a separator.
        :param customFilename: optional folder name overriding all others.
        :return: the instance itself, so that calls can be chained.
        :raises IncompatibleOptionsError: if more than one output format
            option is set.
        :raises ValueError: if useFileName is set but self.filename is None.
        :raises Exception: if the folder cannot be created and addNumToDir
            does not provide an alternative name.
        """
        # Planned, not implemented yet (kept from the original notes):
        #There are several parameters used to determine how the message will be saved.
        #By default, the message will be saved as plain text. Setting one of the
        #following parameters to True will change that:
        #    * :param html: will try to output the message in HTML format.
        #    * :param json: will output the message in JSON format.
        #    * :param raw: will output the message in a raw format.
        #    * :param rtf: will output the message in RTF format.
        #
        #Usage of more than one formatting parameter will raise an exception.
        #
        #Using HTML or RTF will raise an exception if they could not be retrieved
        #unless you have :param allowFallback: set to True. Fallback will go in this
        #order, starting at the top most format that is set:
        #    * HTML
        #    * RTF
        #    * Plain text

        # Count the format options in use; html and rtf will join this tuple
        # once they are supported.
        if sum(1 for option in (toJson, raw) if option) > 1:
            raise IncompatibleOptionsError(
                'Only one of the following options may be used at a time: toJson, raw, html, rtf'
            )

        crlf = inputToBytes(self.crlf, 'utf-8')

        if customFilename is not None and customFilename != '':
            dirName = customFilename
        elif useFileName:
            if self.filename is None:
                # This exception used to be built without being raised, which
                # left dirName unbound further down.
                raise ValueError(
                    'Filename must be specified, or path must have been an actual path, to save using filename'
                )
            # strip out the extension
            dirName = self.filename.split('/').pop().split('.')[0]
        else:
            # Create a directory based on the date and subject of the message
            d = self.parsedDate
            if d is not None:
                dirName = '{0:02d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(
                    *d)
            else:
                dirName = 'UnknownDate'

            if self.subject is None:
                subject = '[No subject]'
            else:
                subject = prepareFilename(self.subject)

            dirName = dirName + ' ' + subject

        if customPath is not None and customPath != '':
            # Append a separator only when the path does not already end with
            # one (the previous "!= '/' or != '\\'" test was always true).
            if customPath[-1] not in ('/', '\\'):
                customPath += '/'
            dirName = customPath + dirName
        try:
            os.makedirs(dirName)
        except Exception:
            # Creation failed; ask addNumToDir for an alternative name.
            newDirName = addNumToDir(dirName)
            if newDirName is not None:
                dirName = newDirName
            else:
                raise Exception(
                    "Failed to create directory '%s'. Does it already exist?" %
                    dirName)

        # os.getcwdu only exists on Python 2; fall back to os.getcwd elsewhere.
        oldDir = getattr(os, 'getcwdu', os.getcwd)()
        try:
            os.chdir(dirName)
            attachmentNames = []
            # Save the attachments
            for attachment in self.attachments:
                attachmentNames.append(
                    attachment.save(ContentId, toJson, useFileName, raw)
                )  #, html = html, rtf = rtf, allowFallback = allowFallback))

            # Save the message body
            # TODO: HTML and RTF output (with optional fallback to plain text)
            # are not implemented yet; the body is always written as JSON or
            # plain text.
            fext = 'json' if toJson else 'txt'

            with open('message.' + fext, 'wb') as f:
                if toJson:
                    emailObj = {
                        'from': inputToString(self.sender, 'utf-8'),
                        'to': inputToString(self.to, 'utf-8'),
                        'cc': inputToString(self.cc, 'utf-8'),
                        'subject': inputToString(self.subject, 'utf-8'),
                        'date': inputToString(self.date, 'utf-8'),
                        'attachments': attachmentNames,
                        'body': decode_utf7(self.body)
                    }

                    f.write(inputToBytes(json.dumps(emailObj), 'utf-8'))
                else:
                    f.write(b'From: ' +
                            inputToBytes(self.sender, 'utf-8') + crlf)
                    f.write(b'To: ' + inputToBytes(self.to, 'utf-8') +
                            crlf)
                    f.write(b'CC: ' + inputToBytes(self.cc, 'utf-8') +
                            crlf)
                    f.write(b'Subject: ' +
                            inputToBytes(self.subject, 'utf-8') + crlf)
                    f.write(b'Date: ' + inputToBytes(self.date, 'utf-8') +
                            crlf)
                    f.write(b'-----------------' + crlf + crlf)
                    f.write(inputToBytes(self.body, 'utf-8'))

        except Exception:
            self.saveRaw()
            raise

        finally:
            # Return to previous directory
            os.chdir(oldDir)

        # Return the instance so that functions can easily be chained.
        return self