def __init__(self, path, prefix='', filename=None):
    """
    Opens the msg file and eagerly loads its main components.

    :param path: path to the msg file in the system or is the raw msg file.
    :param prefix: used for extracting embedded msg files
        inside the main one. Do not set manually unless
        you know what you are doing. May be a string or a sequence of
        path components.
    :param filename: optional, the filename to store on the instance. It is
        replaced by the '__substg1.0_3001' string stream when a prefix is
        given.
    :raises TypeError: if the prefix can neither be converted to a string
        nor joined with '/'.
    """
    logger.log(5, 'prefix: {}'.format(prefix))
    self.__path = path
    olefile.OleFileIO.__init__(self, path)
    prefixl = []
    tmp_condition = prefix != ''
    if tmp_condition:
        # Convert string-like prefixes first; only fall back to joining the
        # components when the prefix is not string-like. Joining a string
        # would split it into single characters.
        try:
            prefix = inputToString(prefix, 'utf-8')
        except Exception:
            try:
                prefix = '/'.join(prefix)
            except Exception:
                raise TypeError('Invalid prefix type: ' + str(type(prefix)) +
                                '\n(This was probably caused by you setting it manually).')
        prefix = prefix.replace('\\', '/')
        g = prefix.split('/')
        if g[-1] == '':
            g.pop()
        prefixl = g
        # The stored prefix always ends with a separator.
        if prefix[-1] != '/':
            prefix += '/'
    self.__prefix = prefix
    self.__prefixList = prefixl

    if tmp_condition:
        # Embedded message: the name is read from the stream that sits one
        # level above the prefix.
        filename = self._getStringStream(prefixl[:-1] + ['__substg1.0_3001'], prefix=False)
    if filename is not None:
        self.filename = filename
    else:
        logger.log(5, ':param path: has __len__ attribute?: {}'.format(has_len(path)))
        if has_len(path):
            if len(path) < 1536:
                self.filename = path
                logger.log(5, ':param path: length is {}; Using :param path: as file path'.format(len(path)))
            else:
                logger.log(5, ':param path: length is {}; Using :param path: as raw msg stream'.format(len(path)))
                self.filename = None
        else:
            self.filename = None

    # Touch the attributes so that they are loaded during construction.
    self.mainProperties
    self.recipients
    self.attachments
    self.date
def body(self):
    """
    Returns the message body, if it exists.

    The value is read from the '__substg1.0_1000' string stream on first
    access and kept in ``self._body`` for later calls. When the body contains
    a CRLF sequence, the instance's line ending is switched to CRLF as well.
    """
    try:
        return self._body
    except AttributeError:
        raw = self._getStringStream('__substg1.0_1000')
        self._body = raw
        if raw:
            text = inputToString(raw, 'utf-8')
            self._body = text
            # A CRLF always contains a line feed, so one test is enough.
            if '\r\n' in text:
                self.__crlf = '\r\n'
        return self._body
def save(self, contentId=False, json=False, useFileName=False, raw=False,
         customPath=None, customFilename=None):
    """
    Saves the attachment and returns the name it was saved under.

    The filename is chosen in this order of priority:
        1. customFilename
        2. the content id, if :param contentId: is set
        3. the long filename
        4. the short filename
        5. a generated 'UnknownFilename XXXXX.bin' name

    :param contentId: use the content id as the filename when available.
    :param json: forwarded to saveEmbededMessage for embedded messages.
    :param useFileName: forwarded to saveEmbededMessage for embedded messages.
    :param raw: forwarded to saveEmbededMessage for embedded messages.
    :param customPath: optional directory that is prepended to the filename.
    :param customFilename: optional filename that overrides all others.
    :returns: the filename, including :param customPath: if one was given.
    """
    # Check if the user has specified a custom filename
    filename = None
    if customFilename is not None and customFilename != '':
        filename = customFilename
    else:
        # If not...
        # Check if user wants to save the file under the Content-id
        if contentId:
            filename = self.cid
        # If filename is None at this point, use long filename as first preference
        if filename is None:
            filename = self.longFilename
        # Otherwise use the short filename
        if filename is None:
            filename = self.shortFilename
        # Otherwise just make something up!
        if filename is None:
            filename = 'UnknownFilename ' + \
                       ''.join(random.choice(string.ascii_uppercase + string.digits)
                               for _ in range(5)) + '.bin'

    # Someone managed to have a null character here, so let's get rid of that
    filename = prepareFilename(inputToString(filename, self.msg.stringEncoding))

    if customPath is not None and customPath != '':
        # Only add a separator when the path does not already end with one.
        # The previous "!= '/' or != '\\'" test was always true.
        if not customPath.endswith(('/', '\\')):
            customPath += '/'
        filename = customPath + filename

    if self.__type == "data":
        with open(filename, 'wb') as f:
            f.write(self.__data)
    else:
        self.saveEmbededMessage(contentId, json, useFileName, raw, customPath, customFilename)
    return filename
def __init__(self, path, prefix='', attachmentClass=Attachment, filename=None, overrideEncoding=None):
    """
    Opens the msg file and works out the prefix and default filename.

    :param path: path to the msg file in the system or is the raw msg file.
    :param prefix: used for extracting embedded msg files
        inside the main one. Do not set manually unless
        you know what you are doing.
    :param attachmentClass: optional, the class the MSGFile object
        will use for attachments. You probably should
        not change this value unless you know what you
        are doing.
    :param filename: optional, the filename to be used by default when saving.
    :param overrideEncoding: optional, an encoding to use instead of the one
        specified by the msg file. Do not report encoding errors caused by this.
    :raises LookupError: if :param overrideEncoding: is not a known codec.
    :raises InvalidFileFormat: if the file is not an OLE2 structured storage file.
    :raises TypeError: if the prefix can neither be converted to a string
        nor joined with '/'.
    """
    # WARNING DO NOT MANUALLY MODIFY PREFIX. Let the program set it.
    self.__path = path
    self.__attachmentClass = attachmentClass
    if overrideEncoding is not None:
        # Fail early on an unknown encoding name.
        codecs.lookup(overrideEncoding)
        logger.warning('You have chosen to override the string encoding. Do not report encoding erros caused by this.')
        self.__stringEncoding = overrideEncoding
    self.__overrideEncoding = overrideEncoding

    try:
        olefile.OleFileIO.__init__(self, path)
    except IOError as e:    # py2 and py3 compatible
        logger.error(e)
        if str(e) == 'not an OLE2 structured storage file':
            raise InvalidFileFormat(e)
        else:
            raise

    prefixl = []
    tmp_condition = prefix != ''
    if tmp_condition:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not turned into a TypeError.
        try:
            prefix = inputToString(prefix, 'utf-8')
        except Exception:
            try:
                prefix = '/'.join(prefix)
            except Exception:
                raise TypeError('Invalid prefix type: ' + str(type(prefix)) +
                                '\n(This was probably caused by you setting it manually).')
        prefix = prefix.replace('\\', '/')
        g = prefix.split('/')
        if g[-1] == '':
            g.pop()
        prefixl = g
        # The stored prefix always ends with a separator.
        if prefix[-1] != '/':
            prefix += '/'
    self.__prefix = prefix
    self.__prefixList = prefixl

    if tmp_condition:
        # Embedded message: the name is read from the stream that sits one
        # level above the prefix.
        filename = self._getStringStream(prefixl[:-1] + ['__substg1.0_3001'], prefix=False)
    if filename is not None:
        self.filename = filename
    elif has_len(path):
        # NOTE(review): inputs shorter than 1536 are taken to be a file path
        # rather than raw msg data - confirm against olefile.
        if len(path) < 1536:
            self.filename = path
        else:
            self.filename = None
    else:
        self.filename = None
def __init__(self, path, prefix='', attachmentClass=Attachment, filename=None):
    """
    Opens the msg file and eagerly loads the message's components.

    :param path: path to the msg file in the system or is the raw msg file.
    :param prefix: used for extracting embedded msg files
        inside the main one. Do not set manually unless
        you know what you are doing.
    :param attachmentClass: optional, the class the Message object
        will use for attachments. You probably should
        not change this value unless you know what you
        are doing.
    :param filename: optional, the filename to be used by default when saving.
    :raises InvalidFileFormat: if the file is not an OLE2 structured storage file.
    :raises TypeError: if the prefix can neither be converted to a string
        nor joined with '/'.
    """
    # WARNING DO NOT MANUALLY MODIFY PREFIX. Let the program set it.
    self.__path = path
    self.__attachmentClass = attachmentClass

    try:
        olefile.OleFileIO.__init__(self, path)
    except IOError as e:    # py2 and py3 compatible
        logger.error(e)
        if str(e) == 'not an OLE2 structured storage file':
            raise InvalidFileFormat(e)
        else:
            raise

    prefixl = []
    tmp_condition = prefix != ''
    if tmp_condition:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not turned into a TypeError.
        try:
            prefix = inputToString(prefix, 'utf-8')
        except Exception:
            try:
                prefix = '/'.join(prefix)
            except Exception:
                raise TypeError('Invalid prefix type: ' + str(type(prefix)) +
                                '\n(This was probably caused by you setting it manually).')
        prefix = prefix.replace('\\', '/')
        g = prefix.split("/")
        if g[-1] == '':
            g.pop()
        prefixl = g
        # The stored prefix always ends with a separator.
        if prefix[-1] != '/':
            prefix += '/'
    self.__prefix = prefix
    self.__prefixList = prefixl

    if tmp_condition:
        # Embedded message: the name is read from the stream that sits one
        # level above the prefix.
        filename = self._getStringStream(prefixl[:-1] + ['__substg1.0_3001'], prefix=False)
    if filename is not None:
        self.filename = filename
    elif has_len(path):
        # NOTE(review): inputs shorter than 1536 are taken to be a file path
        # rather than raw msg data - confirm against olefile.
        if len(path) < 1536:
            self.filename = path
        else:
            self.filename = None
    else:
        self.filename = None

    # Initialize properties in the order that is least likely to cause bugs.
    # TODO have each function check for initialization of needed data so these
    # lines will be unnecessary.
    self.mainProperties
    self.header
    self.recipients
    self.attachments
    self.to
    self.cc
    self.sender
    self.date
    self.__crlf = '\n'  # This variable keeps track of what the new line character should be
    self.body
def save(self, toJson=False, useFileName=False, raw=False, ContentId=False,
         customPath=None, customFilename=None, html=False, rtf=False):
    """
    Saves the message body and attachments found in the message. Setting toJson
    to true will output the message body as JSON-formatted text. The body and
    attachments are stored in a folder. Setting useFileName to true will mean that
    the filename is used as the name of the folder; otherwise, the message's date
    and subject are used as the folder name.
    Here is the absolute order of priority for the name of the folder:
        1. customFilename
        2. self.filename if useFileName
        3. {date} {subject}

    :param raw: forwarded to each attachment's save method.
    :param ContentId: forwarded to each attachment's save method.
    :param customPath: optional directory that is prepended to the folder name.
    :param html: forwarded to each attachment's save method; the message
        body itself is not yet written as HTML.
    :param rtf: forwarded to each attachment's save method; the message
        body itself is not yet written as RTF.
    :raises ValueError: if useFileName is set but no filename is known.
    :raises Exception: if the output directory could not be created.
    """
    crlf = inputToBytes(self.__crlf, 'utf-8')

    if customFilename is not None and customFilename != '':
        dirName = customFilename
    else:
        if useFileName:
            # strip out the extension
            if self.filename is not None:
                dirName = self.filename.split('/').pop().split('.')[0]
            else:
                # The exception used to be created without being raised,
                # which left dirName unbound further down.
                raise ValueError(
                    'Filename must be specified, or path must have been an actual path, to save using filename')
        else:
            # Create a directory based on the date and subject of the message
            d = self.parsedDate
            if d is not None:
                dirName = '{0:02d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(*d)
            else:
                dirName = 'UnknownDate'

            if self.subject is None:
                subject = '[No subject]'
            else:
                # Drop the characters listed below from the folder name.
                subject = ''.join(i for i in self.subject if i not in r'\/:*?"<>|')

            dirName = dirName + ' ' + subject

    if customPath is not None and customPath != '':
        # Only add a separator when the path does not already end with one.
        # The previous "!= '/' or != '\\'" test was always true.
        if not customPath.endswith(('/', '\\')):
            customPath += '/'
        dirName = customPath + dirName

    try:
        os.makedirs(dirName)
    except Exception:
        # Fall back to a numbered variant of the directory name.
        newDirName = addNumToDir(dirName)
        if newDirName is not None:
            dirName = newDirName
        else:
            raise Exception(
                "Failed to create directory '%s'. Does it already exist?" %
                dirName
            )

    # os.getcwdu only exists on Python 2; fall back to os.getcwd elsewhere.
    oldDir = getattr(os, 'getcwdu', os.getcwd)()
    try:
        os.chdir(dirName)
        attachmentNames = []
        # Save the attachments
        for attachment in self.attachments:
            attachmentNames.append(attachment.save(ContentId, toJson, useFileName, raw, html=html, rtf=rtf))

        # Save the message body
        fext = 'json' if toJson else 'txt'

        # HTML and RTF output are not implemented yet, so both flags stay off.
        useHtml = False
        useRtf = False
        #if html:
        #    if self.htmlBody is not None:
        #        useHtml = True
        #        fext = 'html'
        #elif rtf:
        #    if self.htmlBody is not None:
        #        useRtf = True
        #        fext = 'rtf'

        with open('message.' + fext, 'wb') as f:
            if toJson:
                emailObj = {'from': inputToString(self.sender, 'utf-8'),
                            'to': inputToString(self.to, 'utf-8'),
                            'cc': inputToString(self.cc, 'utf-8'),
                            'subject': inputToString(self.subject, 'utf-8'),
                            'date': inputToString(self.date, 'utf-8'),
                            'attachments': attachmentNames,
                            'body': decode_utf7(self.body)}

                f.write(inputToBytes(json.dumps(emailObj), 'utf-8'))
            else:
                if useHtml:
                    # Do stuff
                    pass
                elif useRtf:
                    # Do stuff
                    pass
                else:
                    f.write(b'From: ' + inputToBytes(self.sender, 'utf-8') + crlf)
                    f.write(b'To: ' + inputToBytes(self.to, 'utf-8') + crlf)
                    f.write(b'CC: ' + inputToBytes(self.cc, 'utf-8') + crlf)
                    f.write(b'Subject: ' + inputToBytes(self.subject, 'utf-8') + crlf)
                    f.write(b'Date: ' + inputToBytes(self.date, 'utf-8') + crlf)
                    f.write(b'-----------------' + crlf + crlf)
                    f.write(inputToBytes(self.body, 'utf-8'))

    except Exception:
        # Save the raw data before re-raising the error.
        self.saveRaw()
        raise

    finally:
        # Return to previous directory
        os.chdir(oldDir)
def save(self, toJson=False, useFileName=False, raw=False, ContentId=False,
         customPath=None, customFilename=None):  #, html = False, rtf = False, allowFallback = False):
    """
    Saves the message body and attachments found in the message. The body and
    attachments are stored in a folder. Setting useFileName to true will mean that
    the filename is used as the name of the folder; otherwise, the message's date
    and subject are used as the folder name.
    Here is the absolute order of priority for the name of the folder:
        1. customFilename
        2. self.filename if useFileName
        3. {date} {subject}

    :param toJson: write the message as JSON-formatted text instead of plain text.
    :param raw: forwarded to each attachment's save method. Cannot be
        combined with :param toJson:.
    :param ContentId: forwarded to each attachment's save method.
    :param customPath: optional directory that is prepended to the folder name.
    :returns: the instance, so that calls can be chained.
    :raises IncompatibleOptionsError: if more than one output format is requested.
    :raises ValueError: if useFileName is set but no filename is known.
    :raises Exception: if the output directory could not be created.
    """
    # Planned, not yet enabled: html, rtf and allowFallback parameters.
    # Usage of more than one formatting parameter raises an exception.
    # Fallback is intended to go in this order, starting at the top most
    # format that is set:
    #  * HTML
    #  * RTF
    #  * Plain text
    count = 1 if toJson else 0
    #count += 1 if html else 0
    #count += 1 if rtf else 0
    count += 1 if raw else 0

    if count > 1:
        raise IncompatibleOptionsError('Only one of the following options may be used at a time: toJson, raw, html, rtf')

    crlf = inputToBytes(self.crlf, 'utf-8')

    if customFilename is not None and customFilename != '':
        dirName = customFilename
    else:
        if useFileName:
            # strip out the extension
            if self.filename is not None:
                dirName = self.filename.split('/').pop().split('.')[0]
            else:
                # The exception used to be created without being raised,
                # which left dirName unbound further down.
                raise ValueError(
                    'Filename must be specified, or path must have been an actual path, to save using filename')
        else:
            # Create a directory based on the date and subject of the message
            d = self.parsedDate
            if d is not None:
                dirName = '{0:02d}-{1:02d}-{2:02d}_{3:02d}{4:02d}'.format(*d)
            else:
                dirName = 'UnknownDate'

            if self.subject is None:
                subject = '[No subject]'
            else:
                subject = prepareFilename(self.subject)

            dirName = dirName + ' ' + subject

    if customPath is not None and customPath != '':
        # Only add a separator when the path does not already end with one.
        # The previous "!= '/' or != '\\'" test was always true.
        if not customPath.endswith(('/', '\\')):
            customPath += '/'
        dirName = customPath + dirName

    try:
        os.makedirs(dirName)
    except Exception:
        # Fall back to a numbered variant of the directory name.
        newDirName = addNumToDir(dirName)
        if newDirName is not None:
            dirName = newDirName
        else:
            raise Exception(
                "Failed to create directory '%s'. Does it already exist?" %
                dirName
            )

    # os.getcwdu only exists on Python 2; fall back to os.getcwd elsewhere.
    oldDir = getattr(os, 'getcwdu', os.getcwd)()
    try:
        os.chdir(dirName)
        attachmentNames = []
        # Save the attachments
        for attachment in self.attachments:
            attachmentNames.append(attachment.save(ContentId, toJson, useFileName, raw))  #, html = html, rtf = rtf, allowFallback = allowFallback))

        # Save the message body
        fext = 'json' if toJson else 'txt'

        # HTML and RTF output are not implemented yet, so both flags stay off.
        useHtml = False
        useRtf = False
        #if html:
        #    if self.htmlBody is not None:
        #        useHtml = True
        #        fext = 'html'
        #elif not allowFallback:
        #    raise DataNotFoundError('Could not find the htmlBody')

        #if rtf or (html and not useHtml):
        #    if self.rtfBody is not None:
        #        useRtf = True
        #        fext = 'rtf'
        #elif not allowFallback:
        #    raise DataNotFoundError('Could not find the rtfBody')

        with open('message.' + fext, 'wb') as f:
            if toJson:
                emailObj = {'from': inputToString(self.sender, 'utf-8'),
                            'to': inputToString(self.to, 'utf-8'),
                            'cc': inputToString(self.cc, 'utf-8'),
                            'subject': inputToString(self.subject, 'utf-8'),
                            'date': inputToString(self.date, 'utf-8'),
                            'attachments': attachmentNames,
                            'body': decode_utf7(self.body)}

                f.write(inputToBytes(json.dumps(emailObj), 'utf-8'))
            else:
                if useHtml:
                    # Do stuff
                    pass
                elif useRtf:
                    # Do stuff
                    pass
                else:
                    f.write(b'From: ' + inputToBytes(self.sender, 'utf-8') + crlf)
                    f.write(b'To: ' + inputToBytes(self.to, 'utf-8') + crlf)
                    f.write(b'CC: ' + inputToBytes(self.cc, 'utf-8') + crlf)
                    f.write(b'Subject: ' + inputToBytes(self.subject, 'utf-8') + crlf)
                    f.write(b'Date: ' + inputToBytes(self.date, 'utf-8') + crlf)
                    f.write(b'-----------------' + crlf + crlf)
                    f.write(inputToBytes(self.body, 'utf-8'))

    except Exception:
        # Save the raw data before re-raising the error.
        self.saveRaw()
        raise

    finally:
        # Return to previous directory
        os.chdir(oldDir)

    # Return the instance so that functions can easily be chained.
    return self