def parse_env_data_header(self, env_buffer):
    """
    Read envelope information from a header-formatted buffer and keep it on this object

    The buffer is parsed like a block of mail headers. Every header value is
    decoded and stripped; headers with an empty value are ignored. The remaining
    headers are handled by name:

    - "X-ENV-SENDER" is stored as the envelope sender (from_address)
    - "X-ENV-RECIPIENT" is appended to the recipients list
    - "X-DATA-PREPEND-START" / "X-DATA-PREPEND-END" both set the
      "prepend_identifier" tag
    - any other header is stored as a tag under its own name

    Args:
        env_buffer: envelope data; parsed with email.message_from_bytes first,
            falling back to email.message_from_string if that raises
            AttributeError
    """
    try:
        envelope = email.message_from_bytes(env_buffer)
    except AttributeError:
        # the bytes parser could not handle the input, parse it as text instead
        envelope = email.message_from_string(env_buffer)

    prepend_headers = ("X-DATA-PREPEND-START", "X-DATA-PREPEND-END")

    for name, raw_header in envelope.items():
        content = Suspect.decode_msg_header(raw_header).strip()
        if content:
            if name == "X-ENV-SENDER":
                self.from_address = content.strip()
                self.logger.debug("Found env sender: %s" % content)
            elif name == "X-ENV-RECIPIENT":
                self.recipients.append(content.strip())
                self.logger.debug("Found env recipient: %s" % content)
            elif name in prepend_headers:
                # both markers end up in the same tag, only the log line differs
                self.tags["prepend_identifier"] = content
                if name == "X-DATA-PREPEND-START":
                    self.logger.debug(
                        "set prepend identifier from Start header to: %s" % content)
                else:
                    self.logger.debug(
                        "set prepend identifier from End header to: %s" % content)
            else:
                self.tags[name] = content
                self.logger.debug("Store in Suspect TAG: (%s,%s)" % (name, content))
def process_msg_part(self, part):
    """
    Process message part, return tuple containing all information to create Mailattachment object

    Args:
        part (message part): the message part to inspect

    Returns:
        tuple : tuple containing
            - att_name             (string) : attachment filename
            - buffer               (bytes)  : attachment buffer as bytes
            - attsize              (int)    : attachment size in bytes (None if it can not be determined)
            - contenttype_mime     (string) : content type
            - maintype_mime        (string) : main content type
            - subtype_mime         (string) : content subtype
            - ismultipart_mime     (bool)   : multipart
            - content_charset_mime (string) : charset for content
            - isattachment         (bool)   : True if this is a direct mail attachment,
                                              not inline (Content-Disposition=inline)
            - isinline             (bool)   : True if this is an inline mail attachment,
                                              not attachment (Content-Disposition=attachment)
            - defects              (list)   : A list of strings containing errors during decoding
            - att_name_generated   (bool)   : True if name has been generated (not in mail)
    """
    contenttype_mime = part.get_content_type()
    maintype_mime = part.get_content_maintype()
    subtype_mime = part.get_content_subtype()
    ismultipart_mime = part.is_multipart()
    content_charset_mime = part.get_content_charset()
    att_name = part.get_filename(None)
    defects = []
    att_name_generated = False

    # on any error the part is marked as attachment
    isattachment = True
    isinline = False

    try:
        content_disposition = part.get_content_disposition()
    except AttributeError:
        # message object without get_content_disposition(): read the raw header
        content_disposition = part.get("Content-Disposition", None)
        # here it is possible we get a Header object back
        if content_disposition is not None:
            try:
                # include here to prevent cyclic import
                from fuglu.shared import Suspect
                content_disposition = Suspect.decode_msg_header(
                    content_disposition)
            except Exception as e:
                self.logger.error(
                    "%s, error extracting attachment info using Content-Disposition header : %s"
                    % (self.fugluid, str(e)))
                content_disposition = "attachment"

            try:
                content_disposition = content_disposition.lower()
            except Exception as e:
                self.logger.error(
                    "error extracting attachment info using Content-Disposition header : %s" % str(e))
                content_disposition = "attachment"

    if content_disposition is None:
        # no Content-Disposition at all: neither attachment nor inline
        isattachment = False
        isinline = False
    else:
        try:
            if "attachment" in content_disposition:
                isattachment = True
                isinline = False
            elif "inline" in content_disposition:
                isattachment = False
                isinline = True
            # any other disposition keeps the "attachment" default set above
        except (AttributeError, TypeError) as e:
            # a non-string disposition raises TypeError on the "in" test;
            # keep the "attachment" default instead of failing
            self.logger.error("error extracting attachment info using "
                              "Content-Disposition header as string: %s" % str(e))
            logging.getLogger(
                "fuglu.Mailattachment_mgr.process_msg_part").exception(e)

    if att_name:
        # some filenames are encoded, try to decode
        try:
            # include here to prevent cyclic import
            from fuglu.shared import Suspect
            att_name = Suspect.decode_msg_header(att_name)
        except Exception as e:
            # best effort: continue with the filename as found in the message
            self.logger.debug(
                "could not decode attachment filename, using it as is: %s" % str(e))

        # long filenames not properly implementing continuation according
        # to RFC2231 might end up with a line break in the filename.
        # Even though some operating systems allow line breaks in filenames,
        # better to remove them...
        att_name = att_name.replace('\r', '').replace('\n', '')
    else:
        # --
        # generate a filename
        # --
        att_name_generated = True
        if contenttype_mime in MIMETYPE_EXT_OVERRIDES:
            ext = MIMETYPE_EXT_OVERRIDES[contenttype_mime]
        else:
            exts = mimetypes.guess_all_extensions(contenttype_mime)
            # reply is randomly sorted list, take the smallest entry to
            # get a consistent result
            ext = min(exts) if exts else None

        if ext is None:
            ext = ''

        if ext.strip() == '':
            att_name = "unnamed"
        elif ext.startswith("."):
            att_name = 'unnamed%s' % ext
        else:
            att_name = 'unnamed.%s' % ext

    try:
        buffer = part.get_payload(decode=True)  # Py2: string, Py3: bytes
        if part.defects:
            defect_docs = [defect.__doc__ for defect in part.defects]
            self.logger.warning(
                "Could not get payload for %s, "
                "Defect(s): %s, "
                "continue without decoding"
                % (att_name, ",".join(defect_docs)))
            defects.extend(defect_docs)
    except Exception as e:
        # argument order has to match the placeholders: name first, reason second
        self.logger.warning("Could not get payload for %s, "
                            "Reason: %s, "
                            "continue without decoding" % (att_name, str(e)))
        buffer = part.get_payload(decode=False)
        defects.append(str(e))

    # try to get size from buffer length
    try:
        attsize = len(buffer)
    except TypeError:
        # e.g. no payload (None) for multipart containers
        attsize = None

    return (att_name, buffer, attsize, contenttype_mime, maintype_mime,
            subtype_mime, ismultipart_mime, content_charset_mime,
            isattachment, isinline, defects, att_name_generated)