def write_mime_message_payloads(
    output_dir: str,
    transaction_id: str,
    msg: MIMEMessage,
):
    """Write the HTML bodies and file attachments of a MIME message to disk.

    Every part yielded by ``msg.walk()`` is inspected. HTML parts that are
    not files are handed to ``write_html`` and then have their payload
    blanked; non-multipart file parts are handed to ``write_attachment``.
    Any other part is left untouched.

    Args:
        output_dir (str): The directory to write the payloads in.
        transaction_id (str): The transaction ID of the email.
        msg (MIMEMessage): The parsed MIME message. It is modified in
            place: the payload of each written HTML part is set to "".
    """
    for part in msg.walk():
        inline_html = is_html(part) and not is_file(part)
        if inline_html:
            write_html(output_dir, transaction_id, part)
            # Outlook does not display EML files correctly when the HTML is
            # still embedded, so the HTML payload is emptied once it has
            # been written out.
            part.set_payload("")
            continue

        if is_file(part) and not part.is_multipart():
            write_attachment(output_dir, transaction_id, part)
def anonymise_mime_message(msg: MIMEMessage) -> MIMEMessage:
    """Replace the sensitive fields of a MIME message with placeholders.

    The "Subject" and "Thread-Topic" headers (when present) and the body of
    the email (every text/html and text/plain part that is not a file) are
    overwritten with messages indicating that the content has been
    anonymised. Attachments are preserved.

    Args:
        msg (MIMEMessage): The parsed MIME message. It is modified in place.

    Returns:
        MIMEMessage: The same ``msg`` object, after anonymisation.
    """
    # Neither header is guaranteed to be present, and replace_header raises
    # KeyError for a missing header, so only replace the ones that exist.
    placeholders = (
        ("Subject", "The subject has been removed for anonymity"),
        ("Thread-Topic", "The topic has been removed for anonymity"),
    )
    for header, placeholder in placeholders:
        if msg.get(header) is not None:
            msg.replace_header(header, placeholder)

    for part in msg.walk():
        # the only parts that contain the body are text/html and text/plain
        if part.get_content_type() not in ("text/html", "text/plain"):
            continue
        if is_file(part):
            continue

        # The placeholder is stored as plain ASCII text. If the part declared
        # a transfer encoding such as base64, leaving that header in place
        # would make readers decode the placeholder as base64 and show
        # garbage, so reset it to match the new payload.
        if part.get("Content-Transfer-Encoding") is not None:
            part.replace_header("Content-Transfer-Encoding", "7bit")
        part.set_payload("The body has been removed for anonymity")

    return msg