def get_dmarc_report(msg: email.message.Message) -> Any:
    """Extract a report from the first usable ZIP attachment of *msg*.

    Every part of the message is visited in ``walk()`` order. For each part
    whose content type is ``application/zip`` the decoded payload is opened
    as a ZIP archive; the first member of that archive is parsed as XML and
    its root element is handed to ``xml2dict`` (a helper defined elsewhere),
    whose result is returned immediately. An archive without members is
    skipped and the search continues with the next part.

    Judging by the function name the XML is presumably a DMARC aggregate
    report; nothing here validates that.

    Returns:
        Whatever ``xml2dict`` returns for the first member found, or ``None``
        when no ZIP attachment with at least one member exists.
    """
    for part in msg.walk():
        if part.get_content_type() != "application/zip":
            continue
        buffer = io.BytesIO(part.get_payload(decode=True))
        with zipfile.ZipFile(buffer, "r") as archive:
            members = archive.namelist()
            if members:
                # Only the first archive member is ever looked at.
                with archive.open(members[0]) as report_file:
                    return xml2dict(ET.parse(report_file).getroot())
    return None
def extract_files_from_email(
    message: email.message.Message,
) -> ParseMessageResult:
    """
    Parse an email Message and return a ParseMessageResult.

    The checks run in this order:

    1. Both a Subject and a From header must be present and non-empty;
       otherwise an AssertionError is raised.
    2. If the subject contains "unsubscribe" (case-insensitive), the result
       has status MessageStatus.UNSUBSCRIBE and no files.
    3. If the subject is exactly 8 characters long, the result has status
       MessageStatus.REGISTER and no files.
    4. Otherwise every part whose MIME type is "application/pdf" or starts
       with "application/epub" is collected as a (filename, bytes) tuple.
       The status is MessageStatus.SUCCESS when at least one file was found
       and MessageStatus.FAILURE when none was.

    Parts without a filename get "Remailable_Attachment.pdf" or
    "Remailable_Attachment.epub" as a fallback name.
    """
    subject = message.get("Subject")
    sent_from = message.get("From")
    # Raised explicitly (instead of a bare `assert`) so the check still runs
    # under `python -O`; the exception type is unchanged for callers.
    if not (subject and sent_from):
        raise AssertionError("message needs both a Subject and a From header")

    if "unsubscribe" in subject.lower():
        return ParseMessageResult(
            sent_from=sent_from,
            subject=subject,
            status=MessageStatus.UNSUBSCRIBE,
            extracted_files=[],
        )

    # FIXME: need a more robust check here
    if len(subject) == 8:
        return ParseMessageResult(
            sent_from=sent_from,
            subject=subject,
            status=MessageStatus.REGISTER,
            extracted_files=[],
        )

    # Now we're done parsing the subject, we should check if there are any
    # attachments.
    files: List[FileTuple] = []
    for part in message.walk():
        # get_content_type() never returns None (it falls back to a default
        # when the header is missing), is lower-cased, and strips parameters,
        # so "application/pdf" matches with or without a trailing
        # "; name=...".
        content_type = part.get_content_type()
        if content_type == "application/pdf":
            fallback_name = "Remailable_Attachment.pdf"
        elif content_type.startswith("application/epub"):
            fallback_name = "Remailable_Attachment.epub"
        else:
            continue

        # decode=True honours the part's Content-Transfer-Encoding rather
        # than assuming base64.
        filebytes = part.get_payload(decode=True)
        if filebytes is None:
            # Container part without a body of its own; nothing to extract.
            continue
        filename = part.get_filename() or fallback_name
        files.append((filename, filebytes))

    return ParseMessageResult(
        sent_from=sent_from,
        subject=subject,
        # An empty list means we couldn't parse any files out of the message.
        status=MessageStatus.SUCCESS if files else MessageStatus.FAILURE,
        extracted_files=files,
    )
def get_revolut_statement_part(
        msg: email.message.Message) -> Optional[email.message.Message]:
    """Return the first part of *msg* whose filename is an account statement.

    A part qualifies when its filename has the shape
    ``account-statement_<d>-<d>-<d>_<d>-<d>-<d>_de-ch_<hex>.csv``, where each
    ``<d>`` is a (possibly empty) run of digits and ``<hex>`` a (possibly
    empty) run of lowercase hexadecimal characters. Parts without a filename
    are ignored.

    Returns:
        The matching message part, or ``None`` if no part matches.
    """
    # Raw string so `\d` is a regex escape rather than an invalid string
    # escape, and `\.` so that only a literal dot precedes "csv".
    statement_name = re.compile(
        r'account-statement_\d*-\d*-\d*_\d*-\d*-\d*_de-ch_[0-9a-f]*\.csv')
    for part in msg.walk():
        filename = part.get_filename()
        if filename and statement_name.fullmatch(filename):
            return part
    return None
def _find_timepie_part(msg: email.message.Message) -> t.Sequence[Ping]:
    """Try to find which part of a potentially nested multipart message
    contains TagTime data.

    Each part's decoded payload is split into lines. The first part whose
    payload is non-empty and in which every line looks like
    ``<7+ digits> <text>[<text>]`` is passed to ``parse_timepie`` (defined
    elsewhere) and that result is returned.

    Parts whose payload is empty or is not valid UTF-8 (for example binary
    attachments) are skipped rather than aborting the search.

    Raises:
        ValueError: if no part of the message matches.
    """
    # Compiled once up front; the same pattern is applied to every line.
    log_line = re.compile(r'^[0-9]{7,} [^\[]*\[.*\]$')
    for part in msg.walk():
        payload = part.get_payload(decode=True)
        if not payload:
            continue
        try:
            lines = payload.decode('utf-8').splitlines()
        except UnicodeDecodeError:
            # Not text, so it cannot be the log; keep looking at other parts.
            continue
        if lines and all(log_line.match(line) for line in lines):
            return parse_timepie(lines)
    raise ValueError('given message has no timepie.log part')
def resolve_attachments(message: email.message.Message):
    """Build an ``IMAPAttachment`` for every named attachment in *message*.

    Parts are visited in ``walk()`` order. Multipart containers and parts
    without a ``Content-Disposition`` header are ignored, as are parts whose
    decoded filename is empty. For every remaining part an ``IMAPAttachment``
    is created from the filename and the part's decoded payload is written
    to it.

    NOTE(review): the filename goes through ``IMAPEmail.soft_decode`` twice
    (once for the emptiness check, once when constructing the attachment);
    this mirrors the existing behaviour — confirm whether it is intentional.

    Returns:
        A list of the created attachments, in the order encountered.
    """
    collected = []
    for part in message.walk():
        is_container = part.get_content_maintype() == 'multipart'
        if is_container or part.get('Content-Disposition') is None:
            continue

        decoded_name = IMAPEmail.soft_decode(part.get_filename())
        if not decoded_name:
            continue

        attachment = IMAPAttachment(IMAPEmail.soft_decode(decoded_name))
        attachment.write(part.get_payload(decode=True))
        collected.append(attachment)
    return collected
def extract_pdf(message: email.message.Message) -> Tuple[str, bytes]:
    """
    Get a PDF from the email.

    The message is handled as follows, in order:

    1. If the subject contains "unsubscribe" (case-insensitive), the sender
       is passed to ``delete_user`` and ``(False, False)`` is returned.
    2. The first part whose MIME type is "application/pdf" is returned as
       ``(filename, decoded_bytes)``; a part without a filename is named
       "Remailable_Attachment.pdf".
    3. With no PDF, a subject of exactly 8 characters is treated as a
       verification code: ``register_user`` is called, a confirmation email
       is sent and ``(False, False)`` is returned.
    4. Otherwise an error email is sent to the sender and ``(False, False)``
       is returned.

    Note that ``(False, False)`` is the "nothing to process" sentinel even
    though the annotation promises ``Tuple[str, bytes]``.

    TODO: This is the thing to change to accommodate more than one PDF per msg.
    """
    sender = message.get("From")
    # The Subject header may be absent; treat that like an empty subject
    # instead of crashing on `.lower()`.
    subject = message.get("Subject")

    # Handle unsubscribes:
    if subject and "unsubscribe" in subject.lower():
        plog(f"Permanently removing user {sender}.")
        delete_user(sender)
        return (False, False)

    for part in message.walk():
        # get_content_type() is never None, is lower-cased and carries no
        # parameters, so PDFs are found with or without "; name=...".
        if part.get_content_type() != "application/pdf":
            continue
        # decode=True honours the part's Content-Transfer-Encoding rather
        # than assuming base64.
        filebytes = part.get_payload(decode=True)
        if filebytes is None:
            # Container part without a body of its own; keep looking.
            continue
        filename = part.get_filename() or "Remailable_Attachment.pdf"
        return (filename, filebytes)

    # No PDF found. Let's try the subject line and see if there's a code
    # for us to gobble up in there :)
    if subject and len(subject) == 8:
        register_user(sender, subject)
        plog(f"Registered a new user {sender}.")
        send_email_if_enabled(
            sender,
            subject="Your email address is now verified!",
            message="Your verification succeeded, and you can now email documents to your reMarkable tablet. Try responding to this email with a PDF attachment!",
        )
        return (False, False)

    send_email_if_enabled(
        sender,
        subject="A problem with your document :(",
        message="Unfortunately, a problem occurred while processing your email. Remailable only supports PDF attachments for now. If you're still encountering issues, please get in touch with Jordan at [email protected] or on Twitter at @j6m8.",
    )
    plog(f"ERROR: Encountered no PDF in message from {sender}")
    return (False, False)
def extract_pdf(message: email.message.Message) -> Tuple[str, bytes]:
    """
    Get a PDF from the email.

    The first part whose MIME type is "application/pdf" is returned as
    ``(filename, decoded_bytes)``; a part without a filename is named
    "Remailable_Attachment.pdf".

    When the message has no PDF and its subject is exactly 8 characters
    long, the subject is treated as a registration code: ``register_user``
    is called with the sender and the code, and ``True`` is returned (note
    that this does not match the annotated return type).

    Raises:
        ValueError: if there is no PDF and the subject is not an 8-character
            code.

    TODO: This is the thing to change to accommodate more than one PDF per msg.
    """
    for part in message.walk():
        # get_content_type() is never None, is lower-cased and carries no
        # parameters, so PDFs are found with or without "; name=...".
        if part.get_content_type() != "application/pdf":
            continue
        # decode=True honours the part's Content-Transfer-Encoding rather
        # than assuming base64.
        filebytes = part.get_payload(decode=True)
        if filebytes is None:
            # Container part without a body of its own; keep looking.
            continue
        filename = part.get_filename() or "Remailable_Attachment.pdf"
        return (filename, filebytes)

    # No PDF found. Let's try the subject line and see if there's a code
    # for us to gobble up in there :)
    code = message.get("Subject")
    if code and len(code) == 8:
        register_user(message.get("From"), code)
        return True
    raise ValueError("No PDF in this message.")
def get_tls_report(msg: email.message.Message) -> Any:
    """Return the parsed JSON report from the first TLS-report part of *msg*.

    Parts are visited in ``walk()`` order. A part with content type
    ``application/tlsrpt+gzip`` is gunzipped before parsing; a part with
    content type ``application/tlsrpt+json`` is parsed as-is. All other
    parts are ignored.

    Returns:
        The object produced by ``json.loads`` for the first matching part,
        or ``None`` when the message contains no such part.
    """
    for part in msg.walk():
        kind = part.get_content_type()
        if kind == "application/tlsrpt+gzip":
            raw_json = gzip.decompress(part.get_payload(decode=True))
        elif kind == "application/tlsrpt+json":
            raw_json = part.get_payload(decode=True)
        else:
            continue
        return json.loads(raw_json)
    return None