def store(self, *args):
    """Apply a flag command to one message of the currently selected mailbox.

    args: ("<id>", "<+|->FLAGS", "(\\Flag1 \\Flag2 ... \\Flagn)")

    The message is looked up in ``self.spam[self._mailbox]`` at position
    ``id - 1``. A command starting with ``+`` stores the union of the
    existing and the given flags; a command starting with ``-`` removes the
    given flags from the existing ones. Any other command leaves the
    message untouched.
    """
    # Convert the id first and subtract afterwards: the id is documented as
    # a string, and "3" - 1 raises TypeError before int() is ever reached.
    message = self.spam[self._mailbox][int(args[0]) - 1]
    old_flags = ParseFlags(message["flags"])
    flags = ParseFlags("FLAGS" + args[2])
    command = args[1].strip()
    if command.startswith("+"):
        # set() removes duplicates; the resulting flag order is unspecified.
        message["flags"] = "FLAGS (%s)" % " ".join(set(flags + old_flags))
    elif command.startswith("-"):
        message["flags"] = "FLAGS (%s)" % " ".join(
            [flag for flag in old_flags if flag not in flags])
def parse_imap_message(message_type, message_data):
    """Build a message object from one IMAP fetch result.

    ``message_data[0]`` supplies the uid (its first element) and the flags
    (via ``ParseFlags``); ``message_data[1]`` is parsed as the message text.
    Everything is handed to ``build_message`` and its result is returned.
    """
    header = message_data[0]
    uid = header[0]
    flags = ParseFlags(header)
    parsed = Parser().parsestr(message_data[1])
    return build_message(
        message_type=message_type,
        uid=uid,
        message=parsed,
        flags=flags,
    )
def parse_FETCH_header(self, raw_headers):
    """Populate flags, labels and Gmail ids on ``self`` from a FETCH header.

    ``self.flags`` and ``self.labels`` are always set. ``self.thread_id``
    and ``self.message_id`` are only assigned when the corresponding
    ``X-GM-THRID`` / ``X-GM-MSGID`` number is present in ``raw_headers``.
    """
    self.flags = list(ParseFlags(raw_headers))
    self.labels = parse_labels(raw_headers)

    gmail_ids = (
        ('thread_id', r'X-GM-THRID (\d+)'),
        ('message_id', r'X-GM-MSGID (\d+)'),
    )
    for attribute, pattern in gmail_ids:
        found = re.search(pattern, raw_headers)
        if found:
            setattr(self, attribute, found.group(1))
def parse_flags(headers):
    r"""Parse flags from headers using Python's `ParseFlags`.

    A leading \ is dropped from every flag that has one, to hide the details
    of the protocol. Flags keep the type `ParseFlags` returns them in
    (bytes stay bytes, str stays str).

    :param headers: IMAP response passed unchanged to `ParseFlags`.
    :return: set of flag names without their leading backslash.
    """
    def _strip_backslash(flag):
        # startswith() needs a prefix of the same type as the flag itself;
        # a str prefix raises TypeError on the bytes flags imaplib returns.
        backslash = b'\\' if isinstance(flag, bytes) else '\\'
        return flag[1:] if flag.startswith(backslash) else flag

    return {_strip_backslash(flag) for flag in ParseFlags(headers)}
def parse_flags(self, headers):
    """Return the flags `ParseFlags` finds in ``headers`` as a list."""
    return [flag for flag in ParseFlags(headers)]
def extract(self, date_def=None, fetch_all=False):
    """Parse email and extract attachments.

    Selects ``self.folder``, searches its undeleted messages (optionally
    restricted by date), keeps those whose BODYSTRUCTURE mentions an
    attachment, then walks the MIME parts of each candidate. Attachments
    whose decoded size is at least ``self.max_size`` are written to
    ``self.extract_dir``. Unless ``self.extract_only`` is set, the message
    is appended again without them and, outside debug and dry-run mode, the
    original is flagged ``\\Deleted``.

    :param str date_def: Date definition as [<>]date[to][date] where date is year, or year-month, or year-month-day.
    :param bool fetch_all: Fetch all messages. (default: False)
    :raises RuntimeWarning: if the folder cannot be selected, the search
        fails, or neither ``date_def`` nor ``fetch_all`` is given.
    :raises SystemExit: through ``exit(0)`` when the search or the
        attachment filter leaves no message to process.
    """
    if not self.dry_run:
        os.makedirs(self.extract_dir, exist_ok=True)
        if self.verbose:
            print("Create extract dir %s." % self.extract_dir)
    else:
        print("[Dry-run] Create extract dir %s." % self.extract_dir)

    folder = self.folder
    if " " in self.folder:
        folder = '"%s"' % folder
    status, select_data = self.imap.select(imaputf7encode(folder))
    if status != "OK":
        raise RuntimeWarning("Could not select %s" % folder)
    total_mail = int(select_data[0])
    print("Selected folder '%s' (%d mails)." % (self.folder, total_mail))

    if not date_def and not fetch_all:
        raise RuntimeWarning(
            "Date criteria not found, use 'all' parameter to fetch all messages."
        )
    elif date_def:
        date_on, date_since, date_before = self.parse_date(date_def)
        dates = self.parse_date(date_def, "ymd")
        date_crit = []
        if date_on:
            date_crit.append('ON "%s"' % date_on)
        if date_since:
            date_crit.append('SINCE "%s"' % date_since)
        if date_before:
            date_crit.append('BEFORE "%s"' % date_before)
    else:
        date_crit = []
        # Without a date definition there is nothing to re-check per mail.
        # Leaving `dates` unassigned made the `dates is not None` test below
        # raise UnboundLocalError whenever fetch_all was used on its own.
        dates = None

    if not self.gmail_mode:
        status, search_data = self.imap.search("UTF-8", 'UNDELETED', *date_crit)
    else:
        status, search_data = self.imap.search(
            "UTF-8", 'UNDELETED', 'X-GM-RAW', 'has:attachment', *date_crit)
    if status != "OK":
        raise RuntimeWarning("Could not search in %s" % folder)
    if search_data[0]:
        uids = search_data[0].split(b' ')  # type: list
    else:
        uids = []
    print("%d messages corresponding to search." % len(uids))
    if not uids:
        exit(0)

    # First pass: fetch only the body structures to find mails worth downloading.
    to_fetch = []
    try:
        status, fetch_data = self.imap.fetch(b','.join(uids), 'BODYSTRUCTURE[PEEK]')
    except IMAP4.error:
        status, fetch_data = self.imap.fetch(b','.join(uids), 'BODYSTRUCTURE')
    if status != "OK":
        print("Could not fetch messages.")

    # The pattern does not depend on the message, so build it once.
    reg_attachment = "attachment|application"
    if self.inline_images:
        reg_attachment = reg_attachment + "|image"
    attachment_re = re.compile('^.*"(%s)".*$' % reg_attachment, re.IGNORECASE)

    merge_previous = False
    previous_structure = b''
    for structure in fetch_data:  # type: bytes
        if isinstance(structure, (list, tuple)):
            if len(structure) == 2:
                structure = structure[0] + b'"' + structure[1] + b'"'
            else:
                structure = b' '.join(structure)
        if not structure.endswith(b')'):
            # Incomplete response item: keep it and glue it to the next one.
            previous_structure = b'' + structure
            merge_previous = True
            continue
        if merge_previous:
            structure = previous_structure + structure
            merge_previous = False
        try:
            has_attachments = attachment_re.match(structure.decode("utf-8")) is not None
        except AttributeError as e:
            print("Failed to process message. Error: %s" % e)
            has_attachments = False
        if not has_attachments:
            continue
        uid = structure.split(b' ')[0]
        if uid:
            to_fetch.append(uid)
        else:
            # Probably an eml attachment
            pass
    print("%d messages with attachments." % len(to_fetch))
    print()
    if not to_fetch:
        exit(0)

    # Second pass: download each candidate and process its parts.
    for uid in to_fetch:
        try:
            status, fetch_data = self.imap.fetch(uid, '(FLAGS RFC822)')
        except imaplib.IMAP4.error as e:
            print("Encountered error when reading mail uid %s: %s" % (uid, repr(e)))
            continue
        if status != "OK":
            print("Could not fetch messages")

        skip_flags = False
        for i, fetch in enumerate(fetch_data):
            if skip_flags:
                # previous mail flags part
                skip_flags = False
                continue
            if b')' == fetch:
                continue
            flags = ParseFlags(bytes(fetch[0]))
            if not flags and i + 1 < len(fetch_data) and isinstance(fetch_data[i + 1], bytes):
                # check if flags are in the next fetch_data
                flags = ParseFlags(fetch_data[i + 1])
                if flags:
                    skip_flags = True
            if flags:
                flags = tuple(map(lambda x: x.decode("utf-8"), flags))

            mail = message_from_bytes(fetch[1])  # type: EmailMessage
            # Default to "" so a mail without a Subject header does not make
            # decode_header() fail on None.
            subject, encoding = decode_header(mail.get("Subject", ""))[0]
            if encoding:
                if not encoding.startswith('unknown'):
                    subject = subject.decode(encoding)
                else:
                    subject = str(subject)
            elif isinstance(subject, bytes):
                subject = subject.decode()

            is_flagged = "\\Flagged" in flags

            # NOTE(review): a mail without a Date header yields None here and
            # re.match() then raises TypeError - confirm whether such mails
            # should be skipped instead.
            mail_date = mail.get("Date", None)
            # Keep everything up to the time of day; whatever follows it is dropped.
            date_match = re.match(r"^(.*\d{4} \d{2}:\d{2}:\d{2}).*$", mail_date)
            if date_match:
                mail_date = date_match.group(1)
            try:
                mail_date = datetime.strptime(mail_date, "%a, %d %b %Y %H:%M:%S")
            except ValueError:
                mail_date = datetime.strptime(mail_date, "%d %b %Y %H:%M:%S")

            if dates is not None:
                # Re-check the mail's own Date header against the requested
                # range; dates compare as "YYYY-MM-DD" strings.
                date_ok = False
                check_date = mail_date.strftime("%Y-%m-%d")
                if dates[0]:
                    date_ok = check_date == dates[0]
                else:
                    if dates[1] and not dates[2]:
                        date_ok = check_date >= dates[1]
                    elif dates[2] and not dates[1]:
                        date_ok = check_date <= dates[2]
                    elif dates[1] and dates[2]:
                        date_ok = dates[1] <= check_date <= dates[2]
                if not date_ok:
                    if self.verbose:
                        print(
                            "\nSkip email: '%s' [%s] (possible previous extract)."
                            % (subject, mail_date))
                    continue

            new_mail = EmailMessage()
            new_mail._headers = mail._headers
            has_alternative = False
            nb_alternative = 0
            nb_extraction = 0
            part_nb = 1
            to_print = []  # print buffer
            to_print.append("")
            to_print.append("Parsing mail: '%s' [%s]" % (subject, mail_date))

            if is_flagged and 'skip' == self.flagged_action:
                if self.verbose:
                    to_print.append(" Skip flagged mail.")
                    # Flush the buffer before skipping; previously the line
                    # queued just above was discarded by the `continue`.
                    print("\n".join(to_print))
                continue

            for part in mail.walk():  # type: Message
                if part.get_content_type().startswith("multipart/"):
                    if part.get_content_type() == "multipart/alternative":
                        new_mail.attach(part)  # add text/plain and text/html alternatives
                        has_alternative = True
                    continue
                if has_alternative and nb_alternative < 2 and (
                        part.get_content_type() == "text/plain"
                        or part.get_content_type() == "text/html"):
                    nb_alternative = nb_alternative + 1
                    continue  # text/plain and text/html already added in multipart/alternative

                is_attachment = (
                    part.get_content_disposition() is not None
                    and part.get_content_disposition().startswith("attachment"))
                if not is_attachment:
                    new_mail.attach(part)
                    continue

                part_nb = part_nb + 1
                if not part.get_filename():
                    attachment_filename = "part.%d" % part_nb
                else:
                    attachment_filename, encoding = decode_header(part.get_filename())[0]
                    if encoding:
                        attachment_filename = attachment_filename.decode(encoding)
                    elif isinstance(attachment_filename, bytes):
                        attachment_filename = attachment_filename.decode()

                if "AttachmentDetached" in part.get("X-Mozilla-Altered", ""):
                    if self.verbose:
                        to_print.append(
                            " Attachment '%s' already detached." % attachment_filename)
                    continue

                if part.get("Content-Transfer-Encoding", "").lower() == "base64":
                    try:
                        attachment_content = b64decode(part.get_payload())
                    except BinasciiError:
                        to_print.append(
                            " Error when decoding attachment '%s', leave intact."
                            % attachment_filename)
                        new_mail.attach(part)
                        continue
                else:
                    if isinstance(part.get_payload(), list):
                        attachment_content = part.get_payload(1).encode("utf-8")
                    else:
                        attachment_content = part.get_payload().encode("utf-8")

                attachment_size = len(attachment_content)
                if attachment_size < self.max_size:
                    if self.verbose:
                        to_print.append(
                            " Attachment '%s' size (%s) is smaller than defined threshold (%s), leave intact."
                            % (attachment_filename,
                               human_readable_size(attachment_size),
                               human_readable_size(self.max_size)))
                    new_mail.attach(part)
                    continue

                # NOTE(review): attachment_filename comes from the mail and is
                # joined to extract_dir unchanged - confirm whether path
                # separators in it need sanitising.
                filename = "%s - %s" % (mail_date.strftime("%Y-%m-%d"), attachment_filename)
                idx = 0
                # Insert or bump a "(NN)" counter until the name is unused.
                while os.path.exists(os.path.join(self.extract_dir, filename)):
                    idx = idx + 1
                    filename = re.sub(
                        r"^(\d{4}-\d{2}-\d{2}) (?:\(\d+\) )?- (.*)$",
                        r"\g<1> (%02d) - \g<2>" % idx, filename)

                if not self.dry_run:
                    with open(os.path.join(self.extract_dir, filename), "wb") as file:
                        file.write(attachment_content)
                    to_print.append(
                        " Extracted '%s' (%s) to '%s'"
                        % (attachment_filename,
                           human_readable_size(attachment_size),
                           os.path.join(self.extract_dir, filename)))
                else:
                    to_print.append(
                        " [Dry-run] Extracted '%s' (%s) to '%s'"
                        % (attachment_filename,
                           human_readable_size(attachment_size),
                           os.path.join(self.extract_dir, filename)))

                self.extracted_nb = self.extracted_nb + 1
                self.extracted_size = self.extracted_size + attachment_size
                nb_extraction = nb_extraction + 1

                if self.thunderbird_mode and (
                        'detach' == self.flagged_action or not is_flagged):
                    # replace attachment by local file URL
                    headers_str = ""
                    try:
                        headers_str = "\n".join(
                            map(lambda x: x[0] + ": " + x[1], part._headers))
                    except Exception as e:
                        to_print.append(
                            " Error when serializing headers: %s" % repr(e))
                    new_part = Message()
                    new_part._headers = part._headers
                    new_part.set_payload(
                        "You deleted an attachment from this message. The original MIME headers for the attachment were:\n%s"
                        % headers_str)
                    new_part.replace_header("Content-Transfer-Encoding", "")
                    url_path = "file:///%s/%s" % (
                        self.extract_dir.replace("\\", "/"), filename)
                    new_part.add_header("X-Mozilla-External-Attachment-URL", url_path)
                    new_part.add_header(
                        "X-Mozilla-Altered",
                        'AttachmentDetached; date=%s' % Time2Internaldate(time()))
                    new_mail.attach(new_part)

            if nb_extraction:
                self.extracted_from_nb = self.extracted_from_nb + 1

            if to_print:
                if nb_extraction > 0 or self.verbose:
                    print("\n".join(to_print))

            if nb_extraction > 0 and not self.extract_only and (
                    'detach' == self.flagged_action or not is_flagged):
                if not self.dry_run:
                    print(
                        " Extracted %s attachment%s, replacing email."
                        % (nb_extraction, "s" if nb_extraction > 1 else ""))
                    status, append_data = self.imap.append(
                        imaputf7encode(folder), " ".join(flags), '',
                        new_mail.as_bytes(policy=policy.SMTPUTF8))
                    if status != "OK":
                        print(" Could not append message to IMAP server.")
                        continue
                    if self.verbose:
                        print(" Append message on IMAP server.")
                else:
                    print(
                        " [Dry-run] Extracted %s attachment%s, replacing email."
                        % (nb_extraction, "s" if nb_extraction > 1 else ""))
                    if self.verbose:
                        print(" [Dry-run] Append message on IMAP server.")

                if not self.debug and not self.dry_run:
                    status, store_data = self.imap.store(uid, '+FLAGS', '\\Deleted')
                    if status != "OK":
                        print(
                            " Could not delete original message from IMAP server."
                        )
                        continue
                    if self.verbose:
                        print(" Delete original message.")
                elif self.dry_run:
                    if self.verbose:
                        print(" [Dry-run] Delete original message.")
                else:
                    print(" Debug: would delete original message.")
            elif self.extract_only and nb_extraction > 0:
                print(" Extracted %s attachment%s."
                      % (nb_extraction, "s" if nb_extraction > 1 else ""))
            elif 'extract' == self.flagged_action:
                if self.verbose:
                    print(" Flagged message, leave intact.")
            else:
                if nb_extraction > 0 or self.verbose:
                    print(" Nothing extracted.")

    print()
    print('Extract finished.')
    if self.extracted_size > 0:
        if not self.dry_run:
            print(" Extracted %d files from %s messages, %s gain."
                  % (self.extracted_nb, self.extracted_from_nb,
                     human_readable_size(self.extracted_size)))
        else:
            print(
                " [Dry-run] Extracted %d files from %s messages, %s gain."
                % (self.extracted_nb, self.extracted_from_nb,
                   human_readable_size(self.extracted_size)))
        if self.verbose:
            print(" Thunderbird headers used: %s."
                  % ("yes" if self.thunderbird_mode else "no"))
    print()
def parse_flags(self, headers):
    """Encode ``headers`` as ASCII bytes and return its flags as a list."""
    encoded = bytes(headers, 'ascii')
    return [flag for flag in ParseFlags(encoded)]
def parse_flags(self, headers):
    """Return the flags found in ``headers``, each decoded from ASCII."""
    decoded = []
    for raw_flag in ParseFlags(headers):
        decoded.append(raw_flag.decode('ascii'))
    return decoded