def DeleteMail():
    """Delete all messages from the Gmail Sent Mail, Trash and All Mail folders.

    Connects to ``imap.googlemail.com`` over SSL, logs in with the
    module-level ``FROM`` address and the decoded ``PASSMAIL`` password, and
    then:

    1. empties ``[Gmail]/Sent Mail`` and ``[Gmail]/Trash`` (the UIDs found in
       each folder are printed before they are deleted and expunged);
    2. labels every message in ``[Gmail]/All Mail`` with ``\\Trash``, deletes
       whatever is still listed there and expunges the folder.

    The IMAP session is always logged out, even when a command fails.
    """
    sent_folder = '[Gmail]/Sent Mail'
    trash_folder = '[Gmail]/Trash'
    all_mail_folder = "[Gmail]/All Mail"

    client = IMAPClient('imap.googlemail.com', use_uid=True, ssl=True)
    try:
        client.login(FROM, Decode(PASSMAIL))

        for folder in (sent_folder, trash_folder):
            client.select_folder(folder)
            # Search once so the UIDs printed are exactly the UIDs deleted.
            uids = client.search()
            print(uids)
            client.delete_messages(uids)
            client.expunge()
            client.close_folder()

        # Google automatically will move deleted messages to "All Mail"
        # folder. Now we can remove all messages from "All Mail".
        client.select_folder(all_mail_folder)
        client.set_gmail_labels(client.search(), '\\Trash')
        # Search again instead of reusing the first result: applying the
        # label may change which messages are still listed in this folder
        # (assumption about Gmail behaviour -- confirm).
        client.delete_messages(client.search())
        client.expunge()
    finally:
        # Do not leave the connection open when any step above raised.
        client.logout()
    print("Sent mails are deleted!")
def DeleteMail():
    """Delete all messages from the Gmail Sent Mail, Trash and All Mail folders.

    Connects to ``imap.googlemail.com`` over SSL, logs in with the
    module-level ``FROM`` address and the decoded ``PASSMAIL`` password, and
    then:

    1. empties ``[Gmail]/Sent Mail`` and ``[Gmail]/Trash`` (the UIDs found in
       each folder are printed before they are deleted and expunged);
    2. labels every message in ``[Gmail]/All Mail`` with ``\\Trash``, deletes
       whatever is still listed there and expunges the folder.

    The IMAP session is always logged out, even when a command fails.
    """
    sent_folder = '[Gmail]/Sent Mail'
    trash_folder = '[Gmail]/Trash'
    all_mail_folder = "[Gmail]/All Mail"

    client = IMAPClient('imap.googlemail.com', use_uid=True, ssl=True)
    try:
        client.login(FROM, Decode(PASSMAIL))

        for folder in (sent_folder, trash_folder):
            client.select_folder(folder)
            # Search once so the UIDs printed are exactly the UIDs deleted.
            uids = client.search()
            print(uids)
            client.delete_messages(uids)
            client.expunge()
            client.close_folder()

        # Google automatically will move deleted messages to "All Mail"
        # folder. Now we can remove all messages from "All Mail".
        client.select_folder(all_mail_folder)
        client.set_gmail_labels(client.search(), '\\Trash')
        # Search again instead of reusing the first result: applying the
        # label may change which messages are still listed in this folder
        # (assumption about Gmail behaviour -- confirm).
        client.delete_messages(client.search())
        client.expunge()
    finally:
        # Do not leave the connection open when any step above raised.
        client.logout()
    print("Sent mails are deleted!")
def get_credit_card_bill_by_imap4_old(email_account, email_password):
    """Generate credit-card bill e-mails found in an IMAP mailbox.

    The server address is looked up in ``IMAP4_SUFFIX_ADDRESS_DICT`` by the
    account's e-mail suffix, falling back to ``"imap." + suffix``.

    Yields:
        ``True`` once, right after a successful login, then one
        ``(bank_name, subject, content)`` tuple for every message for which
        ``check_email_credit_card_by_address`` returns a truthy bank name.
        When ``parse_email`` raises, the traceback is printed and a fixed
        error text is used as ``content`` instead.

    The connection is logged out when the generator finishes or is closed.
    """
    suffix = get_email_suffix(email_account)
    server_addr = IMAP4_SUFFIX_ADDRESS_DICT.get(suffix, "imap." + suffix)
    conn = IMAPClient(host=server_addr)
    try:
        conn.login(email_account, email_password)
        yield True

        for folder_flags, _, folder_name in conn.list_folders():
            # Skip folders whose flags mention any excluded marker.
            flags_text = str(folder_flags)
            if any(marker in flags_text
                   for marker in IMAP4_DONT_SEARCH_FOLDER_LIST):
                continue

            conn.select_folder(folder_name, readonly=True)
            # conn.search(['SUBJECT', 'test'])
            # (searching by TEXT, SUBJECT or HEADER does not work)
            since = date.today() - timedelta(days=IMAP4_SEARCH_SINCE_DAY)
            uids = conn.search(['SINCE', since])
            fetched = conn.fetch(uids, ['BODY.PEEK[]'])

            for item in fetched.values():
                msg = message_from_bytes(item[b'BODY[]'])
                address, subject = parse_email_headers(msg)
                bank_name = check_email_credit_card_by_address(subject, address)
                if not bank_name:
                    continue
                try:
                    content = parse_email(msg)
                except Exception:
                    print_exc()
                    content = '解析邮件正文出错'
                yield (bank_name, subject, content)

            conn.close_folder()
    finally:
        conn.logout()
class Server: """ Server class to fetch and filter data Connects to the IMAP server, search according to a criteria, fetch all attachments of the mails matching the criteria and save them locally with a timestamp """ def __init__(self, host, username, password, debug=False): """ Server class __init__ which expects an IMAP host to connect to @param host: gmail's default server is fine: imap.gmail.com @param username: your gmail account (i.e. [email protected]) @param password: we highly recommend you to use 2-factor auth here """ if not host: raise Exception('Missing IMAP host parameter in your config') try: self._server = IMAPClient(host, use_uid=True, ssl=True) except: raise Exception('Could not successfully connect to the IMAP host') setattr(self._server, 'debug', debug) # mails index to avoid unnecessary redownloading index = '.index_%s' % (username) index = os.path.join(_app_folder(), index) self._index = shelve.open(index, writeback=True) # list of attachments hashes to avoid dupes hashes = '.hashes_%s' % (username) hashes = os.path.join(_app_folder(), hashes) self._hashes = shelve.open(hashes, writeback=True) self._username = username self._login(username, password) def _login(self, username, password): """ Login to the IMAP server and selects the all mail folder @param username: your gmail account (i.e. 
[email protected]) @param password: we highly recommend you to use 2-factor auth here """ if not username or not password: raise Exception('Missing username or password parameters') try: self._server.login(username, password) except: raise Exception('Cannot login, check username/password, are you using 2-factor auth?') # you may want to hack this to only fetch attachments # from a different exclusive label all_mail = "" # gmail's allmail folder always has the '\\AllMail' flag set # regardless which language the interface is using if not all_mail: all_mail = "[Gmail]/All Mail" for flags, delimiter, folder_name in self._server.xlist_folders(): if u'\\AllMail' in flags: all_mail = folder_name break # stats logging print "LOG: selecting message folder '%s'" % all_mail self._server.select_folder(all_mail, readonly=True) def _filter_messages(self): """Filter mail to only parse ones containing images""" # creates a list of all types of image files to search for, # even though we have no idea if gmail supports them or what mimetypes.init() mimes = [] for ext in mimetypes.types_map: if 'image' in mimetypes.types_map[ext]: mimes.append(ext.replace('.', '')) mimelist = ' OR '.join(mimes) # that's why we only support gmail # for other mail services we'd have to translate the custom # search to actual IMAP queries, thus no X-GM-RAW cookie for us criteria = 'X-GM-RAW "has:attachment filename:(%s)"' % (mimelist) try: messages = self._server.search([criteria]) except: raise Exception('Search criteria return a failure, it must be a valid gmail search') # stats logging print 'LOG: %d messages matched the search criteria %s' % (len(messages), criteria) return messages def _save_part(self, part, mail): """ Internal function to decode attachment filenames and save them all @param mail: the mail object from message_from_string so it can checks its date @param part: the part object after a mail.walk() to get multiple attachments """ if not hasattr(self, "seq"): self.seq = 0 # we check if 
None in filename instead of just if it is None # due to the type of data decode_header returns to us header_filename = _charset_decoder(part.get_filename()) # i.e. some inline attachments have no filename field in the header # so we have to hack around it and get the name field if 'None' in header_filename: header_filename = part.get('Content-Type').split('name=')[-1].replace('"', '') elif not header_filename[0][0] or header_filename[0][0] is None: # we should hopefully never reach this, attachments would be 'noname' in gmail header_filename = 'attachment-%06d.data' % (self.seq) self.seq += 1 # sanitize it punct = '!"#$&\'*+/;<>?[\]^`{|}~' header_filename = header_filename.translate(None, punct) # 2012-10-28_19-15-22 (Y-M-D_H-M-S) header_date = parsedate(mail['date']) header_date = '%s-%s-%s_%s-%s-%s_' % (header_date[0], header_date[1], header_date[2], header_date[3], header_date[4], header_date[5]) filename = header_date + header_filename # we should create it in the documents folder username = self._username userdir = os.path.expanduser('~/LostPhotosFound') savepath = os.path.join(userdir, username) if not os.path.isdir(savepath): os.makedirs(savepath) # logging complement print '\t...%s' % (filename) saved = os.path.join(savepath, filename) if not os.path.isfile(saved): with open(saved, 'wb') as imagefile: try: payload = part.get_payload(decode=True) except: message = 'Failed when downloading attachment: %s' % (saved) raise Exception(message) payload_hash = hashlib.sha1(payload).hexdigest() # gmail loves to duplicate attachments in replies if payload_hash not in self._hashes.keys(): try: imagefile.write(payload) except: message = 'Failed writing attachment to file: %s' % (saved) raise Exception(message) self._hashes[payload_hash] = payload_hash else: print 'Duplicated attachment %s (%s)' % (saved, payload_hash) os.remove(saved) def _cleanup(self): """Gracefully cleans up the mess and leave the server""" self._index.sync() self._index.close() self._hashes.sync() 
self._hashes.close() self._server.close_folder() self._server.logout() def lostphotosfound(self): """The actual program, which fetchs the mails and all its parts attachments""" messages = self._filter_messages() for msg in messages: try: idfetched = self._server.fetch([msg], ['X-GM-MSGID']) except: raise Exception('Could not fetch the message ID, server did not respond') msgid = str(idfetched[idfetched.keys()[0]]['X-GM-MSGID']) # mail has been processed in the past, skip it if msgid in self._index.keys(): print 'Skipping X-GM-MSDID %s' % (msgid) continue # if it hasn't, fetch it and iterate through its parts msgdata = self._server.fetch([msg], ['RFC822']) for data in msgdata: mail = message_from_string(msgdata[data]['RFC822'].encode('utf-8')) if mail.get_content_maintype() != 'multipart': continue # logging header_from = _charset_decoder(mail['From']) header_subject = _charset_decoder(mail['Subject']) print '[%s]: %s' % (header_from, header_subject) for part in mail.walk(): # if it's only plain text, i.e. no images if part.get_content_maintype() == 'multipart': continue # if no explicit attachments unless they're inline if part.get('Content-Disposition') is None: pass # if non-graphic inline data if 'image/' not in part.get_content_type(): continue # only then we can save this mail part self._save_part(part, mail) # all parts of mail processed, add it to the index self._index[msgid] = msgid self._cleanup()
class Server: """ Server class to fetch and filter data Connects to the IMAP server, search according to a criteria, fetch all attachments of the mails matching the criteria and save them locally with a timestamp """ def __init__(self, host, username, password, debug=False): """ Server class __init__ which expects an IMAP host to connect to @param host: gmail's default server is fine: imap.gmail.com @param username: your gmail account (i.e. [email protected]) @param password: we highly recommend you to use 2-factor auth here """ if not host: raise Exception('Missing IMAP host parameter in your config') try: self._server = IMAPClient(host, use_uid=True, ssl=True) except: raise Exception('Could not successfully connect to the IMAP host') setattr(self._server, 'debug', debug) # mails index to avoid unnecessary redownloading index = '.index_%s' % (username) index = os.path.join(_app_folder(), index) self._index = shelve.open(index, writeback=True) # list of attachments hashes to avoid dupes hashes = '.hashes_%s' % (username) hashes = os.path.join(_app_folder(), hashes) self._hashes = shelve.open(hashes, writeback=True) self._username = username self._login(username, password) def _login(self, username, password): """ Login to the IMAP server and selects the all mail folder @param username: your gmail account (i.e. [email protected]) @param password: we highly recommend you to use 2-factor auth here """ if not username or not password: raise Exception('Missing username or password parameters') try: self._server.login(username, password) except: raise Exception( 'Cannot login, check username/password, are you using 2-factor auth?' 
) # you may want to hack this to only fetch attachments # from a different exclusive label all_mail = "" # gmail's allmail folder always has the '\\AllMail' flag set # regardless which language the interface is using if not all_mail: all_mail = "[Gmail]/All Mail" for flags, delimiter, folder_name in self._server.xlist_folders(): if u'\\AllMail' in flags: all_mail = folder_name break # stats logging print "LOG: selecting message folder '%s'" % all_mail self._server.select_folder(all_mail, readonly=True) def _filter_messages(self): """Filter mail to only parse ones containing images""" # creates a list of all types of image files to search for, # even though we have no idea if gmail supports them or what mimetypes.init() mimes = [] for ext in mimetypes.types_map: if 'image' in mimetypes.types_map[ext]: mimes.append(ext.replace('.', '')) mimelist = ' OR '.join(mimes) # that's why we only support gmail # for other mail services we'd have to translate the custom # search to actual IMAP queries, thus no X-GM-RAW cookie for us criteria = 'X-GM-RAW "has:attachment filename:(%s)"' % (mimelist) try: messages = self._server.search([criteria]) except: raise Exception( 'Search criteria return a failure, it must be a valid gmail search' ) # stats logging print 'LOG: %d messages matched the search criteria %s' % ( len(messages), criteria) return messages def _save_part(self, part, mail): """ Internal function to decode attachment filenames and save them all @param mail: the mail object from message_from_string so it can checks its date @param part: the part object after a mail.walk() to get multiple attachments """ if not hasattr(self, "seq"): self.seq = 0 # we check if None in filename instead of just if it is None # due to the type of data decode_header returns to us header_filename = _charset_decoder(part.get_filename()) # i.e. 
some inline attachments have no filename field in the header # so we have to hack around it and get the name field if 'None' in header_filename: header_filename = part.get('Content-Type').split( 'name=')[-1].replace('"', '') elif not header_filename[0][0] or header_filename[0][0] is None: # we should hopefully never reach this, attachments would be 'noname' in gmail header_filename = 'attachment-%06d.data' % (self.seq) self.seq += 1 # sanitize it punct = '!"#$&\'*+/;<>?[\]^`{|}~' header_filename = header_filename.translate(None, punct) # 2012-10-28_19-15-22 (Y-M-D_H-M-S) header_date = parsedate(mail['date']) header_date = '%s-%s-%s_%s-%s-%s_' % (header_date[0], header_date[1], header_date[2], header_date[3], header_date[4], header_date[5]) filename = header_date + header_filename # we should create it in the documents folder username = self._username userdir = os.path.expanduser('~/LostPhotosFound') savepath = os.path.join(userdir, username) if not os.path.isdir(savepath): os.makedirs(savepath) # logging complement print '\t...%s' % (filename) saved = os.path.join(savepath, filename) if not os.path.isfile(saved): with open(saved, 'wb') as imagefile: try: payload = part.get_payload(decode=True) except: message = 'Failed when downloading attachment: %s' % ( saved) raise Exception(message) payload_hash = hashlib.sha1(payload).hexdigest() # gmail loves to duplicate attachments in replies if payload_hash not in self._hashes.keys(): try: imagefile.write(payload) except: message = 'Failed writing attachment to file: %s' % ( saved) raise Exception(message) self._hashes[payload_hash] = payload_hash else: print 'Duplicated attachment %s (%s)' % (saved, payload_hash) os.remove(saved) def _cleanup(self): """Gracefully cleans up the mess and leave the server""" self._index.sync() self._index.close() self._hashes.sync() self._hashes.close() self._server.close_folder() self._server.logout() def lostphotosfound(self): """The actual program, which fetchs the mails and all its parts 
attachments""" messages = self._filter_messages() for msg in messages: try: idfetched = self._server.fetch([msg], ['X-GM-MSGID']) except: raise Exception( 'Could not fetch the message ID, server did not respond') msgid = str(idfetched[idfetched.keys()[0]]['X-GM-MSGID']) # mail has been processed in the past, skip it if msgid in self._index.keys(): print 'Skipping X-GM-MSDID %s' % (msgid) continue # if it hasn't, fetch it and iterate through its parts msgdata = self._server.fetch([msg], ['RFC822']) for data in msgdata: mail = message_from_string( msgdata[data]['RFC822'].encode('utf-8')) if mail.get_content_maintype() != 'multipart': continue # logging header_from = _charset_decoder(mail['From']) header_subject = _charset_decoder(mail['Subject']) print '[%s]: %s' % (header_from, header_subject) for part in mail.walk(): # if it's only plain text, i.e. no images if part.get_content_maintype() == 'multipart': continue # if no explicit attachments unless they're inline if part.get('Content-Disposition') is None: pass # if non-graphic inline data if 'image/' not in part.get_content_type(): continue # only then we can save this mail part self._save_part(part, mail) # all parts of mail processed, add it to the index self._index[msgid] = msgid self._cleanup()
header_from = decode_header(header_from).pop(0)[0].decode(decode_header(header_from).pop(0)[1]).encode('utf-8') header_subject = mail["Subject"] if not decode_header(header_subject).pop(0)[1]: header_subject = decode_header(header_subject).pop(0)[0].decode('iso-8859-1').encode('utf-8') else: header_subject = decode_header(header_subject).pop(0)[0].decode(decode_header(header_subject).pop(0)[1]).encode('utf-8') print '[%s]: %s' % (header_from, header_subject) for part in mail.walk(): if part.get_content_maintype() == 'multipart': continue if part.get('Content-Disposition') is None: continue filename = part.get_filename() seq = 1 if not filename: filename = 'attachment-%03d%s' % (seq, 'bin') seq += 1 if not os.path.isdir(USERNAME): os.mkdir(USERNAME) saved = os.path.join(USERNAME, filename) if not os.path.isfile(saved): f = open(saved, 'wb') f.write(part.get_payload(decode=True)) f.close() server.close_folder() server.logout()
class Miner:
    """Create a new email miner instance that can be used to traverse mails."""

    # Connected IMAP client; stays None until __init__ has created it.
    imap: IMAPClient = None

    def __init__(self, hostname: str, username: str, password: str,
                 port: int = imaplib.IMAP4_SSL_PORT, use_ssl: bool = True,
                 verify: bool = True, log_level: int = None):
        """
        Create a new instance of the miner.

        :param hostname: the hostname of the imap server to connect to
        :param username: the user to login as
        :param password: the password to login with
        :param port: the port to connect to. (defaults to 993)
        :param use_ssl: whether to use SSL to connect (defaults to True)
        :param verify: whether to verify the SSL certificates (defaults to True)
        :param log_level: when given, configure root logging at this level
        """
        if log_level is not None:
            logging.basicConfig(
                format='%(asctime)s - %(levelname)s: %(message)s',
                level=log_level)

        ssl_context = ssl.create_default_context()
        if not verify:
            # disable hostname check. certificate may not match hostname.
            ssl_context.check_hostname = False
            # disable certificate authority verification.
            # certificate maybe issued by unknown CA
            ssl_context.verify_mode = ssl.CERT_NONE

        self.imap = IMAPClient(host=hostname, port=port, ssl=use_ssl,
                               ssl_context=ssl_context)
        self.imap.login(username, password)

    @contextlib.contextmanager
    def folder(self, folder_name: str, read_only: bool = True):
        """
        Switch to a specific folder for the duration of a ``with`` block.

        :param folder_name: name of the folder to switch to
        :param read_only: read-only mode will not mark emails as read even after retrieval
        :return: context manager yielding the result of ``select_folder``
        """
        # Select outside the try block: when the selection itself fails there
        # is no folder to close, and closing anyway would hide the real error.
        selected = self.imap.select_folder(folder_name, read_only)
        try:
            yield selected
        finally:
            self.imap.close_folder()

    @contextlib.contextmanager
    def inbox(self, read_only: bool = True):
        """
        Switch to the inbox folder for the duration of a ``with`` block.

        :param read_only: read-only mode will not mark emails as read even after retrieval
        :return: context manager yielding the result of ``select_folder``
        """
        with self.folder('inbox', read_only) as selected:
            yield selected

    def mark_as_unread(self, message_ids: List[int]):
        """
        Mark the given message IDs as unread by removing the SEEN flag.

        :param message_ids: ids of the messages to update
        :return:
        """
        self.imap.remove_flags(message_ids, [SEEN])

    def mark_as_read(self, message_ids: List[int]):
        """
        Mark the given message IDs as read by adding the SEEN flag.

        :param message_ids: ids of the messages to update
        :return:
        """
        self.imap.add_flags(message_ids, [SEEN])

    def delete(self, message_ids: List[int]):
        """
        Delete the given message IDs. No expunge is issued here.

        :param message_ids: ids of the messages to delete
        :return:
        """
        self.imap.delete_messages(message_ids, True)

    def archive(self, message_ids: List[int]):
        """
        Archive the given message IDs: copy them, then delete the originals.

        :param message_ids: ids of the messages to archive
        :return:
        """
        self.imap.copy(message_ids, br'\Archive')
        self.delete(message_ids)

    def get_emails(self, unread_only: bool = True, with_body: bool = False,
                   keep_as_unread: bool = False,
                   in_memory: bool = True) -> List[Email]:
        """
        Get emails from the selected folder.

        :param unread_only: choose only to retrieve unread mails
        :param with_body: also fetch the full message (``BODY[]``)
        :param keep_as_unread: keep any retrieved emails as unread in the mailbox.
        :param in_memory: store the parsed attachments in-memory as bytes or to a temp file locally
        :return: the result of ``parse_emails`` for the fetched messages
        """
        ids = self.imap.search('(UNSEEN)' if unread_only else 'ALL')

        data_items = ['ENVELOPE', 'FLAGS', 'UID', 'INTERNALDATE']
        if with_body:
            data_items.append('BODY[]')
        response = self.imap.fetch(ids, data_items)

        try:
            if keep_as_unread:
                self.mark_as_unread(ids)
            else:
                self.mark_as_read(ids)
        except Exception:
            # will throw an exception if folder in read-only mode. so ignore,
            # but leave a trace for anyone debugging flag problems.
            logging.getLogger(__name__).debug(
                'could not update the SEEN flag', exc_info=True)

        return parse_emails(response, in_memory)

    def __enter__(self):
        """
        Return the instance of the miner for use as a context manager.

        :return: this miner
        """
        return self

    def __exit__(self, *args):
        """
        Close folder and logout on exit when used as a context manager.

        :param args: exception details supplied by the ``with`` statement (unused)
        :return:
        """
        if self.imap is None:
            return
        try:
            self.imap.close_folder()
        except Exception:
            # Best effort: closing may fail (e.g. when no folder is selected).
            # Catch Exception rather than everything so KeyboardInterrupt and
            # SystemExit still propagate.
            logging.getLogger(__name__).debug(
                'close_folder failed during exit', exc_info=True)
        self.imap.logout()
class IMAP(Thread):
    """Thread that passes unread INBOX messages to a filter manager.

    The constructor connects and logs in with the module-level ``HOST``,
    ``USERNAME`` and ``PASSWORD``, makes sure the destination folders exist
    and processes the current unread messages once. ``run`` then alternates
    between IMAP IDLE and further processing passes.
    """

    def __init__(self, filman):
        """
        :param filman: filter manager; must provide ``get_dests()`` and
            ``test_match_and_take_action(imap, msgs)``
        """
        super(IMAP, self).__init__()
        self.imap = IMAPClient(HOST, use_uid=True, ssl=ssl)
        self.imap.login(USERNAME, PASSWORD)
        self.messages = []
        self.filterman = filman
        self.counter = 0
        self.check_dests()
        self.loop()

    def check_dests(self):
        """Create every destination folder that does not exist yet."""
        for dest in self.filterman.get_dests():
            if not self.imap.folder_exists(dest):
                self.imap.create_folder(dest)
                logging.info('[create folder] %s', dest)
        if not self.imap.folder_exists(default_not_matched_dest):
            self.imap.create_folder(default_not_matched_dest)
            logging.info('[create folder] %s', default_not_matched_dest)

    def mark_as_unread(self, msgs):
        """Remove the ``\\SEEN`` flag from ``msgs`` and return the server reply."""
        # One-element tuple: the original ('\\SEEN') was just a string.
        return self.imap.remove_flags(msgs, ('\\SEEN',))

    def check(self):
        """Select INBOX and store the unread message ids, highest first."""
        select_info = self.imap.select_folder('INBOX')
        logging.info("source imap inited: %r", select_info)
        unread = self.imap.search(['NOT SEEN'])
        self.messages = sorted(unread, reverse=True)
        logging.info('got %d unread messages', len(self.messages))

    def idle(self, secs=30):
        """Wait in IMAP IDLE for up to ``secs`` seconds.

        :return: True when the server sent nothing while idling, False when
            at least one response arrived.
        """
        server = self.imap
        server.idle()
        # idle_done() only reports what arrived after the last idle_check(),
        # so the idle_check() result has to be kept as well; otherwise
        # everything received during the wait is lost.
        responses = list(server.idle_check(timeout=secs))
        text, trailing = server.idle_done()
        responses.extend(trailing)
        logging.info('idle response: %s', responses)
        return not responses

    def loop(self):
        """Run one full pass: collect unread messages and process them all."""
        logging.info('enter loop %d', self.counter)
        self.counter += 1
        self.check()
        while self.messages:
            self._dozen()
        self.imap.close_folder()

    def _dozen(self):
        """Fetch the next (up to) 12 pending messages and hand them to the filters."""
        if not self.messages:
            return
        msgs, self.messages = self.messages[:12], self.messages[12:]
        logging.info('processing the first %d msgs; left %d...',
                     len(msgs), len(self.messages))
        logging.info(msgs)
        response = self.imap.fetch(msgs, ['RFC822'])
        # items() rather than iteritems(): available on Python 2 and 3.
        msgs = [(msgid, Msg(string=data['RFC822']))
                for msgid, data in response.items()]
        self.filterman.test_match_and_take_action(self.imap, msgs)

    def run(self):
        """Thread body: idle, process on activity, and force a pass regularly."""
        count = 0
        while True:
            count += 1
            logging.info('idle counter: %d', count)
            if not self.idle():
                self.loop()
            sleep(10)
            if not count % 5:
                # do a full pass every 5 iterations regardless of idle result
                self.loop()