def headers_roundtrip_test():
    """Write the parsed BILINGUAL headers out and parse them back in.

    The re-parsed copy is checked for its header count and for matching
    Content-Transfer-Encoding and DKIM-Signature values.  The Subject and
    Received assertions run against the first parse.
    """
    original = MimeHeaders.from_stream(StringIO(BILINGUAL))

    buf = StringIO()
    original.to_stream(buf)
    reparsed = MimeHeaders.from_stream(StringIO(buf.getvalue()))

    eq_(21, len(reparsed))
    eq_(u"Simple text. How are you? Как ты поживаешь?", original["Subject"])

    received = original.getall("Received")
    eq_(5, len(received))
    ok_("c2cs24435ybk" in received[0])

    # These two headers must come back unchanged from the round trip.
    for name in ("Content-Transfer-Encoding", "DKIM-Signature"):
        eq_(original[name], reparsed[name])
def headers_roundtrip_test():
    """Round-trip the BILINGUAL headers through to_stream/from_stream.

    Asserts the header count of the re-parsed copy, the Subject and Received
    values of the first parse, and that Content-Transfer-Encoding and
    DKIM-Signature are equal in both.
    """
    first_pass = MimeHeaders.from_stream(StringIO(BILINGUAL))

    serialized = StringIO()
    first_pass.to_stream(serialized)
    second_pass = MimeHeaders.from_stream(StringIO(serialized.getvalue()))

    header_count = len(second_pass)
    eq_(21, header_count)

    subject = first_pass['Subject']
    eq_(u"Simple text. How are you? Как ты поживаешь?", subject)

    hops = first_pass.getall('Received')
    eq_(5, len(hops))
    ok_('c2cs24435ybk' in hops[0])

    eq_(first_pass['Content-Transfer-Encoding'],
        second_pass['Content-Transfer-Encoding'])
    eq_(first_pass['DKIM-Signature'], second_pass['DKIM-Signature'])
def headers_roundtrip_test():
    """Round-trip the UTF-8 decoded BILINGUAL headers through a text stream.

    The re-parsed copy is checked for header count and for matching
    Content-Transfer-Encoding and DKIM-Signature values; the Subject and
    Received assertions run against the first parse.
    """
    source_text = BILINGUAL.decode('utf-8')
    parsed = MimeHeaders.from_stream(six.StringIO(source_text))

    sink = six.StringIO()
    parsed.to_stream(sink)
    parsed_again = MimeHeaders.from_stream(six.StringIO(sink.getvalue()))

    eq_(21, len(parsed_again))
    eq_(u"Simple text. How are you? Как ты поживаешь?", parsed['Subject'])

    received = parsed.getall('Received')
    eq_(5, len(received))
    ok_('c2cs24435ybk' in received[0])

    # Compare the same header in both parses, one header at a time.
    for name in ('Content-Transfer-Encoding', 'DKIM-Signature'):
        eq_(parsed[name], parsed_again[name])
def bilingual_message_test():
    """Parse BILINGUAL and check header count, Subject and Received headers."""
    parsed = MimeHeaders.from_stream(StringIO(BILINGUAL))
    eq_(21, len(parsed))

    expected_subject = u"Simple text. How are you? Как ты поживаешь?"
    eq_(expected_subject, parsed['Subject'])

    received = parsed.getall('Received')
    eq_(5, len(received))
    first_hop = received[0]
    ok_('c2cs24435ybk' in first_hop)
def bilingual_message_test():
    """BILINGUAL parses to 21 headers with the expected Subject and Received.

    Five Received headers are expected, and the first one must contain the
    token ``c2cs24435ybk``.
    """
    stream = StringIO(BILINGUAL)
    mime_headers = MimeHeaders.from_stream(stream)

    eq_(21, len(mime_headers))
    eq_(u"Simple text. How are you? Как ты поживаешь?",
        mime_headers['Subject'])

    hops = mime_headers.getall('Received')
    eq_(5, len(hops))
    ok_('c2cs24435ybk' in hops[0])
def test_folding_combinations():
    """Continuation lines ended by CRLF or bare LF are joined to their header.

    The input starts with a "From " line that has no colon, followed by
    headers whose values continue on lines that begin with a space.
    """
    raw = (
        "From [email protected] Mon Feb 8 02:53:47 PST 1993\n"
        "To: sasha\r\n continued\n line\n"
        "From: single line \r\n"
        "Subject: hello, how are you\r\n today?"
    )
    parsed = MimeHeaders.from_stream(StringIO(raw))

    expectations = (
        ('To', 'sasha continued line'),
        ('From', 'single line '),
        ('Subject', "hello, how are you today?"),
    )
    for name, value in expectations:
        eq_(value, parsed[name])
def broken_sequences_test():
    """Only the well-formed header is kept when the first line is malformed.

    The input opens with a line that starts with a space, then a valid
    ``Good`` header.
    """
    stream = StringIO(" hello this is a bad header\nGood: this one is ok")
    parsed = MimeHeaders.from_stream(stream)

    eq_(1, len(parsed))
    eq_("this one is ok", parsed["Good"])
def headers_parsing_empty_test():
    """Parsing an empty stream yields a header collection of length zero."""
    empty_stream = StringIO("")
    parsed = MimeHeaders.from_stream(empty_stream)
    eq_(0, len(parsed))
def headers_parsing_binary_stuff_survives_test():
    """A header whose value is zlib-compressed data parses to a truthy result."""
    compressed = zlib.compress(b"abcdefg")
    raw_header = "Hello: {0}\r\n".format(compressed)

    parsed = MimeHeaders.from_stream(StringIO(raw_header))
    ok_(parsed)
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy folder and message metadata from an IMAP account into ``db``.

    First the folder list, the folder flags, the no-select marker and the
    special-folder mapping are written.  Then every folder that is not marked
    no-select is opened read-only, its UIDs are recorded, and message
    metadata is fetched in batches.  A message is inserted into ``messages``
    only if no row with its X-GM-MSGID exists yet; its Gmail labels and its
    folder membership are recorded for every folder it is seen in.  Finally
    the ``threads``, ``thread_messages`` and ``folder_threads`` tables are
    populated from what was stored.

    Args:
        imap: client object providing ``list_folders()``,
            ``select_folder(name, readonly=True)``, ``search()`` and
            ``fetch()``.  Presumably an IMAPClient instance -- TODO confirm.
        db: database connection usable as a context manager and providing
            ``execute()`` / ``executemany()``.  Rows are read both by index
            and by column name, so a name-addressable row type is assumed.
        namespace: unused by this function.
        account: unused by this function.

    Raises:
        AssertionError: if the number of UIDs returned by the search differs
            from the EXISTS count reported when the folder was opened.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # messages column -> header holding the address list stored in it.
    address_columns = (
        ('sender_addr', 'Sender'),
        ('from_addr', 'From'),
        ('reply_to_addr', 'Reply-To'),
        ('to_addr', 'To'),
        ('cc_addr', 'Cc'),
        ('bcc_addr', 'Bcc'),
    )

    # Fetch parameters are the same for every folder, so define them once.
    batch_size = 1000
    fetch_data = [
        'RFC822.SIZE', 'ENVELOPE', 'FLAGS', 'X-GM-MSGID', 'X-GM-THRID',
        'X-GM-LABELS', 'INTERNALDATE', 'RFC822.HEADER',
    ]

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()

    with db:
        # Folder names & delimiters
        db.executemany(
            " INSERT INTO folders"
            " ( folder_name, clean_folder_name, imap_delimiter )"
            " VALUES (?, ?, ?) ",
            ((name, cleanup_folder_name(name), delimiter)
             for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany(
            " INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?) ",
            ((name, flag)
             for flags, delimiter, name in folders_fdn
             for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        #
        # The backslash is doubled so the literal does not depend on how the
        # interpreter treats the "\N" escape (left alone on Python 2, a
        # malformed \N{...} escape on Python 3).  The SQL text is unchanged.
        db.execute(
            " UPDATE folders SET imap_noselect = ("
            " SELECT folder_flags.flag IS NOT NULL"
            " FROM folders AS a LEFT JOIN folder_flags ON ("
            " a.folder_name = folder_flags.folder_name"
            " AND folder_flags.flag = '\\Noselect' )"
            " WHERE folders.folder_name = a.folder_name ) ")

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute(
            " INSERT INTO special_folders (attr_name, folder_name)"
            " SELECT ?, folder_name FROM folders WHERE folder_name = ? ",
            ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany(
            " INSERT INTO special_folders (attr_name, folder_name)"
            " SELECT ?, folder_name FROM folder_flags WHERE flag = ? \n",
            singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [
            row[0] for row in db.execute(
                "SELECT folder_name FROM folders WHERE NOT imap_noselect")
        ]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute(
                " UPDATE folders SET imap_uidvalidity = ?, imap_uidnext = ?"
                " WHERE folder_name = ? ",
                [examine_response[u'UIDVALIDITY'],
                 examine_response[u'UIDNEXT'],
                 folder_name])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany(
                " INSERT INTO folder_messages (folder_name, imap_uid)"
                " VALUES (?, ?) ",
                ((folder_name, imap_uid) for imap_uid in imap_uids))

            ## Get the folder flags
            #folder_flags = set(row[0] for row in db.execute(
            #    "SELECT flag FROM folder_flags WHERE folder_name = ?",
            #    [folder_name]))
            #
            ## This is Gmail, so only actually fetch messages from the 'All
            ## Mail' and 'Trash' folders. This *should* give us all of the
            ## messages.
            #if not folder_flags & {u'\\All', u'\\Trash', u'\\Sent'}:
            #    continue

            # Get folder messages
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i + batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Fetch message info and insert it into the messages table.
                # Don't bother deduplicating at this point.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(
                        StringIO(data['RFC822.HEADER']))
                    envelope = data['ENVELOPE']

                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=envelope.subject,
                        in_reply_to=envelope.in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=envelope.message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                    )
                    # Each address column holds the JSON-encoded parsed
                    # address list of the corresponding header.
                    for column, header_name in address_columns:
                        msg_data[column] = json.dumps(
                            parse_email_address_list(
                                headers.get(header_name)))
                    msg_data['clean_subject'] = cleanup_subject(
                        parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute(
                        " SELECT id, x_gm_msgid FROM messages"
                        " WHERE x_gm_msgid = :x_gm_msgid ",
                        msg_data)
                    row = next(iter(cur.fetchall()), None)  # 0 or 1 rows
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute(
                            " INSERT INTO messages ("
                            " date, subject, clean_subject, in_reply_to,"
                            " size, message_id_header, x_gm_msgid,"
                            " x_gm_thrid, sender_addr, from_addr,"
                            " reply_to_addr, to_addr, cc_addr, bcc_addr )"
                            " VALUES ("
                            " :date, :subject, :clean_subject, :in_reply_to,"
                            " :size, :message_id_header, :x_gm_msgid,"
                            " :x_gm_thrid, :sender_addr, :from_addr,"
                            " :reply_to_addr, :to_addr, :cc_addr,"
                            " :bcc_addr ) ",
                            msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder
                    # are missing the u'\\Sent' label)
                    db.executemany(
                        " INSERT INTO folder_message_gm_labels"
                        " (folder_name, message_id, label)"
                        " VALUES (?, ?, ?) \n",
                        ((folder_name, message_id, label)
                         for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute(
                        " UPDATE folder_messages SET message_id = ?"
                        " WHERE folder_name = ? AND imap_uid = ? ",
                        (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute(
            " INSERT INTO threads (x_gm_thrid)"
            " SELECT DISTINCT x_gm_thrid FROM messages ")
        db.execute(
            " INSERT INTO thread_messages (thread_id, message_id)"
            " SELECT threads.id, messages.id FROM threads, messages"
            " WHERE threads.x_gm_thrid = messages.x_gm_thrid ")

        # Construct folder_threads
        db.execute(
            " INSERT INTO folder_threads (folder_name, thread_id)"
            " SELECT DISTINCT folder_messages.folder_name,"
            " thread_messages.thread_id"
            " FROM folder_messages"
            " LEFT JOIN thread_messages USING (message_id) ")
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy folder and message metadata from an IMAP account into ``db``.

    First the folder list, the folder flags, the no-select marker and the
    special-folder mapping are written.  Then every folder that is not marked
    no-select is opened read-only, its UIDs are recorded, and message
    metadata is fetched in batches.  A message is inserted into ``messages``
    only if no row with its X-GM-MSGID exists yet; its Gmail labels and its
    folder membership are recorded for every folder it is seen in.  Finally
    the ``threads``, ``thread_messages`` and ``folder_threads`` tables are
    populated from what was stored.

    Args:
        imap: client object providing ``list_folders()``,
            ``select_folder(name, readonly=True)``, ``search()`` and
            ``fetch()``.  Presumably an IMAPClient instance -- TODO confirm.
        db: database connection usable as a context manager and providing
            ``execute()`` / ``executemany()``.  Rows are read both by index
            and by column name, so a name-addressable row type is assumed.
        namespace: unused by this function.
        account: unused by this function.

    Raises:
        AssertionError: if the number of UIDs returned by the search differs
            from the EXISTS count reported when the folder was opened.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # messages column -> header holding the address list stored in it.
    address_columns = (
        ('sender_addr', 'Sender'),
        ('from_addr', 'From'),
        ('reply_to_addr', 'Reply-To'),
        ('to_addr', 'To'),
        ('cc_addr', 'Cc'),
        ('bcc_addr', 'Bcc'),
    )

    # Fetch parameters are the same for every folder, so define them once.
    batch_size = 1000
    fetch_data = [
        'RFC822.SIZE', 'ENVELOPE', 'FLAGS', 'X-GM-MSGID', 'X-GM-THRID',
        'X-GM-LABELS', 'INTERNALDATE', 'RFC822.HEADER',
    ]

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()

    with db:
        # Folder names & delimiters
        db.executemany(
            " INSERT INTO folders"
            " ( folder_name, clean_folder_name, imap_delimiter )"
            " VALUES (?, ?, ?) ",
            ((name, cleanup_folder_name(name), delimiter)
             for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany(
            " INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?) ",
            ((name, flag)
             for flags, delimiter, name in folders_fdn
             for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        #
        # The backslash is doubled so the literal does not depend on how the
        # interpreter treats the "\N" escape (left alone on Python 2, a
        # malformed \N{...} escape on Python 3).  The SQL text is unchanged.
        db.execute(
            " UPDATE folders SET imap_noselect = ("
            " SELECT folder_flags.flag IS NOT NULL"
            " FROM folders AS a LEFT JOIN folder_flags ON ("
            " a.folder_name = folder_flags.folder_name"
            " AND folder_flags.flag = '\\Noselect' )"
            " WHERE folders.folder_name = a.folder_name ) ")

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute(
            " INSERT INTO special_folders (attr_name, folder_name)"
            " SELECT ?, folder_name FROM folders WHERE folder_name = ? ",
            ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany(
            " INSERT INTO special_folders (attr_name, folder_name)"
            " SELECT ?, folder_name FROM folder_flags WHERE flag = ? \n",
            singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [
            row[0] for row in db.execute(
                "SELECT folder_name FROM folders WHERE NOT imap_noselect")
        ]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute(
                " UPDATE folders SET imap_uidvalidity = ?, imap_uidnext = ?"
                " WHERE folder_name = ? ",
                [examine_response[u'UIDVALIDITY'],
                 examine_response[u'UIDNEXT'],
                 folder_name])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany(
                " INSERT INTO folder_messages (folder_name, imap_uid)"
                " VALUES (?, ?) ",
                ((folder_name, imap_uid) for imap_uid in imap_uids))

            ## Get the folder flags
            #folder_flags = set(row[0] for row in db.execute(
            #    "SELECT flag FROM folder_flags WHERE folder_name = ?",
            #    [folder_name]))
            #
            ## This is Gmail, so only actually fetch messages from the 'All
            ## Mail' and 'Trash' folders. This *should* give us all of the
            ## messages.
            #if not folder_flags & {u'\\All', u'\\Trash', u'\\Sent'}:
            #    continue

            # Get folder messages
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i + batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Fetch message info and insert it into the messages table.
                # Don't bother deduplicating at this point.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(
                        StringIO(data['RFC822.HEADER']))
                    envelope = data['ENVELOPE']

                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=envelope.subject,
                        in_reply_to=envelope.in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=envelope.message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                    )
                    # Each address column holds the JSON-encoded parsed
                    # address list of the corresponding header.
                    for column, header_name in address_columns:
                        msg_data[column] = json.dumps(
                            parse_email_address_list(
                                headers.get(header_name)))
                    msg_data['clean_subject'] = cleanup_subject(
                        parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute(
                        " SELECT id, x_gm_msgid FROM messages"
                        " WHERE x_gm_msgid = :x_gm_msgid ",
                        msg_data)
                    row = next(iter(cur.fetchall()), None)  # 0 or 1 rows
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute(
                            " INSERT INTO messages ("
                            " date, subject, clean_subject, in_reply_to,"
                            " size, message_id_header, x_gm_msgid,"
                            " x_gm_thrid, sender_addr, from_addr,"
                            " reply_to_addr, to_addr, cc_addr, bcc_addr )"
                            " VALUES ("
                            " :date, :subject, :clean_subject, :in_reply_to,"
                            " :size, :message_id_header, :x_gm_msgid,"
                            " :x_gm_thrid, :sender_addr, :from_addr,"
                            " :reply_to_addr, :to_addr, :cc_addr,"
                            " :bcc_addr ) ",
                            msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder
                    # are missing the u'\\Sent' label)
                    db.executemany(
                        " INSERT INTO folder_message_gm_labels"
                        " (folder_name, message_id, label)"
                        " VALUES (?, ?, ?) \n",
                        ((folder_name, message_id, label)
                         for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute(
                        " UPDATE folder_messages SET message_id = ?"
                        " WHERE folder_name = ? AND imap_uid = ? ",
                        (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute(
            " INSERT INTO threads (x_gm_thrid)"
            " SELECT DISTINCT x_gm_thrid FROM messages ")
        db.execute(
            " INSERT INTO thread_messages (thread_id, message_id)"
            " SELECT threads.id, messages.id FROM threads, messages"
            " WHERE threads.x_gm_thrid = messages.x_gm_thrid ")

        # Construct folder_threads
        db.execute(
            " INSERT INTO folder_threads (folder_name, thread_id)"
            " SELECT DISTINCT folder_messages.folder_name,"
            " thread_messages.thread_id"
            " FROM folder_messages"
            " LEFT JOIN thread_messages USING (message_id) ")
def test_folding_combinations():
    """Folded To/From/Subject values are read back as single-line strings.

    The message mixes CRLF and bare LF line endings and begins with a
    "From " line that has no colon.
    """
    lines = [
        "From [email protected] Mon Feb 8 02:53:47 PST 1993\n",
        "To: sasha\r\n",
        " continued\n",
        " line\n",
        "From: single line \r\n",
        "Subject: hello, how are you\r\n",
        " today?",
    ]
    message = "".join(lines)

    parsed = MimeHeaders.from_stream(StringIO(message))

    to_value = parsed['To']
    eq_('sasha continued line', to_value)
    from_value = parsed['From']
    eq_('single line ', from_value)
    subject_value = parsed['Subject']
    eq_("hello, how are you today?", subject_value)
def headers_parsing_ridiculously_long_line_test():
    """Parsing a single header with a 700,000-character value must not raise."""
    long_value = "abcdefg" * 100000
    raw_header = "Hello: {0}\r\n".format(long_value)

    # No assertion on the result: the call completing is the whole test.
    MimeHeaders.from_stream(StringIO(raw_header))
def broken_sequences_test():
    """A malformed first line does not stop the following header from parsing.

    Exactly one header is expected, and it is the ``Good`` one.
    """
    raw = " hello this is a bad header\nGood: this one is ok"
    mime_headers = MimeHeaders.from_stream(StringIO(raw))

    header_count = len(mime_headers)
    eq_(1, header_count)
    eq_("this one is ok", mime_headers["Good"])
def headers_parsing_binary_stuff_survives_test():
    """Formatting zlib output into a header value still gives a truthy parse."""
    payload = zlib.compress(b"abcdefg")
    stream = StringIO("Hello: {0}\r\n".format(payload))

    result = MimeHeaders.from_stream(stream)
    ok_(result)
def headers_parsing_ridiculously_long_line_test():
    """Feed the parser one header whose value repeats "abcdefg" 100000 times.

    Nothing is asserted on the parsed result; the test passes if parsing
    completes without raising.
    """
    stream = StringIO("Hello: {0}\r\n".format("abcdefg" * 100000))
    MimeHeaders.from_stream(stream)
def headers_parsing_empty_test():
    """An empty input stream produces no headers."""
    parsed = MimeHeaders.from_stream(StringIO(""))
    header_count = len(parsed)
    eq_(0, header_count)