def write_cursor(self, texts, cursor):
    """Insert ``texts`` into the ``sms``, ``threads`` and ``canonical_addresses`` tables.

    For every message the sender number is cleaned with ``core.cleanNumber``;
    numbers not seen before get a new ``canonical_addresses`` row and a new
    ``threads`` row. The matching thread's ``message_count``, ``snippet`` and
    ``date`` are updated, then the message itself is inserted into ``sms``.
    Progress is written to stdout while the loop runs.

    Args:
        texts: iterable of message objects exposing ``num``, ``date``,
            ``body`` and ``incoming``.
        cursor: database cursor for the output database.

    Raises:
        sms_exceptions.NonEmptyStartDBError: if ``sms`` already contains rows.
    """
    if cursor.execute("SELECT Count() FROM sms").fetchone()[0] > 0:
        raise sms_exceptions.NonEmptyStartDBError("Output DB has existing messages!")

    # Fast lookup table: cleaned number -> canonical_addresses._id
    contact_id_from_number = {}
    for row in cursor.execute('SELECT _id,address FROM canonical_addresses;'):
        contact_id_from_number[core.cleanNumber(row[1])] = row[0]

    # contact id -> threads._id, filled lazily so the thread is only looked
    # up once per contact instead of once per message.
    thread_id_from_contact = {}
    find_thread_sql = "SELECT _id FROM threads WHERE recipient_ids=?"

    recalculate_every = 100
    processed = 0
    last_speed = 0
    last_checked = 0  # starting at 0 makes the very first speed read-out ~0
    start_time = time.time()
    for txt in texts:
        clean_number = core.cleanNumber(txt.num)

        # Add a canonical_addresses entry and a thread for unseen numbers.
        if clean_number not in contact_id_from_number:
            cursor.execute(
                "INSERT INTO canonical_addresses (address) VALUES (?)",
                [txt.num])
            contact_id_from_number[clean_number] = cursor.lastrowid
            cursor.execute(
                "INSERT INTO threads (recipient_ids) VALUES (?)",
                [contact_id_from_number[clean_number]])
        contact_id = contact_id_from_number[clean_number]

        if contact_id not in thread_id_from_contact:
            thread_row = cursor.execute(find_thread_sql, [contact_id]).fetchone()
            if thread_row is None:
                # The address already existed in canonical_addresses but had
                # no thread row; create it rather than failing on None[0].
                cursor.execute(
                    "INSERT INTO threads (recipient_ids) VALUES (?)",
                    [contact_id])
                thread_row = cursor.execute(find_thread_sql, [contact_id]).fetchone()
            thread_id_from_contact[contact_id] = thread_row[0]
        thread_id = thread_id_from_contact[contact_id]

        ## TODO Only run this query at the END of the loop!
        # Update the conversation thread (happens with each new message).
        cursor.execute(
            "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE recipient_ids=? ",
            [txt.body, txt.date, contact_id])

        ## TODO try using cur.execute('BEGIN TRANSACTION') and cur.execute('COMMIT') every 1000 for speedup
        # Add the message to the sms table. `type` is incoming + 1, i.e. 1
        # when txt.incoming is falsy and 2 when it is truthy.
        # NOTE(review): confirm this matches the target schema's type codes.
        cursor.execute(
            "INSERT INTO sms (address,'date',body,thread_id,read,type,seen) VALUES (?,?,?,?,1,?,1)",
            [txt.num, txt.date, txt.body, thread_id, txt.incoming + 1])

        # Print status; the speed figure is refreshed every `recalculate_every` messages.
        if processed % recalculate_every == 0:
            now = time.time()
            elapsed = now - last_checked
            last_speed = int(recalculate_every / elapsed) if elapsed > 0 else 0
            last_checked = now
        sys.stdout.write(
            "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(
                processed, len(contact_id_from_number), last_speed))
        sys.stdout.flush()
        processed += 1

    total_elapsed = time.time() - start_time
    # Guard against a zero interval (empty input on a coarse clock).
    average = int(processed / total_elapsed) if total_elapsed > 0 else 0
    print("\nfinished in {0} seconds (average {1}/second)".format(total_elapsed, average))
def write_cursor(self, texts, cursor):
    """Write ``texts`` into the ``sms``, ``threads`` and ``canonical_addresses`` tables.

    Each message's number is cleaned with ``core.cleanNumber``. A number that
    has not been seen yet gets a ``canonical_addresses`` row plus a
    ``threads`` row. The thread's counters/snippet are then updated and the
    message is inserted into ``sms``. A progress line is written to stdout
    for every message and a summary is printed at the end.

    Output uses the single-argument ``print(...)`` call form, which behaves
    the same as a Python 2 print statement and as the Python 3 function.

    Args:
        texts: iterable of message objects exposing ``num``, ``date``,
            ``body`` and ``incoming``.
        cursor: database cursor for the output database.

    Raises:
        sms_exceptions.NonEmptyStartDBError: if ``sms`` already contains rows.
    """
    existing = cursor.execute("SELECT Count() FROM sms").fetchone()[0]
    if existing > 0:
        raise sms_exceptions.NonEmptyStartDBError("Output DB has existing messages!")

    # Populate the fast lookup table: cleaned number -> canonical_addresses._id
    contact_ids = {}
    for row in cursor.execute('SELECT _id,address FROM canonical_addresses;'):
        contact_ids[core.cleanNumber(row[1])] = row[0]

    # Cache of contact id -> threads._id so each thread is queried only once.
    thread_ids = {}
    select_thread = "SELECT _id FROM threads WHERE recipient_ids=?"
    insert_thread = "INSERT INTO threads (recipient_ids) VALUES (?)"

    recalculate_every = 100
    count = 0
    speed = 0
    speed_checked_at = 0  # 0 on purpose: the first speed read-out rounds to ~0
    started = time.time()
    for txt in texts:
        clean_number = core.cleanNumber(txt.num)

        # New number: create its canonical address and its thread.
        if clean_number not in contact_ids:
            cursor.execute(
                "INSERT INTO canonical_addresses (address) VALUES (?)",
                [txt.num])
            contact_ids[clean_number] = cursor.lastrowid
            cursor.execute(insert_thread, [contact_ids[clean_number]])
        contact_id = contact_ids[clean_number]

        thread_id = thread_ids.get(contact_id)
        if thread_id is None:
            found = cursor.execute(select_thread, [contact_id]).fetchone()
            if found is None:
                # Address was already in canonical_addresses but no thread
                # row exists for it; create one instead of crashing.
                cursor.execute(insert_thread, [contact_id])
                found = cursor.execute(select_thread, [contact_id]).fetchone()
            thread_id = found[0]
            thread_ids[contact_id] = thread_id

        ## TODO Only run this query at the END of the loop!
        # Update the conversation thread (happens with each new message).
        cursor.execute(
            "UPDATE threads SET message_count=message_count + 1,snippet=?,'date'=? WHERE recipient_ids=? ",
            [txt.body, txt.date, contact_id])

        ## TODO try using cur.execute('BEGIN TRANSACTION') and cur.execute('COMMIT') every 1000 for speedup
        # Add the message to the sms table; `type` is stored as incoming + 1.
        # NOTE(review): confirm that mapping against the target schema.
        cursor.execute(
            "INSERT INTO sms (address,'date',body,thread_id,read,type,seen) VALUES (?,?,?,?,1,?,1)",
            [txt.num, txt.date, txt.body, thread_id, txt.incoming + 1])

        # Status line; speed is recomputed every `recalculate_every` messages.
        if count % recalculate_every == 0:
            now = time.time()
            interval = now - speed_checked_at
            speed = int(recalculate_every / interval) if interval > 0 else 0
            speed_checked_at = now
        sys.stdout.write(
            "\rprocessed {0} entries, {1} convos, ({2} entries/sec)".format(
                count, len(contact_ids), speed))
        sys.stdout.flush()
        count += 1

    duration = time.time() - started
    # Avoid ZeroDivisionError when nothing measurable elapsed.
    per_second = int(count / duration) if duration > 0 else 0
    print("\nfinished in {0} seconds (average {1}/second)".format(duration, per_second))
def parse_cursor(self, cursor):
    """Read all messages through ``cursor`` and return them as ``core.Text`` objects.

    Reads the ``handle``, ``chat``, ``chat_handle_join``, ``message`` and
    ``chat_message_join`` tables.

    Args:
        cursor: database cursor for the input database.

    Returns:
        A list of ``core.Text`` objects in ``message.ROWID`` order. Messages
        whose ``handle_id`` has no ``handle`` row get the number ``"unknown"``.
    """
    # handle ROWID -> (id, country, cleaned id)
    handles = {}
    for row in cursor.execute('SELECT ROWID, id, country FROM handle'):
        handles[row[0]] = (row[1], row[2], core.cleanNumber(row[1]))

    # room_name -> [member handle ids]
    chats = {}
    # '' is a proper SQL string literal; the double-quoted form only works
    # where SQLite's double-quoted-string fallback is enabled.
    query = cursor.execute(
        "SELECT chat.room_name, handle.id FROM chat "
        "LEFT OUTER JOIN chat_handle_join ON chat_handle_join.chat_id = chat.ROWID "
        "JOIN handle ON chat_handle_join.handle_id = handle.ROWID "
        "WHERE chat.room_name <> '' ")
    for row in query:
        chats.setdefault(row[0], []).append(row[1])

    texts = []
    query = cursor.execute(
        "SELECT message.handle_id, message.date, message.is_from_me, message.text, chat.room_name "
        "FROM message "
        "LEFT OUTER JOIN chat_message_join ON message.ROWID = chat_message_join.message_id "
        "LEFT OUTER JOIN chat ON chat_message_join.chat_id = chat.ROWID "
        "ORDER BY message.ROWID ASC;")
    for row in query:
        handle_rowid, raw_date, is_from_me, body, room_name = row
        number = handles[handle_rowid][0] if handle_rowid in handles else "unknown"
        # 978307200 is 2001-01-01T00:00:00Z in Unix seconds; presumably
        # message.date counts seconds from that date -- TODO confirm.
        # int() replaces long(), which does not exist on Python 3; on
        # Python 2 int() promotes to long automatically when needed.
        date = int((raw_date + 978307200) * 1000)
        # A room without any joined handle rows is absent from `chats`;
        # .get() yields None members instead of raising KeyError.
        members = chats.get(room_name) if room_name else None
        texts.append(core.Text(
            num=number,
            date=date,
            incoming=is_from_me == 0,
            body=body,
            chatroom=room_name,
            members=members))
    return texts
def parse_cursor(self, cursor):
    """Build a list of ``core.Text`` objects from the message tables behind ``cursor``.

    The ``handle`` table supplies sender numbers, the ``chat`` /
    ``chat_handle_join`` tables supply the member list of each named room,
    and ``message`` / ``chat_message_join`` supply the messages themselves.

    Args:
        cursor: database cursor for the input database.

    Returns:
        A list of ``core.Text`` objects ordered by ``message.ROWID``. The
        number is ``"unknown"`` when the message's ``handle_id`` is not
        present in ``handle``; ``members`` is ``None`` when the message has
        no room name or the room has no known members.
    """
    # handle ROWID -> (id, country, cleaned id)
    handles = {}
    handle_rows = cursor.execute('SELECT ROWID, id, country FROM handle')
    for rowid, handle_id, country in handle_rows:
        handles[rowid] = (handle_id, country, core.cleanNumber(handle_id))

    # room_name -> [members]
    chats = {}
    # Uses the standard '' literal rather than "", which SQLite only treats
    # as a string when its double-quoted-string compatibility is enabled.
    member_rows = cursor.execute(
        "SELECT chat.room_name, handle.id FROM chat "
        "LEFT OUTER JOIN chat_handle_join ON chat_handle_join.chat_id = chat.ROWID "
        "JOIN handle ON chat_handle_join.handle_id = handle.ROWID "
        "WHERE chat.room_name <> '' ")
    for room_name, member in member_rows:
        if room_name not in chats:
            chats[room_name] = []
        chats[room_name].append(member)

    texts = []
    message_rows = cursor.execute(
        "SELECT message.handle_id, message.date, message.is_from_me, message.text, chat.room_name "
        "FROM message "
        "LEFT OUTER JOIN chat_message_join ON message.ROWID = chat_message_join.message_id "
        "LEFT OUTER JOIN chat ON chat_message_join.chat_id = chat.ROWID "
        "ORDER BY message.ROWID ASC;")
    for handle_rowid, raw_date, is_from_me, body, room_name in message_rows:
        if handle_rowid in handles:
            number = handles[handle_rowid][0]
        else:
            number = "unknown"
        if room_name:
            # .get(): rooms with no joined handles never made it into
            # `chats`, and must not abort the whole import with KeyError.
            members = chats.get(room_name)
        else:
            members = None
        text = core.Text(
            num=number,
            # Shift by 978307200 s (2001-01-01T00:00:00Z as Unix seconds)
            # and scale by 1000; presumably this yields Unix milliseconds
            # -- TODO confirm. int() is used because long() is Python 2 only.
            date=int((raw_date + 978307200) * 1000),
            incoming=is_from_me == 0,
            body=body,
            chatroom=room_name,
            members=members)
        texts.append(text)
    return texts
def write_cursor(self, texts, cursor):
    """Write ``texts`` into the ``handle``, ``chat``, ``chat_handle_join``,
    ``message`` and ``chat_message_join`` tables.

    Runs two passes over ``texts``: the first creates handle rows (one per
    cleaned number), chat rows (one per chat key, which is the chatroom name
    if set, otherwise the number) and the chat/handle joins; the second
    inserts every message and links it to its chat.

    Output uses the single-argument ``print(...)`` call form, which behaves
    the same as a Python 2 print statement and as the Python 3 function.

    Args:
        texts: sequence of message objects exposing ``num``, ``date``,
            ``body``, ``incoming`` and ``chatroom``. It is iterated twice and
            passed to ``len()``.
        cursor: database cursor for the output database.

    Raises:
        sms_exceptions.NonEmptyStartDBError: if ``message`` already has rows.
    """
    if cursor.execute("SELECT Count() FROM message").fetchone()[0] > 0:
        raise sms_exceptions.NonEmptyStartDBError("Output DB has existing messages!")

    ## First populate the 'handle' table with each contact
    handles_lookup = {}     # cleaned number -> handle ROWID
    chat_lookup = {}        # chat_key -> chat ROWID
    chat_participants = {}  # chat_key -> set of cleaned numbers
    join_count = 0          # rows actually inserted into chat_handle_join
    for txt in texts:
        try:
            clean_number = core.cleanNumber(txt.num)
            chat_key = txt.chatroom if txt.chatroom else txt.num

            ## Create the handle table (effectively a contacts table)
            if clean_number and clean_number not in handles_lookup:
                cursor.execute(
                    "INSERT INTO handle ('id', service, uncanonicalized_id ) "
                    "VALUES (?,?,?)",
                    [txt.num, "SMS", clean_number])
                handles_lookup[clean_number] = cursor.lastrowid

            if not chat_key:
                core.warning("no txt chat_key [%s] for %s" % (chat_key, txt))

            ## Create the chat table (effectively a threads table)
            if chat_key not in chat_lookup:
                guid = ("SMS;+;%s" % txt.chatroom) if txt.chatroom else ("SMS;-;%s" % txt.num)
                style = 43 if txt.chatroom else 45
                cursor.execute(
                    "INSERT INTO chat (guid, style, state, chat_identifier, service_name, room_name ) "
                    "VALUES (?,?,?,?,?,?)",
                    [guid, style, 3, chat_key, 'SMS', txt.chatroom])
                chat_lookup[chat_key] = cursor.lastrowid

            ## Create the chat_handle_join table (represents participants in all threads)
            participants = chat_participants.setdefault(chat_key, set())
            if clean_number not in participants:
                participants.add(clean_number)
                # Don't add handle joins for unknown contacts. Only the
                # missing-handle case is skipped; database errors are no
                # longer swallowed and reach the handler below.
                handle_id = handles_lookup.get(clean_number)
                if handle_id is not None:
                    cursor.execute(
                        "INSERT INTO chat_handle_join (chat_id, handle_id ) "
                        "VALUES (?,?)",
                        [chat_lookup[chat_key], handle_id])
                    join_count += 1
        except Exception:
            # (txt,) keeps the formatting correct even if txt is a tuple.
            print(core.term.red("something failed at: %s") % (txt,))
            raise

    # Report the real number of join rows, not the number of chats.
    print("built handles table with %i, chat with %i, chat_handle_join with %i entries"
          % (len(handles_lookup), len(chat_lookup), join_count))

    for txt in texts:
        chat_key = txt.chatroom if txt.chatroom else txt.num
        handle_i = handles_lookup.get(core.cleanNumber(txt.num), 0)
        # txt.date / 1000 minus 978307200 s (2001-01-01T00:00:00Z as Unix
        # seconds). int() replaces the Python-2-only long().
        idate = int((float(txt.date) / 1000) - 978307200)
        from_me = 0 if txt.incoming else 1
        guid = str(uuid.uuid1())
        cursor.execute(
            "INSERT INTO message "
            "('text', guid, handle_id, version, type, service, 'date', is_finished, is_from_me, is_sent, is_read ) "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?)",
            [txt.body, guid, handle_i, 1, txt.chatroom is not None, 'SMS',
             idate, 1, from_me, from_me, (1 - from_me)])
        message_id = cursor.lastrowid
        cursor.execute(
            "INSERT INTO chat_message_join (chat_id, message_id) "
            "VALUES (?,?)",
            [chat_lookup[chat_key], message_id])

    print("built messages table with %i entries" % len(texts))
def test_clean_number(self):
    """Check ``core.cleanNumber`` against a table of raw inputs and expected outputs."""
    # (raw input, expected cleaned value), checked in order.
    expectations = (
        ("+15105023391", "5105023391"),
        ("(415) 637-3582", "4156373582"),
        ("*****@*****.**", "*****@*****.**"),
        ("89203", "89203"),
    )
    for raw, cleaned in expectations:
        self.assertEqual(core.cleanNumber(raw), cleaned)
def write_cursor(self, texts, cursor):
    """Populate the ``handle``, ``chat``, ``chat_handle_join``, ``message``
    and ``chat_message_join`` tables from ``texts``.

    Pass one walks ``texts`` to create a handle per cleaned number, a chat
    per chat key (the chatroom name when set, else the number) and one
    chat/handle join per participant. Pass two inserts each message and
    joins it to its chat. A summary line is printed after each pass.

    Args:
        texts: sequence of message objects exposing ``num``, ``date``,
            ``body``, ``incoming`` and ``chatroom``. It is iterated twice and
            passed to ``len()``.
        cursor: database cursor for the output database.

    Raises:
        sms_exceptions.NonEmptyStartDBError: if ``message`` already has rows.
    """
    message_count = cursor.execute("SELECT Count() FROM message").fetchone()[0]
    if message_count > 0:
        raise sms_exceptions.NonEmptyStartDBError("Output DB has existing messages!")

    ## First populate the 'handle' table with each contact
    handles_lookup = {}     # cleaned number -> handle ROWID
    chat_lookup = {}        # chat_key -> chat ROWID
    chat_participants = {}  # chat_key -> set of cleaned numbers
    joins_written = 0       # rows actually inserted into chat_handle_join
    for txt in texts:
        try:
            clean_number = core.cleanNumber(txt.num)
            chat_key = txt.chatroom if txt.chatroom else txt.num

            ## Create the handle table (effectively a contacts table)
            if clean_number and clean_number not in handles_lookup:
                cursor.execute(
                    "INSERT INTO handle ('id', service, uncanonicalized_id ) "
                    "VALUES (?,?,?)",
                    [txt.num, "SMS", clean_number])
                handles_lookup[clean_number] = cursor.lastrowid

            if not chat_key:
                core.warning("no txt chat_key [%s] for %s" % (chat_key, txt))

            ## Create the chat table (effectively a threads table)
            if chat_key not in chat_lookup:
                if txt.chatroom:
                    guid = "SMS;+;%s" % txt.chatroom
                    style = 43
                else:
                    guid = "SMS;-;%s" % txt.num
                    style = 45
                cursor.execute(
                    "INSERT INTO chat (guid, style, state, chat_identifier, service_name, room_name ) "
                    "VALUES (?,?,?,?,?,?)",
                    [guid, style, 3, chat_key, 'SMS', txt.chatroom])
                chat_lookup[chat_key] = cursor.lastrowid

            ## Create the chat_handle_join table (represents participants in all threads)
            if chat_key not in chat_participants:
                chat_participants[chat_key] = set()
            if clean_number not in chat_participants[chat_key]:
                chat_participants[chat_key].add(clean_number)
                # Don't add handle joins for unknown contacts. The check is
                # explicit so that real database errors are not silently
                # discarded the way a bare `except: pass` would.
                if clean_number in handles_lookup:
                    cursor.execute(
                        "INSERT INTO chat_handle_join (chat_id, handle_id ) "
                        "VALUES (?,?)",
                        [chat_lookup[chat_key], handles_lookup[clean_number]])
                    joins_written += 1
        except Exception:
            # (txt,) keeps the formatting correct even if txt is a tuple.
            print(core.term.red("something failed at: %s") % (txt,))
            raise

    # The third figure is the count of join rows inserted (previously the
    # number of chats was printed a second time).
    print("built handles table with %i, chat with %i, chat_handle_join with %i entries"
          % (len(handles_lookup), len(chat_lookup), joins_written))

    for txt in texts:
        chat_key = txt.chatroom if txt.chatroom else txt.num
        handle_i = handles_lookup.get(core.cleanNumber(txt.num), 0)
        # txt.date / 1000 minus 978307200 s (2001-01-01T00:00:00Z as Unix
        # seconds). int() is used because long() does not exist on Python 3.
        idate = int((float(txt.date) / 1000) - 978307200)
        from_me = 0 if txt.incoming else 1
        guid = str(uuid.uuid1())
        cursor.execute(
            "INSERT INTO message "
            "('text', guid, handle_id, version, type, service, 'date', is_finished, is_from_me, is_sent, is_read ) "
            "VALUES (?,?,?,?,?,?,?,?,?,?,?)",
            [txt.body, guid, handle_i, 1, txt.chatroom is not None, 'SMS',
             idate, 1, from_me, from_me, (1 - from_me)])
        message_id = cursor.lastrowid
        cursor.execute(
            "INSERT INTO chat_message_join (chat_id, message_id) "
            "VALUES (?,?)",
            [chat_lookup[chat_key], message_id])

    print("built messages table with %i entries" % len(texts))