def add_to_duplicate(): """Helper method to add a message group to a DUPLICATE conversation""" nonlocal msgs, cur_thread, key, next_time, duplicate_index duplicate_num = 1 # the number of duplicate message groups (have the same people, aka name) # but in reality belong to different conversations added = False # used so that we don't accidentally add something twice # while there exists another duplicate conversation in our list keep checking if the current # message group belongs to it. If we exit the while loop a new conversation is created with # "DUPLICATE #X appended to distinguish it, with X being the xth new duplicate conversation while (key + ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added: new_name = key + ', DUPLICATE #{0}'.format(duplicate_num) # The time of the last message in the conversation for new_name prev_time = CustomDate(msgs[new_name][-1][2]) if prev_time.distance_from(next_time) <= 0: # if our current conversation was during or after the last message in the new_name convo print(one_line()) print() print(one_line()) print("#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes." .format(duplicate_index, num_duplicates)) print(conversation_color + key) print(one_line() + "\n") print(previous_color + '# previous conversation end - length = {0:,}'.format(len(msgs[new_name])) + Style.RESET_ALL) print_thread(msgs[new_name], end=True, padding=10) print(current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)" .format(len(cur_thread)) + Style.RESET_ALL) print_thread(cur_thread, start=True, padding=10) print('\n' + one_line()) # Prints the last 5 messages of the previous message group and the first 5 message of # the current message group, both in RED with a BLACK background print(are_same_color + "\nAre these two chunks from the same conversation? You might have " "to look this up on facebook.com. 
[Y/n]" + Style.RESET_ALL) are_same = user_says_yes() # User input for whether the two message groups are in the same conversation if not are_same: duplicate_num += 1 # if they aren't the same, increment duplicate_num and try again else: # check to make sure the current message group hasn't already been added, otherwise add it msgs[new_name].extend(cur_thread) added = True else: # this conversation existed but was after our current one, so increment the number and try again duplicate_num += 1 if not added: # if the current message group hasn't been added, add with a new duplicate # msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread duplicate_index += 1 return
def add_to_duplicate(): """Helper method to add a message group to a DUPLICATE conversation""" nonlocal msgs, cur_thread, key, next_time, duplicate_index duplicate_num = 1 # the number of duplicate message groups (have the same people, aka name) # but in reality belong to different conversations added = False # used so that we don't accidentally add something twice # while there exists another duplicate conversation in our list keep checking if the current # message group belongs to it. If we exit the while loop a new conversation is created with # "DUPLICATE #X appended to distinguish it, with X being the xth new duplicate conversation while (key + ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added: new_name = key + ', DUPLICATE #{0}'.format(duplicate_num) # The time of the last message in the conversation for new_name prev_time = CustomDate(msgs[new_name][-1][2]) if prev_time.distance_from(next_time) <= 0: # if our current conversation was during or after the last message in the new_name convo print(one_line()) print() print(one_line()) print( "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes." .format(duplicate_index, num_duplicates)) print(conversation_color + key) print(one_line() + "\n") print(previous_color + '# previous conversation end - length = {0:,}'.format( len(msgs[new_name])) + Style.RESET_ALL) print_thread(msgs[new_name], end=True, padding=10) print( current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)" .format(len(cur_thread)) + Style.RESET_ALL) print_thread(cur_thread, start=True, padding=10) print('\n' + one_line()) # Prints the last 5 messages of the previous message group and the first 5 message of # the current message group, both in RED with a BLACK background print( are_same_color + "\nAre these two chunks from the same conversation? 
You might have " "to look this up on facebook.com/messages [Y/n]" + Style.RESET_ALL) are_same = user_says_yes() # User input for whether the two message groups are in the same conversation if not are_same: duplicate_num += 1 # if they aren't the same, increment duplicate_num and try again else: # check to make sure the current message group hasn't already been added, otherwise add it msgs[new_name].extend(cur_thread) added = True else: # this conversation existed but was after our current one, so increment the number and try again duplicate_num += 1 if not added: # if the current message group hasn't been added, add with a new duplicate # msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread duplicate_index += 1 return
def get_all_msgs_dict(msg_html_path, unordered_threads, footer, times):
    """Build the conversation dictionary used by MessageReader.

    Every thread is converted to a list of messages and filed under its cleaned
    conversation name. When a name shows up more than once, the extra message
    groups are either appended to the existing conversation (automatically, or
    after asking the user) or stored under "<name>, DUPLICATE #N".

    Args:
        msg_html_path: passed to ``get_all_thread_containers`` when
            ``unordered_threads`` or ``footer`` is None.
        unordered_threads: iterable of thread elements, or None to load them from
            ``msg_html_path``.
        footer: footer value returned as ``str(footer)``, or None to load it.
        times: object with ``append``; receives three ``time.time()`` stamps
            (background setup done, user input starting, end of setup).

    Returns:
        Tuple ``(msgs, str(footer), preferences)`` where ``msgs`` maps conversation
        name to its message list and ``preferences`` comes from
        ``PreferencesSearcher.from_msgs_dict(msgs).preferences``.
    """
    conversation_color = Fore.LIGHTYELLOW_EX + Back.LIGHTBLACK_EX
    previous_color = Fore.LIGHTCYAN_EX + Back.BLACK
    current_color = Fore.LIGHTGREEN_EX + Back.BLACK
    are_same_color = Fore.LIGHTRED_EX + Back.BLACK

    def print_thread(thread, start=False, end=False, padding=5):
        """Print the first (start=True) or last (end=True) `padding` messages of thread."""
        if start:
            assert not end, "Either start or end can be True, not both"
        else:
            assert end, "Exactly one value from start and end must be true"
        assert isinstance(padding, int), "padding must be an integer"
        assert padding > 0, "padding must be greater than 0"

        if start:
            first, last = 0, min(padding, len(thread))
            date_color = Fore.GREEN + Back.BLACK
        else:
            last = len(thread)
            first = max(0, last - padding)
            date_color = Fore.CYAN + Back.BLACK

        shown = thread[first:last]
        # Pad every name to the longest one shown so the messages line up.
        max_name_length = max(len(name) for name, _, _ in shown) + 1
        for person, msg, date in shown:
            print("{0:{align}{width}}: {1} | {2}"
                  .format(person, msg, date_color + date, align='<', width=max_name_length))

    def ask_if_same_conversation(existing_thread):
        """Print the end of existing_thread and the start of cur_thread, then ask the user.

        Returns the result of ``user_says_yes()``.
        """
        print("#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
              .format(duplicate_index, num_duplicates))
        print(conversation_color + key)
        print(one_line() + "\n")
        print(previous_color + '# previous conversation end - length = {0:,}'.format(len(existing_thread))
              + Style.RESET_ALL)
        print_thread(existing_thread, end=True, padding=10)
        print(current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
              .format(len(cur_thread)) + Style.RESET_ALL)
        print_thread(cur_thread, start=True, padding=10)
        print('\n' + one_line())
        print(are_same_color + "\nAre these two chunks from the same conversation? You might have "
              "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)
        return user_says_yes()

    def add_to_duplicate():
        """Store cur_thread in the first fitting "<key>, DUPLICATE #N" conversation."""
        # Only duplicate_index is rebound here; msgs, cur_thread, key and next_time
        # are read (or mutated in place) through the closure.
        nonlocal duplicate_index
        duplicate_num = 1
        while True:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)
            if new_name not in msgs:
                # No existing duplicate accepted the group: open a new one.
                msgs[new_name] = cur_thread
                break
            # Date of the last message already stored under new_name.
            last_time = CustomDate(msgs[new_name][-1][2])
            if last_time.distance_from(next_time) <= 0:
                # Our group is during or after the end of new_name, so it may continue it.
                print(one_line())
                print()
                print(one_line())
                if ask_if_same_conversation(msgs[new_name]):
                    msgs[new_name].extend(cur_thread)
                    break
            # new_name ends after our group starts, or the user rejected it.
            duplicate_num += 1
        # NOTE(review): the source was whitespace-collapsed, so the nesting of this
        # increment is reconstructed; placed here so the counter advances once per
        # handled group, as it does in the main loop. Confirm.
        duplicate_index += 1

    # Load threads and footer from disk when the caller did not supply them.
    if unordered_threads is None or footer is None:
        all_thread_containers = get_all_thread_containers(msg_html_path)
        unordered_threads, footer = get_all_threads_unordered(all_thread_containers)

    msgs = {}  # result we return
    duplicate_bucket = {}  # conversation name -> later message groups with that name
    num_duplicates = 0
    for thread in unordered_threads:
        convo_name = clean_convo_name(thread.contents[0])
        messages = get_messages_readable(thread)
        if convo_name not in msgs:
            msgs[convo_name] = messages
        else:
            # Name already seen: resolve it in the interactive pass below.
            num_duplicates += 1
            duplicate_bucket.setdefault(convo_name, []).append(messages)

    times.append(time.time())  # Background setup done
    print('\n' + one_line() + '\n')
    input("Press enter when you're ready to continue to user input: \n")
    times.append(time.time())  # User selection is starting

    duplicate_index = 1
    for key in sorted(duplicate_bucket):
        for cur_thread in duplicate_bucket[key]:
            # Compare the last stored message with the first message of the new group.
            prev_time = CustomDate(msgs[key][-1][2])
            next_time = CustomDate(cur_thread[0][2])
            # Evaluated once and reused by both branches; assumes distance_from has
            # no side effects.
            gap = prev_time.distance_from(next_time)
            if -3 <= gap <= 0 and len(msgs[key]) > 10000 and len(cur_thread) == 10000:
                # Merged without asking the user.
                msgs[key].extend(cur_thread)
                duplicate_index += 1
            elif gap <= 0:
                clear_screen()
                if ask_if_same_conversation(msgs[key]):
                    msgs[key].extend(cur_thread)
                    duplicate_index += 1
                    clear_screen()
                else:
                    # The user says these are different conversations.
                    add_to_duplicate()
            else:
                # The stored conversation ends after the new group starts.
                add_to_duplicate()

    # NOTE(review): nesting reconstructed from whitespace-collapsed source; assumed
    # to run once after all duplicates are resolved. Confirm.
    clear_screen()
    quick_preferences = PreferencesSearcher.from_msgs_dict(msgs)
    times.append(time.time())  # The end of setup
    return msgs, str(footer), quick_preferences.preferences
def get_all_msgs_dict(msg_html_path, unordered_threads, footer, times):
    """Build the conversation dictionary used by MessageReader.

    Every thread is converted to a list of messages and filed under its cleaned
    conversation name. When a name shows up more than once, the extra message
    groups are either appended to the existing conversation (automatically, or
    after asking the user) or stored under "<name>, DUPLICATE #N".

    Args:
        msg_html_path: passed to ``get_all_thread_containers`` when
            ``unordered_threads`` or ``footer`` is None.
        unordered_threads: iterable of thread elements, or None to load them from
            ``msg_html_path``.
        footer: footer value returned as ``str(footer)``, or None to load it.
        times: object with ``append``; receives three ``time.time()`` stamps
            (background setup done, user input starting, end of setup).

    Returns:
        Tuple ``(msgs, str(footer), preferences)`` where ``msgs`` maps conversation
        name to its message list and ``preferences`` comes from
        ``PreferencesSearcher.from_msgs_dict(msgs).preferences``.
    """
    conversation_color = Fore.LIGHTYELLOW_EX + Back.LIGHTBLACK_EX
    previous_color = Fore.LIGHTCYAN_EX + Back.BLACK
    current_color = Fore.LIGHTGREEN_EX + Back.BLACK
    are_same_color = Fore.LIGHTRED_EX + Back.BLACK

    def add_to_duplicate():
        """Store cur_thread in the first fitting "<key>, DUPLICATE #N" conversation."""
        # Only duplicate_index is rebound here; msgs, cur_thread, key and next_time
        # are read (or mutated in place) through the closure.
        nonlocal duplicate_index
        duplicate_num = 1
        while True:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)
            if new_name not in msgs:
                # No existing duplicate accepted the group: open a new one.
                msgs[new_name] = cur_thread
                break
            # Date of the last message already stored under new_name.
            last_time = CustomDate(msgs[new_name][-1][2])
            if last_time.distance_from(next_time) <= 0:
                # Our group is during or after the end of new_name, so it may continue it.
                print(one_line())
                print()
                print(one_line())
                print(
                    "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                    .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")
                print(previous_color + '# previous conversation end - length = {0:,}'.format(
                    len(msgs[new_name])) + Style.RESET_ALL)
                print_thread(msgs[new_name], end=True, padding=10)
                print(
                    current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                    .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)
                print('\n' + one_line())
                # NOTE(review): this prompt says "facebook.com/messages" while the one
                # in the main loop below says "facebook.com."; both kept as found.
                print(
                    are_same_color + "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com/messages [Y/n]" + Style.RESET_ALL)
                if user_says_yes():
                    msgs[new_name].extend(cur_thread)
                    break
            # new_name ends after our group starts, or the user rejected it.
            duplicate_num += 1
        # NOTE(review): the source was whitespace-collapsed, so the nesting of this
        # increment is reconstructed; placed here so the counter advances once per
        # handled group, as it does in the main loop. Confirm.
        duplicate_index += 1

    def print_thread(thread, start=False, end=False, padding=5):
        """Print the first (start=True) or last (end=True) `padding` messages of thread."""
        if start:
            assert not end, "Either start or end can be True, not both"
        else:
            assert end, "Exactly one value from start and end must be true"
        assert isinstance(padding, int), "padding must be an integer"
        assert padding > 0, "padding must be greater than 0"

        if start:
            first, last = 0, min(padding, len(thread))
            date_color = Fore.GREEN + Back.BLACK
        else:
            last = len(thread)
            first = max(0, last - padding)
            date_color = Fore.CYAN + Back.BLACK

        shown = thread[first:last]
        # Pad every name to the longest one shown so the messages line up.
        max_name_length = max(len(name) for name, _, _ in shown) + 1
        for person, msg, date in shown:
            print("{0:{align}{width}}: {1} | {2}".format(
                person, msg, date_color + date, align='<', width=max_name_length))

    # Load threads and footer from disk when the caller did not supply them.
    if unordered_threads is None or footer is None:
        all_thread_containers = get_all_thread_containers(msg_html_path)
        unordered_threads, footer = get_all_threads_unordered(
            all_thread_containers)

    msgs = {}  # result we return
    duplicate_bucket = {}  # conversation name -> later message groups with that name
    num_duplicates = 0
    for thread in unordered_threads:
        convo_name = clean_convo_name(thread.contents[0])
        messages = get_messages_readable(thread)
        if convo_name not in msgs:
            msgs[convo_name] = messages
        else:
            # Name already seen: resolve it in the interactive pass below.
            num_duplicates += 1
            duplicate_bucket.setdefault(convo_name, []).append(messages)

    times.append(time.time())  # Background setup done
    print('\n' + one_line() + '\n')
    input_text = (
        "Press enter when you're ready to continue to user input: (Consider making your terminal full screen to "
        "easier read the messages printed)\n")
    # Wrap the prompt to the terminal width, capped at 150 columns.
    input_text = textwrap.fill(input_text, width=min(shutil.get_terminal_size().columns, 150))
    input(input_text)
    times.append(time.time())  # User selection is starting

    duplicate_index = 1
    for key in sorted(duplicate_bucket):
        for cur_thread in duplicate_bucket[key]:
            # Compare the last stored message with the first message of the new group.
            prev_time = CustomDate(msgs[key][-1][2])
            next_time = CustomDate(cur_thread[0][2])
            # Evaluated once and reused by both branches; assumes distance_from has
            # no side effects.
            gap = prev_time.distance_from(next_time)
            if -3 <= gap <= 0 and len(msgs[key]) > 10000 and len(cur_thread) == 10000:
                # Merged without asking the user.
                msgs[key].extend(cur_thread)
                duplicate_index += 1
            elif gap <= 0:
                clear_screen()
                print(
                    "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                    .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")
                print(previous_color + '# previous conversation end - length = {0:,}'.format(
                    len(msgs[key])) + Style.RESET_ALL)
                print_thread(msgs[key], end=True, padding=10)
                print(
                    current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                    .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)
                print('\n' + one_line())
                print(
                    are_same_color + "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)
                if user_says_yes():
                    msgs[key].extend(cur_thread)
                    duplicate_index += 1
                    clear_screen()
                else:
                    # The user says these are different conversations.
                    add_to_duplicate()
            else:
                # The stored conversation ends after the new group starts.
                add_to_duplicate()

    # NOTE(review): nesting reconstructed from whitespace-collapsed source; assumed
    # to run once after all duplicates are resolved. Confirm.
    clear_screen()
    quick_preferences = PreferencesSearcher.from_msgs_dict(msgs)
    times.append(time.time())  # The end of setup
    return msgs, str(footer), quick_preferences.preferences