def __init__(self, convo_name, convo_list, rank):
    """Set up the reader state for a single conversation.

    Parameters:
        convo_name: A string for the conversation name, found in your
            facebook archive. It is stored lower-cased.
        convo_list: A 2D list with inner lists of the format
            [person_name (str), message (str), date-time (str)]
        rank: Converted with str() and appended to
            BaseConvoReader.BASE_PATH (followed by '/') to form the path
            stored for this conversation.
    """
    self._name = convo_name.lower()

    # Normalise every message: lower-case the speaker, wrap the date string.
    parsed_messages = []
    for speaker, text, stamp in convo_list:
        parsed_messages.append([speaker.lower(), text, CustomDate(stamp)])
    self._convo = parsed_messages

    self._people = self.get_people()

    # Anyone returned by get_people() whose name is not one of the
    # ', '-separated parts of the conversation name.
    names_in_title = self._name.split(', ')
    self._kicked_or_left = [
        member for member in self._people if member not in names_in_title
    ]

    self._individual_words = self._cleaned_word_freqs()
    self._len = len(self._convo)

    rank_folder = str(rank) + '/'
    self._path = BaseConvoReader.BASE_PATH + rank_folder
    self._word_cloud = None
def from_msgs_dict(cls, msgs_dict: dict):
    """Build the three lookup tables from msgs_dict and return cls(preferences).

    Parameters:
        msgs_dict: Maps a conversation name to its list of messages. The
            last message's element at index 2 is passed to CustomDate.

    Returns:
        cls(preferences), where preferences has the keys 'alpha', 'length'
        and 'contacted'. Each table maps a 1-based rank to a tuple and maps
        the conversation name back to its rank:
            'alpha':     rank -> (name, name)        name -> (rank, name)
            'length':    rank -> (name, length)      name -> (rank, length)
            'contacted': str(rank) -> (name, date)   name -> (str(rank), date)
    """
    # NOTE(review): 'contacted' uses str ranks while 'alpha' and 'length' use
    # int ranks - confirm callers rely on this before unifying.
    preferences = dict()

    # Names ordered alphabetically (then by descending message count).
    def alphabetical_order(item):
        convo_name, convo = item
        return convo_name, -len(convo)

    alpha_lookup = dict()
    ordered_items = sorted(msgs_dict.items(), key=alphabetical_order)
    for rank, (convo_name, _) in enumerate(ordered_items, start=1):
        alpha_lookup[rank] = (convo_name, convo_name)
        alpha_lookup[convo_name] = (rank, convo_name)
    preferences['alpha'] = alpha_lookup

    # (name, length) pairs ordered by descending length, then alphabetically.
    name_lengths = [(convo_name, len(convo))
                    for convo_name, convo in msgs_dict.items()]
    name_lengths.sort(key=lambda pair: (-pair[1], pair[0]))
    length_lookup = dict()
    for rank, (convo_name, length) in enumerate(name_lengths, start=1):
        length_lookup[rank] = (convo_name, length)
        length_lookup[convo_name] = (rank, length)
    preferences['length'] = length_lookup

    # Conversations ordered by the date of their last message, most recent
    # first; ordering is whatever CustomDate comparison defines.
    def last_message_date(item):
        return CustomDate(item[1][-1][2])

    recent_lookup = dict()
    most_recent_first = sorted(msgs_dict.items(), key=last_message_date,
                               reverse=True)
    for position, (convo_name, convo) in enumerate(most_recent_first):
        rank_text = str(position + 1)
        last_date = convo[-1][2]
        recent_lookup[rank_text] = (convo_name, last_date)
        recent_lookup[convo_name] = (rank_text, last_date)
    preferences['contacted'] = recent_lookup

    return cls(preferences)
def add_to_duplicate(): """Helper method to add a message group to a DUPLICATE conversation""" nonlocal msgs, cur_thread, key, next_time, duplicate_index duplicate_num = 1 # the number of duplicate message groups (have the same people, aka name) # but in reality belong to different conversations added = False # used so that we don't accidentally add something twice # while there exists another duplicate conversation in our list keep checking if the current # message group belongs to it. If we exit the while loop a new conversation is created with # "DUPLICATE #X appended to distinguish it, with X being the xth new duplicate conversation while (key + ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added: new_name = key + ', DUPLICATE #{0}'.format(duplicate_num) # The time of the last message in the conversation for new_name prev_time = CustomDate(msgs[new_name][-1][2]) if prev_time.distance_from(next_time) <= 0: # if our current conversation was during or after the last message in the new_name convo print(one_line()) print() print(one_line()) print( "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes." .format(duplicate_index, num_duplicates)) print(conversation_color + key) print(one_line() + "\n") print(previous_color + '# previous conversation end - length = {0:,}'.format( len(msgs[new_name])) + Style.RESET_ALL) print_thread(msgs[new_name], end=True, padding=10) print( current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)" .format(len(cur_thread)) + Style.RESET_ALL) print_thread(cur_thread, start=True, padding=10) print('\n' + one_line()) # Prints the last 5 messages of the previous message group and the first 5 message of # the current message group, both in RED with a BLACK background print( are_same_color + "\nAre these two chunks from the same conversation? 
You might have " "to look this up on facebook.com/messages [Y/n]" + Style.RESET_ALL) are_same = user_says_yes() # User input for whether the two message groups are in the same conversation if not are_same: duplicate_num += 1 # if they aren't the same, increment duplicate_num and try again else: # check to make sure the current message group hasn't already been added, otherwise add it msgs[new_name].extend(cur_thread) added = True else: # this conversation existed but was after our current one, so increment the number and try again duplicate_num += 1 if not added: # if the current message group hasn't been added, add with a new duplicate # msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread duplicate_index += 1 return
def get_all_msgs_dict(msg_html_path, unordered_threads, footer, times):
    """Returns the dictionary used by MessageReader.

    Threads are grouped by their cleaned conversation name. The first thread
    seen for a name is stored directly. Later threads with the same name are
    held back and then either merged automatically, merged after asking the
    user, or stored under "<name>, DUPLICATE #<n>".

    Parameters:
        msg_html_path: Passed to get_all_thread_containers when either
            unordered_threads or footer is None.
        unordered_threads: Iterable of thread elements, or None to load
            them from msg_html_path.
        footer: Footer value, or None to load it from msg_html_path. If
            either this or unordered_threads is None, both are reloaded.
        times: A list; three time.time() stamps are appended to it
            (background setup done, user selection starting, end of setup).

    Returns:
        A tuple (msgs, str(footer), preferences). msgs maps conversation
        name to its list of messages, and preferences is the `preferences`
        attribute of PreferencesSearcher.from_msgs_dict(msgs).
    """
    conversation_color = Fore.LIGHTYELLOW_EX + Back.LIGHTBLACK_EX
    previous_color = Fore.LIGHTCYAN_EX + Back.BLACK
    current_color = Fore.LIGHTGREEN_EX + Back.BLACK
    are_same_color = Fore.LIGHTRED_EX + Back.BLACK

    def print_thread(thread, start=False, end=False, padding=5):
        """Prettily prints the first (start=True) or last (end=True) `padding` messages of thread"""
        if start:
            assert not end, "Either start or end can be True, not both"
        else:
            assert end, "Exactly one value from start and end must be true"
        assert isinstance(padding, int), "padding must be an integer"
        assert padding > 0, "padding must be greater than 0"

        if start:
            first, last = 0, min(padding, len(thread))
            date_color = Fore.GREEN + Back.BLACK
        else:
            last = len(thread)
            first = max(0, last - padding)
            date_color = Fore.CYAN + Back.BLACK

        window = thread[first:last]
        # default=0 keeps an empty window from raising; nothing is printed.
        max_name_length = max((len(name) for name, _, _ in window),
                              default=0) + 1
        for person, msg, date in window:
            print("{0:{align}{width}}: {1} | {2}".format(
                person, msg, date_color + date, align='<',
                width=max_name_length))

    def show_chunk_pair(existing_thread):
        """Prints the end of existing_thread next to the start of cur_thread"""
        print(
            "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
            .format(duplicate_index, num_duplicates))
        print(conversation_color + key)
        print(one_line() + "\n")
        print(previous_color +
              '# previous conversation end - length = {0:,}'.format(
                  len(existing_thread)) + Style.RESET_ALL)
        print_thread(existing_thread, end=True, padding=10)
        print(
            current_color +
            "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
            .format(len(cur_thread)) + Style.RESET_ALL)
        print_thread(cur_thread, start=True, padding=10)
        print('\n' + one_line())

    def add_to_duplicate():
        """Helper method to add a message group to a DUPLICATE conversation"""
        # Only duplicate_index is rebound; msgs is mutated in place and the
        # remaining enclosing names are only read.
        nonlocal duplicate_index
        duplicate_num = 1  # number tried for the "DUPLICATE #X" suffix
        added = False  # used so that we don't accidentally add something twice
        new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)

        # While another duplicate conversation with this name exists, check
        # whether the current message group belongs to it. If none accepts
        # it, a new conversation is created with "DUPLICATE #X" appended.
        while new_name in msgs and not added:
            # The time of the last message in the conversation for new_name
            dup_prev_time = CustomDate(msgs[new_name][-1][2])
            if dup_prev_time.distance_from(next_time) <= 0:
                # The current group starts during or after the last message
                # of the new_name conversation, so ask the user.
                print(one_line())
                print()
                print(one_line())
                show_chunk_pair(msgs[new_name])
                print(
                    are_same_color +
                    "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com/messages [Y/n]" +
                    Style.RESET_ALL)
                if user_says_yes():
                    msgs[new_name].extend(cur_thread)
                    added = True
                    continue
            # Not the same conversation (user said no, or this duplicate ends
            # after the current group starts): try the next number.
            duplicate_num += 1
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)

        if not added:
            # Not merged anywhere: store under a new duplicate number
            msgs[new_name] = cur_thread
        duplicate_index += 1

    # Load the threads from the archive when the caller passed None
    if unordered_threads is None or footer is None:
        all_thread_containers = get_all_thread_containers(msg_html_path)
        unordered_threads, footer = get_all_threads_unordered(
            all_thread_containers)

    msgs = dict()  # result we return
    duplicate_bucket = dict()  # temporarily holds duplicate conversations
    num_duplicates = 0
    for thread in unordered_threads:
        convo_name = clean_convo_name(thread.contents[0])
        cur_thread = get_messages_readable(thread)
        if convo_name not in msgs:
            # First message group with this name, so it is added directly
            msgs[convo_name] = cur_thread
        else:
            # This name has been seen before; hold the group back for later
            num_duplicates += 1
            duplicate_bucket.setdefault(convo_name, []).append(cur_thread)

    # The following is used in setup to time how long it takes various processes
    # This timing counts the time that user input starts, as there can be a lag before
    times.append(time.time())  # Background setup done

    print('\n' + one_line() + '\n')
    input_text = (
        "Press enter when you're ready to continue to user input: (Consider making your terminal full screen to "
        "easier read the messages printed)\n")
    input_text = textwrap.fill(
        input_text, width=min(shutil.get_terminal_size().columns, 150))
    input(input_text)

    # this time is user input prompt time
    times.append(time.time())  # User selection is starting

    # Place every held-back message group into a conversation
    duplicate_index = 1
    for key in sorted(duplicate_bucket):
        for cur_thread in duplicate_bucket[key]:
            last_stored = msgs[key][-1]
            first_new = cur_thread[0]
            prev_time = CustomDate(last_stored[2])
            next_time = CustomDate(first_new[2])
            # Difference between the stored conversation's last message and
            # this group's first message; it helps decide whether both
            # belong to the same conversation.
            # NOTE(review): units of distance_from are not visible here.
            gap = prev_time.distance_from(next_time)

            if (-3 <= gap <= 0 and len(msgs[key]) > 10000
                    and len(cur_thread) == 10000):
                # Merged behind the scenes, without asking the user
                msgs[key].extend(cur_thread)
                duplicate_index += 1
            elif gap <= 0:
                clear_screen()
                show_chunk_pair(msgs[key])
                print(
                    are_same_color +
                    "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)
                # user input decides if the two groups are one conversation
                if user_says_yes():
                    msgs[key].extend(cur_thread)
                    duplicate_index += 1
                    clear_screen()
                else:
                    # NOT the same (because of user input), so the new
                    # group goes to an appropriate duplicate
                    add_to_duplicate()
            else:
                # Not the same (the stored conversation ends after the new
                # group starts), so the new group goes to a duplicate
                add_to_duplicate()

    clear_screen()
    quick_preferences = PreferencesSearcher.from_msgs_dict(msgs)
    times.append(time.time())  # The end of setup
    return msgs, str(footer), quick_preferences.preferences