예제 #1
0
    def raw_msgs_by_time(self, window=60, contact=None) -> list:
        """Share of the conversation's messages that fall in each time-of-day bin.

        Parameters:
            window (optional): Width of each bin in minutes (default 60, i.e. one hour)
            contact (optional): The contact(s) whose messages are counted (default, all contacts)
        Return:
            A list of [bin_start, percent] pairs ordered from 12:00 am onward, where bin_start is
            CustomDate.minutes_to_time(i * window) for bin i. The day is cut into ceil(1440 / window)
            bins, so when window does not divide 1440 evenly the final bin only covers the leftover
            minutes (window=60 gives 24 one-hour bins; window=61 gives 23 bins of 61 minutes followed
            by one bin of 37 minutes). Each percent is computed against len(self), not against the
            number of messages that passed the contact filter.
        """
        contact = self._assert_contact(contact)

        def wanted(sender):
            # With no contact filter every sender is accepted
            return contact is None or sender in contact

        # One running count per bin of the day
        tallies = [0] * ceil(60 * 24 / window)
        for sender, _, stamp in self._convo:
            if wanted(sender.lower()):
                tallies[(stamp.minutes() // window) % len(tallies)] += 1

        # Pair each bin's start time with its count scaled to a percentage
        return [[CustomDate.minutes_to_time(slot * window), hits / (len(self) / 100)]
                for slot, hits in enumerate(tallies)]
예제 #2
0
    def raw_msgs_by_time(self, window=60, contact=None) -> list:
        """Share of the conversation's messages that fall in each time-of-day bin.

        Parameters:
            window (optional): Width of each bin in minutes (default 60, i.e. one hour)
            contact (optional): The contact(s) whose messages are counted (default, all contacts)
        Return:
            A list of [bin_start, percent] pairs ordered from 12:00 am onward, where bin_start is
            CustomDate.minutes_to_time(i * window) for bin i. The day is cut into ceil(1440 / window)
            bins, so when window does not divide 1440 evenly the final bin only covers the leftover
            minutes (window=60 gives 24 one-hour bins; window=61 gives 23 bins of 61 minutes followed
            by one bin of 37 minutes). Each percent is computed against len(self), not against the
            number of messages that passed the contact filter.
        """
        contact = self._assert_contact(contact)

        def wanted(sender):
            # With no contact filter every sender is accepted
            return contact is None or sender in contact

        # One running count per bin of the day
        tallies = [0] * ceil(60 * 24 / window)
        for sender, _, stamp in self._convo:
            if wanted(sender.lower()):
                tallies[(stamp.minutes() // window) % len(tallies)] += 1

        # Pair each bin's start time with its count scaled to a percentage
        return [[CustomDate.minutes_to_time(slot * window), hits / (len(self) / 100)]
                for slot, hits in enumerate(tallies)]
예제 #3
0
    def _raw_convo_killer(self, threshold, start=None, end=None):
        """Collect, per person, the messages that were followed by a long silence.

        Parameters:
            threshold: Smallest gap between two consecutive messages (as reported by
                CustomDate.distance_from) for the earlier message to be recorded
            start (optional): Date to begin scanning from (default, the first message)
            end (optional): Date to stop scanning at (default, the last message)
        Return:
            A Counter mapping each person's title-cased name to the list of message indexes
            after which the conversation paused for at least `threshold`
        """
        CustomDate.assert_dates(start, end)

        def by_date(entry):
            # Each conversation entry is [person, message, date]
            return entry[2]

        # Resolve the scan range: look the dates up when given, otherwise cover the whole
        # conversation (index 0 through the last index)
        start_date_index = CustomDate.bsearch_index(self._convo, start, key=by_date) if start is not None else 0
        # The guard must test `end`: testing `start` here ignored a supplied end date whenever
        # start was None, and searched for None whenever only start was supplied
        end_date_index = CustomDate.bsearch_index(self._convo, end, key=by_date) \
            if end is not None else self._len - 1

        convo_start_freq = {person: [] for person in self._people}

        # NOTE(review): when start_date_index is 0 this records index -1, i.e. the last message
        # of the conversation -- confirm that is intended
        convo_start_freq[self._convo[start_date_index - 1][0]].append(start_date_index - 1)

        for i in range(start_date_index, end_date_index):
            curr_date = self._convo[i][2]
            next_date = self._convo[i + 1][2]
            if next_date.distance_from(curr_date) >= threshold:
                convo_start_freq[self._convo[i][0]].append(i)
        return Counter({name.title(): indexes for name, indexes in convo_start_freq.items()})
예제 #4
0
    def _raw_convo_killer(self, threshold, start=None, end=None):
        """Collect, per person, the messages that were followed by a long silence.

        Parameters:
            threshold: Smallest gap between two consecutive messages (as reported by
                CustomDate.distance_from) for the earlier message to be recorded
            start (optional): Date to begin scanning from (default, the first message)
            end (optional): Date to stop scanning at (default, the last message)
        Return:
            A Counter mapping each person's title-cased name to the list of message indexes
            after which the conversation paused for at least `threshold`
        """
        CustomDate.assert_dates(start, end)

        def by_date(entry):
            # Each conversation entry is [person, message, date]
            return entry[2]

        # Resolve the scan range: look the dates up when given, otherwise cover the whole
        # conversation (index 0 through the last index)
        start_date_index = CustomDate.bsearch_index(self._convo, start, key=by_date) if start is not None else 0
        # The guard must test `end`: testing `start` here ignored a supplied end date whenever
        # start was None, and searched for None whenever only start was supplied
        end_date_index = CustomDate.bsearch_index(self._convo, end, key=by_date) \
            if end is not None else self._len - 1

        convo_start_freq = {person: [] for person in self._people}

        # NOTE(review): when start_date_index is 0 this records index -1, i.e. the last message
        # of the conversation -- confirm that is intended
        convo_start_freq[self._convo[start_date_index - 1][0]].append(start_date_index - 1)

        for i in range(start_date_index, end_date_index):
            curr_date = self._convo[i][2]
            next_date = self._convo[i + 1][2]
            if next_date.distance_from(curr_date) >= threshold:
                convo_start_freq[self._convo[i][0]].append(i)
        return Counter({name.title(): indexes for name, indexes in convo_start_freq.items()})
예제 #5
0
 def __init__(self, convo_name, convo_list, rank):
     """Set up the conversation's cached state from its raw message list.

     Parameters:
         convo_name: A string for the conversation name, found in your facebook archive
         convo_list: A 2D list with inner lists of the format [person_name (str), message (str), date-time (str)]
         rank: Value appended (as a string) to BaseConvoReader.BASE_PATH to build this conversation's path
     """
     self._name = convo_name.lower()

     # Normalise every row: lowercased sender, untouched text, parsed date
     parsed_rows = []
     for sender, text, stamp in convo_list:
         parsed_rows.append([sender.lower(), text, CustomDate(stamp)])
     self._convo = parsed_rows

     self._people = self.get_people()

     # People who took part but are absent from the comma-separated conversation name
     listed_names = self._name.split(', ')
     self._kicked_or_left = [member for member in self._people if member not in listed_names]

     self._individual_words = self._cleaned_word_freqs()
     self._len = len(self._convo)
     self._path = BaseConvoReader.BASE_PATH + str(rank) + '/'
     self._word_cloud = None
예제 #6
0
	def _msgs_graph(self, contact=None):
		"""The raw data used by print_msgs_graph to display message graphs
		Parameters:
			contact (optional): the name (as a string), or a list of names, of the people you are
				interested in. Names are matched case-insensitively (default: all contacts)
		Return:
			A 2D list with one [day, count] inner list per day from the first to the last date of
			the conversation; day comes from CustomDate.from_date and count is the number of
			messages sent that day by the selected contact(s)
		"""
		assert type(contact) in [type(None), str, list], "Contact must be of type string or a list of strings"
		if type(contact) is list:
			for ele in contact:
				assert type(ele) is str, "Each element in contact must be a string"
			# Work on a lowercased copy so the caller's list is not modified
			contact = [ele.lower() for ele in contact]
			for ele in contact:
				assert ele in self.people, "{0} is not in the list of people for this conversation:\n{1}".format(
											ele, str(self.people))
		elif type(contact) is str:
			# Lowercase a single name as well: senders are compared in lowercase below, so a
			# mixed-case name could otherwise never match
			contact = contact.lower()
			assert contact in self.people, "{0} is not in the list of people for this conversation:\n{1}".format(
											contact, str(self.people))
			contact = [contact]

		if contact is not None:
			filt = lambda x: x in contact
		else:
			filt = lambda x: True

		start = self.dates[0]
		end = self.dates[-1]
		days = end - start

		# One [day, count] slot per day; the day is filled in after counting
		msg_freq = [[None, 0] for _ in range(days + 1)]
		for person, msg, date in self.convo:
			if filt(person.lower()):
				# date - start is used as the slot index for the day of the message
				msg_freq[date - start][1] += 1

		for day, slot in enumerate(msg_freq):
			slot[0] = CustomDate.from_date(start + day)

		return msg_freq
예제 #7
0
    def from_msgs_dict(cls, msgs_dict: dict):
        """Build the quick-lookup rankings for a messages dictionary and wrap them with cls.

        Three two-way tables are produced, each mapping a rank to a conversation and the
        conversation name back to its rank:
            'alpha':     ranks are ints, ordered alphabetically by name
            'length':    ranks are ints, ordered by message count (largest first), then name
            'contacted': ranks are strings, ordered by the date of the last message (latest first)

        Parameters:
            msgs_dict: maps a conversation name to its list of messages; the date of a message is
                read from index 2 of the message
        Return:
            cls(preferences), where preferences holds the three tables under the keys above
        """
        # Alphabetical ranking; message count (largest first) is only a tie-breaker
        ordered_names = [
            item[0]
            for item in sorted(msgs_dict.items(), key=lambda item: (item[0], -len(item[1])))
        ]
        alpha_lookup = {}
        for rank, name in enumerate(ordered_names, start=1):
            alpha_lookup[rank] = (name, name)
            alpha_lookup[name] = (rank, name)

        # Ranking by conversation length, longest first, names breaking ties
        sized = sorted(((name, len(convo)) for name, convo in msgs_dict.items()),
                       key=lambda pair: (-pair[1], pair[0]))
        length_lookup = {}
        for rank, (name, size) in enumerate(sized, start=1):
            length_lookup[rank] = (name, size)
            length_lookup[name] = (rank, size)

        # Ranking by how recently the conversation was active, judged by its last message
        recent_first = sorted(msgs_dict.items(),
                              key=lambda item: CustomDate(item[1][-1][2]),
                              reverse=True)
        recent_lookup = {}
        for rank, (name, convo) in enumerate(recent_first, start=1):
            last_date = convo[-1][2]
            position = str(rank)  # this table keys its ranks as strings
            recent_lookup[position] = (name, last_date)
            recent_lookup[name] = (position, last_date)

        preferences = {
            'alpha': alpha_lookup,
            'length': length_lookup,
            'contacted': recent_lookup,
        }
        return cls(preferences)
예제 #8
0
	def _msgs_by_day(self, window=60, contact=None):
		"""The percent of conversation by time of day
		Parameters:
			window (optional): The length of each bin in minutes (default, 60 minutes, or 1 hour)
			contact (optional): The contact, or list of contacts, you are interested in. Names are
				matched case-insensitively (default, all contacts)
		Return:
			a list of [bin_start, percent] pairs starting at 12:00 am, where percent is the share of
			the selected messages that fall in that bin. If time less than the passed window is left
			at the end of the day, it gets its own bin at the end of the list. When no message is
			selected every percent is 0
		"""
		assert type(contact) in [type(None), str, list], "Contact must be of type string or a list of strings"
		if type(contact) is list:
			for ele in contact:
				assert type(ele) is str, "Each element in contact must be a string"
			# Work on a lowercased copy so the caller's list is not modified
			contact = [ele.lower() for ele in contact]
			for ele in contact:
				assert ele in self.people, "{0} is not in the list of people for this conversation:\n{1}".format(
											ele, str(self.people))
		elif type(contact) is str:
			# Lowercase a single name as well: senders are compared in lowercase below
			contact = contact.lower()
			assert contact in self.people, "{0} is not in the list of people for this conversation:\n{1}".format(
											contact, str(self.people))
			contact = [contact]

		if contact is not None:
			filt = lambda x: x in contact
		else:
			filt = lambda x: True

		total_msgs = 0
		# True division before ceil so that a leftover partial window still gets a bin
		# (floor division made ceil a no-op and silently dropped it)
		msg_bucket = [[CustomDate.minutes_to_time(i * window), 0] for i in range(ceil(60 * 24 / window))]

		for person, msg, date in self.convo:
			if filt(person.lower()):
				# Wrap with the number of bins; wrapping with one less sent the last bin's
				# messages to bin 0 and divided by zero when there was a single bin
				msg_bucket[int(date.minutes() // window) % len(msg_bucket)][1] += 1
				total_msgs += 1

		# Turn counts into percentages; skipped when nothing matched to avoid dividing by zero
		if total_msgs:
			for bucket in msg_bucket:
				bucket[1] /= (total_msgs / 100)
		return msg_bucket
예제 #9
0
    def raw_msgs_graph(self, contact=None, forward_shift=0) -> list:
        """Per-day message counts, the raw data behind print_msgs_graph.

        Parameters:
            contact (optional): the name (as a string) of the person you are interested in
                (default: all contacts)
            forward_shift (optional): Messages sent fewer than this many minutes after midnight are
                counted towards the previous day
        Return:
            A 2D list with one [CustomDate, num-messages] inner list per day, from the day of the
            first message to the day of the last. The CustomDate comes from CustomDate.from_date and
            num-messages is the integer number of messages counted for that day.
        """
        contact = self._assert_contact(contact)
        minutes_per_day = 60 * 24
        assert isinstance(forward_shift, int), "Forward shift must be an integer"
        assert -minutes_per_day < forward_shift < minutes_per_day, \
            "Forward shift must be between {0} and {1}, not including them".format(-minutes_per_day, minutes_per_day)

        def wanted(sender):
            # With no contact filter every sender is accepted
            return contact is None or sender in contact

        first_day = self._convo[0][2]
        last_day = self._convo[-1][2]

        # One counter per day covered by the conversation
        per_day = [0] * (last_day - first_day + 1)
        for sender, _, stamp in self._convo:
            if not wanted(sender.lower()):
                continue
            if stamp.minutes() < forward_shift:
                # Early enough after midnight to belong to the previous day (never before day 0)
                slot = max(0, stamp - first_day - 1)
            else:
                slot = stamp - first_day
            per_day[slot] += 1

        return [[CustomDate.from_date(first_day + offset), total] for offset, total in enumerate(per_day)]
예제 #10
0
    def raw_msgs_graph(self, contact=None, forward_shift=0) -> list:
        """Per-day message counts, the raw data behind print_msgs_graph.

        Parameters:
            contact (optional): the name (as a string) of the person you are interested in
                (default: all contacts)
            forward_shift (optional): Messages sent fewer than this many minutes after midnight are
                counted towards the previous day
        Return:
            A 2D list with one [CustomDate, num-messages] inner list per day, from the day of the
            first message to the day of the last. The CustomDate comes from CustomDate.from_date and
            num-messages is the integer number of messages counted for that day.
        """
        contact = self._assert_contact(contact)
        minutes_per_day = 60 * 24
        assert isinstance(forward_shift, int), "Forward shift must be an integer"
        assert -minutes_per_day < forward_shift < minutes_per_day, \
            "Forward shift must be between {0} and {1}, not including them".format(-minutes_per_day, minutes_per_day)

        def wanted(sender):
            # With no contact filter every sender is accepted
            return contact is None or sender in contact

        first_day = self._convo[0][2]
        last_day = self._convo[-1][2]

        # One counter per day covered by the conversation
        per_day = [0] * (last_day - first_day + 1)
        for sender, _, stamp in self._convo:
            if not wanted(sender.lower()):
                continue
            if stamp.minutes() < forward_shift:
                # Early enough after midnight to belong to the previous day (never before day 0)
                slot = max(0, stamp - first_day - 1)
            else:
                slot = stamp - first_day
            per_day[slot] += 1

        return [[CustomDate.from_date(first_day + offset), total] for offset, total in enumerate(per_day)]
예제 #11
0
    def add_to_duplicate():
        """Helper method to add a message group to a DUPLICATE conversation"""
        nonlocal msgs, cur_thread, key, next_time, duplicate_index
        duplicate_num = 1  # the number of duplicate message groups (have the same people, aka name)
        # but in reality belong to different conversations

        added = False  # used so that we don't accidentally add something twice

        # while there exists another duplicate conversation in our list keep checking if the current
        # message group belongs to it. If we exit the while loop a new conversation is created with
        # "DUPLICATE #X appended to distinguish it, with X being the xth new duplicate conversation
        while (key + ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)

            # The time of the last message in the conversation for new_name
            prev_time = CustomDate(msgs[new_name][-1][2])

            if prev_time.distance_from(next_time) <= 0:
                # if our current conversation was during or after the last message in the new_name convo
                print(one_line())
                print()
                print(one_line())

                print("#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                      .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")

                print(previous_color + '# previous conversation end - length = {0:,}'.format(len(msgs[new_name]))
                      + Style.RESET_ALL)
                print_thread(msgs[new_name], end=True, padding=10)

                print(current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                      .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)

                print('\n' + one_line())
                # Prints the last 5 messages of the previous message group and the first 5 message of
                # the current message group, both in RED with a BLACK background

                print(are_same_color + "\nAre these two chunks from the same conversation? You might have "
                                       "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)

                are_same = user_says_yes()
                # User input for whether the two message groups are in the same conversation

                if not are_same:
                    duplicate_num += 1  # if they aren't the same, increment duplicate_num and try again
                else:
                    # check to make sure the current message group hasn't already been added, otherwise add it
                    msgs[new_name].extend(cur_thread)
                    added = True
            else:
                # this conversation existed but was after our current one, so increment the number and try again
                duplicate_num += 1

        if not added:
            # if the current message group hasn't been added, add with a new duplicate #
            msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread
        duplicate_index += 1
        return
예제 #12
0
def get_all_msgs_dict(msg_html_path, unordered_threads, footer, times):
    """Merge raw message threads into one conversation dictionary, asking the user to resolve name clashes.

    Threads whose cleaned name has not been seen before are stored directly. Every later thread with
    the same name is set aside and then either appended to the stored conversation (automatically or
    after the user confirms) or filed under a "<name>, DUPLICATE #n" key by add_to_duplicate.

    Parameters:
        msg_html_path: Passed to get_all_thread_containers when unordered_threads or footer is None
        unordered_threads: Iterable of thread objects; pass None to have them loaded from msg_html_path
        footer: Footer object that is returned as a string; pass None to have it loaded
        times: A list that receives three time.time() stamps: background setup done, user input
            starting, and setup finished
    Return:
        A tuple (msgs, str(footer), preferences) where msgs maps a conversation name to its list of
        messages and preferences is the `preferences` attribute of PreferencesSearcher.from_msgs_dict(msgs)
    """
    # Colors used for the different sections of the interactive prompt
    conversation_color = Fore.LIGHTYELLOW_EX + Back.LIGHTBLACK_EX
    previous_color = Fore.LIGHTCYAN_EX + Back.BLACK
    current_color = Fore.LIGHTGREEN_EX + Back.BLACK
    are_same_color = Fore.LIGHTRED_EX + Back.BLACK

    def add_to_duplicate():
        """Helper to add the current message group (cur_thread) to a "<key>, DUPLICATE #n" conversation.

        Reads msgs, cur_thread, key and next_time from the enclosing function and advances
        duplicate_index by one.
        """
        nonlocal msgs, cur_thread, key, next_time, duplicate_index
        duplicate_num = 1  # number of the DUPLICATE conversation currently being considered; such
        # conversations have the same people (aka name) but in reality are different conversations

        added = False  # used so that we don't accidentally add something twice

        # while there exists another duplicate conversation in our list keep checking if the current
        # message group belongs to it. If we exit the while loop without adding, a new conversation is
        # created with "DUPLICATE #X" appended to distinguish it, X being the first unused number
        while (key + ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)

            # The time of the last message in the conversation for new_name
            prev_time = CustomDate(msgs[new_name][-1][2])

            if prev_time.distance_from(next_time) <= 0:
                # if our current conversation was during or after the last message in the new_name convo
                print(one_line())
                print()
                print(one_line())

                print("#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                      .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")

                print(previous_color + '# previous conversation end - length = {0:,}'.format(len(msgs[new_name]))
                      + Style.RESET_ALL)
                print_thread(msgs[new_name], end=True, padding=10)

                print(current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                      .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)

                print('\n' + one_line())
                # At this point the last 10 messages of the stored conversation and the first 10
                # messages of the current message group have been printed (padding=10)

                print(are_same_color + "\nAre these two chunks from the same conversation? You might have "
                                       "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)

                are_same = user_says_yes()
                # User input for whether the two message groups are in the same conversation

                if not are_same:
                    duplicate_num += 1  # if they aren't the same, increment duplicate_num and try again
                else:
                    # the user confirmed the match: append the group and stop the search
                    msgs[new_name].extend(cur_thread)
                    added = True
            else:
                # this conversation existed but was after our current one, so increment the number and try again
                duplicate_num += 1

        if not added:
            # if the current message group hasn't been added, add with a new duplicate #
            msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread
        duplicate_index += 1
        return

    def print_thread(thread, start=False, end=False, padding=5):
        """Prettily prints the first `padding` messages (start=True) or the last `padding` messages
        (end=True) of the thread passed. Exactly one of start and end must be True.
        """
        if start:
            assert not end, "Either start or end can be True, not both"
        else:
            assert end, "Exactly one value from start and end must be true"
        assert isinstance(padding, int), "padding must be an integer"
        assert padding > 0, "padding must be greater than 0"

        # From here on start and end are reused as the slice bounds of the messages to print
        if start:
            start = 0
            end = min(start + padding, len(thread))
            date_color = Fore.GREEN + Back.BLACK
        else:
            end = len(thread)
            start = max(0, end - padding)
            date_color = Fore.CYAN + Back.BLACK

        # Width of the name column: the longest name being printed, plus one
        max_name_length = max(len(name) for name, _, _ in thread[start:end]) + 1
        for person, msg, date in thread[start:end]:

            print("{0:{align}{width}}: {1} | {2}"
                  .format(person, msg, date_color + date, align='<', width=max_name_length))
        return

    # Load the threads and footer from the html file when the caller passed None for either
    if unordered_threads is None or footer is None:
        all_thread_containers = get_all_thread_containers(msg_html_path)
        unordered_threads, footer = get_all_threads_unordered(all_thread_containers)

    msgs = dict()  # result we return
    duplicate_bucket = dict()  # temporarily holds duplicate conversations
    num_duplicates = 0
    for thread in unordered_threads:
        convo_name = clean_convo_name(thread.contents[0])
        cur_thread = get_messages_readable(thread)
        if convo_name not in msgs:
            # A conversation with the name of current message group does not exist, so it is added with no issues :D
            msgs[convo_name] = cur_thread

        else:  # Another conversation with this name has been seen before, so hold it in the duplicate bucket
            num_duplicates += 1
            if convo_name in duplicate_bucket:
                duplicate_bucket[convo_name].append(cur_thread)
            else:
                duplicate_bucket[convo_name] = [cur_thread]

    # The following is used in setup to time how long it takes various processes
    # This timing counts the time that user input starts, as there can be a lag before
    times.append(time.time())  # Background setup done
    print('\n' + one_line() + '\n')
    input("Press enter when you're ready to continue to user input: \n")
    # this time is user input prompt time
    times.append(time.time())  # User selection is starting

    # Finish the setup: place every message group held in the duplicate bucket into a conversation
    duplicate_index = 1
    for key in sorted(duplicate_bucket.keys()):
        for cur_thread in duplicate_bucket[key]:

            previous = msgs[key][-1]
            next = cur_thread[0]
            prev_time, next_time = CustomDate(previous[2]), CustomDate(next[2])
            # prev_time and next_time are used to calculate the time difference between the previous
            # message group's last message and this message group's first message. This time helps
            # determine whether both message groups belong to the same conversation

            # Merge without asking when the time difference is between -3 and 0, the stored conversation
            # already holds more than 10,000 messages and the new group holds exactly 10,000
            if -3 <= prev_time.distance_from(next_time) <= 0 and len(msgs[key]) > 10000 and len(cur_thread) == 10000:
                msgs[key].extend(cur_thread)
                duplicate_index += 1
            elif prev_time.distance_from(next_time) <= 0:

                clear_screen()

                print("#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                      .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")

                print(previous_color + '# previous conversation end - length = {0:,}'.format(len(msgs[key]))
                      + Style.RESET_ALL)
                print_thread(msgs[key], end=True, padding=10)

                print(current_color + "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                      .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)

                print('\n' + one_line())
                # At this point the last 10 messages of the stored conversation and the first 10
                # messages of the current message group have been printed (padding=10)

                print(are_same_color + "\nAre these two chunks from the same conversation? You might have "
                                       "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)

                are_same = user_says_yes()
                # user input to decide if the above two message groups are the same conversation

                if are_same:
                    msgs[key].extend(cur_thread)
                    duplicate_index += 1

                    clear_screen()
                else:
                    # the two conversations are NOT the same (because of user input)
                    #  so we need to add the new one to an appropriate duplicate

                    add_to_duplicate()
            else:
                # The two conversations are not the same (since the previous is after the new one)
                # so we need to add the new one to an appropriate duplicate

                add_to_duplicate()

    clear_screen()
    quick_preferences = PreferencesSearcher.from_msgs_dict(msgs)
    times.append(time.time())  # The end of setup

    return msgs, str(footer), quick_preferences.preferences
예제 #13
0
    def add_to_duplicate():
        """Helper method to add a message group to a DUPLICATE conversation"""
        nonlocal msgs, cur_thread, key, next_time, duplicate_index
        duplicate_num = 1  # the number of duplicate message groups (have the same people, aka name)
        # but in reality belong to different conversations

        added = False  # used so that we don't accidentally add something twice

        # while there exists another duplicate conversation in our list keep checking if the current
        # message group belongs to it. If we exit the while loop a new conversation is created with
        # "DUPLICATE #X appended to distinguish it, with X being the xth new duplicate conversation
        while (key +
               ', DUPLICATE #{0}'.format(duplicate_num)) in msgs and not added:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)

            # The time of the last message in the conversation for new_name
            prev_time = CustomDate(msgs[new_name][-1][2])

            if prev_time.distance_from(next_time) <= 0:
                # if our current conversation was during or after the last message in the new_name convo
                print(one_line())
                print()
                print(one_line())

                print(
                    "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
                    .format(duplicate_index, num_duplicates))
                print(conversation_color + key)
                print(one_line() + "\n")

                print(previous_color +
                      '# previous conversation end - length = {0:,}'.format(
                          len(msgs[new_name])) + Style.RESET_ALL)
                print_thread(msgs[new_name], end=True, padding=10)

                print(
                    current_color +
                    "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
                    .format(len(cur_thread)) + Style.RESET_ALL)
                print_thread(cur_thread, start=True, padding=10)

                print('\n' + one_line())
                # Prints the last 5 messages of the previous message group and the first 5 message of
                # the current message group, both in RED with a BLACK background

                print(
                    are_same_color +
                    "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com/messages [Y/n]" +
                    Style.RESET_ALL)

                are_same = user_says_yes()
                # User input for whether the two message groups are in the same conversation

                if not are_same:
                    duplicate_num += 1  # if they aren't the same, increment duplicate_num and try again
                else:
                    # check to make sure the current message group hasn't already been added, otherwise add it
                    msgs[new_name].extend(cur_thread)
                    added = True
            else:
                # this conversation existed but was after our current one, so increment the number and try again
                duplicate_num += 1

        if not added:
            # if the current message group hasn't been added, add with a new duplicate #
            msgs[key + ', DUPLICATE #{0}'.format(duplicate_num)] = cur_thread
        duplicate_index += 1
        return
예제 #14
0
def get_all_msgs_dict(msg_html_path, unordered_threads, footer, times):
    """Build the conversation dictionary used by MessageReader.

    Message groups that share a conversation name are merged into a single
    conversation, either automatically or after asking the user on the
    terminal. Groups that turn out not to belong to the same conversation are
    stored under "<name>, DUPLICATE #<n>" keys.

    Parameters:
        msg_html_path: handed to get_all_thread_containers when the threads have to be loaded here
        unordered_threads: the message groups to process; if this or footer is None, both are
                           loaded from msg_html_path
        footer: returned to the caller as str(footer)
        times: a list; three time.time() stamps are appended to it (background setup done,
               user input starting, end of setup)
    Return:
        a tuple (msgs, footer, preferences): msgs maps each conversation name to its list of
        (person, message, date) entries, footer is str(footer) and preferences is the
        `preferences` attribute of PreferencesSearcher.from_msgs_dict(msgs)
    """
    conversation_color = Fore.LIGHTYELLOW_EX + Back.LIGHTBLACK_EX
    previous_color = Fore.LIGHTCYAN_EX + Back.BLACK
    current_color = Fore.LIGHTGREEN_EX + Back.BLACK
    are_same_color = Fore.LIGHTRED_EX + Back.BLACK

    def print_thread(thread, start=False, end=False, padding=5):
        """Prettily prints the first (start=True) or the last (end=True) `padding` entries of thread"""
        if start:
            assert not end, "Either start or end can be True, not both"
        else:
            assert end, "Exactly one value from start and end must be true"
        assert isinstance(padding, int), "padding must be an integer"
        assert padding > 0, "padding must be greater than 0"

        if start:
            excerpt = thread[:min(padding, len(thread))]
            date_color = Fore.GREEN + Back.BLACK
        else:
            excerpt = thread[max(0, len(thread) - padding):]
            date_color = Fore.CYAN + Back.BLACK

        if not excerpt:
            # nothing to print; also keeps max() below from failing on an empty sequence
            return

        # every name is left-aligned in a column one wider than the longest name shown
        max_name_length = max(len(name) for name, _, _ in excerpt) + 1
        for person, msg, date in excerpt:
            print("{0:{align}{width}}: {1} | {2}".format(
                person,
                msg,
                date_color + date,
                align='<',
                width=max_name_length))

    def show_candidate_pair(previous_thread):
        """Prints the progress header, the last 10 entries of previous_thread and the first 10
        entries of the message group currently being placed (cur_thread)"""
        print(
            "#{0} of {1} (at maximum) duplicate conversations. Some might be done for you behind the scenes."
            .format(duplicate_index, num_duplicates))
        print(conversation_color + key)
        print(one_line() + "\n")

        print(previous_color +
              '# previous conversation end - length = {0:,}'.format(
                  len(previous_thread)) + Style.RESET_ALL)
        print_thread(previous_thread, end=True, padding=10)

        print(
            current_color +
            "\n# next conversation start - length = {0:,} (maximum possible length is 10,000)"
            .format(len(cur_thread)) + Style.RESET_ALL)
        print_thread(cur_thread, start=True, padding=10)

        print('\n' + one_line())

    def add_to_duplicate():
        """Helper method to add the current message group to a DUPLICATE conversation"""
        # only duplicate_index is rebound here; msgs, key, cur_thread and next_time are
        # merely read (or mutated in place) through the closure
        nonlocal duplicate_index
        duplicate_num = 1  # number of the "DUPLICATE #X" conversation being examined

        # Walk through the existing "DUPLICATE #X" conversations for this name and ask the user
        # whether the current message group continues one of them. If none accepts it, the
        # group becomes a new conversation under the first unused "DUPLICATE #X" name.
        while True:
            new_name = key + ', DUPLICATE #{0}'.format(duplicate_num)
            if new_name not in msgs:
                msgs[new_name] = cur_thread
                break

            # The time of the last message in the conversation for new_name
            last_time = CustomDate(msgs[new_name][-1][2])

            if last_time.distance_from(next_time) <= 0:
                # our current group was during or after the last message in the new_name convo
                print(one_line())
                print()
                print(one_line())

                show_candidate_pair(msgs[new_name])

                print(
                    are_same_color +
                    "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com/messages [Y/n]" +
                    Style.RESET_ALL)

                if user_says_yes():
                    msgs[new_name].extend(cur_thread)
                    break

            # either the user said no, or this duplicate ended after our current group
            # started, so try the next duplicate number
            duplicate_num += 1

        duplicate_index += 1

    # Load the threads here if the caller did not supply them
    if unordered_threads is None or footer is None:
        all_thread_containers = get_all_thread_containers(msg_html_path)
        unordered_threads, footer = get_all_threads_unordered(
            all_thread_containers)

    msgs = {}  # result we return
    duplicate_bucket = {}  # temporarily holds message groups whose name was already seen
    num_duplicates = 0
    for thread in unordered_threads:
        convo_name = clean_convo_name(thread.contents[0])
        readable = get_messages_readable(thread)
        if convo_name not in msgs:
            # A conversation with the name of current message group does not exist, so it is added with no issues
            msgs[convo_name] = readable
        else:
            # Another conversation with this name has been seen before, keep the group for later
            num_duplicates += 1
            duplicate_bucket.setdefault(convo_name, []).append(readable)

    # The following is used in setup to time how long it takes various processes
    times.append(time.time())  # Background setup done
    print('\n' + one_line() + '\n')
    input_text = (
        "Press enter when you're ready to continue to user input: (Consider making your terminal full screen to "
        "easier read the messages printed)\n")
    input_text = textwrap.fill(input_text,
                               width=min(shutil.get_terminal_size().columns,
                                         150))
    input(input_text)
    # this time is user input prompt time
    times.append(time.time())  # User selection is starting

    # Place every held-back message group into the conversation it belongs to
    duplicate_index = 1
    for key in sorted(duplicate_bucket):
        for cur_thread in duplicate_bucket[key]:

            previous_msg = msgs[key][-1]
            next_msg = cur_thread[0]
            prev_time, next_time = CustomDate(previous_msg[2]), CustomDate(next_msg[2])
            # Distance between the last message of the stored conversation and the first
            # message of this group; a value <= 0 is treated as "this group does not start
            # before the stored conversation ends".
            # NOTE(review): the unit of distance_from is not visible here - confirm what -3 means
            gap = prev_time.distance_from(next_time)

            if -3 <= gap <= 0 and len(
                    msgs[key]) > 10000 and len(cur_thread) == 10000:
                # merged without asking: small gap, the stored conversation already exceeds
                # 10,000 messages and the new group has the maximum group length
                msgs[key].extend(cur_thread)
                duplicate_index += 1
            elif gap <= 0:

                clear_screen()
                show_candidate_pair(msgs[key])

                print(
                    are_same_color +
                    "\nAre these two chunks from the same conversation? You might have "
                    "to look this up on facebook.com. [Y/n]" + Style.RESET_ALL)

                if user_says_yes():
                    msgs[key].extend(cur_thread)
                    duplicate_index += 1

                    clear_screen()
                else:
                    # the two conversations are NOT the same (because of user input)
                    # so we need to add the new one to an appropriate duplicate
                    add_to_duplicate()
            else:
                # The two conversations are not the same (since the previous is after the new one)
                # so we need to add the new one to an appropriate duplicate
                add_to_duplicate()

    clear_screen()
    quick_preferences = PreferencesSearcher.from_msgs_dict(msgs)
    times.append(time.time())  # The end of setup

    return msgs, str(footer), quick_preferences.preferences