コード例 #1
0
    def counter_on_all_words(self):
        """
        Count every word across all tweets, spooling the words through a
        temporary file.

        Each tweet from ``self.tweet_iterable`` is passed through
        ``utils.clean_tweet`` and split on single spaces; every resulting
        word is written to the temporary file on its own line. The file is
        then read back and its lines are counted.

        Blank words are not filtered out here: an empty word is written as
        an empty line and is counted under the key ``''``.

        Returns:
            The ``items()`` of an ``OrderedDict`` mapping word -> count,
            ordered by word in ascending order.
        """
        # Open in text mode: the default mode is binary ("w+b"), which
        # rejects the str values written below on Python 3.
        with tempfile.TemporaryFile(mode="w+") as tmpfile:
            # Clean each tweet and write its words out to the temporary
            # file, one word per line
            for tweet in self.tweet_iterable:
                tmpfile.writelines(
                    word + "\n"
                    for word in utils.clean_tweet(tweet).split(" "))
            # Rewind to the beginning before reading the words back and
            # counting them
            tmpfile.seek(0)
            count_container = Counter(tmpfile.read().splitlines())

        sorted_count_dictionary = OrderedDict(
            sorted(count_container.items(), key=itemgetter(0)))
        return sorted_count_dictionary.items()
コード例 #2
0
    def count_unique(self):
        """
        Count how often each word occurs across all tweets.

        Each tweet from ``self.tweet_iterable`` is passed through
        ``utils.clean_tweet`` and split on single spaces; the resulting
        words are tallied in a single ``Counter``.

        Returns:
            An ``OrderedDict`` mapping word -> count, ordered by word in
            ascending order, with the keys ``''`` and ``' '`` removed.
        """
        count_container = Counter()
        for tweet in self.tweet_iterable:
            # Tally in place. ``counter + Counter(...)`` would build a new
            # Counter for every tweet, copying all words seen so far.
            count_container.update(utils.clean_tweet(tweet).split(" "))

        sorted_count_dictionary = OrderedDict(
            sorted(count_container.items(), key=itemgetter(0)))

        # Remove edge cases of blank string or space string
        sorted_count_dictionary.pop(' ', None)
        sorted_count_dictionary.pop('', None)
        return sorted_count_dictionary
コード例 #3
0
    def populate_median_list(self):
        """
        Append one running-median entry to ``self.median_list`` per tweet.

        Each tweet from ``self.tweet_iterable`` is cleaned with
        ``utils.clean_tweet`` and handed to
        ``self.append_to_unique_word_list``. The median of
        ``self.num_unique_words_sorted``, as it stands after that call, is
        then appended to ``self.median_list``.

        With an odd number of entries the middle element is appended
        unchanged. With an even number, the mean of the two middle elements
        is passed through ``self.format_to_two_decimal`` before being
        appended.

        Returns:
            ``self.median_list``, the same object that was extended in
            place.
        """
        # NOTE(review): assumes append_to_unique_word_list keeps
        # num_unique_words_sorted sorted and non-empty -- confirm.
        for dirty_tweet in self.tweet_iterable:
            tweet = utils.clean_tweet(dirty_tweet)
            self.append_to_unique_word_list(tweet)
            current_number_of_tweets = len(self.num_unique_words_sorted)
            # Floor division keeps the index an int; ``/`` yields a float
            # on Python 3, which cannot be used as a sequence index.
            middle_index = current_number_of_tweets // 2
            if current_number_of_tweets % 2:
                # Odd amount: the single middle element is the median
                self.median_list.append(
                    self.num_unique_words_sorted[middle_index])
            else:
                # Even amount: average the two elements around the middle
                average_of_medians = self.format_to_two_decimal(
                    (self.num_unique_words_sorted[middle_index - 1] +
                     self.num_unique_words_sorted[middle_index]) / 2.0)
                self.median_list.append(average_of_medians)

        return self.median_list