def counter_on_all_words(self):
    """Count every word across all tweets, ordered by word.

    Each tweet from ``self.tweet_iterable`` is passed through
    ``utils.clean_tweet`` and split on single spaces.  The resulting words
    are spooled to a temporary file, one per line, and then read back and
    tallied with a ``Counter``.

    Empty-string tokens are not filtered out here.

    Returns:
        The ``items()`` of an ``OrderedDict`` mapping each word to its
        count, sorted by word.
    """
    # Open in text mode: the default mode is binary ("w+b"), which rejects
    # the str values written below on Python 3.
    with tempfile.TemporaryFile(mode="w+") as tmpfile:
        # Write every word of every cleaned tweet followed by a newline.
        for tweet in self.tweet_iterable:
            tmpfile.writelines(
                word + "\n" for word in utils.clean_tweet(tweet).split(" ")
            )
        # Rewind before reading, then count one word per line.
        tmpfile.seek(0)
        count_container = Counter(tmpfile.read().splitlines())
    sorted_count_dictionary = OrderedDict(
        sorted(count_container.items(), key=itemgetter(0)))
    return sorted_count_dictionary.items()
def count_unique(self):
    """Return per-word counts over all tweets, ordered by word.

    Each tweet from ``self.tweet_iterable`` is passed through
    ``utils.clean_tweet`` and split on single spaces; the occurrences of
    every resulting word are tallied.

    Returns:
        An ``OrderedDict`` mapping each word to its count, sorted by word,
        with the ``''`` and ``' '`` keys removed if present.
    """
    count_container = Counter()
    for tweet in self.tweet_iterable:
        # Tally in place; building a new Counter with ``+`` for every tweet
        # would copy all accumulated counts each time.
        count_container.update(utils.clean_tweet(tweet).split(" "))
    sorted_count_dictionary = OrderedDict(
        sorted(count_container.items(), key=itemgetter(0)))
    # Remove edge cases of blank string or space string
    sorted_count_dictionary.pop(' ', None)
    sorted_count_dictionary.pop('', None)
    return sorted_count_dictionary
def populate_median_list(self):
    """Append a running median to ``self.median_list`` for every tweet.

    For each tweet in ``self.tweet_iterable`` the cleaned tweet is handed to
    ``self.append_to_unique_word_list`` and the median of
    ``self.num_unique_words_sorted`` is appended to ``self.median_list``.

    NOTE(review): this presumes ``append_to_unique_word_list`` adds the
    tweet's unique-word count to ``self.num_unique_words_sorted`` and keeps
    that list sorted -- confirm against that method.

    Returns:
        ``self.median_list`` (the same list object, extended in place).
    """
    for dirty_tweet in self.tweet_iterable:
        tweet = utils.clean_tweet(dirty_tweet)
        self.append_to_unique_word_list(tweet)
        current_number_of_tweets = len(self.num_unique_words_sorted)
        # Floor division keeps the indices integral on Python 3 as well;
        # true division would produce floats that cannot index a list.
        middle = current_number_of_tweets // 2
        if current_number_of_tweets % 2:
            # Odd count: the middle element is the median (appended as-is).
            self.median_list.append(self.num_unique_words_sorted[middle])
        else:
            # Even count: average the two middle elements.
            left_index = middle - 1
            right_index = middle
            average_of_medians = self.format_to_two_decimal(
                (self.num_unique_words_sorted[left_index] +
                 self.num_unique_words_sorted[right_index]) / 2.0)
            self.median_list.append(average_of_medians)
    return self.median_list