def mimic_dict(self):
    """Return a dict mapping each word to the list of words that follow it.

    The file named by ``self._filename`` is read in full, passed through
    ``StringTool.clean_string`` and split on runs of spaces.  Every word
    becomes a key; its value lists, in order of appearance and with
    duplicates kept, each word that immediately followed it.  The final
    word of the text is guaranteed to be a key too, with an empty list if
    it never appeared earlier.

    The resulting dict is also printed to stdout before being returned.
    """
    # The context manager closes the file even if read() raises.
    with open(self._filename, 'r') as file_input:
        string_input = file_input.read()

    string_tool = StringTool()
    # Per the original note: cleaning leaves contraction apostrophes in words.
    string_cleaned = string_tool.clean_string(string_input)
    words_input = re.split(' +', string_cleaned)

    # Drop empty strings produced by leading/trailing separators
    # (e.g. small.txt yields a trailing '').  Use truthiness rather than
    # an identity test: `is not ''` relies on string interning.
    words_cleaned = [item for item in words_input if item]

    output_dict = {}
    # Pair each word with its successor; the last word has none.
    for current_word, next_word in zip(words_cleaned, words_cleaned[1:]):
        output_dict.setdefault(current_word, []).append(next_word)

    if words_cleaned:
        # The last word has no follower; make sure it is still a key.
        output_dict.setdefault(words_cleaned[-1], [])

    print('output_dict')
    print(output_dict)
    print()
    return output_dict
def count_words(filename):
    """Return a dict mapping each lower-cased word in a file to its count.

    The file is processed line by line: each line is passed through
    ``StringTool.clean_string``, split on whitespace, and every resulting
    word is lower-cased before being tallied.

    Args:
        filename: Path of the text file to read.

    Returns:
        A dict of ``{word_lower: occurrences}``.
    """
    string_tool = StringTool()
    word_counts = {}
    # The context manager closes the file even if cleaning a line raises.
    with open(filename, 'r') as words_file:
        for line in words_file:
            cleaned_string = string_tool.clean_string(line)
            # split() with no argument splits on any run of whitespace.
            for word in cleaned_string.split():
                word_lower = word.lower()
                word_counts[word_lower] = word_counts.get(word_lower, 0) + 1
    return word_counts