def main():
    """Histogram the command-line arguments, or print the onefish word list.

    When at least one argument follows the script name, those arguments are
    passed to ``print_histogram``. Otherwise ``onefish.txt`` is run through
    ``cleanup.clean_txt``, ``"STOP"`` is appended to the result, and the
    result is printed.
    """
    import sys
    import cleanup as clean

    cli_words = sys.argv[1:]  # everything after the script name
    if cli_words:
        print_histogram(cli_words)
        return

    words = clean.clean_txt('onefish.txt')
    words.append("STOP")
    print(words)
def run_histogram():
    """Build and print a histogram of ``onefish.txt`` and time the run.

    Prints, in order: the start timestamp, the object returned by
    ``histogram_dict``, its ``items()`` as a list, and the elapsed time in
    seconds.
    """
    began = time.time()
    print(began)

    words = c.clean_txt('onefish.txt')
    histogram = histogram_dict(words)
    print(histogram)
    print(list(histogram.items()))

    finished = time.time()
    # abs() keeps the difference positive regardless of operand order.
    print(abs(began - finished))
def main():
    """Histogram the command-line arguments, or build the Markov structure.

    When at least one argument follows the script name, those arguments are
    passed to ``print_histogram``. Otherwise ``onefish.txt`` is run through
    ``cleanup.clean_txt``, ``"STOP"`` is appended to the result, and the
    result is handed to ``second_order_markov_chain``.
    """
    import sys
    # Text-cleaning helper module.
    import cleanup as clean

    cli_words = sys.argv[1:]  # skip the script name at index 0
    if cli_words:
        # Arguments supplied: histogram them directly and finish.
        print_histogram(cli_words)
        return

    words = clean.clean_txt('onefish.txt')
    words.append("STOP")
    # NOTE(review): the returned structure is bound but not used in the
    # visible code; the binding is kept as in the original.
    markov_dict = second_order_markov_chain(words)
def run_histogram():
    """Read ``onefish.txt``, build its histogram, and report the timing.

    Output, in order: the start timestamp, the histogram returned by
    ``histogram_dict``, the histogram's items as a list, and the number of
    seconds the run took.
    """
    t_start = time.time()
    print(t_start)

    # c.clean_txt takes the file name and returns the words to count.
    words = c.clean_txt('onefish.txt')
    word_counts = histogram_dict(words)

    print(word_counts)
    print(list(word_counts.items()))

    t_end = time.time()
    elapsed = abs(t_start - t_end)
    print(elapsed)
def clean_text(filename='corpus.txt'):
    """Load the words from *filename* and terminate them with ``"STOP"``.

    Args:
        filename: Name passed to ``c.clean_txt``. Defaults to
            ``'corpus.txt'``, the file this function was previously
            hard-wired to, so existing zero-argument calls are unchanged.

    Returns:
        The object returned by ``c.clean_txt`` (presumably a list of
        words; confirm against that helper) with ``"STOP"`` appended
        in place.
    """
    clean_list = c.clean_txt(filename)
    # The trailing marker flags the end of the text for downstream consumers.
    clean_list.append("STOP")
    return clean_list
def clean_text(filename='harry_potter_books.txt'):
    """Load the words from *filename* and terminate them with ``"STOP"``.

    Args:
        filename: Name passed to ``c.clean_txt``. Defaults to
            ``'harry_potter_books.txt'``, the file this function was
            previously hard-wired to, so existing zero-argument calls are
            unchanged.

    Returns:
        The object returned by ``c.clean_txt`` (presumably a list of
        words; confirm against that helper) with ``"STOP"`` appended
        in place.
    """
    clean_list = c.clean_txt(filename)
    # The trailing marker flags the end of the text for downstream consumers.
    clean_list.append("STOP")
    return clean_list