def generate_words():
    """Build a sentence of ``num_words`` words sampled from ./book.txt.

    Reads the corpus, builds a histogram from its lines, prints the
    histogram for debugging, then draws words stochastically.

    Returns:
        str: the sampled words, each preceded by a single space
        (leading-space format preserved from the original behaviour).
    """
    # Context manager guarantees the file handle is closed; the original
    # opened it and never closed it.
    with open("./book.txt", "r") as my_file:
        lines = my_file.readlines()
    my_histogram = histogram(lines)
    print(my_histogram)
    num_words = 10
    # NOTE(review): ``words_in_total`` is not defined in this function --
    # presumably a module-level global set elsewhere; confirm.
    words = [stochastic(my_histogram, words_in_total) for _ in range(num_words)]
    # join instead of repeated ``+=`` (linear, not quadratic); the f-string
    # per word reproduces the original leading-space output exactly.
    return "".join(f" {word}" for word in words)
# Tokenize the input: the file name comes from argv[1] when given,
# otherwise prompt the user for it.
if len(sys.argv) > 1:
    token_list = tokenize(sys.argv[1])
else:
    filename = input("Enter Filename: ")
    token_list = tokenize(filename)

# Count token frequencies in the custom hash table.
my_hash = token_hash_table.MyHash()
for token in token_list:
    if my_hash.get(token) == 0:
        # First sighting: seed the count at 1.
        my_hash.set(token, 1)
        # TODO: Also add to hash table of markov chain
    else:
        # Token already present: bump its stored count.
        my_hash.update(token)

# Convert raw counts into a stochastic histogram and load every
# key/frequency pair into a max-heap keyed on frequency.
# NOTE(review): assumes ``sample.stochastic`` returns a dict-like object
# with .items() -- confirm against its definition.
stoch_histo = sample.stochastic(my_hash)
my_heap = heap.Heap()
# .items() iterates the pairs directly; the original subscripted dict
# views (``keys()[i]``), which raises TypeError on Python 3.
for key, freq in stoch_histo.items():
    my_heap.insert(key, freq)

# Print the n most frequent tokens; n comes from argv[2] or a prompt.
if len(sys.argv) > 2:
    n_max = int(sys.argv[2])
else:
    n_max = int(input("Enter n max tokens: "))
for _ in range(n_max):
    print(my_heap.delete_max())
# NOTE(review): the opening ``if`` of this guard is missing/garbled in
# the original (the chunk begins mid-conditional); reconstructed to match
# the argv pattern used by the sibling script -- confirm.
if len(sys.argv) > 1:
    token_list = tokenization.tokenize(sys.argv[1])
else:
    filename = input("Enter Filename: ")
    token_list = tokenization.tokenize(filename)

# Build the Markov chain: register each token and link it to its
# immediate successor.
my_graph = Markov_Chain()
for i, k in enumerate(token_list):
    node = my_graph.get(k)
    if node is None:
        my_graph.set(k, True)
    else:
        # Hoisted the repeated my_graph.get(k) lookup; same node object.
        node.occured = True  # attribute spelling as defined elsewhere
        node.node_val += 1
    if i < len(token_list) - 1:
        # Record the transition from this token to the next one.
        my_graph.update(k, token_list[i + 1])

# Generate four 20-word sentences from stochastic start words; the
# original spelled this block out four times verbatim.  A separator is
# printed between sentences but not after the last, matching the
# original output exactly.
histo = sample.stochastic(my_graph)
for run in range(4):
    start_word = sentence.first_word(histo)
    print("Starting at: " + start_word)
    print(" ".join(my_graph.gen_sentence(start_word, 20)))
    if run < 3:
        print("-------------------------------")