Ejemplo n.º 1
0
def generate_words():
    """Build a ten-word sentence by sampling words from ``./book.txt``.

    Reads every line of ``./book.txt``, passes the lines to ``histogram``,
    prints the resulting histogram, then calls ``stochastic`` ten times and
    concatenates the sampled words.

    Returns:
        str: The sampled words, each one preceded by a single space, so the
        result begins with a space (unchanged from the previous behavior).
    """
    # The context manager closes the file even if reading raises; the
    # handle was previously left open.
    with open("./book.txt", "r") as my_file:
        lines = my_file.readlines()

    my_histogram = histogram(lines)
    print(my_histogram)

    num_words = 10
    # NOTE(review): ``words_in_total`` is not defined inside this function;
    # presumably it is a module-level name -- confirm it exists.
    words = [
        str(stochastic(my_histogram, words_in_total))
        for _ in range(num_words)
    ]

    # Each word keeps its leading space so the returned value is identical
    # to what repeated ``sentence += " " + str(word)`` produced.
    return "".join(" " + word for word in words)
    # NOTE(review): this is a script-body fragment; its enclosing definition
    # is not visible here. It tokenizes a file, tallies the tokens in a
    # MyHash table, and prints entries pulled from a heap.

    # The file name comes from the first command-line argument when one is
    # given, otherwise from an interactive prompt.
    if(len(sys.argv)) > 1:
        token_list = tokenize(sys.argv[1])
    else:
        filename = input("Enter Filename: ")
        token_list = tokenize(filename)
    my_hash = token_hash_table.MyHash()

    # Tally tokens: a token whose lookup returns 0 is stored with value 1;
    # any other token goes through update() (presumably an increment --
    # confirm against MyHash). The index ``i`` is not used in the loop body.
    for i, token in enumerate(token_list):
        # cur_val = my_hash.get(token)
        if my_hash.get(token) == 0:
            my_hash.set(token, 1)
            # TODO: Also add to hash table of markov chain
        else:
            my_hash.update(token)

    stoch_histo = sample.stochastic(my_hash)
    # NOTE(review): the two results below are indexed by position further
    # down. If stoch_histo is a built-in dict on Python 3, keys()/values()
    # return views that do not support indexing -- confirm the return type
    # of sample.stochastic.
    new_keys = stoch_histo.keys()
    new_freq = stoch_histo.values()

    my_heap = heap.Heap()

    # Insert every (key, frequency) pair into the heap, pairing the two
    # collections by position.
    for i in range(len(new_keys)):
        my_heap.insert(new_keys[i], new_freq[i])

    # Print the result of delete_max() n times, where n is the second
    # command-line argument when given, otherwise a prompted value.
    # int() raises ValueError if that value is not an integer string.
    if(len(sys.argv)) > 2:
        for i in range(int(sys.argv[2])):
            print(my_heap.delete_max())
    else:
        large = input("Enter n max tokens: ")
        for i in range(int(large)):
            print(my_heap.delete_max())
        token_list = tokenization.tokenize(sys.argv[1])
    else:
        filename = input("Enter Filename: ")
        token_list = tokenization.tokenize(filename)

    # NOTE(review): this is a script-body fragment; its enclosing definition
    # is not visible here. It fills a Markov_Chain from ``token_list`` and
    # prints four generated sentences.
    my_graph = Markov_Chain()
    for i, k in enumerate(token_list):
        # First sighting of a token: store it with the value True.
        # Later sightings: flag the stored entry and bump its node_val.
        if my_graph.get(k) is None:
            my_graph.set(k, True)
        else:
            my_graph.get(k).occured = True
            my_graph.get(k).node_val += 1
        # Every token except the last is passed to update() together with
        # its successor (presumably recording the transition -- confirm
        # against Markov_Chain.update).
        if i < len(token_list) - 1:
            my_graph.update(k, token_list[i+1])

    histo = sample.stochastic(my_graph)

    # The same three steps are repeated four times: pick a start word from
    # ``histo``, print it, then print the space-joined result of
    # gen_sentence(start_word, 20). The 20 is presumably the sentence
    # length -- confirm. A dashed separator line follows the first three.
    start_word = sentence.first_word(histo)
    print("Starting at: " + start_word)
    print(" ".join(my_graph.gen_sentence(start_word, 20)))
    print("-------------------------------")
    start_word = sentence.first_word(histo)
    print("Starting at: " + start_word)
    print(" ".join(my_graph.gen_sentence(start_word, 20)))
    print("-------------------------------")
    start_word = sentence.first_word(histo)
    print("Starting at: " + start_word)
    print(" ".join(my_graph.gen_sentence(start_word, 20)))
    print("-------------------------------")
    start_word = sentence.first_word(histo)
    print("Starting at: " + start_word)
    print(" ".join(my_graph.gen_sentence(start_word, 20)))