Exemple #1
0
def main():
    """Command-line entry point.

    When at least one command-line argument is supplied, the arguments are
    handed to ``print_histogram``. Otherwise 'onefish.txt' is loaded through
    ``cleanup.clean_txt``, a "STOP" marker is appended to the result, and
    the result is printed.
    """
    import sys
    import cleanup as clean

    cli_args = sys.argv[1:]  # everything after the script name
    if not cli_args:
        # No arguments: fall back to the bundled sample text.
        words = clean.clean_txt('onefish.txt')
        words.append("STOP")
        print(words)
        return
    print_histogram(cli_args)
def run_histogram():
    """Build and print a histogram for 'onefish.txt', timing the run.

    Prints, in order: the start timestamp, the mapping returned by
    ``histogram_dict``, that mapping's items as a list, and the absolute
    difference between the start and end timestamps.
    """
    began = time.time()
    print(began)

    # presumably a list of cleaned words -- confirm against c.clean_txt
    words = c.clean_txt('onefish.txt')
    histogram = histogram_dict(words)

    print(histogram)
    pairs = list(histogram.items())
    print(pairs)

    finished = time.time()
    elapsed = abs(began - finished)
    print(elapsed)
Exemple #3
0
def main():
    """Command-line entry point.

    With one or more command-line arguments, run ``print_histogram`` on
    them. With none, load 'onefish.txt' via ``cleanup.clean_txt``, append
    a "STOP" marker, and feed the result to ``second_order_markov_chain``.
    """
    import sys
    # Module that cleans the text
    import cleanup as clean

    cli_args = sys.argv[1:]  # skip the script name
    if cli_args:
        # Try the histogram on the supplied arguments and finish.
        print_histogram(cli_args)
        return

    words = clean.clean_txt('onefish.txt')
    words.append("STOP")
    # Build the chain (per the original comment, a dictionary of
    # histograms); the result is not used further inside this function.
    markov_dict = second_order_markov_chain(words)
Exemple #4
0
def run_histogram():
    """Read 'onefish.txt', build its histogram, and print timing info.

    Prints, in order: the start timestamp, the mapping produced by
    ``histogram_dict``, that mapping's items as a list, and the absolute
    difference between the start and end timestamps.
    """
    began = time.time()
    print(began)

    # c.clean_txt takes a text file, cleans it, and returns a list of words.
    words = c.clean_txt('onefish.txt')
    histogram = histogram_dict(words)

    print(histogram)
    pairs = list(histogram.items())
    print(pairs)

    finished = time.time()
    elapsed = abs(began - finished)
    print(elapsed)
def clean_text():
    """Return the cleaned contents of 'corpus.txt' with "STOP" appended.

    The cleaning itself is delegated to ``c.clean_txt``; this wrapper only
    adds the trailing "STOP" marker before returning the result.
    """
    words = c.clean_txt('corpus.txt')
    words.append("STOP")  # end-of-text marker
    return words
Exemple #6
0
def clean_text():
    """Return the cleaned 'harry_potter_books.txt' data plus "STOP".

    The cleaning itself is delegated to ``c.clean_txt``; this wrapper only
    adds the trailing "STOP" marker before returning the result.
    """
    words = c.clean_txt('harry_potter_books.txt')
    words.append("STOP")  # end-of-text marker
    return words