def markov(): """Forming sentence with markov chain""" clean_text_list = clean_file('corpus.txt') markov_word = markov_chain(clean_text_list) # higher_order_markov_chain = nth_order_markov_model(2, clean_text_list) sentence = generate_sentence(10, markov_word) return sentence
def tests():
    """Smoke-test every histogram representation on a sample story.

    Each representation gets the same three probes: unique-word count and
    the frequencies of "Killer" and "to".
    """
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    words = cleanup.clean_file(sources[0])

    # (histogram builder, matching frequency-lookup function) pairs.
    cases = [
        (tuples_histogram, frequency_tuples_and_lists),
        (lists_histogram, frequency_tuples_and_lists),
        (dictionary_histogram, frequency_dictionary),
    ]
    for build, frequency in cases:
        hist = build(words)
        print(unique_words_lists_tuples_dictionary(hist))
        print(frequency("Killer", hist))
        print(frequency("to", hist))

    # The counts-based histogram has its own unique-word probe.
    counts = counts_histogram(words)
    print(unique_words_counts(counts))
def tests():
    """Print histograms for CLI arguments (or built-in samples when none
    are given), then print a random walk over a sample corpus."""
    import sys
    cli_words = sys.argv[1:]  # everything after the script name
    if cli_words:
        # Histogram over whatever the user passed on the command line.
        print_histogram(cli_words)
    else:
        # Built-in demos: letters of a word, a classic book title,
        # and a long repetitive sentence.
        samples = [
            list('abracadabra'),
            'one fish two fish red fish blue fish'.split(),
            ('how much wood would a wood chuck chuck'
             ' if a wood chuck could chuck wood').split(),
        ]
        for words in samples:
            print_histogram(words)

    corpus = cleanup.clean_file('ghosts_on_coruscant.txt')
    print(Narkovogram(4, corpus).random_walk(10))
def home():
    """Render the home page: a random Markov-generated quote attributed
    to a random actor, with a matching image slug."""
    # Build the model from the cleaned show transcript.
    words = cleanup.clean_file('SiliconValley.txt')
    # 3rd-order model keeps the generated text locally coherent.
    model = markov.make_higher_order_markov_model(3, words)
    # Cap the quote at tweet length (140 characters).
    quote = markov.generate_random_sentence_n(140, model)

    actor = word_frequency.random_actor()
    # Image filenames use underscores instead of spaces and no apostrophes.
    actor_slug = actor.replace(' ', '_').replace('\'', '')

    print('random actor = ', actor)
    print('random actor cleaned ', actor_slug)
    print('random sentence ', quote)

    # Render website <3
    return render_template('layout.html',
                           sentence=quote,
                           actor=actor,
                           actor_image=actor_slug)
sentence_count += 1 current_window = generate_random_start(markov_model) sentence = [current_window[0]] elif sentence_count == 0 and new_tweet_len >= length: # forget the sentence and generate a new one :P current_window = generate_random_start(markov_model) sentence = [current_window[0]] elif sentence_count > 0 and new_tweet_len < length: # More than one sentence. and length is still less max # Get another new sentence tweet += sentence_string sentence_string = ' '.join(sentence) sentence_count += 1 current_window = generate_random_start(markov_model) sentence = [current_window[0]] else: # Return this good good tweet return tweet if __name__ == '__main__': clean_text_list = clean_file('corpus.txt') # print(clean_text_list) # print(markov_chain(clean_text_list)) # markov_chain = markov_chain(clean_text_list) # # higher_order_markov_chain = nth_order_markov_model(2, clean_text_list) # print(markov_chain) # sentence = generate_sentence(10, markov_chain) # # sentence = generate_sentence_with_higher_order(10, higher_order_markov_chain) # print(sentence)
def main():
    """Print a 10-step random walk over the first sample text."""
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    print(random_walk(cleanup.clean_file(sources[0]), 10))
def tests():
    """Exercise sentence generation on the first sample text.

    The generated sentence is discarded; this only checks the call runs.
    """
    sources = ['rumpelstiltskin.txt', 'tom_thumb.txt']
    words = cleanup.clean_file(sources[0])
    generate_sentence(10, words)
ten_thousand_words = list_of_words(10000) hundred_hgram = Dictogram(hundred_words) ten_thousand_hgram = Dictogram(ten_thousand_words) hundred_search = hundred_words[-1] ten_thousand_search = ten_thousand_words[-1] stmt = "ten_thousand_hgram.count('{}')".format(ten_thousand_search) setup = "from __main__ import ten_thousand_hgram" timer = timeit.Timer(stmt, setup=setup) iterations = 10000 result = timer.timeit(number=iterations) print("count time for 100-word histogram: " + str(result)) # '*********** CREATING DATA STRUCTURES ***********' words_list = cleanup.clean_file('SiliconValley_Actors.txt') print 'Start creating Dictogram' dictogram = Dictogram(words_list) print 'Time to create Dictogram: ', 'seconds' stmt_dictogram = "dictogram.return_weighted_random_word()" setup_dictogram = "from __main__ import dictogram" timer_dictogram = timeit.Timer(stmt_dictogram, setup=setup_dictogram) result_dictogram = timer_dictogram.timeit(number=iterations) print("count time for finding " + str(iterations) + " random weighted words " + str(result_dictogram)) listogram = Listogram(words_list) print 'Time to create Listogram: ', 'seconds'
from flask import Flask, request, render_template import re import random import os import sample import cleanup import narkovogram import sentence app = Flask(__name__) contents = cleanup.clean_file('ghosts_on_coruscant.txt') last_order = 5 # def get_words(): # f = open("./text/ghosts_on_coruscant.txt", "r") # contents = f.read() # f.close() # return contents @app.route('/', methods=['GET']) def generate_sentence(): narkie = narkovogram.Narkovogram(5, contents) number = request.args.get('num') order = request.args.get('order') if number is None: number = 10 else: number = int(number)