def make_the_words():
    """Build a sentence by walking a Markov chain over the words of transient.txt.

    Reads ``transient.txt`` (relative to the current working directory),
    splits every line on single spaces, strips surrounding spaces, periods,
    commas, semicolons, double quotes, newlines, underscores and question
    marks from each token, and hands the resulting word list to
    ``MarkovChain``.

    Returns:
        The result of ``MarkovChain(...).walk(10)`` (presumably the generated
        sentence as a string; confirm against ``MarkovChain.walk``).

    Raises:
        OSError: if ``transient.txt`` cannot be opened.
    """
    filename = "transient.txt"
    num_words = 10

    # The context manager guarantees the file handle is closed; the previous
    # bare open(...).readlines() left that to the garbage collector.
    with open(filename, "r") as source_file:
        lines = source_file.readlines()

    # Tokens that consist only of stripped characters become empty strings
    # and are intentionally kept, matching the earlier behaviour.
    transient_txt_words = [
        word.strip(' . , ;" \n _ ?')
        for line in lines
        for word in line.split(' ')
    ]

    # Alternative (not active): draw each word independently with
    # sample_by_frequency(histogram(transient_txt_words)) instead of
    # walking the Markov chain.
    markovchain = MarkovChain(transient_txt_words)
    return markovchain.walk(num_words)
def main():
    """Build a histogram from fish.txt and run both randomness tests on it.

    The text returned by ``read_text_file`` is passed through
    ``list_convert`` and ``histogram``; the resulting histogram is then
    handed to ``test_random`` and ``test_random_weight``. Nothing is returned.
    """
    txt = read_text_file('fish.txt')
    # Pass the text that was just read; the name `text` was never defined
    # in this function and raised NameError.
    text_list = list_convert(txt)
    my_histogram = histogram(text_list)
    test_random(my_histogram)
    test_random_weight(my_histogram)
def main():
    '''Build a histogram from fish.txt and run both randomness tests on it.

    The text returned by ``read_text_file`` is passed through
    ``list_convert`` and ``histogram``; the resulting histogram is then
    handed to ``test_random`` and ``test_random_weight``. Nothing is returned.
    '''
    txt = read_text_file('fish.txt')
    # Pass the text that was just read; the name `text` was never defined
    # in this function and raised NameError.
    text_list = list_convert(txt)
    my_histogram = histogram(text_list)
    test_random(my_histogram)
    test_random_weight(my_histogram)
def main():
    '''Build a histogram from short_story.txt and run the randomness checks.

    The file contents go through ``convert_to_list`` and ``histogram``; the
    histogram is then passed to ``test_randomness`` and
    ``test_weighted_randomness``. Nothing is returned.
    '''
    # Load the source text and turn it into a list.
    story_text = read_textfile("short_story.txt")
    story_words = convert_to_list(story_text)

    # One histogram feeds both checks.
    word_histogram = histogram(story_words)
    test_randomness(word_histogram)
    test_weighted_randomness(word_histogram)
def compare_structures_using_file(input_file):
    """Time the frequency lookup of three histogram implementations.

    Builds a histogram of ``input_file`` with the linked-list, tuple and
    dictionary implementations, pairs each with its matching frequency
    function, and passes all three to ``time_many_funcs`` with 20000 and
    the word 'loremipsum'. Nothing is returned.
    """
    # One (label, histogram, frequency function) triple per data structure,
    # listed in the order they are handed to the timer.
    variants = [
        ("linked_list", histogram(input_file), frequency),
        ("tuples",
         extra_sec_3.histogram_tuples(input_file),
         extra_sec_3.frequency_tuples),
        ("dictionary",
         word_frequency.histogram(input_file),
         word_frequency.frequency),
    ]

    labels = [label for label, _, _ in variants]
    histograms = [hgrm for _, hgrm, _ in variants]
    frequency_funcs = [func for _, _, func in variants]

    time_many_funcs(20000, 'loremipsum', histograms, frequency_funcs, labels)
key]: # return word within range return key # change range to decrease liklihood of same word repeating word_range += histogram[key] if __name__ == "__main__": ''' Entry point of program, opens and closes words.txt file for program to use Holds empty dictionary for program to use ''' filename = 'words.txt' word_histogram = {} with open(filename, 'r') as f: words = f.read().split(' ') hist = histogram(words) print(dict_sample(hist)) # ''' # calculate range # add frequency to determine range # see if word is in that range # take random number # find word at the index generated by random number # return random word # '''
def hash_table(source_text):
    """Return the word histogram that ``wf.histogram`` builds for ``source_text``.

    The original author describes the result as a dictionary-style hash
    table of words and frequencies, created in O(n); that depends on
    ``wf.histogram`` and is not verifiable from this function alone.
    """
    return wf.histogram(source_text)
def random_word(list):
    """Return one element of ``list`` picked by ``random.choice``."""
    return random.choice(list)


def randomnes_check(hist):
    """Sample words repeatedly and count how often each one is drawn.

    Draws ``word_frequency.unique_words(hist) * 100`` words from the list
    produced by ``stochastic_list(hist)``.

    Returns:
        dict mapping each drawn word to the number of times it was drawn.
    """
    unique_count = word_frequency.unique_words(hist)
    pool = stochastic_list(hist)

    tally = {}
    for _ in range(unique_count * 100):
        picked = random_word(pool)
        tally[picked] = tally.get(picked, 0) + 1
    return tally


if __name__ == "__main__":
    # The first command-line argument is handed to word_frequency.histogram.
    path = sys.argv[1]
    hist = word_frequency.histogram(path)
    check = randomnes_check(hist)

    # One "word<TAB>count" line per sampled word, then the distinct count.
    for sampled_word, times_drawn in check.items():
        print(sampled_word + "\t" + str(times_drawn))
    print(len(check))
import word_frequency as wf
import random

# Histogram built once at import time from the lyrics file.
histogram = wf.histogram('Biggie_Smalls_Ready_To_Die.txt')


def stochastic_sampler(histogram):
    """Return a random word, weighted by its count in ``histogram``.

    Each word is repeated ``count`` times in a temporary list and one entry
    is picked with ``random.choice``.
    """
    weighted_words = []
    for word, total_use in histogram.items():
        weighted_words.extend([word] * total_use)
    return random.choice(weighted_words)


def percentage_checker(word, wordList):
    """Return the fraction of entries in ``wordList`` that equal ``word``.

    Returns 0.0 when ``word`` does not occur (including for an empty list).
    """
    matches = sum(1 for candidate in wordList if candidate == word)
    if matches == 0:
        return 0.0
    return float(matches) / len(wordList)


# Fixed sample of 100 entries: 13 'a' followed by 87 'b'.
testList = []
for x in range(100):
    testList.append('a' if x < 13 else 'b')
def convert_histogram():
    """Return ``word_frequency.histogram`` applied to ``word_frequency.get_words()``."""
    return word_frequency.histogram(word_frequency.get_words())
test = 0 print("---------------RESULTS---------------") # Prints all the frequencies for word in keys: print("Test Frequency: {} -> {} Actual Frequency: {} -> {}".format(word, freq[word]/trials, word, histogram[word]/total)) if __name__ == '__main__': time_start = time.clock() trials = 1000 source_text = "text_files/" + sys.argv[1] histogram = word_frequency.histogram(source_text) true_rand = gen_list(histogram) random_trials(trials, histogram) #print("Random word: {}".format(random_word(histogram, rand))) print("---------------STATISTICS---------------") time_end = time.clock() time_diff = time_end - time_start #print("Time taken: {} seconds".format(time_diff)) print("Time taken: %.3f seconds" % (time_diff)) print("Memory consumed: {} Bytes".format(sys.getsizeof(true_rand))) print()
def main():
    '''Read sample_text.txt and build a histogram from its contents.

    The file is loaded with ``read_textfile``, passed through
    ``convert_to_list``, and the result is given to ``histogram``.

    NOTE(review): as visible here the histogram is only assigned to
    ``histogram1``; no random word is selected or returned. The function
    may continue beyond this excerpt -- confirm.
    '''
    # Load the source text.
    txt = read_textfile("sample_text.txt")
    txt_list = convert_to_list(txt)
    # Built but not used within the visible body.
    histogram1 = histogram(txt_list)
return new_list def random_word(list): return random.choice(list) def randomnes_check(hist): n_of_times = word_frequency.unique_words(hist) list = stochastic_list(hist) chosen = {} for i in range(n_of_times * 100): word = random_word(list) if chosen.get(word, 0) != 0: chosen[word] += 1 else: chosen[word] = 1 return chosen if __name__ == "__main__": path = sys.argv[1] hist = word_frequency.histogram(path) check = randomnes_check(hist) for key, value in check.items(): print(key + "\t" + str(value)) print(len(check.keys()))
def random_word_colorbonus(input_histogram):
    """Pick a random word by count, with colour words weighted double.

    Walks the histogram once. Each word with a non-zero count contributes
    ``count`` lots (``2 * count`` for 'red', 'blue', 'yellow', 'green',
    'orange' and 'purple') and replaces the current winner when a draw from
    ``1..running total`` lands within its own lots.

    Returns:
        The winning key; the first key if every count is zero.

    Raises:
        IndexError: if ``input_histogram`` is empty.
    """
    bonus_words = ('red', 'blue', 'yellow', 'green', 'orange', 'purple')
    words = list(input_histogram.keys())
    counts = list(input_histogram.values())

    lots_so_far = 0
    winner = 0
    for position, (word, count) in enumerate(zip(words, counts)):
        if count == 0:
            continue
        weight = 2 if word in bonus_words else 1
        lots = count * weight
        # The running total includes the current word before the draw.
        lots_so_far += lots
        if random.randint(1, lots_so_far) <= lots:
            winner = position
    return words[winner]


if __name__ == '__main__':
    # The first command-line argument is handed to word_frequency.histogram.
    my_file = str(sys.argv[1])
    my_histogram = word_frequency.histogram(my_file)

    # Run the sampler 10000 times through the shared test helper.
    my_results = dan_hoang_helper.test_results_parameter(
        10000, random_word_colorbonus, my_histogram)
    print(my_results)
'''Dictionary-type histogram for stochatic sampling Stochastic sampling takes an element from a given collection at random based on weight Then returns a random word ''' word_count = 0 word_count += sum( histogram[word] for word in histogram.keys()) # Calculates total word count in given text word_range = 0 histogram = {} random_integer = random.random() for key in histogram.keys(): histogram[key] = histogram[key] / word_count if random_integer > word_range and random_integer <= word_range + histogram[ key]: return key word_range += histogram[key] if __name__ == '__main__': filename = "words.txt" word_histogram = {} with open(filename, 'r') as f: words = f.read().split(' ') histogram = histogram(words) print(sample_dict(histogram))