Ejemplo n.º 1
0
def make_the_words():
    """Generate a sentence from the words found in ``transient.txt``.

    Every line of the file is split on single spaces and each piece has the
    characters in ``' . , ;" \\n _ ?'`` stripped from both ends before it is
    collected (pieces that end up empty are collected too). The collected
    words are passed to ``histogram`` and to ``MarkovChain``.

    Returns:
        The result of ``MarkovChain(...).walk(10)``.
    """
    filename = "transient.txt"
    # Use a context manager so the file handle is closed deterministically;
    # the previous bare ``open(...).readlines()`` left it to the garbage
    # collector.
    with open(filename, "r") as source_file:
        lines = source_file.readlines()

    transient_txt_words = []  # word_list
    for line in lines:
        for word in line.split(' '):
            transient_txt_words.append(word.strip(' . , ;" \n _ ?'))

    # Only the frequency-sampling alternative described below reads this.
    my_histogram = histogram(transient_txt_words)

    num_words = 10
    # Alternative to the Markov walk: draw ``num_words`` words with
    # ``sample_by_frequency(my_histogram)`` and join them with spaces.

    markovchain = MarkovChain(transient_txt_words)
    sentence = markovchain.walk(num_words)
    return sentence
Ejemplo n.º 2
0
def main():
    """Load ``fish.txt``, turn it into a histogram and run both sampling tests.

    The file contents go through ``list_convert`` and ``histogram``; the
    resulting histogram is handed to ``test_random`` and
    ``test_random_weight``.
    """
    txt = read_text_file('fish.txt')
    # Pass ``txt``: the previous code referenced the undefined name ``text``
    # here, which raised NameError before anything else ran.
    text_list = list_convert(txt)

    my_histogram = histogram(text_list)

    test_random(my_histogram)
    test_random_weight(my_histogram)
Ejemplo n.º 3
0
def main():
    '''Load ``fish.txt``, turn it into a histogram and run both sampling tests.

    The file contents go through ``list_convert`` and ``histogram``; the
    resulting histogram is handed to ``test_random`` and
    ``test_random_weight``.
    '''
    txt = read_text_file('fish.txt')
    # Pass ``txt``: the previous code referenced the undefined name ``text``
    # here, which raised NameError before anything else ran.
    text_list = list_convert(txt)

    my_histogram = histogram(text_list)

    test_random(my_histogram)
    test_random_weight(my_histogram)
Ejemplo n.º 4
0
def main():
    '''Build a histogram from "short_story.txt" and run both randomness tests.'''
    # load the text file and convert it with convert_to_list
    story_items = convert_to_list(read_textfile("short_story.txt"))

    # histogram of the converted text
    word_counts = histogram(story_items)

    # plain test first, weighted test second
    test_randomness(word_counts)
    test_weighted_randomness(word_counts)
def compare_structures_using_file(input_file):
    """Time the three frequency functions against their own histograms.

    A histogram is built from ``input_file`` with each of the three
    implementations (this module's ``histogram``, ``extra_sec_3`` and
    ``word_frequency``), then everything is handed to ``time_many_funcs``
    together with the labels "linked_list", "tuples" and "dictionary".
    """
    # Build the histograms in the same order as the labels below.
    linked_hgrm = histogram(input_file)
    tuples_hgrm = extra_sec_3.histogram_tuples(input_file)
    dict_hgrm = word_frequency.histogram(input_file)

    time_many_funcs(
        20000,
        'loremipsum',
        [linked_hgrm, tuples_hgrm, dict_hgrm],
        [frequency, extra_sec_3.frequency_tuples, word_frequency.frequency],
        ["linked_list", "tuples", "dictionary"],
    )
Ejemplo n.º 6
0
                key]:
            # return word within range
            return key
        # change range to decrease liklihood of same word repeating
        word_range += histogram[key]


if __name__ == "__main__":
    # Entry point of the program: reads words.txt, builds a histogram from
    # its space-separated pieces and prints one sample drawn from it.
    filename = 'words.txt'

    # Empty dictionary kept for the program to use; nothing below reads it.
    word_histogram = {}

    with open(filename, 'r') as f:
        words = f.read().split(' ')

    # The file is no longer needed once the words are in memory.
    hist = histogram(words)
    print(dict_sample(hist))

# '''
# calculate range
# add frequency to determine range
# see if word is in that range

# take random number
# find word at the index generated by random number
# return random word
# '''
def hash_table(source_text):
    """Return the histogram that ``wf.histogram`` builds for ``source_text``.

    Original author's note: a dictionary structure (hash table of words and
    frequencies), O(n) to create.
    """
    return wf.histogram(source_text)

def random_word(list):
    """Return one element of ``list`` chosen by ``random.choice``."""
    # The parameter name shadows the builtin; it is kept because callers may
    # pass it by keyword.
    candidates = list
    return random.choice(candidates)


def randomnes_check(hist):
    """Draw words repeatedly and count how often each one comes up.

    The number of draws is ``word_frequency.unique_words(hist) * 100`` and
    every draw is ``random_word`` applied to ``stochastic_list(hist)``.

    Returns:
        dict mapping each drawn word to the number of times it was drawn.
    """
    unique_count = word_frequency.unique_words(hist)
    pool = stochastic_list(hist)

    chosen = {}
    for _ in range(unique_count * 100):
        drawn = random_word(pool)
        # First sighting starts at 1, later ones increment.
        chosen[drawn] = chosen.get(drawn, 0) + 1

    return chosen


if __name__ == "__main__":
    # Script entry: the first command-line argument is passed to
    # word_frequency.histogram, then the sampling tally is printed.
    path = sys.argv[1]
    hist = word_frequency.histogram(path)
    check = randomnes_check(hist)

    # One "word<TAB>count" line per drawn word, then the number of distinct
    # words that were drawn.
    for key in check:
        value = check[key]
        print("\t".join((key, str(value))))
    print(len(check))
import word_frequency as wf
import random

# Module-level histogram: built by wf.histogram from the named text file as
# soon as this statement executes (i.e. at import/run time).
histogram = wf.histogram('Biggie_Smalls_Ready_To_Die.txt')


def stochastic_sampler(histogram):
    """Return one word drawn with probability proportional to its count.

    Each word of ``histogram`` (a mapping of word -> count) is repeated
    ``count`` times in a flat list, and ``random.choice`` picks from that
    list.
    """
    expanded = [
        word
        for word, total_use in histogram.items()
        for _ in range(total_use)
    ]
    return random.choice(expanded)


def percentage_checker(word, wordList):
    """Return the fraction of entries in ``wordList`` that equal ``word``.

    Args:
        word: value to look for (compared with ``==``).
        wordList: sized iterable of values to scan.

    Returns:
        float in [0.0, 1.0]; 0.0 when ``wordList`` is empty.
    """
    total = len(wordList)
    # An empty list used to raise ZeroDivisionError; nothing matches in an
    # empty list, so report a frequency of zero instead.
    if total == 0:
        return 0.0
    word_count = sum(1 for w in wordList if w == word)
    return float(word_count) / total

# 100-entry fixture: the first 13 entries are 'a', the remaining 87 are 'b'.
testList = []
for x in range(100):
    testList.append('a' if x < 13 else 'b')
def convert_histogram():
    """Return ``word_frequency.histogram`` applied to ``word_frequency.get_words()``."""
    source_words = word_frequency.get_words()
    return word_frequency.histogram(source_words)
    test = 0

    print("---------------RESULTS---------------")
    # Prints all the frequencies
    for word in keys:
        print("Test Frequency: {} -> {} Actual Frequency: {} -> {}".format(word, freq[word]/trials, word, histogram[word]/total))


if __name__ == '__main__':
    # Script entry: builds a histogram from "text_files/<argv[1]>", runs the
    # random trials and prints timing / memory statistics.
    #
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement for measuring elapsed time.
    time_start = time.perf_counter()
    trials = 1000

    source_text = "text_files/" + sys.argv[1]

    histogram = word_frequency.histogram(source_text)

    true_rand = gen_list(histogram)
    random_trials(trials, histogram)

    print("---------------STATISTICS---------------")
    time_end = time.perf_counter()
    time_diff = time_end - time_start
    print("Time taken: %.3f seconds" % (time_diff))
    # sys.getsizeof is shallow: it reports the container only, not the
    # objects it references.
    print("Memory consumed: {} Bytes".format(sys.getsizeof(true_rand)))
    print()
def main():
    '''Read "sample_text.txt", convert it to a list and build a histogram.

    NOTE(review): the original summary said "Return random word from
    histogram", but no word is sampled anywhere in this body.
    '''
    txt = read_textfile("sample_text.txt")
    txt_list = convert_to_list(txt)
    histogram1 = histogram(txt_list)
    # NOTE(review): ``new_list`` is never assigned in this function; unless a
    # module-level ``new_list`` exists this line raises NameError. Confirm
    # what was meant to be returned.
    return new_list


def random_word(list):
    """Return one element of ``list`` chosen by ``random.choice``."""
    # The parameter name shadows the builtin; it is kept because callers may
    # pass it by keyword.
    candidates = list
    return random.choice(candidates)


def randomnes_check(hist):
    """Draw words repeatedly and count how often each one comes up.

    The number of draws is ``word_frequency.unique_words(hist) * 100`` and
    every draw is ``random_word`` applied to ``stochastic_list(hist)``.

    Returns:
        dict mapping each drawn word to the number of times it was drawn.
    """
    unique_count = word_frequency.unique_words(hist)
    pool = stochastic_list(hist)

    chosen = {}
    for _ in range(unique_count * 100):
        drawn = random_word(pool)
        # First sighting starts at 1, later ones increment.
        chosen[drawn] = chosen.get(drawn, 0) + 1

    return chosen

if __name__ == "__main__":
    # Script entry: the first command-line argument is passed to
    # word_frequency.histogram, then the sampling tally is printed.
    path = sys.argv[1]
    hist = word_frequency.histogram(path)
    check = randomnes_check(hist)

    # One "word<TAB>count" line per drawn word, then the number of distinct
    # words that were drawn.
    for key in check:
        value = check[key]
        print("\t".join((key, str(value))))
    print(len(check))
def random_word_colorbonus(input_histogram):
    """Pick one key of ``input_histogram`` in a single weighted pass.

    A key's weight is its value, doubled when the key is one of 'red',
    'blue', 'yellow', 'green', 'orange' or 'purple'. Entries with value 0 are
    skipped. Each remaining entry takes over as the current pick when a
    ``random.randint`` draw over (its weight + all weight seen before it)
    lands within its own weight.

    Returns:
        The picked key; the first key if no entry ever takes over.
    """
    bonus_words = ('red', 'blue', 'yellow', 'green', 'orange', 'purple')
    my_keys = list(input_histogram.keys())

    weight_seen = 0  # total weight of the entries already processed
    winner_pos = 0
    for pos, count in enumerate(list(input_histogram.values())):
        if count == 0:
            continue
        multiplier = 2 if my_keys[pos] in bonus_words else 1
        weight = count * multiplier
        draw = random.randint(1, (weight + weight_seen))
        if draw <= weight:
            winner_pos = pos
        weight_seen += weight
    return my_keys[winner_pos]

if __name__ == '__main__':
    # Script entry: histogram the file named by the first command-line
    # argument, then run the colour-bonus sampler through the test helper
    # with 10000 as its first argument and print what the helper returns.
    my_file = str(sys.argv[1])
    my_histogram = word_frequency.histogram(my_file)

    my_results = dan_hoang_helper.test_results_parameter(
        10000,
        random_word_colorbonus,
        my_histogram,
    )
    print(my_results)
Ejemplo n.º 15
0
    '''Dictionary-type histogram for stochatic sampling
       Stochastic sampling takes an element from a given collection at random
       based on weight
       Then returns a random word
    '''
    word_count = 0
    word_count += sum(
        histogram[word] for word in
        histogram.keys())  # Calculates total word count in given text
    word_range = 0
    histogram = {}
    random_integer = random.random()

    for key in histogram.keys():
        histogram[key] = histogram[key] / word_count
        if random_integer > word_range and random_integer <= word_range + histogram[
                key]:
            return key
        word_range += histogram[key]


if __name__ == '__main__':
    # Script entry: reads words.txt, builds a histogram from its
    # space-separated pieces and prints one sample drawn from it.
    filename = "words.txt"
    word_histogram = {}

    with open(filename, 'r') as f:
        words = f.read().split(' ')

    # Bind the result to its own name. The previous
    # ``histogram = histogram(words)`` replaced the ``histogram`` function
    # with its return value, so any later call to ``histogram(...)`` would
    # have failed.
    hist = histogram(words)

    print(sample_dict(hist))