예제 #1
0
def sentence_gen(num_words=7, source='siddhartha.txt'):
    """Build a random sentence from words sampled out of a source text.

    Args:
        num_words: Number of words to sample; defaults to 7.
        source: Path handed to ``get_words``; defaults to 'siddhartha.txt'.

    Returns:
        Whatever ``sentence_maker`` returns for the list of sampled words.
    """
    histo = histogram(get_words(source))
    # One sample(histo) draw per requested word; nothing is drawn and
    # thrown away beforehand.
    random_words = [sample(histo) for _ in range(num_words)]
    return sentence_maker(random_words)
예제 #2
0
    max_freq = max(word_dict.values())
    rand_frequency = random.uniform(0, max_freq)
    list_from_words = list(word_dict)
    while True:
        rand_index = random.randint(0, len(word_dict) - 1)
        selected_word = list_from_words[rand_index]
        if word_dict[selected_word] >= rand_frequency:
            return selected_word


def make_sentence(chain, starting_words, sentence_len):
    """Join the two starting words and up to ``sentence_len - 2`` more.

    Args:
        chain: Passed through unchanged to ``next_word``.
        starting_words: Indexable whose first two items open the sentence;
            it is also the first state handed to ``next_word``.
        sentence_len: Target length; ``sentence_len - 2`` lookups are made.

    Returns:
        The collected words joined by single spaces.
    """
    words = [starting_words[0], starting_words[1]]
    state = starting_words
    for _ in range(sentence_len - 2):
        candidate = next_word(chain, state)
        if candidate is None:
            # Nothing to add: the state is left as it was and the attempt
            # still counts against the budget.
            continue
        words.append(candidate)
        # Slide the two-word window forward by one.
        state = (state[1], candidate)
    return ' '.join(words)


if __name__ == '__main__':
    # Demo: build a chain from 'life.txt', pick a random starting entry
    # from it and print a sentence with a target length of 5.
    word_list = get_words('life.txt')
    chain = markov_chain(word_list)
    random_words = random.choice(list(chain))
    print(make_sentence(chain, random_words, 5))
예제 #3
0
import random
from dictogram import Dictogram
from histogram import get_words
from stochastics import sampler

# Result of histogram.get_words for the bundled corpus file; evaluated once,
# at import time.
corpus = get_words("corpus/corpus-0.txt")


def first_order(corpus_of_words):
    """Map each word to a Dictogram of the words that directly follow it.

    Args:
        corpus_of_words: Sequence of words in their original order.

    Returns:
        A dict keyed by word. The last word of the corpus has no follower,
        so it is a key only if it also occurs earlier in the sequence.
    """
    chain = {}
    # Pair every word with its immediate successor.
    for current, follower in zip(corpus_of_words, corpus_of_words[1:]):
        if current in chain:
            chain[current].add_count(follower)
        else:
            chain[current] = Dictogram([follower])
    return chain


def second_order(corpus_of_words):
    words = corpus_of_words  # list of strings
    chain = {}  # dict to hold Markov states, key: word, value: histogram
    corpus_length = len(words)

    for i, word1 in enumerate(words):
        if i + 2 >= corpus_length:
        return self[tokens].sample()

    def build_sentence(self, num_words, words_list):
        """Build the state histogram, then walk it to produce a sentence.

        Args:
            num_words: Word count at which the walk stops.
            words_list: Passed to ``build_state_histogram`` before the walk.

        Returns:
            The generated words joined by single spaces. If '**STOP**' is
            drawn, it is included as the final word and the walk ends early.
        """
        self.build_state_histogram(words_list)

        # Start from a randomly chosen key of this chain.
        seed = random.choice(list(self.keys()))
        window = Queue(seed)
        output = list(seed)
        count = len(seed)

        while count < num_words:
            word = self.get_next_word(tuple(window.items()))
            output.append(word)
            if word == '**STOP**':
                break
            # Slide the window: drop the oldest token, add the new one.
            window.dequeue()
            window.enqueue(word)
            count += 1
        return ' '.join(output)


if __name__ == '__main__':
    # Demo: read the words of 'text/three_wishes.txt', then print a sentence
    # built by a MarkovChain(4) with a word target of 6.
    words_list = histogram.get_words('text/three_wishes.txt')
    sentence = MarkovChain(4).build_sentence(6, words_list)
    print(sentence)
예제 #5
0
        total_freq += index[1]

    print(total_freq)
    random_num = random.uniform(0, 1)

    #returns random sample words
    for index in listogram:
        chance += index[1] / total_freq
        if chance >= random_num:
            return index[0]


if __name__ == '__main__':

    # Build a histogram from the words of 'fish.txt' and print the value
    # that total_freq returns for it.
    histo_text = get_words('fish.txt')
    histo = histogram(histo_text)
    test = total_freq(histo)
    print(test)

    # Draw one word with sample() and print it.
    sample_word = sample(histo)
    # NOTE(review): the return value is stored but not used in this block;
    # presumably print_probability is called for its printed output - confirm.
    probability = print_probability(histo)
    print(sample_word)

    # Disabled check that the sampling function actually works:
    # outcomes = test_probability(histo)
    # results = results_histogram(outcomes)
    # print(results)

    # List implementation of sampling.
    # NOTE(review): no code follows this heading in the visible source.
def random_word(source_text):
    """Return one random entry of ``histogram.get_words(source_text)``."""
    return random.choice(histogram.get_words(source_text))
def sample(words_list):
    """Pick a word by drawing a number and finding the entry that holds it.

    A random integer between 1 and ``len(words_list)`` (inclusive) is drawn;
    the first word whose ``get_word_distribution`` entry contains that
    number is returned.

    Returns:
        The matching word, or None when no entry contains the drawn number.
    """
    distribution = get_word_distribution(words_list)
    drawn = random.randint(1, len(words_list))
    return next(
        (word for word in distribution if drawn in distribution[word]),
        None,
    )


def test(words_list, num_samples=10000):
    """Print sampled versus actual relative frequency for each sampled word.

    Calls ``sample(words_list)`` ``num_samples`` times, then prints one line
    per distinct sampled word: its share of the samples next to its share of
    ``words_list`` (rounded to 4 decimals).

    Args:
        words_list: The corpus words handed to ``sample``.
        num_samples: Number of draws; defaults to 10000. It is also the
            divisor for the sampled share, so the two cannot drift apart.

    Returns:
        None; the comparison is only printed.
    """
    expected = histogram.histogram_dict(words_list)
    samples = [sample(words_list) for _ in range(num_samples)]
    observed = histogram.histogram_dict(samples)

    corpus_size = len(words_list)
    for word in observed:
        print(word, 'test', observed[word] / num_samples,
              'distribution', round(expected[word] / corpus_size, 4))


# Module-level demo: these statements are not under an
# ``if __name__ == '__main__'`` guard, so they also run on import.
# Disabled variant that took the file path from the command line:
# params = sys.argv[1:]
# words_list = histogram.get_words(params[0])
words_list = histogram.get_words('text/three_wishes.txt')
print(sample(words_list))
# Disabled debugging calls:
# print(get_word_distribution(words_list))
# test(words_list)
예제 #8
0
    dictionary = words

    if type(words) == list():
        for word in words:
            dictionary = {}
            dictionary[word] = words.count(word)

    random_value = random.random()
    total_value = sum(dictionary.values())
    total = 0
    # number_wins = {}

    for key, value in dictionary.items():
        total += value
        if total / total_value >= random_value:
            return key
            # if key in number_wins:
            #     number_wins[key] += 1
            # else:
            #     number_wins[key] = 1
            # break


if __name__ == '__main__':
    # Demo: load the words of 'animals.txt' and print what test_sampler
    # returns for them with a second argument of 10000.
    words = get_words('animals.txt')
    # NOTE(review): histogram_dict is not used again in this block; it may be
    # read as a module global elsewhere - confirm before removing.
    histogram_dict = get_words_counts_dict(words)

    # Disabled direct call:
    # sampler(words)
    print(test_sampler(words, 10000))