Example #1
# Requires the standard random module plus the project's Text and MarkovChain helpers.
def generate_words_list(src_text, chars_max_count=0, words_max_count=0):
    if chars_max_count <= 0 and words_max_count <= 0:
        raise ValueError('Either chars_max_count or words_max_count must be positive.')
    
    output_words = []
    src_text = src_text.lower()
    cleared_text = Text.clear_input_text(src_text)

    words = Text.split_text_alnum(cleared_text)
    punct_words = Text.split_text_punct(cleared_text)

    chain = MarkovChain()
    for i in range(1, len(words)):
        chain.add_pair(words[i - 1].root, words[i])

    punct_chain = MarkovChain()
    for i in range(len(punct_words) - 1):
        if not Text.is_punct(punct_words[i]):
            if Text.is_punct(punct_words[i + 1]):
                punct_chain.add_pair(punct_words[i].root, punct_words[i + 1].root)
            else:
                punct_chain.add_pair(punct_words[i].root, '')

    suffix_chain = MarkovChain()  # (previous suffix, current root) -> current suffix
    for i in range(1, len(words)):
        suffix_chain.add_pair((words[i - 1].suffix, words[i].root), words[i].suffix)

    next_word, prev_word = Text.Word(''), Text.Word('')
    generated_chars = 0

    while True:
        prev_word = next_word
        if next_word.is_empty():
            next_word = random.choice(words)
        else:
            next_word = chain.get_next_word(next_word.root, lambda: Text.Word(''))

        suffix = suffix_chain.get_next_word((prev_word.suffix, next_word.root), lambda: '')
        if len(suffix) == 0:
            suffix = next_word.suffix

        punct = punct_chain.get_next_word(next_word.root, lambda: '')

        if len(output_words) == 0 or output_words[-1].is_ending_word():
            res_word = Text.PunctedWord(next_word.root.capitalize(), suffix, punct)
        else:
            res_word = Text.PunctedWord(next_word.root, suffix, punct)

        output_words.append(res_word)
        generated_chars += len(res_word)
        if chars_max_count > 0 and generated_chars > chars_max_count:
            break
        if words_max_count > 0 and len(output_words) > words_max_count:
            break
    
    output_words[-1].punct = '.'
    return output_words
Example #2
def render_page():

    my_list = read_file('tintern_abbey.txt')
    chain = MarkovChain(my_list)
    num_words = 9
    my_sentence = chain.walk(num_words)

    my_list2 = read_file("the_rime.txt")
    chain2 = MarkovChain(my_list2)
    num_words2 = 9
    my_sentence2 = chain2.walk(num_words2)

    return render_template('index.html',
                           sentence=my_sentence,
                           sentence2=my_sentence2)
Example #3
def make_the_words():
    # Build a histogram from the source text.
    # (An absolute path such as ./file.ext is more robust for live deploys.)
    filename = "transient.txt"
    with open(filename, "r") as my_file:
        lines = my_file.readlines()
    transient_txt_words = []  # word list

    for line in lines:
        for word in line.split(' '):
            word = word.strip(' .,;"\n_?')
            transient_txt_words.append(word)

    my_histogram = histogram(transient_txt_words)

    # Put the words together into a sentence.
    num_words = 10
    # Earlier histogram-sampling version, kept for reference:
    # for i in range(num_words):
    #     word = sample_by_frequency(my_histogram)
    #     sentence = sentence + " " + word

    # Markov implementation:
    markovchain = MarkovChain(transient_txt_words)
    sentence = markovchain.walk(num_words)
    return sentence
Example #4
def markov(num=10):

    list_of_words = words_list()

    markov_chain = MarkovChain(list_of_words)

    sentence = markov_chain.walk(num)

    return sentence
Example #5
    def __init__(self, sources, order):
        '''
        sources: list of text sources
        order: Markov chain order. The higher the order,
               the more the model "remembers" its history.
        '''

        self.order = order
        self.sentences = self.parse_sources(sources)
        self.markov_chain = MarkovChain(order, self.sentences)
Example #6
    def __init__(self, filename):

        self.filename = filename

        self.ticks = None

        self.tempo = get_tempo(filename)

        self.chain = MarkovChain()
        self.parse()
Example #7
File: app.py Project: QFREEE/MarkovMusic
    def run(self):
        print("Loading input and generating...")
        fileload, resolution, midi_format = loadMidi.load('midi/bach_simple.mid')

        stringNotes = convert.listToString(fileload)

        mc = MarkovChain(1)
        mc.add_string(stringNotes)
        markovNotes = ' '.join(mc.generate_text(50))

        writeMidi.writeList(convert.stringToList(markovNotes), resolution, midi_format)
        print('Process complete, output is in ./rebuilt.mid')
Example #8
def generate_text(input_path, output_path):
    # `args` is assumed to be a module-level argparse namespace;
    # the output path parameter is used consistently instead of args.o.
    if os.path.isfile(output_path):
        os.remove(output_path)
    num_words = args.n

    contents = read_file(input_path)
    wordlist = contents.split(' ')

    markov = MarkovChain(wordlist)

    with open(output_path, 'a+') as f:
        f.write(markov.generate_text(num_words))
Example #9
def gen_word():
    with open("./words.txt", "r") as my_file:
        lines = my_file.readlines()
    my_histogram = histogram(lines)

    num_words = 10
    # Earlier histogram-sampling version:
    # for i in range(num_words):
    #     word = sample(my_histogram)
    #     sentence += " " + word
    # Note: this feeds whole lines (not individual words) to MarkovChain;
    # compare Example #16, which splits the lines into words first.
    markovchain = MarkovChain(lines)
    sentence = markovchain.walk(num_words)
    return sentence
Example #10
def generate_words():
    # Earlier histogram-sampling version, kept for reference:
    # my_histogram = histogram(lines)
    # sentence = ""
    # num_words = 10
    # for i in range(num_words):
    #     word = weighted_sample(my_histogram)
    #     sentence += " " + word
    # return sentence

    markovchain = MarkovChain(
        ["one", "fish", "two", "fish", "red", "fish", "blue", "fish"])
    return markovchain.walk(10)
Example #11
def main():
    print("Loading data...")
    text = load_data("data/philosophy").lower()
    text = preprocess(text)

    print("Building Markov model...")
    mc = MarkovChain()
    mc.train_ngram(1, text)
    mc.train_ngram(2, text)
    mc.train_ngram(3, text)

    bot = Bot(mc)
    bot.run()
Example #12
def handle_data():
	word1 = request.form["1"]
	word2 = request.form["2"]

	m = MarkovChain("Database/w3_.db", n=3)
	ngram = [word1, word2]
	res = m.query(*ngram).keys()  # candidate next words
	print(res)
	response = jsonify(dict(zip(["predict1", "predict2", "predict3"], list(res)[:3])))
	response.headers.add('Access-Control-Allow-Origin', '*')
	return response
Example #13
def create_sentence(word_num):
    source_text = "nietsche.txt"
    with open(source_text, "r") as file:
        og_text = file.read()

    # split() already strips surrounding whitespace from each token.
    word_list = og_text.split()

    chain = MarkovChain(word_list)
    # chain.print_chain()  # debug output
    sentence = chain.walk(word_num)

    return sentence
Example #14
def run_dir(base_path, authors_path):
    text = ''
    path = base_path + authors_path + '/'
    files = [name for name in os.listdir(path) if '.txt' in name]

    for name in files:
        with open(path + name, 'r') as f:
            text += f.read()

    # special treatment for Wittgenstein formulas
    text = re.sub(r'“(.+?)”', '', text)

    markov = MarkovChain(text=text)
    bipolar_discourse = markov.generate(100)
    print(repr(bipolar_discourse))
    save(authors_path + '.txt', bipolar_discourse)
Example #15
def main():
    # Get our training data, Brown corpus
    b_sents = brown.sents()
    b_pos = nltk.pos_tag_sents(b_sents)

    # Filter to only verb phrases
    b_verbs = verb_filter(b_pos)

    # Fit our MarkovChain
    b_mc = MarkovChain(order=1)
    b_mc.fit(b_verbs)
    b_mc.normalize_transition_matrix()

    examples = load_examples('examples.json')
    
    for ex in examples:
        compare_sentences(mc=b_mc, s1=ex['good'], s2=ex['bad'])
Example #16
def generate_words():
    with open("./words.txt", "r") as my_file:
        lines = my_file.readlines()
    my_histogram = histogram(lines)

    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)

    num_words = 10
    # Earlier histogram-sampling version:
    # for i in range(num_words):
    #     word = sample_by_frequency(my_histogram)
    #     sentence += " " + word
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence
Example #17
def generate_words():
    words_list = []
    with open('./EAP.text') as f:
        for line in f:
            for word in line.split():
                words_list.append(word)

    # Earlier Dictogram-sampling version:
    # lines = Dictogram(['one', 'fish', 'two', 'fish', 'red', 'fish', 'blue', 'fish'])
    # sentence = ""
    # num_words = 20
    # for i in range(num_words):
    #     word = lines.sample()
    #     sentence += " " + word
    # return sentence
    markovchain = MarkovChain(words_list)
    sentence = markovchain.walk(24)

    return render_template('index.html', sentence=sentence)
Example #18
def hello():
    with open("./words.txt", "r") as my_file:
        lines = my_file.readlines()

    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)

    # Earlier histogram-sampling version:
    # hs = histogram("words.txt")
    # return sample(hs)
    markovchain = MarkovChain(word_list)
    return markovchain.walk(20)
Example #19
def generate_words():
    # Build a histogram
    with open("./text.txt") as my_file:
        lines = my_file.readlines()
    my_histogram = histogram(lines)

    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)

    num_words = 10
    # Earlier histogram-sampling version:
    # for i in range(num_words):
    #     word = sample(my_histogram)
    #     sentence += " " + word
    # return sentence
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence
Example #20
def generate_words():
    # Build a histogram
    with open("words.txt", "r") as my_file:
        lines = my_file.readlines()
    my_histogram = Histogram(lines)

    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)

    num_words = 10
    # Earlier histogram-sampling version:
    # word = weighted_sample(my_histogram)
    # for i in range(num_words):
    #     word = weighted_sample(my_histogram)
    #     sentence += " " + word
    markov_chain = MarkovChain(word_list)
    sentence = markov_chain.walk(num_words)
    print("sentence", sentence)
    return sentence
Example #21
class MarkovTests(unittest.TestCase):
    # Build one chain, shared by all tests, from the source text.
    source_file = "the_black_cat.txt"
    with open(source_file, encoding="utf-8") as f:
        text = f.read()
    m = MarkovChain(text)

    def test_start(self):
        """Check that the correct start is used"""
        start = "From"
        predicted = self.m.predict(20, start)
        self.assertTrue(predicted.startswith(start))

    def test_length(self):
        """Check that the chain outputs the correct number of words"""
        n = 100
        predicted = self.m.predict(n)
        tokens = findall(d_re_pattern, predicted, d_re_flags)
        self.assertEqual(len(tokens), n)
Example #22
    def __init__(self, bot_token, master=None):
        self.bot_token = bot_token
        self.session_id = None
        self.acl = {master.upper(): 1000} if master is not None else {}

        self._default_handlers()

        self.mc = MarkovChain()

        self.nn_temp = 0.7

        self.seq_num = None
        self.hb_every = -1
        self.hb_task = None

        self.ident = ('', '', '')  # (user, disc, nick)
        self.ident_id = ''
        self.guilds = {}

        self.cmd_re = re.compile(r'\.(\S+)')

        self.approver = srl_approve.SRLApprove()
Example #23
def generate_words():
    # Build a histogram from a fixed sample string.
    # (A file source also works, e.g. open("despacito.txt").)
    text = "one fish two fish red fish blue fish"
    my_histogram = histogram(text)
    word_list = text.split()

    num_words = 10
    # Earlier histogram-sampling version:
    # word = sample(my_histogram)
    # for i in range(num_words):
    #     word = weighted_sample(my_histogram)
    #     sentence += " " + word
    markov_chain = MarkovChain(word_list)
    sentence = markov_chain.walk(num_words)
    print("sentence", sentence)
    return sentence
Example #24
def render_page():
    my_list = read_file('plato.txt')

    chain = MarkovChain(my_list)

    num_words = 9

    my_sentence = chain.walk(num_words)
    my_sentence_2 = chain.walk(num_words)
    my_sentence_3 = chain.walk(num_words)
    my_sentence_4 = chain.walk(num_words)
    my_sentence_5 = chain.walk(num_words)

    return render_template('index.html', sentence=my_sentence,
                           sentence2=my_sentence_2,
                           sentence3=my_sentence_3,
                           sentence4=my_sentence_4,
                           sentence5=my_sentence_5)
Example #25
def main():

    # read file, return list of tweets
    list_of_tweets = read_file()

    # get_counts returns three values: sentence lengths, a list of every
    # word used across all tweets, and unigram frequencies, i.e. {word: frequency}
    lengths, list_of_words, word_freq = get_counts(list_of_tweets)
    # print(word_freq)

    # get top users I have replied to
    top_users = get_top_users(word_freq)

    # return bigrams where {(word1, word2): frequency}
    bigrams = get_bigrams(list_of_tweets)
    # print(bigrams)
    # print(bigrams.keys())

    # create model for calculating the probability of a given sentence
    model = bigram_model(word_freq, bigrams)
    # print(model)

    #create Markov chain
    m = MarkovChain()
    m.learn(bigrams.keys())
    # print(m.memory)

    # generate length probability dictionary {length of sentence: frequency of sentences of that length}
    length_prob = length_probability(lengths)

    # output generated sentences, must have probability > 0.5
    generated_tweets = generate_sentences(length_prob, m, model, word_freq, 50)

    for tweet in generated_tweets:
        print(tweet)
Example #26
    def __init__(self, title_bank):
        self.markov = MarkovChain(3)
        for item in title_bank.values():
            self.markov.add(item['title'].replace('—', '-'))
Example #27
    def __init__(self, nick, log=False):
        GenericChatBot.__init__(self, nick, log)
        MucRoomHandler.__init__(self, nick)
        self.responder = MarkovChain(PidginLogs('~/.purple/logs/jabber/'))
Example #28
def randomized_markov(word_list, num):
    markov_chain = MarkovChain(word_list)
    return f'''
Example #29
from markov import MarkovChain

dan = MarkovChain('dan.txt')
# baldur = MarkovChain('baldur.txt')

print(dan.get_new_sentence(12).upper())
# print(baldur.get_new_sentence(15))
Example #30
app = FlaskAPI(__name__)
CORS(app)

with open('./text_files/words.txt', 'r') as f:
    words = f.read().split()

# listo = Listogram(words).listogram_samples(10)
# dicto = Dictogram(words).dictogram_samples(10)

# histograms = {
#     0: listo,
#     1: dicto
# }

MC = MarkovChain(words)


@app.route("/", methods=['GET'])
def notes_list():
    """
    List or create notes.
    """
    request.method == 'GET'
    sentences = MC.walk(10)

    return sentences


# @app.route("/int:key", methods=['GET'])
# def one_note(key):