def make_the_words():
    # build histogram
    # my_file = open("./words.txt", "r")  # absolute path -> ./file.ext  # more functional for live deploy
    # lines = my_file.readlines()
    filename = "transient.txt"
    lines = open(filename, "r").readlines()
    transient_txt_words = []  # word_list
    for line in lines:
        wordslist = line.split(' ')
        for word in wordslist:
            word = word.strip(' . , ;" \n _ ?')
            transient_txt_words.append(word)
    my_histogram = histogram(transient_txt_words)

    # put the words together into a sentence
    sentence = ''
    num_words = 10
    '''
    # comment out to implement markov
    for i in range(num_words):
        word = sample_by_frequency(my_histogram)
        sentence = sentence + " " + word
    '''
    # uncomment to implement markov
    markovchain = MarkovChain(transient_txt_words)
    sentence = markovchain.walk(num_words)
    return sentence

class TweetGenerator(object):
    def __init__(self, riddle_sentences, ngram_size):
        self.riddle_sentences = set(riddle_sentences)
        self.chain = MarkovChain(ngram_size=ngram_size)
        for sentence in self.riddle_sentences:
            self.chain.train_sentence(sentence)

    def get_unique_sentence(self):
        while True:
            s = self.chain.generate_sentence()
            if s.lower() not in self.riddle_sentences:
                break
        return s

    def generate_tweet(self):
        tweet = ''
        while True:
            next_sentence = self.get_unique_sentence()
            if len(next_sentence) > 125:
                continue
            if len(tweet) > 0 and len(tweet) + len(next_sentence) > 125:
                break
            tweet = tweet + next_sentence + ' '
        return tweet[:-1].strip()

def markov(num=0):
    list_of_words = words_list()
    markovChain = MarkovChain(list_of_words)
    sentence = markovChain.walk(10)
    return sentence

class TemplateBank:
    def __init__(self, title_bank):
        self.markov = MarkovChain(3)
        for item in title_bank.values():
            self.markov.add(item['title'].replace('—', '-'))

    def _random_template(self):
        title = self.markov.generate()
        replacements = {}
        tokens = []
        doc = nlp(title)
        i = 0
        for token in doc:
            # Treat named entities as a single token.
            if token.ent_type_ in ('PERSON', 'FAC', 'GPE', 'LOC'):
                if token.ent_iob == 1:
                    tokens[-2] += tokens[-1] + token.text
                    tokens[-1] = token.whitespace_
                else:
                    tokens.append(token.text)
                    tokens.append(token.whitespace_)
                    replacements[i] = '[[PERSON]]' if token.ent_type_ == 'PERSON' else '[[LOC]]'
                    i += 2
                continue
            tokens.append(token.text)
            tokens.append(token.whitespace_)
            if token.tag_ in ("NN", "NNP"):
                replacements[i] = "[[NOUN]]"
            elif token.tag_ in ("NNS", "NNPS"):
                replacements[i] = "[[NOUNS]]"
            elif token.pos_ == "ADJ":
                replacements[i] = "[[ADJ]]"
            i += 2
        if len(replacements) < 2:
            return None
        logger.debug('generated title: ' + ''.join(tokens))
        # Create a template by replacing two random tokens with POS tags
        for i, replacement in random.sample(list(replacements.items()), 2):
            tokens[i] = replacement
        logger.debug('generated template: ' + ''.join(tokens))
        return tokens

    def random_template(self):
        """Get a random template from the bank."""
        for i in range(0, 25):
            template = self._random_template()
            if template is not None:
                return template
        raise RecursionError("Title generation was unable to find a fitting template.")

def __init__(self, filename):
    self.filename = filename
    self.ticks = None
    self.tempo = get_tempo(filename)
    self.chain = MarkovChain()
    self.parse()

def __init__(self, sources, order):
    '''
    sources: list of text sources
    order: Markov chain order. The higher the order, the more the model "remembers" its history.
    '''
    self.order = order
    self.sentences = self.parse_sources(sources)
    self.markov_chain = MarkovChain(order, self.sentences)

def generate_text(input, output):
    if os.path.isfile(args.o):
        os.remove(args.o)
    num_words = args.n
    contents = read_file(input)
    wordlist = contents.split(' ')
    markov = MarkovChain(wordlist)
    with open(output, 'a+') as f:
        f.write(markov.generate_text(num_words))

def run(self): print ("Loading input and generating...") fileload, resolution, format = loadMidi.load('midi/bach_simple.mid') stringNotes = convert.listToString(fileload) mc = MarkovChain(1) mc.add_string(stringNotes) markovNotes = ' '.join(mc.generate_text(50)) writeMidi.writeList(convert.stringToList(markovNotes), resolution, format) print ('Process complete, output is in ./rebuilt.mid')
def generate_words():
    '''my_histogram = (lines)
    sentence = ""
    num_words = 10
    for i in range(num_words):
        word = weighted_sample(my_histogram)
        sentence += " " + word
    return sentence'''
    markovchain = MarkovChain(
        ["one", "fish", "two", "fish", "red", "fish", "blue", "fish"])
    return markovchain.walk(10)

def sample(self, outf, nr_frames=1e6, n=3):
    '''Sample using an n-gram into the given file.'''
    chain = MarkovChain(n)
    chain.add_sequence(self.buf)
    gen = chain.walk()
    out = wave.open(outf, 'wb')
    out.setparams(self.params)
    out.setnframes(nr_frames)
    chunk = nr_frames / 100
    for k in xrange(int(nr_frames)):
        if k % chunk == 0:
            print k / chunk, "%"
        out.writeframes(self.repr_to_pcm(next(gen)))

def gen_word():
    my_file = open("./words.txt", "r")
    lines = my_file.readlines()
    my_histogram = histogram(lines)
    sentence = ""
    num_words = 10
    # for i in range(num_words):
    #     word = sample(my_histogram)
    #     sentence += " " + word
    markovchain = MarkovChain(lines)
    sentence = markovchain.walk(num_words)
    return sentence

def random_markov_phrase():
    txt = load_quotes()
    # Create an instance of the markov chain. By default it stores and loads its database
    # files next to MarkovChain.py; you probably want to give it another location, like so:
    mc = MarkovChain(PATH + '/quotes_data/markov_scores')
    # Generate the markov chain's language model, in case it's not present
    mc.generateDatabase(textSample=txt)
    # Let the markov chain generate some text
    sentence = mc.generateString()
    generated_sentence = sentence[0].upper() + sentence[1:]
    if generated_sentence[-1] not in ['?', '!']:
        generated_sentence += '.'
    return generated_sentence

def handle_data():
    word1 = request.form["1"]
    word2 = request.form["2"]
    m = MarkovChain("Database/w3_.db", n=3)
    # pprint(m.query(*(options.predict[0])))
    ngram = [word1, word2]
    res = m.query(*ngram).keys()  # actual result
    print(res)
    # word = list(res)[random.randint(0, 2)]
    # print(word, end=' ')
    response = jsonify(dict(zip(["predict1", "predict2", "predict3"], list(res)[:3])))
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response

def __init__(self, riddle_sentences, ngram_size):
    self.riddle_sentences = set(riddle_sentences)
    self.chain = MarkovChain(ngram_size=ngram_size)
    for sentence in self.riddle_sentences:
        self.chain.train_sentence(sentence)

def create_sentence(word_num):
    source_text = "nietsche.txt"
    with open(source_text, "r") as file:
        og_text = file.read()
    word_list = og_text.split()
    for index, word in enumerate(word_list):
        word_list[index] = word.rstrip()
    chain = MarkovChain(word_list)
    chain.print_chain()
    sentence_words = []
    sentence = chain.walk(word_num)
    return sentence

def run_dir(base_path, authors_path):
    text = ''
    path = base_path + authors_path + '/'
    files = [name for name in os.listdir(path) if '.txt' in name]
    for f in files:
        with open(path + f, 'r') as f:
            text += f.read()
    # special treatment for wittgenstein formulas
    text = re.sub(r'“(.+?)”', '', text)
    markov = MarkovChain(text=text)
    bipolar_discourse = markov.generate(100)
    print repr(bipolar_discourse)
    save(authors_path + '.txt', bipolar_discourse)

def generate_words():
    my_file = open("./words.txt", "r")
    lines = my_file.readlines()
    my_histogram = histogram(lines)
    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)
    sentence = ""
    num_words = 10
    # for i in range(num_words):
    #     word = sample_by_frequency(my_histogram)
    #     sentence += " " + word
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence

def generate_words():
    words_list = []
    with open('./EAP.text') as f:
        lines = f.readlines()
    for line in lines:
        for word in line.split():
            words_list.append(word)
    # lines = Dictogram(['one', 'fish', 'two', 'fish', 'red', 'fish', 'blue', 'fish'])
    markovchain = MarkovChain(words_list)
    '''sentence = ""
    num_words = 20
    for i in range(num_words):
        word = lines.sample()
        sentence += " " + word
    return sentence'''
    sentence = markovchain.walk(24)
    return render_template('index.html', sentence=sentence)

def hello():
    # hs = histogram("words.txt")
    # samp = sample(hs)
    my_file = open("./words.txt", "r")
    lines = my_file.readlines()
    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)
    print(word_list)
    markovchain = MarkovChain(word_list)
    # return samp
    # num_words = 10
    return markovchain.walk(20)

def generate_words():
    # Build a histogram
    my_file = open("./text.txt", "r")
    lines = my_file.readlines()
    my_histogram = histogram(lines)
    word_list = []
    for line in lines:
        for word in line.split():
            word_list.append(word)
    sentence = ""
    num_words = 10
    # for i in range(num_words):
    #     # sample/frequency goes here
    #     word = sample(my_histogram)
    #     sentence += " " + word
    # return sentence
    markovchain = MarkovChain(word_list)
    sentence = markovchain.walk(num_words)
    return sentence

def generate_words(): #build a histogram # my_file = open("despacito.txt","r") lines = "one fish two fish red fish blue fish" my_histogram = histogram(lines) word_list = [] for line in lines: for word in line.split(): word_list.append(word) word = sample(my_histogram) #return word sentence = "" num_words = 10 # for i in range(num_words): # word = weighted_sample(my_histogram) # sentence += " " + word markovChain = MarkovChain(word_list) sentence = markovChain.walk(num_words) print("sentence", sentence) return sentence
def generate_words(): #build a histogram my_file = open("words.txt","r") lines = my_file.readlines() my_histogram = Histogram(lines) word_list = [] for line in lines: for word in line.split(): word_list.append(word) word = weighted_sample(my_histogram) #return word sentence = "" num_words = 10 # for i in range(num_words): # word = weighted_sample(my_histogram) # sentence += " " + word markovChain = MarkovChain(word_list) sentence = markovChain.walk(num_words) print("sentence", sentence) return sentence
def test_fits_sum_to_one(self):
    mc = MarkovChain(self.corpus, 1)
    mc.fit()
    p = mc.matrix_list[0].p
    self.assertEqual(sum(p[mc.convert_word_to_number('alpha')]), 1)
    self.assertEqual(sum(p[mc.convert_word_to_number('beta')]), 1)
    self.assertEqual(sum(p[mc.convert_word_to_number('gamma')]), 1)
    self.assertEqual(sum(p[mc.convert_word_to_number('delta')]), 1)

def main():
    # read file, return list of tweets
    list_of_tweets = read_file()
    # returns sentence lengths, a list of every word used across all of my tweets,
    # and unigram frequencies, i.e. {word: frequency}
    lengths, list_of_words, word_freq = get_counts(list_of_tweets)
    # print(word_freq)
    # get top users I have replied to
    top_users = get_top_users(word_freq)
    # return bigrams where {(word1, word2): frequency}
    bigrams = get_bigrams(list_of_tweets)
    # print(bigrams)
    # print(bigrams.keys())
    # create model for calculating the probability of a given sentence
    model = bigram_model(word_freq, bigrams)
    # print(model)
    # create Markov chain
    m = MarkovChain()
    m.learn(bigrams.keys())
    # print(m.memory)
    # generate length probability dictionary
    # {length of sentence: frequency of sentences of that length}
    length_prob = length_probability(lengths)
    # output generated sentences; each must have probability > 0.5
    generated_tweets = generate_sentences(length_prob, m, model, word_freq, 50)
    for tweet in generated_tweets:
        print(tweet)

def main(): print("Loading data...") text = load_data("data/philosophy").lower() text = preprocess(text) print("Building Markov model...") mc = MarkovChain() mc.train_ngram(1, text) mc.train_ngram(2, text) mc.train_ngram(3, text) bot = Bot(mc) bot.run()
def render_page():
    my_list = read_file('tintern_abbey.txt')
    chain = MarkovChain(my_list)
    num_words = int(10) - 1
    my_sentence = chain.walk(num_words)

    my_list2 = read_file("the_rime.txt")
    chain2 = MarkovChain(my_list2)
    num_words2 = int(10) - 1
    my_sentence2 = chain2.walk(num_words2)
    return render_template('index.html', sentence=my_sentence, sentence2=my_sentence2)

def __init__(self, bot_token, master=None):
    self.bot_token = bot_token
    self.session_id = None
    self.acl = {master.upper(): 1000} if master is not None else {}
    self._default_handlers()
    self.mc = MarkovChain()
    self.nn_temp = 0.7
    self.seq_num = None
    self.hb_every = -1
    self.hb_task = None
    self.ident = ('', '', '')  # user, disc, nick
    self.ident_id = ''
    self.guilds = {}
    self.cmd_re = re.compile('\\.(\\S+)')
    self.approver = srl_approve.SRLApprove()

class MarkovTests(unittest.TestCase):
    # set up chain object
    source_file = "the_black_cat.txt"
    with open(source_file, encoding="utf-8") as f:
        text = f.read()
    m = MarkovChain(text)

    def test_start(self):
        """Check that the correct start is used."""
        start = "From"
        predicted = self.m.predict(20, start)
        self.assertTrue(predicted.startswith(start))

    def test_length(self):
        """Check that the chain outputs the correct number of words."""
        n = 100
        predicted = self.m.predict(n)
        tokens = findall(d_re_pattern, predicted, d_re_flags)
        expected = n
        actual = len(tokens)
        self.assertEqual(actual, expected)

def main():
    # Get our training data, the Brown corpus
    b_sents = brown.sents()
    b_pos = nltk.pos_tag_sents(b_sents)
    # Filter to only verb phrases
    b_verbs = verb_filter(b_pos)
    # Fit our MarkovChain
    b_mc = MarkovChain(order=1)
    b_mc.fit(b_verbs)
    b_mc.normalize_transition_matrix()
    examples = load_examples('examples.json')
    for ex in examples:
        compare_sentences(mc=b_mc, s1=ex['good'], s2=ex['bad'])

def render_page():
    my_list = read_file('plato.txt')
    chain = MarkovChain(my_list)
    num_words = int(10) - 1
    my_sentence = chain.walk(num_words)
    my_sentence_2 = chain.walk(num_words)
    my_sentence_3 = chain.walk(num_words)
    my_sentence_4 = chain.walk(num_words)
    my_sentence_5 = chain.walk(num_words)
    return render_template('index.html',
                           sentence=my_sentence,
                           sentence2=my_sentence_2,
                           sentence3=my_sentence_3,
                           sentence4=my_sentence_4,
                           sentence5=my_sentence_5)

def test_finds_highest_order_solution_first(self):
    mc = MarkovChain(self.corpus, 3)
    mc.fit()
    nextw = mc.next_word(['alpha', 'beta', 'gamma'])
    self.assertEqual(nextw, 'delta')

def test_probs_stop_across_lines(self):
    mc = MarkovChain(self.corpus, 3)
    mc.fit()
    nextw = mc.next_word(['alpha', 'epsilon'])
    self.assertEqual(nextw, '\n')  # nothing follows epsilon

    # drop punctuation (optional)
    # text = text.replace(";", "").replace(".", "").replace("!", "").replace(",", "")
    text = text.replace('"', "")
    return text


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('length', type=int, help='Maximum number of words to generate.')
    parser.add_argument('--lines', type=int, help='Number of lines to generate (default=1).', default=1)
    args = parser.parse_args()

    chain = MarkovChain()
    if not os.path.isfile("table.json"):
        f = open("source.txt")
        lines = [christmas_parser(f.read())]
        f.close()
        chain.create_table(lines)
        chain.save_table("table.json")
    else:
        f = open("table.json")
        raw_table = f.read()
        f.close()
        chain.parse_table(raw_table)

##########
# BASED ON https://github.com/thomasboyt/python-markov/blob/master/examples/AChristmasMarkov/christmas_markov.py
##########
import os, json

from markov import MarkovChain


def sanitize(text):
    replaced_chars = {
        "\n": " ",
        "\r": "",
        ";": "",
        ".": "",
        "!": "",
        ",": "",
        "?": "",
        '"': ""
    }
    for c in replaced_chars:
        text = text.replace(c, replaced_chars[c])
    return text


chain = MarkovChain()

f = open("markov_source.txt")
source = sanitize(f.read())
f.close()

chain.create_table(source)
chain.save_table("markov_source.json")

def __init__(self, title_bank):
    self.markov = MarkovChain(3)
    for item in title_bank.values():
        self.markov.add(item['title'].replace('—', '-'))

print "example:" print "python algorithmic.music.py 1 data/lilypond-template.ly data/entchen.ly 300" sys.exit(1) order = int(sys.argv[1]) template = sys.argv[2] filename = sys.argv[3] length = int(sys.argv[4]) lilypond = "lilypond" opencmd = "xdg-open" if sys.platform.startswith("darwin"): lilypond = "/Applications/LilyPond.app/Contents/Resources/bin/lilypond" opencmd = "open" m = MarkovChain(order) m.observe_file(filename, True) start = m.get_random_prestate() result = m.random_walk(length, start) def tactify(tuplelist, resolution, tact): # tuplelist: eingabeliste # resolution: kleinste note # tact: wieviele resolution-noten einen takt geben (ignored) # immer 4/4 Takt takt = resolution # in 16teln count = 0 output = []
order = 1
length = 50
text = """
g8 e8 e4 f8 d8 d4 c8 d8 e8 f8 g8 g8 g4 g8 e8 e4 f8 d8 d4 c8 e8 g8 g8 c4 r4 g8 e8 e4 f8 d8 d4 c8 d8 e8 f8 g8 g8 g4 g8 e8 e4 f8 d8 d4 c8 e8 g8 g8 c4 r4
"""

m = MarkovChain(order)
print "observing %s" % text
spacer()
m.observe_string(text, True)
print "TODO make logging verbose for observe()..."
spacer()
m.print_transitions()
spacer()
m.print_matrix()
start = m.get_random_prestate()
spacer()
result = m.random_walk_string(length, start)

def __init__(self, nick, log=False):
    GenericChatBot.__init__(self, nick, log)
    MucRoomHandler.__init__(self, nick)
    self.responder = MarkovChain(PidginLogs('~/.purple/logs/jabber/'))

class MidiParser:
    def __init__(self, filename):
        self.filename = filename
        self.ticks = None
        self.tempo = get_tempo(filename)
        self.chain = MarkovChain()
        self.parse()

    def parse(self):
        """Reads the midi file and makes markov insertions."""
        midi = mido.MidiFile(self.filename)
        self.ticks = midi.ticks_per_beat
        prev = []
        curr = []
        for track in midi.tracks:
            for msg in track:
                if msg.type == 'note_on':
                    if msg.time == 0:
                        curr.append(msg.note)
                    else:
                        self.make_transition(prev, curr, msg.time)
                        prev = curr
                        curr = []

    def make_transition(self, prev, curr, time):
        """Inserts all state transition possibilities into the markov chain."""
        print(prev, curr, time)
        for i in prev:
            for j in curr:
                self.chain.add(i, j, self.millisecond(time))

    def millisecond(self, ticks):
        """Tick to millisecond converter."""
        try:
            ms = ((ticks / self.ticks) * self.tempo) / 1000
            return int(ms - (ms % 250) + 250)
        except TypeError:
            raise TypeError(
                "Could not read a tempo and ticks_per_beat from midi")

    def get_markov(self):
        return self.chain

#!/usr/bin/env python
import sys

from markov import MarkovChain

if len(sys.argv) != 4:
    print "usage:"
    print "python lorem.ipsum.py $ORDER $DATAFILE $OUTLENGTH"
    print "example:"
    print "python lorem.ipsum.py 3 data.txt 300"
    sys.exit(1)

order = int(sys.argv[1])
filename = sys.argv[2]
length = int(sys.argv[3])

m = MarkovChain(order)
m.observe_file(filename, True)
start = m.get_random_prestate()
result = m.random_walk_string(length, start)
print result

def randomized_markov(word_list, num):
    markov_chain = MarkovChain(word_list)
    return f'''

from markov import MarkovChain

dan = MarkovChain('dan.txt')
# baldur = MarkovChain('baldur.txt')

print(dan.get_new_sentence(12).upper())
# print(baldur.get_new_sentence(15))

def get(self):
    # ------------- MAIN SCRIPT -----------------------
    # app_status = state.load()
    client = memcache.Client()
    print client
    # logName = app_status.get('log_name', 'logs/app_log.log')
    # logger = self.set_up_logging(logName)

    # Create and train markov chain
    chainTrainingFile = './data/fortunes.txt'
    fortuneChain = MarkovChain(1)
    fortuneChain.learn(chainTrainingFile)
    # lastChainUpdate = datetime.datetime.now()
    # updateFrequency = 2*60*60  # in seconds
    # logger.info('Markov chain initially trained on ' + chainTrainingFile + ' at ' + str(lastChainUpdate))

    # Authenticate twitter account
    # Note: Only do this when ready to go live!
    auth = OAuthHandler(secrets['CONSUMER_KEY'], secrets['CONSUMER_SECRET'])
    auth.set_access_token(secrets['ACCESS_TOKEN'], secrets['ACCESS_TOKEN_SECRET'])
    api = API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
    # logger.info('Authentication Successful')

    # mostRecentReply = app_status.get('recent_reply', 0)
    mostRecentReply = client.gets('recent_reply')
    if mostRecentReply is None:
        mostRecentReply = 0
        memcache.add(key='recent_reply', value=0)
    print "MOST RECENT", mostRecentReply

    # waitTime = 1 * 60  # in seconds
    # logger.info('Wait time set to ' + str(waitTime) + ' seconds')

    # Run forever
    # while True:

    # If we're on startup, fast-forward to the current tweet
    if mostRecentReply == 0:
        # Grab all mentions
        mentions = api.mentions_timeline()
        # Do not reply to these tweets (as they are old)
        if len(mentions) > 0:
            mostRecentReply = mentions[0].id
            # logger.info('Fast-forwarding most recent reply to ' + str(mostRecentReply))

    # Check if we need to retrain the chain (do once per hour)
    # if (datetime.datetime.now() - lastChainUpdate).total_seconds() > updateFrequency:
    #     # Retrain chain
    #     fortuneChain.learn(chainTrainingFile)
    #     lastChainUpdate = datetime.datetime.now()
    #     logger.info('Markov chain retrained at ' + str(lastChainUpdate))

    print mostRecentReply

    # Get tweets directed at the account since the last check
    mentions = api.mentions_timeline(since_id=mostRecentReply)
    mentions.reverse()
    print len(mentions)
    for mention in mentions:
        # print mention.text
        # print mention.author.screen_name
        # logger.info(str(mention))

        # Generate a fortune
        fortune = self.generateFortune(fortuneChain)
        # logger.info(str(fortune))

        # Send that user a reply with their fortune
        statusRet = api.update_status(
            status='@' + mention.author.screen_name + ' ' + fortune,
            in_reply_to_status_id=mention.id)
        # logger.info('Replied to ' + mention.author.screen_name)

    # Update most recent reply if there's something newer
    if len(mentions) > 0:
        mostRecentReply = mentions[-1].id
        # logger.info('Updating most recent reply to ' + str(mostRecentReply))

    # Set the value of the current tweet in the memcache.
    tries = 40
    print "SETTING REPLY", mostRecentReply
    print "TRIES", tries
    while tries > 0:
        tries -= 1
        reply = client.gets('recent_reply')
        print reply
        if client.cas('recent_reply', mostRecentReply):
            break
    print tries

    # app_status['recent_reply'] = mostRecentReply
    # Wait for a period before looping again
    # time.sleep(waitTime)
    # state.save(app_status)
    # logging.shutdown()
    self.response.write(MAIN_PAGE_HTML)

def walk_corpus(fname):
    with open(fname, 'r') as f:
        words = f.read().split()
    chain = MarkovChain(5)
    chain.add_sequence(words)
    return chain.walk()

def test_second_best_solution_next(self):
    mc = MarkovChain(self.corpus, 3)
    mc.fit()
    # TODO: Make sure it tests for beta delta together first?
    nextw = mc.next_word(['beta', 'delta'])
    self.assertEqual(nextw, 'alpha')

def walk_corpus():
    chain = MarkovChain(mc_nodes)
    chain.add_sequence(corpus)
    return chain.walk()

if "reply_to" in args.config: reply_to = args.config["reply_to"] subject = args.payload["subject"][0] if subject.strip() == "": subject = "[no subject]" text_body = args.payload["text"][0] text_in = text_body.split("\n")[0] sender = args.payload["from"][0] print "Sender: %s" % (sender,) print "Got text: %s" % (text_body,) print "Using text: %s" % (text_in,) chain = MarkovChain() f = open("markov_source.json") raw_table = f.read() f.close() chain.parse_table(raw_table) text_words = [word.strip() for word in text_in.split(" ")] text_in = chain.generate_chain(length=len(text_words) + 1, words=text_words) print "New text: %s" % (text_in,) from_param = from_address if from_name is not None: from_param = (from_address, from_name) message = sendgrid.Message(from_param, subject, text_in)
#!/usr/bin/env python
import sys

from markov import MarkovChain

if len(sys.argv) != 2:
    print "usage:"
    print "python brownian.py $OUTLENGTH"
    print "example:"
    print "python brownian.py 300"
    sys.exit(1)

m = MarkovChain(1)

min, max = -999, +999
for i in range(min, max + 1):
    m.observe((str(i),), str(i - 1))
    m.observe((str(i),), str(i + 1))
m.observe((str(min - 1),), str(min - 1))
m.observe((str(max + 1),), str(max + 1))

start = ["0"]
# result = m.random_walk_string(int(sys.argv[1]), start)
# print result

from pylab import *

result = m.random_walk(int(sys.argv[1]), start)
result = [int(x) for x in result]
plot(result)