def test_generate_phrase_no_params(self):
    """Tests the MarkovChain.generate_phrase method with no input arguments.

    The generated phrase must be neither ``None`` nor the empty string.
    """
    chain = MarkovChain(self._corpus)
    phrase = chain.generate_phrase()

    # A bare assertNotEqual(phrase, '') would let None slip through, so the
    # None case is checked explicitly before the empty-string case.
    self.assertIsNotNone(phrase)
    self.assertNotEqual(phrase, '')
def test_generate_phrase_max_size(self):
    """Tests the MarkovChain.generate_phrase method with max_size arg specified.

    Generates several phrases and checks that each is at most ``_max_size``
    characters long.
    """
    _max_size = 140
    chain = MarkovChain(self._corpus)

    # Generate 10 phrases; make sure all under max size.
    for _ in range(10):
        phrase = chain.generate_phrase(max_size=_max_size)
        # assertLessEqual reports both values on failure, unlike assertTrue.
        self.assertLessEqual(len(phrase), _max_size)
def test_generate_phrase_min_words(self):
    """Tests the MarkovChain.generate_phrase method with min_words arg specified.

    Generates several phrases and checks that each contains at least
    ``_min_words`` space-separated words.
    """
    _min_words = 20
    chain = MarkovChain(self._corpus)

    # Generate 10 phrases; test each one
    for _ in range(10):
        phrase = chain.generate_phrase(min_words=_min_words)
        # assertGreaterEqual reports both values on failure, unlike assertTrue.
        self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
def main():
    """Generate Markov-chain text from the ingredients of randomly picked recipes.

    Command-line options (parsed from ``sys.argv``):
        -o, --out FILE     output file (default ``out.tmp``); debug data is
                           written to ``FILE.debug``
        -s, --search STR   space-separated search terms
                           (default: "strawberries", "gorgonzola")
        -h, --help         call ``usage()``

    The recipe data is unpickled from ``./crdy.nope``. Up to five recipes
    matching the search terms are picked at random, their ingredients are
    joined into one corpus, and the text produced by ``MarkovChain.printSth``
    is written to the output file.
    """
    # NOTE(review): unpickling can execute arbitrary code; only load a trusted
    # file here. Binary mode is what pickle expects, and the context manager
    # closes the handle that the original left open.
    with open("./crdy.nope", 'rb') as crd_file:
        crd = pickle.load(crd_file)

    outfile = None
    outdebug = None
    searchterms = ["strawberries", "gorgonzola"]

    try:
        opts, _args = getopt.gnu_getopt(
            sys.argv[1:], "o:s:h", ["out=", "search=", "help"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        # Without this, execution would continue with `opts` unbound.
        sys.exit(2)

    for opt, value in opts:
        if opt in ("-o", "--out"):
            outfile = value
            # Was misspelled `autdebug`, which left outdebug as None.
            outdebug = value + ".debug"
        elif opt in ("-h", "--help"):
            usage()
        elif opt in ("-s", "--search"):
            searchterms = value.split(" ")
        else:
            assert False, "unhandled option"

    if outfile is None:
        outfile = "out.tmp"
        outdebug = outfile + ".debug"

    rec = searchRecipes(crd=crd, searchterms=searchterms)

    corpus = []
    # A set, so the same recipe drawn twice is only used once.
    randomRec = set()
    for _ in range(0, 5):
        randomRec.add(random.choice(rec))

    # NOTE(review): this debug file is reopened with mode 'w' further down, so
    # the recipe names written here are overwritten. Confirm whether that is
    # intended before changing it.
    with open(outdebug, 'w') as debug_file:
        for r in randomRec:
            print(r, "RR")
            debug_file.write(r)
            debug_file.write("\n")
            ing = returnIngredients(recipeName=r, crd=crd)
            # Reuse the result instead of querying the same recipe twice.
            print("ing", r, ing)
            corpus += ing

    corpus = [c.encode('utf-8') for c in corpus]
    jc = " ".join(corpus)
    jc = jc.replace("\n", " ")

    with open(outdebug, 'w') as debug_file:
        debug_file.write("***".join(corpus))
        debug_file.write("\n")
        debug_file.write(jc)

    mc = MarkovChain(corpus=jc, separator=" ")
    result = mc.printSth(2000)

    with open(outfile, 'w') as out:
        out.write(result)
def test_generate_phrase_both_valid_params(self):
    """Tests the MarkovChain.generate_phrase method with min_words and max_size args specified.

    Each generated phrase must contain at least ``_min_words``
    space-separated words and be at most ``_max_size`` characters long.
    """
    _max_size = 140
    _min_words = 5
    chain = MarkovChain(self._corpus)

    for _ in range(10):
        phrase = chain.generate_phrase(max_size=_max_size,
                                       min_words=_min_words)
        # Two separate assertions so a failure names the violated bound.
        self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
        # Compare against _max_size (not a repeated literal 140) with the same
        # inclusive bound that the max_size-only test uses.
        self.assertLessEqual(len(phrase), _max_size)
def __init__(self):
    """Build ``self.model`` from the arrays saved under the working directory.

    Loads ``mc_transition.npy`` and ``mc_initial.npy`` from
    ``<cwd>/resources/models/mc_model/`` and passes them to ``MarkovChain``
    together with the integer states 0 through 18.
    """
    initial_file = os.getcwd()+'/resources/models/mc_model/mc_initial.npy'
    transition_file = os.getcwd()+'/resources/models/mc_model/mc_transition.npy'

    # The transition array is loaded first, then the initial array.
    transition_array = np.load(transition_file)
    initial_array = np.load(initial_file)

    self.model = MarkovChain(
        transition=transition_array,
        initial=initial_array,
        states=list(range(19)),
    )
def _update_chain(self):
    """Creates a Markov chain based on the current corpus.

    Raises:
        Exception: If no song in the corpus has any lyrics.
    """
    # Join with a space so the last word of one song and the first word of
    # the next stay separate tokens; plain concatenation fused them together.
    # Songs with empty lyrics are skipped so an all-empty corpus still yields
    # an empty string and triggers the error below.
    text_corpus = ' '.join(
        song.lyrics for song in self._corpus if song.lyrics)
    if not text_corpus:
        raise Exception("Error: no corpus to generate MarkovChain")

    # Create a new Markov chain, and signal that it is current
    self._chain = MarkovChain(text_corpus)
    self._chain_is_current = True
def plot_graph(self, mk_prop, Labels):
    """Draw a MarkovChain built from ``mk_prop`` and ``Labels``.

    ``mk_prop`` is wrapped in a one-element list before being converted to a
    NumPy array, and that array is passed to ``MarkovChain`` with ``Labels``.
    """
    # Transition matrix
    transition_matrix = np.array([mk_prop])
    MarkovChain(transition_matrix, Labels).draw()
class LyricScrambler():
    """Generates scrambled phrases based on song lyrics.

    Songs are kept in a bounded corpus (oldest song dropped first). A
    MarkovChain is rebuilt from the corpus lazily, the next time a phrase is
    requested after the corpus has changed.
    """

    def __init__(self, max_songs=None):
        """Initializes the LyricScrambler.

        Args:
            max_songs: Maximum number of songs kept in the corpus. Any falsy
                value (``None`` or ``0``) selects the default of 10.
        """
        # Set max corpus size (# songs)
        if not max_songs:
            max_songs = 10
        self._max_songs = max_songs

        # Initialize the corpus and Markov chain
        self._corpus = []
        self._chain = {}

        # Signifies if current Markov chain reflects the corpus
        self._chain_is_current = False

    def _get_lyrics(self, artist, title):
        """Retrieves the full lyric listing (if available) for the given song."""
        # Retrieve song lyrics
        client = LyricClient()
        return client.get_lyrics(artist, title)

    def add_song(self, artist, title):
        """Adds a song to the current corpus.

        Args:
            artist: Non-empty string naming the artist.
            title: Non-empty string naming the song.

        Returns:
            True if the song was added, False if no lyrics were found.

        Raises:
            ValueError: If artist or title is empty or not a string.
        """
        # Type check; isinstance also accepts str subclasses.
        artist_valid = artist and isinstance(artist, str)
        title_valid = title and isinstance(title, str)
        if not artist_valid or not title_valid:
            raise ValueError("Expected string input for artist and title.")

        # Get lyrics
        lyrics = self._get_lyrics(artist, title)

        # Return False; song was not found.
        if not lyrics:
            return False

        # Make sure corpus size isn't maxed out. Pop oldest song.
        if len(self._corpus) >= self._max_songs:
            del self._corpus[0]

        # Add song to corpus; note that the Markov chain is not current
        self._corpus.append(Song(artist, title, lyrics))
        self._chain_is_current = False
        return True

    def _update_chain(self):
        """Creates a Markov chain based on the current corpus.

        Raises:
            Exception: If no song in the corpus has any lyrics.
        """
        # Join with a space so the last word of one song and the first word
        # of the next stay separate tokens; plain concatenation fused them.
        # Empty lyrics are skipped so an all-empty corpus still yields ''.
        text_corpus = ' '.join(
            song.lyrics for song in self._corpus if song.lyrics)
        if not text_corpus:
            raise Exception("Error: no corpus to generate MarkovChain")

        # Create a new Markov chain, and signal that it is current
        self._chain = MarkovChain(text_corpus)
        self._chain_is_current = True

    def get_phrase(self, max_size=None, min_words=None):
        """Generates a silly phrase based on the underlying Markov Chain.

        Args:
            max_size: Passed through to ``generate_phrase`` as ``max_size``.
            min_words: Passed through to ``generate_phrase`` as ``min_words``.

        Raises:
            Exception: If no songs have been added yet.
        """
        # Ensure there's corpus
        if not self._corpus:
            raise Exception('No song lyrics to generate a phrase.')

        # Update the Markov chain
        if not self._chain_is_current:
            self._update_chain()

        # _chain.generate_phrase will raise a ValueError if max_size and
        # min_words are invalid.
        return self._chain.generate_phrase(max_size=max_size,
                                           min_words=min_words)
def main():
    """Build and draw the 2-, 3- and 4-state example Markov chains.

    For each example a transition matrix and its state labels are passed to
    ``MarkovChain``, and ``draw`` is called with the matching path under
    ``../img/``.
    """
    examples = (
        # 2-state Markov chain
        (
            [[0.8, 0.2],
             [0.1, 0.9]],
            ['1', '2'],
            "../img/markov-chain-two-states.png",
        ),
        # 3-state Markov chain
        (
            [[0.8, 0.1, 0.1],
             [0.1, 0.7, 0.2],
             [0.1, 0.7, 0.2]],
            ['A', 'B', 'C'],
            "../img/markov-chain-three-states.png",
        ),
        # 4-state Markov chain
        (
            [[0.8, 0.1, 0.1, 0.0],
             [0.1, 0.7, 0.0, 0.2],
             [0.1, 0.0, 0.7, 0.2],
             [0.1, 0.0, 0.7, 0.2]],
            ['1', '2', '3', '4'],
            "../img/markov-chain-four-states.png",
        ),
    )

    for rows, labels, image_path in examples:
        # Transition matrix
        transition_matrix = np.array(rows)
        chain = MarkovChain(transition_matrix, labels)
        chain.draw(image_path)
#!/usr/bin/env python
# Makes a database for markovtwitter from a corpus file.
# The corpus file is expected to be one sentence per line.
#
# usage: make_db.py [-h] -c corpusfile -n depth -o outfile

import argparse

from markovchain import MarkovChain

parser = argparse.ArgumentParser()
parser.add_argument('-c', '--corpus', help='Name of corpus file',
                    required=True, metavar='corpusfile')
# --order is required, so the former default=1 could never take effect and
# has been dropped.
parser.add_argument('-n', '--order', help='Chain depth', type=int,
                    required=True, metavar='depth')
parser.add_argument('-o', '--out', help='Output DB filename',
                    required=True, metavar='outfile')
args = parser.parse_args()

# Single-argument print(...) calls behave the same on Python 2 and Python 3;
# the original print statements are a syntax error on Python 3.
print('Generating markov chain database from %s' % args.corpus)
mc = MarkovChain(corpusfile=args.corpus, order=args.order)
print(mc)

print('Saving database to %s' % args.out)
mc.save_db(args.out)
("sjor", "data/usjavarornefni.txt", 3), ("sveit", "data/usveit.txt", 3), ("borg", "data/uthettbyli.txt", 3), ("vatn", "data/uvatnaornefni.txt", 4), ("land", "data/ulandornefni.txt", 4), ("jokull", "data/ujoklaornefni.txt", 3), ("kvk", "data/ukvknofn.txt", 3), ("kk", "data/ukknofn.txt", 3), ("milli", "data/umillinofn.txt", 3), ("gata", "data/ugotunofn.txt", 4), ("hus", "data/uhusanofn.txt", 3), ] CHAINS = {} for name, fname, order in name_files: mc = MarkovChain(order=order, analyzer="char") mc.fit(fname) CHAINS[name] = mc mc = MarkovChain(order=order, analyzer="char") mc.fit(fname, reversed=True) CHAINS[name + "_r"] = mc NAMES = {} for chain, fname, _ in name_files: NAMES[chain] = set() with open(fname) as f: for name in f.readlines(): NAMES[chain].add(name.strip()) def is_bool(value):
from markovchain import MarkovChain

# Small demo: build a chain from a fixed sentence and print ten phrases.
corpus = "this is a bunch of text used to make a markov chain. This is pretty neat."
chain = MarkovChain(corpus)

for _ in range(10):
    # print(...) with one argument works on both Python 2 and Python 3; the
    # original print statement is a syntax error on Python 3.
    print(chain.generate_phrase(min_words=5, max_size=140))
if _ == (0, 0): pass else: start += step_size stop += step_size # Slice sessions @sessions(None, None) next_step = input.sessions(start, stop) if _ >= (1, 1): past_start = (start-step_size) past_stop = (stop-step_size) past_step = input.sessions(past_start, past_stop) # Compute transition matrix next mc_next = MarkovChain(next_step, states) mc_next = mc_next.csr_sparse_matrix() # Compute transition matrix past mc_past = MarkovChain(past_step, states) mc_past = mc_past.csr_sparse_matrix() print('matrix done', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')) print('start clustering') # DBSCAN dbscan_next = Clustering(mc_next) dbscan_past = Clustering(mc_past) print("End clustering", datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), '\n') """
#!/usr/bin/env python
# Prints lines generated by MarkovChain.random_title() from a saved database.
#
# Options: -d/--db database (required), -n/--number numlines (default 10)

import argparse

from markovchain import MarkovChain

parser = argparse.ArgumentParser()
parser.add_argument('-d', '--db', help='Filename of saved database',
                    metavar='database', required=True)
parser.add_argument('-n', '--number', type=int, default=10,
                    help='Number of lines to generate', metavar='numlines')
args = parser.parse_args()

mc = MarkovChain(saveddb=args.db)

# range() and single-argument print(...) work on both Python 2 and Python 3;
# xrange and the print statement do not exist on Python 3.
for _ in range(args.number):
    print(mc.random_title())