Esempio n. 1
0
    def test_generate_phrase_no_params(self):
        """Tests the MarkovChain.generate_phrase method with no input arguments.

        Builds a chain from ``self._corpus`` and checks that the generated
        phrase is neither ``None`` nor the empty string.
        """

        chain = MarkovChain(self._corpus)
        phrase = chain.generate_phrase()

        # assertNotEqual(None, '') passes, so None has to be rejected explicitly.
        self.assertIsNotNone(phrase)
        # Assert non-empty
        self.assertNotEqual(phrase, '')
Esempio n. 2
0
    def test_generate_phrase_max_size(self):
        """Tests the MarkovChain.generate_phrase method with max_size arg specified.

        Generates ten phrases from a chain built on ``self._corpus`` and checks
        that each one is at most ``_max_size`` characters long.
        """

        _max_size = 140
        chain = MarkovChain(self._corpus)

        # Generate 10 phrases; make sure none exceeds the max size.
        for _ in range(10):
            phrase = chain.generate_phrase(max_size=_max_size)
            # assertLessEqual reports both values when the check fails.
            self.assertLessEqual(len(phrase), _max_size)
Esempio n. 3
0
    def test_generate_phrase_min_words(self):
        """Tests the MarkovChain.generate_phrase method with min_words arg specified.

        Generates ten phrases from a chain built on ``self._corpus`` and checks
        that each one splits (on single spaces) into at least ``_min_words``
        pieces.
        """

        _min_words = 20
        chain = MarkovChain(self._corpus)

        # Generate 10 phrases; test each one
        for _ in range(10):
            phrase = chain.generate_phrase(min_words=_min_words)
            # assertGreaterEqual reports both values when the check fails.
            self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
Esempio n. 4
0
def main():
    """Generate Markov-chain text from the ingredients of randomly picked recipes.

    Loads the pickled recipe data from "./crdy.nope", searches it with the
    given search terms, draws up to five distinct random recipes from the
    result, joins their ingredients into one corpus and writes the text
    produced by ``MarkovChain.printSth(2000)`` to the output file.

    Command-line options:
        -o, --out FILE      output file (default "out.tmp"); debug output is
                            written to FILE + ".debug"
        -s, --search TERMS  space-separated search terms
                            (default: "strawberries gorgonzola")
        -h, --help          call usage()
    """
    # NOTE(review): unpickling can execute arbitrary code; only load a trusted
    # "./crdy.nope". Binary mode is what pickle expects, and the context
    # manager closes the handle.
    with open("./crdy.nope", 'rb') as crd_file:
        crd = pickle.load(crd_file)
    outfile = None
    searchterms = ["strawberries", "gorgonzola"]
    try:
        opts, args = getopt.gnu_getopt(sys.argv[1:], "o:s:h", ["out=", "search=", "help"])
    except getopt.GetoptError as err:
        print (str(err))
        usage()
        # If usage() returns, `opts` is unbound and the loop below would fail
        # with a NameError; exit with the conventional usage-error status.
        sys.exit(2)
    for o, a in opts:
        if o in ("-o", "--out"):
            outfile = a
        elif o in ("-h", "--help"):
            usage()
        elif o in ("-s", "--search"):
            searchterms = a.split(" ")
        else:
            assert False, "unhandled option"

    if outfile is None:
        outfile = "out.tmp"
    # Derive the debug path in one place so it is set whether or not -o was
    # given (a misspelled assignment previously left it as None with -o).
    outdebug = outfile + ".debug"

    rec = searchRecipes(crd=crd, searchterms=searchterms)
    corpus = []
    # A set, so the same recipe drawn twice is only processed once.
    randomRec = set()
    for _ in range(5):
        randomRec.add(random.choice(rec))

    with open(outdebug, 'w') as debug:
        for r in randomRec:
            print(r,"RR")
            debug.write(r)
            debug.write("\n")
            ing = returnIngredients(recipeName=r, crd=crd)
            print("ing", r, ing)
            corpus += ing

    # NOTE(review): on Python 3 encode() yields bytes and the str join below
    # would raise TypeError — confirm which interpreter this targets.
    corpus = [c.encode('utf-8') for c in corpus]
    jc = " ".join(corpus)
    jc = jc.replace("\n", " ")
    # NOTE(review): mode 'w' truncates the recipe names written to the same
    # file above — confirm whether they were meant to be kept.
    with open(outdebug, 'w') as debug:
        debug.write("***".join(corpus))
        debug.write("\n")
        debug.write(jc)

    mc = MarkovChain(corpus=jc, separator=" ")
    result = mc.printSth(2000)
    with open(outfile, 'w') as out:
        out.write(result)
Esempio n. 5
0
    def test_generate_phrase_both_valid_params(self):
        """Tests the MarkovChain.generate_phrase method with min_words and max_size args specified.

        Generates ten phrases and checks that each one has at least
        ``_min_words`` space-separated pieces and at most ``_max_size``
        characters.
        """

        _max_size = 140
        _min_words = 5
        chain = MarkovChain(self._corpus)

        for _ in range(10):
            phrase = chain.generate_phrase(max_size=_max_size, min_words=_min_words)
            # Separate assertions so a failure names the violated constraint.
            self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
            # Inclusive bound tied to _max_size instead of a repeated literal.
            self.assertLessEqual(len(phrase), _max_size)
	def __init__(self):
		"""Build ``self.model`` from arrays stored under the working directory.

		Loads ``mc_transition.npy`` and ``mc_initial.npy`` from
		``resources/models/mc_model`` (relative to the current working
		directory) and passes them to ``MarkovChain`` together with the
		integer states 0 through 18.
		"""
		cwd = os.getcwd()
		initial_file = cwd+'/resources/models/mc_model/mc_initial.npy'
		transition_file = cwd+'/resources/models/mc_model/mc_transition.npy'

		transition = np.load(transition_file)
		initial = np.load(initial_file)

		# Nineteen integer states: 0, 1, ..., 18.
		self.model = MarkovChain(transition=transition, initial=initial, states=list(range(19)))
Esempio n. 7
0
    def _update_chain(self):
        """Rebuilds the Markov chain from the lyrics of every song in the corpus.

        Raises:
            Exception: if the combined lyrics are empty.
        """

        # Concatenate all lyrics, in corpus order, with no separator.
        combined_lyrics = ''.join(song.lyrics for song in self._corpus)

        if not combined_lyrics:
            raise Exception("Error: no corpus to generate MarkovChain")

        # Swap in the fresh chain and flag it as matching the corpus.
        self._chain = MarkovChain(combined_lyrics)
        self._chain_is_current = True
Esempio n. 8
0
 def plot_graph(self, mk_prop, Labels):
     """Draw a Markov chain built from ``mk_prop`` and ``Labels``.

     ``mk_prop`` is wrapped in a one-element list and converted to a NumPy
     array, which is handed to ``MarkovChain`` together with ``Labels``; the
     resulting chain's ``draw`` method is then called.
     """
     transition = np.array([mk_prop])  # Transition matrix
     MarkovChain(transition, Labels).draw()
Esempio n. 9
0
class LyricScrambler():
    """Generates scrambled phrases based on song lyrics.

    Songs are kept in a size-limited corpus (the oldest song is dropped when
    the limit is reached). The Markov chain is rebuilt from the corpus only
    when a phrase is requested and the corpus has changed since the last
    build.
    """

    def __init__(self, max_songs=None):
        """Initializes the LyricScrambler.

        Args:
            max_songs: maximum number of songs kept in the corpus. Any falsy
                value (``None``, ``0``) selects the default of 10.
        """

        # Set max corpus size (# songs)
        if not max_songs:
            max_songs = 10

        self._max_songs = max_songs

        # Initialize the corpus and Markov chain
        self._corpus = []
        self._chain = {}

        # Signifies if current Markov chain reflects the corpus
        self._chain_is_current = False

    def _get_lyrics(self, artist, title):
        """Retrieves the full lyric listing (if available) for the given song.

        Returns whatever ``LyricClient.get_lyrics`` returns for the pair.
        """

        # Retrieve song lyrics
        client = LyricClient()
        return client.get_lyrics(artist, title)

    def add_song(self, artist, title):
        """Adds a song to the current corpus.

        Args:
            artist: non-empty string naming the artist.
            title: non-empty string naming the song.

        Returns:
            True if the song was added, False if no lyrics were found.

        Raises:
            ValueError: if artist or title is empty or not a string.
        """

        # Type check; isinstance also accepts str subclasses, which an exact
        # type comparison would reject.
        for value in (artist, title):
            if not value or not isinstance(value, str):
                raise ValueError("Expected string input for artist and title.")

        # Get lyrics
        lyrics = self._get_lyrics(artist, title)

        # Return False; song was not found.
        if not lyrics:
            return False

        # Make sure corpus size isn't maxed out. Pop oldest song.
        if len(self._corpus) >= self._max_songs:
            del self._corpus[0]

        # Add song to corpus; note that the Markov chain is not current
        self._corpus.append(Song(artist, title, lyrics))
        self._chain_is_current = False
        return True

    def _update_chain(self):
        """Creates a Markov chain based on the current corpus.

        Raises:
            Exception: if the combined lyrics of the corpus are empty.
        """

        # join builds the text in one pass instead of repeated concatenation.
        text_corpus = ''.join(song.lyrics for song in self._corpus)

        if not text_corpus:
            raise Exception("Error: no corpus to generate MarkovChain")

        # Create a new Markov chain, and signal that it is current
        self._chain = MarkovChain(text_corpus)
        self._chain_is_current = True

    def get_phrase(self, max_size=None, min_words=None):
        """Generates a silly phrase based on the underlying Markov Chain.

        Args:
            max_size: forwarded to ``generate_phrase`` as ``max_size``.
            min_words: forwarded to ``generate_phrase`` as ``min_words``.

        Returns:
            The value returned by the chain's ``generate_phrase``.

        Raises:
            Exception: if no songs have been added.
        """

        # Ensure there's corpus
        if not self._corpus:
            raise Exception('No song lyrics to generate a phrase.')

        # Update the Markov chain
        if not self._chain_is_current:
            self._update_chain()

        # _chain.generate_phrase will raise a ValueError if max_size and min_words
        # are invalid.
        return self._chain.generate_phrase(max_size=max_size, min_words=min_words)
Esempio n. 10
0
def main():
    """Draw example Markov chains with two, three and four states.

    Each example pairs a hard-coded transition matrix with its state labels;
    a ``MarkovChain`` is built from them and its ``draw`` method is called
    with an image path under ``../img``.
    """
    examples = (
        # 2-state Markov chain
        (
            [[0.8, 0.2], [0.1, 0.9]],
            ['1', '2'],
            "../img/markov-chain-two-states.png",
        ),
        # 3-state Markov chain
        (
            [
                [0.8, 0.1, 0.1],
                [0.1, 0.7, 0.2],
                [0.1, 0.7, 0.2],
            ],
            ['A', 'B', 'C'],
            "../img/markov-chain-three-states.png",
        ),
        # 4-state Markov chain
        (
            [[0.8, 0.1, 0.1, 0.0], [0.1, 0.7, 0.0, 0.2],
             [0.1, 0.0, 0.7, 0.2], [0.1, 0.0, 0.7, 0.2]],
            ['1', '2', '3', '4'],
            "../img/markov-chain-four-states.png",
        ),
    )

    for rows, labels, image_path in examples:
        transition = np.array(rows)  # Transition matrix
        MarkovChain(transition, labels).draw(image_path)
Esempio n. 11
0
#!/usr/bin/env python

# Makes a database for markovtwitter from a corpus file.
# The corpus file is expected to be one sentence per line.
#
# usage: make_db.py [-h] -c corpusfile -n depth -o outfile
import argparse
from markovchain import MarkovChain

# Command-line interface: all three options are required.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--corpus',
                    help='Name of corpus file', required=True,
                    metavar='corpusfile')
# No default here: the option is required, so a default could never apply.
parser.add_argument('-n', '--order', help='Chain depth',
                    type=int, required=True, metavar='depth')
parser.add_argument('-o', '--out', help='Output DB filename',
                    required=True, metavar='outfile')
args = parser.parse_args()

# Single-argument print(...) produces the same output on Python 2 and 3.
print('Generating markov chain database from %s' % args.corpus)

# Build the chain from the corpus file, then show it.
mc = MarkovChain(corpusfile=args.corpus, order=args.order)
print(mc)

print('Saving database to %s' % args.out)
mc.save_db(args.out)

Esempio n. 12
0
    ("sjor", "data/usjavarornefni.txt", 3),
    ("sveit", "data/usveit.txt", 3),
    ("borg", "data/uthettbyli.txt", 3),
    ("vatn", "data/uvatnaornefni.txt", 4),
    ("land", "data/ulandornefni.txt", 4),
    ("jokull", "data/ujoklaornefni.txt", 3),
    ("kvk", "data/ukvknofn.txt", 3),
    ("kk", "data/ukknofn.txt", 3),
    ("milli", "data/umillinofn.txt", 3),
    ("gata", "data/ugotunofn.txt", 4),
    ("hus", "data/uhusanofn.txt", 3),
]

# For every entry of name_files, fit two analyzer="char" chains on the same
# file: one with a plain fit() stored under the entry's name, and one fitted
# with reversed=True stored under name + "_r".
CHAINS = {}
for name, fname, order in name_files:
    for _suffix, _fit_kwargs in (("", {}), ("_r", {"reversed": True})):
        mc = MarkovChain(order=order, analyzer="char")
        mc.fit(fname, **_fit_kwargs)
        CHAINS[name + _suffix] = mc

# Map each entry's key to the set of stripped lines read from its data file.
NAMES = {}
for chain, fname, _ in name_files:
    known = NAMES[chain] = set()
    with open(fname) as f:
        # Iterating the file object yields the same lines as readlines().
        for name in f:
            known.add(name.strip())


def is_bool(value):
Esempio n. 13
0
from markovchain import MarkovChain

# Build a chain from a small inline corpus and print ten generated phrases.
corpus = "this is a bunch of text used to make a markov chain. This is pretty neat."
chain = MarkovChain(corpus)

for i in range(10):
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print(chain.generate_phrase(min_words=5, max_size=140))
Esempio n. 14
0
        if _ == (0, 0):
            pass
        else:
            start += step_size
            stop += step_size

        # Slice sessions @sessions(None, None)
        next_step = input.sessions(start, stop)

        if _ >= (1, 1):
            past_start = (start-step_size)
            past_stop = (stop-step_size)
            past_step = input.sessions(past_start, past_stop)

            # Compute transition matrix next
            mc_next = MarkovChain(next_step, states)
            mc_next = mc_next.csr_sparse_matrix()

            # Compute transition matrix past
            mc_past = MarkovChain(past_step, states)
            mc_past = mc_past.csr_sparse_matrix()

            print('matrix done', datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'))
            print('start clustering')

            # DBSCAN
            dbscan_next = Clustering(mc_next)
            dbscan_past = Clustering(mc_past)

            print("End clustering", datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S'), '\n')
            """
Esempio n. 15
0
#!/usr/bin/env python

import argparse
from markovchain import MarkovChain

# Command-line interface: a required saved database and an optional line count.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--db', help='Filename of saved database',
                    metavar='database', required=True)
parser.add_argument('-n', '--number', type=int, default=10,
                    help='Number of lines to generate', metavar='numlines')
args = parser.parse_args()

# Load the chain from the saved database and print the requested number of
# titles. range and single-argument print(...) run on both Python 2 and 3.
mc = MarkovChain(saveddb=args.db)
for i in range(args.number):
    print(mc.random_title())