Esempio n. 1
0
    def test_generate_phrase_no_params(self):
        """Tests the MarkovChain.generate_phrase method with no input arguments."""

        chain = MarkovChain(self._corpus)
        phrase = chain.generate_phrase()

        # A None result would slip past the empty-string comparison on its own
        # (None != '' is true), so rule it out explicitly first.
        self.assertIsNotNone(phrase)
        self.assertNotEqual(phrase, '')
Esempio n. 2
0
    def test_generate_phrase_max_size(self):
        """Tests the MarkovChain.generate_phrase method with max_size arg specified."""

        _max_size = 140
        chain = MarkovChain(self._corpus)

        # Generate 10 phrases; make sure none is longer than max size.
        # assertLessEqual reports both the actual length and the limit on
        # failure, unlike assertTrue on a pre-computed boolean.
        for _ in range(10):
            phrase = chain.generate_phrase(max_size=_max_size)
            self.assertLessEqual(len(phrase), _max_size)
Esempio n. 3
0
    def test_generate_phrase_min_words(self):
        """Tests the MarkovChain.generate_phrase method with min_words arg specified."""

        _min_words = 20
        chain = MarkovChain(self._corpus)

        # Generate 10 phrases; each must contain at least _min_words
        # space-separated tokens. assertGreaterEqual reports the actual count
        # on failure, unlike assertTrue on a pre-computed boolean.
        for _ in range(10):
            phrase = chain.generate_phrase(min_words=_min_words)
            self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
Esempio n. 4
0
def main():
    """Writes Markov-chain text built from the ingredients of random recipes.

    Command-line options:
        -o / --out FILE      output file (default "out.tmp"); debug data goes
                             to FILE + ".debug"
        -s / --search TERMS  space-separated search terms
        -h / --help          show usage and stop

    Loads the pickled recipe data from "./crdy.nope", picks up to five
    distinct recipes matching the search terms, joins their ingredients into
    one corpus, and writes the output of MarkovChain.printSth(2000) to the
    output file.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only use this with
    # a trusted data file.
    # Pickle data is binary, so open in 'rb' and close the handle when done.
    with open("./crdy.nope", 'rb') as crd_file:
        crd = pickle.load(crd_file)

    outfile = None
    searchterms = ["strawberries", "gorgonzola"]
    try:
        opts, _ = getopt.gnu_getopt(sys.argv[1:], "o:s:h", ["out=", "search=", "help"])
    except getopt.GetoptError as err:
        print (str(err))
        usage()
        # Without this, execution would continue with `opts` undefined.
        sys.exit(2)

    for o, a in opts:
        if o in ("-o", "--out"):
            outfile = a
        elif o in ("-h", "--help"):
            usage()
            # Help was requested: do not go on to generate output.
            return
        elif o in ("-s", "--search"):
            searchterms = a.split(" ")
        else:
            # Explicit raise rather than `assert False`, which -O strips.
            raise AssertionError("unhandled option")

    if outfile is None:
        outfile = "out.tmp"
    # Derive the debug file name in one place so it is always set, whether or
    # not -o was given.
    outdebug = outfile + ".debug"

    rec = searchRecipes(crd=crd, searchterms=searchterms)
    corpus = []
    # Five draws with replacement into a set: between one and five recipes.
    randomRec = set()
    for _ in range(5):
        randomRec.add(random.choice(rec))

    with open(outdebug, 'w') as dbg:
        for r in randomRec:
            print(r,"RR")
            dbg.write(r)
            dbg.write("\n")
            # Look the ingredients up once and reuse the result for the trace.
            ing = returnIngredients(recipeName=r, crd=crd)
            print("ing", r, ing)
            corpus += ing

    # NOTE(review): joining encoded values with a text separator only works
    # where str is a byte string (Python 2) - confirm the target interpreter.
    corpus = [c.encode('utf-8') for c in corpus]
    jc = " ".join(corpus)
    jc = jc.replace("\n", " ")
    # NOTE(review): reopening in 'w' mode truncates the recipe names written
    # to the same file above - confirm whether append was intended.
    with open(outdebug, 'w') as dbg:
        dbg.write("***".join(corpus))
        dbg.write("\n")
        dbg.write(jc)

    mc = MarkovChain(corpus=jc, separator=" ")
    result = mc.printSth(2000)
    with open(outfile, 'w') as out:
        out.write(result)
Esempio n. 5
0
    def test_generate_phrase_both_valid_params(self):
        """Tests the MarkovChain.generate_phrase method with min_words and max_size args specified."""

        _max_size = 140
        _min_words = 5
        chain = MarkovChain(self._corpus)

        # Generate 10 phrases; each must satisfy both constraints. The limits
        # are checked separately, against the same variables passed to
        # generate_phrase, so a failure shows which bound was violated.
        # The size bound is inclusive, matching the max_size-only test.
        for _ in range(10):
            phrase = chain.generate_phrase(max_size=_max_size, min_words=_min_words)
            self.assertGreaterEqual(len(phrase.split(' ')), _min_words)
            self.assertLessEqual(len(phrase), _max_size)
Esempio n. 6
0
    def _update_chain(self):
        """Rebuilds the Markov chain from the lyrics of every song in the corpus.

        Raises an Exception when the combined lyrics are empty.
        """

        # Glue the lyrics of all songs together, in corpus order
        combined_lyrics = ''.join(entry.lyrics for entry in self._corpus)

        if not combined_lyrics:
            raise Exception("Error: no corpus to generate MarkovChain")

        # Swap in the freshly built chain, then flag it as up to date
        self._chain = MarkovChain(combined_lyrics)
        self._chain_is_current = True
Esempio n. 7
0
class LyricScrambler():
    """Generates scrambled phrases based on song lyrics.

    Songs are kept in a bounded corpus (the oldest song is dropped when the
    limit is reached). The Markov chain is rebuilt from the combined lyrics
    only when a phrase is requested after the corpus has changed.
    """

    def __init__(self, max_songs=None):
        """Initializes the LyricScrambler.

        Args:
            max_songs: Maximum number of songs kept in the corpus. None, 0 or
                a negative value selects the default of 10.
        """

        # Set max corpus size (# songs). A limit below 1 would make add_song
        # try to evict from an empty corpus, so it falls back to the default.
        if not max_songs or max_songs < 1:
            max_songs = 10

        self._max_songs = max_songs

        # Initialize the corpus and Markov chain; the chain is only a
        # placeholder until _update_chain builds the real one.
        self._corpus = []
        self._chain = {}

        # Signifies if current Markov chain reflects the corpus
        self._chain_is_current = False

    def _get_lyrics(self, artist, title):
        """Retrieves the full lyric listing (if available) for the given song."""

        # Retrieve song lyrics
        client = LyricClient()
        return client.get_lyrics(artist, title)

    def add_song(self, artist, title):
        """Adds a song to the current corpus.

        Args:
            artist: Non-empty string naming the artist.
            title: Non-empty string naming the song.

        Returns:
            True if the song was added, False if no lyrics were found.

        Raises:
            ValueError: If artist or title is empty or not a string.
        """

        # Type check (isinstance also accepts str subclasses)
        artist_valid = artist and isinstance(artist, str)
        title_valid = title and isinstance(title, str)

        if not artist_valid or not title_valid:
            raise ValueError("Expected string input for artist and title.")

        # Get lyrics
        lyrics = self._get_lyrics(artist, title)

        # Return False; song was not found.
        if not lyrics:
            return False

        # Make sure corpus size isn't maxed out. Pop oldest song.
        if len(self._corpus) >= self._max_songs:
            del self._corpus[0]

        # Add song to corpus; note that the Markov chain is not current
        self._corpus.append(Song(artist, title, lyrics))
        self._chain_is_current = False
        return True

    def _update_chain(self):
        """Creates a Markov chain based on the current corpus.

        Raises:
            Exception: If the combined lyrics of the corpus are empty.
        """

        # Join once instead of repeated string concatenation
        text_corpus = ''.join(song.lyrics for song in self._corpus)

        if not text_corpus:
            raise Exception("Error: no corpus to generate MarkovChain")

        # Create a new Markov chain, and signal that it is current
        self._chain = MarkovChain(text_corpus)
        self._chain_is_current = True

    def get_phrase(self, max_size=None, min_words=None):
        """Generates a silly phrase based on the underlying Markov Chain.

        Args:
            max_size: Passed through to MarkovChain.generate_phrase.
            min_words: Passed through to MarkovChain.generate_phrase.

        Returns:
            Whatever MarkovChain.generate_phrase returns.

        Raises:
            Exception: If no songs have been added yet.
        """

        # Ensure there's corpus
        if not self._corpus:
            raise Exception('No song lyrics to generate a phrase.')

        # Update the Markov chain
        if not self._chain_is_current:
            self._update_chain()

        # _chain.generate_phrase will raise a ValueError if max_size and min_words
        # are invalid.
        return self._chain.generate_phrase(max_size=max_size, min_words=min_words)
Esempio n. 8
0
#!/usr/bin/env python

# Makes a database for markovtwitter from a corpus file.
# The corpus file is expected to be one sentence per line.
#
# usage: make_db.py [-h] -c corpusfile -n depth -o outfile
import argparse
from markovchain import MarkovChain

# Command-line interface: all three options are mandatory.
parser = argparse.ArgumentParser()
parser.add_argument('-c', '--corpus',
                    help='Name of corpus file', required=True,
                    metavar='corpusfile')
# No default here: the option is required, so a default could never apply.
parser.add_argument('-n', '--order', help='Chain depth',
                    type=int, required=True, metavar='depth')
parser.add_argument('-o', '--out', help='Output DB filename',
                    required=True, metavar='outfile')
args = parser.parse_args()

# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print('Generating markov chain database from %s' % args.corpus)

# Build the chain from the corpus file at the requested depth.
mc = MarkovChain(corpusfile=args.corpus, order=args.order)
print(mc)

print('Saving database to %s' % args.out)
mc.save_db(args.out)

Esempio n. 9
0
from markovchain import MarkovChain

# Small demo: build a chain from a fixed sentence and print ten phrases.
corpus = "this is a bunch of text used to make a markov chain. This is pretty neat."
chain = MarkovChain(corpus)

for _ in range(10):
    # print(...) with one argument behaves the same on Python 2 and Python 3.
    print(chain.generate_phrase(min_words=5, max_size=140))
Esempio n. 10
0
#!/usr/bin/env python

import argparse
from markovchain import MarkovChain

# Command-line interface: the saved database is mandatory, the line count
# defaults to 10.
parser = argparse.ArgumentParser()
parser.add_argument('-d', '--db', help='Filename of saved database',
                    metavar='database', required=True)
parser.add_argument('-n', '--number', type=int, default=10,
                    help='Number of lines to generate', metavar='numlines')
args = parser.parse_args()

# Load the saved chain and print the requested number of generated titles.
# range and single-argument print(...) work on both Python 2 and Python 3.
mc = MarkovChain(saveddb=args.db)
for _ in range(args.number):
    print(mc.random_title())