Example #1
def main():
    df = fill_song_pd()
    lyrics = ""  #going to be one huge string
    db_name = './markov/' + genre
    mc = MarkovChain(db_name)
    # Create a new Markov database if one doesn't exist yet
    if not os.path.isfile(db_name):
        print("creating new data set based on the " + str(genre) + " genre...")
        for index, row in df.iterrows():
            if row['genre'] == genre_dict[genre]:
                lyrics += row["lyrics"] + " "
        # Build and persist the database once, after all lyrics are collected
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    for i in range(int(lines) + 1):
        print(mc.generateString())
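
All the examples on this page share the same build-or-load pattern: point a MarkovChain at a database path, and only call generateDatabase()/dumpdb() when that file doesn't exist yet. A minimal self-contained sketch of the pattern, assuming the pymarkovchain package (whose MarkovChain class these snippets appear to use):

import os

from pymarkovchain import MarkovChain  # assumed import path


def load_or_build_chain(db_path, corpus_text):
    # MarkovChain(path) loads the database at `path` if it already exists
    mc = MarkovChain(db_path)
    if not os.path.isfile(db_path):
        mc.generateDatabase(corpus_text)  # train on the raw text
        mc.dumpdb()                       # persist it for the next run
    return mc


chain = load_or_build_chain('./markov_demo', "One sentence. Another one. A third.")
print(chain.generateString())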
Example #2
import os

from pymarkovchain import MarkovChain  # assumed import path for PyMarkovChain


def generate_database():
    """
    Generates the database that the Markov Chain will use to make its word-by-
    word predictions. It will attempt to create this file in the same directory
    as where the script is currently located.
    """
    currpath = os.path.dirname(__file__)
    path_to_data = os.path.join(currpath, 'in.txt')

    chain = MarkovChain()

    with open(path_to_data) as f:
        chain.generateDatabase(f.read())
        chain.dumpdb()

    print(chain.generateString())
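
Versions of PyMarkovChain I have seen also expose generateStringWithSeed(), which continues a sentence from given starting words; treat the method name as an assumption if your version differs. A small sketch reusing the default database dumped above:

from pymarkovchain import MarkovChain  # assumed import path

chain = MarkovChain()  # no path: reuses the default database location
# generateStringWithSeed() is an assumption based on the PyMarkovChain README
print(chain.generateStringWithSeed("the"))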
Example #3
def main():
    args = parser.parse_args()
    dirname = os.path.dirname(__file__)
    filename = os.path.join(dirname, "phil.txt")
    title_filename = os.path.join(dirname, "phil_titles.txt")
    dbname1 = "database.pkl"
    dbname2 = "database_title.pkl"
    new_db = not os.path.exists(dbname1)
    body_maker = MarkovChain(dbname1)
    title_maker = MarkovChain(dbname2)
    if new_db:
        with open(title_filename) as f:
            title_maker.generateDatabase(f.read())
        title_maker.dumpdb()
        with open(filename) as f:
            body_maker.generateDatabase(f.read())
        body_maker.dumpdb()

    name = title_maker.generateString()
    body = '  '.join([body_maker.generateString() + '.' for i in range(3)])

    if args.repo:
        if args.token:
            token = args.token
        else:
            token_filename = os.path.join(dirname, "token.txt")
            if not os.path.exists(token_filename):
                sys.stderr.write("Please either specify --token=XXX on the command line or put a github API token in token.txt\n")
                sys.stderr.write("You can generate a token here: https://github.com/settings/tokens\n")
                sys.exit(1)
            token = open(token_filename).read().strip()

        import github
        gh = github.Github(token)
        user = gh.get_user()
        repo = user.get_repo(args.repo)
        issue = repo.create_issue(title=name, body=body)
        print(issue.html_url)
    else:
        print()
        print(name)
        print("-" * len(name))
        print(body)
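
The snippet above references a module-level parser and a few imports that aren't shown. A plausible preamble, inferred from the args.repo/args.token accesses and the --token=XXX hint in the error message (the help strings and everything beyond those two option names are guesses):

import argparse
import os
import sys

parser = argparse.ArgumentParser(
    description="Generate a Markov-chain post and optionally file it as a GitHub issue.")
parser.add_argument("--repo", help="GitHub repository to create the issue in")
parser.add_argument("--token", help="GitHub API token (otherwise read from token.txt)")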
Example #4
    # (The opening of this example was truncated in the source; it evidently
    # hashed the artist name into a cache filename. hashlib.md5 here is an
    # assumption; only the trailing call survives.)
    db_name_hashed = hashlib.md5(
        artist_name.lower().encode('utf-8')).hexdigest()
    mc = MarkovChain(db_name_hashed)

    # Check whether the database already exists; if so, use the cache instead of making another API call
    if not os.path.isfile(db_name_hashed):
        print(
            "No data cached. Please be patient while we search the lyrics of %s."
            % artist_name)

        # Accumulate all the lyrics into one giant string
        lyrics = ''

        # Fetch the lyrics of each song by this artist.
        # [http://api.wikia.com/wiki/LyricWiki_API]
        # (API_URI and the initial `params` come from the truncated opening.)
        artist = requests.get(API_URI, params=params).json()
        for album in artist['albums']:
            for song in album['songs']:
                params = {'artist': artist_name, 'song': song}
                print("Parsing \"{}\" from Wikia.".format(song))
                response = requests.get(API_URI,
                                        params=params).json()["lyrics"]
                lyrics += response.replace('[...]', '') + ' '

        # Generating the database
        mc.generateDatabase(lyrics)
        mc.dumpdb()

    # Print the requested number of generated phrases
    for i in range(int(number_of_phrases)):
        print(mc.generateString())
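
Hashing the lowercased artist name (reconstructed at the top of this example) produces a short, stable, filesystem-safe cache filename regardless of which characters the name contains. A self-contained illustration, with md5 as the assumed digest:

import hashlib


def cache_name(artist_name):
    # The same artist always maps to the same 32-character hex filename
    return hashlib.md5(artist_name.lower().encode('utf-8')).hexdigest()


print(cache_name("Daft Punk") == cache_name("daft punk"))  # True: case-insensitive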
Example #5
# (Module preamble truncated in the source; DB_FILE and SOURCE_FILE are
# constants defined earlier in the original script.)
if not os.path.isfile(DB_FILE):
    # Handle common user errors
    if not os.path.isfile(SOURCE_FILE):
        if os.path.isfile(DB_FILE + '.7z'):
            sys.exit("NOTICE: Please extract the archive containing the Markov database before use.")
        sys.exit("NOTICE: You can't regenerate the Markov database without the source text.");

    # Constructing the MarkovChain only at this point avoids a spurious
    # warning message when either of the two sys.exit() calls above fires
    mc = MarkovChain(DB_FILE)

    # Generate the database
    with open(SOURCE_FILE, 'r') as f:
        mc.generateDatabase(f.read(), sentenceSep='[.!?"\n]', n=2)
    mc.dumpdb()
else:
    mc = MarkovChain(DB_FILE)
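
This is the only example on the page that passes generateDatabase()'s optional parameters: sentenceSep appears to be a regex character class used to split the sample into sentences (the added " makes quoted speech break cleanly), and n=2 appears to set the word-window size of the chain. A quick illustration of what that separator does, assuming the library splits with Python's re module:

import re

sample = 'He spoke. "Enough!" Was it over? Last line\nNext line'
# sentenceSep='[.!?"\n]' splits on periods, bangs, question marks, quotes and newlines
print([s.strip() for s in re.split(r'[.!?"\n]', sample) if s.strip()])
# -> ['He spoke', 'Enough', 'Was it over', 'Last line', 'Next line']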

def generate_string(max_length):
    # Generate the string
    # We could be a bit smarter about this, but it works fairly well
    gen_string = ''
    short_counter = 0
    while len(gen_string) < max_length:
        new_str = mc.generateString().strip()
        new_str = re.sub(r' , ?', ', ', new_str)

        # Too short or too long to be meaningful
        if len(new_str) < 4 or len(new_str) > 100:
            continue

        # (The rest of this example was truncated in the source; presumably it
        # accumulated the accepted strings along these lines:)
        gen_string += new_str + ' '
    return gen_string.strip()
Example #6
# (Opening truncated in the source; the loop below iterates over tag strings
# `t`, appending them to the module-level CORPUS string. ARGS and TARGET_FILE
# are defined in the truncated part.)
        if "//" in t:
            continue
        if "cw: " in t:
            continue

        # Prune short tags
        if ARGS.prune and len(t) <= 3:
            continue

        # Tags which are just numbers should not be in the corpus
        try:
            int(t.strip())
            continue
        except ValueError:
            pass

        if ARGS.nohash:
            CORPUS += t + " "
        else:
            CORPUS += '#' + t + " "
    CORPUS += "\n"

if ARGS.debug:
    print(CORPUS)
    exit(1)
print("Generating database...")
BOT = MarkovChain(TARGET_FILE)
BOT.generateDatabase(CORPUS)
print("Dumping database to {}".format(TARGET_FILE))
BOT.dumpdb()
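
Example #6 only builds and dumps the database; generating tag lines from it would happen on a later run. A minimal follow-up sketch using the same calls seen throughout this page (the import path and the corpus.db filename are assumptions):

from pymarkovchain import MarkovChain  # assumed import path

BOT = MarkovChain("corpus.db")  # hypothetical stand-in for TARGET_FILE
for _ in range(5):
    print(BOT.generateString())  # one generated line of hashtags per call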