예제 #1
0
        process_batch(word_batch, curr_dict)

        new_stems = len(curr_dict) - start_dict_size
        spent_seconds = time.time() - start
        if new_stems >= 0:
            print("finished batch {} in {} seconds".format(batch_no, spent_seconds))
            print("added {} entries to stem map".format(new_stems))
            save_dict(curr_dict)
        else:
            log_error('stem dictionary is corrupted')
            raise RuntimeError
        if spent_seconds < INTER_BATCH_SLEEP_SECONDS:
            time.sleep(INTER_BATCH_SLEEP_SECONDS - spent_seconds)
            batch_no += 1
        if test and batch_no >= n_loops:
            break
    return curr_dict


if __name__ == "__main__":
    # Script entry point: fetch the lyrics, build the stem dictionary, then
    # hand both to get_counts(). The return value of get_counts() is discarded.
    # get_lyrics() is unpacked into two values; `unique_words` is only
    # referenced by the disabled debug prints below.
    lyrics, unique_words = get_lyrics()
    # Disabled debug output for inspecting what get_lyrics() returned.
    # print(len(lyrics))
    # print(lyrics[0])
    # print("*"*10)
    # print(unique_words[0])
    # print("...")
    # print(unique_words[-1])
    # build_stem_dict() is called with no arguments here.
    stem_dict = build_stem_dict()
    get_counts(lyrics, stem_dict)

예제 #2
0
def init():
    """Call ``get_lyrics()`` with no arguments and discard its result.

    Returns ``None``.

    NOTE(review): presumably invoked for a side effect of ``get_lyrics()``
    (its definition is not visible here) -- confirm against its source.
    """
    get_lyrics()
        return [(c, cnt / s) for c, cnt in counter.iteritems()]

    outlm = {hist: normalize(chars) for hist, chars in lm.iteritems()}
    return outlm


def generate_text(lm, order, nletters=1000):
    """Generate ``nletters`` characters by repeatedly sampling from ``lm``.

    The starting history is one of the keys of ``lm``, picked with
    ``random.choice``. Each sampled character is appended to the output and
    to the history handed to ``generate_letter`` for the next draw.

    Args:
        lm: Mapping from a history to the data ``generate_letter`` reads for
            that history.
        order: Number of trailing history items kept between draws.
        nletters: Number of characters to generate.

    Returns:
        The generated characters joined into a single string (empty when
        ``nletters`` is 0).

    Raises:
        IndexError: If ``lm`` is empty (raised by ``random.choice``).
    """
    # Pick the seed history from the keys directly. Indexing ``lm.items()``
    # only works where it returns a list (Python 2), and the values are not
    # needed here. The key order matches the order of ``items()``, so the
    # same key is drawn for the same RNG state.
    history = random.choice(list(lm))
    out = []
    # ``range`` rather than ``xrange`` so the loop runs on Python 2 and 3.
    for _ in range(nletters):
        c = generate_letter(lm, history, order)
        # Keep the history bounded: the last ``order`` items plus the new one.
        history = history[-order:] + c
        out.append(c)
    return "".join(out)


def generate_letter(lm, history, order):
    """Sample one character from the distribution stored for ``history``.

    Only the last ``order`` items of ``history`` are used as the lookup key.
    The distribution is walked in order, subtracting each weight from a
    uniform random draw in [0.0, 1.0) until the remainder is used up.

    Args:
        lm: Mapping from a history to an iterable of ``(char, weight)`` pairs.
        history: Sequence of the characters generated so far.
        order: Number of trailing items of ``history`` that form the key.

    Returns:
        The first character whose cumulative weight reaches the random draw.
        If the weights run out first (for example because they sum to
        slightly less than 1 after floating-point rounding), the last
        character of the distribution is returned instead of ``None``.
        ``None`` is returned only when the distribution is empty.

    Raises:
        KeyError: If the trimmed history is missing from a plain-dict ``lm``.
    """
    # ``history[-0:]`` is the whole sequence, not an empty one, so an order
    # of 0 needs an explicit empty context of the same type.
    context = history[-order:] if order else history[:0]
    dist = lm[context]
    remaining = random.random()
    c = None
    for c, v in dist:
        remaining -= v
        if remaining <= 0:
            return c
    # The weights did not cover the draw: fall back to the last character
    # seen so the caller never receives None from a non-empty distribution.
    return c


# Interactive driver. These statements run at module level (there is no
# __main__ guard) and use Python 2 constructs (raw_input, print statement).
artist = raw_input('Enter Artist: ')
# int() raises ValueError when the entered order is not an integer literal.
order = int(raw_input('Enter character-level n-gram order: '))
# Four values are unpacked from get_lyrics(); only `lyrics` is used in the
# statements below.
(lyrics, lines, bow, line_endings) = scrape_lyrics.get_lyrics(artist)
# Train the character-level model on the lyrics, then print generated text
# using the default number of letters of generate_text().
lm = train_char_lm(lyrics, order)
print generate_text(lm, order)