def debug_source():
    """Return a filtered line iterator over a local tweet dump, for debugging."""
    raw = poetryutils2.utils.lines_from_file('/Users/cmyr/tweetdbm/may09.txt')
    chain = [
        poetryutils2.filters.url_filter,
        poetryutils2.filters.ascii_filter,
        poetryutils2.filters.low_letter_filter(0.9),
    ]
    return poetryutils2.line_iter(raw, chain)
def debug_source():
    """Build the debug line source: a local tweet dump run through basic filters.

    NOTE(review): an identical ``debug_source`` is also defined earlier in this
    file; this later definition shadows it — confirm which one is intended.
    """
    dump_path = '/Users/cmyr/tweetdbm/may09.txt'
    basic_filters = [
        poetryutils2.filters.url_filter,
        poetryutils2.filters.ascii_filter,
        poetryutils2.filters.low_letter_filter(0.9),
    ]
    file_lines = poetryutils2.utils.lines_from_file(dump_path)
    return poetryutils2.line_iter(file_lines, basic_filters)
def get_lines(source, filters):
    """Collect filtered lines grouped by syllable count.

    Only lines counting exactly 6 or 9 syllables are kept; the result maps
    syllable count -> list of matching lines.
    """
    wanted_counts = (6, 9)
    grouped = defaultdict(list)
    for candidate in poetryutils2.line_iter(source, filters):
        n_syllables = poetryutils2.count_syllables(candidate)
        if n_syllables in wanted_counts:
            grouped[n_syllables].append(candidate)
    return grouped
def line_iter(host="127.0.0.1", port="8069", request_kwargs=None):
    """Yield a StreamResult item for each tweet line passing the basic filters.

    request_kwargs is currently unused; kept for interface compatibility.
    """
    tweet_stream = tweet_filter(zmqstream.zmq_iter(host=host, port=port))
    active_filters = [
        poetry.filters.numeral_filter,
        poetry.filters.ascii_filter,
        poetry.filters.url_filter,
        poetry.filters.real_word_ratio_filter(0.9),
    ]
    for text_line in poetry.line_iter(tweet_stream, active_filters, key='text'):
        yield StreamResult(StreamResultItem, text_line)
def generate_haiku(debug=False, verbose=False):
    """Yield formatted haiku from the live ZMQ stream (or a debug file source).

    When debug is True, lines come from debug_source() wrapped as dicts
    instead of the live stream.
    """
    poet = poetryutils2.Haikuer(debug=(debug or verbose))
    active_filters = init_filters()
    if debug:
        feed = debug_dict_wrapper(debug_source())
    else:
        feed = poetryutils2.line_iter(zmq_stream_source(), active_filters, key='text')
    for found in poet.generate_from_source(feed, key='text'):
        yield format_haiku(found)
def line_iter(host="127.0.0.1", port="8069", request_kwargs=None, save=False):
    """Stream tweets, run them through a multi-poet, and yield poems as StreamResults.

    Emits StreamResultKeepAlive markers when no poem has been sent for
    KEEP_ALIVE_INTERVAL seconds, and StreamResultItem payloads for each
    completed poem. If save is True, finished poems are also persisted via
    save_poem. request_kwargs is currently unused.
    """
    # Three poets share the incoming lines; a line may complete a poem in any of them.
    poet = poetry.sorting.MultiPoet(poets=[
        poetry.sorting.Haikuer(lang='fr'),
        poetry.sorting.Coupler(lang='fr', syllable_counts=[10, 12]),
        poetry.sorting.Limericker(),
    ])
    stream = tweet_filter(
        zmqstream.zmq_iter(host=host, port=port), langs=['en', 'fr'])
    # Language-agnostic filters applied to every line before poet dispatch.
    line_filters = [
        poetry.filters.numeral_filter,
        poetry.filters.url_filter,
        poetry.filters.hashtag_filter,
        poetry.filters.screenname_filter,
        poetry.filters.low_letter_filter(0.75),
        poetry.filters.blacklist_filter(skip_words_en),
        poetry.filters.emoji_filter
    ]
    # Per-language filters, keyed by the tweet's 'lang' field.
    lang_filters = {
        'en': [
            poetry.filters.real_word_ratio_filter(0.8, lang='en')],
        'fr': [
            poetry.filters.real_word_ratio_filter(0.8, lang='fr'),
            poetry.filters.bad_swears_filter('fr')]
    }
    last_send = time.time()
    for line in poetry.line_iter(stream, line_filters, key='text'):
        # Keep the downstream connection alive during quiet stretches.
        if time.time() - last_send > KEEP_ALIVE_INTERVAL:
            last_send = time.time()
            yield StreamResult(StreamResultKeepAlive, None)
        # Skip lines in unsupported languages or failing their language's filters.
        # (The lang check short-circuits before lang_filters[line['lang']] is read.)
        if (line.get("lang") not in ('en', 'fr') or
                not all(f(poetry.utils.unicodify(line['text']))
                        for f in lang_filters[line['lang']])):
            continue
        # MultiPoet may return a single poem or a list; normalize to a list.
        poem = poet.add_keyed_line(line, key='text')
        if not isinstance(poem, list):
            poem = [poem]
        for p in poem:
            if isinstance(p, poetry.sorting.Poem):
                if save:
                    save_poem(p)
                last_send = time.time()
                yield StreamResult(StreamResultItem, {'poem': p.to_dict()})
def haiku_test(sourcepath):
    """Print every haiku found in a file of newline-delimited text.

    sourcepath should be a path to a file of newline-delimited text.
    """
    poet = poetryutils2.Haikuer()
    test_filters = [
        poetryutils2.filters.url_filter,
        poetryutils2.filters.ascii_filter,
        poetryutils2.filters.low_letter_filter(0.9),
        poetryutils2.filters.real_word_ratio_filter(0.75),
    ]
    file_lines = poetryutils2.utils.lines_from_file(sourcepath)
    filtered = poetryutils2.line_iter(file_lines, test_filters)
    for found in poet.generate_from_source(filtered):
        print(found)
def run(host="127.0.0.1", port="8069", debug=False, save_json=False):
    """Stream tweets through line filters and print limericks as they complete.

    If save_json is True, each poem is also persisted via save_as_json.
    """
    poet = poetry.Limericker(debug)
    texts = tweet_filter(zmqstream.zmq_iter(host=host, port=port))
    chain = [
        poetry.filters.numeral_filter,
        poetry.filters.ascii_filter,
        poetry.filters.url_filter,
        poetry.filters.real_word_ratio_filter(0.9),
    ]
    filtered = poetry.line_iter(texts, chain, key="text")
    wrapped = iter_wrapper(filtered, key="text")
    for poem in poet.generate_from_source(wrapped, key="text"):
        print(poet.prettify(poem))
        if save_json:
            save_as_json(poet.dictify(poem))
def run(host="127.0.0.1", port="8069", debug=False, save_json=False):
    """Stream tweets through line filters and print couplets as they complete.

    debug is accepted for signature parity with the limerick runner but is not
    used by Coupler. If save_json is True, each poem is persisted via
    save_as_json.
    """
    poet = poetry.Coupler()
    texts = tweet_filter(zmqstream.zmq_iter(host=host, port=port))
    chain = [
        poetry.filters.numeral_filter,
        poetry.filters.ascii_filter,
        poetry.filters.url_filter,
        poetry.filters.real_word_ratio_filter(0.9),
    ]
    feed = iter_wrapper(poetry.line_iter(texts, chain))
    for poem in poet.generate_from_source(feed):
        print(poet.prettify(poem))
        if save_json:
            save_as_json(poet.dictify(poem))
def freeverse():
    """Yield an HTML panel for every five consecutive filtered lines.

    Reads the module-level ``source`` and ``filters`` globals; sleeps briefly
    between panels to throttle output.
    """
    stanza = []
    panel_number = 0
    for record in line_iter(source, filters):
        stanza.append(record)
        if len(stanza) == 5:
            yield """
<div class="panel panel-default" style="display:inline-block;">
<div class="panel-heading">
<h3 class="panel-title">Free verse {0}</h3>
</div>
<div class="panel-body">
<div>{1}</div>
<div>{2}</div>
<div>{3}</div>
<div>{4}</div>
<div>{5}</div>
</div>
</div>""".format(panel_number, stanza[0], stanza[1], stanza[2], stanza[3],
                stanza[4])
            panel_number += 1
            stanza = []
            time.sleep(0.3)
def main(args=sys.argv):
    """Build a filter chain from command-line flags and return a line iterator.

    args: an argv-style list (defaults to sys.argv); args[0] is treated as the
    program name and skipped.

    Returns the iterator produced by poetryutils2.line_iter over the source
    files named on the command line, filtered by the selected filters.
    """
    import argparse
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('src', type=str, nargs='+', help="source file")
    parser.add_argument('-u', '--url-filter',
                        help='filter out lines containing urls',
                        action='store_true')
    parser.add_argument('-a', '--ascii-filter',
                        help='filter out line with non-ascii characters',
                        action='store_true')
    parser.add_argument('-n', '--numeral-filter',
                        help='filter out lines with numerals',
                        action='store_true')
    parser.add_argument('-t', '--title-case-filter',
                        help='only tweets in title case', action='store_true')
    parser.add_argument('--re', type=str, help='regular expression filter')
    parser.add_argument('--blacklist', type=str, help='blacklisted words')
    parser.add_argument('-L', '--letter-ratio', type=float,
                        help='filter out tweets with low letter ratio')
    parser.add_argument('-R', '--real-word-ratio', type=float,
                        help='filter out tweets with low real-word ratio')
    parser.add_argument('--rhyme', type=str,
                        help='filter to lines that rhyme with input')
    parser.add_argument('-l', '--line-length', type=str,
                        help='allowed line lengths')
    parser.add_argument('-s', '--syllable-count', type=str,
                        help='allowed line syllables')
    parser.add_argument('-i', '--ignore-case',
                        help='regex is case-insensitive', action='store_true')
    # BUG FIX: the original called parser.parse_args() with no arguments,
    # silently ignoring the `args` parameter. Honor it (skipping the program
    # name); the default call main() behaves exactly as before.
    args = parser.parse_args(args[1:])

    poet_filters = []
    if not args.src:
        # Defensive guard: nargs='+' makes argparse reject an empty src list
        # before we get here, but bail out explicitly rather than fall through.
        print('please specify a source file')
        return None
    if args.title_case_filter:
        poet_filters.append(poetryutils2.filters.title_case_filter)
    if args.line_length:
        poet_filters.append(
            poetryutils2.filters.line_length_filter(args.line_length))
    if args.blacklist:
        blacklist = args.blacklist.split(',')
        print('blacklist: %s' % repr(blacklist))
        poet_filters.append(poetryutils2.filters.blacklist_filter(blacklist))
    if args.ascii_filter:
        poet_filters.append(poetryutils2.filters.ascii_filter)
    if args.numeral_filter:
        poet_filters.append(poetryutils2.filters.numeral_filter)
    # BUG FIX: float flags are compared against None rather than tested for
    # truthiness, so an explicit ratio of 0.0 is no longer silently dropped.
    if args.letter_ratio is not None:
        poet_filters.append(
            poetryutils2.filters.low_letter_filter(args.letter_ratio))
    if args.url_filter:
        poet_filters.append(poetryutils2.filters.url_filter)
    if args.real_word_ratio is not None:
        poet_filters.append(
            poetryutils2.filters.real_word_ratio_filter(args.real_word_ratio))
    if args.re:
        poet_filters.append(
            poetryutils2.filters.regex_filter(args.re, args.ignore_case))
    if args.syllable_count:
        poet_filters.append(
            poetryutils2.filters.syllable_count_filter(args.syllable_count))
    if args.rhyme:
        poet_filters.append(poetryutils2.filters.rhyme_filter(args.rhyme))

    source = get_source_iter(args.src)
    return poetryutils2.line_iter(source, poet_filters)