def write_story():
    output_text = []

    # First, set up table of filenames
    section_filenames = []
    for which_section in range(1, 1 + sections_in_chapter):
        section_filenames.append('%s/%02d.txt' % (wandering_rocks_sections_path, which_section))
    log_it("INFO: filenames table set up")
    log_it("    length is %d" % len(section_filenames), 2)
    log_it("\n    and the filenames table is:\n" + pformat(section_filenames))

    stats_file = open(wandering_rocks_stats_file)
    the_line = stats_file.readline()        # Read and ignore the header line
    log_it("INFO: header read from stats file, about to parse stats file and start generating text")

    for which_section in range(1, 1 + sections_in_chapter):
        the_line = stats_file.readline()    # Read another line from the stats file
        log_it("INFO: Parsing the line '%s'." % the_line.split(), 2)
        sec, pars, sents, words = map(int, the_line.split(','))
        log_it("    sec: %d; pars: %d; sents: %d; words: %d" % (sec, pars, sents, words), 2)
        if sec != which_section:            # Elementary sanity check
            raise IndexError("The stats file for Wandering Rocks is corrupt: section number %d encountered out of order." % sec)
        log_it("    generating based on sections %d, %d, %d." %
               (1 + (which_section + 17) % 19, which_section, (which_section + 1) % 19), 2)
        log_it("    asking for %d sentences with paragraph break probability of %f." % (sents, pars / sents))
        which_rocks_sections = [section_filenames[1 + (which_section + 17) % 19 - 1],
                                section_filenames[which_section - 1],
                                section_filenames[(which_section + 1) % 19 - 1]]
        section_genny = tg.TextGenerator(name="Wandering Rocks generator for section %d" % which_section)
        train_with_mixins(section_genny, chain_length, which_rocks_sections,
                          glob.glob('%s/*txt' % mixin_texts_dir))
        output_text.append(section_genny.gen_text(sentences_desired=sents,
                                                  paragraph_break_probability=(pars / sents)))

    stats_file.close()
    return '\n<center>* * *</center>\n'.join(output_text)
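# A quick, illustrative sketch (not part of the original script) of the neighbor-section
# arithmetic used above: for each of the chapter's 19 sections, the previous and next
# sections are chosen with wraparound at both ends. The negative index that
# (which_section + 1) % 19 - 1 produces for section 18 still reaches section 19's
# filename, because Python resolves index -1 to the last element of the list.
def _wandering_rocks_neighbors(which_section, total_sections=19):
    """Return (previous, next) 1-based section numbers, wrapping around the chapter."""
    prev_section = 1 + (which_section + total_sections - 2) % total_sections
    next_section = 1 + which_section % total_sections
    return prev_section, next_section

# _wandering_rocks_neighbors(1)  == (19, 2)
# _wandering_rocks_neighbors(19) == (18, 1)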
def write_generic_story(chain_length,
                        chapter_length,             # In sentences
                        sentences_per_paragraph,    # On average
                        joyce_text_path,            # A list
                        mixin_texts_dir,            # Full path
                        joyce_ratio=1.2):
    genny = tg.TextGenerator()
    train_with_mixins(genny, chain_length, [joyce_text_path],
                      glob.glob('%s/*txt' % mixin_texts_dir), joyce_ratio)
    return genny.gen_text(sentences_desired=chapter_length,
                          paragraph_break_probability=(1 / sentences_per_paragraph))
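# Illustrative call only: the chain length, sentence counts, and paths below are made-up
# placeholder values, not settings taken from any of the real chapter scripts.
#
#     chapter_text = write_generic_story(chain_length=2,
#                                        chapter_length=80,             # sentences in the chapter
#                                        sentences_per_paragraph=6,     # roughly a 1-in-6 chance of a paragraph break
#                                        joyce_text_path='/UlyssesRedux/corpora/some-chapter.txt',
#                                        mixin_texts_dir='/UlyssesRedux/corpora/current-run/some-chapter',
#                                        joyce_ratio=1.2)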
import os, glob, sys

sys.path.append('/UlyssesRedux/scripts/')
from directory_structure import *       # Gets us the listing of file and directory locations.
sys.path.append(markov_generator_path)

import text_generator as tg

import patrick_logger                   # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run
headlines_genny = tg.TextGenerator(name="Aeolus headlines generator")
headlines_genny.train(the_files=[aeolus_headlines_path], markov_length=headline_chain_length)

joyce_text_length = os.stat(aeolus_nonheadlines_path).st_size
mixin_texts_length = 0
articles_files = glob.glob('%s/07/*txt' % current_run_corpus_directory)
for which_file in articles_files:
    mixin_texts_length += os.stat(which_file).st_size
ratio = int(round((mixin_texts_length / joyce_text_length) * joyce_ratio))
articles_files = [aeolus_nonheadlines_path] * ratio + articles_files

articles_genny = tg.TextGenerator(name="Aeolus articles generator")
articles_genny.train(the_files=articles_files, markov_length=nonheadline_chain_length)

log_it("INFO: trained generators for both headlines and non-headlines files, moving on", 2)


def getParagraph(genny, num_sents, num_words):
def output_to_terminal(self, group_parser):
    """Output a listing of each symptom burst and its details to the terminal,
    depending on the level of verbosity selected.
    """
    text_generator.TextGenerator(group_parser, self.flags.verbosity).write_output()
with open('/lovecraft/current-tags') as tagfile:
    the_tags = ', '.join([t.strip() for t in tagfile.readlines()])

story_length = random.choice(list(range(25, 71)))
the_content = ''


# Utility functions
def print_usage():      # Note that, currently, nothing calls this.
    """Print the docstring as a usage message to stdout"""
    patrick_logger.log_it("INFO: print_usage() was called")
    print(__doc__)


patrick_logger.log_it("INFO: tags and sentence lengths set up ...", 2)

genny = tg.TextGenerator(name='Lovecraft Generator')
genny.chains.read_chains(chains_file)

patrick_logger.log_it("INFO: chains read, starting run ...", 2)

# Next, pick out a title between 10 and 70 characters
the_length = 300
patrick_logger.log_it("INFO: getting a story title ...", 2)
while not 10 <= the_length <= 70:
    the_title = genny.gen_text().strip()
    the_length = len(the_title)
    patrick_logger.log_it("INFO: The story title generated was '%s'" % the_title, 2)
    patrick_logger.log_it("INFO: And the length of that title is: " + str(the_length), 2)
    if the_title in open('/lovecraft/titles.txt').read():
        patrick_logger.log_it("That title's been used! Trying again ...\n\n\n")
        the_length = 300    # Force the loop to grind through again
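# A compact sketch (illustrative only, not part of the original script) of the same
# title-selection logic as a reusable function: keep generating candidates until one
# falls within the length bounds and does not already appear in the used-titles file.
def _pick_unused_title(generator, min_len=10, max_len=70, used_titles_path='/lovecraft/titles.txt'):
    with open(used_titles_path) as titles_file:
        used_titles = titles_file.read()
    while True:
        candidate = generator.gen_text().strip()
        if min_len <= len(candidate) <= max_len and candidate not in used_titles:
            return candidate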
def write_story():
    corpora = {}
    log_it("INFO: about to start processing corpora.")
    for which_corpus in sorted(glob.glob(circe_corpora_path + '*txt')):
        log_it('  INFO: processing "%s".' % which_corpus, 2)
        corpus_name = os.path.basename(which_corpus)[:-4]
        genny = tg.TextGenerator(name="%s generator" % corpus_name)
        train_with_mixins(genny, chain_length, [which_corpus], glob.glob('%s/*txt' % mixin_texts_dir))
        corpora[corpus_name] = genny
    log_it("DEBUGGING: Corpora are: \n" + pprint.pformat(corpora), 3)

    the_chapter = []

    def get_speaker_text(speaker_name, num_sentences):
        if speaker_name in corpora:
            which_index = speaker_name
        elif speaker_name == 'STAGE':
            which_index = 'STAGE DIRECTIONS'
        else:
            which_index = 'MINOR CHARACTERS'
        return corpora[which_index].gen_text(sentences_desired=num_sentences,
                                             paragraph_break_probability=0)

    log_it("INFO: About to process stats file.")
    with open(circe_stats_path) as circe_stats_file:
        for the_encoded_paragraph in circe_stats_file:
            # Process each line, using it as a map of the corresponding paragraph in 'Circe'.
            # The structure of these lines is defined in /UlyssesRedux/scripts/utility_scripts/analyze-chapter-15.py,
            # but here's a quick reminder:
            #   Two parts: first, the name of a speaker (or "STAGE" if it's a paragraph of stage directions);
            #   then, a series of codes for "chunks" of the paragraph.
            #   A "chunk" is a number of sentences. If the number is preceded by an opening parenthesis, it's an
            #   intraparagraph stage direction.
            #   Parts of the line, and chunk descriptions, are separated by vertical bars (pipe characters),
            #   hence the .psv extension.
            log_it('INFO: Processing coded line "%s".' % the_encoded_paragraph.strip(), 2)
            code_to_process = the_encoded_paragraph.split('|')
            speaker_name = code_to_process.pop(0)
            log_it('  speaker name is "%s".' % speaker_name, 2)
            if speaker_name != 'STAGE':     # Unless the name is 'STAGE', add it to the beginning of this paragraph ...
                this_paragraph = '%s: ' % speaker_name
            else:                           # ... in which case, begin with an opening parenthesis.
                this_paragraph = '('
            while len(code_to_process) > 0:
                chunk_descriptor = code_to_process.pop(0)
                log_it('    processing chunk "%s".' % chunk_descriptor.strip(), 2)
                if chunk_descriptor[0] == '(':
                    this_paragraph = this_paragraph + '(%s) ' % get_speaker_text('STAGE', int(chunk_descriptor[1:])).strip()
                else:
                    this_paragraph = this_paragraph + '%s ' % get_speaker_text(speaker_name, int(chunk_descriptor))
                log_it('    current paragraph length is now %d.' % len(this_paragraph), 3)
            if speaker_name == 'STAGE':
                this_paragraph = this_paragraph.strip() + ')'
            log_it('  done with this paragraph; total length is %d.' % len(this_paragraph), 2)
            the_chapter.append(this_paragraph)
    return '\n'.join(the_chapter)
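# Purely illustrative example of the line format described above (this sample line is
# made up, not drawn from the real stats file): a .psv line such as
#
#     BLOOM|2|(1|3
#
# would be handled as: speaker BLOOM; 2 sentences generated from the corresponding
# corpus (or from the MINOR CHARACTERS corpus, if no BLOOM corpus exists), then a
# 1-sentence parenthesized stage direction, then 3 more sentences, all joined into a
# single paragraph beginning "BLOOM: ".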
import glob

import text_generator as tg

import patrick_logger                   # From https://github.com/patrick-brian-mooney/personal-library
from patrick_logger import log_it

# First, set up constants
questions_chain_length = 1
answers_chain_length = 2
mixin_texts_dir = '%s17' % current_run_corpus_directory

patrick_logger.verbosity_level = 0
log_it("INFO: Imports successful, moving on", 2)

# Create the necessary sets of Markov chains once, at the beginning of the script's run
questions_genny = tg.TextGenerator(name="Ithaca questions generator")
questions_genny.train([ithaca_questions_path], markov_length=questions_chain_length)

answers_genny = tg.TextGenerator(name="Ithaca answers generator")
train_with_mixins(answers_genny, joyce_text_list=[ithaca_answers_path],
                  mixin_texts_list=glob.glob('%s/*txt' % mixin_texts_dir),
                  chain_length=answers_chain_length)

log_it("INFO: trained generators for both questions and answers; moving on ...", 2)


# Unlike the 'Aeolus' script, this script makes no effort to enforce sticking within word-limit boundaries.
# You can see that in the next two routines, which just call sentence_generator.gen_text() directly.
def getQuestion(num_sents, num_words):