def getBestLines(dbconn, hard_constraints, lines, poem_form, line_index, previous_line=None, count=1):
    """Return up to ``count`` of the candidate ``lines``, best candidates first.

    Args:
        dbconn: Database connection handed through to the ``dbreader`` lookups.
        hard_constraints: Container of constraint names. When it does not
            contain ``'rhyme_part'``, candidates whose rhyme count is below
            half of the average rhyme count are discarded.
        lines: Candidate lines; each is indexed by ``'word'``,
            ``'rhyme_part'`` and ``'group_level'``.
        poem_form: Form object exposing ``lines`` (entries with ``starts`` /
            ``ends`` flags) and ``order``, both indexed by line position.
        line_index: Position in the poem form that is being filled.
        previous_line: Accepted for interface compatibility; not used.
        count: Maximum number of lines to return.

    Returns:
        A new list with at most ``count`` lines (empty when there are no
        candidates). The input sequence is not modified.
    """
    if not lines:
        return []
    lines = list(lines)

    if 'rhyme_part' not in hard_constraints:
        ## One lookup per candidate: the same counts feed both the average
        ## and the filter below.
        rhyme_counts = [
            dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part'])
            for x in lines
        ]
        average_rhymes = sum(rhyme_counts) / len(lines)
        threshold = average_rhymes / 2
        lines = [x for x, n in zip(lines, rhyme_counts) if n >= threshold]

    ## Sort based on how you're likely to continue
    if (not poem_form.lines[line_index].starts
            and poem_form.order[line_index] < poem_form.order[line_index - 1]
            and count == 1):
        lines.sort(key=lambda x: dbreader.posCountsForLine(dbconn, x, 'leading'),
                   reverse=True)
    elif (not poem_form.lines[line_index].ends
            and poem_form.order[line_index] < poem_form.order[line_index + 1]
            and count == 1):
        lines.sort(key=lambda x: dbreader.posCountsForLine(dbconn, x, 'lagging'),
                   reverse=True)
    else:
        ## No positional preference applies: use a random order instead.
        random.shuffle(lines)

    ## Finally, sort within the sort by how close to the original page the
    ## lines are. The sort is stable, so the order chosen above survives
    ## among lines that share a group_level.
    lines.sort(key=lambda x: x['group_level'])
    return lines[:count]
def getBestLines(dbconn, hard_constraints, lines, poem_form, line_index, previous_line=None, count=1):
    """Rank the candidate ``lines`` and return the first ``count`` of them.

    Args:
        dbconn: Database connection handed through to the ``dbreader`` lookups.
        hard_constraints: Container of constraint names. Unless it contains
            ``'rhyme_part'``, candidates whose rhyme count is below half of
            the average rhyme count are dropped before ranking.
        lines: Candidate lines; each is indexed by ``'word'``,
            ``'rhyme_part'`` and ``'group_level'``.
        poem_form: Form object exposing ``lines`` (entries with ``starts`` /
            ``ends`` flags) and ``order``, both indexed by line position.
        line_index: Position in the poem form that is being filled.
        previous_line: Accepted for interface compatibility; not used.
        count: Maximum number of lines to return.

    Returns:
        A new list with at most ``count`` lines (empty when there are no
        candidates). The input sequence is not modified.
    """
    if not lines:
        return []
    candidates = list(lines)

    if 'rhyme_part' not in hard_constraints:
        ## Query each candidate's rhyme count once and reuse it for both the
        ## average and the cut-off test.
        rhyme_counts = [
            dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part'])
            for x in candidates
        ]
        average_rhymes = sum(rhyme_counts) / len(candidates)
        cutoff = average_rhymes / 2
        candidates = [
            x for x, n in zip(candidates, rhyme_counts) if n >= cutoff
        ]

    ## Sort based on how you're likely to continue
    if (not poem_form.lines[line_index].starts
            and poem_form.order[line_index] < poem_form.order[line_index - 1]
            and count == 1):
        candidates.sort(
            key=lambda x: dbreader.posCountsForLine(dbconn, x, 'leading'),
            reverse=True)
    elif (not poem_form.lines[line_index].ends
            and poem_form.order[line_index] < poem_form.order[line_index + 1]
            and count == 1):
        candidates.sort(
            key=lambda x: dbreader.posCountsForLine(dbconn, x, 'lagging'),
            reverse=True)
    else:
        ## No positional preference applies: use a random order instead.
        random.shuffle(candidates)

    ## Finally, sort within the sort by how close to the original page the
    ## lines are. list.sort is stable, so the previous ordering is kept
    ## among lines with equal group_level.
    candidates.sort(key=lambda x: x['group_level'])
    return candidates[:count]
def _fillParallelLines(dbconn, poem_form, search_groups, composed_lines, form_lines, possibility_count):
    """Fill the slot of every entry in ``form_lines`` from one shared candidate pool.

    Constraints are computed once, for the first entry of ``form_lines``, and
    the resulting candidates are distributed at random over all entries.
    ``composed_lines`` is updated in place. Does nothing when ``form_lines``
    is empty.

    Raises:
        ValueError: from ``random.sample`` when fewer candidate lines are
            available than there are entries in ``form_lines``.
    """
    if not form_lines:
        return
    idx = form_lines[0].index
    hard_constraints = hardConstraints(idx, poem_form, composed_lines)
    flexible_constraints = flexibleConstraints(idx, poem_form, composed_lines)
    possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints,
                                          search_groups, composed_lines,
                                          possibility_count=possibility_count)
    best_lines = getBestLines(dbconn, hard_constraints, possible_lines, poem_form, idx, count=10)
    chosen_lines = random.sample(best_lines, len(form_lines))
    for form_line, line in zip(form_lines, chosen_lines):
        composed_lines[form_line.index] = line


def poemForPageID(pageID, sonnet_form_name, dbconfig, multi=False, output_queue=None, callback=None, user_info=None):
    """Compose a poem in the named form, seeded from the page ``pageID``.

    The function first places lines taken from the page itself into randomly
    ordered stanzas, then fills the first line of every remaining stanza, and
    finally hands each stanza to ``composeLinesAtIndexes``.

    Args:
        pageID: Page to start from; redirects are followed first.
        sonnet_form_name: Name passed to ``PoemForm.NamedPoemForm``.
        dbconfig: Mapping with ``"database"``, ``"user"``, ``"host"`` and
            ``"password"`` entries used to open the connection.
        multi: When true, stanzas are composed in a 4-process pool that
            shares a managed copy of the composed lines.
        output_queue: When given, one work item per stanza
            ``(composeLinesAtIndexes, args, callback, user_info)`` is put on
            the queue and the function returns ``None`` without composing
            the stanzas itself.
        callback: Optional callable invoked as
            ``callback(composed_lines, user_info)`` once the seed lines are
            placed; it is also forwarded to ``composeLinesAtIndexes``.
        user_info: Opaque value passed along to ``callback``.

    Returns:
        The list of composed lines (one slot per line of the form), or
        ``None`` when ``output_queue`` is given.

    Raises:
        ValueError: from ``random.sample`` when fewer candidate lines are
            found than there are stanzas left to start.
    """
    dbconn = dbconnect.MySQLDatabaseConnection(dbconfig["database"], dbconfig["user"],
                                               dbconfig["host"], dbconfig["password"])
    ## The connection is only needed for the seeding phase; make sure it is
    ## released even when that phase fails.
    try:
        ## Decide what kind of poem you're going to write
        poem_form = poemform.PoemForm.NamedPoemForm(sonnet_form_name)

        ## Follow the redirect, if you need to
        pageID = dbreader.followRedirectForPageID(dbconn, pageID)

        ## Get the groups associated with a given page
        ## (perhaps construct table views for speed?)
        major_category = dbreader.categoryForPageID(dbconn, pageID, 'major')
        minor_category = dbreader.categoryForPageID(dbconn, pageID, 'minor')
        search_groups = [
            {'pageIDs': [pageID]},
            {'pageIDs': dbreader.pagesLinkedFromPageID(dbconn, pageID),
             'page_major_category': major_category},
            {'page_minor_category': minor_category},
            {'page_major_category': major_category},
            {},  ## This 'none' group will search through the entire corpus
        ]

        composed_lines = [None for _ in poem_form.lines]

        ## First, get random lines from the starting page itself (if any exist)
        hard_constraints = []
        flexible_constraints = []
        possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints,
                                              [{'pageIDs': [pageID]}], composed_lines,
                                              possibility_count=20)
        filled_stanzas = []
        if possible_lines:
            source_lines = getBestLines(dbconn, hard_constraints, possible_lines,
                                        poem_form, 0, count=10)
            ## Keep a real list: it is measured and popped from below.
            source_lines = [
                x for x in source_lines
                if dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part']) > 0
            ]
            order = list(range(len(poem_form.stanzas)))
            random.shuffle(order)
            for o in order:
                s = poem_form.stanzas[o]
                if len(s) > 2:
                    valid_indexes = list(range(len(source_lines)))
                else:
                    ## Stanzas this short only take a line flagged as
                    ## starting or ending.
                    valid_indexes = [
                        i for i, candidate in enumerate(source_lines)
                        if candidate['starts'] or candidate['ends']
                    ]
                if valid_indexes:
                    line = source_lines.pop(random.choice(valid_indexes))
                    first = min(s)
                    if line['starts']:
                        r = first
                    elif line['ends']:
                        r = max(s)
                    else:
                        r = random.randint(first + 1, max(s) - 1)
                    composed_lines[r] = line
                    poem_form.setStanzaStart(o, r - first)
                    filled_stanzas.append(o)

        ## For parallelization, separate out each stanza
        unfilled_stanzas = [
            stanza for i, stanza in enumerate(poem_form.stanzas)
            if i not in filled_stanzas
        ]
        starting_lines = [poem_form.lines[stanza[0]] for stanza in unfilled_stanzas]
        parallel_starts = [s for s in starting_lines if s.starts]
        parallel_ends = [s for s in starting_lines if s.ends]
        parallel_mids = [s for s in starting_lines if not s.ends and not s.starts]

        ## Compose all the parallelizable starting lines at once, then do the
        ## same for the ending lines and finally for the mid lines. Order
        ## matters: each step sees the lines placed by the previous one.
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_starts, 20)
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_ends, 10)
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_mids, 10)

        ## If there's a callback, call it
        if callback is not None:
            callback(composed_lines, user_info)

        ## Now compose each stanza in parallel
        stanzas = poem_form.stanzas
    finally:
        dbconn.close()

    if output_queue is not None:
        for x in stanzas:
            output_queue.put((composeLinesAtIndexes,
                              (pageID, poem_form, dbconfig, search_groups,
                               composed_lines, x, callback, user_info),
                              callback, user_info))
        return

    if multi:
        manager = Manager()
        managed_composed_lines = manager.list(composed_lines)
        pool = Pool(processes=4)
        try:
            pending = [
                pool.apply_async(composeLinesAtIndexes,
                                 args=(pageID, poem_form, dbconfig, search_groups,
                                       managed_composed_lines, x, callback, user_info))
                for x in stanzas
            ]
            poem_pieces = [p.get() for p in pending]
        finally:
            ## Release the worker processes even if a stanza failed.
            pool.close()
            pool.join()
    else:
        poem_pieces = [
            composeLinesAtIndexes(pageID, poem_form, dbconfig, search_groups,
                                  composed_lines, x, callback, user_info)
            for x in stanzas
        ]

    ## Piece the results back together: every slot that is still empty takes
    ## the first non-empty value found at that position among the pieces.
    for i, l in enumerate(composed_lines):
        if l is None:
            for p in poem_pieces:
                piece = p[i]
                if piece is not None:
                    composed_lines[i] = piece
                    break
    return composed_lines
def _fillParallelLines(dbconn, poem_form, search_groups, composed_lines, form_lines, possibility_count):
    """Fill the slot of every entry in ``form_lines`` from one shared candidate pool.

    Constraints are computed once, for the first entry of ``form_lines``, and
    the resulting candidates are distributed at random over all entries.
    ``composed_lines`` is updated in place. Does nothing when ``form_lines``
    is empty.

    Raises:
        ValueError: from ``random.sample`` when fewer candidate lines are
            available than there are entries in ``form_lines``.
    """
    if not form_lines:
        return
    idx = form_lines[0].index
    hard_constraints = hardConstraints(idx, poem_form, composed_lines)
    flexible_constraints = flexibleConstraints(idx, poem_form, composed_lines)
    possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints,
                                          search_groups, composed_lines,
                                          possibility_count=possibility_count)
    best_lines = getBestLines(dbconn, hard_constraints, possible_lines, poem_form, idx, count=10)
    chosen_lines = random.sample(best_lines, len(form_lines))
    for form_line, line in zip(form_lines, chosen_lines):
        composed_lines[form_line.index] = line


def poemForPageID(pageID, sonnet_form_name, dbconfig, multi=False, output_queue=None, callback=None, user_info=None):
    """Compose a poem in the named form, seeded from the page ``pageID``.

    Lines taken from the page itself are placed into randomly ordered stanzas
    first; the first line of every stanza that got none is filled next; each
    stanza is then handed to ``composeLinesAtIndexes``.

    Args:
        pageID: Page to start from; redirects are followed first.
        sonnet_form_name: Name passed to ``PoemForm.NamedPoemForm``.
        dbconfig: Mapping with ``"database"``, ``"user"``, ``"host"`` and
            ``"password"`` entries used to open the connection.
        multi: When true, stanzas are composed in a 4-process pool that
            shares a managed copy of the composed lines.
        output_queue: When given, one work item per stanza
            ``(composeLinesAtIndexes, args, callback, user_info)`` is put on
            the queue and the function returns ``None`` without composing
            the stanzas itself.
        callback: Optional callable invoked as
            ``callback(composed_lines, user_info)`` once the seed lines are
            placed; it is also forwarded to ``composeLinesAtIndexes``.
        user_info: Opaque value passed along to ``callback``.

    Returns:
        The list of composed lines (one slot per line of the form), or
        ``None`` when ``output_queue`` is given.

    Raises:
        ValueError: from ``random.sample`` when fewer candidate lines are
            found than there are stanzas left to start.
    """
    dbconn = dbconnect.MySQLDatabaseConnection(dbconfig["database"], dbconfig["user"],
                                               dbconfig["host"], dbconfig["password"])
    ## Only the seeding phase uses the connection; close it on every exit
    ## path, including failures.
    try:
        ## Decide what kind of poem you're going to write
        poem_form = poemform.PoemForm.NamedPoemForm(sonnet_form_name)

        ## Follow the redirect, if you need to
        pageID = dbreader.followRedirectForPageID(dbconn, pageID)

        ## Get the groups associated with a given page
        ## (perhaps construct table views for speed?)
        major_category = dbreader.categoryForPageID(dbconn, pageID, 'major')
        minor_category = dbreader.categoryForPageID(dbconn, pageID, 'minor')
        search_groups = [
            {'pageIDs': [pageID]},
            {'pageIDs': dbreader.pagesLinkedFromPageID(dbconn, pageID),
             'page_major_category': major_category},
            {'page_minor_category': minor_category},
            {'page_major_category': major_category},
            {},  ## This 'none' group will search through the entire corpus
        ]

        composed_lines = [None for _ in poem_form.lines]

        ## First, get random lines from the starting page itself (if any exist)
        hard_constraints = []
        flexible_constraints = []
        possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints,
                                              [{'pageIDs': [pageID]}], composed_lines,
                                              possibility_count=20)
        filled_stanzas = []
        if possible_lines:
            source_lines = getBestLines(dbconn, hard_constraints, possible_lines,
                                        poem_form, 0, count=10)
            ## Materialize the filtered lines: the list is measured and
            ## popped from below.
            source_lines = [
                x for x in source_lines
                if dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part']) > 0
            ]
            order = list(range(len(poem_form.stanzas)))
            random.shuffle(order)
            for o in order:
                s = poem_form.stanzas[o]
                if len(s) > 2:
                    valid_indexes = list(range(len(source_lines)))
                else:
                    ## Stanzas this short only take a line flagged as
                    ## starting or ending.
                    valid_indexes = [
                        i for i, candidate in enumerate(source_lines)
                        if candidate['starts'] or candidate['ends']
                    ]
                if valid_indexes:
                    line = source_lines.pop(random.choice(valid_indexes))
                    first = min(s)
                    if line['starts']:
                        r = first
                    elif line['ends']:
                        r = max(s)
                    else:
                        r = random.randint(first + 1, max(s) - 1)
                    composed_lines[r] = line
                    poem_form.setStanzaStart(o, r - first)
                    filled_stanzas.append(o)

        ## For parallelization, separate out each stanza
        unfilled_stanzas = [
            stanza for i, stanza in enumerate(poem_form.stanzas)
            if i not in filled_stanzas
        ]
        starting_lines = [poem_form.lines[stanza[0]] for stanza in unfilled_stanzas]
        parallel_starts = [s for s in starting_lines if s.starts]
        parallel_ends = [s for s in starting_lines if s.ends]
        parallel_mids = [s for s in starting_lines if not s.ends and not s.starts]

        ## Compose all the parallelizable starting lines at once, then the
        ## ending lines, then the mid lines. The order is significant: each
        ## step computes its constraints from the lines placed so far.
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_starts, 20)
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_ends, 10)
        _fillParallelLines(dbconn, poem_form, search_groups, composed_lines,
                           parallel_mids, 10)

        ## If there's a callback, call it
        if callback is not None:
            callback(composed_lines, user_info)

        ## Now compose each stanza in parallel
        stanzas = poem_form.stanzas
    finally:
        dbconn.close()

    if output_queue is not None:
        for x in stanzas:
            output_queue.put((composeLinesAtIndexes,
                              (pageID, poem_form, dbconfig, search_groups,
                               composed_lines, x, callback, user_info),
                              callback, user_info))
        return

    if multi:
        manager = Manager()
        managed_composed_lines = manager.list(composed_lines)
        pool = Pool(processes=4)
        try:
            pending = [
                pool.apply_async(composeLinesAtIndexes,
                                 args=(pageID, poem_form, dbconfig, search_groups,
                                       managed_composed_lines, x, callback, user_info))
                for x in stanzas
            ]
            poem_pieces = [p.get() for p in pending]
        finally:
            ## Release the worker processes even if a stanza failed.
            pool.close()
            pool.join()
    else:
        poem_pieces = [
            composeLinesAtIndexes(pageID, poem_form, dbconfig, search_groups,
                                  composed_lines, x, callback, user_info)
            for x in stanzas
        ]

    ## Piece the results back together: every slot that is still empty takes
    ## the first non-empty value found at that position among the pieces.
    for i, l in enumerate(composed_lines):
        if l is None:
            for p in poem_pieces:
                piece = p[i]
                if piece is not None:
                    composed_lines[i] = piece
                    break
    return composed_lines