Beispiel #1
0
def getBestLines(dbconn, hard_constraints, lines, poem_form, line_index, previous_line=None, count=1):
    """Rank candidate lines for one slot of the poem and return the best few.

    Args:
        dbconn: Database connection handed through to the ``dbreader`` queries.
        hard_constraints: Container of constraint names; only membership of
            ``'rhyme_part'`` is checked here.
        lines: Candidate lines. Each is indexed by ``'word'``, ``'rhyme_part'``
            and ``'group_level'`` and is passed whole to
            ``dbreader.posCountsForLine``.
        poem_form: Form object exposing ``lines`` (entries with ``starts`` and
            ``ends`` attributes) and ``order``, both indexed by line position.
        line_index: Position in ``poem_form`` of the slot being filled.
        previous_line: Accepted for interface compatibility; not used here.
        count: Maximum number of lines to return.

    Returns:
        A list of at most ``count`` candidates, best first; ``[]`` when there
        are no candidates.
    """
    if not lines:
        return []

    if 'rhyme_part' not in hard_constraints:
        ## No rhyme is being forced on this slot, so discard candidates whose
        ## rhyme is scarce (fewer than half the average number of rhymes).
        ## Each count is looked up once and reused for both the average and
        ## the filter, instead of querying the database twice per line.
        rhyme_counts = [dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part']) for x in lines]
        average_rhymes = sum(rhyme_counts) / len(lines)
        lines = [x for x, n in zip(lines, rhyme_counts) if n >= average_rhymes / 2]

    ## Sort based on how you're likely to continue
    form_line = poem_form.lines[line_index]
    if (not form_line.starts and
        poem_form.order[line_index] < poem_form.order[line_index-1] and
        count == 1  # value comparison; `is` only worked through int caching
        ):
        lines = sorted(lines, key=lambda x: dbreader.posCountsForLine(dbconn, x, 'leading'), reverse=True)
    elif (not form_line.ends and
        poem_form.order[line_index] < poem_form.order[line_index+1] and
        count == 1
        ):
        lines = sorted(lines, key=lambda x: dbreader.posCountsForLine(dbconn, x, 'lagging'), reverse=True)
    else:
        ## No preference applies: put the candidates in random order
        lines = sorted(lines, key=lambda x: random.random())

    ## Finally, sort within the sort by how close to the original page the lines are.
    ## sorted() is stable, so the ordering chosen above survives among lines
    ## that share a group_level.
    lines = sorted(lines, key=lambda x: x['group_level'])

    return lines[:count]
Beispiel #2
0
def getBestLines(dbconn,
                 hard_constraints,
                 lines,
                 poem_form,
                 line_index,
                 previous_line=None,
                 count=1):
    """Pick the most suitable candidate lines for one position in the poem.

    Args:
        dbconn: Database connection forwarded to the ``dbreader`` helpers.
        hard_constraints: Container of constraint names; this function only
            tests whether ``'rhyme_part'`` is in it.
        lines: Candidate lines, each subscriptable by ``'word'``,
            ``'rhyme_part'`` and ``'group_level'``.
        poem_form: Form object whose ``lines`` entries carry ``starts`` and
            ``ends`` flags and whose ``order`` is indexable by line position.
        line_index: Index of the position being filled.
        previous_line: Unused; kept so existing callers keep working.
        count: Upper bound on the number of lines returned.

    Returns:
        A list holding up to ``count`` candidates in preference order, or an
        empty list when ``lines`` is empty.
    """
    if not lines:
        return []

    if 'rhyme_part' not in hard_constraints:
        ## With no rhyme imposed, keep only candidates whose rhyme count is
        ## at least half the average. Query each count a single time and
        ## reuse it for the average and for the filter.
        rhyme_counts = [
            dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part'])
            for x in lines
        ]
        average_rhymes = sum(rhyme_counts) / len(lines)
        lines = [
            x for x, n in zip(lines, rhyme_counts)
            if n >= average_rhymes / 2
        ]

    ## Sort based on how you're likely to continue
    form_line = poem_form.lines[line_index]
    if (not form_line.starts
            and poem_form.order[line_index] < poem_form.order[line_index - 1]
            and count == 1):  # compare by value, not identity
        lines = sorted(
            lines,
            key=lambda x: dbreader.posCountsForLine(dbconn, x, 'leading'),
            reverse=True)
    elif (not form_line.ends
          and poem_form.order[line_index] < poem_form.order[line_index + 1]
          and count == 1):
        lines = sorted(
            lines,
            key=lambda x: dbreader.posCountsForLine(dbconn, x, 'lagging'),
            reverse=True)
    else:
        ## Nothing to prefer: shuffle by sorting on a random key
        lines = sorted(lines, key=lambda x: random.random())

    ## Finally, sort within the sort by how close to the original page the lines are.
    ## The sort is stable, so ties on group_level keep the order set above.
    lines = sorted(lines, key=lambda x: x['group_level'])

    return lines[:count]
Beispiel #3
0
def poemForPageID(pageID, sonnet_form_name, dbconfig, multi=False, output_queue=None, callback=None, user_info=None):
    """Compose a poem in the named form, seeded from the given page.

    The function first places lines taken from the page itself, then seeds the
    first line of every stanza that is still empty, and finally has
    ``composeLinesAtIndexes`` work on each stanza.

    Args:
        pageID: Page to start from; replaced by the result of
            ``dbreader.followRedirectForPageID``.
        sonnet_form_name: Name passed to ``poemform.PoemForm.NamedPoemForm``.
        dbconfig: Mapping with ``"database"``, ``"user"``, ``"host"`` and
            ``"password"`` entries.
        multi: When true, stanzas are composed in a 4-process pool.
        output_queue: When given, one work item per stanza is put on this
            queue instead of composing here, and the function returns None.
        callback: Optional callable invoked as
            ``callback(composed_lines, user_info)`` once seeding is done; it
            is also forwarded to ``composeLinesAtIndexes``.
        user_info: Opaque value handed to ``callback``.

    Returns:
        The list of composed lines (one entry per line of the form), or None
        when ``output_queue`` is given.

    Raises:
        ValueError: From ``random.sample`` if fewer candidate lines are found
            than there are stanzas waiting for a seed line of that kind.
    """
    dbconn = dbconnect.MySQLDatabaseConnection(dbconfig["database"], dbconfig["user"], dbconfig["host"], dbconfig["password"])
    try:
        ## Decide what kind of poem you're going to write
        poem_form = poemform.PoemForm.NamedPoemForm(sonnet_form_name)
        # poem_form.scrambleOrder()

        ## Follow the redirect, if you need to
        pageID = dbreader.followRedirectForPageID(dbconn, pageID)

        ## Get the groups associated with a given page (perhaps construct table views for speed?)
        ## The major category is used by two groups, so it is fetched once.
        linked_pages = dbreader.pagesLinkedFromPageID(dbconn, pageID)
        major_category = dbreader.categoryForPageID(dbconn, pageID, 'major')
        minor_category = dbreader.categoryForPageID(dbconn, pageID, 'minor')
        search_groups = [{'pageIDs':[pageID]},
                         {'pageIDs':linked_pages, 'page_major_category':major_category},
                         # {'line_minor_category':dbreader.categoryForPageID(dbconn, pageID, 'minor')},
                         # {'line_major_category':dbreader.categoryForPageID(dbconn, pageID, 'major')},
                         {'page_minor_category':minor_category},
                         {'page_major_category':major_category},
                         {}] ## This 'none' group will search through the entire corpus

        composed_lines = [None for _ in poem_form.lines]

        ## First, get random lines from the starting page itself (if any exist)
        hard_constraints = []
        flexible_constraints = []
        possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints, [{'pageIDs':[pageID]}], composed_lines, possibility_count=20)
        filled_stanzas = []
        if possible_lines:
            source_lines = getBestLines(dbconn, hard_constraints, possible_lines, poem_form, 0, count=10)
            ## Only keep lines that something can rhyme with. A real list is
            ## needed because entries are popped from it below.
            source_lines = [x for x in source_lines if dbreader.rhymeCountForRhyme(dbconn, x['word'], x['rhyme_part']) > 0]
            ## list(...) so the shuffle also works where range() is lazy
            order = list(range(len(poem_form.stanzas)))
            random.shuffle(order)
            for o in order:
                s = poem_form.stanzas[o]
                if len(s) > 2:
                    valid_indexes = list(range(len(source_lines)))
                else:
                    ## A stanza this short has no interior slot, so only
                    ## starting or ending lines can be placed in it
                    valid_indexes = [i for i, x in enumerate(source_lines) if x['starts'] or x['ends']]
                if valid_indexes:
                    random.shuffle(valid_indexes)
                    line = source_lines.pop(valid_indexes[0])
                    if line['starts']:
                        r = min(s)
                    elif line['ends']:
                        r = max(s)
                    else:
                        r = random.randint(min(s)+1, max(s)-1)
                    composed_lines[r] = line
                    poem_form.setStanzaStart(o, r-min(s))
                    filled_stanzas.append(o)

        ## For parallelization, separate out each stanza
        unfilled_stanzas = [s for i, s in enumerate(poem_form.stanzas) if i not in filled_stanzas]
        starting_lines = [poem_form.lines[s[0]] for s in unfilled_stanzas]
        parallel_starts = [s for s in starting_lines if s.starts]
        parallel_ends = [s for s in starting_lines if s.ends]
        parallel_mids = [s for s in starting_lines if not s.ends and not s.starts]

        ## Compose all the parallelizable starting lines at once, then do the
        ## same for the ending lines and finally for the mid lines. Each group
        ## is searched once, using the constraints of its first member.
        for seed_group, possibility_count in ((parallel_starts, 20), (parallel_ends, 10), (parallel_mids, 10)):
            if not seed_group:
                continue
            idx = seed_group[0].index
            hard_constraints = hardConstraints(idx, poem_form, composed_lines)
            flexible_constraints = flexibleConstraints(idx, poem_form, composed_lines)
            possible_lines = computePossibleLines(dbconn, hard_constraints, flexible_constraints, search_groups, composed_lines, possibility_count=possibility_count)
            best_lines = getBestLines(dbconn, hard_constraints, possible_lines, poem_form, idx, count=10)
            ## Raises ValueError when there are fewer candidates than slots
            chosen_lines = random.sample(best_lines, len(seed_group))
            for form_line, l in zip(seed_group, chosen_lines):
                composed_lines[form_line.index] = l

        ## If there's a callback, call it
        if callback is not None:
            callback(composed_lines, user_info)
    finally:
        ## Release the connection even when composing the seed lines fails
        dbconn.close()

    ## Now compose each stanza in parallel
    stanzas = poem_form.stanzas

    if output_queue is not None:
        ## Hand the per-stanza work to whoever consumes the queue
        for x in stanzas:
            output_queue.put((composeLinesAtIndexes, (pageID, poem_form, dbconfig, search_groups, composed_lines, x, callback, user_info), callback, user_info))
        return

    if multi:
        manager = Manager()
        managed_composed_lines = manager.list(composed_lines)
        pool = Pool(processes=4)
        try:
            pp = [pool.apply_async(composeLinesAtIndexes, args=(pageID, poem_form, dbconfig, search_groups, managed_composed_lines, x, callback, user_info)) for x in stanzas]
            poem_pieces = [p.get() for p in pp]
        finally:
            ## Shut the pool down even if one of the workers raised
            pool.close()
            pool.join()
    else:
        poem_pieces = [composeLinesAtIndexes(pageID, poem_form, dbconfig, search_groups, composed_lines, x, callback, user_info) for x in stanzas]

    ## Piece the results back together: every slot that is still empty takes
    ## the first non-empty value any stanza result has for that position
    for i, l in enumerate(composed_lines):
        if l is None:
            for p in poem_pieces:
                if p[i] is not None:
                    composed_lines[i] = p[i]
                    break
    return composed_lines
Beispiel #4
0
def poemForPageID(pageID,
                  sonnet_form_name,
                  dbconfig,
                  multi=False,
                  output_queue=None,
                  callback=None,
                  user_info=None):
    """Write a poem of the named form, starting from one page.

    Lines found on the page itself are placed first. Every stanza left empty
    then gets its first line seeded, and each stanza is passed to
    ``composeLinesAtIndexes`` to be completed.

    Args:
        pageID: Starting page; ``dbreader.followRedirectForPageID`` is applied
            to it before use.
        sonnet_form_name: Form name for ``poemform.PoemForm.NamedPoemForm``.
        dbconfig: Mapping providing ``"database"``, ``"user"``, ``"host"`` and
            ``"password"``.
        multi: If true, compose the stanzas in a pool of 4 processes.
        output_queue: If given, a work item per stanza is queued on it and
            nothing is composed here; the function then returns None.
        callback: Optional callable, called as
            ``callback(composed_lines, user_info)`` after seeding and also
            forwarded to ``composeLinesAtIndexes``.
        user_info: Opaque value passed along to ``callback``.

    Returns:
        The list of composed lines, one entry per line of the form, or None
        when ``output_queue`` is given.

    Raises:
        ValueError: From ``random.sample`` when a search yields fewer
            candidate lines than there are stanzas to seed with them.
    """
    dbconn = dbconnect.MySQLDatabaseConnection(dbconfig["database"],
                                               dbconfig["user"],
                                               dbconfig["host"],
                                               dbconfig["password"])
    try:
        ## Decide what kind of poem you're going to write
        poem_form = poemform.PoemForm.NamedPoemForm(sonnet_form_name)
        # poem_form.scrambleOrder()

        ## Follow the redirect, if you need to
        pageID = dbreader.followRedirectForPageID(dbconn, pageID)

        ## Get the groups associated with a given page (perhaps construct table views for speed?)
        ## Two groups share the major category, so look it up only once.
        linked_pages = dbreader.pagesLinkedFromPageID(dbconn, pageID)
        major_category = dbreader.categoryForPageID(dbconn, pageID, 'major')
        minor_category = dbreader.categoryForPageID(dbconn, pageID, 'minor')
        search_groups = [
            {
                'pageIDs': [pageID]
            },
            {
                'pageIDs': linked_pages,
                'page_major_category': major_category
            },
            # {'line_minor_category':dbreader.categoryForPageID(dbconn, pageID, 'minor')},
            # {'line_major_category':dbreader.categoryForPageID(dbconn, pageID, 'major')},
            {
                'page_minor_category': minor_category
            },
            {
                'page_major_category': major_category
            },
            {}
        ]  ## This 'none' group will search through the entire corpus

        composed_lines = [None for _ in poem_form.lines]

        ## First, get random lines from the starting page itself (if any exist)
        hard_constraints = []
        flexible_constraints = []
        possible_lines = computePossibleLines(dbconn,
                                              hard_constraints,
                                              flexible_constraints, [{
                                                  'pageIDs': [pageID]
                                              }],
                                              composed_lines,
                                              possibility_count=20)
        filled_stanzas = []
        if possible_lines:
            source_lines = getBestLines(dbconn,
                                        hard_constraints,
                                        possible_lines,
                                        poem_form,
                                        0,
                                        count=10)
            ## Drop lines nothing rhymes with. This must be a real list
            ## because lines are popped out of it below.
            source_lines = [
                x for x in source_lines
                if dbreader.rhymeCountForRhyme(dbconn, x['word'],
                                               x['rhyme_part']) > 0
            ]
            ## list(...) keeps the shuffle working where range() is lazy
            order = list(range(len(poem_form.stanzas)))
            random.shuffle(order)
            for o in order:
                s = poem_form.stanzas[o]
                if len(s) > 2:
                    valid_indexes = list(range(len(source_lines)))
                else:
                    ## No interior slot in so short a stanza: only lines
                    ## that start or end can be used
                    valid_indexes = [
                        i for i, x in enumerate(source_lines)
                        if x['starts'] or x['ends']
                    ]
                if valid_indexes:
                    random.shuffle(valid_indexes)
                    line = source_lines.pop(valid_indexes[0])
                    if line['starts']:
                        r = min(s)
                    elif line['ends']:
                        r = max(s)
                    else:
                        r = random.randint(min(s) + 1, max(s) - 1)
                    composed_lines[r] = line
                    poem_form.setStanzaStart(o, r - min(s))
                    filled_stanzas.append(o)

        ## For parallelization, separate out each stanza
        unfilled_stanzas = [
            s for i, s in enumerate(poem_form.stanzas)
            if i not in filled_stanzas
        ]
        starting_lines = [poem_form.lines[s[0]] for s in unfilled_stanzas]
        parallel_starts = [s for s in starting_lines if s.starts]
        parallel_ends = [s for s in starting_lines if s.ends]
        parallel_mids = [
            s for s in starting_lines if not s.ends and not s.starts
        ]

        ## Compose all the parallelizable starting lines at once, then the
        ## ending lines, then the mid lines. One search serves a whole group
        ## and uses the constraints of the group's first member.
        seed_plan = (
            (parallel_starts, 20),
            (parallel_ends, 10),
            (parallel_mids, 10),
        )
        for seed_group, possibility_count in seed_plan:
            if not seed_group:
                continue
            idx = seed_group[0].index
            hard_constraints = hardConstraints(idx, poem_form, composed_lines)
            flexible_constraints = flexibleConstraints(idx, poem_form,
                                                       composed_lines)
            possible_lines = computePossibleLines(
                dbconn,
                hard_constraints,
                flexible_constraints,
                search_groups,
                composed_lines,
                possibility_count=possibility_count)
            best_lines = getBestLines(dbconn,
                                      hard_constraints,
                                      possible_lines,
                                      poem_form,
                                      idx,
                                      count=10)
            ## Raises ValueError if there are fewer candidates than slots
            chosen_lines = random.sample(best_lines, len(seed_group))
            for form_line, l in zip(seed_group, chosen_lines):
                composed_lines[form_line.index] = l

        ## If there's a callback, call it
        if callback is not None:
            callback(composed_lines, user_info)
    finally:
        ## Always give the connection back, including on failure
        dbconn.close()

    ## Now compose each stanza in parallel
    stanzas = poem_form.stanzas

    if output_queue is not None:
        ## Queue the per-stanza work for the queue's consumer
        for x in stanzas:
            output_queue.put(
                (composeLinesAtIndexes,
                 (pageID, poem_form, dbconfig, search_groups, composed_lines,
                  x, callback, user_info), callback, user_info))
        return

    if multi:
        manager = Manager()
        managed_composed_lines = manager.list(composed_lines)
        pool = Pool(processes=4)
        try:
            pp = [
                pool.apply_async(composeLinesAtIndexes,
                                 args=(pageID, poem_form, dbconfig,
                                       search_groups, managed_composed_lines,
                                       x, callback, user_info))
                for x in stanzas
            ]
            poem_pieces = [p.get() for p in pp]
        finally:
            ## Tear the pool down even when a worker raised
            pool.close()
            pool.join()
    else:
        poem_pieces = [
            composeLinesAtIndexes(pageID, poem_form, dbconfig, search_groups,
                                  composed_lines, x, callback, user_info)
            for x in stanzas
        ]

    ## Piece the results back together: each slot still empty takes the first
    ## non-empty value that any stanza result holds for that position
    for i, l in enumerate(composed_lines):
        if l is None:
            for p in poem_pieces:
                if p[i] is not None:
                    composed_lines[i] = p[i]
                    break
    return composed_lines