Example #1
0
def main():
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    bcs = [ (len(v), bc, answers_from(v)) for bc, v in boiled_clues.items() ]

    nreused = len([bc for n, bc, _ in bcs if n > 1])
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, nreused*100/len(boiled_clues))

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        for pos, mainclue, mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))


    # add top 100 most used boiled clues from corpus
    biggest_clues += '<h2>Most used clues</h2>'

    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if 'quip' in bc or 'quote' in bc or 'theme' in bc or 'riddle' in bc:
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        contents = mkwww_cluepage(bc)
        if contents:
            outf.write_html('pub/clue/%s/index.html' % bc, contents, title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
Example #2
0
def mkwww_cluepage(bc):
    bcs = boiled_clues[bc]

    clue_html = ''
    clue_html += '<div>Variants: ' + html_select_options([ ca.clue for ca in bcs ]) + '</div>'
    clue_html += '<hr/>'
    clue_html += '<div>Answers for this clue: ' + html_select_options([ ca.answer for ca in bcs ]) + '</div>'
    clue_html += '<hr/>'
    clue_html += pubyear.pubyear_html([ (ca.pubyear()[0], ca.pubyear()[1], 1) for ca in bcs ])
    
    return clue_html 
Example #3
0
def main():
    global all_uses
    args = get_args('create word pages and index')
    outf = open_output()

    all_uses = {}

    for ca in clues():
        if ca.answer not in all_uses:
            all_uses[ca.answer] = []
        all_uses[ca.answer].append(ca)

    h = '<li>%d different words</li>' % len(all_uses)

    h += '<h2>Most used words</h2>'
    h += '<table class="clues most-used-words">'
    h += th("word", "# uses", "clues used with this answer")

    wordpages_to_make = set(args.inputs)

    for answer, uses in sorted(all_uses.items(), reverse=True, key=lambda x: len(x[1]))[:100]:
        wordpages_to_make.add(answer)
        h += td(mkhref(answer.upper(), answer.upper()),
                len(uses),
                html_select_options(uses, strmaker=lambda ca: ca.clue))

    h += '</table>'

    for word in wordpages_to_make:
        outf.write_html('word/%s/index.html' % word.upper(), mkwww_wordpage(word), title=word)

    outf.write_html('word/index.html', h, title="Words")
Example #4
0
def mkwww_cluepage(bc):
    if bc not in boiled_clues:
        return ''

    bcs = boiled_clues[bc]

    clue_html = ''
    clue_html += '<div>Variants: ' + html_select_options([ ca.clue for ca in bcs ]) + '</div>'
    clue_html += '<hr/>'
    clue_html += '<div>Answers for this clue: ' + html_select_options([ ca.answer for ca in bcs ]) + '</div>'
    clue_html += '<hr/>'

# TODO: maybe add pubyear chart back in, using stats.tsv as source data (by day-of-week)
#    clue_html += pubyear.pubyear_html([ (ca.pubyear()[0], ca.pubyear()[1], 1) for ca in bcs ])

    return clue_html
Example #5
0
def main():
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    bcs = [ (len(v), bc, answers_from(v)) for bc, v in boiled_clues.items() ]

    nreused = len([bc for n, bc, _ in bcs if n > 1])
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, nreused*100/len(boiled_clues))

    cluepages_to_make = set()

    biggest_clues += '<h2>Most used clues</h2>'

    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if 'quip' in bc or 'quote' in bc or 'theme' in bc or 'riddle' in bc:
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        outf.write_html('pub/clue/%s/index.html' % bc, mkwww_cluepage(bc), title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
Example #6
0
def mkwww_wordpage(answer):
    uses = all_uses[answer]

    h = ''
#    h += pubyear.pubyear_html([ (ca.pubyear()[0], ca.pubyear()[1], 1) for ca in uses ])
    h += '<hr/>'
    h += '<div>Clued as: ' + html_select_options([ ca.clue for ca in uses ]) + '</div>'
    h += '<h2>%d uses</h2>' % len(uses)

    h += '<table>'
    for ca in sorted(uses, reverse=True, key=lambda ca: ca.date):
        try:
            md = metadb.xd_puzzle(ca.xdid())
            h += td(md.xdid, ca.clue, md.Author, md.Copyright)
        except Exception as e:
            h += td(ca.xdid, ca.clue, str(e))
            if utils.get_args().debug:
                raise
    h += '</table>'

#    h += '<hr/>'
#    h += '<div>Mutations: '
#    h +='</div>'
    return h
Example #7
0
def main():
    args = utils.get_args('generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')
    xdids_todo = [ parse_pathname(fn).base for fn in args.inputs ]
    if not xdids_todo:
        xdids_todo = [ xdid for xdid, matches in metadb.get_similar_grids().items() if matches ]

    for mainxdid in xdids_todo:
        progress(mainxdid)

        mainxd = xdfile.get_xd(mainxdid)
        if not mainxd:
            continue

        matches = metadb.get_similar_grids().get(mainxdid, [])

        xddates = {}
        xddates[mainxdid] = mainxd.date() # Dict to store XD dates for further sort
        html_grids = {}
        html_clues = {}

        # these are added directly to similar.tsv
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        poss_answers = [] # TODO:
        pub_uses = {}  # [pubid] -> set(ClueAnswer)

        dcl_html = ''
        deepcl_html = [] # keep deep clues to parse later - per row
        for pos, mainclue, mainanswer in mainxd.iterclues():
            deepcl_html = [] # Temporary to be replaced late
            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            deepcl_html.append('<td class="pos">%s.</td>' % pos)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    if clueans.pubid in pub_uses:
                        otherpubs = pub_uses[clueans.pubid]
                    else:
                        otherpubs = set()  # set of ClueAnswer
                        pub_uses[clueans.pubid] = otherpubs
                    otherpubs.add(clueans)

            # add 'other uses' to clues_html
            stale = False
            deepcl_html.append('<td class="other-uses">')

            if len(pub_uses) > 0:
                sortable_uses = []
                for pubid, uses in pub_uses.items():
                    # show the earliest unboiled clue
                    for u in sorted(uses, key=lambda x: x.date or ""):
                        # only show those published earlier
                        if u.date and u.date <= mainxd.date():
                            if pubid == mainxdid and u.date == mainxd.date():
                                pass
                            else:
                                stale = True
                                sortable_uses.append((u.date, u, 1))

                deepcl_html.append(html_select([ (clue, nuses) for dt, clue, nuses in sorted(sortable_uses, key=lambda x: x[0], reverse=True) ], top_option=mainclue))

            else:
                deepcl_html.append('<div class="original">%s</div>' % esc(mainclue))

            deepcl_html.append('</td>')

            # add 'other answers' to clues_html

            deepcl_html.append('<td class="other-answers">')
            deepcl_html.append(html_select_options(poss_answers, strmaker=lambda ca: ca.answer, force_top=mainca))
            deepcl_html.append('</td>')

            # add 'other clues' to clues_html
            deepcl_html.append('<td class="other-clues">')

            # bclues is all boiled clues for this particular answer: { [bc] -> #uses }
            bclues = load_answers().get(mainanswer, [])
            stale_answer = False

            if bclues:
                uses = []
                for bc, nuses in bclues.items():
                    # then find all clues besides this one
                    clue_usages = [ ca for ca in load_clues().get(bc, []) if ca.answer == mainanswer and ca.date < mainxd.date() ]

                    if clue_usages:
                        stale_answer = True
                        if nuses > 1:
                            # only use one (the most recent) ClueAnswer per boiled clue
                            # but use the clue only (no xdid)
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1].clue
                        else:
                            ca = sorted(clue_usages, key=lambda ca: ca.date or "z")[-1]
                            uses.append((ca, nuses))

                if uses:
                    deepcl_html.append(html_select(uses))

            deepcl_html.append('</td>')  # end 'other-clues'

            if stale_answer:
                nstaleanswers += 1
            if stale:
                nstaleclues += 1
            ntotalclues += 1
            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(deepcl_html) + '</tr>'

        # Store in list to make further formatting as html table easier
        mainxd = xdfile.get_xd(mainxdid)
        if mainxd:
            html_grids[mainxdid] = grid_diff_html(mainxd)

        # Add for main XD
        diff_l = []
        for pos, mainclue, mainanswer in mainxd.iterclues():
            diff_h = mktag('div','fullgrid main') + '%s.&nbsp;' %pos
            diff_h += mainclue
            diff_h += mktag('span', tagclass='main', inner='&nbsp;~&nbsp;' + mainanswer.upper())
            diff_l.append(diff_h)
        html_clues[mainxdid] = diff_l
        # Process for all matches
        for xdid in matches:
            xd = xdfile.get_xd(xdid)
            if not xd:
                continue
            xddates[xdid] = xd.date()
            # output each grid
            html_grids[xdid] = grid_diff_html(xd, compare_with=mainxd)
            diff_l = []
            # output comparison of each set of clues
            for pos, clue, answer in xd.iterclues():
                diff_h = mktag('div','fullgrid') + '%s.&nbsp;' %pos
                # Sometimes can return clue == None
                sm = difflib.SequenceMatcher(lambda x: x == ' ', mainxd.get_clue(pos) or '', clue)
                if sm.ratio() < 0.50:
                    diff_h += clue
                else:
                    # Compare based on op codes
                    for opcode in sm.get_opcodes():
                        c, a1, a2, b1, b2 = opcode
                        if c == 'equal':
                            diff_h += '<span class="match">%s</span>' % clue[b1:b2]
                        else:
                            diff_h += '<span class="diff">%s</span>' % clue[b1:b2]
                diff_h += mktag('span', tagclass=(answer == mainxd.get_answer(pos)) and 'match' or 'diff', inner='&nbsp;~&nbsp;' + answer.upper())
                diff_h += mktag('/div')
                diff_l.append(diff_h)
            html_clues[xdid] = diff_l

        # Wrap into table
        diff_h = mktag('table') + mktag('tr')
        # Sort by date
        sortedkeys = sorted(xddates.items(), key=operator.itemgetter(1))
        for w, dt in sortedkeys:
            # Wrap into table
            diff_h += mktag('td') + html_grids[w] + mktag('/td')
        diff_h += mktag('/tr')
        for i, clue in enumerate(html_clues[sortedkeys[0][0]]):
            diff_h += mktag('tr')
            for w, dt in sortedkeys:
                if i < len(html_clues[w]):
                    diff_h += mktag('td') + html_clues[w][i] + mktag('/td')
            diff_h += mktag('/tr')
        # Process deepclues
        diff_h += mktag('table') + dcl_html + mktag('/table')

        diff_h += mktag('/table')
        outf.write_html('pub/deep/%s/index.html' % mainxdid, diff_h,
                    title='Deep clue comparison for ' + mainxdid)
Example #8
0
def main():
    args = utils.get_args(
        'generates .html diffs with deep clues for all puzzles in similar.tsv')
    outf = utils.open_output()

    similars = utils.parse_tsv('gxd/similar.tsv', 'Similar')

    xds_todo = []
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        xds_todo.append(xd)

    for mainxd in xds_todo:
        mainxdid = mainxd.xdid()
        progress(mainxdid)

        matches = metadb.xd_similar(mainxdid)

        xddates = {}
        xddates[mainxdid] = mainxd.date(
        )  # Dict to store XD dates for further sort
        html_grids = {}

        # these are added directly to similar.tsv
        nstaleclues = 0
        nstaleanswers = 0
        ntotalclues = 0

        dcl_html = '<tr>'
        dcl_html += '<th></th>'
        dcl_html += '<th>Clue</th>'
        dcl_html += '<th>ANSWERs</th>'
        dcl_html += '<th>Alt. clue possibilities</th>'
        dcl_html += '</tr>'

        deepcl_html = []  # keep deep clues to parse later - per row
        for pos, mainclue, mainanswer in mainxd.iterclues():
            if not pos:
                continue

            poss_answers = []  # TODO:
            pub_uses = {}  # [pubid] -> set(ClueAnswer)

            deepcl_html = []  # Temporary to be replaced late
            mainca = ClueAnswer(mainxdid, mainxd.date(), mainanswer, mainclue)

            # 'grid position' column
            deepcl_html.append('<td class="pos">%s.</td>' % pos)

            # find other uses of this clue, and other answers, in a single pass
            for clueans in find_clue_variants(mainclue):
                if clueans.answer != mainanswer:
                    poss_answers.append(clueans)

                if clueans.answer == mainanswer:
                    if clueans.pubid in pub_uses:
                        otherpubs = pub_uses[clueans.pubid]
                    else:
                        otherpubs = set()  # set of ClueAnswer
                        pub_uses[clueans.pubid] = otherpubs
                    otherpubs.add(clueans)

            # add 'other uses' to clues_html
            deepcl_html.append('<td class="other-uses">')

            prev = prev_uses(pub_uses, mainxd, mainclue)
            if prev:
                deepcl_html.append('<a href="/pub/clue/%s">%s [x%s]</a>' %
                                   (boil(mainclue), mainclue, len(prev)))
                nstaleclues += 1
            else:
                deepcl_html.append(mainclue)

            deepcl_html.append('</td>')

            # add 'other answers' to clues_html
            deepcl_html.append('<td class="other-answers">')
            deepcl_html.append(
                html_select_options(poss_answers,
                                    strmaker=lambda ca: ca.answer,
                                    force_top=mainca,
                                    add_total=False))
            deepcl_html.append('</td>')

            # add 'other clues' to clues_html
            deepcl_html.append('<td class="other-clues">')

            other_clues = html_other_clues(mainanswer, mainclue, mainxd)
            if other_clues:
                deepcl_html.append(other_clues)
                nstaleanswers += 1

            deepcl_html.append('</td>')  # end 'other-clues'

            ntotalclues += 1
            # Quick and dirty - to be replaced
            dcl_html += '<tr>' + ' '.join(deepcl_html) + '</tr>'

        # Process deepclues
        diff_h = '<div class="main-container">'
        diff_h += grid_to_html(mainxd)
        diff_h += mktag('table', 'deepclues') + dcl_html + mktag('/table')
        diff_h += '</div>'

        info('writing deepclues for %s' % mainxdid)
        outf.write_html('pub/deep/%s/index.html' % mainxdid,
                        diff_h,
                        title='Deep clue analysis for ' + mainxdid)