Example #1
0
def main():
    """Generate the word index page plus one page per selected word.

    Every clue yielded by clues() is grouped by its answer into the
    module-global ``all_uses``.  The index shows the 100 answers with the
    most uses; a word page is written for each of those answers and for
    every word listed in ``args.inputs``.
    """
    global all_uses
    args = get_args('create word pages and index')
    outf = open_output()

    # answer -> list of the clue records that use that answer
    all_uses = {}
    for ca in clues():
        all_uses.setdefault(ca.answer, []).append(ca)

    # Collect the HTML fragments in order and join them once at the end.
    parts = [
        '<li>%d different words</li>' % len(all_uses),
        '<h2>Most used words</h2>',
        '<table class="clues most-used-words">',
        th("word", "# uses", "clues used with this answer"),
    ]

    wordpages_to_make = set(args.inputs)

    # Most-used answers first; only the top 100 get a table row.
    by_use_count = sorted(all_uses.items(), key=lambda item: len(item[1]), reverse=True)
    for answer, uses in by_use_count[:100]:
        wordpages_to_make.add(answer)
        label = answer.upper()
        row = td(mkhref(label, label),
                 len(uses),
                 html_select_options(uses, strmaker=lambda ca: ca.clue))
        parts.append(row)

    parts.append('</table>')

    for word in wordpages_to_make:
        page_path = 'word/%s/index.html' % word.upper()
        outf.write_html(page_path, mkwww_wordpage(word), title=word)

    outf.write_html('word/index.html', ''.join(parts), title="Words")
Example #2
0
def main():
    """Print every distinct answer found by clues(), sorted, one per line."""
    utils.get_args(desc='get wordlist from corpus')

    # A set collapses answers that occur in more than one clue.
    distinct_answers = {ca.answer for ca in clues()}

    for word in sorted(distinct_answers):
        print(word)
Example #3
0
def load_answers():
    """Return the module-level ``g_answers`` map, filling it on first use.

    After loading, ``g_answers`` maps each answer to a dict that counts how
    many times each boiled clue was seen with that answer.  When the map is
    already non-empty it is returned as-is without re-reading clues().
    """
    if g_answers:
        return g_answers

    for ca in clues():
        # Fetch (or create) the per-answer counter before boiling the clue.
        clue_counts = g_answers.setdefault(ca.answer, {})
        boiled = boil(ca.clue)
        clue_counts[boiled] = clue_counts.get(boiled, 0) + 1

    return g_answers
Example #4
0
def main():
    """Write a README and a ``clues.tsv`` dump of the corpus to the output.

    ``clues.tsv`` starts with the header row ``pubid year answer clue``
    followed by one tab-separated row per clue from xdfile.clues(), sorted.
    The last row is not followed by a newline.
    """
    # The return value is not needed here; the call itself is kept.
    utils.get_args('make clues.tsv files')
    outf = utils.open_output()  # should be .zip

    outf.log = False
    outf.toplevel = 'xd'

    # Read the README inside a context manager so the file handle is closed
    # deterministically rather than left for the garbage collector.
    with open('doc/zip-README') as readme:
        readme_text = readme.read()
    outf.write_file('README', readme_text)

    all_clues = [(ca.pubid, str(xdfile.year_from_date(ca.date)), ca.answer, ca.clue)
                 for ca in xdfile.clues()]

    header = '\t'.join("pubid year answer clue".split()) + '\n'
    rows = '\n'.join('\t'.join(cluerow) for cluerow in sorted(all_clues))
    outf.write_file('clues.tsv', header + rows)
Example #5
0
def load_clues():
    """Return the module-level ``g_boiled_clues`` map, filling it on first use.

    After loading, ``g_boiled_clues`` maps each non-empty boiled clue to the
    list of clue records from clues() that boil down to it.  When the map is
    already non-empty it is returned unchanged.
    """
    if g_boiled_clues:
        return g_boiled_clues

    for ca in clues():
        key = boil(ca.clue)
        # Clues that boil down to nothing are left out of the index.
        if key:
            g_boiled_clues.setdefault(key, []).append(ca)

    return g_boiled_clues
Example #6
0
def main():
    """Write a README and a ``clues.tsv`` dump of the corpus to the output.

    ``clues.tsv`` consists of the header row ``pubid year answer clue`` and
    then one tab-separated row per clue from xdfile.clues(), in sorted
    order.  No newline is written after the final row.
    """
    # Only the call matters here; its return value is not used.
    utils.get_args('make clues.tsv files')
    outf = utils.open_output()  # should be .zip

    outf.log = False
    outf.toplevel = 'xd'

    # Use a context manager so the README file handle is always closed
    # instead of being leaked until garbage collection.
    with open('doc/zip-README') as readme:
        readme_text = readme.read()
    outf.write_file('README', readme_text)

    all_clues = [
        (ca.pubid, str(xdfile.year_from_date(ca.date)), ca.answer, ca.clue)
        for ca in xdfile.clues()
    ]

    header = '\t'.join("pubid year answer clue".split()) + '\n'
    rows = '\n'.join('\t'.join(cluerow) for cluerow in sorted(all_clues))
    outf.write_file('clues.tsv', header + rows)
Example #7
0
def main():
    """Build the clue index page and the clue pages it links to.

    Stores the result of load_clues() in the module-global ``boiled_clues``,
    renders two tables of up to 100 rows each (the most used clues, and the
    clues with the largest number of distinct answers), writes one page for
    every clue that appears in either table, and then writes the index.
    """
    global boiled_clues
    # The return value is not needed here; the call itself is kept.
    get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    # One (use count, boiled clue, answers) tuple per distinct boiled clue.
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = sum(1 for n, _, _ in bcs if n > 1)
    # An empty corpus used to raise ZeroDivisionError here; report 0% instead.
    # Integer division matches the truncation "%d" applied to the old float.
    pct_reused = nreused * 100 // len(bcs) if bcs else 0
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused)

    cluepages_to_make = set()

    biggest_clues += '<h2>Most used clues</h2>'

    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    # Tuples sort by use count first, so reverse order puts the most used on top.
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    # Rank by the number of distinct answers seen with each clue.
    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        # Clues mentioning these words get the "theme" row class.
        if any(marker in bc for marker in ('quip', 'quote', 'theme', 'riddle')):
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        outf.write_html('pub/clue/%s/index.html' % bc, mkwww_cluepage(bc), title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
Example #8
0
def main():
    """Build the clue index page and the clue pages it links to.

    Stores the result of load_clues() in the module-global ``boiled_clues``.
    Clue pages are produced for every clue of every ``.xd`` file found under
    ``args.inputs``, plus every clue shown in the two index tables (the 100
    most used clues and the 100 clues with the most distinct answers).  A
    clue page is only written when mkwww_cluepage() returns something truthy.
    """
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    # One (use count, boiled clue, answers) tuple per distinct boiled clue.
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = sum(1 for n, _, _ in bcs if n > 1)
    # An empty corpus used to raise ZeroDivisionError here; report 0% instead.
    # Integer division matches the truncation "%d" applied to the old float.
    pct_reused = nreused * 100 // len(bcs) if bcs else 0
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused)

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, raw in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(raw.decode('utf-8'), fn)
        # Only the clue text is needed; position and answer are ignored.
        for _pos, mainclue, _mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))

    # add top 100 most used boiled clues from corpus
    biggest_clues += '<h2>Most used clues</h2>'

    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")
    # Tuples sort by use count first, so reverse order puts the most used on top.
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    # Rank by the number of distinct answers seen with each clue.
    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        # Clues mentioning these words get the "theme" row class.
        if any(marker in bc for marker in ('quip', 'quote', 'theme', 'riddle')):
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        page = mkwww_cluepage(bc)
        # Skip clues for which no page content was produced.
        if page:
            outf.write_html('pub/clue/%s/index.html' % bc, page, title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")