def main():
    """Write the word index page and the individual word pages.

    Every clue from clues() is grouped by its answer into the module-level
    `all_uses` dict.  'word/index.html' gets a table of the 100 answers
    with the most uses; a 'word/<WORD>/index.html' page is written for each
    of those answers and for every entry in args.inputs.
    """
    global all_uses

    args = get_args('create word pages and index')
    outf = open_output()

    # answer -> list of clue objects that use it
    all_uses = {}
    for ca in clues():
        all_uses.setdefault(ca.answer, []).append(ca)

    def clue_text(ca):
        return ca.clue

    parts = [
        '<li>%d different words</li>' % len(all_uses),
        '<h2>Most used words</h2>',
        '<table class="clues most-used-words">',
        th("word", "# uses", "clues used with this answer"),
    ]

    # Words named on the command line always get a page
    wordpages_to_make = set(args.inputs)

    ranked = sorted(all_uses.items(), reverse=True, key=lambda item: len(item[1]))
    for answer, uses in ranked[:100]:
        wordpages_to_make.add(answer)
        shown = answer.upper()
        parts.append(td(mkhref(shown, shown),
                        len(uses),
                        html_select_options(uses, strmaker=clue_text)))

    parts.append('</table>')
    h = ''.join(parts)

    for word in wordpages_to_make:
        page_path = 'word/%s/index.html' % word.upper()
        outf.write_html(page_path, mkwww_wordpage(word), title=word)

    outf.write_html('word/index.html', h, title="Words")
def main():
    """Print each distinct answer from clues(), one per line, in sorted order."""
    utils.get_args(desc='get wordlist from corpus')

    # A set collapses repeated answers
    distinct_answers = {ca.answer for ca in clues()}

    for word in sorted(distinct_answers):
        print(word)
def load_answers():
    """Return the module-level g_answers mapping, filling it on first use.

    g_answers maps each answer to a dict of {boiled clue: number of times
    that boiled clue appears with the answer}.  If g_answers is already
    non-empty it is returned as-is without re-reading clues().
    """
    if g_answers:
        return g_answers

    for ca in clues():
        counts = g_answers.setdefault(ca.answer, {})
        key = boil(ca.clue)
        counts[key] = counts.get(key, 0) + 1

    return g_answers
def main():
    """Write a README and a clues.tsv file to the output.

    clues.tsv starts with a "pubid year answer clue" header row followed by
    one tab-separated row per clue from xdfile.clues(), sorted, with no
    trailing newline after the last row.
    """
    utils.get_args('make clues.tsv files')
    outf = utils.open_output()  # should be .zip
    outf.log = False
    outf.toplevel = 'xd'

    # Use a context manager so the README file handle is closed promptly
    # rather than left for the garbage collector.
    with open('doc/zip-README') as readme:
        outf.write_file('README', readme.read())

    rows = sorted(
        (ca.pubid, str(xdfile.year_from_date(ca.date)), ca.answer, ca.clue)
        for ca in xdfile.clues()
    )

    header = '\t'.join("pubid year answer clue".split())
    body = '\n'.join('\t'.join(row) for row in rows)

    outf.write_file('clues.tsv', header + '\n' + body)
def load_clues():
    """Return the module-level g_boiled_clues mapping, filling it on first use.

    g_boiled_clues maps each boiled clue string to the list of clue objects
    from clues() that boil down to it.  Clues whose boiled form is falsy are
    skipped.  If g_boiled_clues is already non-empty it is returned as-is.
    """
    if g_boiled_clues:
        return g_boiled_clues

    for ca in clues():
        key = boil(ca.clue)
        if key:
            g_boiled_clues.setdefault(key, []).append(ca)

    return g_boiled_clues
def main():
    """Write the clue index page and one page per featured clue.

    'pub/clue/index.html' contains summary counts, a table of the 100
    boiled clues with the most uses, and a table of the 100 boiled clues
    with the most distinct answers.  Every boiled clue that appears in
    either table also gets its own 'pub/clue/<boiled clue>/index.html'.

    The loaded mapping is stored in the module-level `boiled_clues`.
    """
    global boiled_clues

    get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()
    ndistinct = len(boiled_clues)

    # (number of uses, boiled clue, answers) for each distinct boiled clue
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]
    nreused = sum(1 for n, _bc, _ans in bcs if n > 1)

    # Guard against an empty corpus, which previously raised
    # ZeroDivisionError; integer division matches the old %d truncation.
    pct_reused = nreused * 100 // ndistinct if ndistinct else 0

    cluepages_to_make = set()

    most_used = [
        "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), ndistinct),
        "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused),
        '<h2>Most used clues</h2>',
        '<table class="clues most-used-clues">',
        th("clue", "# uses", "answers used with this clue"),
    ]
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        most_used.append(td(mkhref(unboil(bc), bc), n, html_select_options(ans)))
    most_used.append('</table>')

    most_ambig = [
        "<h2>Most ambiguous clues</h2>",
        '(clues with the largest number of different answers)',
        '<table class="clues most-different-answers">',
        th("Clue", "answers"),
    ]
    theme_markers = ('quip', 'quote', 'theme', 'riddle')
    by_distinct_answers = sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))
    for n, bc, ans in by_distinct_answers[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        # Rows whose boiled clue contains one of these markers get the
        # "theme" row class.
        if any(marker in bc for marker in theme_markers):
            most_ambig.append(td(clue, html_select_options(ans), rowclass="theme"))
        else:
            most_ambig.append(td(clue, html_select_options(ans)))
    most_ambig.append('</table>')

    for bc in cluepages_to_make:
        outf.write_html('pub/clue/%s/index.html' % bc, mkwww_cluepage(bc), title=bc)

    index_html = ''.join(most_used) + ''.join(most_ambig)
    outf.write_html('pub/clue/index.html', index_html, title="Clues")
def main():
    """Write the clue index page and one page per selected clue.

    'pub/clue/index.html' contains summary counts, a table of the 100
    boiled clues with the most uses, and a table of the 100 boiled clues
    with the most distinct answers.

    Individual 'pub/clue/<boiled clue>/index.html' pages are written for
    every boiled clue in either table and for every clue found in the .xd
    files under args.inputs; a page is skipped when mkwww_cluepage returns
    a falsy value for it.

    The loaded mapping is stored in the module-level `boiled_clues`.
    """
    global boiled_clues

    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()
    ndistinct = len(boiled_clues)

    # (number of uses, boiled clue, answers) for each distinct boiled clue
    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]
    nreused = sum(1 for n, _bc, _ans in bcs if n > 1)

    # Guard against an empty corpus, which previously raised
    # ZeroDivisionError; integer division matches the old %d truncation.
    pct_reused = nreused * 100 // ndistinct if ndistinct else 0

    most_used = [
        "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), ndistinct),
        "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, pct_reused),
    ]

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        for _pos, mainclue, _mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))

    # add top 100 most used boiled clues from corpus
    most_used.append('<h2>Most used clues</h2>')
    most_used.append('<table class="clues most-used-clues">')
    most_used.append(th("clue", "# uses", "answers used with this clue"))
    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        most_used.append(td(mkhref(unboil(bc), bc), n, html_select_options(ans)))
    most_used.append('</table>')

    most_ambig = [
        "<h2>Most ambiguous clues</h2>",
        '(clues with the largest number of different answers)',
        '<table class="clues most-different-answers">',
        th("Clue", "answers"),
    ]
    theme_markers = ('quip', 'quote', 'theme', 'riddle')
    by_distinct_answers = sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))
    for n, bc, ans in by_distinct_answers[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        # Rows whose boiled clue contains one of these markers get the
        # "theme" row class.
        if any(marker in bc for marker in theme_markers):
            most_ambig.append(td(clue, html_select_options(ans), rowclass="theme"))
        else:
            most_ambig.append(td(clue, html_select_options(ans)))
    most_ambig.append('</table>')

    for bc in cluepages_to_make:
        page = mkwww_cluepage(bc)
        # Skip clues for which no page content was produced
        if page:
            outf.write_html('pub/clue/%s/index.html' % bc, page, title=bc)

    index_html = ''.join(most_used) + ''.join(most_ambig)
    outf.write_html('pub/clue/index.html', index_html, title="Clues")