def main():
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = len([bc for n, bc, _ in bcs if n > 1])
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, nreused * 100 / len(boiled_clues))

    cluepages_to_make = set()

    # add all boiled clues from all input .xd files
    for fn, contents in find_files(*args.inputs, ext='.xd'):
        progress(fn)
        xd = xdfile.xdfile(contents.decode('utf-8'), fn)
        for pos, mainclue, mainanswer in xd.iterclues():
            cluepages_to_make.add(boil(mainclue))

    # add top 100 most used boiled clues from corpus
    biggest_clues += '<h2>Most used clues</h2>'
    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")

    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if 'quip' in bc or 'quote' in bc or 'theme' in bc or 'riddle' in bc:
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        contents = mkwww_cluepage(bc)
        if contents:
            outf.write_html('pub/clue/%s/index.html' % bc, contents, title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
def main():
    global all_uses

    args = get_args('create word pages and index')
    outf = open_output()

    all_uses = {}
    for ca in clues():
        if ca.answer not in all_uses:
            all_uses[ca.answer] = []
        all_uses[ca.answer].append(ca)

    h = '<li>%d different words</li>' % len(all_uses)

    h += '<h2>Most used words</h2>'
    h += '<table class="clues most-used-words">'
    h += th("word", "# uses", "clues used with this answer")

    wordpages_to_make = set(args.inputs)

    for answer, uses in sorted(all_uses.items(), reverse=True, key=lambda x: len(x[1]))[:100]:
        wordpages_to_make.add(answer)
        h += td(mkhref(answer.upper(), answer.upper()), len(uses), html_select_options(uses, strmaker=lambda ca: ca.clue))

    h += '</table>'

    for word in wordpages_to_make:
        outf.write_html('word/%s/index.html' % word.upper(), mkwww_wordpage(word), title=word)

    outf.write_html('word/index.html', h, title="Words")
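# Illustrative sketch, not part of the original module: both index builders above call
# html_select_options(items, strmaker=...) to render a list of answers or clue/answer
# records as an HTML dropdown. The exact helper lives elsewhere in the repo; a minimal
# version consistent with how it is called might look like this (names and output
# format are assumptions).
from collections import Counter


def html_select_options(items, strmaker=str):
    # count how often each rendered string occurs, then emit one <option> per distinct value
    counts = Counter(strmaker(x) for x in items)
    opts = ''.join('<option>{} [x{}]</option>'.format(s, n) for s, n in counts.most_common())
    return '<select>{}</select>'.format(opts)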
def main():
    global boiled_clues
    args = get_args('create clue index')
    outf = open_output()

    boiled_clues = load_clues()

    biggest_clues = "<li>%d total clues, which boil down to %d distinct clues" % (len(clues()), len(boiled_clues))

    bcs = [(len(v), bc, answers_from(v)) for bc, v in boiled_clues.items()]

    nreused = len([bc for n, bc, _ in bcs if n > 1])
    biggest_clues += "<li>%d (%d%%) of these clues are used in more than one puzzle" % (nreused, nreused * 100 / len(boiled_clues))

    cluepages_to_make = set()

    biggest_clues += '<h2>Most used clues</h2>'
    biggest_clues += '<table class="clues most-used-clues">'
    biggest_clues += th("clue", "# uses", "answers used with this clue")

    for n, bc, ans in sorted(bcs, reverse=True)[:100]:
        cluepages_to_make.add(bc)
        biggest_clues += td(mkhref(unboil(bc), bc), n, html_select_options(ans))

    biggest_clues += '</table>'

    most_ambig = "<h2>Most ambiguous clues</h2>"
    most_ambig += '(clues with the largest number of different answers)'
    most_ambig += '<table class="clues most-different-answers">'
    most_ambig += th("Clue", "answers")

    for n, bc, ans in sorted(bcs, reverse=True, key=lambda x: len(set(x[2])))[:100]:
        cluepages_to_make.add(bc)
        clue = mkhref(unboil(bc), bc)
        if 'quip' in bc or 'quote' in bc or 'theme' in bc or 'riddle' in bc:
            most_ambig += td(clue, html_select_options(ans), rowclass="theme")
        else:
            most_ambig += td(clue, html_select_options(ans))

    most_ambig += '</table>'

    for bc in cluepages_to_make:
        outf.write_html('pub/clue/%s/index.html' % bc, mkwww_cluepage(bc), title=bc)

    outf.write_html('pub/clue/index.html', biggest_clues + most_ambig, title="Clues")
def grid_diff_html(xd, compare_with=None):
    if compare_with:
        r = mktag('div', tagclass='fullgrid')
    else:
        r = mktag('div', tagclass='fullgrid main')

    similarity_pct = ''
    if compare_with:
        real_pct = grid_similarity(xd, compare_with)
        if real_pct < 25:
            return ''

        similarity_pct = " (%d%%)" % real_pct

    xdlink = mktag('div', tagclass='xdid', inner=mkhref("%s %s" % (xd.xdid(), similarity_pct), '/pub/' + xd.xdid()))
    if compare_with is not None:
        r += xdlink
    else:
        r += mktag('b', inner=xdlink)

    r += headers_to_html(xd)
    r += grid_to_html(xd, compare_with)

    r += '</div>'  # solution
    return r
def pubyear_svg(rows, height=svg_h, width=svg_w, pubid='', year=''):
    bgclass = 'notexists'
    rects = ''

    """
    pubid CHAR(6),          -- "nyt"
    year CHAR(4),           -- "2006"
    weekday CHAR(3),        -- "Mon"
    Size TEXT,              -- most common entry
    Editor TEXT,            -- most common entry
    Copyright TEXT,         -- most common, after removing Date/Author
    NumExisting INTEGER,    -- known or assumed to be in existence (0 means unknown)
    NumXd INTEGER,          -- total number in xd
    NumPublic INTEGER,      -- available for public download

    -- duplicate grids, same author
    NumReprints INTEGER,    -- 100% grid match
    NumTouchups INTEGER,    -- 75-99% grid match
    NumRedone INTEGER,      -- 20-75% grid match

    -- duplicate grids, different author
    NumSuspicious INTEGER,  -- >50% similar grid
    NumThemeCopies INTEGER  -- >50% similar grid
    """

    row = utils.AttrDict(rows[0])
    svgtitle = '{} {}\n'.format(row.pubid, row.year)
    svgtitle += 'Copyright: {}\n'.format(row.Copyright) if row.Copyright else ''
    svgtitle += 'Editor: {}'.format(row.Editor) if row.Editor else ''

    for i, wd in enumerate(utils.WEEKDAYS):  # range(0, 7)
        row = utils.AttrDict(rows[i])
        y = i * 2 + 2

        num_existing = 52 if 's' not in year else 520  # (eventually number of this weekday in that year, *10 for decades)

        num_xd = int(row.NumXd)
        if num_xd > 0:
            bgclass = 'exists'

        # dup_length is length of dup/orange line
        num_dup = int(row.NumReprints) + int(row.NumTouchups) + int(row.NumRedone)

        # susp_length is length of suspicious/red line
        num_susp = int(row.NumSuspicious)
        num_theme = int(row.NumThemeCopies)
        # TODO: base color on suspicious vs theme (darker when only suspicious)

        num_pub = int(row.NumPublic)
        num_priv = num_xd - num_pub

        pixel_prexd = 0
        pixel_postxd = 0
        if num_xd < num_existing:
            # for now; eventually should use earliest/latest date and puzzle to determine which side has gap
            # npre = weekdays_between(date(year_from(firstxd.Date), 1, 1), firstxd.Date, i)
            # npost = weekdays_between(lastxd.Date, date(year_from(lastxd.Date), 12, 31), i)
            pixel_prexd = 1
            pixel_postxd = 1

        if not num_xd or not num_existing:
            continue

        pixel_total = width - pixel_prexd - pixel_postxd

        if num_xd <= num_existing:
            pixel_xd = pixel_total * num_xd / num_existing
        else:
            pixel_xd = pixel_total

        # then convert num_* to pixel_*, num_existing to pixel_total
        pixel_susp = num_susp * pixel_xd / num_xd
        pixel_theme = num_theme * pixel_xd / num_xd
        pixel_dup = num_dup * pixel_xd / num_xd
        pixel_pub = num_pub * pixel_xd / num_xd
        pixel_priv = num_priv * pixel_xd / num_xd

        if pixel_theme > 0 and pixel_theme < 1:
            pixel_theme = 1
        if pixel_susp > 0 and pixel_susp < 1:
            pixel_susp = 1
        if pixel_dup > 0 and pixel_dup < 1:
            pixel_dup = 1

        m = re.match(r'(\d+?)x(\d+?).*', row.Size)
        if m:
            sz = int(m.group(1)) * int(m.group(2))
            if sz > 17 * 17:
                h = 4
            else:
                h = 3
        else:
            h = 3

        x = 0
        w = 6

        rects += '''<g id="{}" transform="translate(0,{y})">'''.format(utils.WEEKDAYS[i], y=int(y))

        w = pixel_prexd
        # rects += rect(x, y, w, h, 'prexd')
        x += w

        w = pixel_susp
        rects += rect(x, y, w, h, 'suspxd')
        x += w

        w = pixel_theme
        rects += rect(x, y, w, h, 'themexd')
        x += w

        w = pixel_dup
        rects += rect(x, y, w, h, 'dupxd')
        x += w

        if x <= pixel_total:
            w = min(pixel_total - x, max(0, pixel_priv))
            rects += rect(x, y, w, h, 'privxd')
            x += w

        if x <= pixel_total:
            w = min(pixel_total - x, max(0, pixel_pub))
            rects += rect(x, y, w, h, 'pubxd')
            x += w

        # w = pixel_postxd
        # rects += rect(x, y, w, h, 'postxd')

        rects += '</g>'

    href = "/pub/%s%s/index.html" % (pubid, year)
    ret = html.mkhref(pys.format(w=width, h=height, classes=bgclass, body=rects), href, svgtitle)
    return ret
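# Illustrative sketch, not from the original module: pubyear_svg() above relies on a
# module-level `pys` SVG wrapper template and a rect() helper defined elsewhere in the
# repo. Minimal versions consistent with how they are called (all names and markup
# details here are assumptions, kept only so the function above reads self-contained).
pys = '<svg class="{classes}" width="{w}" height="{h}">{body}</svg>'


def rect(x, y, w, h, rectclass=''):
    """Emit one SVG <rect> bar; skips zero-width bars. Assumes the caller supplies final coordinates."""
    if w <= 0:
        return ''
    return '<rect class="{}" x="{}" y="{}" width="{}" height="{}"/>'.format(rectclass, x, int(y), int(w), int(h))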
def main():
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False, help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    pubyears = defaultdict(list)
    pubyears_idx = defaultdict(list)
    # years_idx = []
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'
        pubyear = r.pubid + str(y)
        pubyears[pubyear].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)
        # if r.year not in years_idx:
        #     years_idx.append(r.year)

    # Making collapsed decades depends on args
    allyears = []
    for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1):
        allyears.append("%s0s" % i)
    allyears.extend([str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1)])

    html_out = []
    html_out.append('<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>')
    html_out.append(legend)  # See definition above
    html_out.append('<table id="pubyearmap" cellspacing="0" cellpadding="0">')

    # Table header with years \ decades
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {}
    for pubid in pubyears_idx:
        pubs_total[pubid] = len(metadb.xd_puzzles(pubid))

    # sort rows by number of puzzles
    sorted_pubs = sorted(pubs_total.keys(), key=lambda pubid: pubs_total[pubid], reverse=True)

    for pub in args.inputs or sorted_pubs:
        if pubs_total[pub] < 20:
            continue

        # Process each pub in index
        pubobj = metadb.xd_publications().get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub

        html_out.append('<tr><td class="header">{}</td>'.format(html.mkhref(pubname, 'pub/' + pub)))

        for year in sorted(allyears):
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html('pub/{pub}{year}/index.html'.format(**locals()), pubyear_html(pub, year), "{pubname}, {year}".format(**locals()))
            else:
                # otherwise emit an empty placeholder widget
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(pys.format(w=width, h=svg_h, title='', classes='notexists', body=''))
            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pubs_total[pub]))
        html_out.append('<td class="header">{}</td>'.format(html.mkhref(pubname, 'pub/' + pub)))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')

    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out), "Comparison of %s published crossword grids" % total_xd)
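# Illustrative sketch, not the original helper: main() above calls gen_year_header(allyears)
# to build the header row(s) of the pubyear table and re-uses the result at the bottom of
# the table. A minimal version consistent with that usage (column labels and extra trailing
# cells are assumptions) might simply emit one <th> per year label:
def gen_year_header(allyears):
    yhdr = ['<tr><th></th>']
    for year in sorted(allyears):
        yhdr.append('<th>{}</th>'.format(year))
    yhdr.append('<th>#</th><th>publication</th></tr>')
    return yhdr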
def mkcell(text, href="", title=""):
    r = '<div>'
    r += mkhref(text, href, title=title)
    r += '</div>'
    return r