def get_shelf_path(xd, pubid, mdtext):
    """Return the shelf path (without extension) under which `xd` is filed.

    The publication is resolved from `pubid` when given, else from
    `find_pubid(mdtext)`, else by matching the puzzle itself with
    `get_publication(xd)`.

    Path layout:
      * "<publisher>/<pubid>-<NNN>"        when the puzzle has a Number header
      * "<publisher>/<year>/<pubid><date>" when it has a Date header
      * "misc/<filename>"                  when it has neither (a warning is
        emitted)

    "<publisher>" is the publication's publisher abbreviation, falling back
    to `pubid` when that abbreviation is empty.

    Returns None when no publication can be determined.
    """
    if not pubid:
        pubid = find_pubid(mdtext)

    if pubid:
        # Look up with .get(): a pubid missing from the publication table is
        # reported and skipped instead of escaping as a KeyError.
        publ = metadb.xd_publications().get(pubid)
        if publ is None:
            utils.warn("unknown pubid for '%s'" % xd.filename)
            return None
    else:
        publ = get_publication(xd)
        if not publ:
            return None
        pubid = publ.PublicationAbbr
        if not pubid:
            utils.warn("unknown pubid for '%s'" % xd.filename)
            return None

    # Same top-level directory rule for both layouts, so a publication
    # without a publisher abbreviation never yields a path starting with "/".
    shelf = publ.PublisherAbbr or pubid

    num = xd.get_header('Number')
    if num:
        return "%s/%s-%03d" % (shelf, pubid, int(num))

    dt = xd.get_header("Date")
    if not dt:
        utils.warn("neither Number nor Date for '%s'" % xd.filename)
        return 'misc/' + xd.filename

    year = xdfile.year_from_date(dt)
    return "%s/%s/%s%s" % (shelf, year, pubid, dt)
def get_publication(xd):
    """Pick the publication record that best matches the puzzle `xd`.

    Every known publication is checked against the puzzle three ways, each
    with a rank (lower wins):
      1. its abbreviation equals the pubid parsed from the filename
      2. its publication name occurs in the Copyright header
      3. its publisher name occurs in the Copyright header

    When more than one (rank, record) pair matched, records whose publisher
    abbreviation contains 'self' are set aside (unless nothing else matched)
    and the smallest remaining pair wins.

    Returns the chosen record, or None if nothing matched.
    """
    copyright_text = xd.get_header("Copyright").lower()

    # source filename/metadata must be the priority
    file_abbr = utils.parse_pubid(xd.filename)

    candidates = set()
    for record in metadb.xd_publications().values():
        if record.PublicationAbbr == file_abbr.lower():
            candidates.add((1, record))

        pub_name = record.PublicationName
        if pub_name and pub_name.lower() in copyright_text:
            candidates.add((2, record))

        publisher_name = record.PublisherName
        if publisher_name and publisher_name.lower() in copyright_text:
            candidates.add((3, record))

    if not candidates:
        return None
    if len(candidates) == 1:
        return candidates.pop()[1]

    # otherwise, filter out 'self' publications
    non_self = {
        pair for pair in candidates if 'self' not in pair[1].PublisherAbbr
    }
    if len(non_self) == 1:
        return non_self.pop()[1]

    # Only 'self' publications matched: fall back to the unfiltered set.
    pool = non_self if non_self else candidates
    return sorted(pool)[0][1]
def get_publication(xd):
    """Return the publication record that best fits `xd`, or None.

    Each publication can match with up to three ranks: 1 when its
    abbreviation equals the pubid parsed from the file name, 2 when its
    publication name appears in the lower-cased Copyright header, 3 when its
    publisher name does.  With several matches, those whose publisher
    abbreviation contains 'self' are dropped if anything else is left, and
    the lowest-sorting (rank, record) pair is returned.
    """
    matches = set()
    header_text = xd.get_header("Copyright").lower()
    # source filename/metadata must be the priority
    pubid_from_name = utils.parse_pubid(xd.filename)

    for pub in metadb.xd_publications().values():
        abbr_hit = pub.PublicationAbbr == pubid_from_name.lower()
        name_hit = (bool(pub.PublicationName)
                    and pub.PublicationName.lower() in header_text)
        publisher_hit = (bool(pub.PublisherName)
                         and pub.PublisherName.lower() in header_text)
        hits = (abbr_hit, name_hit, publisher_hit)
        for rank, hit in enumerate(hits, start=1):
            if hit:
                matches.add((rank, pub))

    if len(matches) <= 1:
        return matches.pop()[1] if matches else None

    # otherwise, filter out 'self' publications
    remaining = set(
        filter(lambda m: 'self' not in m[1].PublisherAbbr, matches))
    if not remaining:
        remaining = matches  # right back where we started
    if len(remaining) == 1:
        return remaining.pop()[1]

    ranked = sorted(remaining)
    return ranked[0][1]
def main():
    """Write the publication-by-year overview page and the per-pubyear pages.

    Reads the rows of 'pub/stats', groups them by publication and year, and
    writes 'index.html' with one table row per publication and one cell per
    collapsed decade or individual year.  Rows follow the order of the
    publications given on the command line, or, when none are given, run
    from the publication with the most puzzles to the one with the fewest.
    Unless --pubonly is given, 'pub/<pubid><year>/index.html' is also written
    for every publication-year that has a widget.

    Publications with fewer than 20 puzzles are left out.  A publication
    named on the command line that has no stats rows is skipped with a
    warning instead of aborting the run with a KeyError.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False,
                   help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    pubyears = defaultdict(list)      # "<pubid><year>" -> stats rows
    pubyears_idx = defaultdict(list)  # pubid -> years seen, first-seen order
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'  # rows without a year are grouped under '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Column labels: collapsed decades ("<n>0s") followed by single years.
    allyears = [
        "%s0s" % i
        for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # module-level legend markup
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades (repeated below the last row)
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {
        pubid: len(metadb.xd_puzzles(pubid)) for pubid in pubyears_idx
    }

    # sort rows by number of puzzles
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)

    # Loop invariants, computed once.
    publications = metadb.xd_publications()
    year_columns = sorted(allyears)

    for pub in args.inputs or sorted_pubs:
        if pub not in pubs_total:
            utils.warn("no stats for publication '%s'" % pub)
            continue
        if pubs_total[pub] < 20:
            continue

        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub

        html_out.append('<tr><td class="header">{}</td>'.format(
            html.mkhref(pubname, 'pub/' + pub)))

        for year in year_columns:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html(
                        'pub/{}{}/index.html'.format(pub, year),
                        pubyear_html(pub, year),
                        "{}, {}".format(pubname, year))
            else:
                # No widget: emit an empty placeholder, scaled for decade
                # columns (labels containing 's').
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(
                    pys.format(w=width, h=svg_h, title='',
                               classes='notexists', body=''))
            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pubs_total[pub]))
        html_out.append('<td class="header">{}</td>'.format(
            html.mkhref(pubname, 'pub/' + pub)))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')

    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out),
                    "Comparison of %s published crossword grids" % total_xd)
def pubyear_html(pubyears=None, skip_decades=None):
    """Render the publication-by-year overview table as an HTML string.

    The table has one row per publication (ordered by the smallest year key
    the publication has), one cell per collapsed decade or individual year,
    and a final "all" cell holding the publication's total.

    Args:
        pubyears: not used by this function; accepted so existing callers
            keep working.
        skip_decades: dict with integer 'start' and 'end' years.  Every
            decade from start through end is collapsed into one "<decade>s"
            column.  Defaults to {'start': 1910, 'end': 1970}.

    Returns:
        The table markup, its fragments joined with single spaces.

    Side effects:
        Fills the module-level cache `g_all_pubyears` from 'pub/pubyears.tsv'
        on first use; reads 'gxd/similar.tsv' on every call.
    """
    global g_all_pubyears
    if not g_all_pubyears:
        # Context manager so the file handle is closed promptly.
        with open("pub/pubyears.tsv") as tsv:
            g_all_pubyears = utils.parse_tsv_data(tsv.read(), "pubyear")

    # Read similars to make background of widgets:
    # similar_d[pubid][year] -> list of similar-grid percentages
    similar_d = defaultdict(dict)
    for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
        xd_split = utils.split_xdid(xdid)
        if xd_split:
            pubid, year, _mon, _day = xd_split
            similar_d[pubid].setdefault(year, []).append(
                int(v.similar_grid_pct))

    # Making collapsed decades depends on args
    if not skip_decades:
        skip_decades = {'start': 1910, 'end': 1970}
    decades = [
        "%s0s" % i
        for i in range(skip_decades['start'] // 10,
                       skip_decades['end'] // 10 + 1)
    ]
    allyears = decades + [
        str(y)
        for y in range(skip_decades['end'] + 10, date.today().year + 1)
    ]

    pubs = defaultdict(dict)
    # generate widget for each year
    for dowl in g_all_pubyears:
        pubid, year, total = dowl[:3]
        dow = {}
        hint = ''
        for d, v in zip(utils.WEEKDAYS, dowl[3:]):
            dow[d] = {
                'count': int(v) // 2,
                'class': 'red' if d == 'Sun' else 'ord',
            }
            hint += '%s - %s\n' % (d, v)
        hint += 'Total: %s\n' % (total)

        # Define fill class based on average similarity
        fill_class = None  # default fill class for widget
        if year in similar_d[pubid]:
            pcts = similar_d[pubid][year]
            s_avg = sum(pcts) / len(pcts)
            hint += 'Avg similarity: %.2f%%' % (s_avg)
            if s_avg >= 10:
                fill_class = 'similar10'

        pubs[pubid][year] = {
            'dow_data': dow,
            'widget': year_widget(dow, total, fill_class),
            'hint': hint,
            'total': int(total),
        }

    # Grand totals are taken BEFORE the decade roll-ups are added; summing
    # afterwards would count every year inside a collapsed decade twice.
    pub_totals = {
        pubid: sum(entry['total'] for entry in years.values())
        for pubid, years in pubs.items()
    }

    # Process for all decades
    for dec_year in decades:
        year_key = dec_year[:-2]  # drop the last digit and the trailing "s"
        for pubid in pubs:
            # Prefix match on individual years only (decade labels contain
            # an 's').
            total = sum(
                entry['total']
                for yf, entry in pubs[pubid].items()
                if 's' not in yf and yf.startswith(year_key))
            if total > 0:
                pubs[pubid][dec_year] = {
                    'widget': decade_widget(total),
                    'hint': 'Total: %s' % (total),
                    'total': int(total),
                }

    b = []  # Body

    # main table
    b.append('<table class="pubyears">')
    yhdr = [' '] + [split_year(y) for y in allyears]
    yhdr.append("all")
    b.append(
        td_with_class(*yhdr,
                      classes=get_pubheader_classes(*yhdr),
                      rowclass="pubyearhead",
                      tag="th"))
    b.append(tr_empty())

    publications = metadb.xd_publications()
    # Process each pubid sorted by earliest year
    for pubid in sorted(pubs, key=lambda x: min(pubs[x])):
        pub = publications.get(pubid)
        pubname = pub.PublicationName if pub else ''
        years = pubs[pubid]

        # Pub id to first column
        b.append(mktag('tr'))
        b.append(mktag('td', 'pub'))
        b.append(mkcell(space_with_nbsp(pubname or pubid), "/pub/" + pubid))
        b.append(mktag('/td'))

        # One cell per column: a linked widget when there is data,
        # otherwise an empty 'block' cell.
        for yi in allyears:
            if yi in years and years[yi]['total'] > 0:
                b.append(mktag('td', 'this'))
                # Put link directly to year or to decade
                if 's' not in yi:
                    href = "/pub/%s%s" % (pubid, yi)
                else:
                    href = "/pub/%s/index.html#%s" % (pubid, yi[:-1])
                b.append(
                    mkcell(years[yi]['widget'],
                           href=href,
                           title=years[yi]['hint']))
                b.append(mktag('/td'))
            else:
                b.append(mktag('td', 'block'))
                b.append(' ')
                b.append(mktag('/td'))

        b.append(mktag('td'))
        b.append(str(pub_totals[pubid]))
        b.append(mktag('/td'))
        b.append(mktag('/tr'))

    b.append(mktag('/table'))
    return " ".join(b)
def main():
    """Write the publication-by-year overview page and the per-pubyear pages.

    Reads the rows of 'pub/stats', groups them by publication and year, and
    writes 'index.html' with one table row per publication and one cell per
    collapsed decade or individual year.  Rows follow the order of the
    publications given on the command line, or, when none are given, run
    from the publication with the most puzzles to the one with the fewest.
    Unless --pubonly is given, 'pub/<pubid><year>/index.html' is also written
    for every publication-year that has a widget.

    Publications with fewer than 20 puzzles are left out.  A publication
    named on the command line that has no stats rows is skipped with a
    warning instead of aborting the run with a KeyError.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False,
                   help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    pubyears = defaultdict(list)      # "<pubid><year>" -> stats rows
    pubyears_idx = defaultdict(list)  # pubid -> years seen, first-seen order
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'  # rows without a year are grouped under '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Column labels: collapsed decades ("<n>0s") followed by single years.
    allyears = [
        "%s0s" % i
        for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # module-level legend markup
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades (repeated below the last row)
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {
        pubid: len(metadb.xd_puzzles(pubid)) for pubid in pubyears_idx
    }

    # sort rows by number of puzzles
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)

    # Loop invariants, computed once.
    publications = metadb.xd_publications()
    year_columns = sorted(allyears)

    for pub in args.inputs or sorted_pubs:
        if pub not in pubs_total:
            utils.warn("no stats for publication '%s'" % pub)
            continue
        if pubs_total[pub] < 20:
            continue

        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub

        html_out.append('<tr><td class="header">{}</td>'.format(
            html.mkhref(pubname, pub)))

        for year in year_columns:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html(
                        'pub/{}{}/index.html'.format(pub, year),
                        pubyear_html(pub, year),
                        "{}, {}".format(pubname, year))
            else:
                # No widget: emit an empty placeholder, scaled for decade
                # columns (labels containing 's').
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(
                    pys.format(w=width, h=svg_h, title='',
                               classes='notexists', body=''))
            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pubs_total[pub]))
        html_out.append('<td class="header">{}</td>'.format(
            html.mkhref(pubname, pub)))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')

    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out),
                    "Comparison of %s published crossword grids" % total_xd)
def pubyear_html(pubyears=None, skip_decades=None):
    """Render the publication-by-year overview table as an HTML string.

    The table has one row per publication (ordered by the smallest year key
    the publication has), one cell per collapsed decade or individual year,
    and a final "all" cell holding the publication's total.

    Args:
        pubyears: not used by this function; accepted so existing callers
            keep working.
        skip_decades: dict with integer 'start' and 'end' years.  Every
            decade from start through end is collapsed into one "<decade>s"
            column.  Defaults to {'start': 1910, 'end': 1970}.

    Returns:
        The table markup, its fragments joined with single spaces.

    Side effects:
        Fills the module-level cache `g_all_pubyears` from 'pub/pubyears.tsv'
        on first use; reads 'gxd/similar.tsv' on every call.
    """
    global g_all_pubyears
    if not g_all_pubyears:
        # Context manager so the file handle is closed promptly.
        with open("pub/pubyears.tsv") as tsv:
            g_all_pubyears = utils.parse_tsv_data(tsv.read(), "pubyear")

    # Read similars to make background of widgets:
    # similar_d[pubid][year] -> list of similar-grid percentages
    similar_d = defaultdict(dict)
    for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
        xd_split = utils.split_xdid(xdid)
        if xd_split:
            pubid, year, _mon, _day = xd_split
            similar_d[pubid].setdefault(year, []).append(
                int(v.similar_grid_pct))

    # Making collapsed decades depends on args
    if not skip_decades:
        skip_decades = {'start': 1910, 'end': 1970}
    decades = [
        "%s0s" % i
        for i in range(skip_decades['start'] // 10,
                       skip_decades['end'] // 10 + 1)
    ]
    allyears = decades + [
        str(y)
        for y in range(skip_decades['end'] + 10, date.today().year + 1)
    ]

    pubs = defaultdict(dict)
    # generate widget for each year
    for dowl in g_all_pubyears:
        pubid, year, total = dowl[:3]
        dow = {}
        hint = ''
        for d, v in zip(utils.WEEKDAYS, dowl[3:]):
            dow[d] = {
                'count': int(v) // 2,
                'class': 'red' if d == 'Sun' else 'ord',
            }
            hint += '%s - %s\n' % (d, v)
        hint += 'Total: %s\n' % (total)

        # Define fill class based on average similarity
        fill_class = None  # default fill class for widget
        if year in similar_d[pubid]:
            pcts = similar_d[pubid][year]
            s_avg = sum(pcts) / len(pcts)
            hint += 'Avg similarity: %.2f%%' % (s_avg)
            if s_avg >= 10:
                fill_class = 'similar10'

        pubs[pubid][year] = {
            'dow_data': dow,
            'widget': year_widget(dow, total, fill_class),
            'hint': hint,
            'total': int(total),
        }

    # Grand totals are taken BEFORE the decade roll-ups are added; summing
    # afterwards would count every year inside a collapsed decade twice.
    pub_totals = {
        pubid: sum(entry['total'] for entry in years.values())
        for pubid, years in pubs.items()
    }

    # Process for all decades
    for dec_year in decades:
        year_key = dec_year[:-2]  # drop the last digit and the trailing "s"
        for pubid in pubs:
            # Prefix match on individual years only (decade labels contain
            # an 's').
            total = sum(
                entry['total']
                for yf, entry in pubs[pubid].items()
                if 's' not in yf and yf.startswith(year_key))
            if total > 0:
                pubs[pubid][dec_year] = {
                    'widget': decade_widget(total),
                    'hint': 'Total: %s' % (total),
                    'total': int(total),
                }

    b = []  # Body

    # main table
    b.append('<table class="pubyears">')
    yhdr = [' '] + [split_year(y) for y in allyears]
    yhdr.append("all")
    b.append(
        td_with_class(*yhdr,
                      classes=get_pubheader_classes(*yhdr),
                      rowclass="pubyearhead",
                      tag="th"))
    b.append(tr_empty())

    publications = metadb.xd_publications()
    # Process each pubid sorted by earliest year
    for pubid in sorted(pubs, key=lambda x: min(pubs[x])):
        pub = publications.get(pubid)
        pubname = pub.PublicationName if pub else ''
        years = pubs[pubid]

        # Pub id to first column
        b.append(mktag('tr'))
        b.append(mktag('td', 'pub'))
        b.append(mkcell(space_with_nbsp(pubname or pubid), "/pub/" + pubid))
        b.append(mktag('/td'))

        # One cell per column: a linked widget when there is data,
        # otherwise an empty 'block' cell.
        for yi in allyears:
            if yi in years and years[yi]['total'] > 0:
                b.append(mktag('td', 'this'))
                # Put link directly to year or to decade
                if 's' not in yi:
                    href = "/pub/%s%s" % (pubid, yi)
                else:
                    href = "/pub/%s/index.html#%s" % (pubid, yi[:-1])
                b.append(
                    mkcell(years[yi]['widget'],
                           href=href,
                           title=years[yi]['hint']))
                b.append(mktag('/td'))
            else:
                b.append(mktag('td', 'block'))
                b.append(' ')
                b.append(mktag('/td'))

        b.append(mktag('td'))
        b.append(str(pub_totals[pubid]))
        b.append(mktag('/td'))
        b.append(mktag('/tr'))

    b.append(mktag('/table'))
    return " ".join(b)