def get_ungotten_dates(pubid, before_date, after_date, days_to_advance, ret=None):
    """Collect dates for which `pubid` has no puzzle, walking backwards in time.

    The first window starts two periods (2 * days_to_advance) before the
    earlier of the two given dates and ends at the later one; candidate
    dates inside a window are spaced `days_to_advance` days apart.  As long
    as a window contains at least one missing date, the search moves on to
    the adjacent earlier window (four candidates wide).  It stops at the
    first window in which every candidate date is already present.

    Args:
        pubid: publication id passed to metadb.xd_puzzles().
        before_date: one bound of the initial window.
        after_date: the other bound; the two are swapped if out of order.
            Both must support subtraction yielding an object with `.days`
            and be accepted by add_days().
        days_to_advance: spacing, in days, between candidate dates.
        ret: optional list that is extended in place; a new list is created
            when omitted.

    Returns:
        `ret`, with the missing dates of each visited window appended,
        newest first within each window.
    """
    if ret is None:
        ret = []

    # Build the set of dates we already have exactly once.  The result of
    # metadb.xd_puzzles() is assumed not to change during the walk, so there
    # is no need to re-read it for every window.
    pub_gotten = {
        datestr_to_datetime(puzrow.Date) for puzrow in metadb.xd_puzzles(pubid)
    }

    lookback = 2 * days_to_advance

    # Iterate instead of recursing: a long run of missing dates would
    # otherwise need one stack frame per window and could exceed the
    # interpreter's recursion limit.
    while True:
        if before_date > after_date:
            before_date, after_date = after_date, before_date

        window_start = add_days(before_date, -lookback)
        days_diff = (after_date - window_start).days

        missing = []
        for offset in range(days_to_advance, days_diff + 1, days_to_advance):
            dt = add_days(window_start, offset)
            if dt not in pub_gotten:
                missing.append(dt)

        ret.extend(reversed(missing))
        if not missing:
            return ret

        # Next window: the span that ends where this one started.
        before_date, after_date = add_days(window_start, -lookback), window_start
def main():
    """Render the publication/year overview table and the per-pubyear pages.

    Reads the 'pub/stats' rows through metadb, builds one table row per
    publication (publications with fewer than 20 puzzles are skipped) with
    one cell per decade/year, and writes the result as 'index.html' through
    the object returned by utils.open_output().  Unless --pubonly is given,
    a 'pub/<pub><year>/index.html' page is also written for every cell that
    has data.

    NOTE(review): this file contains another definition of `main` whose
    publication links use `pub` instead of 'pub/' + pub -- confirm which
    href is intended.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False,
                   help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    # Group the stats rows by "<pubid><year>" and remember, per publication,
    # which years were seen (rows without a year are filed under '0000').
    pubyears = defaultdict(list)
    pubyears_idx = defaultdict(list)
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Column labels: collapsed decades ("1950s") first, then single years
    # up to and including the current year.
    allyears = [
        "%s0s" % i
        for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))
    # Sorted once; every publication row uses the same column order.
    sorted_years = sorted(allyears)

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # `legend` is defined elsewhere in this module
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades (repeated below the table body).
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {
        pubid: len(metadb.xd_puzzles(pubid)) for pubid in pubyears_idx
    }

    # Rows are ordered by number of puzzles, largest first, unless explicit
    # pub ids were supplied in args.inputs.
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)
    publications = metadb.xd_publications()

    for pub in args.inputs or sorted_pubs:
        # A pub id from args.inputs that has no stats rows is treated as
        # having 0 puzzles and skipped, instead of raising KeyError.
        if pubs_total.get(pub, 0) < 20:
            continue

        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub

        html_out.append('<tr><td class="header">{}</td>'.format(
            html.mkhref(pubname, 'pub/' + pub)))

        for year in sorted_years:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html(
                        'pub/{pub}{year}/index.html'.format(pub=pub, year=year),
                        pubyear_html(pub, year),
                        "{pubname}, {year}".format(pubname=pubname, year=year))
            else:
                # No data for this cell: emit an empty placeholder widget,
                # widened for decade columns.
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(
                    pys.format(w=width, h=svg_h, title='',
                               classes='notexists', body=''))
            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pubs_total[pub]))
        html_out.append('<td class="header">{}</td>'.format(
            html.mkhref(pubname, 'pub/' + pub)))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')

    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out),
                    "Comparison of %s published crossword grids" % total_xd)
def pubyear_html(pub, year):
    """Build the HTML body of the page for one publication and year or decade.

    Args:
        pub: publication id; used as the prefix for metadb lookups and for
            building xdids (pub + date).
        year: string, either a single year such as "1999" or a decade label
            ending in "s" such as "1950s" (expanded to its ten years).

    Returns:
        An HTML string: the module-level `legend` followed by a table that
        contains, for every month of the covered year(s), a GridCalendar
        cell and one row per puzzle of that month that has a similarity
        record.
    """
    colnames = [year] + pubyear_header
    # Collect fragments and join once at the end instead of growing one
    # string with += inside the nested loops.
    parts = [
        '<table class="puzzles">',
        html.table_row(colnames, colnames, tag='th'),
    ]

    # Similarity info keyed by the date parsed from each xdid.
    c_grids = {}
    for row in sorted(metadb.xd_similar(pub + year)):
        dt = utils.parse_iso8601(row.xdid)
        dt2 = utils.parse_iso8601(row.match_xdid)
        # Skip rows without a parseable date on both sides, and rows where
        # this puzzle's date sorts before the matched puzzle's date.
        if not dt or not dt2 or dt < dt2:
            continue

        entry = c_grids.setdefault(dt, {'title': '', 'class': ''})
        if row.match_pct == 0:
            # The date is still registered above, just without match text.
            continue

        entry['link'] = '/pub/' + row.xdid
        aut1 = metadb.get_author(row.xdid) or ''
        aut2 = metadb.get_author(row.match_xdid) or ''
        entry['title'] += '(%s%%) %s [%s]\n' % (
            row.match_pct, aut2, row.match_xdid)
        entry['class'] += ret_classes(aut1, aut2, row.match_pct)

    # Plain styling for puzzles that have no similarity entry.
    c_grids_b = {}
    for row in metadb.xd_puzzles(pub + year):
        if row.Date and row.Date not in c_grids_b and row.Date not in c_grids:
            c_grids_b[row.Date] = {
                'title': '',
                'class': 'privxd' if int(row.Date[:4]) > 1965 else 'pubxd',
            }

    # Calendar data: similarity entries plus the plain ones.
    z = dict(c_grids)
    z.update(c_grids_b)

    if year[-1] == 's':  # decade
        first_year = int(year[:4])
        end_year = first_year + 10
    else:
        first_year = int(year)
        end_year = first_year + 1

    # A separate loop variable is used so the `year` parameter is not
    # overwritten while iterating.
    for cal_year in range(first_year, end_year):
        for month in range(1, 13):
            month_prefix = "%s-%02d" % (cal_year, month)

            dups_table = []
            for dt, d in c_grids.items():
                if not dt.startswith(month_prefix):
                    continue
                xdid = pub + dt
                puzmd = metadb.xd_puzzle(xdid)
                if not puzmd:
                    continue
                dups_table.append({
                    'class': d['class'],
                    'tag_params': {
                        'onclick': 'location.href=\'/pub/%s\'' % xdid,
                        'class': d['class'] + ' hrefrow puzrow',
                    },
                    'row': [
                        xdid, puzmd.Date, puzmd.Size, puzmd.Title,
                        puzmd.Author, puzmd.Editor, puzmd.Copyright,
                        puzmd.A1_D1, d["title"].replace("\n", "<br/>")
                    ],
                })

            # The calendar cell spans its own row plus one row per puzzle.
            parts.append(
                '<tr class="calendar"><td class="calendar" rowspan="%s">' % (
                    len(dups_table) + 1))
            parts.append(
                html.GridCalendar(z).formatmonth(cal_year, month) + '</td></tr>')

            # Rows are ordered by the puzzle's Date column.
            for r in sorted(dups_table, key=lambda x: x['row'][1]):
                parts.append(html.table_row(r["row"], pubyear_header,
                                            tag_params=r['tag_params']))

    parts.append('</table>')
    calendars_html = "".join(parts)

    ret = '''%s <div class="calendars">%s</div> <hr/>''' % (legend, calendars_html)
    return ret
def main():
    """Render the publication/year overview table and the per-pubyear pages.

    Reads the 'pub/stats' rows through metadb, builds one table row per
    publication (publications with fewer than 20 puzzles are skipped) with
    one cell per decade/year, and writes the result as 'index.html' through
    the object returned by utils.open_output().  Unless --pubonly is given,
    a 'pub/<pub><year>/index.html' page is also written for every cell that
    has data.

    NOTE(review): this file contains another definition of `main` whose
    publication links use 'pub/' + pub instead of `pub` -- confirm which
    href is intended.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False,
                   help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    # Group the stats rows by "<pubid><year>" and remember, per publication,
    # which years were seen (rows without a year are filed under '0000').
    pubyears = defaultdict(list)
    pubyears_idx = defaultdict(list)
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Column labels: collapsed decades ("1950s") first, then single years
    # up to and including the current year.
    allyears = [
        "%s0s" % i
        for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))
    # Sorted once; every publication row uses the same column order.
    sorted_years = sorted(allyears)

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # `legend` is defined elsewhere in this module
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades (repeated below the table body).
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {
        pubid: len(metadb.xd_puzzles(pubid)) for pubid in pubyears_idx
    }

    # Rows are ordered by number of puzzles, largest first, unless explicit
    # pub ids were supplied in args.inputs.
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)
    publications = metadb.xd_publications()

    for pub in args.inputs or sorted_pubs:
        # A pub id from args.inputs that has no stats rows is treated as
        # having 0 puzzles and skipped, instead of raising KeyError.
        if pubs_total.get(pub, 0) < 20:
            continue

        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub

        html_out.append('<tr><td class="header">{}</td>'.format(
            html.mkhref(pubname, pub)))

        for year in sorted_years:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html(
                        'pub/{pub}{year}/index.html'.format(pub=pub, year=year),
                        pubyear_html(pub, year),
                        "{pubname}, {year}".format(pubname=pubname, year=year))
            else:
                # No data for this cell: emit an empty placeholder widget,
                # widened for decade columns.
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(
                    pys.format(w=width, h=svg_h, title='',
                               classes='notexists', body=''))
            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pubs_total[pub]))
        html_out.append('<td class="header">{}</td>'.format(
            html.mkhref(pubname, pub)))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')

    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out),
                    "Comparison of %s published crossword grids" % total_xd)
def pubyear_html(pub, year):
    """Build the HTML body of the page for one publication and year or decade.

    Args:
        pub: publication id; used as the prefix for metadb lookups and for
            building xdids (pub + date).
        year: string, either a single year such as "1999" or a decade label
            ending in "s" such as "1950s" (expanded to its ten years).

    Returns:
        An HTML string: the module-level `legend` followed by a table that
        contains, for every month of the covered year(s), a GridCalendar
        cell and one row per puzzle of that month that has a similarity
        record.
    """
    colnames = [year] + pubyear_header
    # Collect fragments and join once at the end instead of growing one
    # string with += inside the nested loops.
    parts = [
        '<table class="puzzles">',
        html.table_row(colnames, colnames, tag='th'),
    ]

    # Similarity info keyed by the date parsed from each xdid.
    c_grids = {}
    for row in sorted(metadb.xd_similar(pub + year)):
        dt = utils.parse_iso8601(row.xdid)
        dt2 = utils.parse_iso8601(row.match_xdid)
        # Skip rows without a parseable date on both sides, and rows where
        # this puzzle's date sorts before the matched puzzle's date.
        if not dt or not dt2 or dt < dt2:
            continue

        entry = c_grids.setdefault(dt, {'title': '', 'class': ''})
        if row.match_pct == 0:
            # The date is still registered above, just without match text.
            continue

        entry['link'] = '/pub/' + row.xdid
        aut1 = metadb.get_author(row.xdid) or ''
        aut2 = metadb.get_author(row.match_xdid) or ''
        entry['title'] += '(%s%%) %s [%s]\n' % (
            row.match_pct, aut2, row.match_xdid)
        entry['class'] += ret_classes(aut1, aut2, row.match_pct)

    # Plain styling for puzzles that have no similarity entry.
    c_grids_b = {}
    for row in metadb.xd_puzzles(pub + year):
        if row.Date and row.Date not in c_grids_b and row.Date not in c_grids:
            c_grids_b[row.Date] = {
                'title': '',
                'class': 'privxd' if int(row.Date[:4]) > 1965 else 'pubxd',
            }

    # Calendar data: similarity entries plus the plain ones.
    z = dict(c_grids)
    z.update(c_grids_b)

    if year[-1] == 's':  # decade
        first_year = int(year[:4])
        end_year = first_year + 10
    else:
        first_year = int(year)
        end_year = first_year + 1

    # A separate loop variable is used so the `year` parameter is not
    # overwritten while iterating.
    for cal_year in range(first_year, end_year):
        for month in range(1, 13):
            month_prefix = "%s-%02d" % (cal_year, month)

            dups_table = []
            for dt, d in c_grids.items():
                if not dt.startswith(month_prefix):
                    continue
                xdid = pub + dt
                puzmd = metadb.xd_puzzle(xdid)
                if not puzmd:
                    continue
                dups_table.append({
                    'class': d['class'],
                    'tag_params': {
                        'onclick': 'location.href=\'/pub/%s\'' % xdid,
                        'class': d['class'] + ' hrefrow puzrow',
                    },
                    'row': [
                        xdid, puzmd.Date, puzmd.Size, puzmd.Title,
                        puzmd.Author, puzmd.Editor, puzmd.Copyright,
                        puzmd.A1_D1, d["title"].replace("\n", "<br/>")
                    ],
                })

            # The calendar cell spans its own row plus one row per puzzle.
            parts.append(
                '<tr class="calendar"><td class="calendar" rowspan="%s">' % (
                    len(dups_table) + 1))
            parts.append(
                html.GridCalendar(z).formatmonth(cal_year, month) + '</td></tr>')

            # Rows are ordered by the puzzle's Date column.
            for r in sorted(dups_table, key=lambda x: x['row'][1]):
                parts.append(html.table_row(r["row"], pubyear_header,
                                            tag_params=r['tag_params']))

    parts.append('</table>')
    calendars_html = "".join(parts)

    ret = '''%s <div class="calendars">%s</div> <hr/>''' % (legend, calendars_html)
    return ret