Exemple #1
0
def get_shelf_path(xd, pubid, mdtext):
    """Return the shelf-relative path for puzzle `xd`, or None if unplaceable.

    The publication is looked up by `pubid` when one is given, then by
    `find_pubid(mdtext)`, and finally guessed with `get_publication(xd)`.

    Path layout:
      * "<shelf>/<pubid>-<NNN>" when the puzzle has a Number header;
      * "<shelf>/<year>/<pubid><date>" when it has a Date header;
      * "misc/<filename>" when it has neither (a warning is emitted).

    <shelf> is the publication's PublisherAbbr, or the pubid when that
    abbreviation is empty.

    Returns None (after a warning where a pubid was involved) when no
    publication can be determined.
    """
    if not pubid:
        pubid = find_pubid(mdtext)

    if pubid:
        # .get() rather than [] so an id missing from the publications
        # table is reported instead of raising KeyError.
        publ = metadb.xd_publications().get(pubid)
        if not publ:
            utils.warn("unknown pubid '%s' for '%s'" % (pubid, xd.filename))
            return None
    else:
        publ = get_publication(xd)
        if not publ:
            return None
        pubid = publ.PublicationAbbr

    if not pubid:
        utils.warn("unknown pubid for '%s'" % xd.filename)
        return None

    # Same fallback for both layouts; an empty publisher would otherwise
    # produce a path starting with "/".
    shelf = publ.PublisherAbbr or pubid

    num = xd.get_header('Number')
    if num:
        return "%s/%s-%03d" % (shelf, pubid, int(num))

    dt = xd.get_header("Date")
    if not dt:
        utils.warn("neither Number nor Date for '%s'" % xd.filename)
        return 'misc/' + xd.filename

    year = xdfile.year_from_date(dt)
    return "%s/%s/%s%s" % (shelf, year, pubid, dt)
Exemple #2
0
def get_publication(xd):
    """Guess the publication `xd` belongs to; return its row or None.

    Every row of `metadb.xd_publications()` is checked against three
    signals, listed from highest to lowest priority:

      1. its PublicationAbbr equals the (lower-cased) pubid parsed from
         `xd.filename`;
      2. its PublicationName occurs in the lower-cased Copyright header;
      3. its PublisherName occurs in the lower-cased Copyright header.

    If several candidates match, those whose PublisherAbbr contains 'self'
    are discarded as long as at least one other candidate remains, and the
    candidate with the best priority is returned.
    """
    # `or ""` keeps a missing header / unparsable filename from raising
    # AttributeError on .lower().
    copyright_text = (xd.get_header("Copyright") or "").lower()

    # source filename/metadata must be the priority
    abbr = (utils.parse_pubid(xd.filename) or "").lower()

    candidates = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == abbr:
            candidates.add((1, publ))

        if publ.PublicationName and publ.PublicationName.lower() in copyright_text:
            candidates.add((2, publ))

        if publ.PublisherName and publ.PublisherName.lower() in copyright_text:
            candidates.add((3, publ))

    if not candidates:
        return None

    # Prefer non-'self' publications, but fall back to the full candidate
    # set when filtering would leave nothing.
    non_self = {(pri, p) for pri, p in candidates
                if 'self' not in (p.PublisherAbbr or "")}
    pool = non_self or candidates

    # Rank by priority, then by abbreviation, so equal-priority matches are
    # ordered without comparing the publication rows themselves.
    best = min(pool, key=lambda m: (m[0], m[1].PublicationAbbr or ""))
    return best[1]
Exemple #3
0
def get_shelf_path(xd, pubid, mdtext):
    """Return the shelf-relative path for puzzle `xd`, or None if unplaceable.

    The publication is looked up by `pubid` when one is given, then by
    `find_pubid(mdtext)`, and finally guessed with `get_publication(xd)`.

    Path layout:
      * "<shelf>/<pubid>-<NNN>" when the puzzle has a Number header;
      * "<shelf>/<year>/<pubid><date>" when it has a Date header;
      * "misc/<filename>" when it has neither (a warning is emitted).

    <shelf> is the publication's PublisherAbbr, or the pubid when that
    abbreviation is empty.

    Returns None (after a warning where a pubid was involved) when no
    publication can be determined.
    """
    if not pubid:
        pubid = find_pubid(mdtext)

    if pubid:
        # .get() rather than [] so an id missing from the publications
        # table is reported instead of raising KeyError.
        publ = metadb.xd_publications().get(pubid)
        if not publ:
            utils.warn("unknown pubid '%s' for '%s'" % (pubid, xd.filename))
            return None
    else:
        publ = get_publication(xd)
        if not publ:
            return None
        pubid = publ.PublicationAbbr

    if not pubid:
        utils.warn("unknown pubid for '%s'" % xd.filename)
        return None

    # Same fallback for both layouts; an empty publisher would otherwise
    # produce a path starting with "/".
    shelf = publ.PublisherAbbr or pubid

    num = xd.get_header('Number')
    if num:
        return "%s/%s-%03d" % (shelf, pubid, int(num))

    dt = xd.get_header("Date")
    if not dt:
        utils.warn("neither Number nor Date for '%s'" % xd.filename)
        return 'misc/' + xd.filename

    year = xdfile.year_from_date(dt)
    return "%s/%s/%s%s" % (shelf, year, pubid, dt)
Exemple #4
0
def get_publication(xd):
    """Guess the publication `xd` belongs to; return its row or None.

    Every row of `metadb.xd_publications()` is checked against three
    signals, listed from highest to lowest priority:

      1. its PublicationAbbr equals the (lower-cased) pubid parsed from
         `xd.filename`;
      2. its PublicationName occurs in the lower-cased Copyright header;
      3. its PublisherName occurs in the lower-cased Copyright header.

    If several candidates match, those whose PublisherAbbr contains 'self'
    are discarded as long as at least one other candidate remains, and the
    candidate with the best priority is returned.
    """
    # `or ""` keeps a missing header / unparsable filename from raising
    # AttributeError on .lower().
    copyright_text = (xd.get_header("Copyright") or "").lower()

    # source filename/metadata must be the priority
    abbr = (utils.parse_pubid(xd.filename) or "").lower()

    candidates = set()
    for publ in metadb.xd_publications().values():
        if publ.PublicationAbbr == abbr:
            candidates.add((1, publ))

        if publ.PublicationName and publ.PublicationName.lower() in copyright_text:
            candidates.add((2, publ))

        if publ.PublisherName and publ.PublisherName.lower() in copyright_text:
            candidates.add((3, publ))

    if not candidates:
        return None

    # Prefer non-'self' publications, but fall back to the full candidate
    # set when filtering would leave nothing.
    non_self = {(pri, p) for pri, p in candidates
                if 'self' not in (p.PublisherAbbr or "")}
    pool = non_self or candidates

    # Rank by priority, then by abbreviation, so equal-priority matches are
    # ordered without comparing the publication rows themselves.
    best = min(pool, key=lambda m: (m[0], m[1].PublicationAbbr or ""))
    return best[1]
Exemple #5
0
def main():
    """Render the publication-by-year map and write it to index.html.

    Rows read from 'pub/stats' are grouped per publication-year.  One table
    row is produced per publication: those named in `args.inputs`, or
    otherwise all publications ordered by puzzle count, largest first.
    Publications with fewer than 20 puzzles are skipped.  Unless --pubonly
    is given, a 'pub/<pub><year>/index.html' page is also written for each
    publication-year that has a widget.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p',
                   '--pubonly',
                   action="store_true",
                   default=False,
                   help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    pubyears = defaultdict(list)  # "<pubid><year>" -> stats rows
    pubyears_idx = defaultdict(list)  # pubid -> years, in first-seen order
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Collapsed decades ("1910s", ...) first, then one column per later year
    allyears = [
        "%s0s" % i
        for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))
    year_columns = sorted(allyears)  # loop-invariant; sort once

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # See definition above
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {
        pubid: len(metadb.xd_puzzles(pubid))
        for pubid in pubyears_idx
    }

    # sort rows by number of puzzles
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)

    publications = metadb.xd_publications()  # looked up once, not per row
    for pub in args.inputs or sorted_pubs:
        # .get(): a pub named on the command line may be absent from
        # 'pub/stats'; treat it as having no puzzles instead of raising
        # KeyError.
        pub_total = pubs_total.get(pub, 0)
        if pub_total < 20:
            continue

        # Process each pub in index
        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub
        pub_link = html.mkhref(pubname, 'pub/' + pub)
        html_out.append('<tr><td class="header">{}</td>'.format(pub_link))

        for year in year_columns:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html(
                        'pub/{pub}{year}/index.html'.format(pub=pub,
                                                            year=year),
                        pubyear_html(pub, year),
                        "{pubname}, {year}".format(pubname=pubname,
                                                   year=year))
            else:
                # No data for this pub-year: emit an empty placeholder,
                # scaled to decade width for collapsed-decade columns.
                width = svg_w if 's' not in year else svg_w * decade_scale
                html_out.append(
                    pys.format(w=width,
                               h=svg_h,
                               title='',
                               classes='notexists',
                               body=''))

            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pub_total))
        html_out.append('<td class="header">{}</td>'.format(pub_link))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')
    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out),
                    "Comparison of %s published crossword grids" % total_xd)
Exemple #6
0
def pubyear_html(pubyears=[], skip_decades=None):
    """
    Build the HTML table of per-publication, per-year puzzle widgets.

    pubyears: not read by this function (NOTE(review): apparently kept only
        for call compatibility; it is never mutated, so the list default is
        harmless here).
    skip_decades: dict with 'start' and 'end' years; every decade in that
        range is collapsed into a single column.
        default  { 'start': 1910, 'end': 1970 }

    Returns the markup as one string, fragments joined by single spaces.
    """
    global g_all_pubyears
    if not g_all_pubyears:
        # Context manager so the file handle is closed promptly.
        with open("pub/pubyears.tsv") as tsv:
            g_all_pubyears = utils.parse_tsv_data(tsv.read(), "pubyear")

    # Read similars to make background of widgets:
    # pubid -> year -> list of similar_grid_pct values
    similar_d = defaultdict(dict)
    for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
        xd_split = utils.split_xdid(xdid)
        if xd_split:
            pubid, year, mon, day = xd_split
            similar_d[pubid].setdefault(year, []).append(
                int(v.similar_grid_pct))

    b = []  # Body

    # Making collapsed decades depends on args
    skip_decades = skip_decades or {'start': 1910, 'end': 1970}
    allyears = [
        "%s0s" % i
        for i in range(skip_decades['start'] // 10,
                       skip_decades['end'] // 10 + 1)
    ]
    allyears.extend(
        str(y) for y in range(skip_decades['end'] + 10,
                              date.today().year + 1))

    pubs = defaultdict(dict)
    # generate widget for each year
    for dowl in g_all_pubyears:
        pubid, year, total = dowl[:3]
        dow = {}
        hint = ''
        for d, v in zip(utils.WEEKDAYS, dowl[3:]):
            dow[d] = {
                'count': int(v) // 2,
                'class': 'red' if d == 'Sun' else 'ord',
            }
            hint += '%s - %s\n' % (d, v)
        hint += 'Total: %s\n' % (total)

        # Define fill class based on average similarity
        fill_class = None  # default fill class for widget
        if year in similar_d[pubid]:
            pcts = similar_d[pubid][year]
            s_avg = sum(pcts) / len(pcts)
            hint += 'Avg similarity: %.2f%%' % (s_avg)
            # Highlight when the average is at least 10 %
            fill_class = 'similar10' if s_avg >= 10 else None

        # Fill pubs with the different blocks used below
        pubs[pubid][year] = {
            'dow_data': dow,
            'widget': year_widget(dow, total, fill_class),
            'hint': hint,
            'total': int(total),
        }

    # Per-publication grand totals, taken BEFORE the synthetic decade
    # entries are added below; summing afterwards would count every puzzle
    # of a collapsed decade twice (once in its year, once in its decade).
    pub_totals = {
        pubid: sum(entry['total'] for entry in years.values())
        for pubid, years in pubs.items()
    }

    # Process for all decades
    for dec_year in [x for x in allyears if 's' in x]:
        year_key = dec_year[:-2]  # drop the last digit and the "s": "1910s" -> "191"
        for pubid in pubs:
            # Prefix match: only years that actually belong to this decade
            total = sum(pubs[pubid][yf]['total']
                        for yf in list(pubs[pubid])
                        if yf.startswith(year_key))
            if total > 0:
                pubs[pubid][dec_year] = {
                    'widget': decade_widget(total),
                    'hint': 'Total: %s' % (total),
                    'total': int(total),
                }

    # main table
    b.append('<table class="pubyears">')
    yhdr = ['&nbsp;'] + [split_year(y) for y in allyears]
    yhdr.append("all")
    b.append(
        td_with_class(*yhdr,
                      classes=get_pubheader_classes(*yhdr),
                      rowclass="pubyearhead",
                      tag="th"))
    b.append(tr_empty())

    # Process each pubid sorted by earliest year
    for pubid in sorted(pubs, key=lambda x: min(pubs[x])):
        pub = metadb.xd_publications().get(pubid)
        pubname = pub.PublicationName if pub else ''
        # Pub id to first column
        b.append(mktag('tr'))
        b.append(mktag('td', 'pub'))
        b.append(mkcell(
            space_with_nbsp(pubname or pubid),
            "/pub/" + pubid,
        ))
        b.append(mktag('/td'))

        # One cell per displayed column (collapsed decade or single year)
        for yi in allyears:
            cell = pubs[pubid].get(yi)
            if cell and cell['total'] > 0:
                b.append(mktag('td', 'this'))
                # Put link directly to year or to decade
                if 's' not in yi:
                    href = "/pub/%s%s" % (pubid, yi)
                else:
                    href = "/pub/%s/index.html#%s" % (pubid, yi[:-1])
                b.append(
                    mkcell(cell['widget'], href=href, title=cell['hint']))
                b.append(mktag('/td'))
            else:
                b.append(mktag('td', 'block'))
                b.append('&nbsp;')
                b.append(mktag('/td'))

        b.append(mktag('td'))
        b.append(str(pub_totals[pubid]))
        b.append(mktag('/td'))
        b.append(mktag('/tr'))

    b.append(mktag('/table'))
    return " ".join(b)
Exemple #7
0
def main():
    """Render the publication-by-year map and write it to index.html.

    Rows read from 'pub/stats' are grouped per publication-year.  One table
    row is produced per publication: those named in `args.inputs`, or
    otherwise all publications ordered by puzzle count, largest first.
    Publications with fewer than 20 puzzles are skipped.  Unless --pubonly
    is given, a 'pub/<pub><year>/index.html' page is also written for each
    publication-year that has a widget.
    """
    p = utils.args_parser(desc="generate pubyear svg and pubyear pages")
    p.add_argument('-p', '--pubonly', action="store_true", default=False, help='only output root map')
    args = utils.get_args(parser=p)
    outf = utils.open_output()

    pubyears = defaultdict(list)  # "<pubid><year>" -> stats rows
    pubyears_idx = defaultdict(list)  # pubid -> years, in first-seen order
    for r in metadb.read_rows('pub/stats'):
        y = r.year or '0000'
        pubyears[r.pubid + str(y)].append(r)
        if y not in pubyears_idx[r.pubid]:
            pubyears_idx[r.pubid].append(y)

    # Collapsed decades ("1910s", ...) first, then one column per later year
    allyears = ["%s0s" % i for i in range(DECADE_SKIP_START // 10, DECADE_SKIP_END // 10 + 1)]
    allyears.extend(str(y) for y in range(DECADE_SKIP_END + 10, date.today().year + 1))
    year_columns = sorted(allyears)  # loop-invariant; sort once

    html_out = [
        '<p>Grouped by publication-year and broken out by day-of-week (Monday at top, Sunday at bottom).</p>',
        legend,  # See definition above
        '<table id="pubyearmap" cellspacing="0" cellpadding="0">',
    ]

    # Table header with years \ decades
    year_header = gen_year_header(allyears)
    html_out.extend(year_header)

    pubs_total = {pubid: len(metadb.xd_puzzles(pubid)) for pubid in pubyears_idx}

    # sort rows by number of puzzles
    sorted_pubs = sorted(pubs_total, key=pubs_total.get, reverse=True)

    publications = metadb.xd_publications()  # looked up once, not per row
    for pub in args.inputs or sorted_pubs:
        # .get(): a pub named on the command line may be absent from
        # 'pub/stats'; treat it as having no puzzles instead of raising
        # KeyError.
        pub_total = pubs_total.get(pub, 0)
        if pub_total < 20:
            continue

        # Process each pub in index
        pubobj = publications.get(pub)
        if pubobj:
            pubname = pubobj.PublicationName or pubobj.PublisherName
        else:
            pubname = pub
        pub_link = html.mkhref(pubname, pub)
        html_out.append('<tr><td class="header">{}</td>'.format(pub_link))

        for year in year_columns:
            html_out.append('<td class="year_widget">')
            py_td = td_for_pubyear(pubyears, pub, year)
            if py_td:
                html_out.append(py_td)
                if not args.pubonly:
                    outf.write_html('pub/{pub}{year}/index.html'.format(pub=pub, year=year),
                                    pubyear_html(pub, year),
                                    "{pubname}, {year}".format(pubname=pubname, year=year))
            else:
                # No data for this pub-year: emit an empty placeholder,
                # scaled to decade width for collapsed-decade columns.
                width = svg_w if 's' not in year else svg_w*decade_scale
                html_out.append(pys.format(w=width, h=svg_h, title='', classes='notexists', body=''))

            html_out.append('</td>')

        # Add totals + publishers
        html_out.append('<td class="header">{}</td>'.format(pub_total))
        html_out.append('<td class="header">{}</td>'.format(pub_link))
        html_out.append('</tr>')

    html_out.extend(year_header)
    html_out.append('</table>')
    total_xd = len(metadb.xd_puzzles())
    outf.write_html('index.html', "".join(html_out), "Comparison of %s published crossword grids" % total_xd)
Exemple #8
0
def pubyear_html(pubyears=[], skip_decades=None):
    """
    Build the HTML table of per-publication, per-year puzzle widgets.

    pubyears: not read by this function (NOTE(review): apparently kept only
        for call compatibility; it is never mutated, so the list default is
        harmless here).
    skip_decades: dict with 'start' and 'end' years; every decade in that
        range is collapsed into a single column.
        default  { 'start': 1910, 'end': 1970 }

    Returns the markup as one string, fragments joined by single spaces.
    """
    global g_all_pubyears
    if not g_all_pubyears:
        # Context manager so the file handle is closed promptly.
        with open("pub/pubyears.tsv") as tsv:
            g_all_pubyears = utils.parse_tsv_data(tsv.read(), "pubyear")

    # Read similars to make background of widgets:
    # pubid -> year -> list of similar_grid_pct values
    similar_d = defaultdict(dict)
    for xdid, v in utils.parse_tsv('gxd/similar.tsv', "similar").items():
        xd_split = utils.split_xdid(xdid)
        if xd_split:
            pubid, year, mon, day = xd_split
            similar_d[pubid].setdefault(year, []).append(int(v.similar_grid_pct))

    b = []  # Body

    # Making collapsed decades depends on args
    skip_decades = skip_decades or {'start': 1910, 'end': 1970}
    allyears = ["%s0s" % i for i in range(skip_decades['start']//10, skip_decades['end']//10 + 1)]
    allyears.extend(str(y) for y in range(skip_decades['end'] + 10, date.today().year + 1))

    pubs = defaultdict(dict)
    # generate widget for each year
    for dowl in g_all_pubyears:
        pubid, year, total = dowl[:3]
        dow = {}
        hint = ''
        for d, v in zip(utils.WEEKDAYS, dowl[3:]):
            dow[d] = {'count': int(v)//2, 'class': 'red' if d == 'Sun' else 'ord'}
            hint += '%s - %s\n' % (d, v)
        hint += 'Total: %s\n' % (total)

        # Define fill class based on average similarity
        fill_class = None  # default fill class for widget
        if year in similar_d[pubid]:
            pcts = similar_d[pubid][year]
            s_avg = sum(pcts) / len(pcts)
            hint += 'Avg similarity: %.2f%%' % (s_avg)
            # Highlight when the average is at least 10 %
            fill_class = 'similar10' if s_avg >= 10 else None

        # Fill pubs with the different blocks used below
        pubs[pubid][year] = {
            'dow_data': dow,
            'widget': year_widget(dow, total, fill_class),
            'hint': hint,
            'total': int(total),
        }

    # Per-publication grand totals, taken BEFORE the synthetic decade
    # entries are added below; summing afterwards would count every puzzle
    # of a collapsed decade twice (once in its year, once in its decade).
    pub_totals = {
        pubid: sum(entry['total'] for entry in years.values())
        for pubid, years in pubs.items()
    }

    # Process for all decades
    for dec_year in [x for x in allyears if 's' in x]:
        year_key = dec_year[:-2]  # drop the last digit and the "s": "1910s" -> "191"
        for pubid in pubs:
            # Prefix match: only years that actually belong to this decade
            total = sum(pubs[pubid][yf]['total']
                        for yf in list(pubs[pubid])
                        if yf.startswith(year_key))
            if total > 0:
                pubs[pubid][dec_year] = {
                    'widget': decade_widget(total),
                    'hint': 'Total: %s' % (total),
                    'total': int(total),
                }

    # main table
    b.append('<table class="pubyears">')
    yhdr = ['&nbsp;'] + [split_year(y) for y in allyears]
    yhdr.append("all")
    b.append(td_with_class(*yhdr, classes=get_pubheader_classes(*yhdr),
                           rowclass="pubyearhead", tag="th"))
    b.append(tr_empty())

    # Process each pubid sorted by earliest year
    for pubid in sorted(pubs, key=lambda x: min(pubs[x])):
        pub = metadb.xd_publications().get(pubid)
        pubname = pub.PublicationName if pub else ''
        # Pub id to first column
        b.append(mktag('tr'))
        b.append(mktag('td', 'pub'))
        b.append(mkcell(space_with_nbsp(pubname or pubid), "/pub/" + pubid, ))
        b.append(mktag('/td'))

        # One cell per displayed column (collapsed decade or single year)
        for yi in allyears:
            cell = pubs[pubid].get(yi)
            if cell and cell['total'] > 0:
                b.append(mktag('td', 'this'))
                # Put link directly to year or to decade
                if 's' not in yi:
                    href = "/pub/%s%s" % (pubid, yi)
                else:
                    href = "/pub/%s/index.html#%s" % (pubid, yi[:-1])
                b.append(mkcell(cell['widget'], href=href, title=cell['hint']))
                b.append(mktag('/td'))
            else:
                b.append(mktag('td', 'block'))
                b.append('&nbsp;')
                b.append(mktag('/td'))

        b.append(mktag('td'))
        b.append(str(pub_totals[pubid]))
        b.append(mktag('/td'))
        b.append(mktag('/tr'))

    b.append(mktag('/table'))
    return " ".join(b)