def __init__(self, idir, name2file):
    # Build the Sleigh (the whole corpus): scan `idir` for venue definitions
    # and instantiate a Venue object for each qualifying directory found there.
    # idir:      input directory holding per-venue subdirectories and *.json files
    # name2file: shared name->file mapping, kept as self.n2f for later lookups
    super(Sleigh, self).__init__('', idir)
    self.venues = []
    self.n2f = name2file
    jsons = {}
    # Venue names to skip temporarily; currently empty, so both "Skipping"
    # branches below are dead code — kept as a manual debugging switch.
    skip4Now = []
    # First pass: remember which venues have a top-level JSON definition.
    for d in glob.glob(idir + '/*.json'):
        if lastSlash(d).split('.')[0] in skip4Now:
            print(
                C.red('Skipping') + ' ' + C.purple(d) + ' ' +
                C.red('for now'))
            continue
        jsons[lastSlash(d).split('.')[0]] = d
    # Second pass: walk all entries and create Venue objects for directories.
    for d in glob.glob(idir + '/*'):
        cont = False
        # Ignore markdown files, the JSON files handled above, and the
        # 'frem'/'edif' service directories.
        for end in ('.md', '.json', '/frem', '/edif'):
            if d.endswith(end):
                cont = True
        if d.split('/')[-1] in skip4Now:
            print(
                C.red('Skipping') + ' ' + C.purple(d) + ' ' +
                C.red('for now'))
            cont = True
        if cont:
            continue
        if lastSlash(d) not in jsons.keys():
            # Directory without a matching top-level JSON definition.
            print(C.red('Legacy non-top definition of'), d)
            # NOTE(review): this inner guard looks redundant — '/frem' and
            # '/edif' suffixes were already filtered out above; confirm.
            if lastSlash(d) not in ('edif', 'frem'):
                self.venues.append(Venue(d, idir, name2file, self))
        else:
            self.venues.append(Venue(d, idir, name2file, self))
def checkon(fn, o):
    """Check the JSON file `fn` against the in-memory entry `o`.

    Returns:
        0 -- the file exists and matches o's JSON serialisation;
        1 -- mismatch found, or a bogus-year file was removed;
        2 -- the file was missing and a minimal stub was created.
    """
    # Normalise: a directory or a missing path means "look for the .json file".
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    if not os.path.exists(fn):
        # If it still does not exist, create a minimal proceedings stub.
        # (fix: use `with` so the handle is closed even if write() raises)
        with open(fn, 'w', encoding='utf-8') as f:
            f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(
                name=lastSlash(fn)[:-5].replace('-', ' '),
                year=findYear(lastSlash(fn))))
        print('[ {} ] {}'.format(C.yellow('MADE'), fn))
        return 2
    # Read the body of the JSON object, stripping the outer brace lines.
    with open(fn, 'r', encoding='utf-8') as f:
        lines = f.readlines()[1:-1]
    # A "year" beyond 3000 marks a clearly bogus file: delete it.
    for line in lines:
        if line.find('"year"') > -1 and findYear(line) > 3000:
            os.remove(fn)
            print('[ {} ] {}'.format(C.red('KILL'), fn))
            return 1
    # Compare the normalised file contents with the object's serialisation,
    # order-insensitively (both sides sorted).
    flines = sorted([strictstrip(s) for s in lines])
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if flines != plines:
        # Show only the lines unique to each side.
        # (fix: the original re-tested `flines == plines` after this block;
        # the single comparison above already decides the result)
        f1 = [line for line in flines if line not in plines]
        f2 = [line for line in plines if line not in flines]
        print('∆:', f1, '\nvs', f2)
        return 1
    return 0
# NOTE(review): fragment — these statements are the body of a per-paper loop
# (over parsed <li> elements, presumably BeautifulSoup) whose `for` line is
# outside this view; `li`, `auths`, `paperTitle`, `paperPages`, `volYear`,
# `volTitles`, `volEds`, `volNr`, `volVenue`, `urlstart`, `outputdir` and
# `done` are all defined upstream.
paperAuths.extend(auths)
paperLnk = li.get('id')
hope = li.find_all('a')
# The first link, if it ends in .pdf, is taken as the paper's open PDF.
if hope and hope[0].get('href').endswith('.pdf'):
    paperPdf = urlstart + hope[0].get('href')
else:
    paperPdf = ''
paperEntry = {'type': 'inproceedings', 'series': 'CEUR Workshop Proceedings',
    'publisher': 'CEUR-WS.org', 'year': volYear, 'booktitle': volTitles[-1],
    'editor': volEds, 'volume': volNr.split('-')[-1], 'title': paperTitle,
    'author': paperAuths, 'pages': paperPages, 'venue': volVenue}
if paperPdf:
    paperEntry['openpdf'] = paperPdf
if paperLnk:
    paperEntry['url'] = urlstart + '#' + paperLnk
# Filename scheme: <volume-dir>-<first author's surname><initial of each
# further author's surname>.
paperFilename = lastSlash(outputdir) + '-' + paperAuths[0].split(' ')[-1]
for a in paperAuths[1:]:
    print(a)
    paperFilename += a.split(' ')[-1][0]
# Disambiguate filename clashes by appending/bumping a trailing letter:
# a, b, c, ... (assumes fewer than ~26 clashes — TODO confirm).
if paperFilename in done:
    paperFilename += 'a'
while paperFilename in done:
    paperFilename = paperFilename[:-1] + chr(
        ord(paperFilename[-1]) + 1)
# print(jsonify(paperEntry), '-->', outputdir+'/'+paperFilename+'.json')
f = open(outputdir + '/' + paperFilename + '.json', 'w', encoding='utf-8')
f.write(jsonify(paperEntry))
f.close()
def purenameof(f):
    """Basename of `f` with its last four characters (e.g. '.xml') chopped off."""
    base = lastSlash(f)
    return base[:len(base) - 4]
# NOTE(review): fragment — the next two statements are the error tail of a
# URL-fetching helper (safely_load_url) whose `def` line is outside this view.
print('Error fetching URL: ' + url)
return ''


if __name__ == "__main__":
    # CLI: <URI> <DIR> [<FROM> <TO>] — fetch a DBLP table-of-contents page and
    # determine the range of per-paper XML entries to process.
    if len(sys.argv) not in (3, 5):
        print('Usage:\n\t{} <URI> <DIR> [<FROM> <TO>]\n'.format(sys.argv[0]))
        print(('e.g.: {} http://dblp.uni-trier.de/db/conf/sigplan/sigplan82.html ' +
            '../json/corpus/PLDI/1982/SCC-1982').format(sys.argv[0]))
        sys.exit(1)
    dblp = safely_load_url(sys.argv[1])
    ldir = sys.argv[2]
    # Assumes the target dir looks like ../json/corpus/<VENUE>/<YEAR>/<CONF>,
    # so component 4 is the year — TODO confirm against callers.
    year = ldir.split('/')[4]
    # Every quoted token ending in .xml on the DBLP page is one paper entry.
    allxmls = [xmlname for xmlname in dblp.split('"') if xmlname.endswith('.xml')]
    if len(sys.argv) == 5:
        entry1 = lastSlash(sys.argv[3])
        entry2 = lastSlash(sys.argv[4])
    else:
        entry1 = purenameof(allxmls[0])
        entry2 = purenameof(allxmls[-1])
    if not os.path.exists(ldir):
        os.makedirs(ldir)
    ps = 0
    # yeswecan switches on when entry1 is seen and off at entry2; the loop
    # body that consumes the flag continues beyond this fragment.
    yeswecan = False
    for xmlname in allxmls:
        if yeswecan:
            if purenameof(xmlname) == entry2:
                yeswecan = False
        else:
            if purenameof(xmlname) == entry1:
                yeswecan = True
# NOTE(review): fragment — this first line is the tail of a banner print(...)
# call whose opening parenthesis is outside this view.
C.purple('='*42)))
ps = []
# flatten the sleigh: index every venue, conference and paper by its key
bykey = {}
for v in sleigh.venues:
    bykey[v.getKey()] = v
    for c in v.getConfs():
        bykey[c.getKey()] = c
        for p in c.papers:
            bykey[p.getKey()] = p
print(C.purple('BibSLEIGH flattened to {} entries'.format(len(bykey))))
# tagged = []
# for k in ts.keys():
# Load every person definition and open one output page per person.
peoples = {}
for fn in glob.glob(ienputdir + '/people/*.json'):
    k = lastSlash(fn)[:-5]
    ps.append(k)
    # TODO: get rid of ps in favour of peoples
    f = open('{}/person/{}.html'.format(outputdir, k), 'w', encoding='utf-8')
    persondef = parseJSON(fn)
    peoples[k] = persondef
    # what to google?
    # links = []
    # if 'g' not in persondef.keys():
    #     links.append(kv2link('g', tagdef['name'] if 'namefull' in tagdef.keys() else k))
    # title = tagdef['namefull'] if 'namefull' in tagdef.keys() else tagdef['name']
    # subt = ('<br/><em>'+tagdef['namelong']+'</em>') if 'namelong' in tagdef.keys() else ''
    # links = '<strong>{}</strong>{}<hr/>'.format(title, subt) + '\n'.join(sorted(links))
    # TODO: sort by venues!
def getHtmlName(self):
    """Return this entity's pure-name basename as an '.html' filename."""
    base = lastSlash(self.getPureName())
    # A '.json' suffix is swapped for '.html'; an existing '.html' is kept.
    if base.endswith('.json'):
        base = base[:-5]
    if base.endswith('.html'):
        return base
    return base + '.html'
def last(xx):
    """Basename of `xx` with every '.json' substring removed."""
    name = lastSlash(xx)
    return name.replace('.json', '')
# NOTE(review): fragment — the next two statements are the tail of a
# report(fn1, fn2, r)-style helper whose `def` line is outside this view.
print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2))
return r


if __name__ == "__main__":
    print('{} conference renamer\n{}'.format(\
        C.purple('BibSLEIGH'), C.purple('='*42)))
    if len(sys.argv) < 2:
        print('Usage:\n\t{} [<DIR>]'.format(sys.argv[0]))
        sys.exit(1)
    verbose = sys.argv[-1] == '-v'
    # Accept either an absolute corpus path or a name relative to the corpus.
    if sys.argv[1].startswith(ienputdir):
        path = sys.argv[1]
        name = path.replace(ienputdir + '/corpus/', '')
        namem = lastSlash(name)
    else:
        name = sys.argv[1]
        path = ienputdir + '/corpus/' + name
        namem = lastSlash(name)
    # Per-status counters; presumably 0 = ok, 1 = fail, 2 = renamed — TODO confirm.
    cx = {0: 0, 1: 0, 2: 0}
    if not os.path.exists(path):
        report(name, name, 1)
        sys.exit(1)
    # for all papers...
    for fn in glob.glob(path + '/*.json'):
        pureold = fn.split(namem + '/')[1]
        if pureold.endswith('.json'):
            pureold = pureold[:-5]
        purenew = pureold
        # NOTE(review): fragment ends mid-loop — the body of this `if`
        # continues beyond this view.
        if purenew[-2:] == namem[-2:]:
def main():
    """Generate the whole static site: the index, per-venue/conference/paper
    pages, the brand-icon line-up, the about page and the DBLP sync table."""
    print('{}: {} venues, {} papers\n{}'.format(
        C.purple('BibSLEIGH'), C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers()), C.purple('=' * 42)))
    # generate the index
    f = open(outputdir + '/index.html', 'w', encoding='utf-8')
    f.write(sleigh.getPage())
    f.close()
    # generate all individual pages
    # if False:
    for v in sleigh.venues:
        # r accumulates a one-line colourful progress report per venue
        r = C.blue(v.getKey())
        f = open(outputdir + '/' + v.getKey() + '.html', 'w', encoding='utf-8')
        f.write(v.getPage())
        f.close()
        if v.brands:
            r += '{' + '+'.join([C.blue(b.getKey()) for b in v.brands]) + '}'
            for b in v.brands:
                f = open(outputdir + '/' + b.getKey() + '.brand.html', 'w',
                         encoding='utf-8')
                f.write(b.getPage())
                f.close()
        r += ' => '
        for c in v.getConfs():
            f = open(outputdir + '/' + c.getKey() + '.html', 'w',
                     encoding='utf-8')
            f.write(c.getPage())
            f.close()
            for p in c.papers:
                f = open(outputdir + '/' + p.getKey() + '.html', 'w',
                         encoding='utf-8')
                f.write(p.getPage())
                f.close()
            purekey = c.getKey().replace(v.getKey(), '').replace('-', ' ').strip()
            r += '{} [{}], '.format(purekey, C.yellow(len(c.papers)))
        print(r)
    # generate the icon lineup
    icons = []
    linked = []
    pngs = [
        lastSlash(png).split('.')[0]
        for png in glob.glob(outputdir + '/stuff/*.png')
    ]
    # Drop author ('a-'), person ('p-') and favicon ('ico-') pictures plus
    # site chrome — only venue/brand logos remain.
    pngs = [png for png in pngs \
        if not (png.startswith('a-') or png.startswith('p-')
            or png.startswith('ico-')
            or png in ('cc-by', 'xhtml', 'css', 'open-knowledge', 'edit'))]
    for brand in glob.glob(outputdir + '/*.brand.html'):
        pure = lastSlash(brand).split('.')[0]
        img = pure.lower().replace(' ', '')
        if img in pngs:
            pic = '<div class="wider"><a href="{0}.brand.html"><img class="abc" src="{1}" alt="{0}"/></a><span>{0}</span></div>'.format( \
                pure, 'stuff/' + img + '.png')
            pngs.remove(img)
            icons.append(pic)
        else:
            # print('No image for', pure)
            pass
    # Irregular venue-name capitalisations that .upper() cannot produce.
    corner = {
        'ada': 'TRI-Ada',
        'comparch': 'CompArch',
        'floc': 'FLoC',
        'bibsleigh': 'index'
    }
    # Remaining logos did not match a brand page: link them to the shortest
    # matching venue page, or to hard-coded destinations.
    for pure in pngs:
        venueCandidate = corner[pure] if pure in corner else pure.upper()
        canlink = sorted(glob.glob(outputdir + '/' + venueCandidate +
            '*.html'), key=len)
        if canlink:
            # NOTE(review): the 4th .format argument is unused by the template.
            pic = '<div class="wider"><a href="{0}"><img class="abc" src="stuff/{1}.png" alt="{2}"/></a><span>{2}</span></div>'.format( \
                canlink[0].split('/')[-1], pure, venueCandidate,
                canlink[0].split('/')[0])
        elif pure == 'twitter':
            pic = '<div class="wider"><a href="https://about.twitter.com/company/brand-assets"><img class="abc" src="stuff/twitter.png" alt="Twitter"/></a><span>Twitter</span></div>'
        elif pure == 'email':
            pic = '<div class="wider"><a href="mailto:[email protected]"><img class="abc" src="stuff/email.png" alt="e-mail"/></a><span>email</span></div>'
        else:
            print('Lonely', pure)
            pic = '<img class="abc" src="stuff/{0}.png" alt="{0}"/>'.format(
                pure)
        icons.append(pic)
    # find last year of each venue
    # for ven in glob.glob(corpusdir + '/*'):
    #     venname = lastSlash(ven)
    #     newstuff += '<strong><a href="http://dblp.uni-trier.de/db/conf/{}/">{} {}</a></strong>, '.format(venname.lower(), venname, nextYear(ven))
    #     print(lastSlash(ven), ':', lastYear(ven))
    # write "more info" file
    f = open(outputdir + '/about.html', 'w', encoding='utf-8')
    f.write(
        aboutHTML.format(
            len(icons),
            '<div class="minibar">' + '\n'.join(sorted(icons)) + '</div>'))
    f.close()
    # generate the DBLP sync page
    cell_by_conf_by_year = {}
    Ys = [
        2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010,
        2009
    ]
    dblplinks = {}
    # dblpguide.sync format: '#'-comment lines, otherwise
    # 'display-name | corpus-venue-key | dblp-url' triples.
    with open(ienputdir + '/meta/dblpguide.sync', 'r') as f:
        for line in f:
            if not line or line.startswith('#'):
                continue
            words = line.split('|')
            if len(words) != 3:
                print('- Metaline {} skipped!'.format(words))
                continue
            name = words[0].strip()
            dome = words[1].strip()
            dblp = words[2].strip()
            cell_by_conf_by_year[name] = {}
            dblplinks[name] = dblp
            for y in Ys:
                cell_by_conf_by_year[name][y] = '(no)'
            v = sleigh.getVenue(dome)
            if v:
                for yy in Ys:
                    y = v.getYear(yy)
                    if y:
                        ckey = '{}-{}'.format(name, yy)
                        c = y.getConf(ckey)
                        if c:
                            cell_by_conf_by_year[name][yy] = c.getIconItem2(
                                '', '')
                        else:
                            # print('- Conference {} of year {} in venue {} not found in the corpus'.format(ckey, yy, name))
                            # Fall back to known alternative key suffixes
                            # (multi-volume / journal-style conference keys).
                            for alt in 'v1', 'p1', 'c1', '1', 'J':
                                ckey = '{}-{}-{}'.format(name, alt, yy)
                                c = y.getConf(ckey)
                                if c:
                                    cell_by_conf_by_year[name][
                                        yy] = c.getIconItem2('', '')
                                    break
                    # else:
                    #     print('- Year {} in venue {} not found in the corpus among {}'.format(yy, name, [z.year for z in v.years]))
            # else:
            #     print('- Venue {} not found in the corpus'.format(name))
    # Render the per-venue / per-year matrix as an HTML table.
    table = '<table>'
    table += '<tr><td></td>'
    for y in Ys:
        table += '<th>{}</th>\n'.format(y)
    table += '</tr>'
    # print (cell_by_conf_by_year)
    for name in sorted(cell_by_conf_by_year.keys()):
        table += '<tr><th><a href="{}.brand.html">[@]</a> <a href="{}">{}</a></th>'.format(
            name, dblplinks[name], name)
        for y in Ys:
            table += '<td>{}</td>\n'.format(cell_by_conf_by_year[name][y])
        table += '</tr>'
    table += '</table>'
    with open(outputdir + '/sync.html', 'w', encoding='utf-8') as f:
        f.write(syncHTML.format(table))
    print('{}\nDone with {} venues, {} papers.'.format(
        C.purple('=' * 42), C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers())))
def next_year(vvv):
    """One past the year named by the second-to-last entry under `vvv`."""
    entries = sorted(glob.glob(vvv + '/*'))
    return int(lastSlash(entries[-2])) + 1
# non-verbose mode by default if verbose or r != 0: print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2)) return r if __name__ == "__main__": print('{} conference renamer\n{}'.format(\ C.purple('BibSLEIGH'), C.purple('='*42))) if len(sys.argv) < 3: print('Usage:\n\t{} <OLD-NAME> <NEW-NAME>'.format(sys.argv[0])) sys.exit(1) nameold, namenew = sys.argv[1:3] verbose = sys.argv[-1] == '-v' nameoldm = lastSlash(nameold) namenewm = lastSlash(namenew) print(nameoldm) cx = {0: 0, 1: 0, 2: 0} if not os.path.exists(ienputdir + '/corpus/' + nameold)\ or not os.path.isdir(ienputdir + '/corpus/' + nameold)\ or os.path.exists(ienputdir + '/corpus/' + namenew): report(nameold, namenew, 1) sys.exit(1) os.makedirs(ienputdir + '/corpus/' + namenew) cx[report('∅', namenew, 2)] += 2 # for all papers... for fn in glob.glob(ienputdir + '/corpus/' + nameold + '/*.json'): pureold = fn.split(nameoldm + '/')[1] if pureold.endswith('.json'): pureold = pureold[:-5]