def getAuthors(self):
    if 'author' in self.json.keys():
        return ', '.join(listify(self.json['author']))
    elif 'editor' in self.json.keys():
        return ', '.join(listify(self.json['editor']))
    else:
        return ''
def getBoxLinks(self):
    links = [[], [], []]
    # Crossref to the parent
    # DOC: real crossrefs are nice, but all the global searches slow the engine down
    # DOC: (running time 5m11.559s vs 0m7.396s is 42x)
    # if 'crossref' in self.json.keys():
    #     xref = self.top().seek(self.json['crossref'])
    #     if xref:
    #         links.append('<strong><a href="{}.html">{}</a></strong>'.format(\
    #             xref.getKey(),
    #             xref.getKey().replace('-', ' ')))
    #     else:
    #         links.append('Xref not found')
    # else:
    #     links.append('no Xref')
    # DOC: instead, we just link to the parent!
    links[0].append('<strong><a href="{}.html">{}</a></strong>'.format(\
        self.up().getKey(),
        self.up().getKey().replace('-', ' ')))
    # DBLP
    if 'dblpkey' in self.json:
        for dblpk in listify(self.json['dblpkey']):
            links[1].append('<a href="http://dblp.uni-trier.de/rec/html/{}">DBLP</a>'.format(dblpk))
    elif 'dblpurl' in self.json:
        links[1].append('<a href="{}">DBLP</a>'.format(self.json['dblpurl']))
    else:
        links[1].append('no DBLP info')
    # Scholar
    if 'title' in self.json.keys():
        links[1].append('<a href="https://scholar.google.com/scholar?q=%22{}%22">Scholar</a>'.format(\
            str(self.json['title']).replace(' ', '+')))
    # Some publishers
    if 'ee' in self.json.keys():
        for e in listify(self.json['ee']):
            if e.startswith('http://dl.acm.org') \
                    or e.startswith('http://doi.acm.org') \
                    or e.startswith('http://portal.acm.org'):
                links[2].append('<a href="{}">ACM DL</a>'.format(e))
            elif e.startswith('http://ieeexplore.ieee.org'):
                links[2].append('<a href="{}">IEEE Xplore</a>'.format(e))
            elif e.startswith('http://ieeecomputersociety.org'):
                links[2].append('<a href="{}">IEEE CS</a>'.format(e))
            elif e.startswith('http://drops.dagstuhl.de'):
                links[2].append('<a href="{}">Dagstuhl</a>'.format(e))
            elif e.find('computer.org/csdl/proceedings') > 0:
                links[2].append('<a href="{}">CSDL</a>'.format(e))
            elif e.startswith('http://journal.ub.tu-berlin.de/eceasst'):
                links[2].append('<a href="{}">EC-EASST</a>'.format(e))
            elif e.startswith('http://ceur-ws.org'):
                links[2].append('<a href="{}">CEUR</a>'.format(e))
            elif e.startswith('http://dx.doi.org'):
                pass
            else:
                links[2].append('<a href="{}">?EE?</a>'.format(e))
    if 'doi' in self.json.keys():
        links[2].append('<a href="http://dx.doi.org/{}">DOI</a>'.format(self.json['doi']))
    return '<hr/>'.join(['<br/>\n'.join(lb) for lb in links if lb])
def getAbbrAuthors(self):
    # <abbr title="Rainer Koschke">RK</abbr>
    if 'author' not in self.json.keys():
        return ''
    return ' (' + ', '.join(['<abbr title="{0}">{1}</abbr>'.format(a,
        ''.join([w[0] for w in a.replace('-', ' ').split(' ') if w]))
        for a in listify(self.json['author'])]) + ')'
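# Sketch (not part of the original file): the initials logic above, in
# isolation. Hyphens count as word separators, and the `if w` guard skips
# empty fragments. The second name below is a hypothetical example.
def initials(name):
    return ''.join([w[0] for w in name.replace('-', ' ').split(' ') if w])

assert initials('Rainer Koschke') == 'RK'
assert initials('Jean-Marie Favre') == 'JMF'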
def offer(self, year, conf):
    ckey = conf.getKey()
    for rule in listify(self.json['select']):
        if ckey.startswith(rule):
            self.addConf(year, conf)
            # print(C.red(conf.getKey()), ' was accepted by ', C.red(self.name))
            return
def kv2link(k, v):
    if k == 'g':
        ico = makeimg('g', 'Google')
        r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v)
    elif k.endswith('.wp'):
        lang = k.split('.')[0]  # using ISO 639-1 language names
        ico = makeimg('wp', 'Wikipedia') + makeimg(lang, ISONames[lang])
        r = '<a href="https://{}.wikipedia.org/wiki/{}">{}</a>'.format(\
            lang,
            escape(v).replace('%20', '_'),
            v)
    elif k.endswith('.wb'):
        lang = k.split('.')[0]  # using ISO 639-1 language names
        ico = makeimg('wb', 'Wikibooks') + makeimg(lang, ISONames[lang])
        r = '<a href="https://{}.wikibooks.org/wiki/{}">{}</a>'.format(\
            lang,
            escape(v).replace('%20', '_'),
            v)
    elif k == 'wd':
        ico = makeimg('wd', 'Wikidata')
        r = '<a href="https://www.wikidata.org/wiki/{0}">{0}</a>'.format(v)
    elif k == 'hwiki':
        ico = makeimg('h', 'Haskell Wiki')
        r = '<a href="https://wiki.haskell.org/{}">{}</a>'.format(escape(v), v)
    elif k == 'so':
        ico = makeimg('so', 'Stack Overflow')
        r = '<a href="http://stackoverflow.com/questions/tagged?tagnames={0}">{0}</a>'.format(v)
    elif k == 'www':
        ico = ''  # makeimg('www', 'Homepage')
        if not v.startswith('http'):
            y = v
            v = 'http://' + v
        else:
            y = v.replace('http://', '').replace('https://', '')
        r = '<a href="{0}">{1}</a>'.format(v, y)
    elif k == 'aka':
        ico = ''
        r = '<br/>'.join(['a.k.a.: “{}”'.format(z) for z in listify(v)])
    else:
        ico = ''
        r = '?{}?{}?'.format(k, v)
    return ico + ' ' + r + '<br/>'
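# Sketch (not part of the original file): what kv2link produces for a
# Wikipedia-style key, where a dotted prefix such as 'en.wp' selects the
# language edition. The helpers below are minimal stand-ins for the real
# makeimg/escape/ISONames defined elsewhere in the repo.
makeimg = lambda img, alt: '<img src="{}.png" alt="{}"/>'.format(img, alt)
escape = lambda s: s.replace(' ', '%20')
ISONames = {'en': 'English'}

print(kv2link('en.wp', 'Model transformation'))
# ... <a href="https://en.wikipedia.org/wiki/Model_transformation">Model transformation</a><br/>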
def getPage(self):
    if self.getTags():
        cnt = '<h3>Tags:</h3><ul class="tri">'
        cnt += '\n'.join(['<li class="tag"><a href="tag/{}.html">#{}</a></li>'.format(escape(t), t)
                          for t in self.tags])
        cnt += '</ul><hr/>'
    else:
        cnt = ''
    if 'stemmed' in self.json.keys():
        stemmed = listify(self.json['stemmed'])
        title = self.get('title')
        ltitle = title.lower()
        words = string2words(title)
        words.reverse()
        fancytitle = ''
        for w in words:
            i = ltitle.rindex(w)
            try:
                stem = stemmed[len(words) - words.index(w) - 1]
            except IndexError:
                print('Abnormal title in', self.getKey())
                print('\tCould not get', w, 'from', stemmed)
                break
            if ifApproved(stem):
                fancytitle = '<a href="word/{}.html">{}</a>{}'.format(\
                    stem,
                    title[i:i+len(w)],
                    title[i+len(w):]) + fancytitle
            else:
                fancytitle = title[i:] + fancytitle
            ltitle = ltitle[:i]
            title = title[:i]
        fancytitle = title + fancytitle
    else:
        fancytitle = self.get('title')
    return bibHTML.format(\
        filename=self.getJsonName(),
        title=self.get('title'),
        stemmedTitle=fancytitle,
        img=self.get('venue').lower(),  # geticon?
        authors=self.getAuthors(),
        short='{}, {}'.format(self.get('venue'), self.get('year')),
        code=self.getCode(),
        bib=self.getBib(),
        boxlinks=self.getBoxLinks(),
        contents=cnt)
# if a manual rule does the same as the other heuristic, it’s dumb
for a in sorted(aka.keys()):
    if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)):
        print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a),
              'aliasing was unnecessary manual work')
    elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)]
                               or aka[a] == [simpleLatin(a), nodiaLatin(a)]):
        print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a),
              'aliasing was a lot of unnecessary manual work')
    elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]:
        print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a),
              'aliasing contains some unnecessary manual work')

# auto-aliasing heuristic:
# for each author with diacritics, its non-diacritic twin is considered harmful
people = set()
for v in sleigh.venues:
    for c in v.getConfs():
        if 'editor' in c.json:
            people.update(listify(c.json['editor']))
        for p in c.papers:
            if 'author' in p.json:
                people.update(listify(p.json['author']))
for a in people:
    for na in (nodiaLatin(a), simpleLatin(a)):
        if na != a:
            aka.setdefault(a, [])
            aka[a].append(na)

# invert aliasing
for akey in aka:
    if akey in ('ZZZZZZZZZZ', 'FILE'):
        continue
    for aval in aka[akey]:
        renameto[aval] = akey

f = open('_renameto.json', 'w', encoding='utf-8')
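# Sketch (not part of the original file): nodiaLatin/simpleLatin are
# defined elsewhere in the repo; a plausible approximation of the
# "non-diacritic twin" is NFD-normalising and dropping combining marks.
import unicodedata

def strip_diacritics(s):
    return ''.join(ch for ch in unicodedata.normalize('NFD', s)
                   if not unicodedata.combining(ch))

assert strip_diacritics('Ralf Lämmel') == 'Ralf Lammel'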
# travelled to...
# NB: code clone of AST::Venue
ads = [c.json['address'][-1]
       for c in {bykey[p].up() for p in persondef['authored'] if p in bykey}
       if 'address' in c.json]
if ads:
    clist = {a: ads.count(a) for a in ads}
    adds = '<code>Travelled to:</code><hr/>' \
        + '<br/>\n'.join(sorted(['{} × {}'.format(clist[a], a) for a in clist]))
    boxlinks += adds

# collaborated with...
clist = {}
for p in persondef['authored']:
    if p not in bykey:
        continue
    if 'author' in bykey[p].json.keys():
        coas = listify(bykey[p].get('author'))
        if ' ' in coas:
            print('ERROR in [{}] - [{}] - [{}]'.format(p, bykey[p].getKey(), coas))
        D = len(coas)
        if D == 1:
            # solo papers count as coauthoring with yourself
            a = '∅'
            if a not in clist:
                clist[a] = 0
                if persondef['name'] in name2file.keys():
                    name2file['∅'] = name2file[persondef['name']]
                else:
                    name2file['∅'] = persondef['name']
            clist[a] += 1 / 2
            continue
    if p['name'] in people.keys():
        cx[report(C.red('duplicate') + ' ' + C.yellow(p), 1)] += 1
        continue
    people[p['name']] = p

print('{}: {} venues, {} papers written by {} people\n{}'.format(\
    C.purple('BibSLEIGH'),
    C.red(len(sleigh.venues)),
    C.red(sleigh.numOfPapers()),
    C.red(len(people)),
    C.purple('=' * 42)))

# traverse ALL the papers!
for v in sleigh.venues:
    for c in v.getConfs():
        for p in c.papers:
            if 'author' in p.json.keys():
                for a in listify(p.json['author']):
                    if a in people.keys():
                        if 'authored' not in people[a].keys():
                            people[a]['authored'] = []
                        if p.getKey() not in people[a]['authored']:
                            people[a]['authored'].append(p.getKey())
                    else:
                        report(C.yellow('Author not found: ') + a, 0)
        if 'editor' in c.json.keys():
            for e in listify(c.json['editor']):
                if e in people.keys():
                    if 'edited' not in people[e].keys():
                        people[e]['edited'] = []
                    if c.getKey() not in people[e]['edited']:
                        people[e]['edited'].append(c.getKey())
                else:
def checkon(fn, o):
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    f = open(fn, 'r', encoding='utf-8')
    lines = f.readlines()[1:-1]
    f.close()
    flines = json2lines(lines)
    plines = sorted(json2lines(o.getJSON().split('\n')))
    # "url" values that come from DBLP are useless
    if 'url' in o.json.keys():
        o.json['url'] = [link.replace('https://', 'http://')
                         for link in listify(o.json['url'])
                         if not link.startswith('db/conf/')
                         and not link.startswith('db/series/')
                         and not link.startswith('db/books/')
                         and not link.startswith('db/journals/')]
        if not o.json['url']:
            del o.json['url']
        elif len(o.json['url']) == 1:
            o.json['url'] = o.json['url'][0]
    if 'ee' in o.json.keys() and 'doi' not in o.json.keys():
        if isinstance(o.json['ee'], list):
            if verbose:
                print(C.red('Manylink:'), o.json['ee'])
        newee = []
        for onelink in listify(o.json['ee']):
            if onelink.startswith('http://dx.doi.org/'):
                o.json['doi'] = onelink[18:]
            elif onelink.startswith('http://doi.acm.org/'):
                o.json['doi'] = onelink[19:]
            elif onelink.startswith('http://doi.ieeecomputersociety.org/'):
                o.json['doi'] = onelink[35:]
            elif onelink.startswith('http://dl.acm.org/citation.cfm?id='):
                o.json['acmid'] = onelink[34:]
            elif onelink.startswith('http://portal.acm.org/citation.cfm?id='):
                o.json['acmid'] = onelink[38:]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=') \
                    or onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber='):
                o.json['ieeearid'] = onelink.split('=')[-1]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=') \
                    and onelink.find('arnumber') > -1:
                o.json['ieeearid'] = onelink.split('arnumber=')[-1].split('&')[0]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber='):
                o.json['ieeepuid'] = onelink.split('=')[-1]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber='):
                o.json['ieeeisid'] = onelink.split('=')[-1]
            elif onelink.startswith('http://eceasst.cs.tu-berlin.de/index.php/eceasst/article/view/'):
                newee.append('http://journal.ub.tu-berlin.de/eceasst/article/view/'
                             + onelink.split('/')[-1])
            elif onelink.endswith('.pdf') and \
                    (onelink.startswith('http://computer.org/proceedings/')
                     or onelink.startswith('http://csdl.computer.org/')):
                # Bad:  http://computer.org/proceedings/icsm/1189/11890007.pdf
                # Bad:  http://csdl.computer.org/comp/proceedings/date/2003/1870/02/187020040.pdf
                # Good: http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890004.pdf
                if onelink.startswith('http://csdl'):
                    cname, _, cid, mid, pid = onelink.split('/')[5:10]
                else:
                    cname, cid, pid = onelink.split('/')[4:7]
                    # heuristic
                    if pid.startswith(cid):
                        mid = pid[len(cid):len(cid) + 2]
                    else:
                        mid = '00'
                newee.append('http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(\
                    cname, o.get('year'), cid, mid, pid))
            else:
                if onelink.find('ieee') > -1:
                    print(C.purple('IEEE'), onelink)
                if verbose:
                    print(C.yellow('Missed opportunity:'), onelink)
                # nothing matches => preserve
                newee.append(onelink)
        if len(newee) == 0:
            del o.json['ee']
        elif len(newee) == 1:
            o.json['ee'] = newee[0]
        else:
            o.json['ee'] = newee
    # post-processing normalisation
    if 'acmid' in o.json.keys() and not isinstance(o.json['acmid'], int) and o.json['acmid'].isdigit():
        o.json['acmid'] = int(o.json['acmid'])
    if 'eventuri' in o.json.keys():
        o.json['eventurl'] = o.json['eventuri']
        del o.json['eventuri']
    if 'eventurl' in o.json.keys() and o.json['eventurl'].startswith('https://'):
        o.json['eventurl'] = o.json['eventurl'].replace('https://', 'http://')
    nlines = sorted(json2lines(o.getJSON().split('\n')))
    if flines != plines:
        return 1
    elif plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
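# Sketch (not part of the original file): the CSDL rewriting heuristic
# above, replicated standalone so it can be spot-checked against the
# Bad/Good examples from the comments (year is passed in explicitly here).
def fix_csdl(onelink, year):
    if onelink.startswith('http://csdl'):
        cname, _, cid, mid, pid = onelink.split('/')[5:10]
    else:
        cname, cid, pid = onelink.split('/')[4:7]
        mid = pid[len(cid):len(cid) + 2] if pid.startswith(cid) else '00'
    return 'http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(
        cname, year, cid, mid, pid)

assert fix_csdl('http://computer.org/proceedings/icsm/1189/11890007.pdf', 2001) \
    == 'http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890007.pdf'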
def checkon(fn, o):
    if os.path.isdir(fn):
        fn = fn + '.json'
    f = open(fn, 'r', encoding='utf-8')
    lines = f.readlines()[1:-1]
    f.close()
    flines = [strictstrip(s) for s in lines]
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if flines != plines:
        return 1
    ts = []
    # precise case-sensitive match
    mcs = o.get('title')
    # precise match for substrings
    mes = baretext(mcs)
    # precise match for words
    mew = mes.split(' ')
    # imprecise match for substrings
    mis = superbaretext(mes)
    # imprecise match for words
    miw = mis.split(' ')
    # now match!
    for t in tags:
        # print('Check', t, 'vs', mes)
        if 'name' not in t.keys():
            print(C.red('ERROR:'), 'no name for tag from file', t['FILE'])
            continue
        if all([not k.startswith('match') for k in t.keys()]):
            print(C.red('ERROR:'), 'no match rules for tag', t['name'])
            continue
        for k in t.keys():
            if k == 'matchentry':
                if o.getKey() in t[k]:
                    ts += [t['name']]
            elif k.startswith('match'):
                ts += [t['name'] for s in listify(t[k])
                       if matchModes[k](s, mcs, mes, mew, mis, miw)]
                # ts += [t['name'] for s in listify(t[k]) if fmm(t, k, s, mcs, mes, mew, mis, miw)]
    # second pass: check reliefs
    for t in tags:
        if 'relieves' in t.keys():
            for r in listify(t['relieves']):
                if t['name'] in ts and r in ts:
                    ts.remove(r)
                    if t['name'] not in relieved.keys():
                        relieved[t['name']] = 0
                    relieved[t['name']] += 1
    if ts:
        if not o.tags:
            o.tags = []
        for t in ts:
            if t not in o.tags:
                o.tags.append(t)
        # uncomment the following one line to overwrite all tags
        o.tags = uniq(ts)
        # let’s keep tags clean and sorted
        o.tags = sorted(o.tags)
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        f = open(fn, 'w', encoding='utf-8')
        f.write(o.getJSON())
        f.close()
        return 2
    else:
        return 0
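# Hypothetical example (not from the repo): a tag definition with the kinds
# of keys checked above: 'name', one or more 'match*' rules (dispatched
# through the matchModes table defined elsewhere), and an optional
# 'relieves' list that suppresses a weaker tag when both fire. The exact
# match-mode names are assumptions.
example_tag = {
    'name': 'parsing',
    'matchword': ['parser', 'parsing'],  # assumed match-mode name
    'relieves': ['syntax'],
    'FILE': 'tags/parsing.json',
}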
def getBib(self):
    if len(self.json) < 1:
        return '@misc{EMPTY,}'
    s = '@%s{%s,\n' % (self.get('type'), self.getKey())
    n2f = self.n2f if self.n2f else self.back.n2f
    for k in sorted(self.json.keys()):
        if k == k.upper() or k.endswith('short') or k == 'tag':
            # secret key
            continue
        if k in ('author', 'editor'):
            # TODO: add (correct!) links
            aelinks = ['<a href="{}">{}</a>'.format(n2f[ae], ae) if ae in n2f.keys() else ae
                       for ae in listify(self.json[k])]
            s += '\t{:<13} = "{}",\n'.format(k, ' and '.join(aelinks))
        elif k in ('title', 'booktitle', 'series', 'publisher', 'journal'):
            if k + 'short' not in self.json.keys():
                s += '\t{0:<13} = "{{{1}}}",\n'.format(k, self.json[k])
            else:
                s += '\t{0:<13} = "{{<span id="{0}">{1}</span>}}",\n'.format(k, self.json[k])
        elif k in ('crossref', 'key', 'type', 'venue', 'twitter',
                   'eventtitle', 'eventurl', 'nondblpkey', 'dblpkey', 'dblpurl',
                   'programchair', 'generalchair', 'roles', 'tagged', 'stemmed',
                   'status', 'ieeepuid', 'ieeearid', 'ieeeisid', 'cite'):
            # TODO: ban 'ee' as well
            pass
        elif k == 'doi':
            s += '<span class="uri">\t{0:<13} = "<a href="http://dx.doi.org/{1}">{1}</a>",\n</span>'.format(k, self.json[k])
        elif k == 'acmid':
            s += '<span class="uri">\t{0:<13} = "<a href="http://dl.acm.org/citation.cfm?id={1}">{1}</a>",\n</span>'.format(k, self.json[k])
        elif k == 'ieeearid':
            s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
        elif k == 'ieeepuid':
            s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
        elif k == 'ieeeisid':
            s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
        elif k == 'dblpkey':
            # NB: unreachable, since 'dblpkey' is already skipped by the tuple above
            # Legacy!
            # s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/db/{1}">{1}</a>",\n</span>'.format(k, self.json[k])
            s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/rec/html/{1}">{1}</a>",\n'.format(k, self.json[k])
        elif k == 'isbn':
            s += '<span id="isbn">\t{:<13} = "{}",\n</span>'.format(k, self.json[k])
        elif k in ('ee', 'url'):
            for e in listify(self.json[k]):
                # VVZ: eventually would like to get rid of EE completely
                # VVZ: limiting it for now to possibly interesting cases
                if k == 'ee' and (e.startswith('http://dx.doi.org')
                                  or e.startswith('http://dl.acm.org')
                                  or e.startswith('http://doi.ieeecomputersociety.org')):
                    continue
                s += '<span class="uri">\t{0:<13} = "<a href="{1}">{1}</a>",\n</span>'.format(k, e)
        elif k in ('year', 'volume', 'issue', 'number') and isinstance(self.json[k], int):
            s += '\t{0:<13} = {1},\n'.format(k, self.json[k])
        elif k == 'pages':
            s += '\t{0:<13} = "{1}",\n'.format(k, self.getPagesBib())
        elif k == 'address':
            if isinstance(self.json[k], str):
                a = self.json[k]
            elif self.json[k][1]:
                a = ', '.join(self.json[k])
            else:
                a = self.json[k][0] + ', ' + self.json[k][2]
            s += '\t{0:<13} = "{1}",\n'.format(k, a)
        else:
            s += '\t{0:<13} = "{1}",\n'.format(k, self.json[k])
    s += '}'
    return s.replace('<i>', '\\emph{').replace('</i>', '}')
        print('[', C.red('NOGO'), ']', 'No name in', fn)
        continue
    people[p['name']] = p

print('{}: {} venues, {} papers\n{}'.format(\
    C.purple('BibSLEIGH'),
    C.red(len(sleigh.venues)),
    C.red(sleigh.numOfPapers()),
    C.purple('=' * 42)))

# All people who ever contributed
names = []
for v in sleigh.venues:
    for c in v.getConfs():
        for p in c.papers:
            for k in ('author', 'editor'):
                if k in p.json.keys():
                    names += [a for a in listify(p.json[k]) if a not in names]

# caching
peoplekeys = people.keys()
if os.path.exists('_established.json'):
    established = json.load(open('_established.json', 'r'))
else:
    established = {}
# print(people)

CXread = len(people)
for name in names:
    if name not in peoplekeys:
        p = {'name': name,
             'FILE': ienputdir + '/people/' + fileify(name) + '.json',
             'dblp': dblpify(name)}
        people[p['name']] = p

# flatten conferences for easy lookup