Example 1
def checkreport(fn, o):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	r = checkon(fn, o)
	# non-verbose mode by default
	if verbose or r != 0:
		report(statuses[r], fn)
	return r
Example 2
def checkreport(fn, o):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('WARN'))
	r, msg = checkon(fn, o)
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {}: {}'.format(statuses[r], fn, msg))
	return r
Example 3
def report(fn, r):
    statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('UNEX'))
    special = ('', '- no crossref found!', '- illegal crossref')
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {} {}'.format(statuses[r], fn, special[r]))
    return r
Example 4
def checkreport(m, o):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	r = checkon(m, o)
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {}'.format(statuses[r], o.filename))
	return r
Example 5
def report(fn, r):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('UNEX'))
	special = ('', '- no crossref found!', '- illegal crossref')
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {} {}'.format(statuses[r], fn, special[r]))
	return r
Example 6
def checkreport(fn, o):
    statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    r = checkon(fn, o)
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {}'.format(statuses[r], fn))
    return r
Example 7
def checkreport(fn, o, br):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	if br:
		r = checkbrand(fn, br)
	else:
		r = checkon(fn, o)
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {}'.format(statuses[r], fn))
	return r
Example 8
def checkreport(fn, o):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	if isinstance(o, int):
		r = o
	else:
		r = checkon(fn, o)
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {}'.format(statuses[r], fn))
	return r
Example 9
def checkon(m, o):
	# if no common model found, we failed
	if not m:
		return 1
	if 'type' in m.keys() and m['type'] in ('inproceedings', 'article'):
		m['type'] = 'proceedings'
	if 'type' in m.keys() and m['type'] == 'incollection':
		m['type'] = 'book'
	if 'crossref' in m.keys():
		del m['crossref']
	if 'booktitle' in m.keys():
		m['title'] = m['booktitle']
		del m['booktitle']
	if 'booktitleshort' in m.keys():
		# TODO: ???
		del m['booktitleshort']
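	# result code: 0 = all fields confirmed / nothing to fix, 1 = failure, 2 = conflicts settled (collected in n and written back)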
	r = 0
	n = {}
	for k in m.keys():
		if o.get(k) == m[k]:
			if verbose:
				print(C.blue('Confirmed:  '), k, 'as', m[k])
		else:
			if verbose:
				print(C.red('Conflicted: '), k, 'as', m[k], 'vs', o.get(k))
			v = heurichoose(k, m[k], o.json[k]) if k in o.json.keys() else m[k]
			if verbose:
				print(C.yellow('Settled for:'), v)
			n[k] = v
			r = 2
	if r == 0:
		return r
	if r == 2 and not n:
		# nothing to fix?!
		return 0
	if not os.path.exists(o.filename):
		return 0
	if os.path.isdir(o.filename):
		fn = o.filename + '.json'
	else:
		fn = o.filename
	if os.path.exists(fn):
		f = open(fn, 'r', encoding='utf-8')
		lines = f.read()
		f.close()
		if lines != o.getJSON():
			# strange, should be equal (run all normalisers first!)
			return 1
	for k in n.keys():
		o.json[k] = n[k]
	f = open(fn, 'w', encoding='utf-8')
	f.write(o.getJSON())
	f.close()
	return 2
Example 10
def checkon(m, o):
	# if no common model found, we failed
	if not m:
		return 1
	if 'type' in m.keys() and m['type'] in ('inproceedings', 'article'):
		m['type'] = 'proceedings'
	if 'type' in m.keys() and m['type'] == 'incollection':
		m['type'] = 'book'
	if 'crossref' in m.keys():
		del m['crossref']
	if 'booktitle' in m.keys():
		m['title'] = m['booktitle']
		del m['booktitle']
	if 'booktitleshort' in m.keys():
		# TODO: ???
		del m['booktitleshort']
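	# result code: 0 = all fields confirmed / nothing to fix, 1 = failure, 2 = conflicts settled (collected in n and written back)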
	r = 0
	n = {}
	for k in m.keys():
		if o.get(k) == m[k]:
			if verbose:
				print(C.blue('Confirmed:  '), k, 'as', m[k])
		else:
			if verbose:
				print(C.red('Conflicted: '), k, 'as', m[k], 'vs', o.get(k))
			v = heurichoose(k, m[k], o.json[k]) if k in o.json.keys() else m[k]
			if verbose:
				print(C.yellow('Settled for:'), v)
			n[k] = v
			r = 2
	if r == 0:
		return r
	if r == 2 and not n:
		# nothing to fix?!
		return 0
	if not os.path.exists(o.filename):
		return 0
	if os.path.isdir(o.filename):
		fn = o.filename + '.json'
	else:
		fn = o.filename
	if os.path.exists(fn):
		f = open(fn, 'r')
		lines = f.read()
		f.close()
		if lines != o.getJSON():
			# strange, should be equal (run all normalisers first!)
			return 1
	for k in n.keys():
		o.json[k] = n[k]
	f = open(fn, 'w')
	f.write(o.getJSON())
	f.close()
	return 2
Example 11
        else:
            paperPdf = ''
        paperEntry = {'type': 'inproceedings', 'series': 'CEUR Workshop Proceedings',\
         'publisher': 'CEUR-WS.org', 'year': volYear, 'booktitle': volTitles[-1],\
         'editor': volEds, 'volume': volNr.split('-')[-1], 'title': paperTitle,\
         'author': paperAuths, 'pages': paperPages, 'venue': volVenue}
        if paperPdf:
            paperEntry['openpdf'] = paperPdf
        if paperLnk:
            paperEntry['url'] = urlstart + '#' + paperLnk
        paperFilename = lastSlash(outputdir) + '-' + paperAuths[0].split(
            ' ')[-1]
        for a in paperAuths[1:]:
            print(a)
            paperFilename += a.split(' ')[-1][0]
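        # on a name clash, append 'a' and keep bumping the last letter until the filename is unique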
        if paperFilename in done:
            paperFilename += 'a'
            while paperFilename in done:
                paperFilename = paperFilename[:-1] + chr(
                    ord(paperFilename[-1]) + 1)
        # print(jsonify(paperEntry), '-->', outputdir+'/'+paperFilename+'.json')
        f = open(outputdir + '/' + paperFilename + '.json',
                 'w',
                 encoding='utf-8')
        f.write(jsonify(paperEntry))
        f.close()
        cx += 1
        done.append(paperFilename)
    print(C.red(volVenue), '-', C.yellow(volTitles[-1]), '-', C.blue(cx),
          'papers.')
Example 12
def checkon(fn, o):
	if 'dblpkey' not in o.json.keys():
		print('[ {} ] {}'.format(C.red('DONT'), 'DBLP key not found on the entry'))
		return 1
	mykey = o.get('dblpkey')
	# for the rare case of multiple dblpkeys
	# (can happen as a DBLP error or when the same proceedings span multiple volumes)
	if isinstance(mykey, list):
		mykey = mykey[0]
	if mykey not in procs.keys():
		print('[ {} ] {}'.format(C.red('DONT'), 'DBLP key not found in the dump'))
		return 1
	title = procs[mykey]
	if title.endswith('.'):
		title = title[:-1]
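	# tokenise the proceedings title and look for a known country/state name; the preceding token is taken as the town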
	ws = title.replace(' - ', ', ').replace(' (', ', ').split(', ')
	country = findOneIn(knownCountries, ws)
	state = findOneIn(usaStateNames, ws)
	found = False
	if country:
		town = ws[ws.index(country)-1]
		state = '?'
		# what if "town" is a US state? (full)
		if country == 'USA' and town in usaStateNames:
			state = town
			town = ws[ws.index(town)-1]
		# what if "town" is a US state? (abbreviated)
		if country == 'USA' and town in usaStateAB:
			state = usaStateNames[usaStateAB.index(town)]
			town = ws[ws.index(town)-1]
		# what if "town" is a Canadian state? (full)
		if country == 'Canada' and town in canStateNames:
			state = town
			town = ws[ws.index(town)-1]
		# what if "town" is a Canadian state? (abbreviated)
		if country == 'Canada' and town in canStateAB:
			state = canStateNames[canStateAB.index(town)]
			town = ws[ws.index(town)-1]
		# the same can happen in the UK
		if country in ('UK', 'United Kingdom') and town in ('Scotland', 'Scottland'):
			state = town
			town = ws[ws.index(town)-1]
		# Georgia the country vs Georgia the state
		if country == 'Georgia' and town == 'Atlanta':
			state = country
			country = 'USA'
		# near Something
		if town.startswith('near '):
			town = ws[ws.index(town)-1]
		# Luxembourg, Luxembourg
		if country == 'Luxembourg':
			town = 'Luxembourg'
		# Saint-Malo / St. Malo
		if country == 'France' and town == 'St. Malo':
			town = 'Saint-Malo'
		# Florence / Firenze
		if country == 'Italy' and town.find('Firenze') > -1:
			town = 'Florence'
		found = True
	elif state:
		country = 'USA'
		town = ws[ws.index(state)-1]
		found = True
	else:
		# desperate times
		for sol in desperateSolutions.keys():
			if sol in ws:
				town, state, country = desperateSolutions[sol]
				found = True
	# normalise
	if country in countryMap.keys():
		country = countryMap[country]
	if country == 'United Kingdom' and state == '?':
		if town.endswith('London') or town in ('Birmingham', 'York',\
		'Coventry', 'Nottingham', 'Lancaster', 'Oxford', 'Manchester',\
		'Southampton', 'Norwich', 'Leicester', 'Canterbury'):
			state = 'England'
		elif town in ('Edinburgh', 'Glasgow'):
			state = 'Scotland'
	# report
	if 'address' in o.json.keys():
		print('[ {} ] {}'.format(C.blue('OLDA'), o.get('address')))
	if 'location' in o.json.keys():
		print('[ {} ] {}'.format(C.blue('OLDL'), o.get('location')))
	if found:
		# print('[ {} ] {}'.format(C.blue('KNOW'), country))
		print('[ {} ] {}'.format(C.blue('AD||'), title))
		print('[ {} ] {:30} || {:30} || {:20}'.format(C.blue('AD->'), C.yellow(town), C.yellow(state), C.yellow(country)))
		# TODO: perhaps later we can act more aggressively
		newaddr = [town, '' if state=='?' else state, country]
		if 'address' not in o.json.keys() or newaddr != o.json['address']:
			o.json['address'] = newaddr
			f = open(o.json['FILE'], 'w')
			f.write(o.getJSON())
			f.close()
			return 2
		# nothing changed
		return 0
	print('[ {} ] {}'.format(C.yellow('AD??'), title))
	return 1
Example 13
		# 	allstems += x.getBareStems()
		# siblings = {stem:allstems.count(stem) for stem in allstems if stem != k and ifApproved(stem)}
		# NB: the following code is faster:
		siblings = Counter()
		for x in stems[k]:
			siblings.update([s for s in x.getBareStems() if s != k and ifApproved(s)])
		box = '<code>Used together with:</code><hr/>' + \
			'\n<br/>'.join(['<span class="tag"><a href="{0}.html">{0}</a></span> ({1})'.format(\
				*sn) for sn in siblings.most_common(5)])
		f.write(wordHTML.format(\
			stem=k,
			inthebox=box,
			listname='{} papers'.format(len(lst)),
			dl='<dl class="toc">' + '\n'.join(lst).replace('href="', 'href="../') + '</dl>'))
		f.close()
	print('Word pages:', C.yellow('{}'.format(len(stems))), C.blue('generated'))
	# stem index
	f = open(outputdir+'/words.html', 'w', encoding='utf-8')
	keyz = [k for k in stems.keys() if len(stems[k]) > 100 and ifApproved(k)]
	keyz.sort(key=lambda t: -len(t), reverse=True)
	lst = ['<li><a href="word/{}.html">{}</a>$ ({})</li>'.format(\
		escape(t), t, len(stems[t])) for t in keyz]
	ul = '<ul class="tri">' + '\n'.join(lst) + '</ul>'
	CX = sum([len(stems[t]) for t in stems.keys()])
	f.write(wordlistHTML.format(\
		title='All known stems',
		listname='{} stems known and {} shown from {} notable words'.format(len(stems), len(keyz), CX),
		ul=ul))
	f.close()
	print('Stem index:', C.blue('created'))
	print('{}\nDone with {} venues, {} papers, {} tags.'.format(\
Example 14
		# 	allstems += x.getBareStems()
		# siblings = {stem:allstems.count(stem) for stem in allstems if stem != k and ifApproved(stem)}
		# NB: the following code is faster:
		siblings = Counter()
		for x in stems[k]:
			siblings.update([s for s in x.getBareStems() if s != k and ifApproved(s)])
		box = '<code>Used together with:</code><hr/>' + \
			'\n<br/>'.join(['<span class="tag"><a href="{0}.html">{0}</a></span> ({1})'.format(\
				*sn) for sn in siblings.most_common(5)])
		f.write(wordHTML.format(\
			stem=k,
			inthebox=box,
			listname='{} papers'.format(len(lst)),
			dl='<dl class="toc">' + '\n'.join(lst).replace('href="', 'href="../') + '</dl>'))
		f.close()
	print('Word pages:', C.yellow('{}'.format(len(stems))), C.blue('generated'))
	# stem index
	f = open(outputdir+'/words.html', 'w')
	keyz = [k for k in stems.keys() if len(stems[k]) > 100 and ifApproved(k)]
	keyz.sort(key=lambda t: -len(t), reverse=True)
	lst = ['<li><a href="word/{}.html">{}</a>$ ({})</li>'.format(\
		escape(t), t, len(stems[t])) for t in keyz]
	ul = '<ul class="tri">' + '\n'.join(lst) + '</ul>'
	CX = sum([len(stems[t]) for t in stems.keys()])
	f.write(wordlistHTML.format(\
		title='All known stems',
		listname='{} stems known and {} shown from {} notable words'.format(len(stems), len(keyz), CX),
		ul=ul))
	f.close()
	print('Stem index:', C.blue('created'))
	print('{}\nDone with {} venues, {} papers, {} tags.'.format(\
Example 15
		cx[1] += 1
		return dblpLatin(s)+':'
	ws = s.split(' ')
	i = -1
	if ws[i] in ('Jr', 'Jr.'):
		i -= 1
	sur = dblpLatin(' '.join(ws[i:]))
	rest = dblpLatin(' '.join(ws[:i])).replace(' ', '_')
	for c in ".'-":
		rest = rest.replace(c, '=')
	return sur+':'+rest

if __name__ == "__main__":
	verbose = sys.argv[-1] == '-v'
	if not os.path.exists('_renameto.json'):
		print('Run', C.blue('refine-aliases.py'), 'to build the aliasing/renaming relation and cache it.')
		sys.exit(1)
	# aka = parseJSON(ienputdir + '/aliases.json')
	dis = parseJSON(ienputdir + '/disambig.json')
	renameto = parseJSON('_renameto.json')
	# Data from the conferenceMetrics repo
	csv = []
	f = open('../conferenceMetrics/data/SE-conf-roles.csv', 'r')
	for line in f.readlines():
		# Conference;Year;First Name;Last Name;Sex;Role
		csv.append(line.strip().split(';'))
	f.close()
	f = open('scrap-committees/scraped-by-grammarware.csv', 'r')
	for line in f.readlines():
		csv.append(line.strip().split(';'))
	f.close()
Example 16
	return r

if __name__ == "__main__":
	verbose = sys.argv[-1] == '-v'
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.green(len(sleigh.venues)),
		C.green(sleigh.numOfPapers()),
		C.purple('='*42)))
	aka = parseJSON(ienputdir + '/aliases.json')
	CX = sum([len(aka[a]) for a in aka])
	# self-adaptation heuristic:
	#  if a manual rule does the same as the other heuristic, it’s dumb
	for a in sorted(aka.keys()):
		if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)):
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was unnecessary manual work')
		elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \
							    or aka[a] == [simpleLatin(a), nodiaLatin(a)]):
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was a lot of unnecessary manual work')
		elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]:
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing contains some unnecessary manual work')
	# auto-aliasing heuristic:
	#  for each author with diacritics, the non-diacritic twin is considered harmful
	people = set()
	for v in sleigh.venues:
		for c in v.getConfs():
			if 'editor' in c.json:
				people.update(listify(c.json['editor']))
			for p in c.papers:
				if 'author' in p.json:
					people.update(listify(p.json['author']))
Example 17
		title = tagdef['namefull'] if 'namefull' in tagdef.keys() else tagdef['name']
		subt = ('<br/><em>'+tagdef['namelong']+'</em>') if 'namelong' in tagdef.keys() else ''
		links = '<strong>{}</strong>{}<hr/>'.format(title, subt) + '\n'.join(sorted(links))
		dl = '<dl class="toc">' + '\n'.join(lst) + '</dl>'
		# hack to get from tags to papers
		dl = dl.replace('href="', 'href="../')
		f.write(tagHTML.format(\
			title=key+' tag',
			etag=escape(key),
			tag=key,
			above='',
			boxlinks=links,
			listname='{} papers'.format(len(lst)),
			dl=dl))
		f.close()
	print('Tag pages:', C.yellow('{}'.format(len(ts))), C.blue('generated'))
	# tag index
	f = open(outputdir+'/tag/index.html', 'w')
	keyz = [q for q in ts.keys() if len(ts[q]) > 2]
	keyz.sort(key=lambda t: len(ts[t]), reverse=True)
	lst = ['<li>#<a href="{}.html">{}</a> ({})</li>'.format(escape(t), t, len(ts[t])) for t in keyz]
	ul = '<ul class="tri mul">' + '\n'.join(lst) + '</ul>'
	CX = sum([len(ts[t]) for t in ts.keys()])
	f.write(taglistHTML.format(\
		title='All known tags',
		listname='{} tags known from {} markings'.format(len(ts), CX),
		ul=ul))
	f.close()
	print('Tag index:', C.blue('created'))
	# untagged papers
	f = open(outputdir+'/tag/untagged.html', 'w')
Example 18
def checkon(fn, o):
    if 'dblpkey' not in o.json.keys():
        print('[ {} ] {}'.format(C.red('DONT'),
                                 'DBLP key not found on the entry'))
        return 1
    mykey = o.get('dblpkey')
    # for the rare case of multiple dblpkeys
    # (can happen as a DBLP error or when the same proceedings span multiple volumes)
    if isinstance(mykey, list):
        mykey = mykey[0]
    if mykey not in procs.keys():
        print('[ {} ] {}'.format(C.red('DONT'),
                                 'DBLP key not found in the dump'))
        return 1
    title = procs[mykey]
    if title.endswith('.'):
        title = title[:-1]
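    # tokenise the proceedings title and look for a known country/state name; the preceding token is taken as the town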
    ws = title.replace(' - ', ', ').replace(' (', ', ').split(', ')
    country = findOneIn(knownCountries, ws)
    state = findOneIn(usaStateNames, ws)
    found = False
    if country:
        town = ws[ws.index(country) - 1]
        state = '?'
        # what if "town" is a US state? (full)
        if country == 'USA' and town in usaStateNames:
            state = town
            town = ws[ws.index(town) - 1]
        # what if "town" is a US state? (abbreviated)
        if country == 'USA' and town in usaStateAB:
            state = usaStateNames[usaStateAB.index(town)]
            town = ws[ws.index(town) - 1]
        # what if "town" is a Canadian state? (full)
        if country == 'Canada' and town in canStateNames:
            state = town
            town = ws[ws.index(town) - 1]
        # what if "town" is a Canadian state? (abbreviated)
        if country == 'Canada' and town in canStateAB:
            state = canStateNames[canStateAB.index(town)]
            town = ws[ws.index(town) - 1]
        # the same can happen in the UK
        if country in ('UK', 'United Kingdom') and town in ('Scotland',
                                                            'Scottland'):
            state = town
            town = ws[ws.index(town) - 1]
        # Georgia the country vs Georgia the state
        if country == 'Georgia' and town == 'Atlanta':
            state = country
            country = 'USA'
        # near Something
        if town.startswith('near '):
            town = ws[ws.index(town) - 1]
        # Luxembourg, Luxembourg
        if country == 'Luxembourg':
            town = 'Luxembourg'
        # Saint-Malo / St. Malo
        if country == 'France' and town == 'St. Malo':
            town = 'Saint-Malo'
        # Florence / Firenze
        if country == 'Italy' and town.find('Firenze') > -1:
            town = 'Florence'
        found = True
    elif state:
        country = 'USA'
        town = ws[ws.index(state) - 1]
        found = True
    else:
        # desperate times
        for sol in desperateSolutions.keys():
            if sol in ws:
                town, state, country = desperateSolutions[sol]
                found = True
    # normalise
    if country in countryMap.keys():
        country = countryMap[country]
    if country == 'United Kingdom' and state == '?':
        if town.endswith('London') or town in ('Birmingham', 'York',\
        'Coventry', 'Nottingham', 'Lancaster', 'Oxford', 'Manchester',\
        'Southampton', 'Norwich', 'Leicester', 'Canterbury'):
            state = 'England'
        elif town in ('Edinburgh', 'Glasgow'):
            state = 'Scotland'
    # report
    if 'address' in o.json.keys():
        print('[ {} ] {}'.format(C.blue('OLDA'), o.get('address')))
    if 'location' in o.json.keys():
        print('[ {} ] {}'.format(C.blue('OLDL'), o.get('location')))
    if found:
        # print('[ {} ] {}'.format(C.blue('KNOW'), country))
        print('[ {} ] {}'.format(C.blue('AD||'), title))
        print('[ {} ] {:30} || {:30} || {:20}'.format(C.blue('AD->'),
                                                      C.yellow(town),
                                                      C.yellow(state),
                                                      C.yellow(country)))
        # TODO: perhaps later we can act more aggressively
        newaddr = [town, '' if state == '?' else state, country]
        if 'address' not in o.json.keys() or newaddr != o.json['address']:
            o.json['address'] = newaddr
            f = open(o.json['FILE'], 'w', encoding='utf-8')
            f.write(o.getJSON())
            f.close()
            return 2
        # nothing changed
        return 0
    print('[ {} ] {}'.format(C.yellow('AD??'), title))
    return 1
Example 19
def report(one, two):
	print('[ {} ] {}'.format(one, two))

def checkreport(fn, o):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	r = checkon(fn, o)
	# non-verbose mode by default
	if verbose or r != 0:
		report(statuses[r], fn)
	return r

if __name__ == "__main__":
	if len(sys.argv) > 1:
		verbose = sys.argv[1] == '-v'
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
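	# counters for the result codes: 0 = ok, 1 = failed, 2 = fixed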
	cx = {0: 0, 1: 0, 2: 0}
	for v in sleigh.venues:
		for c in v.getConfs():
			cx[checkreport(c.filename, c)] += 1
			for p in c.papers:
				cx[checkreport(p.filename, p)] += 1
	print('{} files checked, {} ok, {} fixed, {} failed'.format(\
		C.bold(cx[0] + cx[1] + cx[2]),
		C.blue(cx[0]),
		C.yellow(cx[2]),
		C.red(cx[1])))
Example 20
def main():
    print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'),
                                                C.red(len(sleigh.venues)),
                                                C.red(sleigh.numOfPapers()),
                                                C.purple('=' * 42)))
    # generate the index
    f = open(outputdir + '/index.html', 'w', encoding='utf-8')
    f.write(sleigh.getPage())
    f.close()
    # generate all individual pages
    # if False:
    for v in sleigh.venues:
        r = C.blue(v.getKey())
        f = open(outputdir + '/' + v.getKey() + '.html', 'w', encoding='utf-8')
        f.write(v.getPage())
        f.close()
        if v.brands:
            r += '{' + '+'.join([C.blue(b.getKey()) for b in v.brands]) + '}'
            for b in v.brands:
                f = open(outputdir + '/' + b.getKey() + '.brand.html',
                         'w',
                         encoding='utf-8')
                f.write(b.getPage())
                f.close()
        r += ' => '
        for c in v.getConfs():
            f = open(outputdir + '/' + c.getKey() + '.html',
                     'w',
                     encoding='utf-8')
            f.write(c.getPage())
            f.close()
            for p in c.papers:
                f = open(outputdir + '/' + p.getKey() + '.html',
                         'w',
                         encoding='utf-8')
                f.write(p.getPage())
                f.close()
            purekey = c.getKey().replace(v.getKey(), '').replace('-',
                                                                 ' ').strip()
            r += '{} [{}], '.format(purekey, C.yellow(len(c.papers)))
        print(r)
    # generate the icon lineup
    icons = []
    linked = []
    pngs = [
        lastSlash(png).split('.')[0]
        for png in glob.glob(outputdir + '/stuff/*.png')
    ]
    pngs = [png for png in pngs \
            if not (png.startswith('a-') or png.startswith('p-') or png.startswith('ico-')
                    or png in ('cc-by', 'xhtml', 'css', 'open-knowledge', 'edit'))]
    for brand in glob.glob(outputdir + '/*.brand.html'):
        pure = lastSlash(brand).split('.')[0]
        img = pure.lower().replace(' ', '')
        if img in pngs:
            pic = '<div class="wider"><a href="{0}.brand.html"><img class="abc" src="{1}" alt="{0}"/></a><span>{0}</span></div>'.format( \
                pure,
                'stuff/' + img + '.png')
            pngs.remove(img)
            icons.append(pic)
        else:
            # print('No image for', pure)
            pass
    corner = {
        'ada': 'TRI-Ada',
        'comparch': 'CompArch',
        'floc': 'FLoC',
        'bibsleigh': 'index'
    }
    for pure in pngs:
        venueCandidate = corner[pure] if pure in corner else pure.upper()
        canlink = sorted(glob.glob(outputdir + '/' + venueCandidate +
                                   '*.html'),
                         key=len)
        if canlink:
            pic = '<div class="wider"><a href="{0}"><img class="abc" src="stuff/{1}.png" alt="{2}"/></a><span>{2}</span></div>'.format( \
                canlink[0].split('/')[-1],
                pure,
                venueCandidate,
                canlink[0].split('/')[0])
        elif pure == 'twitter':
            pic = '<div class="wider"><a href="https://about.twitter.com/company/brand-assets"><img class="abc" src="stuff/twitter.png" alt="Twitter"/></a><span>Twitter</span></div>'
        elif pure == 'email':
            pic = '<div class="wider"><a href="mailto:[email protected]"><img class="abc" src="stuff/email.png" alt="e-mail"/></a><span>email</span></div>'
        else:
            print('Lonely', pure)
            pic = '<img class="abc" src="stuff/{0}.png" alt="{0}"/>'.format(
                pure)
        icons.append(pic)
    # find last year of each venue
    # for ven in glob.glob(corpusdir + '/*'):
    # 	venname = lastSlash(ven)
    # 	newstuff += '<strong><a href="http://dblp.uni-trier.de/db/conf/{}/">{} {}</a></strong>, '.format(venname.lower(), venname, nextYear(ven))
    # print(lastSlash(ven), ':', lastYear(ven))
    # write "more info" file
    f = open(outputdir + '/about.html', 'w', encoding='utf-8')
    f.write(
        aboutHTML.format(
            len(icons),
            '<div class="minibar">' + '\n'.join(sorted(icons)) + '</div>'))
    f.close()

    # generate the DBLP sync page
    cell_by_conf_by_year = {}
    Ys = [
        2020, 2019, 2018, 2017, 2016, 2015, 2014, 2013, 2012, 2011, 2010, 2009
    ]
    dblplinks = {}

    with open(ienputdir + '/meta/dblpguide.sync', 'r') as f:
        for line in f:
            if not line or line.startswith('#'):
                continue
            words = line.split('|')
            if len(words) != 3:
                print('- Metaline {} skipped!'.format(words))
                continue
            name = words[0].strip()
            dome = words[1].strip()
            dblp = words[2].strip()
            cell_by_conf_by_year[name] = {}
            dblplinks[name] = dblp
            for y in Ys:
                cell_by_conf_by_year[name][y] = '(no)'
            v = sleigh.getVenue(dome)
            if v:
                for yy in Ys:
                    y = v.getYear(yy)
                    if y:
                        ckey = '{}-{}'.format(name, yy)
                        c = y.getConf(ckey)
                        if c:
                            cell_by_conf_by_year[name][yy] = c.getIconItem2(
                                '', '')
                        else:
                            # print('- Conference {} of year {} in venue {} not found in the corpus'.format(ckey, yy, name))
                            for alt in 'v1', 'p1', 'c1', '1', 'J':
                                ckey = '{}-{}-{}'.format(name, alt, yy)
                                c = y.getConf(ckey)
                                if c:
                                    cell_by_conf_by_year[name][
                                        yy] = c.getIconItem2('', '')
                                    break
                # else:
                # 	print('- Year {} in venue {} not found in the corpus among {}'.format(yy, name, [z.year for z in v.years]))
        # else:
        # 	print('- Venue {} not found in the corpus'.format(name))

    table = '<table>'
    table += '<tr><td></td>'
    for y in Ys:
        table += '<th>{}</th>\n'.format(y)
    table += '</tr>'
    # print (cell_by_conf_by_year)
    for name in sorted(cell_by_conf_by_year.keys()):
        table += '<tr><th><a href="{}.brand.html">[@]</a> <a href="{}">{}</a></th>'.format(
            name, dblplinks[name], name)
        for y in Ys:
            table += '<td>{}</td>\n'.format(cell_by_conf_by_year[name][y])
        table += '</tr>'
    table += '</table>'

    with open(outputdir + '/sync.html', 'w', encoding='utf-8') as f:
        f.write(syncHTML.format(table))

    print('{}\nDone with {} venues, {} papers.'.format(
        C.purple('=' * 42), C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers())))
Example 21
def report(s, r):
    statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {}'.format(statuses[r], simpleLatin(s)))
    return r
Example 22
		C.purple('='*42)))
	bundles = {}
	for b in glob.glob(ienputdir + '/bundles/*.json'):
		purename = b.split('/')[-1][:-5]
		bun = json.load(open(b, 'r'))
		prevcx = pcx
		uberlist = '<h2>{1} papers</h2>{0}'.format(processSortedRel(bun['contents']), pcx-prevcx)
		f = open(outputdir + '/bundle/' + purename + '.html', 'w')
		f.write(bunHTML.format(\
			title=purename+' bundle',
			bundle=bun['name'],
			ebundle=escape(purename),
			dl=uberlist.replace('href="', 'href="../').replace('../mailto', 'mailto')))
		f.close()
		bundles[purename] = pcx-prevcx
	print('Bundle pages:', C.yellow('{}'.format(len(bundles))), C.blue('generated'))
	# now for the index
	f = open(outputdir+'/bundle/index.html', 'w')
	lst = ['<li><a href="{}.html">{}</a> ({})</li>'.format(\
		escape(b),
		b,
		bundles[b]) for b in sorted(bundles.keys())]
	ul = '<ul class="tri">' + '\n'.join(lst) + '</ul>'
	f.write(bunListHTML.format(\
		title='All specified bundles',
		listname='{} bundles known with {} papers'.format(len(bundles), sum(bundles.values())),
		ul='<ul class="tri">' + '\n'.join(lst) + '</ul>'))
	f.close()
	print('Bundle index:', C.blue('created'))
	print('{}\nDone with {} venues, {} papers.'.format(\
		C.purple('='*42),
Example 23
				 + ' \n'.join(['<span class="tag"><a href="../word/{0}.html">{0}</a></span> ({1})'.format(S, stems[S]) \
				 	for S in stemkeys[:10]])
			boxlinks += adds
			# combine boxlinks
			if boxlinks:
				boxlinks = '<div class="tbox">' + boxlinks + '</div>'
		f.write(personHTML.format(\
			title=k,
			gender=gender,
			boxlinks=boxlinks,
			eperson=escape(k),
			person=persondef['name'],
			# boxlinks=links
			namedlists=dls))
		f.close()
	print('Person pages:', C.yellow('{}'.format(len(ps))), C.blue('generated'))
	# person index
	# keyz = [k for k in ps.keys() if len(ts[k]) > 2]
	# keyz = sorted(keyz, key=lambda t:len(ts[t]), reverse=True)
	keyz = ps  # sorted(ps.keys())
	letters = [chr(x) for x in range(ord('a'), ord('z')+1)]
	indices = {x:[] for x in letters}
	for t in keyz:
		ws = t.split('_')
		i = -1
		if ws[i] == 'Jr':
			i -= 1
		letter = ws[i][0].lower()
		if not letter.isalpha():
			print(C.red('ERROR')+':', 'wrong name', t)
			letter = ws[i-1][0].lower()
Example 24
def report(fn1, fn2, r):
	statuses = (C.blue(' PASS '), C.red(' FAIL '), C.yellow('RENAME'))
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2))
	return r
Example 25
           + ' \n'.join(['<span class="tag"><a href="../word/{0}.html">{0}</a></span> ({1})'.format(S, stems[S]) \
            for S in stemkeys[:10]])
         boxlinks += adds
         # combine boxlinks
         if boxlinks:
             boxlinks = '<div class="tbox">' + boxlinks + '</div>'
     f.write(personHTML.format(\
      title=k,
      gender=gender,
      boxlinks=boxlinks,
      eperson=escape(k),
      person=persondef['name'],
      # boxlinks=links
      namedlists=dls))
     f.close()
 print('Person pages:', C.yellow('{}'.format(len(ps))), C.blue('generated'))
 # person index
 # keyz = [k for k in ps.keys() if len(ts[k]) > 2]
 # keyz = sorted(keyz, key=lambda t:len(ts[t]), reverse=True)
 keyz = ps  #sorted(ps.keys())
 letters = [chr(x) for x in range(ord('a'), ord('z') + 1)]
 indices = {x: [] for x in letters}
 for t in keyz:
     ws = t.split('_')
     i = -1
     if ws[i] == 'Jr':
         i -= 1
     letter = ws[i][0].lower()
     if not letter.isalpha():
         print(C.red('ERROR') + ':', 'wrong name', t)
         letter = ws[i - 1][0].lower()
Example 26
			paperAuths = paperAuths[:-1]
			paperAuths.extend(auths)
		paperLnk = li.get('id')
		hope = li.find_all('a')
		if hope and hope[0].get('href').endswith('.pdf'):
			paperPdf = urlstart + hope[0].get('href')
		else:
			paperPdf = ''
		paperEntry = {'type': 'inproceedings', 'series': 'CEUR Workshop Proceedings',\
			'publisher': 'CEUR-WS.org', 'year': volYear, 'booktitle': volTitles[-1],\
			'editor': volEds, 'volume': volNr.split('-')[-1], 'title': paperTitle,\
			'author': paperAuths, 'pages': paperPages, 'venue': volVenue}
		if paperPdf:
			paperEntry['openpdf'] = paperPdf
		if paperLnk:
			paperEntry['url'] = urlstart + '#' + paperLnk
		paperFilename = outputdir.split('/')[-1] + '-' + paperAuths[0].split(' ')[-1]
		for a in paperAuths[1:]:
			paperFilename += a.split(' ')[-1][0]
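		# on a name clash, append 'a' and keep bumping the last letter until the filename is unique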
		if paperFilename in done:
			paperFilename += 'a'
			while paperFilename in done:
				paperFilename = paperFilename[:-1] + chr(ord(paperFilename[-1])+1)
		# print(jsonify(paperEntry), '-->', outputdir+'/'+paperFilename+'.json')
		f = open(outputdir+'/'+paperFilename+'.json', 'w')
		f.write(jsonify(paperEntry))
		f.close()
		cx += 1
		done.append(paperFilename)
	print(C.red(volVenue), '-', C.yellow(volTitles[-1]), '-', C.blue(cx), 'papers.')
Example 27
def report(fn1, fn2, r):
    statuses = (C.blue(' PASS '), C.red(' FAIL '), C.yellow('RENAME'))
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2))
    return r
Example 28
             '</em>') if 'namelong' in tagdef.keys() else ''
     links = '<strong>{}</strong>{}<hr/>'.format(title, subt) + '\n'.join(
         sorted(links))
     dl = '<dl class="toc">' + '\n'.join(lst) + '</dl>'
     # hack to get from tags to papers
     dl = dl.replace('href="', 'href="../')
     f.write(tagHTML.format(\
      title=key+' tag',
      etag=escape(key),
      tag=key,
      above='',
      boxlinks=links,
      listname='{} papers'.format(len(lst)),
      dl=dl))
     f.close()
 print('Tag pages:', C.yellow('{}'.format(len(ts))), C.blue('generated'))
 # tag index
 f = open(outputdir + '/tag/index.html', 'w', encoding='utf-8')
 keyz = [q for q in ts.keys() if len(ts[q]) > 2]
 keyz.sort(key=lambda t: len(ts[t]), reverse=True)
 lst = [
     '<li>#<a href="{}.html">{}</a> ({})</li>'.format(
         escape(t), t, len(ts[t])) for t in keyz
 ]
 ul = '<ul class="tri mul">' + '\n'.join(lst) + '</ul>'
 CX = sum([len(ts[t]) for t in ts.keys()])
 f.write(taglistHTML.format(\
  title='All known tags',
  listname='{} tags known from {} markings'.format(len(ts), CX),
  ul=ul))
 f.close()
Example 29
def report(s, r):
	statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
	# non-verbose mode by default
	if verbose or r != 0:
		print('[ {} ] {}'.format(statuses[r], s))
	return r
Example 30
if __name__ == "__main__":
    verbose = sys.argv[-1] == '-v'
    peoplez = glob.glob(ienputdir + '/people/*.json')
    print('{}: {} venues, {} papers by {} people\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(sleigh.venues)),
     C.red(sleigh.numOfPapers()),
     C.red(len(peoplez)),
     C.purple('='*42)))
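    # counters for the result codes: 0 = ok, 1 = failed, 2 = fixed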
    cx = {0: 0, 1: 0, 2: 0}
    # stem ALL the papers!
    for v in sleigh.venues:
        for c in v.getConfs():
            for p in c.papers:
                cx[checkreport(p.filename, p, None)] += 1
        for b in v.getBrands():
            cx[checkreport(b.filename, None, b)] += 1
    # write all stems
    listOfStems = sorted(filter(ifApproved, ALLSTEMS),
                         key=lambda w: two(len(w)) + w)
    f = open(ienputdir + '/stems.json', 'w', encoding='utf-8')
    f.write('[\n\t"' + '",\n\t"'.join(listOfStems) + '"\n]')
    f.close()
    print(C.red(len(ALLSTEMS)), 'stems found.')
    print('{} files checked, {} ok, {} fixed, {} failed'.format(\
     C.bold(cx[0] + cx[1] + cx[2]),
     C.blue(cx[0]),
     C.yellow(cx[2]),
     C.red(cx[1])))
Example 31
	return r

if __name__ == "__main__":
	verbose = sys.argv[-1] == '-v'
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	aka = parseJSON(ienputdir + '/aliases.json')
	CX = sum([len(aka[a]) for a in aka])
	# self-adaptation heuristic:
	#  if a manual rule does the same as the other heuristic, it’s dumb
	for a in sorted(aka.keys()):
		if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)):
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing was unnecessary manual work')
		elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \
							    or aka[a] == [simpleLatin(a), nodiaLatin(a)]):
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing was a lot of unnecessary manual work')
		elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]:
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing contains some unnecessary manual work')
	# auto-aliasing heuristic:
	#  for each author with diacritics, the non-diacritic twin is considered harmful
	people = set()
	for v in sleigh.venues:
		for c in v.getConfs():
			if 'editor' in c.json:
				people.update(listify(c.json['editor']))
			for p in c.papers:
				if 'author' in p.json:
					people.update(listify(p.json['author']))