def report(one, two):
    """Print one status line of the form ``[ one ] two``.

    Args:
        one: Label shown between the square brackets.
        two: Text shown after the label (callers pass a file name).
    """
    print('[ {} ] {}'.format(one, two))


def checkreport(fn, o):
    """Run ``checkon`` on one object and report the outcome.

    Args:
        fn: File name handed to ``checkon`` and shown in the report line.
        o: Object handed to ``checkon`` together with ``fn``.

    Returns:
        The value returned by ``checkon``. It is used as an index into
        the (PASS, FAIL, FIXD) labels, so it is expected to be 0, 1 or 2.
    """
    statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    r = checkon(fn, o)
    # non-verbose mode by default: a plain PASS is only printed with -v
    if verbose or r != 0:
        report(statuses[r], fn)
    return r


if __name__ == "__main__":
    # NOTE(review): verbose is only rebound when at least one argument is
    # given; checkreport reads it as a global, so a default is presumably
    # bound at module level outside this section -- TODO confirm.
    if len(sys.argv) > 1:
        verbose = sys.argv[1] == '-v'
    print('{}: {} venues, {} papers\n{}'.format(
        C.purple('BibSLEIGH'),
        C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers()),
        C.purple('=' * 42)))
    # Tally of check results keyed by status code: 0 ok, 1 failed, 2 fixed.
    cx = {0: 0, 1: 0, 2: 0}
    for v in sleigh.venues:
        for c in v.getConfs():
            # Check the conference itself first, then each of its papers.
            cx[checkreport(c.filename, c)] += 1
            for p in c.papers:
                cx[checkreport(p.filename, p)] += 1
    print('{} files checked, {} ok, {} fixed, {} failed'.format(
        C.bold(sum(cx.values())),
        C.blue(cx[0]),
        C.yellow(cx[2]),
        C.red(cx[1])))
def two(n):
    """Return *n* as text, prefixed with a single ``0`` when below 10.

    Used below to build a sort key that orders words by length first.
    Because only one zero is ever added, such keys compare correctly
    as strings only while the numbers stay below 100.

    Args:
        n: Number to render (the caller passes a word length).

    Returns:
        ``'0{}'.format(n)`` for ``n < 10``, otherwise ``'{}'.format(n)``.
    """
    if n < 10:
        return '0{}'.format(n)
    return '{}'.format(n)


if __name__ == "__main__":
    verbose = sys.argv[-1] == '-v'
    peoplez = glob.glob(ienputdir + '/people/*.json')
    print('{}: {} venues, {} papers by {} people\n{}'.format(
        C.purple('BibSLEIGH'),
        C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers()),
        C.red(len(peoplez)),
        C.purple('=' * 42)))
    # Tally of checkreport results, keyed by the status code it returns.
    cx = {0: 0, 1: 0, 2: 0}
    # stem ALL the papers!
    for v in sleigh.venues:
        for c in v.getConfs():
            for p in c.papers:
                cx[checkreport(p.filename, p, None)] += 1
        # Brands go through the same check, passed in the third position.
        for b in v.getBrands():
            cx[checkreport(b.filename, None, b)] += 1
    # write all stems, ordered by length and then alphabetically
    listOfStems = sorted(filter(ifApproved, ALLSTEMS),
                         key=lambda w: two(len(w)) + w)
    # The context manager guarantees the file is flushed and closed even
    # if the write fails. Stems are written verbatim (no JSON escaping).
    with open(ienputdir + '/stems.json', 'w', encoding='utf-8') as f:
        f.write('[\n\t"' + '",\n\t"'.join(listOfStems) + '"\n]')
return 0 def checkreport(fn, o): statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD')) r = checkon(fn, o) # non-verbose mode by default if verbose or r != 0: print('[ {} ] {}'.format(statuses[r], fn)) return r if __name__ == "__main__": verbose = sys.argv[-1] == '-v' print('{}: {} venues, {} papers\n{}'.format(\ C.purple('BibSLEIGH'), C.green(len(sleigh.venues)), C.green(sleigh.numOfPapers()), C.purple('='*42))) aka = parseJSON(ienputdir + '/aliases.json') CX = sum([len(aka[a]) for a in aka]) # self-adaptation heuristic: # if a manual rule does the same as the other heuristic, it’s dumb for a in sorted(aka.keys()): if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)): print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was unnecessary manual work') elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \ or aka[a] == [simpleLatin(a), nodiaLatin(a)]): print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was a lot of unnecessary manual work') elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]: print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing contains some unnecessary manual work') # auto-aliasing heuristic: # for each author with diacritics, its non-diacritic twin is considered harmful