# Report people whose records carry "too much info", then rewrite any person
# JSON file whose on-disk content differs from the in-memory record, while
# optionally building the name->file cache used by other scripts.
print('\t', C.red(len(dunno)), 'people with too much info on')

# should we build it?  Only (re)create the cache when it is missing on disk.
maken2f = not os.path.exists(n2f_name)
if maken2f:
    name2file = {}

for k in peoplekeys:
    p = people[k]
    if maken2f:
        # person key -> HTML page name derived from the JSON filename
        name2file[k] = 'person/' + p['FILE'].split('/')[-1].replace('.json', '.html')
    if p['FILE']:
        if os.path.exists(p['FILE']):
            cur = parseJSON(p['FILE'])
            if cur == p:
                # on-disk copy already matches the in-memory record: count as ok
                cx[0] += 1
                if verbose:
                    print('[', C.green('FIXD'), ']', p['name'])
                continue
            # differs: rewrite the file ('FILE' is an in-memory-only key,
            # dropped before serialisation)
            print('[', C.yellow('FIXD'), ']', p['name'])
            cx[2] += 1
            # FIX: use a context manager (no fd leak on error) and pin the
            # encoding to utf-8 for consistency with the identical write-back
            # code elsewhere in this project (names may be non-ASCII).
            with open(p['FILE'], 'w', encoding='utf-8') as f:
                del p['FILE']
                f.write(jsonify(p))
        else:
            print('How can that be?')

# caching to be used later (in other scripts mostly)
if maken2f:
    with open(n2f_name, 'w', encoding='utf8') as f:
        f.write(json.dumps(name2file, sort_keys=True,
                           separators=(',\n\t', ': '), ensure_ascii=False))
cx[1] = len(dunno)
CONFZ = {\ 'FOSE': 'FoSE', 'MODELS': 'MoDELS'\ } BLANK = ' ' lines = [] cur = '' for fn in sys.argv[1:-1]: if cur != fn.split('-')[0]: if cur != '': print() name = fn.split('-')[0].upper() if name in CONFZ: name = CONFZ[name] print('[{}]'.format(C.green(name)), end=': ') cur = fn.split('-')[0] print("'{}".format(fn.split('-')[-1][-6:-4]), end=' ') f = open(fn, 'r', encoding='utf-8') lines += [(fn, line[:10], line[10:].strip()) for line in f.readlines()\ if line.strip() \ and line[:10] != ' ' \ and not line.startswith('##########')] f.close() print() succ = fail = 0 males = set( line.strip() for line in open('../naming/male.txt', 'r', encoding='utf-8').readlines()) femes = set(line.strip() for line in open(
else: return 0 def checkreport(fn, o): statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD')) r = checkon(fn, o) # non-verbose mode by default if verbose or r != 0: print('[ {} ] {}'.format(statuses[r], fn)) return r if __name__ == "__main__": verbose = sys.argv[-1] == '-v' print('{}: {} venues, {} papers\n{}'.format(\ C.purple('BibSLEIGH'), C.green(len(sleigh.venues)), C.green(sleigh.numOfPapers()), C.purple('='*42))) aka = parseJSON(ienputdir + '/aliases.json') CX = sum([len(aka[a]) for a in aka]) # self-adaptation heuristic: # if a manual rule does the same as the other heuristic, it’s dumb for a in sorted(aka.keys()): if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)): print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was unnecessary manual work') elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \ or aka[a] == [simpleLatin(a), nodiaLatin(a)]): print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was a lot of unnecessary manual work') elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]: print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing contains some unnecessary manual work') # auto-aliasing heuristic:
report( '{}: “{}” == “{}”?'.format(surname, variants[0], variants[1]), 2) # print pvariants = ['“{}”'.format(v) for v in variants] report('{}: {}'.format(surname, ' vs '.join(pvariants)), 0) # write back if changed for k in people.keys(): p = people[k] if p['FILE']: if os.path.exists(p['FILE']): cur = parseJSON(p['FILE']) if cur == p: cx[0] += 1 if verbose: print('[', C.green('FIXD'), ']', p['name']) continue print('[', C.yellow('FIXD'), ']', p['name']) cx[2] += 1 f = open(p['FILE'], 'w', encoding='utf-8') del p['FILE'] f.write(jsonify(p)) f.close() else: print('How can that be?') print('{} people checked, {} ok, {} fixed, {} failed'.format(\ C.bold(cx[0] + cx[1] + cx[2]), C.blue(cx[0]), C.yellow(cx[2]), C.red(cx[1])))