def __init__(self, d, hdir, name2file, parent):
    """Load a venue rooted at directory ``d`` together with its years and brands.

    Parameters:
        d: path of the venue directory (without a trailing separator).
        hdir: passed unchanged to the superclass constructor.
        name2file: mapping handed down to every Brand and Year created here;
            also kept as ``self.n2f``.
        parent: object stored as ``self.back`` once loading has finished.

    Two layouts are recognised:
      * new style -- the venue metadata is the sibling file ``<d>.json``;
      * legacy style -- JSON files sit inside ``d``: the first one that
        parses to a non-empty value becomes the venue metadata, every later
        one is loaded as a Brand.
    """
    super(Venue, self).__init__(d, hdir)
    self.years = []
    self.brands = []
    self.n2f = name2file
    if os.path.exists(d + '.json'):
        # new style
        # print(C.blue(d), 'is new style')
        self.json = parseJSON(d + '.json')
    else:
        # legacy style
        print(C.red(d), 'is legacy style')
        # Start from an empty dict, not a list: parsed JSON is used as a
        # mapping everywhere else, so a venue without any JSON file must
        # still expose a mapping.
        self.json = {}
        for f in glob.glob(d + '/*.json'):
            if not self.json:
                self.json = parseJSON(f)
            else:
                self.brands.append(Brand(f, self.homedir, name2file, self))
    for f in glob.glob(d + '/*'):
        if f.endswith('.json'):
            # already processed
            continue
        elif os.path.isdir(f):
            y = Year(f, self.homedir, name2file, self)
            self.years.append(y)
            # let every brand inspect each conference of the freshly loaded year
            for b in self.brands:
                for c in y.confs:
                    b.offer(y.year, c)
        else:
            print('File out of place:', f)
    # the back-reference is assigned last, after all children exist
    self.back = parent
def __init__(self, d, hdir, name2file, parent):
    """Build a venue from directory ``d``: metadata, brands, then years.

    ``name2file`` is remembered as ``self.n2f`` and forwarded to every child
    object; ``parent`` ends up in ``self.back``.
    """
    super(Venue, self).__init__(d, hdir)
    self.years = []
    self.brands = []
    self.n2f = name2file

    sidecar = d + '.json'
    if not os.path.exists(sidecar):
        # legacy style: JSONs live inside the directory itself
        print(C.red(d), 'is legacy style')
        self.json = {}
        for path in glob.glob(d + '/*.json'):
            if self.json:
                # metadata already found, so this one describes a brand
                self.brands.append(
                    Brand(path, self.homedir, name2file, self))
            else:
                self.json = parseJSON(path)
    else:
        # new style: metadata sits next to the directory
        # print(C.blue(d), 'is new style')
        self.json = parseJSON(sidecar)

    for path in glob.glob(d + '/*'):
        if path.endswith('.json'):
            # already processed
            continue
        if not os.path.isdir(path):
            print('File out of place:', path)
            continue
        edition = Year(path, self.homedir, name2file, self)
        self.years.append(edition)
        # offer each conference of this year to every known brand
        for brand in self.brands:
            for conf in edition.confs:
                brand.offer(edition.year, conf)

    self.back = parent
def __init__(self, f, hdir, parent):
    """Load a paper from JSON file ``f`` and lift its 'tag' entry into ``self.tags``.

    ``hdir`` goes to the superclass constructor; ``parent`` is stored as
    ``self.back``.
    """
    super(Paper, self).__init__(f, hdir)
    self.json = parseJSON(f)
    # NB: self.tags is a list in Paper, but a dict in all other classes
    if 'tag' in self.json.keys():
        raw = self.json['tag']
        # a single tag is wrapped so that self.tags is always a list
        self.tags = raw if isinstance(raw, list) else [raw]
        del self.json['tag']
    self.back = parent
def __init__(self, f, hdir, name2file, parent):
    """Load a brand from JSON file ``f``.

    'vocabulary' and 'collocations', when present, are stored as flat
    sequences ``[key0, count0, key1, count1, ...]``; both are converted to
    Counter objects (collocation keys become tuples). A trailing element
    without a partner is ignored. ``name2file`` is accepted but not used
    here; ``parent`` is stored as ``self.back``.
    """
    super(Brand, self).__init__(f, hdir)
    self.name = last(f)
    self.confs = {}
    self.json = parseJSON(f)

    if 'vocabulary' in self.json:
        flat = self.json['vocabulary']
        # even positions hold keys, odd positions hold their counts
        self.json['vocabulary'] = Counter(
            {word: count for word, count in zip(flat[0::2], flat[1::2])})

    if 'collocations' in self.json:
        flat = self.json['collocations']
        self.json['collocations'] = Counter(
            {tuple(words): count
             for words, count in zip(flat[0::2], flat[1::2])})

    self.back = parent
def __init__(self, d, hdir, name2file, parent):
    """Load one year directory ``d`` and all conferences found in it.

    A conference is either a subdirectory (optionally accompanied by a
    sibling ``<subdir>.json``) or a stand-alone ``*.json`` file that no
    subdirectory has claimed. ``parent`` is stored as ``self.back``.
    """
    super(Year, self).__init__(d, hdir)
    self.year = last(d)
    self.confs = []
    loose = []      # every *.json seen directly inside d
    claimed = []    # the *.json files that belong to a subdirectory
    for entry in glob.glob(d + '/*'):
        if os.path.isdir(entry):
            conf = Conf(entry, self.homedir, name2file, self)
            self.confs.append(conf)
            sidecar = entry + '.json'
            if os.path.exists(sidecar):
                conf.json = parseJSON(sidecar)
                claimed.append(sidecar)
                # print('Conf has a JSON! %s' % self.confs[-1].json)
        elif entry.endswith('.json'):
            loose.append(entry)
        else:
            print('File out of place:', entry)
    for orphan in loose:
        if orphan in claimed:
            continue
        # print('Houston, we have a JSON:', f)
        conf = Conf(orphan[:orphan.rindex('.')], self.homedir, name2file, self)
        self.confs.append(conf)
        conf.json = parseJSON(orphan)
    self.back = parent
def __init__(self, d, hdir, name2file, parent):
    """Collect the conferences of the year stored in directory ``d``.

    Subdirectories become Conf objects first (picking up ``<subdir>.json``
    when it exists); afterwards every remaining ``*.json`` file in ``d``
    becomes a Conf of its own. ``parent`` is kept as ``self.back``.
    """
    super(Year, self).__init__(d, hdir)
    self.year = last(d)
    self.confs = []
    found, used = [], []

    for path in glob.glob(d + '/*'):
        if not os.path.isdir(path):
            if path.endswith('.json'):
                found.append(path)
            else:
                print('File out of place:', path)
            continue
        self.confs.append(Conf(path, self.homedir, name2file, self))
        companion = path + '.json'
        if os.path.exists(companion):
            self.confs[-1].json = parseJSON(companion)
            used.append(companion)
            # print('Conf has a JSON! %s' % self.confs[-1].json)

    # JSON files not attached to any directory define conferences by themselves
    for path in (p for p in found if p not in used):
        # print('Houston, we have a JSON:', f)
        # every path here ends with '.json', so this strips exactly the extension
        stem = path[:-len('.json')]
        self.confs.append(Conf(stem, self.homedir, name2file, self))
        self.confs[-1].json = parseJSON(path)

    self.back = parent
#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # # a module for enforcing aliases import sys, os.path, json from fancy.ANSI import C from fancy.Latin import nodiaLatin, simpleLatin from lib.AST import Sleigh from lib.JSON import parseJSON from lib.LP import listify from lib.NLP import strictstrip ienputdir = '../json' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) verbose = False renameto = {} def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]]) for ae in ('author', 'editor'): if ae in o.json.keys(): if isinstance(o.json[ae], str): if o.json[ae] in renameto.keys(): o.json[ae] = renameto[o.json[ae]] else: for i, x in enumerate(o.json[ae]):
def report(s, r):
    """Print a status line for message ``s`` and return the code ``r`` unchanged.

    ``r`` indexes the status labels: 0 -> PASS, 1 -> FAIL, 2 -> FIXD.
    The line is printed when the module-level ``verbose`` flag is set or
    when ``r`` is non-zero.
    """
    statuses = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {}'.format(statuses[r], s))
    return r


if __name__ == "__main__":
    verbose = sys.argv[-1] == '-v'
    # Load all contributors
    people = {}
    for fn in glob.glob(ienputdir + '/people/*.json'):
        p = parseJSON(fn)
        people[p['name']] = p
    print('{}: {} people\n{}'.format(
        C.purple('BibSLEIGH'), C.red(len(people)), C.purple('='*42)))
    # check for duplicates
    bysurname = {}
    # Lower-cased name particles. Each entry needs its own comma: adjacent
    # string literals are concatenated, which used to turn 'le' 'la' into
    # the single, never-matching entry 'lela'.
    particles = ('de', 'di', 'du', 'van', 'von', 'le', 'la')
    for name in people.keys():
        byword = name.split(' ')
        # j is a negative index into byword; it is moved one word to the
        # left for as long as the word in front of it starts with a
        # lower-case letter or is a known particle
        j = -1
        while -j < len(byword) and (byword[j - 1][0].islower()
                                    or byword[j - 1].lower() in particles):
            j -= 1
#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # # a module for assigning proper names to papers, venues and journals import sys, os.path from fancy.ANSI import C from fancy.KnownNames import unfoldName, short2long from lib.AST import Sleigh from lib.JSON import parseJSON, json2lines ienputdir = '../json' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) verbose = False wheretolook = ('journal', 'series', 'booktitle', 'publisher') def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' f = open(fn, 'r') lines = f.readlines()[1:-1] f.close() flines = json2lines(lines) plines = sorted(json2lines(o.getJSON().split('\n'))) # bad variants for bad in unfoldName: for key in wheretolook: if o.get(key) == bad: o.json[key] = unfoldName[bad]
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for cross-checking information on people available from different sources

import sys, glob, os.path, json
from fancy.ANSI import C
from fancy.Latin import simpleLatin, dblpLatin, nodiaLatin
from lib.AST import Sleigh
from lib.JSON import parseJSON, jsonify
from lib.LP import listify

ienputdir = '../json'
n2f_name = '_name2file.json'
# the name-to-file mapping is optional: fall back to an empty one
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
cx = {0: 0, 1: 0, 2: 0}
renameto = {}
dis = {}


def nomidnames(s):
    """Return ``s`` without initials, i.e. without tokens like ``A.``.

    A token is dropped when it is exactly two characters long, starts with
    an upper-case letter and ends with a dot. Dots glued to the next word
    are separated first, so 'J.R. Smith' is handled like 'J. R. Smith'.

    Example: nomidnames('John A. Smith') == 'John Smith'
    """
    # ns = s.split(' ')
    # while len(ns) > 1 and len(ns[1]) == 2 and ns[1][0].isupper() and ns[1][1] == '.':
    # 	del ns[1]
    # return ' '.join(ns)
    # Put a space after every dot, then collapse the double spaces this
    # creates where a space was already present. Without the collapse,
    # split(' ') yields empty tokens and the result keeps doubled spaces.
    s = s.replace('.', '. ').replace('  ', ' ')
    return ' '.join([n for n in s.split(' ')
                     if len(n) != 2 or not n[0].isupper() or n[1] != '.'])


def fileify(s):
    """Turn a name into a file-name stem.

    The name goes through ``simpleLatin``; then dots and apostrophes are
    removed and spaces become underscores.
    """
    return simpleLatin(s).replace('.', '').replace("'", '').replace(' ', '_')
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python # -*- coding: utf-8 -*- # # a module for enriching conference definitions with chairs/committees import sys, os.path from fancy.ANSI import C from lib.AST import Sleigh from lib.JSON import parseJSON from lib.NLP import nrs, strictstrip ienputdir = '../json' rt_name = '_renameto.json' renameto = parseJSON(rt_name) if os.path.exists(rt_name) else {} # FIXME mr = parseJSON('_established.json') for m in mr.keys(): if m not in renameto.keys(): renameto[m] = mr[m] sleigh = Sleigh(ienputdir + '/corpus', {}) verbose = False lookat = [] roles = {} def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' if o.get('type') not in ('proceedings', 'book'): # we don't go per paper return 0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for retagging LRJs in the adjacent repository

import sys
import os.path
import re
import glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify, uniq
from lib.NLP import strictstrip, baretext, superbaretext

ienputdir = '../json'
n2f_name = '_name2file.json'
# the name-to-file mapping is optional
if os.path.exists(n2f_name):
    name2file = parseJSON(n2f_name)
else:
    name2file = {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
tags = []
relieved = {}

# Matching strategies, keyed by mode name. Every predicate receives the
# pattern s followed by five pre-computed views (mcs, mes, mew, mis, miw)
# and uses exactly one of them.
matchModes = {
    'matchsensitive':
        lambda s, mcs, mes, mew, mis, miw: mcs.find(s) >= 0,
    'matchword':
        lambda s, mcs, mes, mew, mis, miw: s in miw,
    'matchwordexact':
        lambda s, mcs, mes, mew, mis, miw: s in mew,
    'matchsub':
        lambda s, mcs, mes, mew, mis, miw: mis.find(s) >= 0,
    'matchsubexact':
        lambda s, mcs, mes, mew, mis, miw: mes.find(s) >= 0,
    'matchstart':
        lambda s, mcs, mes, mew, mis, miw: mes.startswith(s),
    'matchend':
        lambda s, mcs, mes, mew, mis, miw: mes.endswith(s),
    # yields a match object or None rather than a bool
    'matchre':
        lambda s, mcs, mes, mew, mis, miw: re.match('^'+s+'$', mes),
}
# The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z') # FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design def genColour(az): # get something between 0 and 25 i = ord(az) - ord('a') r = 0xFF - (0xFF - 0xF5) * i // 26 g = 0xFD - (0xFD - 0x7F) * i // 26 b = 0xE7 - (0xE7 - 0x17) * i // 26 return hex(r)[-2:] + hex(g)[-2:] + hex(b)[-2:] ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) def makeimg(ifn, alt, w=''): if w: return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format( ifn, alt, w) else: return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt) def dict2links(d): rs = [] for k in sorted(d.keys()): if k.isupper() or k in ('name', 'authored', 'roles'):
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python # -*- coding: utf-8 -*- # # a module for enforcing aliases import sys, os.path, json from fancy.ANSI import C from fancy.Latin import nodiaLatin, simpleLatin from lib.AST import Sleigh from lib.JSON import parseJSON from lib.LP import listify from lib.NLP import strictstrip ienputdir = '../json' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) verbose = False renameto = {} def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]]) for ae in ('author', 'editor'): if ae in o.json.keys(): if isinstance(o.json[ae], str): if o.json[ae] in renameto.keys(): o.json[ae] = renameto[o.json[ae]] else: for i, x in enumerate(o.json[ae]):
# # a module for exporting LRJ definitions of tags to the HTML frontpages import os.path from fancy.ANSI import C from fancy.Languages import ISONames from fancy.Templates import taglistHTML, tagHTML from lib.AST import Sleigh, escape from lib.JSON import parseJSON from lib.LP import listify from lib.NLP import string2words, trash ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) def makeimg(fn, alt): return '<img src="../stuff/ico-{}.png" alt="{}"/>'.format(fn, alt) def kv2link(k, v): if k == 'g': ico = makeimg('g', 'Google') r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v) elif k.endswith('.wp'): lang = k.split('.')[0] # Using ISO 639-1 language names ico = makeimg('wp', 'Wikipedia') + makeimg(lang, ISONames[lang]) lang = k.split('.')[0] r = '<a href="https://{}.wikipedia.org/wiki/{}">{}</a>'.format(\
#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # # a module for enriching conference definitions with chairs/committees import sys, os.path from fancy.ANSI import C from lib.AST import Sleigh from lib.JSON import parseJSON from lib.NLP import nrs, strictstrip ienputdir = '../json' rt_name = '_renameto.json' renameto = parseJSON(rt_name) if os.path.exists(rt_name) else {} # FIXME mr = parseJSON('_established.json') for m in mr.keys(): if m not in renameto.keys(): renameto[m] = mr[m] sleigh = Sleigh(ienputdir + '/corpus', {}) verbose = False lookat = [] roles = {} def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' if o.get('type') not in ('proceedings', 'book'): # we don't go per paper return 0 if o.getKey() not in roles.keys():
from lib.NLP import shorten, ifIgnored # The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z') # FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design def genColour(az): # get something between 0 and 25 i = ord(az) - ord('a') r = 0xFF - (0xFF - 0xF5)*i//26 g = 0xFD - (0xFD - 0x7F)*i//26 b = 0xE7 - (0xE7 - 0x17)*i//26 return hex(r)[-2:] + hex(g)[-2:] + hex(b)[-2:] ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) def makeimg(ifn, alt, w=''): if w: return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format(ifn, alt, w) else: return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt) def dict2links(d): rs = [] for k in sorted(d.keys()): if k.isupper() or k in ('name', 'authored', 'roles'): continue v = d[k] if k == 'g':
# a module for stemming paper titles LRJ import sys, os.path, glob from fancy.ANSI import C from lib.AST import Sleigh from lib.JSON import parseJSON from lib.NLP import string2words, ifApproved from collections import Counter # import stemming.porter2 import snowballstemmer # from nltk.stem.snowball import SnowballStemmer ienputdir = '../json' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) verbose = False ALLSTEMS = set() def guessYear(P): cys = [int(w) for w in P.split('-') if len(w) == 4 and w.isdigit()] if len(cys) == 1: return cys[0] else: j = sleigh.seekByKey(P) if 'year' in j.json.keys(): return j.get('year') elif 'year' in dir(j): return j.year
#!/c/Users/vadim/AppData/Local/Programs/Python/Python37-32/python
# -*- coding: utf-8 -*-
#
# a module for retagging LRJs in the adjacent repository

import sys, os.path, re, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify, uniq
from lib.NLP import strictstrip, baretext, superbaretext

ienputdir = '../json'
n2f_name = '_name2file.json'
# use the stored name-to-file mapping when there is one
name2file = {} if not os.path.exists(n2f_name) else parseJSON(n2f_name)
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
tags = []
relieved = {}


# One predicate per matching mode. All share the signature
# (s, mcs, mes, mew, mis, miw): the pattern and five pre-computed views,
# of which each mode consults exactly one.

def _matchsensitive(s, mcs, mes, mew, mis, miw):
    return mcs.find(s) != -1


def _matchword(s, mcs, mes, mew, mis, miw):
    return s in miw


def _matchwordexact(s, mcs, mes, mew, mis, miw):
    return s in mew


def _matchsub(s, mcs, mes, mew, mis, miw):
    return mis.find(s) != -1


def _matchsubexact(s, mcs, mes, mew, mis, miw):
    return mes.find(s) != -1


def _matchstart(s, mcs, mes, mew, mis, miw):
    return mes.startswith(s)


def _matchend(s, mcs, mes, mew, mis, miw):
    return mes.endswith(s)


def _matchre(s, mcs, mes, mew, mis, miw):
    # the result is a match object or None, not a bool
    return re.match('^'+s+'$', mes)


matchModes = {
    'matchsensitive': _matchsensitive,
    'matchword': _matchword,
    'matchwordexact': _matchwordexact,
    'matchsub': _matchsub,
    'matchsubexact': _matchsubexact,
    'matchstart': _matchstart,
    'matchend': _matchend,
    'matchre': _matchre,
}