# (whitespace-mangled source, reformatted for readability; chunk ends mid-function — see note at bottom)
import sys, os.path, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import string2words, ifApproved
from collections import Counter
# import stemming.porter2
import snowballstemmer
# from nltk.stem.snowball import SnowballStemmer

# Module-level corpus setup: load the name->file map (when present) and the corpus AST.
ienputdir = '../json'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
# Accumulator of all stems seen; presumably filled further down this module — not visible here.
ALLSTEMS = set()

def guessYear(P):
    # Guess the publication year for corpus key P.
    # First attempt: a single four-digit, dash-separated component of the key itself.
    cys = [int(w) for w in P.split('-') if len(w) == 4 and w.isdigit()]
    if len(cys) == 1:
        return cys[0]
    else:
        # Fallback: resolve the key in the corpus and read its 'year', either
        # from the underlying JSON dict or from an attribute on the object.
        j = sleigh.seekByKey(P)
        if 'year' in j.json.keys():
            return j.get('year')
        elif 'year' in dir(j):
            return j.year
        else:
            # NOTE(review): source truncated at this point in this view;
            # the final fallback branch is not visible.
# # a module for exporting stems/words to the HTML frontpages import os.path from fancy.ANSI import C from fancy.Templates import wordlistHTML, wordHTML from lib.AST import Sleigh, escape from lib.JSON import parseJSON from lib.NLP import ifApproved from collections import Counter ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) if __name__ == "__main__": print('{}: {} venues, {} papers\n{}'.format(\ C.purple('BibSLEIGH'), C.red(len(sleigh.venues)), C.red(sleigh.numOfPapers()), C.purple('='*42))) stems = sleigh.getStems() tagged = [] for k in stems.keys(): f = open('{}/word/{}.html'.format(outputdir, k), 'w', encoding='utf-8') # papers are displayed in reverse chronological order lst = [x.getIItem() for x in \ sorted(stems[k], key=lambda z: -z.json['year'] if 'year' in z.json.keys() else 0)] # collect other stems
#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # # a module for assigning proper names to papers, venues and journals import sys, os.path from fancy.ANSI import C from fancy.KnownNames import unfoldName, short2long from lib.AST import Sleigh from lib.JSON import parseJSON, json2lines ienputdir = '../json' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) verbose = False wheretolook = ('journal', 'series', 'booktitle', 'publisher') def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' f = open(fn, 'r') lines = f.readlines()[1:-1] f.close() flines = json2lines(lines) plines = sorted(json2lines(o.getJSON().split('\n'))) # bad variants for bad in unfoldName: for key in wheretolook: if o.get(key) == bad: o.json[key] = unfoldName[bad]
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python # -*- coding: utf-8 -*- # # a module for simply traversing all the LRJs and reading them in # if you run this and it fails, you’re in big trouble import sys, os.path from lib.AST import Sleigh from lib.NLP import strictstrip from lib.LP import lastSlash from fancy.ANSI import C ienputdir = '../json' n2f_name = '_name2file.json' # name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', {}) verbose = False def findYear(fn): s = ''.join([ch for ch in fn if ch.isdigit()]) return int(s) if s else 0 def checkon(fn, o): if not os.path.exists(fn) or os.path.isdir(fn): fn = fn + '.json' if not os.path.exists(fn): # if it still does not exist, let us create a minimal one f = open(fn, 'w', encoding='utf-8') f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(\ name=lastSlash(fn)[:-5].replace('-', ' '), year=findYear(lastSlash(fn))\
# The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z') # FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design def genColour(az): # get something between 0 and 25 i = ord(az) - ord('a') r = 0xFF - (0xFF - 0xF5)*i//26 g = 0xFD - (0xFD - 0x7F)*i//26 b = 0xE7 - (0xE7 - 0x17)*i//26 return hex(r)[-2:] + hex(g)[-2:] + hex(b)[-2:] ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) def makeimg(ifn, alt, w=''): if w: return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format(ifn, alt, w) else: return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt) def dict2links(d): rs = [] for k in sorted(d.keys()): if k.isupper() or k in ('name', 'authored', 'roles'): continue v = d[k] if k == 'g': rs.append(\
# a module for exporting LRJ definitions of tags to the HTML frontpages import os.path from fancy.ANSI import C from fancy.Languages import ISONames from fancy.Templates import taglistHTML, tagHTML from lib.AST import Sleigh, escape from lib.JSON import parseJSON from lib.LP import listify from lib.NLP import string2words, trash ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) def makeimg(fn, alt): return '<img src="../stuff/ico-{}.png" alt="{}"/>'.format(fn, alt) def kv2link(k, v): if k == 'g': ico = makeimg('g', 'Google') r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v) elif k.endswith('.wp'): lang = k.split('.')[0] # Using ISO 639-1 language names ico = makeimg('wp', 'Wikipedia') + makeimg(lang, ISONames[lang]) lang = k.split('.')[0] r = '<a href="https://{}.wikipedia.org/wiki/{}">{}</a>'.format(\ lang, \
# # a module for exporting stems/words to the HTML frontpages import os.path from fancy.ANSI import C from fancy.Templates import wordlistHTML, wordHTML from lib.AST import Sleigh, escape from lib.JSON import parseJSON from lib.NLP import ifApproved from collections import Counter ienputdir = '../json' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(ienputdir + '/corpus', name2file) if __name__ == "__main__": print('{}: {} venues, {} papers\n{}'.format(\ C.purple('BibSLEIGH'), C.red(len(sleigh.venues)), C.red(sleigh.numOfPapers()), C.purple('='*42))) stems = sleigh.getStems() tagged = [] for k in stems.keys(): f = open('{}/word/{}.html'.format(outputdir, k), 'w') # papers are displayed in reverse chronological order lst = [x.getIItem() for x in \ sorted(stems[k], key=lambda z: -z.json['year'] if 'year' in z.json.keys() else 0)] # collect other stems
# a module for exporting LRJs to the HTML frontpages import cProfile import os.path, glob from fancy.ANSI import C from fancy.Templates import aboutHTML, syncHTML from lib.AST import Sleigh from lib.JSON import parseJSON from lib.LP import lastSlash ienputdir = '../json' corpusdir = ienputdir + '/corpus' outputdir = '../frontend' n2f_name = '_name2file.json' name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {} sleigh = Sleigh(corpusdir, name2file) def next_year(vvv): return int(lastSlash(sorted(glob.glob(vvv + '/*'))[-2])) + 1 def main(): print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'), C.red(len(sleigh.venues)), C.red(sleigh.numOfPapers()), C.purple('=' * 42))) # generate the index f = open(outputdir + '/index.html', 'w', encoding='utf-8') f.write(sleigh.getPage()) f.close()