Beispiel #1
0
	def __init__(self, d, hdir, name2file, parent):
		"""Build a venue from directory ``d`` and everything found inside it.

		``d`` and ``hdir`` are forwarded to the base-class constructor.
		``name2file`` is kept as ``self.n2f`` and handed down to every
		``Brand`` and ``Year`` created here; ``parent`` is stored as
		``self.back`` once construction is finished.

		The venue's own definition is read from ``d + '.json'`` when that
		file exists ("new style"); otherwise the first ``*.json`` found
		inside ``d`` is used ("legacy style"). Every other ``*.json`` inside
		``d`` becomes a ``Brand``; every sub-directory becomes a ``Year``
		whose conferences are offered to all brands.
		"""
		super(Venue, self).__init__(d, hdir)
		self.years = []
		self.brands = []
		self.n2f = name2file
		if os.path.exists(d+'.json'):
			# new style: the definition sits next to the directory
			self.json = parseJSON(d+'.json')
		else:
			# legacy style: the definition is expected inside the directory
			print(C.red(d), 'is legacy style')
			# An empty dict (not a list) so that a venue without any JSON
			# still exposes a mapping, like every successfully parsed one.
			self.json = {}
		for f in glob.glob(d+'/*.json'):
			if not self.json:
				# nothing loaded yet: the first JSON found defines the venue
				self.json = parseJSON(f)
			else:
				self.brands.append(Brand(f, self.homedir, name2file, self))
		for f in glob.glob(d+'/*'):
			if f.endswith('.json'):
				# already processed
				continue
			elif os.path.isdir(f):
				y = Year(f, self.homedir, name2file, self)
				self.years.append(y)
				# let every brand decide which conferences of this year it takes
				for b in self.brands:
					for c in y.confs:
						b.offer(y.year, c)
			else:
				print('File out of place:', f)
		self.back = parent
Beispiel #2
0
 def __init__(self, d, hdir, name2file, parent):
     """Build a venue from directory ``d`` and everything found inside it.

     The venue's own JSON is ``d + '.json'`` if present, otherwise the
     first ``*.json`` inside ``d``. Remaining ``*.json`` files inside
     ``d`` become brands, sub-directories become years, and each year's
     conferences are offered to every brand. ``parent`` is stored as
     ``self.back`` at the very end.
     """
     super(Venue, self).__init__(d, hdir)
     self.years = []
     self.brands = []
     self.n2f = name2file
     own_json = d + '.json'
     if not os.path.exists(own_json):
         # legacy style
         print(C.red(d), 'is legacy style')
         self.json = {}
     else:
         # new style
         self.json = parseJSON(own_json)
     for json_path in glob.glob(d + '/*.json'):
         if self.json:
             self.brands.append(
                 Brand(json_path, self.homedir, name2file, self))
             continue
         # nothing loaded so far: this file defines the venue itself
         self.json = parseJSON(json_path)
     for entry in glob.glob(d + '/*'):
         if entry.endswith('.json'):
             # handled by the loop above
             continue
         if not os.path.isdir(entry):
             print('File out of place:', entry)
             continue
         year = Year(entry, self.homedir, name2file, self)
         self.years.append(year)
         for brand in self.brands:
             for conf in year.confs:
                 brand.offer(year.year, conf)
     self.back = parent
Beispiel #3
0
	def __init__(self, f, hdir, parent):
		"""Load a paper from the JSON file ``f``.

		A ``'tag'`` entry, when present, is moved out of ``self.json`` into
		``self.tags`` and always stored as a list (a single value is
		wrapped). ``parent`` is stored as ``self.back``.
		"""
		super(Paper, self).__init__(f, hdir)
		self.json = parseJSON(f)
		# NB: self.tags is a list in Paper, but a dict in all other classes
		if 'tag' in self.json:
			raw = self.json.pop('tag')
			self.tags = raw if isinstance(raw, list) else [raw]
		self.back = parent
Beispiel #4
0
 def __init__(self, f, hdir, parent):
     """Load a paper from the JSON file ``f``.

     A ``'tag'`` entry, when present, is moved out of ``self.json`` into
     ``self.tags`` and always stored as a list (a single value is
     wrapped). ``parent`` is stored as ``self.back``.
     """
     super(Paper, self).__init__(f, hdir)
     self.json = parseJSON(f)
     # NB: self.tags is a list in Paper, but a dict in all other classes
     if 'tag' in self.json:
         raw = self.json.pop('tag')
         self.tags = raw if isinstance(raw, list) else [raw]
     self.back = parent
Beispiel #5
0
 def __init__(self, f, hdir, name2file, parent):
     """Load a brand from the JSON file ``f``.

     ``'vocabulary'`` and ``'collocations'``, when present, are read as
     flat lists ``[key0, count0, key1, count1, ...]`` and replaced by
     ``Counter`` objects; collocation keys are converted to tuples. A
     dangling last element without a count is ignored. ``name2file`` is
     accepted but not used here; ``parent`` is stored as ``self.back``.
     """
     super(Brand, self).__init__(f, hdir)
     self.name = last(f)
     self.confs = {}
     self.json = parseJSON(f)
     if 'vocabulary' in self.json:
         flat = self.json['vocabulary']
         # even positions are keys, odd positions are their counts
         self.json['vocabulary'] = Counter(
             dict(zip(flat[0::2], flat[1::2])))
     if 'collocations' in self.json:
         flat = self.json['collocations']
         self.json['collocations'] = Counter(
             {tuple(words): count
              for words, count in zip(flat[0::2], flat[1::2])})
     self.back = parent
Beispiel #6
0
	def __init__(self, d, hdir, name2file, parent):
		"""Build a year from directory ``d``.

		Every sub-directory becomes a ``Conf``; if a file named like the
		sub-directory plus ``.json`` exists, it is parsed into that
		conference's ``json``. Every remaining ``*.json`` directly inside
		``d`` becomes a ``Conf`` of its own, created from the path without
		the extension. Anything else is reported as out of place.
		``parent`` is stored as ``self.back`` at the end.
		"""
		super(Year, self).__init__(d, hdir)
		self.year = last(d)
		self.confs = []
		loose = []       # every *.json file seen directly inside d
		claimed = set()  # the ones already attached to a sub-directory
		for entry in glob.glob(d+'/*'):
			if os.path.isdir(entry):
				conf = Conf(entry, self.homedir, name2file, self)
				self.confs.append(conf)
				sidecar = entry+'.json'
				if os.path.exists(sidecar):
					conf.json = parseJSON(sidecar)
					claimed.add(sidecar)
				continue
			if entry.endswith('.json'):
				loose.append(entry)
				continue
			print('File out of place:', entry)
		for path in loose:
			if path in claimed:
				continue
			# a JSON without a directory still defines a conference
			conf = Conf(path[:path.rindex('.')], self.homedir, name2file, self)
			self.confs.append(conf)
			conf.json = parseJSON(path)
		self.back = parent
Beispiel #7
0
	def __init__(self, f, hdir, name2file, parent):
		"""Load a brand from the JSON file ``f``.

		``'vocabulary'`` and ``'collocations'``, when present, are read as
		flat lists ``[key0, count0, key1, count1, ...]`` and replaced by
		``Counter`` objects; collocation keys are converted to tuples. A
		dangling last element without a count is ignored. ``name2file`` is
		accepted but not used here; ``parent`` is stored as ``self.back``.
		"""
		super(Brand, self).__init__(f, hdir)
		self.name = last(f)
		self.confs = {}
		self.json = parseJSON(f)
		if 'vocabulary' in self.json:
			flat = self.json['vocabulary']
			# even positions are keys, odd positions are their counts
			self.json['vocabulary'] = Counter(
				dict(zip(flat[0::2], flat[1::2])))
		if 'collocations' in self.json:
			flat = self.json['collocations']
			self.json['collocations'] = Counter(
				{tuple(words): count
				 for words, count in zip(flat[0::2], flat[1::2])})
		self.back = parent
Beispiel #8
0
 def __init__(self, d, hdir, name2file, parent):
     """Build a year from directory ``d``.

     Every sub-directory becomes a ``Conf``; if a file named like the
     sub-directory plus ``.json`` exists, it is parsed into that
     conference's ``json``. Every remaining ``*.json`` directly inside
     ``d`` becomes a ``Conf`` of its own, created from the path without
     the extension. Anything else is reported as out of place.
     ``parent`` is stored as ``self.back`` at the end.
     """
     super(Year, self).__init__(d, hdir)
     self.year = last(d)
     self.confs = []
     loose = []       # every *.json file seen directly inside d
     claimed = set()  # the ones already attached to a sub-directory
     for entry in glob.glob(d + '/*'):
         if os.path.isdir(entry):
             conf = Conf(entry, self.homedir, name2file, self)
             self.confs.append(conf)
             sidecar = entry + '.json'
             if os.path.exists(sidecar):
                 conf.json = parseJSON(sidecar)
                 claimed.add(sidecar)
             continue
         if entry.endswith('.json'):
             loose.append(entry)
             continue
         print('File out of place:', entry)
     for path in loose:
         if path in claimed:
             continue
         # a JSON without a directory still defines a conference
         conf = Conf(path[:path.rindex('.')], self.homedir, name2file, self)
         self.confs.append(conf)
         conf.json = parseJSON(path)
     self.back = parent
Beispiel #9
0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for enforcing aliases

import sys, os.path, json
from fancy.ANSI import C
from fancy.Latin import nodiaLatin, simpleLatin
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify
from lib.NLP import strictstrip

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Alias table consulted by checkon(): an author/editor name found as a key
# is replaced by the mapped value. Starts empty; filled outside this view.
renameto = {}

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
	for ae in ('author', 'editor'):
		if ae in o.json.keys():
			if isinstance(o.json[ae], str):
				if o.json[ae] in renameto.keys():
					o.json[ae] = renameto[o.json[ae]]
			else:
				for i, x in enumerate(o.json[ae]):
Beispiel #10
0

def report(s, r):
    """Print a coloured status line for message ``s`` and return ``r``.

    ``r`` indexes the status labels: 0 -> PASS, 1 -> FAIL, 2 -> FIXD.
    A zero status is only printed when the module-level ``verbose`` flag
    is set; any other status is always printed.
    """
    labels = (C.blue('PASS'), C.red('FAIL'), C.yellow('FIXD'))
    # non-verbose mode by default: stay silent about successes
    if not verbose and r == 0:
        return r
    print('[ {} ] {}'.format(labels[r], s))
    return r


if __name__ == "__main__":
    verbose = sys.argv[-1] == '-v'
    # Load all contributors
    people = {}
    for fn in glob.glob(ienputdir + '/people/*.json'):
        p = parseJSON(fn)
        people[p['name']] = p
    print('{}: {} people\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.red(len(people)),
     C.purple('='*42)))
    # check for duplicates
    bysurname = {}
    for name in people.keys():
        byword = name.split(' ')
        j = -1
        while -j < len(byword) and (byword[j - 1][0].islower()
                                    or byword[j - 1].lower()
                                    in ('de', 'di', 'du', 'van', 'von', 'le'
                                        'la')):
            j -= 1
Beispiel #11
0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for assigning proper names to papers, venues and journals

import sys, os.path
from fancy.ANSI import C
from fancy.KnownNames import unfoldName, short2long
from lib.AST import Sleigh
from lib.JSON import parseJSON, json2lines

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Keys that checkon() inspects for known bad name variants.
wheretolook = ('journal', 'series', 'booktitle', 'publisher')

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	f = open(fn, 'r')
	lines = f.readlines()[1:-1]
	f.close()
	flines = json2lines(lines)
	plines = sorted(json2lines(o.getJSON().split('\n')))
	# bad variants
	for bad in unfoldName:
		for key in wheretolook:
			if o.get(key) == bad:
				o.json[key] = unfoldName[bad]
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for cross-checking information on people available from different sources

import sys, glob, os.path, json
from fancy.ANSI import C
from fancy.Latin import simpleLatin, dblpLatin, nodiaLatin
from lib.AST import Sleigh
from lib.JSON import parseJSON, jsonify
from lib.LP import listify

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# NOTE(review): looks like one counter per status code 0/1/2 - confirm where it is updated.
cx = {0: 0, 1: 0, 2: 0}
# Both start empty and are filled outside this view.
# NOTE(review): renameto is presumably an old-name -> new-name alias table - confirm.
renameto = {}
dis = {}

def nomidnames(s):
	"""Return the name ``s`` with all one-letter initials ("X.") removed.

	A space is inserted after every period and double spaces are collapsed
	in one pass, so glued initials such as "J.R.R." are split into separate
	tokens first. Tokens made of exactly one uppercase letter followed by
	a period are then dropped. Because of the inserted space, a name that
	ends in a period comes back with a trailing space.
	"""
	def is_initial(token):
		return len(token) == 2 and token[0].isupper() and token[1] == '.'

	spaced = s.replace('.', '. ')
	spaced = spaced.replace('  ', ' ')
	return ' '.join(token for token in spaced.split(' ') if not is_initial(token))

def fileify(s):
	"""Turn the name ``s`` into a file-name stem.

	The string is passed through ``simpleLatin``; periods and apostrophes
	are then removed and spaces become underscores.
	"""
	stem = simpleLatin(s)
	for old, new in (('.', ''), ("'", ''), (' ', '_')):
		stem = stem.replace(old, new)
	return stem
Beispiel #13
0
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python
# -*- coding: utf-8 -*-
#
# a module for enriching conference definitions with chairs/committees

import sys, os.path
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import nrs, strictstrip

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Rename table: loaded if the file exists, otherwise an empty dict.
rt_name = '_renameto.json'
renameto = parseJSON(rt_name) if os.path.exists(rt_name) else {}
# FIXME
# Unlike the rename table, this file is parsed without an existence check.
mr = parseJSON('_established.json')
# Merge into renameto; entries already present in renameto take precedence.
for m in mr.keys():
    if m not in renameto.keys():
        renameto[m] = mr[m]
# The corpus is loaded with an empty name-to-file mapping.
sleigh = Sleigh(ienputdir + '/corpus', {})
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Both start empty and are filled outside this view; checkon() tests
# membership of an object's key in roles.
lookat = []
roles = {}


def checkon(fn, o):
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    if o.get('type') not in ('proceedings', 'book'):
        # we don't go per paper
        return 0
Beispiel #14
0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for retagging LRJs in the adjacent repository

import sys, os.path, re, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify, uniq
from lib.NLP import strictstrip, baretext, superbaretext

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Both start empty and are filled outside this view.
tags = []
relieved = {}

# Dispatch table: matching-mode name -> predicate over (s, mcs, mes, mew, mis, miw).
# All predicates share the same six-argument signature; each one tests s
# against exactly one of the other five arguments:
#   matchsensitive / matchsub / matchsubexact - s is a substring of mcs / mis / mes
#   matchword / matchwordexact                - s is a member of miw / mew
#   matchstart / matchend                     - mes starts / ends with s
#   matchre                                   - s, wrapped in ^...$, is matched as a regex against mes
# Every entry yields a bool except matchre, which yields whatever re.match
# returns (a match object or None).
# NOTE(review): mcs/mes/mew/mis/miw are presumably differently normalised
# string/word-list forms of the same text - confirm at the call site.
matchModes = {\
'matchsensitive': lambda s, mcs, mes, mew, mis, miw: mcs.find(s) > -1,
'matchword':      lambda s, mcs, mes, mew, mis, miw: s in miw,
'matchwordexact': lambda s, mcs, mes, mew, mis, miw: s in mew,
'matchsub':       lambda s, mcs, mes, mew, mis, miw: mis.find(s) > -1,
'matchsubexact':  lambda s, mcs, mes, mew, mis, miw: mes.find(s) > -1,
'matchstart':     lambda s, mcs, mes, mew, mis, miw: mes.startswith(s),
'matchend':       lambda s, mcs, mes, mew, mis, miw: mes.endswith(s),
'matchre':        lambda s, mcs, mes, mew, mis, miw: re.match('^'+s+'$', mes)\
}
Beispiel #15
0
# The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z')
# FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design
def genColour(az):
    """Return a six-digit lowercase hex colour for the single character ``az``.

    Each RGB channel is interpolated linearly (integer arithmetic) from
    FFFDE7 for 'a' to F57F17 for 'z'. Characters before 'a' or after 'z'
    are clamped to the respective end colour, so the result is always a
    valid ``rrggbb`` string.
    """
    # position in the alphabet, clamped to 0..25 so every channel stays a byte
    i = max(0, min(25, ord(az) - ord('a')))
    # divide by 25 (the last index) so that 'z' lands exactly on the end colour
    r = 0xFF - (0xFF - 0xF5) * i // 25
    g = 0xFD - (0xFD - 0x7F) * i // 25
    b = 0xE7 - (0xE7 - 0x17) * i // 25
    # zero-padded formatting instead of slicing the textual hex() output
    return '{:02x}{:02x}{:02x}'.format(r, g, b)


# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# NOTE(review): presumably where generated pages are written - confirm at the point of use.
outputdir = '../frontend'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)


def makeimg(ifn, alt, w=''):
    """Return an ``<img>`` tag for ``../stuff/<ifn>.png`` with alt text ``alt``.

    A ``width`` attribute in pixels is added only when ``w`` is truthy.
    """
    if not w:
        return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt)
    return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format(
        ifn, alt, w)


def dict2links(d):
    rs = []
    for k in sorted(d.keys()):
        if k.isupper() or k in ('name', 'authored', 'roles'):
Beispiel #16
0
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python
# -*- coding: utf-8 -*-
#
# a module for enforcing aliases

import sys, os.path, json
from fancy.ANSI import C
from fancy.Latin import nodiaLatin, simpleLatin
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify
from lib.NLP import strictstrip

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Alias table consulted by checkon(): an author/editor name found as a key
# is replaced by the mapped value. Starts empty; filled outside this view.
renameto = {}

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
	for ae in ('author', 'editor'):
		if ae in o.json.keys():
			if isinstance(o.json[ae], str):
				if o.json[ae] in renameto.keys():
					o.json[ae] = renameto[o.json[ae]]
			else:
				for i, x in enumerate(o.json[ae]):
Beispiel #17
0
#
# a module for exporting LRJ definitions of tags to the HTML frontpages

import os.path
from fancy.ANSI import C
from fancy.Languages import ISONames
from fancy.Templates import taglistHTML, tagHTML
from lib.AST import Sleigh, escape
from lib.JSON import parseJSON
from lib.LP import listify
from lib.NLP import string2words, trash

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# NOTE(review): presumably where generated pages are written - confirm at the point of use.
outputdir = '../frontend'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)

def makeimg(fn, alt):
	"""Return an ``<img>`` tag for the icon ``../stuff/ico-<fn>.png`` with alt text ``alt``."""
	template = '<img src="../stuff/ico-{}.png" alt="{}"/>'
	return template.format(fn, alt)

def kv2link(k, v):
	if k == 'g':
		ico = makeimg('g', 'Google')
		r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v)
	elif k.endswith('.wp'):
		lang = k.split('.')[0]
		# Using ISO 639-1 language names
		ico = makeimg('wp', 'Wikipedia') + makeimg(lang, ISONames[lang])
		lang = k.split('.')[0]
		r = '<a href="https://{}.wikipedia.org/wiki/{}">{}</a>'.format(\
Beispiel #18
0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for enriching conference definitions with chairs/committees

import sys, os.path
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import nrs, strictstrip

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Rename table: loaded if the file exists, otherwise an empty dict.
rt_name = '_renameto.json'
renameto = parseJSON(rt_name) if os.path.exists(rt_name) else {}
# FIXME
# Unlike the rename table, this file is parsed without an existence check.
mr = parseJSON('_established.json')
# Merge into renameto; entries already present in renameto take precedence.
for m in mr.keys():
	if m not in renameto.keys():
		renameto[m] = mr[m]
# The corpus is loaded with an empty name-to-file mapping.
sleigh = Sleigh(ienputdir + '/corpus', {})
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Both start empty and are filled outside this view; checkon() tests
# membership of an object's key in roles.
lookat = []
roles = {}

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	if o.get('type') not in ('proceedings', 'book'):
		# we don't go per paper
		return 0
	if o.getKey() not in roles.keys():
Beispiel #19
0
from lib.NLP import shorten, ifIgnored

# The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z')
# FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design
def genColour(az):
	"""Return a six-digit lowercase hex colour for the single character ``az``.

	Each RGB channel is interpolated linearly (integer arithmetic) from
	FFFDE7 for 'a' to F57F17 for 'z'. Characters before 'a' or after 'z'
	are clamped to the respective end colour, so the result is always a
	valid ``rrggbb`` string.
	"""
	# position in the alphabet, clamped to 0..25 so every channel stays a byte
	i = max(0, min(25, ord(az) - ord('a')))
	# divide by 25 (the last index) so that 'z' lands exactly on the end colour
	r = 0xFF - (0xFF - 0xF5)*i//25
	g = 0xFD - (0xFD - 0x7F)*i//25
	b = 0xE7 - (0xE7 - 0x17)*i//25
	# zero-padded formatting instead of slicing the textual hex() output
	return '{:02x}{:02x}{:02x}'.format(r, g, b)

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# NOTE(review): presumably where generated pages are written - confirm at the point of use.
outputdir = '../frontend'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)

def makeimg(ifn, alt, w=''):
	"""Return an ``<img>`` tag for ``../stuff/<ifn>.png`` with alt text ``alt``.

	A ``width`` attribute in pixels is added only when ``w`` is truthy.
	"""
	if not w:
		return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt)
	return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format(ifn, alt, w)

def dict2links(d):
	rs = []
	for k in sorted(d.keys()):
		if k.isupper() or k in ('name', 'authored', 'roles'):
			continue
		v = d[k]
		if k == 'g':
Beispiel #20
0
# a module for stemming paper titles LRJ

import sys, os.path, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import string2words, ifApproved
from collections import Counter

# import stemming.porter2
import snowballstemmer
# from nltk.stem.snowball import SnowballStemmer

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import; guessYear() looks entries up in it.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Starts empty and is filled outside this view.
ALLSTEMS = set()


def guessYear(P):
    cys = [int(w) for w in P.split('-') if len(w) == 4 and w.isdigit()]
    if len(cys) == 1:
        return cys[0]
    else:
        j = sleigh.seekByKey(P)
        if 'year' in j.json.keys():
            return j.get('year')
        elif 'year' in dir(j):
            return j.year
Beispiel #21
0
#!/c/Users/vadim/AppData/Local/Programs/Python/Python37-32/python
# -*- coding: utf-8 -*-
#
# a module for retagging LRJs in the adjacent repository

import sys, os.path, re, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import listify, uniq
from lib.NLP import strictstrip, baretext, superbaretext

# Module-level configuration; everything below runs at import time.
# Base directory of the JSON data; the corpus is read from its 'corpus' subfolder.
ienputdir = '../json'
# Name-to-file mapping: loaded if the file exists, otherwise an empty dict.
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# The corpus object is constructed right here, on import.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# NOTE(review): presumably switched on from the command line - confirm in the main section.
verbose = False
# Both start empty and are filled outside this view.
tags = []
relieved = {}

# Dispatch table: matching-mode name -> predicate over (s, mcs, mes, mew, mis, miw).
# All predicates share the same six-argument signature; each one tests s
# against exactly one of the other five arguments:
#   matchsensitive / matchsub / matchsubexact - s is a substring of mcs / mis / mes
#   matchword / matchwordexact                - s is a member of miw / mew
#   matchstart / matchend                     - mes starts / ends with s
#   matchre                                   - s, wrapped in ^...$, is matched as a regex against mes
# Every entry yields a bool except matchre, which yields whatever re.match
# returns (a match object or None).
# NOTE(review): mcs/mes/mew/mis/miw are presumably differently normalised
# string/word-list forms of the same text - confirm at the call site.
matchModes = {\
'matchsensitive': lambda s, mcs, mes, mew, mis, miw: mcs.find(s) > -1,
'matchword':      lambda s, mcs, mes, mew, mis, miw: s in miw,
'matchwordexact': lambda s, mcs, mes, mew, mis, miw: s in mew,
'matchsub':       lambda s, mcs, mes, mew, mis, miw: mis.find(s) > -1,
'matchsubexact':  lambda s, mcs, mes, mew, mis, miw: mes.find(s) > -1,
'matchstart':     lambda s, mcs, mes, mew, mis, miw: mes.startswith(s),
'matchend':       lambda s, mcs, mes, mew, mis, miw: mes.endswith(s),
'matchre':        lambda s, mcs, mes, mew, mis, miw: re.match('^'+s+'$', mes)\
}