Ejemplo n.º 1
0
import sys, os.path, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import string2words, ifApproved
from collections import Counter

# import stemming.porter2
import snowballstemmer
# from nltk.stem.snowball import SnowballStemmer

ienputdir = '../json'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
ALLSTEMS = set()


def guessYear(P):
    cys = [int(w) for w in P.split('-') if len(w) == 4 and w.isdigit()]
    if len(cys) == 1:
        return cys[0]
    else:
        j = sleigh.seekByKey(P)
        if 'year' in j.json.keys():
            return j.get('year')
        elif 'year' in dir(j):
            return j.year
        else:
Ejemplo n.º 2
0
#
# a module for exporting stems/words to the HTML frontpages

import os.path
from fancy.ANSI import C
from fancy.Templates import wordlistHTML, wordHTML
from lib.AST import Sleigh, escape
from lib.JSON import parseJSON
from lib.NLP import ifApproved
from collections import Counter

ienputdir = '../json'
outputdir = '../frontend'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)

if __name__ == "__main__":
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	stems = sleigh.getStems()
	tagged = []
	for k in stems.keys():
		f = open('{}/word/{}.html'.format(outputdir, k), 'w', encoding='utf-8')
		# papers are displayed in reverse chronological order
		lst = [x.getIItem() for x in \
			sorted(stems[k], key=lambda z: -z.json['year'] if 'year' in z.json.keys() else 0)]
		# collect other stems
Ejemplo n.º 3
0
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
# a module for assigning proper names to papers, venues and journals

import sys, os.path
from fancy.ANSI import C
from fancy.KnownNames import unfoldName, short2long
from lib.AST import Sleigh
from lib.JSON import parseJSON, json2lines

ienputdir = '../json'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)
verbose = False
wheretolook = ('journal', 'series', 'booktitle', 'publisher')

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	f = open(fn, 'r')
	lines = f.readlines()[1:-1]
	f.close()
	flines = json2lines(lines)
	plines = sorted(json2lines(o.getJSON().split('\n')))
	# bad variants
	for bad in unfoldName:
		for key in wheretolook:
			if o.get(key) == bad:
				o.json[key] = unfoldName[bad]
Ejemplo n.º 4
0
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python
# -*- coding: utf-8 -*-
#
# a module for simply traversing all the LRJs and reading them in
# if you run this and it fails, you’re in big trouble

import sys, os.path
from lib.AST import Sleigh
from lib.NLP import strictstrip
from lib.LP import lastSlash
from fancy.ANSI import C

ienputdir = '../json'
n2f_name = '_name2file.json'
# name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', {})
verbose = False

def findYear(fn):
	s = ''.join([ch for ch in fn if ch.isdigit()])
	return int(s) if s else 0

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	if not os.path.exists(fn):
		# if it still does not exist, let us create a minimal one
		f = open(fn, 'w', encoding='utf-8')
		f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(\
			name=lastSlash(fn)[:-5].replace('-', ' '),
			year=findYear(lastSlash(fn))\
Ejemplo n.º 5
0
# The idea is to generate a colour between FFFDE7 (for 'a') and F57F17 (for 'z')
# FFFDE7 is Yellow/50 and F57F17 is Yellow/900 in Material Design
def genColour(az):
	# get something between 0 and 25
	i = ord(az) - ord('a')
	r = 0xFF - (0xFF - 0xF5)*i//26
	g = 0xFD - (0xFD - 0x7F)*i//26
	b = 0xE7 - (0xE7 - 0x17)*i//26
	return hex(r)[-2:] + hex(g)[-2:] + hex(b)[-2:]

ienputdir = '../json'
outputdir = '../frontend'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)

def makeimg(ifn, alt, w=''):
	if w:
		return '<img src="../stuff/{}.png" alt="{}" width="{}px"/>'.format(ifn, alt, w)
	else:
		return '<img src="../stuff/{}.png" alt="{}"/>'.format(ifn, alt)

def dict2links(d):
	rs = []
	for k in sorted(d.keys()):
		if k.isupper() or k in ('name', 'authored', 'roles'):
			continue
		v = d[k]
		if k == 'g':
			rs.append(\
Ejemplo n.º 6
0
# a module for exporting LRJ definitions of tags to the HTML frontpages

import os.path
from fancy.ANSI import C
from fancy.Languages import ISONames
from fancy.Templates import taglistHTML, tagHTML
from lib.AST import Sleigh, escape
from lib.JSON import parseJSON
from lib.LP import listify
from lib.NLP import string2words, trash

ienputdir = '../json'
outputdir = '../frontend'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)

def makeimg(fn, alt):
	return '<img src="../stuff/ico-{}.png" alt="{}"/>'.format(fn, alt)

def kv2link(k, v):
	if k == 'g':
		ico = makeimg('g', 'Google')
		r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v)
	elif k.endswith('.wp'):
		lang = k.split('.')[0]
		# Using ISO 639-1 language names
		ico = makeimg('wp', 'Wikipedia') + makeimg(lang, ISONames[lang])
		lang = k.split('.')[0]
		r = '<a href="https://{}.wikipedia.org/wiki/{}">{}</a>'.format(\
			lang, \
Ejemplo n.º 7
0
#
# a module for exporting stems/words to the HTML frontpages

import os.path
from fancy.ANSI import C
from fancy.Templates import wordlistHTML, wordHTML
from lib.AST import Sleigh, escape
from lib.JSON import parseJSON
from lib.NLP import ifApproved
from collections import Counter

ienputdir = '../json'
outputdir = '../frontend'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(ienputdir + '/corpus', name2file)

if __name__ == "__main__":
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	stems = sleigh.getStems()
	tagged = []
	for k in stems.keys():
		f = open('{}/word/{}.html'.format(outputdir, k), 'w')
		# papers are displayed in reverse chronological order
		lst = [x.getIItem() for x in \
			sorted(stems[k], key=lambda z: -z.json['year'] if 'year' in z.json.keys() else 0)]
		# collect other stems
Ejemplo n.º 8
0
# a module for exporting LRJs to the HTML frontpages

import cProfile
import os.path, glob
from fancy.ANSI import C
from fancy.Templates import aboutHTML, syncHTML
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import lastSlash

ienputdir = '../json'
corpusdir = ienputdir + '/corpus'
outputdir = '../frontend'
n2f_name = '_name2file.json'
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
sleigh = Sleigh(corpusdir, name2file)


def next_year(vvv):
    return int(lastSlash(sorted(glob.glob(vvv + '/*'))[-2])) + 1


def main():
    print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'),
                                                C.red(len(sleigh.venues)),
                                                C.red(sleigh.numOfPapers()),
                                                C.purple('=' * 42)))
    # generate the index
    f = open(outputdir + '/index.html', 'w', encoding='utf-8')
    f.write(sleigh.getPage())
    f.close()