예제 #1
0
#!/c/Users/vadim/AppData/Local/Programs/Python/Python35/python
# -*- coding: utf-8 -*-
#
# a module for simply traversing all the LRJs and reading them in
# if you run this and it fails, you’re in big trouble

import sys, os.path
from lib.AST import Sleigh
from lib.NLP import strictstrip
from lib.LP import lastSlash
from fancy.ANSI import C

# Relative path to the directory holding the JSON input.
ienputdir = '../json'
# File name of the name-to-file mapping; in this view it is only referenced
# by the disabled line below.
n2f_name = '_name2file.json'
# Disabled: loading the mapping from disk. An empty dict is passed to Sleigh
# instead (see next statement).
# name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# Built at import time from the 'corpus' subdirectory with an empty mapping.
# NOTE(review): presumably this reads the whole corpus from disk — confirm
# against lib.AST.Sleigh.
sleigh = Sleigh(ienputdir + '/corpus', {})
# Module-level verbosity flag, off by default.
verbose = False

def findYear(fn):
	"""Return the integer spelled by all decimal digits found in *fn*.

	Every decimal digit of the string is kept, in order of appearance, and
	the resulting run is parsed as one number: 'ICSE-2015' gives 2015, while
	'a1b2' gives 12. A string without any decimal digit yields 0.

	Only characters for which ``str.isdecimal()`` holds are collected.
	``str.isdigit()`` also accepts characters such as superscript '²', which
	``int()`` cannot parse, so using it would raise ValueError on such names.
	"""
	digits = ''.join(ch for ch in fn if ch.isdecimal())
	return int(digits) if digits else 0

def checkon(fn, o):
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	if not os.path.exists(fn):
		# if it still does not exist, let us create a minimal one
		f = open(fn, 'w', encoding='utf-8')
		f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(\
			name=lastSlash(fn)[:-5].replace('-', ' '),
			year=findYear(lastSlash(fn))\
예제 #2
0
import sys, os.path, glob
from fancy.ANSI import C
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.NLP import string2words, ifApproved
from collections import Counter

# import stemming.porter2
import snowballstemmer
# from nltk.stem.snowball import SnowballStemmer

# Relative path to the directory holding the JSON input.
ienputdir = '../json'
# File name of the name-to-file mapping, looked up in the working directory.
n2f_name = '_name2file.json'
# Load the mapping if the file exists; otherwise fall back to an empty dict.
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# Built at import time from the 'corpus' subdirectory and the mapping above.
# NOTE(review): presumably this reads the whole corpus from disk — confirm
# against lib.AST.Sleigh.
sleigh = Sleigh(ienputdir + '/corpus', name2file)
# Module-level verbosity flag, off by default.
verbose = False
# Module-level set, initially empty.
# NOTE(review): presumably accumulates stems produced elsewhere in the file — confirm.
ALLSTEMS = set()


def guessYear(P):
    cys = [int(w) for w in P.split('-') if len(w) == 4 and w.isdigit()]
    if len(cys) == 1:
        return cys[0]
    else:
        j = sleigh.seekByKey(P)
        if 'year' in j.json.keys():
            return j.get('year')
        elif 'year' in dir(j):
            return j.year
        else:
예제 #3
0
# a module for exporting LRJs to the HTML frontpages

import cProfile
import os.path, glob
from fancy.ANSI import C
from fancy.Templates import aboutHTML, syncHTML
from lib.AST import Sleigh
from lib.JSON import parseJSON
from lib.LP import lastSlash

# Relative path to the directory holding the JSON input.
ienputdir = '../json'
# The 'corpus' subdirectory of the JSON input directory.
corpusdir = ienputdir + '/corpus'
# Relative path to the directory where generated pages are written.
outputdir = '../frontend'
# File name of the name-to-file mapping, looked up in the working directory.
n2f_name = '_name2file.json'
# Load the mapping if the file exists; otherwise fall back to an empty dict.
name2file = parseJSON(n2f_name) if os.path.exists(n2f_name) else {}
# Built at import time from the corpus directory and the mapping above.
# NOTE(review): presumably this reads the whole corpus from disk — confirm
# against lib.AST.Sleigh.
sleigh = Sleigh(corpusdir, name2file)


def next_year(vvv):
    """Return one more than the number named by the penultimate entry of *vvv*.

    The entries matching ``vvv + '/*'`` are sorted as strings, the
    second-to-last one is picked, ``lastSlash`` is applied to it and the
    result is parsed as an int and incremented.

    NOTE(review): presumably ``lastSlash`` yields the final path component
    and the last sorted entry is a non-year file, hence ``[-2]`` — confirm.
    Raises IndexError when fewer than two entries match.
    """
    entries = glob.glob(vvv + '/*')
    entries.sort()
    penultimate = entries[-2]
    return 1 + int(lastSlash(penultimate))


def main():
    """Print a corpus summary banner and write the front-end index page.

    The steps visible here are:
      1. print the number of venues and papers held by the module-level
         ``sleigh`` object, followed by a separator line;
      2. write ``sleigh.getPage()`` to ``index.html`` inside ``outputdir``,
         encoded as UTF-8.
    """
    print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'),
                                                C.red(len(sleigh.venues)),
                                                C.red(sleigh.numOfPapers()),
                                                C.purple('=' * 42)))
    # generate the index; the context manager closes the file even when
    # building or writing the page raises
    with open(outputdir + '/index.html', 'w', encoding='utf-8') as f:
        f.write(sleigh.getPage())