Exemple #1
0
	def getAuthors(self):
		# Comma-joined author list; falls back to editors, then to ''.
		for role in ('author', 'editor'):
			if role in self.json:
				return ', '.join(listify(self.json[role]))
		return ''
Exemple #2
0
 def getAuthors(self):
     """Return the comma-joined authors, or editors if there are none, or ''."""
     if 'author' in self.json:
         people = listify(self.json['author'])
     elif 'editor' in self.json:
         people = listify(self.json['editor'])
     else:
         return ''
     return ', '.join(people)
Exemple #3
0
	def getBoxLinks(self):
		# Assemble three groups of HTML links (parent, bibliographic services,
		# publishers); groups are joined by <hr/>, links inside by <br/>.
		parent, biblio, publisher = [], [], []
		# NOTE: resolving real crossrefs via self.top().seek(...) was measured
		# to be ~42x slower (5m11.559s vs 0m7.396s), so we just link the parent.
		upkey = self.up().getKey()
		parent.append('<strong><a href="{}.html">{}</a></strong>'.format(upkey, upkey.replace('-', ' ')))
		# DBLP: prefer explicit keys, then a raw URL, then an honest "nothing"
		if 'dblpkey' in self.json:
			for dblpk in listify(self.json['dblpkey']):
				biblio.append('<a href="http://dblp.uni-trier.de/rec/html/{}">DBLP</a>'.format(dblpk))
		elif 'dblpurl' in self.json:
			biblio.append('<a href="{}">DBLP</a>'.format(self.json['dblpurl']))
		else:
			biblio.append('no DBLP info')
		# Google Scholar search by exact title
		if 'title' in self.json:
			biblio.append('<a href="https://scholar.google.com/scholar?q=%22{}%22">Scholar</a>'.format(\
				str(self.json['title']).replace(' ', '+')))
		# Publisher pages, recognised by URL shape; order matters (first match wins)
		recognisers = (
			(lambda e: e.startswith(('http://dl.acm.org', 'http://doi.acm.org', 'http://portal.acm.org')), 'ACM DL'),
			(lambda e: e.startswith('http://ieeexplore.ieee.org'), 'IEEE Xplore'),
			(lambda e: e.startswith('http://ieeecomputersociety.org'), 'IEEE CS'),
			(lambda e: e.startswith('http://drops.dagstuhl.de'), 'Dagstuhl'),
			(lambda e: e.find('computer.org/csdl/proceedings') > 0, 'CSDL'),
			(lambda e: e.startswith('http://journal.ub.tu-berlin.de/eceasst'), 'EC-EASST'),
			(lambda e: e.startswith('http://ceur-ws.org'), 'CEUR'),
		)
		if 'ee' in self.json:
			for e in listify(self.json['ee']):
				for matches, label in recognisers:
					if matches(e):
						publisher.append('<a href="{}">{}</a>'.format(e, label))
						break
				else:
					# dx.doi.org links are silently dropped (DOI handled below);
					# anything else unknown is flagged visibly
					if not e.startswith('http://dx.doi.org'):
						publisher.append('<a href="{}">?EE?</a>'.format(e))
		if 'doi' in self.json:
			publisher.append('<a href="http://dx.doi.org/{}">DOI</a>'.format(self.json['doi']))
		return '<hr/>'.join(['<br/>\n'.join(lb) for lb in (parent, biblio, publisher) if lb])
Exemple #4
0
 def getAbbrAuthors(self):
     """Render authors as initials, e.g. ' (<abbr title="Rainer Koschke">RK</abbr>)'.

     Returns '' when the entry has no authors.
     """
     if 'author' not in self.json:
         return ''
     abbrs = []
     for author in listify(self.json['author']):
         # first letter of every hyphen- or space-separated name part
         initials = ''.join(w[0] for w in author.replace('-', ' ').split(' ') if w)
         abbrs.append('<abbr title="{0}">{1}</abbr>'.format(author, initials))
     return ' (' + ', '.join(abbrs) + ')'
Exemple #5
0
	def getAbbrAuthors(self):
		# Build ' (<abbr title="Full Name">FN</abbr>, ...)' from the author
		# list; empty string when there are no authors.
		if 'author' not in self.json:
			return ''
		def initials(name):
			# first letter of each hyphen- or space-separated name part
			return ''.join(part[0] for part in name.replace('-', ' ').split(' ') if part)
		rendered = ['<abbr title="{0}">{1}</abbr>'.format(a, initials(a))
			for a in listify(self.json['author'])]
		return ' (' + ', '.join(rendered) + ')'
Exemple #6
0
 def offer(self, year, conf):
     """Accept *conf* into *year* iff its key starts with one of our 'select' rules."""
     ckey = conf.getKey()
     if any(ckey.startswith(rule) for rule in listify(self.json['select'])):
         self.addConf(year, conf)
         # print(C.red(conf.getKey()), ' was accepted by ', C.red(self.name))
Exemple #7
0
	def offer(self, year, conf):
		# A venue accepts a conference when its key matches one of the
		# configured 'select' prefixes; the first match wins.
		key = conf.getKey()
		for prefix in listify(self.json['select']):
			if not key.startswith(prefix):
				continue
			self.addConf(year, conf)
			# print(C.red(conf.getKey()), ' was accepted by ', C.red(self.name))
			return
Exemple #8
0
def kv2link(k, v):
    """Render one key/value metadata pair as 'icon link<br/>' HTML.

    Recognised keys: 'g' (Google search), '<lang>.wp' / '<lang>.wb'
    (Wikipedia / Wikibooks in that ISO 639-1 language), 'wd' (Wikidata),
    'hwiki' (Haskell wiki), 'so' (Stack Overflow tag), 'www' (homepage),
    'aka' (aliases).  Unknown keys render visibly as '?key?value?'.
    """
    if k == 'g':
        ico = makeimg('g', 'Google')
        r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(
            escape(v), v)
    elif k.endswith('.wp') or k.endswith('.wb'):
        # Wikipedia and Wikibooks share the same URL scheme; the key
        # prefix is an ISO 639-1 language code.  (Previously two
        # near-identical branches, each computing `lang` twice.)
        lang = k.split('.')[0]
        if k.endswith('.wp'):
            site, icoid, sitename = 'wikipedia', 'wp', 'Wikipedia'
        else:
            site, icoid, sitename = 'wikibooks', 'wb', 'Wikibooks'
        ico = makeimg(icoid, sitename) + makeimg(lang, ISONames[lang])
        r = '<a href="https://{}.{}.org/wiki/{}">{}</a>'.format(
            lang, site, escape(v).replace('%20', '_'), v)
    elif k == 'wd':
        ico = makeimg('wd', 'Wikidata')
        r = '<a href="https://www.wikidata.org/wiki/{0}">{0}</a>'.format(v)
    elif k == 'hwiki':
        ico = makeimg('h', 'Haskell Wiki')
        r = '<a href="https://wiki.haskell.org/{}">{}</a>'.format(escape(v), v)
    elif k == 'so':
        ico = makeimg('so', 'Stack Overflow')
        r = '<a href="http://stackoverflow.com/questions/tagged?tagnames={0}">{0}</a>'.format(
            v)
    elif k == 'www':
        ico = ''  #makeimg('www', 'Homepage')
        # Display the URL without its scheme, but always link with one.
        if not v.startswith('http'):
            y = v
            v = 'http://' + v
        else:
            y = v.replace('http://', '').replace('https://', '')
        r = '<a href="{0}">{1}</a>'.format(v, y)
    elif k == 'aka':
        ico = ''
        r = '<br/>'.join(['a.k.a.: “{}”'.format(z) for z in listify(v)])
    else:
        # unknown key: keep it visible rather than dropping it silently
        ico = ''
        r = '?{}?{}?'.format(k, v)
    return ico + ' ' + r + '<br/>'
Exemple #9
0
 def getPage(self):
     """Generate the HTML page for this entry.

     Builds the tag list (if any) and a 'fancy' title in which every
     approved stemmed word links to its word page, then fills bibHTML.
     """
     if self.getTags():
         cnt = '<h3>Tags:</h3><ul class="tri">'
         cnt += '\n'.join([
             '<li class="tag"><a href="tag/{}.html">#{}</a></li>'.format(
                 escape(t), t) for t in self.tags
         ])
         cnt += '</ul><hr/>'
     else:
         cnt = ''
     if 'stemmed' in self.json.keys():
         stemmed = listify(self.json['stemmed'])
         title = self.get('title')
         ltitle = title.lower()
         words = string2words(title)
         words.reverse()
         fancytitle = ''
         # Walk title words right-to-left, wrapping each approved stem
         # in a link and chopping the processed suffix off the title.
         for wi, w in enumerate(words):
             i = ltitle.rindex(w)
             try:
                 # words is reversed, so position wi maps onto
                 # stemmed[len(words) - wi - 1].  Using enumerate (not
                 # words.index(w)) keeps duplicate words distinct.
                 stem = stemmed[len(words) - wi - 1]
             except IndexError:
                 # narrower than the previous bare except: only the
                 # stemmed-list lookup can fail here
                 print('Abnormal title in', self.getKey())
                 print('\tCould not get', w, 'from', stemmed)
                 break
             if ifApproved(stem):
                 fancytitle = '<a href="word/{}.html">{}</a>{}'.format(\
                  stem, title[i:i+len(w)], title[i+len(w):]) + fancytitle
             else:
                 fancytitle = title[i:] + fancytitle
             ltitle = ltitle[:i]
             title = title[:i]
         fancytitle = title + fancytitle
     else:
         fancytitle = self.get('title')
     return bibHTML.format(\
      filename=self.getJsonName(),
      title=self.get('title'),
      stemmedTitle=fancytitle,
      img=self.get('venue').lower(), # geticon?
      authors=self.getAuthors(),
      short='{}, {}'.format(self.get('venue'), self.get('year')),
      code=self.getCode(),
      bib=self.getBib(),
      boxlinks=self.getBoxLinks(),
      contents=cnt\
      )
Exemple #10
0
def kv2link(k, v):
	"""Render one key/value metadata pair as 'icon link<br/>' HTML.

	Recognised keys: 'g' (Google search), '<lang>.wp' / '<lang>.wb'
	(Wikipedia / Wikibooks in that ISO 639-1 language), 'wd' (Wikidata),
	'hwiki' (Haskell wiki), 'so' (Stack Overflow tag), 'www' (homepage),
	'aka' (aliases).  Unknown keys render visibly as '?key?value?'.
	"""
	if k == 'g':
		ico = makeimg('g', 'Google')
		r = '<a href="https://www.google.com/search?q={}">{}</a>'.format(escape(v), v)
	elif k.endswith('.wp') or k.endswith('.wb'):
		# Wikipedia and Wikibooks share the same URL scheme; the key prefix
		# is an ISO 639-1 language code.  (Previously two near-identical
		# branches, each computing `lang` twice.)
		lang = k.split('.')[0]
		if k.endswith('.wp'):
			site, icoid, sitename = 'wikipedia', 'wp', 'Wikipedia'
		else:
			site, icoid, sitename = 'wikibooks', 'wb', 'Wikibooks'
		ico = makeimg(icoid, sitename) + makeimg(lang, ISONames[lang])
		r = '<a href="https://{}.{}.org/wiki/{}">{}</a>'.format(\
			lang, site, escape(v).replace('%20', '_'), v)
	elif k == 'wd':
		ico = makeimg('wd', 'Wikidata')
		r = '<a href="https://www.wikidata.org/wiki/{0}">{0}</a>'.format(v)
	elif k == 'hwiki':
		ico = makeimg('h', 'Haskell Wiki')
		r = '<a href="https://wiki.haskell.org/{}">{}</a>'.format(escape(v), v)
	elif k == 'so':
		ico = makeimg('so', 'Stack Overflow')
		r = '<a href="http://stackoverflow.com/questions/tagged?tagnames={0}">{0}</a>'.format(v)
	elif k == 'www':
		ico = ''#makeimg('www', 'Homepage')
		# Display the URL without its scheme, but always link with one.
		if not v.startswith('http'):
			y = v
			v = 'http://'+v
		else:
			y = v.replace('http://', '').replace('https://', '')
		r = '<a href="{0}">{1}</a>'.format(v, y)
	elif k == 'aka':
		ico = ''
		r = '<br/>'.join(['a.k.a.: “{}”'.format(z) for z in listify(v)])
	else:
		# unknown key: keep it visible rather than dropping it silently
		ico = ''
		r = '?{}?{}?'.format(k, v)
	return ico + ' ' + r + '<br/>'
Exemple #11
0
	def getPage(self):
		"""Generate the HTML page for this entry.

		Builds the tag list (if any) and a 'fancy' title in which every
		approved stemmed word links to its word page, then fills bibHTML.
		"""
		if self.getTags():
			cnt = '<h3>Tags:</h3><ul class="tri">'
			cnt += '\n'.join(['<li class="tag"><a href="tag/{}.html">#{}</a></li>'.format(escape(t), t) for t in self.tags])
			cnt += '</ul><hr/>'
		else:
			cnt = ''
		if 'stemmed' in self.json.keys():
			stemmed = listify(self.json['stemmed'])
			title = self.get('title')
			ltitle = title.lower()
			words = string2words(title)
			words.reverse()
			fancytitle = ''
			# Walk title words right-to-left, wrapping each approved stem
			# in a link and chopping the processed suffix off the title.
			for wi, w in enumerate(words):
				i = ltitle.rindex(w)
				try:
					# words is reversed, so position wi maps onto
					# stemmed[len(words) - wi - 1].  Using enumerate (not
					# words.index(w)) keeps duplicate words distinct.
					stem = stemmed[len(words) - wi - 1]
				except IndexError:
					# narrower than the previous bare except: only the
					# stemmed-list lookup can fail here
					print('Abnormal title in', self.getKey())
					break
				if ifApproved(stem):
					fancytitle = '<a href="word/{}.html">{}</a>{}'.format(\
						stem, title[i:i+len(w)], title[i+len(w):]) + fancytitle
				else:
					fancytitle = title[i:] + fancytitle
				ltitle = ltitle[:i]
				title = title[:i]
			fancytitle = title + fancytitle
		else:
			fancytitle = self.get('title')
		return bibHTML.format(\
			filename=self.getJsonName(),
			title=self.get('title'),
			stemmedTitle=fancytitle,
			img=self.get('venue').lower(), # geticon?
			authors=self.getAuthors(),
			short='{}, {}'.format(self.get('venue'), self.get('year')),
			code=self.getCode(),
			bib=self.getBib(),
			boxlinks=self.getBoxLinks(),
			contents=cnt\
			)
Exemple #12
0
	#  if a manual rule does the same as the other heuristic, it’s dumb
	#  (i.e. the manual alias list merely repeats what nodiaLatin/simpleLatin compute)
	for a in sorted(aka.keys()):
		if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)):
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was unnecessary manual work')
		elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \
							    or aka[a] == [simpleLatin(a), nodiaLatin(a)]):
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing was a lot of unnecessary manual work')
		elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]:
			print('[ {} ]'.format(C.blue('DUMB')), simpleLatin(a), 'aliasing contains some unnecessary manual work')
	# auto-aliasing heuristic:
	#  for each author with diacritics, its non-diacritic twin is considered harmful
	# collect every author/editor name that occurs anywhere in the corpus
	people = set()
	for v in sleigh.venues:
		for c in v.getConfs():
			if 'editor' in c.json:
				people.update(listify(c.json['editor']))
			for p in c.papers:
				if 'author' in p.json:
					people.update(listify(p.json['author']))
	# register each name's de-diacritised variants as its aliases
	for a in people:
		for na in (nodiaLatin(a), simpleLatin(a)):
			if na != a:
				aka.setdefault(a, [])
				aka[a].append(na)
	# invert aliasing: map every variant back to the canonical name
	# ('ZZZZZZZZZZ' and 'FILE' are bookkeeping keys, not names)
	for akey in aka:
		if akey in ('ZZZZZZZZZZ', 'FILE'):
			continue
		for aval in aka[akey]:
			renameto[aval] = akey
	f = open('_renameto.json', 'w', encoding='utf-8')
Exemple #13
0
 # cs =
 # Last component of each 'address' (presumably the country — TODO confirm)
 # of every conference this person authored at, for the "Travelled to" box.
 ads = [c.json['address'][-1] \
  for c in {bykey[p].up() for p in persondef['authored'] if p in bykey} \
  if 'address' in c.json]
 if ads:
     # count visits per place and render them sorted
     clist = {a: ads.count(a) for a in ads}
     adds = '<code>Travelled to:</code><hr/>' \
       + '<br/>\n'.join(sorted(['{} × {}'.format(clist[a], a) for a in clist]))
     boxlinks += adds
 # collaborated with...
 clist = {}
 for p in persondef['authored']:
     if p not in bykey:
         continue
     if 'author' in bykey[p].json.keys():
         coas = listify(bykey[p].get('author'))
         # a bare ' ' entry would mean a malformed author list
         if ' ' in coas:
             print('ERROR in [{}] - [{}] - [{}]'.format(
                 p, bykey[p].getKey(), coas))
         D = len(coas)
         if D == 1:
             # solo papers count as coauthoring with yourself
             # ('∅' is the pseudo-coauthor bucket for solo work)
             a = '∅'
             if a not in clist:
                 clist[a] = 0
                 if persondef['name'] in name2file.keys():
                     name2file['∅'] = name2file[persondef['name']]
                 else:
                     name2file['∅'] = persondef['name']
             # NOTE(review): solo papers are weighted 1/2 — presumably half
             # the weight of a real collaboration; confirm against the
             # non-solo branch below (outside this excerpt)
             clist[a] += 1 / 2
             continue
Exemple #14
0
	#  if a manual rule does the same as the other heuristic, it’s dumb
	#  (i.e. the manual alias list merely repeats what nodiaLatin/simpleLatin compute)
	for a in sorted(aka.keys()):
		if len(aka[a]) == 1 and aka[a][0] in (nodiaLatin(a), simpleLatin(a)):
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing was unnecessary manual work')
		elif len(aka[a]) == 2 and (aka[a] == [nodiaLatin(a), simpleLatin(a)] \
							    or aka[a] == [simpleLatin(a), nodiaLatin(a)]):
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing was a lot of unnecessary manual work')
		elif nodiaLatin(a) in aka[a] or simpleLatin(a) in aka[a]:
			print('[ {} ]'.format(C.blue('DUMB')), a, 'aliasing contains some unnecessary manual work')
	# auto-aliasing heuristic:
	#  for each author with diacritics, its non-diacritic twin is considered harmful
	# collect every author/editor name that occurs anywhere in the corpus
	people = set()
	for v in sleigh.venues:
		for c in v.getConfs():
			if 'editor' in c.json:
				people.update(listify(c.json['editor']))
			for p in c.papers:
				if 'author' in p.json:
					people.update(listify(p.json['author']))
	# register each name's de-diacritised variants as its aliases
	for a in people:
		for na in (nodiaLatin(a), simpleLatin(a)):
			if na != a:
				aka.setdefault(a, [])
				aka[a].append(na)
	# invert aliasing: map every variant back to the canonical name
	# ('ZZZZZZZZZZ' and 'FILE' are bookkeeping keys, not names)
	for akey in aka:
		if akey in ('ZZZZZZZZZZ', 'FILE'):
			continue
		for aval in aka[akey]:
			renameto[aval] = akey
	f = open('_renameto.json', 'w', encoding='utf8')
Exemple #15
0
     # skip duplicated person records, keeping the first one seen
     if p['name'] in people.keys():
         cx[report(C.red('duplicate') + ' ' + C.yellow(p), 1)] += 1
         continue
     people[p['name']] = p
 print('{}: {} venues, {} papers written by {} people\n{}'.format(\
  C.purple('BibSLEIGH'),
  C.red(len(sleigh.venues)),
  C.red(sleigh.numOfPapers()),
  C.red(len(people)),
  C.purple('='*42)))
 # traverse ALL the papers!
 # record each paper key under its authors' 'authored' lists and each
 # conference key under its editors' 'edited' lists
 for v in sleigh.venues:
     for c in v.getConfs():
         for p in c.papers:
             if 'author' in p.json.keys():
                 for a in listify(p.json['author']):
                     if a in people.keys():
                         if 'authored' not in people[a].keys():
                             people[a]['authored'] = []
                         if p.getKey() not in people[a]['authored']:
                             people[a]['authored'].append(p.getKey())
                     else:
                         report(C.yellow('Author not found: ') + a, 0)
         if 'editor' in c.json.keys():
             for e in listify(c.json['editor']):
                 if e in people.keys():
                     if 'edited' not in people[e].keys():
                         people[e]['edited'] = []
                     if c.getKey() not in people[e]['edited']:
                         people[e]['edited'].append(c.getKey())
                 else:
Exemple #16
0
def checkon(fn, o):
	"""Normalise the link-related JSON of entry *o* stored in file *fn*.

	Rewrites DBLP-style 'url'/'ee' links into structured keys (doi, acmid,
	ieee*id, ...) and migrates legacy keys.  Returns 0 if nothing changed,
	1 if the on-disk file disagrees with the parsed object (left alone),
	2 if the file was rewritten with the normalised JSON.
	"""
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	# read the body without the opening/closing braces; encoding pinned to
	# UTF-8 (consistent with the rest of the code base) instead of relying
	# on the platform default
	with open(fn, 'r', encoding='utf-8') as f:
		lines = f.readlines()[1:-1]
	flines = json2lines(lines)
	plines = sorted(json2lines(o.getJSON().split('\n')))
	# "url" from DBLP are useless: drop db/... catalogue links, force http
	if 'url' in o.json.keys():
		o.json['url'] = [link.replace('https://', 'http://')
			for link in listify(o.json['url'])
			if not link.startswith(('db/conf/', 'db/series/', 'db/books/', 'db/journals/'))]
		if not o.json['url']:
			del o.json['url']
		elif len(o.json['url']) == 1:
			o.json['url'] = o.json['url'][0]
	if 'ee' in o.json.keys() and 'doi' not in o.json.keys():
		if isinstance(o.json['ee'], list):
			if verbose:
				print(C.red('Manylink:'), o.json['ee'])
		newee = []
		for onelink in listify(o.json['ee']):
			# prefix lengths spelled out with len(...) instead of magic numbers
			if onelink.startswith('http://dx.doi.org/'):
				o.json['doi'] = onelink[len('http://dx.doi.org/'):]
			elif onelink.startswith('http://doi.acm.org/'):
				o.json['doi'] = onelink[len('http://doi.acm.org/'):]
			elif onelink.startswith('http://doi.ieeecomputersociety.org/'):
				o.json['doi'] = onelink[len('http://doi.ieeecomputersociety.org/'):]
			elif onelink.startswith('http://dl.acm.org/citation.cfm?id='):
				o.json['acmid'] = onelink[len('http://dl.acm.org/citation.cfm?id='):]
			elif onelink.startswith('http://portal.acm.org/citation.cfm?id='):
				o.json['acmid'] = onelink[len('http://portal.acm.org/citation.cfm?id='):]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=')\
			  or onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber='):
				o.json['ieeearid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=')\
			 and onelink.find('arnumber') > -1:
				o.json['ieeearid'] = onelink.split('arnumber=')[-1].split('&')[0]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber='):
				o.json['ieeepuid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber='):
				o.json['ieeeisid'] = onelink.split('=')[-1]
			elif onelink.startswith('http://eceasst.cs.tu-berlin.de/index.php/eceasst/article/view/'):
				# EC-EASST moved; rewrite to the current host
				newee.append('http://journal.ub.tu-berlin.de/eceasst/article/view/' + onelink.split('/')[-1])
			elif onelink.endswith('.pdf') and \
			    (onelink.startswith('http://computer.org/proceedings/')\
			  or onelink.startswith('http://csdl.computer.org/')):
				# Bad: http://computer.org/proceedings/icsm/1189/11890007.pdf
				# Bad: http://csdl.computer.org/comp/proceedings/date/2003/1870/02/187020040.pdf
				# Good: http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890004.pdf
				if onelink.startswith('http://csdl'):
					cname, _, cid, mid, pid = onelink.split('/')[5:10]
				else:
					cname, cid, pid = onelink.split('/')[4:7]
					# heuristic: the volume id is embedded in the paper id
					if pid.startswith(cid):
						mid = pid[len(cid):len(cid)+2]
					else:
						mid = '00'
				newee.append('http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(\
					cname,
					o.get('year'),
					cid,
					mid,
					pid))
			else:
				if onelink.find('ieee') > -1:
					print(C.purple('IEEE'), onelink)
				if verbose:
					print(C.yellow('Missed opportunity:'), onelink)
				# nothing matches => preserve
				newee.append(onelink)
		if len(newee) == 0:
			del o.json['ee']
		elif len(newee) == 1:
			o.json['ee'] = newee[0]
		else:
			o.json['ee'] = newee
		# post-processing normalisation: acmid should be an int when numeric
		if 'acmid' in o.json.keys() and not isinstance(o.json['acmid'], int) and o.json['acmid'].isdigit():
			o.json['acmid'] = int(o.json['acmid'])
	# legacy key migration: eventuri -> eventurl, https -> http
	if 'eventuri' in o.json.keys():
		o.json['eventurl'] = o.json['eventuri']
		del o.json['eventuri']
	if 'eventurl' in o.json.keys() and o.json['eventurl'].startswith('https://'):
		o.json['eventurl'] = o.json['eventurl'].replace('https://', 'http://')
	nlines = sorted(json2lines(o.getJSON().split('\n')))
	if flines != plines:
		# the file disagrees with the parsed object: report, do not touch
		return 1
	elif plines != nlines:
		# normalisation changed something: write the file back
		with open(fn, 'w', encoding='utf-8') as f:
			f.write(o.getJSON())
		return 2
	else:
		return 0
Exemple #17
0
 def getBoxLinks(self):
     """Assemble three groups of HTML links (parent, bibliographic services,
     publishers); groups are joined by <hr/>, links inside by <br/>."""
     parent, biblio, publisher = [], [], []
     # NOTE: resolving real crossrefs via self.top().seek(...) was measured
     # to be ~42x slower (5m11.559s vs 0m7.396s), so we just link the parent.
     upkey = self.up().getKey()
     parent.append('<strong><a href="{}.html">{}</a></strong>'.format(
         upkey, upkey.replace('-', ' ')))
     # DBLP: prefer explicit keys, then a raw URL, then an honest "nothing"
     if 'dblpkey' in self.json:
         for dblpk in listify(self.json['dblpkey']):
             biblio.append(
                 '<a href="http://dblp.org/rec/html/{}">DBLP</a>'.format(
                     dblpk))
     elif 'dblpurl' in self.json:
         biblio.append('<a href="{}">DBLP</a>'.format(
             self.json['dblpurl']))
     else:
         biblio.append('no DBLP info')
     # Google Scholar search by exact title
     if 'title' in self.json:
         biblio.append('<a href="https://scholar.google.com/scholar?q=%22{}%22">Scholar</a>'.format(
             str(self.json['title']).replace(' ', '+')))
     # Publisher pages, recognised by URL shape; order matters (first match wins)
     recognisers = (
         (lambda e: e.startswith(('http://dl.acm.org', 'http://doi.acm.org', 'http://portal.acm.org')), 'ACM DL'),
         (lambda e: e.startswith('http://ieeexplore.ieee.org'), 'IEEE Xplore'),
         (lambda e: e.startswith('http://ieeecomputersociety.org'), 'IEEE CS'),
         (lambda e: e.startswith('http://drops.dagstuhl.de'), 'Dagstuhl'),
         (lambda e: e.find('computer.org/csdl/proceedings') > 0, 'CSDL'),
         (lambda e: e.startswith('http://journal.ub.tu-berlin.de/eceasst'), 'EC-EASST'),
         (lambda e: e.startswith('http://ceur-ws.org'), 'CEUR'),
     )
     if 'ee' in self.json:
         for e in listify(self.json['ee']):
             for matches, label in recognisers:
                 if matches(e):
                     publisher.append('<a href="{}">{}</a>'.format(e, label))
                     break
             else:
                 # dx.doi.org links are silently dropped (DOI handled
                 # below); anything else unknown is flagged visibly
                 if not e.startswith('http://dx.doi.org'):
                     publisher.append('<a href="{}">?EE?</a>'.format(e))
     if 'doi' in self.json:
         publisher.append('<a href="http://dx.doi.org/{}">DOI</a>'.format(
             self.json['doi']))
     return '<hr/>'.join(['<br/>\n'.join(lb) for lb in (parent, biblio, publisher) if lb])
Exemple #18
0
		# skip duplicated person records, keeping the first one seen
		if p['name'] in people.keys():
			cx[report(C.red('duplicate')+' '+C.yellow(p), 1)] += 1
			continue
		people[p['name']] = p
	print('{}: {} venues, {} papers written by {} people\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.red(len(people)),
		C.purple('='*42)))
	# traverse ALL the papers!
	# record each paper key under its authors' 'authored' lists and each
	# conference key under its editors' 'edited' lists
	for v in sleigh.venues:
		for c in v.getConfs():
			for p in c.papers:
				if 'author' in p.json.keys():
					for a in listify(p.json['author']):
						if a in people.keys():
							if 'authored' not in people[a].keys():
								people[a]['authored'] = []
							if p.getKey() not in people[a]['authored']:
								people[a]['authored'].append(p.getKey())
						else:
							report(C.yellow('Author not found: ') + a, 0)
			if 'editor' in c.json.keys():
				for e in listify(c.json['editor']):
					if e in people.keys():
						if 'edited' not in people[e].keys():
							people[e]['edited'] = []
						if c.getKey() not in people[e]['edited']:
							people[e]['edited'].append(c.getKey())
					else:
Exemple #19
0
def checkon(fn, o):
	"""(Re)compute the tags of entry *o* backed by file *fn*.

	Matches every tag's rules against several normalised representations
	of the title, applies 'relieves' (one tag superseding another), and
	writes the file back when the tags changed.  Returns 0 (unchanged),
	1 (on-disk file disagrees with the parsed object; not touched) or
	2 (file rewritten).
	"""
	if os.path.isdir(fn):
		fn = fn + '.json'
	# UTF-8 pinned explicitly (entry files contain non-ASCII names)
	# and a context manager instead of a manual close
	with open(fn, 'r', encoding='utf-8') as f:
		lines = f.readlines()[1:-1]
	flines = [strictstrip(s) for s in lines]
	plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
	if flines != plines:
		return 1
	ts = []
	# precise case-sensitive match
	mcs = o.get('title')
	# precise match for substrings
	mes = baretext(mcs)
	# precise match for words
	mew = mes.split(' ')
	# imprecise match for substrings
	mis = superbaretext(mes)
	# imprecise match for words
	miw = mis.split(' ')
	# now match every tag's rules against all title representations
	for t in tags:
		# print('Check',t,'vs',mes)
		if 'name' not in t.keys():
			print(C.red('ERROR:'), 'no name for tag from file', t['FILE'])
			continue
		if all([not k.startswith('match') for k in t.keys()]):
			print(C.red('ERROR:'), 'no match rules for tag', t['name'])
			continue
		for k in t.keys():
			if k == 'matchentry':
				# direct match on the entry key
				if o.getKey() in t[k]:
					ts += [t['name']]
			elif k.startswith('match'):
				ts += [t['name'] for s in listify(t[k]) if matchModes[k](s, mcs, mes, mew, mis, miw)]
				# ts += [t['name'] for s in listify(t[k]) if fmm(t, k, s, mcs, mes, mew, mis, miw)]
	# second pass: a tag may 'relieve' (supersede) another tag it implies
	for t in tags:
		if 'relieves' in t.keys():
			for r in listify(t['relieves']):
				if t['name'] in ts and r in ts:
					ts.remove(r)
					if t['name'] not in relieved.keys():
						relieved[t['name']] = 0
					relieved[t['name']] += 1
	if ts:
		if not o.tags:
			o.tags = []
		for t in ts:
			if t not in o.tags:
				o.tags.append(t)
	# NOTE: this assignment overwrites ALL previously assigned tags; the
	# merge loop above only matters for observers that still hold a
	# reference to the old list.  Remove this line to merge instead.
	o.tags = uniq(ts)
	# let’s keep tags clean and sorted
	o.tags = sorted(o.tags)
	nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
	if plines != nlines:
		with open(fn, 'w', encoding='utf-8') as f:
			f.write(o.getJSON())
		return 2
	else:
		return 0
Exemple #20
0
 def getBib(self):
     """Render this entry as an HTML-decorated BibTeX string.

     Keys are serialised in sorted order; many keys get special
     treatment (hyperlinked authors/DOIs/IDs, hidden "secret" keys,
     <span>-wrapped URIs).  <i>…</i> in values becomes LaTeX emphasis
     at the end.
     """
     if len(self.json) < 1:
         # nothing to render
         return '@misc{EMPTY,}'
     s = '@%s{%s,\n' % (self.get('type'), self.getKey())
     # name→file mapping; fall back to the parent's table when ours is empty
     n2f = self.n2f if self.n2f else self.back.n2f
     for k in sorted(self.json.keys()):
         if k == k.upper() or k.endswith('short') or k == 'tag':
             # secret key
             continue
         if k in ('author', 'editor'):
             # TODO: add (correct!) links
             aelinks = [\
              '<a href="{}">{}</a>'.format(n2f[ae], ae)
               if ae in n2f.keys()
               else ae
              for ae in listify(self.json[k])
             ]
             s += '\t{:<13} = "{}",\n'.format(k, ' and '.join(aelinks))
         elif k in ('title', 'booktitle', 'series', 'publisher', 'journal'):
             # wrap in a named <span> only when a short form exists
             if k + 'short' not in self.json.keys():
                 s += '\t{0:<13} = "{{{1}}}",\n'.format(k, self.json[k])
             else:
                 s += '\t{0:<13} = "{{<span id="{0}">{1}</span>}}",\n'.format(
                     k, self.json[k])
         elif k in ('crossref', 'key', 'type', 'venue', 'twitter', \
          'eventtitle', 'eventurl', 'nondblpkey', 'dblpkey', 'dblpurl', \
          'programchair', 'generalchair', 'roles', 'tagged', 'stemmed', \
          'status', 'ieeepuid', 'ieeearid', 'ieeeisid', 'cite'):
             # internal/bookkeeping keys: not emitted in the BibTeX
             # TODO: ban 'ee' as well
             pass
         elif k == 'doi':
             s += '<span class="uri">\t{0:<13} = "<a href="http://dx.doi.org/{1}">{1}</a>",\n</span>'.format(
                 k, self.json[k])
         elif k == 'acmid':
             s += '<span class="uri">\t{0:<13} = "<a href="http://dl.acm.org/citation.cfm?id={1}">{1}</a>",\n</span>'.format(
                 k, self.json[k])
         elif k == 'ieeearid':
             s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber={1}">{1}</a>",\n</span>'.format(
                 k, self.json[k])
         elif k == 'ieeepuid':
             s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber={1}">{1}</a>",\n</span>'.format(
                 k, self.json[k])
         elif k == 'ieeeisid':
             s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber={1}">{1}</a>",\n</span>'.format(
                 k, self.json[k])
         elif k == 'dblpkey':
             # Legacy!
             # s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/db/{1}">{1}</a>",\n</span>'.format(k, self.json[k])
             s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/rec/html/{1}">{1}</a>",\n'.format(
                 k, self.json[k])
         elif k == 'isbn':
             s += '<span id="isbn">\t{:<13} = "{}",\n</span>'.format(
                 k, self.json[k])
         elif k in ('ee', 'url'):
             for e in listify(self.json[k]):
                 # VVZ: eventually would like to get rid of EE completely
                 # VVZ: limiting it for now to possibly interesting cases
                 if k == 'ee' and (e.startswith('http://dx.doi.org') or \
                  e.startswith('http://dl.acm.org') or\
                  e.startswith('http://doi.ieeecomputersociety.org')\
                 ):
                     continue
                 s += '<span class="uri">\t{0:<13} = "<a href=\"{1}\">{1}</a>",\n</span>'.format(
                     k, e)
         elif k in ('year', 'volume', 'issue', 'number') and isinstance(
                 self.json[k], int):
             # numeric fields are emitted without quotes
             s += '\t{0:<13} = {1},\n'.format(k, self.json[k])
         elif k == 'pages':
             s += '\t{0:<13} = "{1}",\n'.format(k, self.getPagesBib())
         elif k == 'address':
             # address may be a plain string or a [city, region, country]
             # triple with a possibly-empty middle component
             if isinstance(self.json[k], str):
                 a = self.json[k]
             elif self.json[k][1]:
                 a = ', '.join(self.json[k])
             else:
                 a = self.json[k][0] + ', ' + self.json[k][2]
             s += '\t{0:<13} = "{1}",\n'.format(k, a)
         else:
             # default: quoted verbatim value
             s += '\t{0:<13} = "{1}",\n'.format(k, self.json[k])
     s += '}'
     return s.replace('<i>', '\\emph{').replace('</i>', '}')
Exemple #21
0
def checkon(fn, o):
    """Recompute the tags of entry `o` backed by JSON file `fn`.

    Matches the entry's title against the global tag match rules, applies
    'relieves' suppressions, overwrites the entry's tags with the result
    and rewrites the file if its serialisation changed.

    Returns:
        1 if the on-disk JSON disagrees with the object's serialisation,
        2 if tags changed and the file was rewritten,
        0 if nothing changed.
    """
    # A directory stands for a JSON file of the same name.
    if os.path.isdir(fn):
        fn = fn + '.json'
    # Use a context manager so the handle is closed even on errors.
    with open(fn, 'r', encoding='utf-8') as f:
        # Drop the enclosing braces of the JSON object.
        lines = f.readlines()[1:-1]
    flines = [strictstrip(s) for s in lines]
    plines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if flines != plines:
        # On-disk content is out of sync with the object: refuse to retag.
        return 1
    ts = []
    # precise case-sensitive match
    mcs = o.get('title')
    # precise match for substrings
    mes = baretext(mcs)
    # precise match for words
    mew = mes.split(' ')
    # imprecise match for substrings
    mis = superbaretext(mes)
    # imprecise match for words
    miw = mis.split(' ')
    # First pass: collect every tag whose match rules fire on the title.
    for t in tags:
        # print('Check',t,'vs',mes)
        if 'name' not in t.keys():
            print(C.red('ERROR:'), 'no name for tag from file', t['FILE'])
            continue
        if all([not k.startswith('match') for k in t.keys()]):
            print(C.red('ERROR:'), 'no match rules for tag', t['name'])
            continue
        for k in t.keys():
            if k == 'matchentry':
                # 'matchentry' lists explicit entry keys to tag directly.
                if o.getKey() in t[k]:
                    ts += [t['name']]
            elif k.startswith('match'):
                # Dispatch to the matching strategy named by the rule key.
                ts += [
                    t['name'] for s in listify(t[k])
                    if matchModes[k](s, mcs, mes, mew, mis, miw)
                ]
    # Second pass: a tag that 'relieves' another removes the latter when
    # both fired; count how often each relief was applied.
    for t in tags:
        if 'relieves' in t.keys():
            for r in listify(t['relieves']):
                if t['name'] in ts and r in ts:
                    ts.remove(r)
                    if t['name'] not in relieved.keys():
                        relieved[t['name']] = 0
                    relieved[t['name']] += 1
    if ts:
        if not o.tags:
            o.tags = []
        for t in ts:
            if t not in o.tags:
                o.tags.append(t)
    # NOTE(review): the next line overwrites the merge performed just above,
    # i.e. all pre-existing tags are replaced by the computed set.
    o.tags = uniq(ts)
    # keep tags clean and sorted
    o.tags = sorted(o.tags)
    nlines = sorted([strictstrip(s) for s in o.getJSON().split('\n')[1:-1]])
    if plines != nlines:
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(o.getJSON())
        return 2
    else:
        return 0
			print('[', C.red('NOGO'), ']', 'No name in', fn)
			continue
		people[p['name']] = p
	print('{}: {} venues, {} papers\n{}'.format(\
		C.purple('BibSLEIGH'),
		C.red(len(sleigh.venues)),
		C.red(sleigh.numOfPapers()),
		C.purple('='*42)))
	# All people who ever contributed
	names = []
	for v in sleigh.venues:
		for c in v.getConfs():
			for p in c.papers:
				for k in ('author', 'editor'):
					if k in p.json.keys():
						names += [a for a in listify(p.json[k]) if a not in names]
	# caching
	peoplekeys = people.keys()
	if os.path.exists('_established.json'):
		established = json.load(open('_established.json', 'r'))
	else:
		established = {}
	# print(people)
	CXread = len(people)
	for name in names:
		if name not in peoplekeys:
			p = {'name': name,\
				 'FILE': ienputdir + '/people/' + fileify(name) + '.json',\
				'dblp': dblpify(name)}
			people[p['name']] = p
	# flatten conferences for easy lookup
Example #23
0
	def getBib(self):
		"""Render this entry as an HTML-decorated BibTeX string.

		Identifier fields (doi, acmid, isbn, ee/url) become hyperlinks,
		name lists are joined with ' and ', bookkeeping fields are
		suppressed, and <i>…</i> markup is converted to \\emph{…}.
		"""
		if len(self.json) < 1:
			return '@misc{EMPTY,}'
		# Opening line: @type{key,
		s = '@%s{%s,\n' % (self.get('type'), self.getKey())
		# name-to-file mapping; fall back to the backing object's map
		n2f = self.n2f if self.n2f else self.back.n2f
		for k in sorted(self.json.keys()):
			if k == k.upper() or k.endswith('short') or k == 'tag':
				# secret key
				continue
			if k in ('author', 'editor'):
				# Link each person to their page when a file is known.
				# TODO: add (correct!) links
				aelinks = [\
					'<a href="{}">{}</a>'.format(n2f[ae], ae)
						if ae in n2f.keys()
						else ae
					for ae in listify(self.json[k])
				]
				s += '\t{:<13} = "{}",\n'.format(k, ' and '.join(aelinks))
			elif k in ('title', 'booktitle', 'series', 'publisher', 'journal'):
				# Braced value; add a span anchor when a short form exists.
				if k+'short' not in self.json.keys():
					s += '\t{0:<13} = "{{{1}}}",\n'.format(k, self.json[k])
				else:
					s += '\t{0:<13} = "{{<span id="{0}">{1}</span>}}",\n'.format(k, self.json[k])
			elif k in ('crossref', 'key', 'type', 'venue', 'twitter', \
				'eventtitle', 'eventurl', 'nondblpkey', 'dblpkey', 'dblpurl', \
				'programchair', 'generalchair', 'roles', 'tagged', 'stemmed', \
				'status', 'ieeepuid', 'ieeearid', 'ieeeisid', 'cite'):
				# Internal/bookkeeping fields: not part of the BibTeX output.
				# NOTE(review): because 'dblpkey', 'ieeepuid', 'ieeearid' and
				# 'ieeeisid' are swallowed here, their dedicated elif branches
				# below are unreachable dead code.
				# TODO: ban 'ee' as well
				pass
			elif k == 'doi':
				s += '<span class="uri">\t{0:<13} = "<a href="http://dx.doi.org/{1}">{1}</a>",\n</span>'.format(k, self.json[k])
			elif k == 'acmid':
				s += '<span class="uri">\t{0:<13} = "<a href="http://dl.acm.org/citation.cfm?id={1}">{1}</a>",\n</span>'.format(k, self.json[k])
			elif k == 'ieeearid':
				s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
			elif k == 'ieeepuid':
				s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
			elif k == 'ieeeisid':
				s += '<span class="uri">\t{0:<13} = "<a href="http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber={1}">{1}</a>",\n</span>'.format(k, self.json[k])
			elif k == 'dblpkey':
				# Legacy!
				# s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/db/{1}">{1}</a>",\n</span>'.format(k, self.json[k])
				s += '\t{0:<13} = "<a href="http://dblp.uni-trier.de/rec/html/{1}">{1}</a>",\n'.format(k, self.json[k])
			elif k == 'isbn':
				s += '<span id="isbn">\t{:<13} = "{}",\n</span>'.format(k, self.json[k])
			elif k in ('ee', 'url'):
				for e in listify(self.json[k]):
					# VVZ: eventually would like to get rid of EE completely
					# VVZ: limiting it for now to possibly interesting cases
					if k == 'ee' and (e.startswith('http://dx.doi.org') or \
						e.startswith('http://dl.acm.org') or\
						e.startswith('http://doi.ieeecomputersociety.org')\
					):
						continue
					s += '<span class="uri">\t{0:<13} = "<a href=\"{1}\">{1}</a>",\n</span>'.format(k, e)
			elif k in ('year', 'volume', 'issue', 'number') and isinstance(self.json[k], int):
				# Numeric fields are emitted without quotes.
				s += '\t{0:<13} = {1},\n'.format(k, self.json[k])
			elif k == 'pages':
				s += '\t{0:<13} = "{1}",\n'.format(k, self.getPagesBib())
			elif k == 'address':
				# Address may be a plain string or a (city, state, country)
				# list whose middle element is optional.
				if isinstance(self.json[k], str):
					a = self.json[k]
				elif self.json[k][1]:
					a = ', '.join(self.json[k])
				else:
					a = self.json[k][0] + ', ' + self.json[k][2]
				s += '\t{0:<13} = "{1}",\n'.format(k, a)
			else:
				# Default: quoted string field.
				s += '\t{0:<13} = "{1}",\n'.format(k, self.json[k])
		s += '}'
		return s.replace('<i>', '\\emph{').replace('</i>', '}')
Example #24
0
def checkon(fn, o):
    """Normalise the link fields ('url', 'ee') of entry `o` stored at `fn`.

    Extracts structured identifiers (doi, acmid, ieeearid, ieeepuid,
    ieeeisid) out of raw 'ee' links, rewrites known-moved hosts, drops
    DBLP-internal urls and canonicalises http/https, then rewrites the
    file if the serialisation changed.

    Returns:
        1 if the on-disk JSON already disagreed with the object,
        2 if the file was rewritten with the normalised object,
        0 if nothing changed.
    """
    # A directory (or a missing path) stands for a JSON file of that name.
    if not os.path.exists(fn) or os.path.isdir(fn):
        fn = fn + '.json'
    # Use a context manager so the handle is closed even on errors.
    with open(fn, 'r', encoding='utf-8') as f:
        # Drop the enclosing braces of the JSON object.
        lines = f.readlines()[1:-1]
    flines = json2lines(lines)
    plines = sorted(json2lines(o.getJSON().split('\n')))
    # "url" from DBLP are useless: drop db/-internal links and
    # canonicalise the scheme of the rest.
    if 'url' in o.json.keys():
        o.json['url'] = [
            link.replace('https://', 'http://')
            for link in listify(o.json['url'])
            if not link.startswith('db/conf/')
            and not link.startswith('db/series/')
            and not link.startswith('db/books/')
            and not link.startswith('db/journals/')
        ]
        if not o.json['url']:
            del o.json['url']
        elif len(o.json['url']) == 1:
            # A single remaining link is stored unwrapped.
            o.json['url'] = o.json['url'][0]
    if 'ee' in o.json.keys() and 'doi' not in o.json.keys():
        if isinstance(o.json['ee'], list):
            if verbose:
                print(C.red('Manylink:'), o.json['ee'])
        newee = []
        for onelink in listify(o.json['ee']):
            # Slice offsets below equal the length of the matched prefix.
            if onelink.startswith('http://dx.doi.org/'):
                o.json['doi'] = onelink[18:]
            elif onelink.startswith('http://doi.acm.org/'):
                o.json['doi'] = onelink[19:]
            elif onelink.startswith('http://doi.ieeecomputersociety.org/'):
                o.json['doi'] = onelink[35:]
            elif onelink.startswith('http://dl.acm.org/citation.cfm?id='):
                o.json['acmid'] = onelink[34:]
            elif onelink.startswith('http://portal.acm.org/citation.cfm?id='):
                o.json['acmid'] = onelink[38:]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=')\
              or onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber='):
                o.json['ieeearid'] = onelink.split('=')[-1]
            elif onelink.startswith('http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=')\
             and onelink.find('arnumber') > -1:
                # arnumber is buried deeper in the query string here.
                o.json['ieeearid'] = onelink.split('arnumber=')[-1].split('&')[0]
            elif onelink.startswith(
                    'http://ieeexplore.ieee.org/xpl/mostRecentIssue.jsp?punumber='):
                o.json['ieeepuid'] = onelink.split('=')[-1]
            elif onelink.startswith(
                    'http://ieeexplore.ieee.org/xpl/tocresult.jsp?isnumber='):
                o.json['ieeeisid'] = onelink.split('=')[-1]
            elif onelink.startswith(
                    'http://eceasst.cs.tu-berlin.de/index.php/eceasst/article/view/'):
                # ECEASST moved hosts; keep only the article id.
                newee.append(
                    'http://journal.ub.tu-berlin.de/eceasst/article/view/' +
                    onelink.split('/')[-1])
            elif onelink.endswith('.pdf') and \
                (onelink.startswith('http://computer.org/proceedings/')\
              or onelink.startswith('http://csdl.computer.org/')):
                # Bad: http://computer.org/proceedings/icsm/1189/11890007.pdf
                # Bad: http://csdl.computer.org/comp/proceedings/date/2003/1870/02/187020040.pdf
                # Good: http://www.computer.org/csdl/proceedings/icsm/2001/1189/00/11890004.pdf
                if onelink.startswith('http://csdl'):
                    cname, _, cid, mid, pid = onelink.split('/')[5:10]
                else:
                    cname, cid, pid = onelink.split('/')[4:7]
                    # heuristic: the issue id usually prefixes the paper id
                    if pid.startswith(cid):
                        mid = pid[len(cid):len(cid) + 2]
                    else:
                        mid = '00'
                newee.append(
                    'http://www.computer.org/csdl/proceedings/{}/{}/{}/{}/{}'.format(
                        cname, o.get('year'), cid, mid, pid))
            else:
                if onelink.find('ieee') > -1:
                    print(C.purple('IEEE'), onelink)
                if verbose:
                    print(C.yellow('Missed opportunity:'), onelink)
                # nothing matches => preserve
                newee.append(onelink)
        # Store the surviving links unwrapped when possible.
        if len(newee) == 0:
            del o.json['ee']
        elif len(newee) == 1:
            o.json['ee'] = newee[0]
        else:
            o.json['ee'] = newee
        # post-processing normalisation: purely numeric acmid becomes an int
        if 'acmid' in o.json.keys() and not isinstance(
                o.json['acmid'], int) and o.json['acmid'].isdigit():
            o.json['acmid'] = int(o.json['acmid'])
    # 'eventuri' is a legacy spelling of 'eventurl'.
    if 'eventuri' in o.json.keys():
        o.json['eventurl'] = o.json['eventuri']
        del o.json['eventuri']
    if 'eventurl' in o.json.keys() and o.json['eventurl'].startswith(
            'https://'):
        o.json['eventurl'] = o.json['eventurl'].replace('https://', 'http://')
    nlines = sorted(json2lines(o.getJSON().split('\n')))
    if flines != plines:
        return 1
    elif plines != nlines:
        with open(fn, 'w', encoding='utf-8') as f:
            f.write(o.getJSON())
        return 2
    else:
        return 0
Example #25
0
			# travelled to...
			# NB: code clone of AST::Venue
			# cs = 
			ads = [c.json['address'][-1] \
				for c in {bykey[p].up() for p in persondef['authored']} \
				if 'address' in c.json]
			if ads:
				clist = {a:ads.count(a) for a in ads}
				adds = '<code>Travelled to:</code><hr/>' \
					 + '<br/>\n'.join(sorted(['{} × {}'.format(clist[a], a) for a in clist]))
				boxlinks += adds
			# collaborated with...
			clist = {}
			for p in persondef['authored']:
				if 'author' in bykey[p].json.keys():
					coas = listify(bykey[p].get('author'))
					if ' ' in coas:
						print('ERROR in [{}] - [{}] - [{}]'.format(p, bykey[p].getKey(), coas))
					D = len(coas)
					if D == 1:
						# solo papers count as coauthoring with yourself
						a = '∅'
						if a not in clist:
							clist[a] = 0
							if persondef['name'] in name2file.keys():
								name2file['∅'] = name2file[persondef['name']]
							else:
								name2file['∅'] = persondef['name']
						clist[a] += 1/2
						continue
					for a in coas: