Example #1
0
 def __init__(self, idir, name2file):
     """Build the list of venues found directly under ``idir``.

     ``name2file`` is stored as ``self.n2f`` and handed on to every
     ``Venue`` that gets created.  Entries of ``idir`` ending in ``.md``,
     ``.json``, ``/frem`` or ``/edif`` are never treated as venues.
     """
     super(Sleigh, self).__init__('', idir)
     self.venues = []
     self.n2f = name2file
     # Names listed here are reported and left out; the list is currently empty.
     skip4Now = []
     # Top-level JSON files, keyed by their name up to the first dot.
     jsons = {}
     for jsonPath in glob.glob(idir + '/*.json'):
         stem = lastSlash(jsonPath).split('.')[0]
         if stem in skip4Now:
             print(
                 C.red('Skipping') + ' ' + C.purple(jsonPath) + ' ' +
                 C.red('for now'))
             continue
         jsons[stem] = jsonPath
     ignoredEndings = ('.md', '.json', '/frem', '/edif')
     for d in glob.glob(idir + '/*'):
         ignore = d.endswith(ignoredEndings)
         if d.split('/')[-1] in skip4Now:
             print(
                 C.red('Skipping') + ' ' + C.purple(d) + ' ' +
                 C.red('for now'))
             ignore = True
         if ignore:
             continue
         name = lastSlash(d)
         if name in jsons:
             self.venues.append(Venue(d, idir, name2file, self))
             continue
         # No matching top-level JSON: warn, but still load the venue
         # unless it is one of the two special names.
         print(C.red('Legacy non-top definition of'), d)
         if name not in ('edif', 'frem'):
             self.venues.append(Venue(d, idir, name2file, self))
Example #2
0
def checkon(fn, o):
	"""Compare the JSON file at ``fn`` with the JSON text produced by ``o``.

	If ``fn`` does not exist (or is a directory), ``'.json'`` is appended
	to it first.  If the file is still missing, a minimal "proceedings"
	stub is written there.

	The comparison ignores the first and last line of both texts and the
	order of the remaining lines; each line is normalised with
	``strictstrip`` before comparing.

	Args:
		fn: path of the JSON file, with or without the ``.json`` suffix.
		o: object whose ``getJSON()`` returns the expected JSON text.

	Returns:
		0 if the file matches ``o.getJSON()``;
		1 if it differs, or if it was deleted because a ``"year"`` line
		held a year above 3000;
		2 if the file did not exist and a stub was created.
	"""
	if not os.path.exists(fn) or os.path.isdir(fn):
		fn = fn + '.json'
	if not os.path.exists(fn):
		# if it still does not exist, let us create a minimal one
		with open(fn, 'w', encoding='utf-8') as f:
			f.write('{{\n\t"title": "{name}",\n\t"type": "proceedings",\n\t"year": {year}\n}}'.format(
				name=lastSlash(fn)[:-5].replace('-', ' '),
				year=findYear(lastSlash(fn)),
			))
		print('[ {} ] {}'.format(C.yellow('MADE'), fn))
		return 2
	# Context manager guarantees the handle is released even if reading fails.
	with open(fn, 'r', encoding='utf-8') as f:
		lines = f.readlines()[1:-1]
	for line in lines:
		# An implausible year marks the file as broken: remove it.
		if '"year"' in line and findYear(line) > 3000:
			os.remove(fn)
			print('[ {} ] {}'.format(C.red('KILL'), fn))
			return 1
	flines = sorted(strictstrip(s) for s in lines)
	plines = sorted(strictstrip(s) for s in o.getJSON().split('\n')[1:-1])
	if flines == plines:
		return 0
	# Show the lines unique to each side to make the mismatch easy to spot.
	f1 = [line for line in flines if line not in plines]
	f2 = [line for line in plines if line not in flines]
	print('∆:', f1, '\nvs', f2)
	return 1
Example #3
0
     paperAuths.extend(auths)
 # The 'id' attribute of the current item; used below to build the entry's 'url'.
 paperLnk = li.get('id')
 # Use the first <a> element as the PDF link only if its href ends in '.pdf'.
 # NOTE(review): an <a> without href would make .get('href') return None and
 # the .endswith call fail — confirm the input always carries href.
 hope = li.find_all('a')
 if hope and hope[0].get('href').endswith('.pdf'):
     paperPdf = urlstart + hope[0].get('href')
 else:
     paperPdf = ''
 # Bibliographic record for one paper; volume-level values (volYear, volEds,
 # volNr, volVenue, volTitles) come from code outside this excerpt.
 paperEntry = {'type': 'inproceedings', 'series': 'CEUR Workshop Proceedings',\
  'publisher': 'CEUR-WS.org', 'year': volYear, 'booktitle': volTitles[-1],\
  'editor': volEds, 'volume': volNr.split('-')[-1], 'title': paperTitle,\
  'author': paperAuths, 'pages': paperPages, 'venue': volVenue}
 # Optional keys are only added when a value is available.
 if paperPdf:
     paperEntry['openpdf'] = paperPdf
 if paperLnk:
     paperEntry['url'] = urlstart + '#' + paperLnk
 # File name: lastSlash(outputdir), a dash, the last word of the first author,
 # then the first letter of the last word of every further author.
 # NOTE(review): paperAuths[0] raises IndexError when the author list is
 # empty — confirm that at least one author is always collected.
 paperFilename = lastSlash(outputdir) + '-' + paperAuths[0].split(
     ' ')[-1]
 for a in paperAuths[1:]:
     # NOTE(review): looks like leftover debug output — confirm it is wanted.
     print(a)
     paperFilename += a.split(' ')[-1][0]
 # Name clash: append 'a', then keep advancing that final character
 # ('a' -> 'b' -> ...) until the name is not in `done`.
 if paperFilename in done:
     paperFilename += 'a'
     while paperFilename in done:
         paperFilename = paperFilename[:-1] + chr(
             ord(paperFilename[-1]) + 1)
 # print(jsonify(paperEntry), '-->', outputdir+'/'+paperFilename+'.json')
 # Write the serialised entry to <outputdir>/<paperFilename>.json as UTF-8.
 f = open(outputdir + '/' + paperFilename + '.json',
          'w',
          encoding='utf-8')
 f.write(jsonify(paperEntry))
 f.close()
Example #4
0
def purenameof(f):
    """Return ``lastSlash(f)`` without its final four characters."""
    tail = lastSlash(f)
    return tail[:-4]
Example #5
0
    print('Error fetching URL: ' + url)
    return ''


# Script entry point: takes a page URI and a target directory, optionally
# followed by a <FROM> and a <TO> entry.
if __name__ == "__main__":
    # Exactly two or exactly four arguments are accepted.
    if len(sys.argv) not in (3, 5):
        print('Usage:\n\t{} <URI> <DIR> [<FROM> <TO>]\n'.format(sys.argv[0]))
        print(('e.g.: {} http://dblp.uni-trier.de/db/conf/sigplan/sigplan82.html ' +
               '../json/corpus/PLDI/1982/SCC-1982').format(sys.argv[0]))
        sys.exit(1)
    dblp = safely_load_url(sys.argv[1])
    ldir = sys.argv[2]
    # Fifth '/'-separated component of <DIR>; for the usage example above
    # ('../json/corpus/PLDI/1982/SCC-1982') that is '1982'.
    # NOTE(review): raises IndexError for shallower paths — confirm callers
    # always pass a path of that shape.
    year = ldir.split('/')[4]
    # Every double-quote-delimited piece of the page text that ends in '.xml'.
    allxmls = [xmlname for xmlname in dblp.split('"') if xmlname.endswith('.xml')]
    # The bounds default to the first and last .xml names found on the page.
    if len(sys.argv) == 5:
        entry1 = lastSlash(sys.argv[3])
        entry2 = lastSlash(sys.argv[4])
    else:
        entry1 = purenameof(allxmls[0])
        entry2 = purenameof(allxmls[-1])
    if not os.path.exists(ldir):
        os.makedirs(ldir)
    ps = 0
    # Flag switched on when entry1 is reached and switched off again when
    # entry2 is reached while the flag is on.
    yeswecan = False
    for xmlname in allxmls:
        if yeswecan:
            if purenameof(xmlname) == entry2:
                yeswecan = False
        else:
            if purenameof(xmlname) == entry1:
                yeswecan = True
Example #6
0
  C.purple('='*42)))
 # Keys of all people definitions encountered (filled in the loop below).
 ps = []
 # flatten the sleigh
 # bykey maps getKey() of every venue, conference and paper to its object.
 bykey = {}
 for v in sleigh.venues:
     bykey[v.getKey()] = v
     for c in v.getConfs():
         bykey[c.getKey()] = c
         for p in c.papers:
             bykey[p.getKey()] = p
 print(C.purple('BibSLEIGH flattened to {} entries'.format(len(bykey))))
 # tagged = []
 # for k in ts.keys():
 # peoples maps a person key to the parsed content of that person's JSON file.
 peoples = {}
 for fn in glob.glob(ienputdir + '/people/*.json'):
     # Person key: lastSlash(fn) without its last five characters ('.json').
     k = lastSlash(fn)[:-5]
     ps.append(k)
     # TODO: get rid of ps in favour of peoples
     # Output page for this person.
     # NOTE(review): f is not written to or closed in the lines visible here.
     f = open('{}/person/{}.html'.format(outputdir, k),
              'w',
              encoding='utf-8')
     persondef = parseJSON(fn)
     peoples[k] = persondef
     # what to google?
     # links = []
     # if 'g' not in persondef.keys():
     # 	links.append(kv2link('g', tagdef['name'] if 'namefull' in tagdef.keys() else k))
     # title = tagdef['namefull'] if 'namefull' in tagdef.keys() else tagdef['name']
     # subt = ('<br/><em>'+tagdef['namelong']+'</em>') if 'namelong' in tagdef.keys() else ''
     # links = '<strong>{}</strong>{}<hr/>'.format(title, subt) + '\n'.join(sorted(links))
     # TODO: sort by venues!
Example #7
0
 def getHtmlName(self):
     """Return the HTML file name derived from ``self.getPureName()``.

     A trailing ``.json`` is dropped from ``lastSlash`` of the pure name,
     and ``.html`` is appended unless the result already ends with it.
     """
     base = lastSlash(self.getPureName())
     if base.endswith('.json'):
         base = base[:-len('.json')]
     if not base.endswith('.html'):
         base += '.html'
     return base
Example #8
0
def last(xx):
    """Return ``lastSlash(xx)`` with every occurrence of ``.json`` removed."""
    tail = lastSlash(xx)
    # Splitting on the marker and re-joining drops all of its occurrences.
    return ''.join(tail.split('.json'))
Example #9
0
        print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2))
    return r


# Script entry point: works on the JSON files of one corpus directory.
if __name__ == "__main__":
    print('{} conference renamer\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.purple('='*42)))
    # NOTE(review): the usage text shows <DIR> in brackets as if optional,
    # but this check makes it mandatory.
    if len(sys.argv) < 2:
        print('Usage:\n\t{} [<DIR>]'.format(sys.argv[0]))
        sys.exit(1)
    # Verbose mode is on when the last argument is '-v'.
    verbose = sys.argv[-1] == '-v'
    # The argument is either a path starting with ienputdir or a name
    # relative to <ienputdir>/corpus/; both forms yield path, name and namem.
    if sys.argv[1].startswith(ienputdir):
        path = sys.argv[1]
        name = path.replace(ienputdir + '/corpus/', '')
        namem = lastSlash(name)
    else:
        name = sys.argv[1]
        path = ienputdir + '/corpus/' + name
        namem = lastSlash(name)
    # Counters keyed 0, 1 and 2 — presumably the status codes returned by
    # report(); verify against its definition.
    cx = {0: 0, 1: 0, 2: 0}
    # A missing directory is reported with status 1 and ends the run.
    if not os.path.exists(path):
        report(name, name, 1)
        sys.exit(1)
    # for all papers...
    for fn in glob.glob(path + '/*.json'):
        # Text after '<namem>/' in the path, with a trailing '.json' removed.
        pureold = fn.split(namem + '/')[1]
        if pureold.endswith('.json'):
            pureold = pureold[:-5]
        purenew = pureold
        if purenew[-2:] == namem[-2:]:
Example #10
0
def _write_page(path, content):
    """Write ``content`` to ``path`` as UTF-8 text, closing the file even on error."""
    with open(path, 'w', encoding='utf-8') as f:
        f.write(content)


def main():
    """Generate the HTML pages under ``outputdir`` from the global ``sleigh``.

    Steps, in order:

    1. ``index.html`` plus one page per venue, brand, conference and paper;
       a one-line summary is printed for each venue.
    2. ``about.html`` with the icon line-up built from ``stuff/*.png``.
    3. ``sync.html``, a venue-by-year table driven by
       ``<ienputdir>/meta/dblpguide.sync``.

    All files are written and read as UTF-8.  Returns ``None``.
    """
    print('{}: {} venues, {} papers\n{}'.format(C.purple('BibSLEIGH'),
                                                C.red(len(sleigh.venues)),
                                                C.red(sleigh.numOfPapers()),
                                                C.purple('=' * 42)))
    # generate the index
    _write_page(outputdir + '/index.html', sleigh.getPage())
    # generate all individual pages
    for v in sleigh.venues:
        r = C.blue(v.getKey())
        _write_page(outputdir + '/' + v.getKey() + '.html', v.getPage())
        if v.brands:
            r += '{' + '+'.join(C.blue(b.getKey()) for b in v.brands) + '}'
            for b in v.brands:
                _write_page(outputdir + '/' + b.getKey() + '.brand.html',
                            b.getPage())
        r += ' => '
        for c in v.getConfs():
            _write_page(outputdir + '/' + c.getKey() + '.html', c.getPage())
            for p in c.papers:
                _write_page(outputdir + '/' + p.getKey() + '.html',
                            p.getPage())
            purekey = c.getKey().replace(v.getKey(), '').replace('-',
                                                                 ' ').strip()
            r += '{} [{}], '.format(purekey, C.yellow(len(c.papers)))
        print(r)

    # generate the icon lineup
    icons = []
    # PNGs that are not venue or brand icons are left out of the line-up.
    reserved = ('cc-by', 'xhtml', 'css', 'open-knowledge', 'edit')
    stems = (lastSlash(png).split('.')[0]
             for png in glob.glob(outputdir + '/stuff/*.png'))
    pngs = [
        stem for stem in stems
        if not (stem.startswith(('a-', 'p-', 'ico-')) or stem in reserved)
    ]
    # Brand pages claim their image first; whatever stays in pngs is then
    # matched against venue pages below.
    for brand in glob.glob(outputdir + '/*.brand.html'):
        pure = lastSlash(brand).split('.')[0]
        img = pure.lower().replace(' ', '')
        if img in pngs:
            pngs.remove(img)
            icons.append(
                '<div class="wider"><a href="{0}.brand.html"><img class="abc" src="{1}" alt="{0}"/></a><span>{0}</span></div>'.format(
                    pure,
                    'stuff/' + img + '.png'))
    # Image names whose venue key is not simply the upper-cased image name.
    corner = {
        'ada': 'TRI-Ada',
        'comparch': 'CompArch',
        'floc': 'FLoC',
        'bibsleigh': 'index',
    }
    for pure in pngs:
        venueCandidate = corner.get(pure, pure.upper())
        # The shortest matching page name is taken as the venue's own page.
        canlink = sorted(glob.glob(outputdir + '/' + venueCandidate +
                                   '*.html'),
                         key=len)
        if canlink:
            pic = '<div class="wider"><a href="{0}"><img class="abc" src="stuff/{1}.png" alt="{2}"/></a><span>{2}</span></div>'.format(
                canlink[0].split('/')[-1],
                pure,
                venueCandidate)
        elif pure == 'twitter':
            pic = '<div class="wider"><a href="https://about.twitter.com/company/brand-assets"><img class="abc" src="stuff/twitter.png" alt="Twitter"/></a><span>Twitter</span></div>'
        elif pure == 'email':
            # NOTE(review): the mailto target looks like an e-mail obfuscation
            # placeholder rather than a real address — restore the real one.
            pic = '<div class="wider"><a href="mailto:[email protected]"><img class="abc" src="stuff/email.png" alt="e-mail"/></a><span>email</span></div>'
        else:
            print('Lonely', pure)
            pic = '<img class="abc" src="stuff/{0}.png" alt="{0}"/>'.format(
                pure)
        icons.append(pic)
    # write "more info" file
    _write_page(
        outputdir + '/about.html',
        aboutHTML.format(
            len(icons),
            '<div class="minibar">' + '\n'.join(sorted(icons)) + '</div>'))

    # generate the DBLP sync page
    cell_by_conf_by_year = {}
    Ys = list(range(2020, 2008, -1))  # 2020 down to 2009, newest first
    dblplinks = {}

    # Each useful line is "name | venue | dblp link"; '#' starts a comment.
    with open(ienputdir + '/meta/dblpguide.sync', 'r',
              encoding='utf-8') as f:
        for line in f:
            if line.startswith('#'):
                continue
            words = line.split('|')
            if len(words) != 3:
                print('- Metaline {} skipped!'.format(words))
                continue
            name, dome, dblp = (w.strip() for w in words)
            # Every year starts as '(no)' until a conference is found.
            cell_by_conf_by_year[name] = dict.fromkeys(Ys, '(no)')
            dblplinks[name] = dblp
            v = sleigh.getVenue(dome)
            if not v:
                continue
            for yy in Ys:
                year = v.getYear(yy)
                if not year:
                    continue
                # Try the plain key first, then the known alternates in
                # order; the first conference found wins.
                candidates = ['{}-{}'.format(name, yy)]
                candidates += [
                    '{}-{}-{}'.format(name, alt, yy)
                    for alt in ('v1', 'p1', 'c1', '1', 'J')
                ]
                for ckey in candidates:
                    c = year.getConf(ckey)
                    if c:
                        cell_by_conf_by_year[name][yy] = c.getIconItem2(
                            '', '')
                        break

    # One header row of years, then one row per name in sorted order.
    parts = ['<table>', '<tr><td></td>']
    parts.extend('<th>{}</th>\n'.format(y) for y in Ys)
    parts.append('</tr>')
    for name in sorted(cell_by_conf_by_year):
        parts.append(
            '<tr><th><a href="{}.brand.html">[@]</a> <a href="{}">{}</a></th>'.format(
                name, dblplinks[name], name))
        parts.extend('<td>{}</td>\n'.format(cell_by_conf_by_year[name][y])
                     for y in Ys)
        parts.append('</tr>')
    parts.append('</table>')

    _write_page(outputdir + '/sync.html', syncHTML.format(''.join(parts)))

    print('{}\nDone with {} venues, {} papers.'.format(
        C.purple('=' * 42), C.red(len(sleigh.venues)),
        C.red(sleigh.numOfPapers())))
Example #11
0
def next_year(vvv):
    """Return one more than the integer named by the second-to-last entry of ``vvv``.

    The entries matching ``vvv + '/*'`` are sorted as strings; the one
    before the last is passed through ``lastSlash`` and parsed as an int.
    """
    entries = glob.glob(vvv + '/*')
    entries.sort()
    penultimate = entries[-2]
    return 1 + int(lastSlash(penultimate))
Example #12
0
    # non-verbose mode by default
    if verbose or r != 0:
        print('[ {} ] {} → {}'.format(statuses[r], fn1, fn2))
    return r


# Script entry point: takes an old and a new corpus-relative name.
if __name__ == "__main__":
    print('{} conference renamer\n{}'.format(\
     C.purple('BibSLEIGH'),
     C.purple('='*42)))
    if len(sys.argv) < 3:
        print('Usage:\n\t{} <OLD-NAME> <NEW-NAME>'.format(sys.argv[0]))
        sys.exit(1)
    nameold, namenew = sys.argv[1:3]
    # Verbose mode is on when the last argument is '-v'.
    verbose = sys.argv[-1] == '-v'
    nameoldm = lastSlash(nameold)
    namenewm = lastSlash(namenew)
    print(nameoldm)
    # Counters indexed by the value report() returns.
    cx = {0: 0, 1: 0, 2: 0}
    # Abort (status 1) unless the old name is an existing directory under
    # <ienputdir>/corpus/ and the new name does not exist there yet.
    if not os.path.exists(ienputdir + '/corpus/' + nameold)\
    or not os.path.isdir(ienputdir + '/corpus/' + nameold)\
    or os.path.exists(ienputdir + '/corpus/' + namenew):
        report(nameold, namenew, 1)
        sys.exit(1)
    os.makedirs(ienputdir + '/corpus/' + namenew)
    # NOTE(review): the counter is increased by 2 here rather than 1 —
    # confirm this is intended.
    cx[report('∅', namenew, 2)] += 2
    # for all papers...
    for fn in glob.glob(ienputdir + '/corpus/' + nameold + '/*.json'):
        # Text after '<nameoldm>/' in the path, with a trailing '.json' removed.
        pureold = fn.split(nameoldm + '/')[1]
        if pureold.endswith('.json'):
            pureold = pureold[:-5]