def print_venue(entry):
    """Build the HTML sentence announcing attendance at a venue.

    The sentence uses the future tense when ``entry.end`` compares greater
    than ``time.localtime()`` and the past tense otherwise.  When the entry
    carries a ``talkid``, the matching talk is looked up among the
    conference and workshop talks of ``../xml/presentations.xml`` and a
    clause linking to it is appended.

    Args:
        entry: Record supporting ``'key' in entry`` and attribute access.
            Reads ``text``, ``venue``, ``begin``, ``end`` and, when present,
            ``url``, ``abbreviation`` and ``talkid``.

    Returns:
        The sentence as an HTML string, terminated by a period.

    Raises:
        LookupError: If ``entry.talkid`` matches no conference or workshop
            talk (previously this surfaced as an ``UnboundLocalError``).
    """
    upcoming = entry.end > time.localtime()

    result = 'I shall attend ' if upcoming else 'I attended '
    result += 'the '

    if 'url' in entry:
        if 'abbreviation' in entry:
            result += entry.text + ' (<a href="' + entry.url + '">' + entry.abbreviation + '</a>)'
        else:
            result += '<a href="' + entry.url + '">' + entry.text + '</a>'
    else:
        result += entry.text
        if 'abbreviation' in entry:
            result += ' (' + entry.abbreviation + ')'

    result += ' in ' + placelink(entry.venue)
    result += ' (' + timeperiod(entry.begin, entry.end) + ')'

    if 'talkid' in entry:
        # read data XML file; the context manager closes the handle promptly
        with open('../xml/presentations.xml', 'r') as source:
            data = XML2Dict().fromstring(source.read())

        # look for talk with given id; conferences are searched first and a
        # later match replaces an earlier one, as before
        talk = None
        for group in (data.presentations.conferences, data.presentations.workshops):
            for candidate in group.talk:
                if candidate.id == entry.talkid:
                    talk = candidate
        if talk is None:
            raise LookupError('no conference or workshop talk with id %r' % (entry.talkid,))

        result += ' and '
        result += 'give ' if upcoming else 'gave '
        result += 'a talk on “<a href="presentations.html#' + entry.talkid + '">' + talk.title + '</a>.”'

        # only print presentation date if venue is longer than one day
        if entry.begin != entry.end:
            talk_date = time.strptime(talk.date, '%d %b %Y')
            result += ' (' + timeperiod(talk_date, talk_date) + ')'

    result += '.'
    return result
def print_pc(entry):
    """Build the HTML sentence announcing a program-committee nomination.

    Args:
        entry: Record supporting ``'key' in entry`` and attribute access.
            Reads ``title``, ``begin``, ``end`` and, when present, ``role``
            (defaults to the word "member"), ``kind``, ``abbreviation`` and
            ``url``.

    Returns:
        The sentence as an HTML string, ending in the event period in
        parentheses followed by a full stop.
    """
    pieces = ['I was nominated as ']
    pieces.append(entry.role if 'role' in entry else 'member')
    pieces.append(' of the program committee of the ')

    if 'kind' in entry:
        pieces.append(entry.kind + ' ')

    pieces.append('“')
    if 'abbreviation' in entry:
        # Full title first, then the (optionally linked) abbreviation.
        # NOTE(review): as it appears here the replace() maps a space to a
        # space; presumably a non-breaking space was intended — confirm.
        pieces.append(entry.title)
        if 'url' in entry:
            pieces.append(' (<a href="' + entry.url + '">' + entry.abbreviation.replace(" ", " ") + '</a>)')
        else:
            pieces.append(' (' + entry.abbreviation.replace(" ", " ") + ')')
    elif 'url' in entry:
        pieces.append('<a href="' + entry.url + '">' + entry.title + '</a>')
    else:
        pieces.append(entry.title)
    pieces.append('”')

    pieces.append(' (' + timeperiod(entry.begin, entry.end) + ').')
    return ''.join(pieces)
def print_visit(entry):
    """Build the sentence announcing a (future or past) visit.

    Args:
        entry: Record with ``text``, ``begin`` and ``end`` attributes;
            ``end`` is compared against ``time.localtime()`` to pick the
            tense.

    Returns:
        The sentence, ending in the visit period in parentheses and a full
        stop.
    """
    if entry.end > time.localtime():
        opening = 'I shall visit '
    else:
        opening = 'I visited '
    return opening + entry.text + ' (' + timeperiod(entry.begin, entry.end) + ').'
def print_free(entry):
    """Build a free-form news line, linking the text when a URL is given.

    Args:
        entry: Record supporting ``'key' in entry`` and attribute access.
            Reads ``text``, ``begin``, ``end`` and, when present, ``url``.

    Returns:
        The (optionally linked) text followed by the period in parentheses
        and a full stop.
    """
    if 'url' in entry:
        body = '<a href="' + entry.url + '">' + entry.text + '</a>'
    else:
        body = entry.text
    return body + ' (' + timeperiod(entry.begin, entry.end) + ').'
def download_stats():
    """Run ``apply_download`` over every hourly dump path of the stats period.

    One path is built with ``wikimedia_stats_dump_path`` for each day yielded
    by ``timeperiod(config.STATS_PERIOD_START, config.STATS_PERIOD_END)`` and
    each hour 0-23.  The paths are processed by a pool of
    ``config.WIKIMEDIA_PARALLEL_DOWNLOADS`` workers, and the value returned
    for each path is printed with an ``(i/total)`` progress prefix, in
    submission order.
    """
    # Build the work list before starting any worker so that a failure here
    # cannot leave a pool behind.
    dump_files = [
        wikimedia_stats_dump_path(day.year, day.month, day.day, hour)
        for day in timeperiod(config.STATS_PERIOD_START, config.STATS_PERIOD_END)
        for hour in range(24)
    ]
    total = len(dump_files)

    # The context manager terminates the workers if anything raises; on the
    # normal path the pool is closed and joined explicitly, as before.
    with multiprocessing.Pool(config.WIKIMEDIA_PARALLEL_DOWNLOADS) as pool:
        for position, msg in enumerate(pool.imap(apply_download, dump_files), start=1):
            print('({}/{})'.format(position, total), msg)
        pool.close()
        pool.join()
def tools():
    """Generate ``tools.html`` from the header template and ``tools.xml``.

    Reads the header from ``../html/tools.html`` and the tool list from
    ``../xml/tools.xml``, renders one section per tool (name, description,
    optional paper, optional co-developers, status, latest release), appends
    the footer and writes the page to ``../../tools.html``.
    """
    # add header template
    print("Generating tools.html...")
    with open('../html/tools.html', 'r') as template:
        output = template.read()

    # read data XML file
    with open('../xml/tools.xml', 'r') as source:
        data = XML2Dict().fromstring(source.read())

    # fetch papers
    paperdata = generate_publications.init()

    for index, entry in enumerate(data.tooling.tools.tool):
        release = time.strptime(entry.release, '%d %b %Y')

        # only the very first heading carries the "first" class
        if index == 0:
            output += '<h2 class="first">' + entry.name + '</h2>'
        else:
            output += '<h2>' + entry.name + '</h2>'

        output += '<ul class="talks">\n'
        output += '<li>' + entry.description.replace(entry.name, '<strong>' + entry.name + '</strong>') + '</li>\n'

        if 'paper' in entry:
            output += generate_publications.formatentry(generate_publications.entrybyname(entry.paper, paperdata)) + '\n'

        if 'codevelopers' in entry:
            # Look for the word "and" between names; a bare substring test
            # would also fire on names that merely contain "and".
            if ' and ' in entry.codevelopers:
                output += '<li>co-developers: ' + entry.codevelopers + '</li>\n'
            else:
                output += '<li>co-developer: ' + entry.codevelopers + '</li>\n'

        output += '<li>development status: ' + entry.status + '</li>\n'
        output += '<li>latest release: ' + timeperiod(release, release) + '</li>\n'
        output += '</ul>\n'

    # footer
    output += '</div></div>'
    output += footer()
    output += '</body></html>'

    print("Writing tools.html...")
    # NOTE(review): as it appears here the first replace() maps " & " to
    # itself; presumably an HTML entity was intended — confirm.
    with open('../../tools.html', 'w') as target:
        target.write(output.replace(" & ", " & ").replace(" - ", " — "))
def print_paper_accepted(entry, paperdata):
    """Build the HTML sentence announcing that a paper has been accepted.

    Args:
        entry: Record with ``paper`` (the publication's name), ``begin`` and
            ``end`` attributes.
        paperdata: Publication data passed through to
            ``generate_publications.entrybyname``.

    Returns:
        The sentence as an HTML string, linking to the publication's anchor
        on ``publications.html``.

    Raises:
        ValueError: If the publication is neither an ``inproceedings`` nor an
            ``article`` entry (previously an ``UnboundLocalError``).
    """
    publication = generate_publications.entrybyname(entry.paper, paperdata)

    # NOTE(review): the formatted entry was never used by this function; the
    # call is kept only in case formatentry has side effects — confirm and
    # remove.
    generate_publications.formatentry(publication)

    title = None
    venue = None
    if 'inproceedings' in publication:
        title = publication['inproceedings']['title']['value']
        venue = 'for the ' + publication['inproceedings']['booktitle']['value']
    # Deliberately not ``elif``: an article record takes precedence when both
    # keys are present, as before.
    if 'article' in publication:
        title = publication['article']['title']['value']
        venue = 'for ' + publication['article']['journal']['value']
    if title is None:
        raise ValueError('publication %r is neither inproceedings nor article' % (entry.paper,))

    result = 'The paper “<a href="publications.html#' + publication.id + '">' + title + '</a>”'
    result += ' has been accepted ' + venue + '.'
    result += ' (' + timeperiod(entry.begin, entry.end) + ').'
    return result
def print_lecture(entry):
    """Build the HTML sentence announcing a (future or past) lecture.

    Args:
        entry: Record supporting ``'key' in entry`` and attribute access.
            Reads ``title``, ``text``, ``venue``, ``begin``, ``end`` and,
            when present, ``talkid`` and ``url``.

    Returns:
        The sentence as an HTML string.  Note that, unlike the sibling
        ``print_*`` functions visible here, no trailing full stop is added.
    """
    # tense depends on whether the lecture still lies ahead
    if entry.end > time.localtime():
        pieces = ['I shall give a talk ']
    else:
        pieces = ['I gave a talk ']

    # title, linked to the presentations page when a talk id is known
    if 'talkid' in entry:
        pieces.append('on “<a href="presentations.html#' + entry.talkid + '">' + entry.title + '</a>” at the ')
    else:
        pieces.append('on “' + entry.title + '” at the ')

    # hosting event, linked when a URL is known
    if 'url' in entry:
        pieces.append('<a href="' + entry.url + '">' + entry.text + '</a>')
    else:
        pieces.append(entry.text)

    pieces.append(' in ' + placelink(entry.venue))
    pieces.append(' (' + timeperiod(entry.begin, entry.end) + ')')
    return ''.join(pieces)
def load_stats():
    """Load every hourly dump of the stats period and sum the view counts.

    One file path under ``config.STATS_DUMP_DIR`` is built for each day
    yielded by ``timeperiod(config.STATS_PERIOD_START,
    config.STATS_PERIOD_END)`` and each hour 0-23.  The files are processed
    by ``load_file`` in a worker pool, and the per-file results are merged
    in the main process while progress is printed.

    Returns:
        A dict with one ``defaultdict(int)`` per site in
        ``config.STATS_SITES``, mapping each page to its summed views.

    Raises:
        KeyError: If a file reports a key that is not in
            ``config.STATS_SITES``.
    """
    files = [
        os.path.join(
            config.STATS_DUMP_DIR,
            wikimedia_stats_dump_path(day.year, day.month, day.day, hour),
        )
        for day in timeperiod(
            config.STATS_PERIOD_START, config.STATS_PERIOD_END
        )
        for hour in range(24)
    ]
    total = len(files)
    stats = {site: defaultdict(int) for site in config.STATS_SITES}

    # Keep one free CPU core to help the main process keeping up with the
    # collection step.
    #
    # NOTE: If this becomes a real issue a fine way to fix this would be to
    #       distribute the collection step into the loading tasks by using a
    #       global lock on the computed structure.
    workers = max(1, multiprocessing.cpu_count() - 1)

    # The context manager terminates the workers if anything raises; on the
    # normal path the pool is closed and joined explicitly, as before.
    with multiprocessing.Pool(workers) as pool:
        loaded = pool.imap(load_file, files)
        for position, (path, file_stats) in enumerate(zip(files, loaded), start=1):
            print(
                "({}/{}) {}: updating {} items".format(
                    position,
                    total,
                    path,
                    sum(len(lang) for lang in file_stats.values()),
                )
            )
            for lang, pages in file_stats.items():
                site_stats = stats[lang]
                for page, views in pages.items():
                    site_stats[page] += views
        pool.close()
        pool.join()

    return stats
def _presentations_sorted(talks):
    """Parse each talk's ``date`` string in place and return the talks newest first."""
    for talk in talks:
        talk.date = time.strptime(talk.date, '%d %b %Y')
    return sorted(talks, key=lambda talk: talk.date, reverse=True)


def _presentations_item_head(entry):
    """Return the opening ``<li>``, the optional media links and the bold title."""
    html = '<li id="' + entry.id + '">'
    if 'slideshare' in entry:
        html += '<a href="' + entry.slideshare + '" title="slides of the talk at Slideshare"></a>'
    if 'vimeo' in entry:
        html += '<a href="' + entry.vimeo + '" title="video of the talk at Vimeo"></a>'
    html += '<strong>' + entry.title + '</strong><br/>'
    return html


def _presentations_item_tail(entry, now=None):
    """Return date, place and closing ``</li>``; announce talks later than ``now``."""
    html = ''
    if now is not None and entry.date > now:
        html += 'to be held on '
    html += timeperiod(entry.date, entry.date) + ' in ' + placelink(entry.venue)
    html += '</li>\n'
    return html


def _presentations_event_line(entry):
    """Return the ``<em>`` line naming a workshop/demo event and its colocation."""
    # NOTE(review): as it appears here the replace() calls map a space to a
    # space; presumably a non-breaking space was intended — confirm.
    if 'abbreviation' in entry:
        if 'url' in entry:
            html = '<em>' + entry.conference + ' (<a href="' + entry.url + '">' + entry.abbreviation.replace(" ", " ") + '</a>)'
        else:
            html = '<em>' + entry.conference + ' (' + entry.abbreviation.replace(" ", " ") + ')'
    elif 'url' in entry:
        html = '<em><a href="' + entry.url + '">' + entry.conference + '</a>'
    else:
        html = '<em>' + entry.conference

    if 'colocation' in entry:
        if 'courl' in entry:
            html += '; part of <a href="' + entry.courl + '">' + entry.colocation.replace(" ", " ") + '</a>'
        else:
            html += '; part of ' + entry.colocation.replace(" ", " ")

    html += '</em><br/>'
    return html


def _presentations_event_note(label, entry, with_colocation=True):
    """Return the plain-text description written to stderr for an event talk."""
    note = '\n' + label + ' given by Niels Lohmann on ' + timeperiod(entry.date, entry.date)
    note += ' in ' + entry.venue + ' at the ' + entry.conference
    if 'abbreviation' in entry:
        note += ' (' + entry.abbreviation + ')'
    if with_colocation and 'colocation' in entry:
        note += '; part of ' + entry.colocation
    note += '.\n'
    return note


def presentations():
    """Generate ``presentations.html`` from the header template and XML data.

    Reads the header from ``../html/presentations.html`` and the talks from
    ``../xml/presentations.xml``, renders five sections (invited, conference,
    workshop, tool demonstrations, miscellaneous), each sorted newest first,
    appends the footer and writes the page to ``../../presentations.html``.

    For every talk a plain-text description (commented in the code as being
    for Slideshare.net) is additionally written to ``sys.stderr``.
    """
    # add header template
    print("Generating presentations.html...")
    with open('../html/presentations.html', 'r') as template:
        output = template.read()

    # read data XML file
    with open('../xml/presentations.xml', 'r') as source:
        data = XML2Dict().fromstring(source.read())

    # one reference time for deciding which talks are still upcoming
    now = time.localtime()

    #########################
    # part 1: Invited Talks #
    #########################
    output += '<h2 class="first">Invited Presentations</h2>'
    output += '<ul class="talks">\n'
    for entry in _presentations_sorted(data.presentations.invited.talk):
        output += _presentations_item_head(entry)
        output += entry.detail + '<br/>'
        output += _presentations_item_tail(entry, now)

        # output for Slideshare.net; always newline-terminated, with a full
        # stop added only when the detail does not already end in one
        note = '\nInvited presentation given by Niels Lohmann on ' + timeperiod(entry.date, entry.date)
        note += ' in ' + entry.venue + ' as ' + entry.detail
        if not note.endswith('.'):
            note += '.'
        note += '\n'
        sys.stderr.write(note)
    output += '</ul>\n'

    ############################
    # part 2: Conference Talks #
    ############################
    output += '<h2>Conference Presentations</h2>'
    output += '<ul class="talks">\n'
    for entry in _presentations_sorted(data.presentations.conferences.talk):
        output += _presentations_item_head(entry)
        # conference talks always carry a URL; colocation is not rendered
        if 'abbreviation' in entry:
            output += '<em>' + entry.conference + ' (<a href="' + entry.url + '">' + entry.abbreviation.replace(" ", " ") + '</a>)</em><br/>'
        else:
            output += '<em><a href="' + entry.url + '">' + entry.conference + ' </a></em><br/>'
        output += _presentations_item_tail(entry, now)

        # output for Slideshare.net
        sys.stderr.write(_presentations_event_note('Conference presentation', entry, with_colocation=False))
    output += '</ul>\n'

    ##########################
    # part 3: Workshop Talks #
    ##########################
    output += '<h2>Workshop Presentations</h2>'
    output += '<ul class="talks">\n'
    for entry in _presentations_sorted(data.presentations.workshops.talk):
        output += _presentations_item_head(entry)
        output += _presentations_event_line(entry)
        output += _presentations_item_tail(entry, now)

        # output for Slideshare.net
        sys.stderr.write(_presentations_event_note('Workshop presentation', entry))
    output += '</ul>\n'

    ##########################
    # part 4: Demonstrations #
    ##########################
    output += '<h2>Tool Demonstrations</h2>'
    output += '<ul class="talks">\n'
    for entry in _presentations_sorted(data.presentations.demonstrations.talk):
        output += _presentations_item_head(entry)
        output += _presentations_event_line(entry)
        # demonstrations are never announced as upcoming
        output += _presentations_item_tail(entry)

        # output for Slideshare.net
        sys.stderr.write(_presentations_event_note('Tool demonstration', entry))
    output += '</ul>\n'

    #########################
    # part 5: Miscellaneous #
    #########################
    output += '<h2>Miscellaneous</h2>'
    output += '<ul class="talks">\n'
    for entry in _presentations_sorted(data.presentations.miscellaneous.talk):
        # the slides link is now optional here too, as in the other sections
        output += _presentations_item_head(entry)
        output += entry.detail + '<br/>'
        output += _presentations_item_tail(entry)

        # output for Slideshare.net
        note = '\nPresentation given by Niels Lohmann on ' + timeperiod(entry.date, entry.date)
        note += ' in ' + entry.venue + '; ' + entry.detail
        note += '.\n'
        sys.stderr.write(note)
    output += '</ul>\n'

    # footer
    output += '</div></div>'
    output += footer()
    output += '</body></html>'

    print("Writing presentations.html...")
    # NOTE(review): as it appears here the first replace() maps " & " to
    # itself; presumably an HTML entity was intended — confirm.
    with open('../../presentations.html', 'w') as target:
        target.write(output.replace(" & ", " & ").replace(" - ", " — "))
def _teaching_proxy_title(entry):
    """Return the entry's title, marked when the entry has a ``byProxy`` key."""
    title = entry.title
    if 'byProxy' in entry:
        title += ' — by proxy'
    return title


def _teaching_course_item(title_html, entry):
    """Return one ``<li>`` with bold title, optional detail, institution and term."""
    html = '<li>'
    html += '<strong>' + title_html + '</strong><br/>'
    if 'detail' in entry:
        html += entry.detail + ', '
    html += entry.institution + ' (' + entry.term + ')'
    html += '</li>\n'
    return html


def teaching():
    """Generate ``teaching.html`` from the header template and ``teaching.xml``.

    Reads the header from ``../html/teaching.html`` and the data from
    ``../xml/teaching.xml``, renders four sections (supervised theses,
    lectures, seminars, exercises), appends the footer and writes the page
    to ``../../teaching.html``.
    """
    # add header template
    print("Generating teaching.html...")
    with open('../html/teaching.html', 'r') as template:
        output = template.read()

    # read data XML file
    with open('../xml/teaching.xml', 'r') as source:
        data = XML2Dict().fromstring(source.read())

    #################################
    # part 1: Supervision of Theses #
    #################################
    output += '<h2 class="first">Supervision of Theses</h2>'
    output += '<ul class="talks">\n'
    for entry in data.teaching.supervisions.thesis:
        output += '<li>' + entry.author + '. '
        output += '<strong>' + entry.title + '</strong><br/>'
        output += entry.type + ', ' + entry.institution
        # finished theses carry a submission date, ongoing ones an
        # expected-completion text
        if 'date' in entry:
            entry.date = time.strptime(entry.date, '%d %b %Y')
            output += ', submitted ' + timeperiod(entry.date, entry.date)
        else:
            output += ', expected ' + entry.expected
        output += '</li>\n'
    output += '</ul>\n'

    ####################
    # part 2: Lectures #
    ####################
    output += '<h2>Lectures</h2>'
    output += '<ul class="talks">\n'
    for entry in data.teaching.lectures.lecture:
        output += _teaching_course_item(_teaching_proxy_title(entry), entry)
    output += '</ul>\n'

    ####################
    # part 3: Seminars #
    ####################
    output += '<h2>Seminars</h2>'
    output += '<ul class="talks">\n'
    for entry in data.teaching.seminars.seminar:
        output += _teaching_course_item(_teaching_proxy_title(entry), entry)
    output += '</ul>\n'

    #####################
    # part 4: Exercises #
    #####################
    output += '<h2>Exercises</h2>'
    output += '<ul class="talks">\n'
    for entry in data.teaching.exercises.exercise:
        # NOTE: a link to "files/<entry.evaluation>" (class "evaluation")
        # used to be emitted at the start of each item; it is disabled.
        if 'page' in entry:
            title_html = '<a href="teaching/' + entry.page + '.html">' + entry.title + '</a>'
        else:
            title_html = entry.title
        output += _teaching_course_item(title_html, entry)
    output += '</ul>\n'

    # footer
    output += '</div></div>'
    output += footer()
    output += '</body></html>'

    print("Writing teaching.html...")
    # NOTE(review): as it appears here the first replace() maps " & " to
    # itself; presumably an HTML entity was intended — confirm.
    with open('../../teaching.html', 'w') as target:
        target.write(output.replace(" & ", " & ").replace(" - ", " — "))
def _academic_event_title(entry):
    """Return an event's title, with linked abbreviation or linked title."""
    # NOTE(review): as it appears here the replace() calls map a space to a
    # space; presumably a non-breaking space was intended — confirm.
    if 'abbreviation' in entry:
        html = entry.title
        if 'url' in entry:
            html += ' (<a href="' + entry.url + '">' + entry.abbreviation.replace(" ", " ") + '</a>)'
        else:
            html += ' (' + entry.abbreviation.replace(" ", " ") + ')'
    elif 'url' in entry:
        html = '<a href="' + entry.url + '">' + entry.title + '</a>'
    else:
        html = entry.title
    return html


def _academic_event_context(entry):
    """Return the event kind and/or "part of ..." colocation text (may be empty)."""
    html = ''
    if 'kind' in entry:
        html += entry.kind
    if 'colocation' in entry:
        # the comma only separates the colocation from a preceding kind
        if 'kind' in entry:
            html += ', '
        if 'courl' in entry:
            html += 'part of <a href="' + entry.courl + '">' + entry.colocation + '</a>'
        else:
            html += 'part of ' + entry.colocation
    return html


def _academic_event_when_where(entry):
    """Return a line break, the event period and, when known, the venue."""
    html = '<br/>'
    html += timeperiod(entry.begin, entry.end)
    if 'venue' in entry:
        html += ' in ' + entry.venue
    return html


def academic():
    """Generate ``academic.html`` from the header template and ``academic.xml``.

    Reads the header from ``../html/academic.html`` and the data from
    ``../xml/academic.xml``, renders a fixed steering-committee entry, the
    organized workshops and program-committee memberships (both newest
    first) and the reviewed journals (sorted by title), appends the footer
    and writes the page to ``../../academic.html``.
    """
    # add header template
    print("Generating academic.html...")
    with open('../html/academic.html', 'r') as template:
        output = template.read()

    # read data XML file
    with open('../xml/academic.xml', 'r') as source:
        data = XML2Dict().fromstring(source.read())

    output += '<h2 class="first">Steering Committee Membership</h2>\n'
    output += '<ul class="talks">\n'
    output += '<li><strong><a href="http://zeus-workshop.eu/">ZEUS Workshop Series</a></strong><br />Central European Workshop on Services and their Composition</li>'
    output += '</ul>\n'

    #####################################
    # part 1: Organization of Workshops #
    #####################################

    # transform date types
    for entry in data.scientific.organization.workshop:
        entry.begin = time.strptime(entry.begin, '%d %b %Y')
        entry.end = time.strptime(entry.end, '%d %b %Y')

    # sort by begin date
    workshops = sorted(data.scientific.organization.workshop, key=lambda k: k.begin, reverse=True)

    # output
    output += '<h2>Organization of Workshops</h2>\n'
    output += '<ul class="talks">\n'
    for entry in workshops:
        output += '<li><strong>' + _academic_event_title(entry) + '</strong><br/>\n'
        output += _academic_event_context(entry)
        output += _academic_event_when_where(entry)
        output += '</li>\n'
    output += '</ul>\n'

    ##########################
    # part 2: PC memberships #
    ##########################

    # transform date types
    for entry in data.scientific.memberships.pc:
        # NOTE(review): ``date`` is parsed (and therefore required) but not
        # used anywhere else in this function — confirm it is still needed.
        entry.date = time.strptime(entry.date, '%d %b %Y')
        entry.begin = time.strptime(entry.begin, '%d %b %Y')
        entry.end = time.strptime(entry.end, '%d %b %Y')

    # sort by begin date
    memberships = sorted(data.scientific.memberships.pc, key=lambda k: k.begin, reverse=True)

    # output
    output += '<h2>Program Committee Memberships</h2>\n'
    output += '<ul class="talks">\n'
    for entry in memberships:
        output += '<li><strong>' + _academic_event_title(entry)
        if 'role' in entry:
            output += ' — ' + entry.role
        output += '</strong><br/>\n'
        output += _academic_event_context(entry)
        output += _academic_event_when_where(entry)

        # call for paper, only while the event still lies ahead
        if 'cfp' in entry and entry.begin > time.localtime():
            output += '<br/><a href="' + entry.cfp + '">Call for papers</a>'
        output += '</li>\n'
    output += '</ul>\n'

    ###########################
    # part 3: Journal Reviews #
    ###########################

    # sort by journal title
    journals = sorted(data.scientific.journalreviews.journal, key=lambda k: k.title)

    # output
    output += '<h2>Journal Reviewing Activities</h2>\n'
    output += '<ul class="talks">\n'
    for entry in journals:
        output += '<li><strong>'
        if 'url' in entry:
            output += '<a href="' + entry.url + '">' + entry.title + '</a>'
        else:
            output += entry.title
        output += '</strong></li>\n'
    output += '</ul>\n'

    # footer
    output += '</div></div>'
    output += footer()
    output += '</body></html>'

    print("Writing academic.html...")
    # NOTE(review): as it appears here the first replace() maps " & " to
    # itself; presumably an HTML entity was intended — confirm.
    with open('../../academic.html', 'w') as target:
        target.write(output.replace(" & ", " & ").replace(" - ", " — "))