def build_website_list(data):
    """Render the website overview page to ``_site/websites.html``.

    The template receives the sites of ``data.sites`` ordered by
    ``sort_by_rank`` and by ``sort_by_cat``, the result of ``header_stats``
    and the static summary texts.

    The former ``summary_stats`` call and the value/label lists derived from
    it were never handed to the template and have been dropped.

    Args:
        data: Data source exposing ``sites``; also passed to ``get_template``.
    """
    sites = data.sites

    # Adjacent literals are concatenated at compile time. This replaces a
    # backslash continuation inside the literal, which leaked a stray
    # backslash / source indentation into the user-facing description.
    text_content = {
        "summary_title": "Top Website Stats",
        "summary_description": (
            "Proportion of requests on page load that go toward third party "
            "trackers average number of trackers, and use Https in these "
            "top websites"
        ),
    }

    header_numbers = header_stats(sites)
    sorted_websites = sort_by_rank(sites)
    sorted_websites_cat = sort_by_cat(sites)

    # Render first: opening with 'w' truncates the file immediately, so a
    # rendering error must not be able to leave an empty page behind.
    file_content = render_template(
        template=get_template(data, "websites.html"),
        content=text_content,
        website_list=sorted_websites,
        website_list_cat=sorted_websites_cat,
        header_numbers=header_numbers,
    )
    with open('_site/websites.html', 'w') as fp:
        fp.write(file_content)

    print_progress(text="Website list")
def build_trackers_list(data):
    """Render the tracker overview page to ``_site/trackers.html``.

    The template receives the apps of ``data.apps`` twice: ordered by
    descending ``overview.reach``, and ordered by company id (falling back to
    the app name when no company id is set).

    Side effect: every app dict lacking a ``'name'`` key gets one, set to its
    ``overview.id``.

    Args:
        data: Data source exposing ``apps`` and ``get_app_name``; also passed
            to ``get_template``.
    """
    apps = data.apps

    # Adjacent literals are concatenated at compile time. This replaces a
    # backslash continuation inside the literal, which leaked a stray
    # backslash / source indentation into the user-facing description.
    text_content = {
        "summary_title": "Tracker Stats",
        "summary_description": (
            "The proportion of page loads where cookies and "
            "fingerprinting are used for tracking, and requests are done "
            "over Https"
        ),
    }

    def company_sort_key(app):
        """Return the company id, or the app name when no company is set."""
        company_id = app.get('company_id')
        # A missing key, None and the literal string "None" all mean
        # "no company".
        if company_id in (None, "None"):
            return data.get_app_name(app['overview']['id'])
        return company_id

    sorted_trackers = sorted(
        apps.values(),
        key=lambda a: a['overview']['reach'],
        reverse=True,
    )
    sorted_trackers_cat = sorted(apps.values(), key=company_sort_key)

    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    # Render first: opening with 'w' truncates the file immediately, so a
    # rendering error must not be able to leave an empty page behind.
    file_content = render_template(
        template=get_template(data, name="trackers.html"),
        content=text_content,
        tracker_list=sorted_trackers,
        trackers_list_cat=sorted_trackers_cat,
        header_stats=tracker_header_stats(data.apps),
    )
    with open('_site/trackers.html', 'w') as fp:
        fp.write(file_content)

    print_progress(text="Tracker list")
def build_reports_list(data, entity='reports'):
    """Render the listing page for *entity* to ``_site/<entity>.html``.

    Every file in the ``<entity>/`` directory is run through ``parse``; only
    results whose ``'publish'`` entry is truthy are handed to the template
    ``<entity>.html``.

    Args:
        data: Data source passed to ``get_template``.
        entity: Name of the source directory, the template and the output
            page (a string).
    """
    parsed = [
        parse(os.path.join(entity, filename))
        for filename in os.listdir("{}/".format(entity))
    ]
    published = [report for report in parsed if report['publish']]

    # Parse and render before touching the output: opening with 'w'
    # truncates the file, so a failure here must not wipe an existing page.
    content = render_template(
        template=get_template(data, "{}.html".format(entity)),
        reports=published,
    )
    with open('_site/{}.html'.format(entity), 'w') as fp:
        fp.write(content)

    print_progress(text="{} List".format(entity.capitalize()))
def html_index(sink, crumbs, root, datauri, querychannel):
    """Fill a fresh template context with the index data and expand 'index'.

    Globals published on the context: the four URI/navigation arguments, a
    page title, ``datauri2``, and the channel, day and user listings taken
    from *sink*.

    Args:
        sink: Provides ``channels``, ``days``, ``day2channels``, ``nicks`` and
            ``channel2nicks``.
        crumbs: Published unchanged as ``crumbs``.
        root: Base URI; published as ``datarooturi`` and used as the prefix
            of channel and user URIs.
        datauri: Published as ``datauri``.
        querychannel: When truthy, the title names the first channel and the
            user list is limited to ``sink.channel2nicks[querychannel]``.
    """
    ctx = new_context()
    ctx.addGlobal('crumbs', crumbs)
    ctx.addGlobal('datarooturi', root)
    ctx.addGlobal('datauri', datauri)
    ctx.addGlobal('querychannel', querychannel)

    channel_names = sorted(sink.channels.keys())

    title = ("Channel #%s" % channel_names[0]) if querychannel \
        else "Some IRC discussion logs"
    ctx.addGlobal('title', title)

    # The root page gets an explicit "index" suffix; other pages are kept.
    ctx.addGlobal('datauri2', (root + "index") if datauri == root else datauri)

    channel_ids = [name.strip("#").lower() for name in channel_names]
    channel_entries = [
        {'uri': root + channel_id + "#channel", 'name': channel_id}
        for channel_id in channel_ids
    ]

    # XXX must be a real list: works around a bug in simpletal
    newest_first = sorted(sink.days.keys())[::-1]

    ctx.addGlobal('channels', channel_entries)
    ctx.addGlobal('days', newest_first)
    ctx.addGlobal('day2channels', sink.day2channels)

    nick_source = (sink.channel2nicks[querychannel].keys()
                   if querychannel else sink.nicks)
    user_entries = [
        {'uri': root + "users/%s#user" % nick, 'name': nick}
        for nick in sorted(nick_source)
    ]
    ctx.addGlobal('users', user_entries)
    ctx.addGlobal('nick2people', get_nick2people())

    expand_template(get_template('index'), ctx)
def build_company_pages(data):
    """Run ``company_page`` for every company on a thread pool.

    One task per value of ``data.companies`` is submitted; each result is
    then awaited in submission order, so an exception raised inside a task
    is re-raised here.

    Args:
        data: Data source exposing ``companies``; also passed to
            ``get_template`` and to every ``company_page`` call.
    """
    all_companies = data.companies
    page_template = get_template(data, "company-page.html")

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        pending = [
            pool.submit(company_page, page_template, company, data)
            for company in all_companies.values()
        ]
        for job in pending:
            job.result()

    print_progress(text="Company pages")
def build_report_pages(data, entity="reports"):
    """Render one HTML page per file found in the ``<entity>/`` directory.

    Each file is run through ``parse`` and rendered with the
    ``report-page.html`` template; the page is written to
    ``_site/<entity>/<filename>.html`` where ``<filename>`` is the parsed
    report's ``"filename"`` entry.

    Args:
        data: Data source passed to ``get_template``.
        entity: Name of the source directory and of the output
            sub-directory (a string).
    """
    template = get_template(data,
                            "report-page.html",
                            render_markdown=True,
                            path_to_root='..')

    for source_name in os.listdir("{}/".format(entity)):
        report = parse(os.path.join(entity, source_name))

        # Render before opening: 'w' truncates the target right away, so a
        # rendering error must not leave an empty page behind.
        page = render_template(path_to_root='..',
                               template=template,
                               report=report)
        target = "_site/{0}/{1}.html".format(entity, report.get("filename"))
        with open(target, 'w') as fp:
            fp.write(page)

    print_progress(text=entity.capitalize())
def close(self):
    """Publish the collected log data on the context and expand 'channellog'.

    Reads ``self.context``, ``self.channel``, ``self.root``,
    ``self.timeprefix``, ``self.title`` and ``self.events``.
    """
    ctx = self.context

    # Channel names are normalised: surrounding '#' removed, lower-cased.
    channel_id = self.channel.strip("#").lower()
    channel_info = {
        'name': channel_id,
        'uri': self.root + channel_id + "#channel",
    }

    ctx.addGlobal('channel', channel_info)
    ctx.addGlobal('timeprefix', self.timeprefix)
    ctx.addGlobal('title', self.title)
    ctx.addGlobal('events', self.events)

    expand_template(get_template('channellog'), ctx)
def build_website_pages(data):
    """Run ``website_page`` for every site on a thread pool.

    Sites are ordered by descending ``overview.popularity`` and numbered
    from 1; that number is passed to ``website_page`` as the rank. Results
    are awaited in submission order, so a task's exception is re-raised here.

    Args:
        data: Data source exposing ``sites``; also passed to
            ``get_template`` and to every ``website_page`` call.
    """
    all_sites = data.sites
    page_template = get_template(data, "website-page.html", path_to_root='..')

    def popularity(entry):
        # entry is a (site_id, site) pair from sites.items()
        return entry[1]['overview']['popularity']

    # Rendering is fanned out over threads to speed the build up.
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        by_popularity = sorted(all_sites.items(), key=popularity, reverse=True)
        pending = [
            pool.submit(website_page, page_template, site_id, position, data)
            for position, (site_id, _site) in enumerate(by_popularity, start=1)
        ]
        for job in pending:
            job.result()

    print_progress(text="Website pages")
def build_tracker_pages(data):
    """Run ``tracker_page`` for every app on a thread pool.

    Apps are submitted in order of descending ``overview.reach``. Results are
    awaited in submission order, so a task's exception is re-raised here.

    Args:
        data: Data source exposing ``apps``; also passed to ``get_template``
            and to every ``tracker_page`` call.
    """
    all_apps = data.apps
    page_template = get_template(data,
                                 name='tracker-page.html',
                                 path_to_root='..')

    def reach(entry):
        # entry is an (app_id, app) pair from apps.items()
        return entry[1]['overview']['reach']

    # Rendering is fanned out over threads to speed the build up.
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        by_reach = sorted(all_apps.items(), key=reach, reverse=True)
        pending = [
            pool.submit(tracker_page, page_template, app_id, app, data)
            for app_id, app in by_reach
        ]
        for job in pending:
            job.result()

    print_progress(text="Tracker Pages")
def render_user_index(sink, format, crumbs, datarooturi, datauri):
    """Emit the index of all known nicks as HTML or as Turtle.

    Args:
        sink: Not used by this function.
        format: ``"html"`` expands the 'users' template; ``"turtle"`` writes
            triples through a ``TurtleWriter``. Any other value does nothing.
        crumbs: Published on the HTML context as ``crumbs``.
        datarooturi: Base URI used to build user URIs and the freenode URI.
        datauri: Published on the HTML context as ``datauri``.
    """
    freenode_uri = datarooturi + "#freenode"
    all_nicks = get_nicks()
    people_by_nick = get_nick2people()

    def uri_for(nick):
        return datarooturi + "users/%s#user" % nick

    if format == "html":
        ctx = new_context()
        ctx.addGlobal('crumbs', crumbs)
        ctx.addGlobal('datarooturi', datarooturi)
        ctx.addGlobal('datauri', datauri)

        user_entries = [{'uri': uri_for(nick), 'nick': nick}
                        for nick in all_nicks]
        ctx.addGlobal('users', user_entries)

        expand_template(get_template('users'), ctx)
        return

    if format != "turtle":
        return

    statements = []
    for nick in all_nicks:
        user_uri = uri_for(nick)
        # Each user's group of triples is introduced by a None entry.
        statements.extend([
            None,
            (freenode_uri, SIOC.space_of, user_uri),
            (user_uri, RDFS.label, PlainLiteral(nick)),
            (user_uri, RDF.type, SIOC.User),
        ])
        if nick in people_by_nick:
            statements.append(
                (people_by_nick[nick], FOAF.holdsAccount, user_uri))

    out = TurtleWriter(None, namespaces)
    heading = "User index"
    out.write([
        ("", RDFS.label, PlainLiteral(heading)),
        ("", FOAF.primaryTopic, freenode_uri),
    ])
    out.write(statements)
    out.close()
def build_home(data):
    """Render the landing page to ``_site/index.html``.

    The template receives the top 20 apps by descending ``overview.reach``,
    the first 20 apps ordered by ``cat``, the most and least tracked sites
    per category, and the header graph built from ``company_reach``.

    Side effect: every app dict lacking a ``'name'`` key gets one, set to its
    ``overview.id``.

    Args:
        data: Data source exposing ``apps``, ``sites`` and ``companies``;
            also passed to ``get_template``.
    """
    apps = data.apps

    def category_key(app):
        """Sort apps without a category (missing or None) as ''."""
        category = app.get('cat')
        return '' if category is None else category

    sorted_trackers = sorted(
        apps.values(),
        key=lambda a: a['overview']['reach'],
        reverse=True,
    )
    sorted_trackers_cat = sorted(apps.values(), key=category_key)

    # Both lists hold the very same app dicts (sorted() does not copy its
    # elements), so one pass fills in the missing names for both.
    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    most_tracked_sites = tracked_by_category(data.sites, worst=True)
    least_tracked_sites = tracked_by_category(data.sites, worst=False)

    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    # Render first: opening with 'w' truncates the file immediately, so a
    # rendering error must not be able to leave an empty page behind.
    content = render_template(
        template=get_template(data, "index.html"),
        ts=header_graph,
        tracker_list=sorted_trackers[:20],
        trackers_list_cat=sorted_trackers_cat[:20],
        most_tracked_sites=most_tracked_sites,
        least_tracked_sites=least_tracked_sites,
    )
    with open('_site/index.html', 'w') as fp:
        fp.write(content)

    print_progress(text="Home page")
def render_user(sink, format, crumbs, datarooturi, nick, datauri, latestsink):
    """Emit the page for a single IRC user as HTML or as Turtle.

    If ``find_person`` returns a person URI for *nick*, the document behind
    it (the URI with its fragment removed) is loaded into an RDF model; a
    loading failure is reported on the HTML page instead of aborting.

    Args:
        sink: Provides ``nick2channels``, used to list the user's channels.
        format: ``"html"`` expands the 'user' template; ``"turtle"`` writes
            triples through a ``TurtleWriter``. Any other value emits nothing.
        crumbs: Published on the HTML context as ``crumbs``.
        datarooturi: Base URI used to build user and channel URIs.
        nick: The IRC nick to render.
        datauri: Published on the HTML context as ``datauri``.
        latestsink: Provides ``events``, published on the HTML context.
    """
    userURI = datarooturi + "users/%s#user" % nick

    # The RDF module is imported here and bound to the module-level name
    # ``Red`` (presumably so sibling helpers can use it -- confirm).
    global Red
    import RDF as Red

    person = find_person(nick)

    error = None
    model = Red.Model()  # XXX work around a bug in Redland?
    if person:
        try:
            model.load(person.rsplit('#', 1)[0], name='guess')
        except Exception as exc:
            # Deliberately best-effort: the page is still rendered, with the
            # error shown. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            error = "Error loading the FOAF info: %s" % (exc,)

    channels = sorted(sink.nick2channels.get(nick, {}).keys())

    if format == "html":
        context = new_context()
        context.addGlobal('crumbs', crumbs)
        context.addGlobal('datarooturi', datarooturi)
        context.addGlobal('datauri', datauri)
        context.addGlobal('error', error)

        info = []
        if person:
            for name in link_values(model, person,
                                    [FOAF.name, FOAF.firstName,
                                     FOAF.nick, RDFS.label]):
                info.append({'key': 'Name', 'value': "%s" % name})

            for ircnick in nick_values(datarooturi, model, person,
                                       [FOAF.holdsAccount]):
                # Annotate each claimed account with whether it matches the
                # URI of the user being rendered.
                if userURI in ("%s" % ircnick):
                    ircnick = ircnick + " <em>[confirms the Web ID claim]</em>"
                elif ircnick is None:
                    ircnick = """None <em>[can't confirm the Web ID claim, should be <a href="%s">%s</a>]</em>""" % (userURI, nick)
                else:
                    ircnick = ircnick + """ <em>[doesn't confirm the Web ID claim, should be <a href="%s">%s</a>]</em>""" % (userURI, nick)
                info.append({'key': 'IRC account', 'value': "%s" % ircnick})

            for website in link_values(model, person, [FOAF.homepage]):
                info.append({'key': 'Website', 'value': "%s" % website})

            for weblog in link_values(model, person, [FOAF.weblog]):
                info.append({'key': 'Weblog', 'value': "%s" % weblog})

            for img in image_values(model, person,
                                    [FOAF.depiction, FOAF.img]):
                info.append({'key': 'Image', 'value': "%s" % img})

            for known in friend_values(datarooturi, model, person,
                                       [FOAF.knows]):
                info.append({'key': 'Knows', 'value': "%s" % known})

        context.addGlobal('here', {'nick': nick,
                                   'person': {'webid': person,
                                              'info': info}})

        channeldata = [
            {'uri': datarooturi + "%s#channel" % channel,
             'name': "#" + channel}
            for channel in channels
        ]
        context.addGlobal('channels', channeldata)
        context.addGlobal('events', latestsink.events)

        template = get_template('user')
        expand_template(template, context)

    elif format == "turtle":
        oldUserURI = "irc://freenode/%s,isuser" % nick

        # Each group of related triples is introduced by a None entry.
        triples = [None,
                   (datarooturi + "#freenode", SIOC.space_of, userURI),
                   (userURI, OWL.sameAs, oldUserURI),
                   (userURI, RDFS.label, PlainLiteral(nick)),
                   (userURI, RDF.type, SIOC.User),
                   ]
        if person:
            triples += [None, (person, FOAF.holdsAccount, userURI)]
            triples += get_triples(model, person,
                                   [FOAF.name, FOAF.firstName,
                                    FOAF.nick, RDFS.label])

        for channel in channels:
            channelURI = datarooturi + "%s#channel" % channel
            triples += [None,
                        (channelURI, SIOC.has_subscriber, userURI),
                        (channelURI, RDFS.label,
                         PlainLiteral("#%s" % channel)),
                        ]

        writer = TurtleWriter(None, namespaces)
        title = "About user %s" % nick
        writer.write([("", RDFS.label, PlainLiteral(title)),
                      ("", FOAF.primaryTopic, userURI),
                      ])
        writer.write(triples)
        writer.close()