def build_imprint(data):
    """Render the imprint template and write it to ``_site/imprint.html``.

    Args:
        data: Handed to ``get_template`` to resolve ``imprint.html``.
    """
    with open('_site/imprint.html', 'w') as page:
        imprint_template = get_template(data, "imprint.html")
        html = render_template(template=imprint_template)
        page.write(html)

    print_progress(text="Generate Imprint")
def build_home(data):
    """Render the landing page and write it to ``_site/index.html``.

    The page receives the 20 highest-reach trackers, the first 20 trackers
    ordered by their ``cat`` field, the most/least tracked sites per
    category and a company-reach bar chart.

    Args:
        data: Data source exposing ``apps`` (mapping of tracker dicts),
            ``sites`` and ``companies``.

    Side effects:
        Every tracker dict lacking a ``name`` key gets one, set to its
        ``overview.id``.
    """
    apps = data.apps

    sorted_trackers = sorted(
        apps.values(),
        key=lambda a: a['overview']['reach'],
        reverse=True,
    )
    # A missing or None category sorts as the empty string.
    sorted_trackers_cat = sorted(
        apps.values(),
        key=lambda a: a.get('cat', '') or '',
    )

    # Both sorted lists reference the same tracker dicts, so a single pass
    # is enough to give every tracker a display name.
    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    # most tracked sites by cat
    most_tracked_sites = tracked_by_category(data.sites, worst=True)

    # least tracked sites by cat
    least_tracked_sites = tracked_by_category(data.sites, worst=False)

    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    with open('_site/index.html', 'w') as output:
        output.write(render_template(
            template=get_template(data, "index.html"),
            ts=header_graph,
            tracker_list=sorted_trackers[:20],
            trackers_list_cat=sorted_trackers_cat[:20],
            most_tracked_sites=most_tracked_sites,
            least_tracked_sites=least_tracked_sites,
        ))

    print_progress(text="Generate home page")
def build_blogpost_list(data, blog_posts):
    """Write the blog index to ``_site/blog.html``.

    Only posts whose ``publish`` entry is truthy are handed to the template.

    Args:
        data: Handed to ``get_template`` to resolve ``blog.html``.
        blog_posts: Iterable of post mappings, each with a ``publish`` key.
    """
    with open('_site/blog.html', 'w') as page:
        blog_template = get_template(data, "blog.html")
        published = [post for post in blog_posts if post['publish']]
        html = render_template(template=blog_template, blog_posts=published)
        page.write(html)

    print_progress(text="Generate blog list")
def tracker_page(template, tracker_id, tracker, data):
    """Render one tracker profile page below ``_site/``.

    The output path comes from ``data.url_for("tracker", tracker_id)``.

    Args:
        template: Template passed to ``render_template``.
        tracker_id: Identifier used for every ``data.trackers`` lookup.
        tracker: Tracker object exposed to the template as ``app``.
        data: Data source exposing ``trackers``, ``sites`` and ``url_for``.
    """
    # Reach time series for this tracker
    reach = data.trackers.get_reach(tracker_id)
    timestamps = reach.get('ts')

    # Trend lines for page reach and domain (site) reach
    page_trend = Markup(ts_trend(ts=reach.get('page'), t=timestamps))
    site_trend = Markup(ts_trend(ts=reach.get('site'), t=reach.get('ts')))

    # Tag cloud input
    all_sites, sites_by_cat = tag_cloud_data(tracker_id, data)

    # Feeds the horizontal bar chart in the profile
    website_types = data.trackers.get_presence_by_site_category(
        tracker_id, data.sites)

    with open(f'_site/{data.url_for("tracker", tracker_id)}', 'w') as page:
        context = {
            'path_to_root': '..',
            'template': template,
            'app': tracker,
            # profile-card hack
            'profile': data.trackers.get_tracker(tracker_id),
            'reach': recent_tracker_reach(reach),
            'tracking_methods': data.trackers.get_tracking_methods(tracker_id),
            'website_list': all_sites,
            'sites_by_cat': sites_by_cat,
            # only the first five site categories are shown
            'website_types': website_types[:5],
            'similar_trackers': data.trackers.similar_trackers(tracker_id),
            'trends': {'page': page_trend, 'site': site_trend},
            'trackers': data.trackers.summary_stats()['count'],
        }
        page.write(render_template(**context))
def build_explorer():
    """Build the downloadable dataset archive and the explorer page.

    Creates a ``DataSource`` with ``populate=False``, dumps the trackers,
    sites, companies and sites_trackers tables as CSV, zips them into
    ``_site/data/wtm-data-<month>.zip`` and renders ``_site/explorer.html``
    with a link to that archive.

    The CSV files are staged in a private temporary directory rather than a
    fixed ``temp`` folder in the working directory. This way stale files
    cannot leak into the archive, a pre-existing ``temp`` folder is never
    deleted, and the staging area is removed even if an export step raises.
    """
    import tempfile

    data = DataSource(populate=False)
    build_packed_data(data)

    with tempfile.TemporaryDirectory() as staging:
        staging_dir = Path(staging)
        table_to_csv(data.trackers, (staging_dir / "trackers.csv").as_posix())
        table_to_csv(data.sites, (staging_dir / "sites.csv").as_posix())
        table_to_csv(data.companies, (staging_dir / "companies.csv").as_posix())
        table_to_csv(
            data.sites_trackers,
            (staging_dir / "sites_trackers.csv").as_posix(),
        )

        month = data.trackers.last_month
        # Archive the directory contents; entries are relative to `staging`.
        shutil.make_archive(f"_site/data/wtm-data-{month}", "zip", staging)

    with open("_site/explorer.html", "w") as output:
        output.write(render_template(
            template=get_template(data, name="explorer.html"),
            download_link=f"data/wtm-data-{month}.zip",
        ))

    print_progress(text="Generated Exporable Dataset")
def build_privacy_policy(data):
    """Render the privacy policy and write it to ``_site/privacy-policy.html``.

    Args:
        data: Handed to ``get_template`` to resolve ``privacy-policy.html``.
    """
    with open('_site/privacy-policy.html', 'w') as page:
        policy_template = get_template(data, "privacy-policy.html")
        html = render_template(template=policy_template)
        page.write(html)

    print_progress(text="Generate Privacy Policy")
def website_page(template, site, rank, data):
    """Render one website profile page to ``_site/websites/<name>.html``.

    Args:
        template: Template passed to ``render_template``.
        site: Site mapping with ``overview``, ``subdomains``, ``apps`` and
            ``name`` entries.
        rank: Rank shown in the profile card.
        data: Data source exposing ``sites`` and ``trackers``.

    Side effects:
        Each tracker object returned by ``data.trackers.get_tracker`` gets
        its ``frequency`` entry overwritten with the value for this site.

    Raises:
        IndexError: If ``site['subdomains']`` is empty.
    """
    site_id = site['overview']['site']

    # website url is the most common subdomain
    website_url = sorted(
        site.get('subdomains').items(),
        key=itemgetter(1),
        reverse=True,
    )[0][0]

    profile = {
        "rank": rank,
        "website_url": website_url,
        "name": site["overview"]["site"],
        "overview": site["overview"],
    }

    methods = data.sites.tracking_methods(site_id)

    # tracker presence data
    sankey_data = tracker_map_data(site_id, data)
    d_values, d_labels, d_total = website_doughnout(data.trackers, site)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))
    rendered_sankey = Markup(sankey_plot(sankey_data))

    # apps per site data: look each tracker up once and skip unknown ones
    tracker_table = []
    for entry in site.get("apps"):
        freq = entry.get("frequency")
        tracker = data.trackers.get_tracker(entry.get("app"))
        if tracker:
            tracker["frequency"] = freq
            tracker_table.append(tracker)

    def company_key(tracker):
        # Trackers with a missing or None company sort as the empty string.
        company_id = tracker.get("company_id")
        return "" if company_id is None else company_id

    sorted_trackers = sorted(
        tracker_table, key=itemgetter('frequency'), reverse=True)
    sorted_trackers_cat = sorted(tracker_table, key=company_key)

    with open('_site/websites/{}.html'.format(site["name"]), 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            site=site,
            profile=profile,
            methods=methods,
            sankey=rendered_sankey,
            doughnut=profile_dough,
            tracker_categories=d_labels,
            tracker_list=sorted_trackers,
            trackers_list_cat=sorted_trackers_cat,
        ))
def website_page(template, site_id, rank, data):
    """Render one website profile page to ``_site/websites/<name>.html``.

    Args:
        template: Template passed to ``render_template``.
        site_id: Key used to fetch the site from ``data.sites``.
        rank: Rank shown in the profile card.
        data: Data source exposing ``companies``, ``apps`` and ``sites``.

    Side effects:
        Each app dict in ``data.apps`` that is present on this site gets its
        ``frequency`` entry overwritten with the value for this site.

    Raises:
        IndexError: If the site's ``subdomains`` mapping is empty.
    """
    companies = data.companies
    apps = data.apps
    site = data.sites.get(site_id)

    # website url is the most common subdomain
    website_url = sorted(
        site.get('subdomains').items(),
        key=itemgetter(1),
        reverse=True,
    )[0][0]

    profile = {
        "rank": rank,
        "website_url": website_url,
        "name": site["overview"]["site"],
        "overview": site["overview"],
    }

    methods = tracking_methods(site)
    tracker_changes = changes(site)

    # tracker presence data
    sankey_data = companies_present(companies, apps, site=site)
    d_values, d_labels, d_total = website_doughnout(apps, site)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))
    real_sankey = Markup(alluvial_plot(sankey_data))

    # apps per site data: look each app up once and skip unknown ones
    apps_table = []
    for entry in site.get("apps"):
        frequency = entry.get("frequency")
        app = apps.get(entry.get("app"))
        if app:
            app["frequency"] = frequency
            apps_table.append(app)

    def company_key(app):
        # Apps with a missing or None company sort as the empty string.
        company_id = app.get("company_id")
        return "" if company_id is None else company_id

    sorted_trackers = sorted(
        apps_table, key=itemgetter('frequency'), reverse=True)
    sorted_trackers_cat = sorted(apps_table, key=company_key)

    # write to file
    with open('_site/websites/{}.html'.format(site["name"]), 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            site=site,
            profile=profile,
            methods=methods,
            tracker_changes=tracker_changes,
            sankey=real_sankey,
            doughnut=profile_dough,
            tracker_categories=d_labels,
            tracker_list=sorted_trackers,
            trackers_list_cat=sorted_trackers_cat,
        ))
def build_trackers_list(data):
    """Write the tracker overview to ``_site/trackers.html``.

    The template receives the trackers sorted by reach, the trackers sorted
    by ``company_id`` (``descending=False``) and the tracker summary stats.

    Args:
        data: Data source exposing ``trackers``; also handed to
            ``get_template``.
    """
    with open('_site/trackers.html', 'w') as page:
        context = {
            'template': get_template(data, name="trackers.html"),
            'tracker_list': data.trackers.sort_by(metric="reach"),
            'trackers_list_company': data.trackers.sort_by(
                metric="company_id", descending=False),
            'header_stats': data.trackers.summary_stats(),
        }
        page.write(render_template(**context))

    print_progress(text="Generate tracker list")
def company_page(template, company_data, data):
    """Render one company profile page below ``_site/``.

    The output path comes from ``data.url_for("company", <company id>)``.

    Args:
        template: Template passed to ``render_template``.
        company_data: Company mapping; exposed to the template as
            ``demographics``. Its ``logo`` entry is reset to ``None``.
        data: Data source providing ``url_for``.
    """
    company_data["logo"] = None

    company_id = company_data['overview']['id']
    # The first two characters of the name serve as the initials.
    initials = get_company_name(company_data)[:2]

    target = f'_site/{data.url_for("company", company_id)}'
    with open(target, 'w') as page:
        html = render_template(
            path_to_root='..',
            template=template,
            demographics=company_data,
            initials=initials,
        )
        page.write(html)
def build_company_reach_chart_page(data):
    """Write the company reach chart to ``_site/companies/reach-chart.html``.

    The chart is built from ``company_reach(data.companies, n=100)`` with
    ``highlight=10`` and ``custom_height=3000``.

    Args:
        data: Data source exposing ``companies``; also handed to
            ``get_template``.
    """
    reach_top100 = company_reach(data.companies, n=100)
    bars = overview_bars(reach_top100, highlight=10, custom_height=3000)
    chart = Markup(bars)

    page_template = get_template(
        data, name='reach-chart-page.html', path_to_root='..')

    with open('_site/companies/reach-chart.html', 'w') as page:
        html = render_template(
            path_to_root='..',
            template=page_template,
            chart=chart,
        )
        page.write(html)

    print_progress(text="Generate company reach chart")
def build_website_list(data):
    """Write the website overview to ``_site/websites.html``.

    The template receives the sites sorted by ``popularity`` and by
    ``category`` (both descending) together with the site summary stats.

    Args:
        data: Data source exposing ``sites``; also handed to
            ``get_template``.
    """
    sites = data.sites
    header_numbers = sites.summary_stats()
    by_popularity = sites.sort_by(metric='popularity', descending=True)
    by_category = sites.sort_by(metric='category', descending=True)

    with open('_site/websites.html', 'w') as page:
        html = render_template(
            template=get_template(data, "websites.html"),
            website_list=by_popularity,
            website_list_cat=by_category,
            header_numbers=header_numbers,
        )
        page.write(html)

    print_progress(text="Generate website list")
def build_blogpost_pages(data, blog_posts):
    """Render every blog post to ``_site/blog/<filename>.html``.

    The ``blog-page.html`` template is loaded once, with markdown rendering
    enabled, and reused for all posts.

    Args:
        data: Handed to ``get_template``.
        blog_posts: Iterable of post mappings, each with a ``filename`` entry.
    """
    post_template = get_template(
        data,
        "blog-page.html",
        render_markdown=True,
        path_to_root='..',
    )

    for post in blog_posts:
        with open(f'_site/blog/{post.get("filename")}.html', 'w') as page:
            html = render_template(
                path_to_root='..',
                template=post_template,
                blog_post=post,
            )
            page.write(html)

    print_progress(text="Generate blog posts")
def build_blogpost_pages(data, blog_posts):
    """Render every blog post to ``_site/blog/<filename>.html``.

    Args:
        data: Handed to ``get_template``.
        blog_posts: Iterable of post mappings, each with a ``filename`` entry.
    """
    for post in blog_posts:
        # TODO: Hoist the template out of the loop once the footnotes
        # markdown extension no longer keeps global state between renders.
        post_template = get_template(
            data,
            "blog-page.html",
            render_markdown=True,
            path_to_root='..',
        )

        with open(f'_site/blog/{post.get("filename")}.html', 'w') as page:
            html = render_template(
                path_to_root='..',
                template=post_template,
                blog_post=post,
            )
            page.write(html)

    print_progress(text="Generate blog posts")
def tracker_page(template, data):
    """Render one tracker profile page to ``_site/<data["url"]>``.

    Args:
        template: Template passed to ``render_template``.
        data: Mapping with at least ``reach_ts`` (holding ``ts``, ``page``
            and ``site`` series) and ``url``. All of its entries are also
            forwarded to the template as keyword arguments.
            NOTE(review): a key named ``path_to_root``, ``template``,
            ``reach`` or ``trends`` in ``data`` would collide with the
            explicit keyword arguments and raise ``TypeError``.
    """
    reach = data['reach_ts']
    timestamps = reach.get('ts')
    site_reach = reach.get('site')

    # page_reach trend line
    page_trend = Markup(ts_trend(ts=reach.get('page'), t=timestamps))

    # domain_reach trend line - may not reach all the way back in time, so
    # keep only the trailing timestamps that have a site value. An explicit
    # start index is used because ``timestamps[-0:]`` would return the whole
    # list when the site series is empty.
    start = max(0, len(timestamps) - len(site_reach))
    site_trend = Markup(
        ts_trend(ts=site_reach, t=timestamps[start:], percent=False))

    with open(f'_site/{data["url"]}', 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            reach=recent_tracker_reach(reach),
            trends={'page': page_trend, 'site': site_trend},
            **data,
        ))
def tracker_page(template, aid, app, data):
    """Render one tracker profile page below ``_site/``.

    The output path comes from ``data.url_for('app', aid)``.

    Args:
        template: Template passed to ``render_template``.
        aid: Tracker identifier; also used as the name when ``app`` has none.
        app: Tracker mapping; exposed to the template as ``app`` and
            ``profile``. Gains a ``name`` entry if it lacks one.
        data: Data source exposing ``sites``, ``apps`` and ``url_for``.
    """
    if 'name' not in app:
        app['name'] = aid

    # Reach time series and the two trend lines derived from it
    ts, page_reach, site_reach = timeseries(app)
    trends = {
        "page": Markup(ts_trend(ts=page_reach, t=ts)),
        "site": Markup(ts_trend(ts=site_reach, t=ts)),
    }

    methods = tracking_methods(app)

    # Tag cloud input
    sites_table = tag_cloud_data(aid, app, data)
    sites_by_cat = sites_per_app_by_category(sites_table)

    # Feeds the horizontal bar chart in the profile
    website_types = presence_by_site_type(app, data.sites)

    # Similar trackers
    similar_tracker_list = similar_trackers(app, data.apps, n=4)

    # Write to file
    with open(f"_site/{data.url_for('app', aid)}", 'w') as page:
        html = render_template(
            path_to_root='..',
            template=template,
            app=app,
            profile=app,  # profile-card hack
            prevalence=prevalence(app),
            tracking_methods=methods,
            website_list=sites_table,
            sites_by_cat=sites_by_cat,
            website_types=website_types[:5],  # first five site types
            similar_trackers=similar_tracker_list,
            trends=trends,
        )
        page.write(html)
def build_home(data):
    """Render the landing page and write it to ``_site/index.html``.

    The template receives a company-reach bar chart, the first 20 trackers
    sorted by reach and by company, the first 20 sites sorted by tracker
    count in both directions, and the site and tracker summary stats.

    Args:
        data: Data source exposing ``companies``, ``trackers`` and ``sites``;
            also handed to ``get_template``.
    """
    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    trackers = data.trackers
    sites = data.sites

    with open('_site/index.html', 'w') as page:
        context = {
            'template': get_template(data, "index.html"),
            'ts': header_graph,
            'tracker_list': trackers.sort_by(metric="reach")[:20],
            'trackers_list_company': trackers.sort_by(
                metric="company_id")[:20],
            'most_tracked_sites': sites.sort_by(metric='trackers')[:20],
            'least_tracked_sites': sites.sort_by(
                metric='trackers', descending=False)[:20],
            'websites': sites.summary_stats(),
            'tracker_stats': trackers.summary_stats(),
            'top10': top10,
        }
        page.write(render_template(**context))

    print_progress(text="Generate home page")
def build_explorer(data):
    """Build the downloadable dataset archive and the explorer page.

    Dumps the trackers, sites, companies and sites_trackers data frames as
    CSV, zips them into ``_site/data/wtm-data-<YYYY-MM>.zip`` (the month is
    the latest value in ``data.trackers.df.month``) and renders
    ``_site/explorer.html`` with a link to that archive.

    The CSV files are staged in a private temporary directory rather than a
    fixed ``temp`` folder in the working directory. This way stale files
    cannot leak into the archive, a pre-existing ``temp`` folder is never
    deleted, and the staging area is removed even if an export step raises.

    Args:
        data: Data source exposing ``trackers``, ``sites``, ``companies``
            and ``sites_trackers``, each with a ``df`` attribute.
    """
    import tempfile

    build_packed_data(data)

    with tempfile.TemporaryDirectory() as staging:
        staging_dir = Path(staging)
        data.trackers.df.to_csv(staging_dir / "trackers.csv")
        data.sites.df.to_csv(staging_dir / "sites.csv")
        data.companies.df.to_csv(staging_dir / "companies.csv")
        data.sites_trackers.df.to_csv(staging_dir / "sites_trackers.csv")

        month = datetime.strftime(max(data.trackers.df.month), '%Y-%m')
        # Archive the directory contents; entries are relative to `staging`.
        shutil.make_archive(f"_site/data/wtm-data-{month}", "zip", staging)

    with open("_site/explorer.html", "w") as output:
        output.write(render_template(
            template=get_template(data, name="explorer.html"),
            download_link=f"data/wtm-data-{month}.zip",
        ))

    print_progress(text="Generated Exporable Dataset")
def website_page(template, site, rank, data):
    """Render one website profile page to ``_site/websites/<site>.html``.

    Args:
        template: Template passed to ``render_template``.
        site: Record with ``site``, ``cookies`` and ``bad_qs`` attributes
            and an ``_asdict()`` method.
        rank: Rank shown in the profile card.
        data: Data source exposing ``sites.trackers``; also passed to the
            chart helpers.
    """
    site_id = site.site

    profile = {
        "rank": rank,
        # The displayed URL is simply the site name behind a "www." prefix.
        "website_url": f'www.{site_id}',
        "name": site.site,
    }

    # A tracking method counts as present once its share passes a threshold.
    uses_cookies = site.cookies > 0.2
    uses_fingerprinting = site.bad_qs > 0.1
    methods = {
        'cookies': uses_cookies,
        'fingerprinting': uses_fingerprinting,
    }

    # Tracker presence charts
    sankey_data = tracker_map_data(site_id, data)
    d_values, d_labels, d_total = website_doughnout(site_id, data)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))
    rendered_sankey = Markup(sankey_plot(sankey_data))

    # Trackers seen on this site
    tracker_table = data.sites.trackers.get_site(site_id)

    with open(f'_site/websites/{site.site}.html', 'w') as page:
        html = render_template(
            path_to_root='..',
            template=template,
            site={'overview': site._asdict()},
            profile=profile,
            methods=methods,
            sankey=rendered_sankey,
            doughnut=profile_dough,
            tracker_categories=d_labels,
            tracker_list=tracker_table,
        )
        page.write(html)
def build_trackers_list(data):
    """Write the tracker overview to ``_site/trackers.html``.

    The template receives the trackers sorted by reach (descending), the
    trackers sorted by company, and the tracker header stats.

    Args:
        data: Data source exposing ``apps`` (mapping of tracker dicts) and
            ``get_app_name``; also handed to ``get_template``.

    Side effects:
        Every tracker dict lacking a ``name`` key gets one, set to its
        ``overview.id``.
    """
    apps = data.apps

    def reach_of(app):
        return app['overview']['reach']

    def company_sort_key(app):
        # Trackers without a usable company id sort by their app name.
        company_id = app.get('company_id')
        if company_id in (None, "None"):
            return data.get_app_name(app['overview']['id'])
        return company_id

    sorted_trackers = sorted(apps.values(), key=reach_of, reverse=True)
    sorted_trackers_cat = sorted(apps.values(), key=company_sort_key)

    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    with open('_site/trackers.html', 'w') as page:
        html = render_template(
            template=get_template(data, name="trackers.html"),
            tracker_list=sorted_trackers,
            trackers_list_cat=sorted_trackers_cat,
            header_stats=tracker_header_stats(data.apps),
        )
        page.write(html)

    print_progress(text="Generate tracker list")
def build_website_list(data):
    """Write the website overview to ``_site/websites.html``.

    The template receives the sites sorted by rank and by category together
    with the header stats.

    Args:
        data: Data source exposing ``sites``; also handed to
            ``get_template``.
    """
    # NOTE(review): nothing returned here is used by this function. The call
    # is kept in case ``summary_stats`` has side effects on ``data.sites``;
    # confirm and remove.
    summary_stats(data.sites)

    # header stats
    header_numbers = header_stats(data.sites)

    sorted_websites = sort_by_rank(data.sites)
    sorted_websites_cat = sort_by_cat(data.sites)

    # write to file
    with open('_site/websites.html', 'w') as output:
        output.write(render_template(
            template=get_template(data, "websites.html"),
            website_list=sorted_websites,
            website_list_cat=sorted_websites_cat,
            header_numbers=header_numbers,
        ))

    print_progress(text="Generate website list")