Example #1
0
def build_imprint(data):
    """Render the ``imprint.html`` template into ``_site/imprint.html``.

    Args:
        data: data source handed to ``get_template``.
    """
    with open('_site/imprint.html', 'w') as output:
        page_template = get_template(data, "imprint.html")
        html = render_template(template=page_template)
        output.write(html)

    print_progress(text="Generate Imprint")
Example #2
0
def build_home(data):
    """Render the home page into ``_site/index.html``.

    The template receives the 20 first trackers ordered by descending
    ``overview.reach``, the 20 first trackers ordered by their ``cat`` value,
    the two results of ``tracked_by_category`` (``worst=True`` and
    ``worst=False``) and the company-reach overview chart.

    Args:
        data: data source; ``data.apps``, ``data.sites`` and
            ``data.companies`` are read.
    """
    def reach_of(app):
        return app['overview']['reach']

    def category_of(app):
        # A missing or falsy category sorts as the empty string.
        return app.get('cat', '') or ''

    all_apps = data.apps
    by_reach = sorted(all_apps.values(), key=reach_of, reverse=True)
    by_category = sorted(all_apps.values(), key=category_of)

    # Trackers without a display name fall back to their id.
    for app in by_reach + by_category:
        if 'name' not in app:
            app['name'] = app['overview']['id']

    # most tracked sites by cat
    worst_sites = tracked_by_category(data.sites, worst=True)
    # least tracked sites by cat
    best_sites = tracked_by_category(data.sites, worst=False)

    company_ranking = company_reach(data.companies)
    header_graph = Markup(overview_bars(company_ranking))

    with open('_site/index.html', 'w') as output:
        context = {
            'template': get_template(data, "index.html"),
            'ts': header_graph,
            'tracker_list': by_reach[:20],
            'trackers_list_cat': by_category[:20],
            'most_tracked_sites': worst_sites,
            'least_tracked_sites': best_sites,
        }
        output.write(render_template(**context))

    print_progress(text="Generate home page")
Example #3
0
def build_blogpost_list(data, blog_posts):
    """Render the blog overview into ``_site/blog.html``.

    Only posts whose ``publish`` entry is truthy are passed to the template.

    Args:
        data: data source handed to ``get_template``.
        blog_posts: iterable of post mappings, each with a ``publish`` key.
    """
    with open('_site/blog.html', 'w') as output:
        page_template = get_template(data, "blog.html")
        published = [post for post in blog_posts if post['publish']]
        html = render_template(template=page_template, blog_posts=published)
        output.write(html)
    print_progress(text="Generate blog list")
Example #4
0
def tracker_page(template, tracker_id, tracker, data):
    """Render the profile page of a single tracker.

    The output path is ``_site/`` followed by
    ``data.url_for("tracker", tracker_id)``.

    Args:
        template: template object handed through to ``render_template``.
        tracker_id: identifier used for every ``data.trackers`` lookup.
        tracker: tracker object exposed to the template as ``app``.
        data: data source; ``data.trackers``, ``data.sites`` and
            ``data.url_for`` are used.
    """
    trackers = data.trackers
    reach = trackers.get_reach(tracker_id)

    # One trend line per reach series, each plotted against the 'ts' series.
    trends = {
        series: Markup(ts_trend(ts=reach.get(series), t=reach.get('ts')))
        for series in ('page', 'site')
    }

    # Tag cloud data.
    all_sites, sites_by_cat = tag_cloud_data(tracker_id, data)

    # Feeds the horizontal bar chart in the profile.
    website_types = trackers.get_presence_by_site_category(tracker_id,
                                                           data.sites)

    destination = f'_site/{data.url_for("tracker", tracker_id)}'
    with open(destination, 'w') as output:
        context = dict(
            path_to_root='..',
            template=template,
            app=tracker,
            profile=trackers.get_tracker(tracker_id),  # profile-card hack
            reach=recent_tracker_reach(reach),
            tracking_methods=trackers.get_tracking_methods(tracker_id),
            website_list=all_sites,
            sites_by_cat=sites_by_cat,
            website_types=website_types[:5],  # first five entries only
            similar_trackers=trackers.similar_trackers(tracker_id),
            trends=trends,
            trackers=trackers.summary_stats()['count'],
        )
        output.write(render_template(**context))
Example #5
0
def build_explorer():
    """Export the dataset as a zip archive and render the explorer page.

    A ``DataSource(populate=False)`` is created, passed to
    ``build_packed_data`` and its four tables are written as CSV files into
    a scratch folder ``temp``. The folder is zipped to
    ``_site/data/wtm-data-<month>.zip`` and ``_site/explorer.html`` is
    rendered with a link to that archive.

    The scratch folder is removed even when exporting or archiving fails, so
    a failed run does not leave stale CSV files behind.
    """
    data = DataSource(populate=False)

    build_packed_data(data)

    temp_folder = Path("temp")
    # exist_ok replaces the racy "check exists, then create" sequence.
    temp_folder.mkdir(exist_ok=True)
    try:
        tables = (
            (data.trackers, "trackers.csv"),
            (data.sites, "sites.csv"),
            (data.companies, "companies.csv"),
            (data.sites_trackers, "sites_trackers.csv"),
        )
        for table, filename in tables:
            table_to_csv(table, (temp_folder / filename).as_posix())

        month = data.trackers.last_month
        shutil.make_archive(
            f"_site/data/wtm-data-{month}", "zip", temp_folder.as_posix()
        )
    finally:
        # Best-effort cleanup, also on the error path.
        shutil.rmtree(temp_folder.as_posix(), ignore_errors=True)

    with open("_site/explorer.html", "w") as output:
        output.write(render_template(
            template=get_template(data, name="explorer.html"),
            download_link=f"data/wtm-data-{month}.zip"
        ))

    print_progress(text="Generated Exporable Dataset")
Example #6
0
def build_privacy_policy(data):
    """Render ``privacy-policy.html`` into ``_site/privacy-policy.html``.

    Args:
        data: data source handed to ``get_template``.
    """
    with open('_site/privacy-policy.html', 'w') as output:
        page_template = get_template(data, "privacy-policy.html")
        html = render_template(template=page_template)
        output.write(html)

    print_progress(text="Generate Privacy Policy")
Example #7
0
def website_page(template, site, rank, data):
    """Render the profile page of one website.

    The page is written to ``_site/websites/<site["name"]>.html``.

    Args:
        template: template object handed through to ``render_template``.
        site: mapping describing the site; the keys ``overview``,
            ``subdomains``, ``apps`` and ``name`` are read.
        rank: value stored as ``profile["rank"]``.
        data: data source; ``data.sites`` and ``data.trackers`` are used.

    Raises:
        IndexError: if ``site["subdomains"]`` has no entries.
    """
    overview = site['overview']
    site_id = overview['site']

    # website url is the most common subdomain: the entry with the largest
    # value comes first after the descending sort.
    subdomains_by_value = sorted(
        site.get('subdomains').items(),
        key=itemgetter(1),
        reverse=True)
    website_url = subdomains_by_value[0][0]
    profile = {
        "rank": rank,
        "website_url": website_url,
        "name": site_id,
        "overview": overview
    }

    methods = data.sites.tracking_methods(site_id)

    # tracker presence data
    sankey_data = tracker_map_data(site_id, data)
    d_values, d_labels, d_total = website_doughnout(data.trackers, site)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))

    rendered_sankey = Markup(sankey_plot(sankey_data))

    # apps per site data
    tracker_table = []
    for entry in site.get("apps"):
        # Single lookup; apps without a known tracker are skipped.
        tracker = data.trackers.get_tracker(entry.get("app"))
        if not tracker:
            continue
        # NOTE(review): this writes onto the object returned by get_tracker;
        # if that object is shared, the frequency is visible elsewhere too.
        tracker["frequency"] = entry.get("frequency")
        tracker_table.append(tracker)

    def company_key(tracker):
        # A missing or None company id sorts as the empty string.
        company_id = tracker.get("company_id")
        return "" if company_id is None else company_id

    sorted_trackers = sorted(
        tracker_table,
        key=itemgetter('frequency'),
        reverse=True
    )
    sorted_trackers_cat = sorted(tracker_table, key=company_key)

    with open('_site/websites/{}.html'.format(site["name"]), 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            site=site,
            profile=profile,
            methods=methods,
            sankey=rendered_sankey,
            doughnut=profile_dough,
            tracker_categories=d_labels,
            tracker_list=sorted_trackers,
            trackers_list_cat=sorted_trackers_cat
        ))
Example #8
0
def website_page(template, site_id, rank, data):
    """Render the profile page of one website.

    The site is looked up with ``data.sites.get(site_id)`` and the page is
    written to ``_site/websites/<site["name"]>.html``.

    Args:
        template: template object handed through to ``render_template``.
        site_id: key of the site in ``data.sites``.
        rank: value stored as ``profile["rank"]``.
        data: data source; ``data.companies``, ``data.apps`` and
            ``data.sites`` are used.

    Raises:
        IndexError: if the site's ``subdomains`` mapping has no entries.
    """
    companies = data.companies
    apps = data.apps
    site = data.sites.get(site_id)
    overview = site["overview"]

    # website url is the most common subdomain: the entry with the largest
    # value comes first after the descending sort.
    subdomains_by_value = sorted(site.get('subdomains').items(),
                                 key=itemgetter(1),
                                 reverse=True)
    website_url = subdomains_by_value[0][0]
    profile = {
        "rank": rank,
        "website_url": website_url,
        "name": overview["site"],
        "overview": overview
    }

    methods = tracking_methods(site)
    tracker_changes = changes(site)

    # tracker presence data
    sankey_data = companies_present(companies, apps, site=site)
    d_values, d_labels, d_total = website_doughnout(apps, site)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))

    real_sankey = Markup(alluvial_plot(sankey_data))

    # apps per site data
    apps_table = []
    for entry in site.get("apps"):
        # Single lookup; entries without a known app are skipped.
        appdict = apps.get(entry.get("app"))
        if not appdict:
            continue
        # NOTE(review): this writes onto the entry stored in data.apps, so
        # the frequency of the last rendered site stays on the shared object.
        appdict["frequency"] = entry.get("frequency")
        apps_table.append(appdict)

    def company_key(app):
        # A missing or None company id sorts as the empty string.
        company_id = app.get("company_id")
        return "" if company_id is None else company_id

    sorted_trackers = sorted(apps_table,
                             key=itemgetter('frequency'),
                             reverse=True)
    sorted_trackers_cat = sorted(apps_table, key=company_key)

    # write to file
    with open('_site/websites/{}.html'.format(site["name"]), 'w') as output:
        output.write(
            render_template(path_to_root='..',
                            template=template,
                            site=site,
                            profile=profile,
                            methods=methods,
                            tracker_changes=tracker_changes,
                            sankey=real_sankey,
                            doughnut=profile_dough,
                            tracker_categories=d_labels,
                            tracker_list=sorted_trackers,
                            trackers_list_cat=sorted_trackers_cat))
Example #9
0
def build_trackers_list(data):
    """Render the tracker overview into ``_site/trackers.html``.

    The template receives the trackers sorted by ``reach``, the trackers
    sorted by ``company_id`` (``descending=False``) and the tracker summary
    statistics.

    Args:
        data: data source; ``data.trackers`` is used.
    """
    with open('_site/trackers.html', 'w') as output:
        context = {
            'template': get_template(data, name="trackers.html"),
            'tracker_list': data.trackers.sort_by(metric="reach"),
            'trackers_list_company': data.trackers.sort_by(
                metric="company_id", descending=False),
            'header_stats': data.trackers.summary_stats(),
        }
        output.write(render_template(**context))

    print_progress(text="Generate tracker list")
Example #10
0
def company_page(template, company_data, data):
    """Render the page of a single company.

    The output path is ``_site/`` followed by
    ``data.url_for("company", <company id>)``. ``company_data["logo"]`` is
    reset to ``None`` on the passed-in mapping before rendering.

    Args:
        template: template object handed through to ``render_template``.
        company_data: company mapping; ``overview.id`` is read and ``logo``
            is overwritten.
        data: data source providing ``url_for``.
    """
    company_data["logo"] = None
    identifier = company_data['overview']['id']
    display_name = get_company_name(company_data)

    destination = f'_site/{data.url_for("company", identifier)}'
    with open(destination, 'w') as output:
        html = render_template(
            path_to_root='..',
            template=template,
            demographics=company_data,
            initials=display_name[:2],  # first two characters of the name
        )
        output.write(html)
Example #11
0
def build_company_reach_chart_page(data):
    """Render the company reach chart into ``_site/companies/reach-chart.html``.

    The chart is built from ``company_reach(data.companies, n=100)`` and drawn
    by ``overview_bars`` with ``highlight=10`` and ``custom_height=3000``.

    Args:
        data: data source; ``data.companies`` is read.
    """
    top100 = company_reach(data.companies, n=100)
    chart = Markup(overview_bars(top100, highlight=10, custom_height=3000))
    template = get_template(data, name='reach-chart-page.html', path_to_root='..')

    with open('_site/companies/reach-chart.html', 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            chart=chart,
        ))

    # Report only after the file has been closed, i.e. fully written.
    print_progress(text="Generate company reach chart")
Example #12
0
def build_website_list(data):
    """Render the website overview into ``_site/websites.html``.

    The template receives the site summary statistics and two orderings of
    the sites: by ``popularity`` and by ``category``, both with
    ``descending=True``.

    Args:
        data: data source; ``data.sites`` is used.
    """
    sites = data.sites
    header_numbers = sites.summary_stats()
    by_popularity = sites.sort_by(metric='popularity', descending=True)
    by_category = sites.sort_by(metric='category', descending=True)

    with open('_site/websites.html', 'w') as output:
        html = render_template(
            template=get_template(data, "websites.html"),
            website_list=by_popularity,
            website_list_cat=by_category,
            header_numbers=header_numbers,
        )
        output.write(html)
    print_progress(text="Generate website list")
Example #13
0
def build_blogpost_pages(data, blog_posts):
    """Render one page per blog post into ``_site/blog/<filename>.html``.

    A single ``blog-page.html`` template (``render_markdown=True``) is created
    up front and reused for every post.

    Args:
        data: data source handed to ``get_template``.
        blog_posts: iterable of post mappings; ``filename`` names the output.
    """
    page_template = get_template(data,
                                 "blog-page.html",
                                 render_markdown=True,
                                 path_to_root='..')

    for post in blog_posts:
        destination = f'_site/blog/{post.get("filename")}.html'
        with open(destination, 'w') as output:
            html = render_template(path_to_root='..',
                                   template=page_template,
                                   blog_post=post)
            output.write(html)

    print_progress(text="Generate blog posts")
Example #14
0
def build_blogpost_pages(data, blog_posts):
    """Render one page per blog post into ``_site/blog/<filename>.html``.

    A fresh ``blog-page.html`` template (``render_markdown=True``) is created
    for every post.

    Args:
        data: data source handed to ``get_template``.
        blog_posts: iterable of post mappings; ``filename`` names the output.
    """
    for post in blog_posts:
        # TODO: hoist the template out of the loop once the footnotes
        # markdown extension no longer saves global state.
        page_template = get_template(data,
                                     "blog-page.html",
                                     render_markdown=True,
                                     path_to_root='..')
        destination = f'_site/blog/{post.get("filename")}.html'
        with open(destination, 'w') as output:
            html = render_template(path_to_root='..',
                                   template=page_template,
                                   blog_post=post)
            output.write(html)

    print_progress(text="Generate blog posts")
Example #15
0
def tracker_page(template, data):
    """Render the profile page of a single tracker to ``_site/<data["url"]>``.

    Every entry of ``data`` is forwarded to the template as a keyword
    argument, together with ``reach`` and ``trends`` computed here.

    Args:
        template: template object handed through to ``render_template``.
        data: mapping with at least ``reach_ts`` (holding the ``ts``,
            ``page`` and ``site`` series) and ``url``.
    """
    reach = data['reach_ts']
    timestamps = reach.get('ts')

    # page_reach trend line
    page_trend = Markup(ts_trend(ts=reach.get('page'), t=timestamps))

    # domain_reach trend line - may not reach all the way back in time, so
    # only the trailing timestamps matching the site series are used.
    # A plain ``timestamps[-len(site_reach):]`` would return *all* timestamps
    # for an empty site series (-0 == 0), hence the explicit start index.
    site_reach = reach.get('site')
    start = max(0, len(timestamps) - len(site_reach))
    site_trend = Markup(
        ts_trend(ts=site_reach, t=timestamps[start:], percent=False))

    with open(f'_site/{data["url"]}', 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            reach=recent_tracker_reach(reach),
            trends={'page': page_trend, 'site': site_trend},
            **data,
        ))
Example #16
0
def tracker_page(template, aid, app, data):
    """Render the profile page of a single tracker (app).

    The output path is ``_site/`` followed by ``data.url_for('app', aid)``.
    If ``app`` has no ``name`` entry, ``aid`` is stored as its name.

    Args:
        template: template object handed through to ``render_template``.
        aid: identifier of the app.
        app: mapping describing the app; may gain a ``name`` entry.
        data: data source; ``data.sites``, ``data.apps`` and ``data.url_for``
            are used.
    """
    if 'name' not in app:
        app['name'] = aid

    # Reach time series and one trend line each for page and site reach.
    timestamps, page_reach, site_reach = timeseries(app)
    trends = {
        "page": Markup(ts_trend(ts=page_reach, t=timestamps)),
        "site": Markup(ts_trend(ts=site_reach, t=timestamps)),
    }

    methods = tracking_methods(app)

    # Tag cloud data.
    sites_table = tag_cloud_data(aid, app, data)
    sites_by_cat = sites_per_app_by_category(sites_table)

    # Feeds the horizontal bar chart in the profile.
    website_types = presence_by_site_type(app, data.sites)

    # Similar trackers.
    similar_tracker_list = similar_trackers(app, data.apps, n=4)

    destination = f"_site/{data.url_for('app', aid)}"
    with open(destination, 'w') as output:
        context = dict(
            path_to_root='..',
            template=template,
            app=app,
            profile=app,  # profile-card hack
            prevalence=prevalence(app),
            tracking_methods=methods,
            website_list=sites_table,
            sites_by_cat=sites_by_cat,
            website_types=website_types[:5],  # top 5
            similar_trackers=similar_tracker_list,
            trends=trends,
        )
        output.write(render_template(**context))
Example #17
0
def build_home(data):
    """Render the home page into ``_site/index.html``.

    The template receives the company-reach chart plus 20-item slices of the
    trackers (sorted by ``reach`` and by ``company_id``) and of the sites
    (sorted by ``trackers``, in default order and with ``descending=False``),
    along with both summary statistics.

    Args:
        data: data source; ``data.companies``, ``data.trackers`` and
            ``data.sites`` are used.
    """
    limit = 20
    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    with open('_site/index.html', 'w') as output:
        context = {
            'template': get_template(data, "index.html"),
            'ts': header_graph,
            'tracker_list': data.trackers.sort_by(metric="reach")[:limit],
            'trackers_list_company': data.trackers.sort_by(
                metric="company_id")[:limit],
            'most_tracked_sites': data.sites.sort_by(
                metric='trackers')[:limit],
            'least_tracked_sites': data.sites.sort_by(
                metric='trackers', descending=False)[:limit],
            'websites': data.sites.summary_stats(),
            'tracker_stats': data.trackers.summary_stats(),
            'top10': top10,
        }
        output.write(render_template(**context))

    print_progress(text="Generate home page")
Example #18
0
def build_explorer(data):
    """Export the dataset as a zip archive and render the explorer page.

    After ``build_packed_data(data)``, the ``df`` of each of the four tables
    is written as CSV into a scratch folder ``temp``. The folder is zipped to
    ``_site/data/wtm-data-<YYYY-MM>.zip`` (month taken from the maximum of
    ``data.trackers.df.month``) and ``_site/explorer.html`` is rendered with
    a link to that archive.

    The scratch folder is removed even when exporting or archiving fails, so
    a failed run does not leave stale CSV files behind.

    Args:
        data: data source exposing ``trackers``, ``sites``, ``companies`` and
            ``sites_trackers``, each with a ``df`` attribute.
    """
    build_packed_data(data)

    temp_folder = Path("temp")
    # exist_ok replaces the racy "check exists, then create" sequence.
    temp_folder.mkdir(exist_ok=True)
    try:
        tables = (
            (data.trackers, "trackers.csv"),
            (data.sites, "sites.csv"),
            (data.companies, "companies.csv"),
            (data.sites_trackers, "sites_trackers.csv"),
        )
        for table, filename in tables:
            table.df.to_csv((temp_folder / filename).as_posix())

        month = datetime.strftime(max(data.trackers.df.month), '%Y-%m')
        shutil.make_archive(f"_site/data/wtm-data-{month}", "zip",
                            temp_folder.as_posix())
    finally:
        # Best-effort cleanup, also on the error path.
        shutil.rmtree(temp_folder.as_posix(), ignore_errors=True)

    with open("_site/explorer.html", "w") as output:
        output.write(
            render_template(template=get_template(data, name="explorer.html"),
                            download_link=f"data/wtm-data-{month}.zip"))

    print_progress(text="Generated Exporable Dataset")
Example #19
0
def website_page(template, site, rank, data):
    """Render the profile page of one website.

    The page is written to ``_site/websites/<site.site>.html``.

    Args:
        template: template object handed through to ``render_template``.
        site: record with the attributes ``site``, ``cookies`` and ``bad_qs``
            and an ``_asdict()`` method.
        rank: value stored as ``profile["rank"]``.
        data: data source; ``data.sites.trackers`` is used directly and
            ``data`` is passed on to the chart helpers.
    """
    site_id = site.site

    # The website url is the site id prefixed with "www.".
    profile = {
        "rank": rank,
        "website_url": f'www.{site_id}',
        "name": site.site,
    }

    # Each method is flagged when its value exceeds the fixed threshold.
    uses_cookies = site.cookies > 0.2
    uses_fingerprinting = site.bad_qs > 0.1
    methods = {
        'cookies': uses_cookies,
        'fingerprinting': uses_fingerprinting,
    }

    # Tracker presence data.
    sankey_data = tracker_map_data(site_id, data)
    d_values, d_labels, d_total = website_doughnout(site_id, data)
    profile_dough = Markup(profile_doughnut(d_values, d_labels, d_total))
    rendered_sankey = Markup(sankey_plot(sankey_data))

    # Apps per site data.
    tracker_table = data.sites.trackers.get_site(site_id)

    destination = '_site/websites/{}.html'.format(site.site)
    with open(destination, 'w') as output:
        context = dict(
            path_to_root='..',
            template=template,
            site={'overview': site._asdict()},
            profile=profile,
            methods=methods,
            sankey=rendered_sankey,
            doughnut=profile_dough,
            tracker_categories=d_labels,
            tracker_list=tracker_table,
        )
        output.write(render_template(**context))
Example #20
0
def build_trackers_list(data):
    """Render the tracker overview into ``_site/trackers.html``.

    Trackers are listed twice: by descending ``overview.reach`` and by
    company id, where trackers without a usable company id (missing, ``None``
    or the string ``"None"``) are ordered by ``data.get_app_name`` instead.
    Trackers without a ``name`` get their id as name.

    Args:
        data: data source; ``data.apps`` and ``data.get_app_name`` are used.
    """
    def reach_of(app):
        return app['overview']['reach']

    def company_or_app_name(app):
        if 'company_id' not in app or app['company_id'] in (None, "None"):
            return data.get_app_name(app['overview']['id'])
        return app['company_id']

    all_apps = data.apps
    by_reach = sorted(all_apps.values(), key=reach_of, reverse=True)
    by_company = sorted(all_apps.values(), key=company_or_app_name)

    # Trackers without a display name fall back to their id.
    for app in by_reach:
        if 'name' not in app:
            app['name'] = app['overview']['id']

    with open('_site/trackers.html', 'w') as output:
        context = {
            'template': get_template(data, name="trackers.html"),
            'tracker_list': by_reach,
            'trackers_list_cat': by_company,
            'header_stats': tracker_header_stats(data.apps),
        }
        output.write(render_template(**context))

    print_progress(text="Generate tracker list")
Example #21
0
def build_website_list(data):
    """Render the website overview into ``_site/websites.html``.

    The template receives the result of ``header_stats`` and the sites as
    ordered by ``sort_by_rank`` and by ``sort_by_cat``.

    Args:
        data: data source; ``data.sites`` is read.
    """
    # header stats
    header_numbers = header_stats(data.sites)

    sorted_websites = sort_by_rank(data.sites)
    sorted_websites_cat = sort_by_cat(data.sites)

    # write to file
    with open('_site/websites.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, "websites.html"),
                            website_list=sorted_websites,
                            website_list_cat=sorted_websites_cat,
                            header_numbers=header_numbers))
    print_progress(text="Generate website list")