Code example #1
File: home.py Project: ghostery/whotracks.me
def build_privacy_policy(data):
    with open('_site/privacy-policy.html', 'w') as output:
        output.write(render_template(
            template=get_template(data, "privacy-policy.html"),
        ))

    print_progress(text="Generate Privacy Policy")
Code example #2
File: templates.py Project: xbl3/whotracks.me
def generate_sitemap(blog_posts):
    data = DataSource(populate=False)
    # write sitemap to _site (to be used as index for static site search)
    with open("_site/sitemap.json", "w") as output:
        json.dump(site_to_json(data_source=data, blog_posts=blog_posts),
                  output)
    print_progress(text='Generate sitemap index')
Code example #3
File: home.py Project: ghostery/whotracks.me
def build_imprint(data):
    with open('_site/imprint.html', 'w') as output:
        output.write(render_template(
            template=get_template(data, "imprint.html"),
        ))

    print_progress(text="Generate Imprint")
Code example #4
def build_blogpost_list(data, blog_posts):
    with open('_site/blog.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, "blog.html"),
                            blog_posts=[p for p in blog_posts
                                        if p['publish']]))
    print_progress(text="Generate blog list")
Code example #5
def build_tracker_pages(data):
    template = get_template(data, name='tracker-page.html', path_to_root='..')

    for (tracker_id, tracker) in data.trackers.iter():
        tracker_page(template, tracker_id, tracker, data)

    print_progress(text="Generate tracker pages")
Code example #6
File: explorer.py Project: xbl3/whotracks.me
def build_explorer():
    data = DataSource(populate=False)

    build_packed_data(data)

    temp_folder = Path("temp")
    if not temp_folder.exists():
        temp_folder.mkdir()

    table_to_csv(data.trackers, "temp/trackers.csv")
    table_to_csv(data.sites, "temp/sites.csv")
    table_to_csv(data.companies, "temp/companies.csv")
    table_to_csv(data.sites_trackers, "temp/sites_trackers.csv")

    month = data.trackers.last_month
    shutil.make_archive(
        f"_site/data/wtm-data-{month}", "zip", "temp"
    )
    shutil.rmtree(temp_folder.as_posix(), ignore_errors=True)

    with open(f"_site/explorer.html", "w") as output:
        output.write(render_template(
            template=get_template(data, name="explorer.html"),
            download_link=f"data/wtm-data-{month}.zip"
        ))

    print_progress(text="Generated Exporable Dataset")
Code example #7
File: home.py Project: valerymamontov/whotracks.me
def build_home(data):
    apps = data.apps

    sorted_trackers = sorted(apps.values(),
                             key=lambda a: a['overview']['reach'],
                             reverse=True)
    sorted_trackers_cat = sorted(apps.values(),
                                 key=lambda a: a.get('cat', '') or '')

    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    for tracker in sorted_trackers_cat:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    # most tracked sites by cat
    most_tracked_sites = tracked_by_category(data.sites, worst=True)
    # least tracked sites by cat
    least_tracked_sites = tracked_by_category(data.sites, worst=False)

    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    with open('_site/index.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, "index.html"),
                            ts=header_graph,
                            tracker_list=sorted_trackers[:20],
                            trackers_list_cat=sorted_trackers_cat[:20],
                            most_tracked_sites=most_tracked_sites,
                            least_tracked_sites=least_tracked_sites))

    print_progress(text="Generate home page")
Code example #8
def build_website_pages(data):
    template = get_template(data, "website-page.html", path_to_root='..')

    for (rank, site) in enumerate(data.sites.sort_by(metric='popularity', descending=True)):
        website_page(template, site, rank + 1, data)

    print_progress(text="Generate website pages")
Code example #9
def build_tracker_pages(data):
    apps = data.apps
    template = get_template(data, name='tracker-page.html', path_to_root='..')

    for (aid, app) in apps.items():
        tracker_page(template, aid, app, data)

    print_progress(text="Generate tracker pages")
Code example #10
def build_company_pages(data):
    companies = data.companies
    template = get_template(data, "company-page.html")

    for company_data in companies.values():
        company_page(template, company_data, data)

    print_progress(text="Generate company pages")
Code example #11
def build_trackers_list(data):
    with open('_site/trackers.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, name="trackers.html"),
                            tracker_list=data.trackers.sort_by(metric="reach"),
                            trackers_list_company=data.trackers.sort_by(
                                metric="company_id", descending=False),
                            header_stats=data.trackers.summary_stats()))

    print_progress(text="Generate tracker list")
Code example #12
def build_api(data):
    # tracker overviews
    data_dir = Path('_site/data/trackers/global')
    if not data_dir.exists():
        data_dir.mkdir(parents=True)

    for id, stats in data.trackers.iter():
        build_tracker_json(id, data)

    print_progress(text='Generate API data')
Code example #13
def build_website_pages(data):
    sites = data.sites
    template = get_template(data, "website-page.html", path_to_root='..')

    for rank, (site_id, site) in enumerate(
            sorted(sites.items(),
                   key=lambda s: s[1]['overview']['popularity'],
                   reverse=True)):
        website_page(template, site_id, rank + 1, data)

    print_progress(text="Generate website pages")
Code example #14
def build_company_reach_chart_page(data):
    top100 = company_reach(data.companies, n=100)
    chart = Markup(overview_bars(top100, highlight=10, custom_height=3000))
    template = get_template(data, name='reach-chart-page.html', path_to_root='..')

    with open('_site/companies/reach-chart.html', 'w') as output:
        output.write(render_template(
            path_to_root='..',
            template=template,
            chart=chart,
        ))
        print_progress(text="Generate company reach chart")
Code example #15
def build_website_list(data):
    header_numbers = data.sites.summary_stats()

    sorted_websites = data.sites.sort_by(metric='popularity', descending=True)
    sorted_websites_cat = data.sites.sort_by(metric='category', descending=True)

    with open('_site/websites.html', 'w') as output:
        output.write(render_template(
            template=get_template(data, "websites.html"),
            website_list=sorted_websites,
            website_list_cat=sorted_websites_cat,
            header_numbers=header_numbers
        ))
    print_progress(text="Generate website list")
Code example #16
def build_blogpost_pages(data, blog_posts):
    template = get_template(data,
                            "blog-page.html",
                            render_markdown=True,
                            path_to_root='..')

    for blog_post in blog_posts:
        with open(f'_site/blog/{blog_post.get("filename")}.html',
                  'w') as output:
            output.write(
                render_template(path_to_root='..',
                                template=template,
                                blog_post=blog_post))

    print_progress(text="Generate blog posts")
Code example #17
File: explorer.py Project: birdsarah/whotracks.me
def build_packed_data(data):
    data_dir = Path("_site/data/packed/")
    if not data_dir.exists():
        data_dir.mkdir(parents=True)

    for data_source in ["trackers", "companies", "sites", "sites_trackers"]:
        with open(f"_site/data/packed/{data_source}.pack", "wb") as output:
            output.write(b"".join(
                pack_rows(
                    fields=FIELDS,
                    rows=getattr(data,
                                 data_source).get_snapshot().itertuples(),
                )))

    print_progress(text="Generate packed data")
Code example #18
File: blog.py Project: birdsarah/whotracks.me
def build_blogpost_pages(data, blog_posts):
    for blog_post in blog_posts:
        #TODO: Move template out after footnotes markdown extension does
        # not save global state
        template = get_template(data,
                                "blog-page.html",
                                render_markdown=True,
                                path_to_root='..')
        with open(f'_site/blog/{blog_post.get("filename")}.html',
                  'w') as output:
            output.write(
                render_template(path_to_root='..',
                                template=template,
                                blog_post=blog_post))

    print_progress(text="Generate blog posts")
Code example #19
File: builder.py Project: xbl3/whotracks.me
def batched_job(inp, batch_fn, batch_size, message):
    batches = []
    input_size = len(inp)
    for batch in [
            inp[i:i + batch_size]
            for i in range(0, input_size, batch_size)
    ]:
        submission = executor.submit(batch_fn, batch=batch)
        batches.append(submission)
        futures.append(submission)
    for i, f in enumerate(
            concurrent.futures.as_completed(batches)):
        print_progress(
            text=
            f"{message} {min((i+1) * batch_size, input_size)}/{input_size}"
        )
    return batches
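As written, batched_job is a nested helper: it closes over executor and futures from the enclosing feed_event method (see example #26 below), splits the input into fixed-size chunks, submits each chunk as one job, and reports progress as chunks complete. Below is a self-contained sketch of the same pattern with the executor and futures passed in explicitly and a stand-in batch function; the names and wiring are illustrative, not whotracks.me code.

# Self-contained illustration of the chunk-submit-report pattern; build_batch
# and the executor wiring are stand-ins, not project code.
import concurrent.futures

def build_batch(batch):
    # Placeholder for e.g. build_tracker_page_batch / build_website_pages_batch.
    for item in batch:
        pass  # render one page per item

def batched_job(executor, futures, inp, batch_fn, batch_size, message):
    batches = []
    input_size = len(inp)
    # Split the input into fixed-size chunks and submit each chunk as one job.
    for batch in [inp[i:i + batch_size] for i in range(0, input_size, batch_size)]:
        submission = executor.submit(batch_fn, batch=batch)
        batches.append(submission)
        futures.append(submission)
    # Report progress as the chunks complete (completion order, not submission order).
    for i, _ in enumerate(concurrent.futures.as_completed(batches)):
        print(f"{message} {min((i + 1) * batch_size, input_size)}/{input_size}")
    return batches

if __name__ == "__main__":
    futures = []
    with concurrent.futures.ProcessPoolExecutor(max_workers=4) as executor:
        batched_job(executor, futures, list(range(1000)), build_batch, 150,
                    "Generate pages")
        concurrent.futures.wait(futures)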
Code example #20
File: data.py Project: birdsarah/whotracks.me
def build_api(data):
    # tracker overviews
    data_dir = Path('_site/data/trackers/global')
    if not data_dir.exists():
        data_dir.mkdir(parents=True)

    for id, stats in data.trackers.iter():
        stats = data.trackers.get_tracker(id)
        stats['overview'] = dict(stats['overview'])
        # drop some columns
        for col in ['Index', 'companies', 'month', 'trackers', 'tracker', 'id', 'company_id', 'category', 'country']:
            del stats['overview'][col]
        stats['date_range'] = [date.strftime('%Y-%m') for date in stats['date_range']]
        # print(stats)
        with open(f'_site/data/trackers/global/{id}.json', 'w') as output:
            json.dump(stats, output)

    print_progress(text='Generate API data')
Code example #21
def build_home(data):
    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    with open('_site/index.html', 'w') as output:
        output.write(
            render_template(
                template=get_template(data, "index.html"),
                ts=header_graph,
                tracker_list=data.trackers.sort_by(metric="reach")[:20],
                trackers_list_company=data.trackers.sort_by(
                    metric="company_id")[:20],
                most_tracked_sites=data.sites.sort_by(metric='trackers')[:20],
                least_tracked_sites=data.sites.sort_by(metric='trackers',
                                                       descending=False)[:20],
                websites=data.sites.summary_stats(),
                tracker_stats=data.trackers.summary_stats(),
                top10=top10))

    print_progress(text="Generate home page")
Code example #22
File: explorer.py Project: birdsarah/whotracks.me
def build_explorer(data):
    build_packed_data(data)

    temp_folder = Path("temp")
    if not temp_folder.exists():
        temp_folder.mkdir()

    data.trackers.df.to_csv("temp/trackers.csv")
    data.sites.df.to_csv("temp/sites.csv")
    data.companies.df.to_csv("temp/companies.csv")
    data.sites_trackers.df.to_csv("temp/sites_trackers.csv")

    month = datetime.strftime(max(data.trackers.df.month), '%Y-%m')
    shutil.make_archive(f"_site/data/wtm-data-{month}", "zip", "temp")
    shutil.rmtree(temp_folder.as_posix(), ignore_errors=True)

    with open(f"_site/explorer.html", "w") as output:
        output.write(
            render_template(template=get_template(data, name="explorer.html"),
                            download_link=f"data/wtm-data-{month}.zip"))

    print_progress(text="Generated Exporable Dataset")
Code example #23
def build_trackers_list(data):
    apps = data.apps

    sorted_trackers = sorted(apps.values(),
                             key=lambda a: a['overview']['reach'],
                             reverse=True)
    sorted_trackers_cat = sorted(
        apps.values(),
        key=lambda a: data.get_app_name(a['overview']['id'])
        if ('company_id' not in a or a['company_id'] in [None, "None"]) else a[
            'company_id'])

    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    with open('_site/trackers.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, name="trackers.html"),
                            tracker_list=sorted_trackers,
                            trackers_list_cat=sorted_trackers_cat,
                            header_stats=tracker_header_stats(data.apps)))

    print_progress(text="Generate tracker list")
Code example #24
def build_website_list(data):
    sites = data.sites
    tracker_requests, tracker_buckets, https = summary_stats(data.sites)

    # header stats
    tracker_values = []
    tracker_labels = []
    for (k, v) in tracker_buckets.items():
        tracker_values.append(v)
        tracker_labels.append(k)

    header_numbers = header_stats(data.sites)

    sorted_websites = sort_by_rank(data.sites)
    sorted_websites_cat = sort_by_cat(data.sites)

    # write to file
    with open('_site/websites.html', 'w') as output:
        output.write(
            render_template(template=get_template(data, "websites.html"),
                            website_list=sorted_websites,
                            website_list_cat=sorted_websites_cat,
                            header_numbers=header_numbers))
    print_progress(text="Generate website list")
Code example #25
    def feed_event(self, event):
        futures = []
        with concurrent.futures.ThreadPoolExecutor() as executor:
            ###################################################################
            # This needs to be first, as other tasks will need to write in   #
            # the resulting folders.                                          #
            ###################################################################

            # Depends on folder: 'static/'
            if event & STATIC_FOLDER:
                create_site_structure(static_path=STATIC_PATH)
                print_progress(text='Create _site')

            ###################################################################
            # We then reload data in memory, before generating the site       #
            ###################################################################

            # Depends on folder: 'data/'
            if self.data_source is None or event & DATA_FOLDER:
                # class where all data can be accessed from
                data_source = DataSource()
                print_progress(text='Load data sources')

            # Depends on: 'blog/'
            if self.blog_posts is None or event & BLOG_FOLDER:
                self.blog_posts = load_blog_posts()
                print_progress(text='Load blog posts')

            ###################################################################
            # Once site structure has been created and data is refreshed, we  #
            # can build all parts of the site in parallel, since there are no #
            # dependencies between them.                                      #
            ###################################################################

            # Depends on: 'templates/', 'data/'
            if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
                print_progress(text='Generate error pages')
                copy_custom_error_pages(data=data_source)

            # Depends on: 'data/', 'templates/'
            if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
                # Home
                futures.append(executor.submit(build_home, data=data_source))

                # Trackers
                futures.append(
                    executor.submit(build_trackers_list, data=data_source))
                futures.append(
                    executor.submit(build_tracker_pages, data=data_source))

                # Websites
                futures.append(
                    executor.submit(build_website_list, data=data_source))
                futures.append(
                    executor.submit(build_website_pages, data=data_source))

            # Depends on: 'data/', 'blog/', 'templates/'
            if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
                futures.append(
                    executor.submit(build_blogpost_list,
                                    data=data_source,
                                    blog_posts=self.blog_posts))

                futures.append(
                    executor.submit(build_blogpost_pages,
                                    data=data_source,
                                    blog_posts=self.blog_posts))

            # Depends on: 'data/', 'blog/', 'templates/'
            if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
                futures.append(
                    executor.submit(generate_sitemap,
                                    data=data_source,
                                    blog_posts=self.blog_posts))

            # TODO: uncomment when company profiles are ready
            # if args['site'] or args['companies']:
            #     company_process = Process(target=build_company_pages, args=(data_source,))
            #     company_process.start()

            # Wait for all jobs to finish
            concurrent.futures.wait(futures)

            # Getting the `result` of each promise (although none is expected)
            # allows re-raising exceptions happening in child processes. If
            # we don't do it, exceptions will be silently ignored.
            for future in futures:
                future.result()

            print('Done')
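The event & STATIC_FOLDER style checks in feed_event imply that the watched folders are encoded as integer bit flags that can be OR-ed together when several folders change in one event. A plausible set of definitions is sketched below; the values are assumed for illustration, and the real constants live elsewhere in builder.py (they could equally be an enum.IntFlag).

# Hypothetical flag values; the actual constants are defined in the project.
STATIC_FOLDER    = 1 << 0  # static/
DATA_FOLDER      = 1 << 1  # data/
BLOG_FOLDER      = 1 << 2  # blog/
TEMPLATES_FOLDER = 1 << 3  # templates/

# A change touching both data/ and templates/ would then be reported as:
event = DATA_FOLDER | TEMPLATES_FOLDER
assert event & DATA_FOLDER and event & TEMPLATES_FOLDER
assert not event & BLOG_FOLDER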
Code example #26
File: builder.py Project: xbl3/whotracks.me
    def feed_event(self, event):
        futures = []
        with concurrent.futures.ProcessPoolExecutor(max_workers=8) as executor:
            ###################################################################
            # This needs to be first, as other tasks will need to write in   #
            # the resulting folders.                                          #
            ###################################################################

            # Depends on folder: 'static/'
            if event & STATIC_FOLDER:
                create_site_structure(static_path=STATIC_PATH)
                print_progress(text='Create _site')

            ###################################################################
            # We then reload data in memory, before generating the site       #
            ###################################################################

            # Depends on folder: 'data/'
            if self.data_source is None or event & DATA_FOLDER:
                # class where all data can be accessed from
                data_source = DataSource()
                print_progress(text='Load data sources')

            # Depends on: 'blog/'
            if self.blog_posts is None or event & BLOG_FOLDER:
                self.blog_posts = load_blog_posts()
                print_progress(text='Load blog posts')

            ###################################################################
            # Once site structure has been created and data is refreshed, we  #
            # can build all parts of the site in parallel, since there are no #
            # dependencies between them.                                      #
            ###################################################################

            # Depends on: 'templates/', 'data/'
            if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
                print_progress(text='Generate error pages')
                copy_custom_error_pages(data=data_source)

            def batched_job(inp, batch_fn, batch_size, message):
                batches = []
                input_size = len(inp)
                for batch in [
                        inp[i:i + batch_size]
                        for i in range(0, input_size, batch_size)
                ]:
                    submission = executor.submit(batch_fn, batch=batch)
                    batches.append(submission)
                    futures.append(submission)
                for i, f in enumerate(
                        concurrent.futures.as_completed(batches)):
                    print_progress(
                        text=
                        f"{message} {min((i+1) * batch_size, input_size)}/{input_size}"
                    )
                return batches

            # Explorer: depends on 'data/'
            if event & DATA_FOLDER or event & STATIC_FOLDER:
                futures.append(executor.submit(build_explorer, ))

            # Depends on: 'data/', 'blog/', 'templates/'
            if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
                futures.append(
                    executor.submit(generate_sitemap,
                                    blog_posts=self.blog_posts))

            # Depends on: 'data/', 'templates/'
            if event & DATA_FOLDER or event & TEMPLATES_FOLDER:
                # Home
                build_home(data=data_source)
                build_privacy_policy(data=data_source)

                # Trackers
                trackers = [id for id, _ in data_source.trackers.iter()]
                batched_job(trackers, build_tracker_page_batch, 150,
                            "Generate tracker pages")
                build_trackers_list(data=data_source)

                # Websites
                websites = list(
                    enumerate([id for id, _ in data_source.sites.iter()]))
                batched_job(websites, build_website_pages_batch, 400,
                            "Generate website pages")
                build_website_list(data=data_source)

                # Companies
                build_company_reach_chart_page(data=data_source)

            # Depends on: 'data/', 'blog/', 'templates/'
            if event & DATA_FOLDER or event & BLOG_FOLDER or event & TEMPLATES_FOLDER:
                futures.append(
                    executor.submit(build_blogpost_pages,
                                    blog_posts=self.blog_posts))

                futures.append(
                    executor.submit(build_rss_feeds,
                                    blog_posts=self.blog_posts))

                build_blogpost_list(data=data_source,
                                    blog_posts=self.blog_posts)

            if event & DATA_FOLDER:
                build_tracker_db()
                trackers = [id for id, _ in data_source.trackers.iter()]
                data_dir = Path('_site/data/trackers/global')
                if not data_dir.exists():
                    data_dir.mkdir(parents=True)
                batched_job(trackers, build_tracker_api_batch, 150,
                            "Generate Tracker API pages")

                site_data_dir = Path('_site/data/sites/global')
                if not site_data_dir.exists():
                    site_data_dir.mkdir(parents=True)

                sites = [id for id, _ in data_source.sites.iter()]
                batched_job(sites, build_website_api_batch, 400,
                            "Generate Website API pages")

            # TODO: uncomment when company profiles are ready
            # if args['site'] or args['companies']:
            #     company_process = Process(target=build_company_pages, args=(data_source,))
            #     company_process.start()

            # Wait for all jobs to finish
            concurrent.futures.wait(futures)

            # Getting the `result` of each promise (although none is expected)
            # allows re-raising exceptions happening in child processes. If
            # we don't do it, exceptions will be silently ignored.
            for future in futures:
                future.result()

            print('Done')
Code example #27
File: data.py Project: birdsarah/whotracks.me
def build_tracker_db():
    with open('_site/data/trackerdb.json', 'w') as output:
        db_map = create_tracker_map(load_tracker_db(), with_iab_vendors=True)
        db_map['about'] = 'WhoTracks.Me tracker database: whotracks.me'
        json.dump(db_map, output, indent=2, sort_keys=True)
    print_progress(text='Generate tracker DB')
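The resulting trackerdb.json is a plain JSON document, so any consumer can load it directly. A minimal, illustrative reader (not part of the project) that picks up the about field written above:

import json

# Load the database written by build_tracker_db(); same path as above.
with open('_site/data/trackerdb.json') as f:
    db_map = json.load(f)

print(db_map['about'])  # "WhoTracks.Me tracker database: whotracks.me"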