Example #1
0
def build_website_list(data):
    """Render the website overview page to ``_site/websites.html``.

    The page is rendered from the ``websites.html`` template with the site
    list in two orderings (``sort_by_rank`` and ``sort_by_cat``) plus the
    header numbers from ``header_stats``.

    Args:
        data: Data source exposing ``sites``; also passed to ``get_template``.
    """
    sites = data.sites

    text_content = {
        "summary_title":
        "Top Website Stats",
        "summary_description":
        "Proportion of requests on page load that go toward third party trackers\
                                average number of trackers, and use Https in these top websites"
    }

    # NOTE(review): the result used to be unpacked and copied into two lists
    # that nothing read. The call is kept only in case summary_stats has side
    # effects -- confirm and drop it if it is pure.
    summary_stats(sites)

    header_numbers = header_stats(sites)
    sorted_websites = sort_by_rank(sites)
    sorted_websites_cat = sort_by_cat(sites)

    # Render before opening the target: mode 'w' truncates the file, so a
    # failure while rendering would otherwise leave an empty page behind.
    file_content = render_template(
        template=get_template(data, "websites.html"),
        content=text_content,
        website_list=sorted_websites,
        website_list_cat=sorted_websites_cat,
        header_numbers=header_numbers)

    with open('_site/websites.html', 'w') as fp:
        fp.write(file_content)
    print_progress(text="Website list")
Example #2
0
def build_trackers_list(data):
    """Render the tracker overview page to ``_site/trackers.html``.

    Two orderings of ``data.apps`` are handed to the template: by
    ``overview.reach`` (descending) and by company id. Apps without a
    ``name`` entry get their ``overview.id`` as name; this mutates the app
    entries in place.

    Args:
        data: Data source exposing ``apps`` and ``get_app_name``; also passed
            to ``get_template``.
    """
    apps = data.apps

    text_content = {
        "summary_title":
        "Tracker Stats",
        "summary_description":
        "The proportion of page loads where cookies and\
                                fingerprinting are used for tracking, and requests are done over Https"
    }

    def company_sort_key(app):
        # A missing key, None and the literal string "None" are treated
        # alike: fall back to whatever data.get_app_name returns for the app.
        company_id = app.get('company_id')
        if company_id in (None, "None"):
            return data.get_app_name(app['overview']['id'])
        return company_id

    sorted_trackers = sorted(apps.values(),
                             key=lambda a: a['overview']['reach'],
                             reverse=True)
    sorted_trackers_cat = sorted(apps.values(), key=company_sort_key)

    for tracker in sorted_trackers:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    # Render before opening the target: mode 'w' truncates the file, so a
    # failure while rendering would otherwise leave an empty page behind.
    file_content = render_template(
        template=get_template(data, name="trackers.html"),
        content=text_content,
        tracker_list=sorted_trackers,
        trackers_list_cat=sorted_trackers_cat,
        header_stats=tracker_header_stats(apps))

    with open('_site/trackers.html', 'w') as fp:
        fp.write(file_content)
    print_progress(text="Tracker list")
Example #3
0
def build_reports_list(data, entity='reports'):
    """Render the listing page for ``entity`` to ``_site/<entity>.html``.

    Every file in the ``<entity>/`` directory is run through ``parse``;
    entries whose ``publish`` value is falsy are left out of the listing.

    Args:
        data: Passed through to ``get_template``.
        entity: Name of the source directory; also used for the template
            name (``<entity>.html``) and the output file name.
    """
    # Collect and render everything before opening the target: mode 'w'
    # truncates the file, so a failure while listing, parsing or rendering
    # would otherwise leave an empty page behind.
    parsed = [
        parse(os.path.join(entity, f))
        for f in os.listdir("{}/".format(entity))
    ]
    reports = [r for r in parsed if r['publish']]
    content = render_template(
        template=get_template(data, "{}.html".format(entity)),
        reports=reports)

    with open('_site/{}.html'.format(entity), 'w') as fp:
        fp.write(content)
    print_progress(text="{} List".format(entity.capitalize()))
Example #4
0
def html_index(sink, crumbs, root, datauri, querychannel):
    """Fill a template context from ``sink`` and expand the 'index' template.

    Args:
        sink: Object exposing ``channels``, ``days``, ``day2channels``,
            ``nicks`` and ``channel2nicks``.
        crumbs: Passed to the template unchanged.
        root: Base URI that channel and user URIs are built from.
        datauri: URI of this page; when equal to ``root`` the template gets
            ``root + "index"`` as ``datauri2`` instead.
        querychannel: When truthy, the user list is restricted to that
            channel's nicks and the title names a channel.
    """
    ctx = new_context()
    ctx.addGlobal('crumbs', crumbs)
    ctx.addGlobal('datarooturi', root)
    ctx.addGlobal('datauri', datauri)
    ctx.addGlobal('querychannel', querychannel)

    channel_names = sorted(sink.channels.keys())

    page_title = ("Channel #%s" % channel_names[0]
                  if querychannel else "Some IRC discussion logs")
    ctx.addGlobal('title', page_title)

    ctx.addGlobal('datauri2', root + "index" if datauri == root else datauri)

    channel_ids = [name.strip("#").lower() for name in channel_names]
    channel_entries = [{'uri': root + cid + "#channel", 'name': cid}
                       for cid in channel_ids]

    # The template must receive a real list, not a reverse iterator
    # (works around a bug in simpletal).
    newest_first = sorted(sink.days.keys())
    newest_first.reverse()

    ctx.addGlobal('channels', channel_entries)
    ctx.addGlobal('days', newest_first)
    ctx.addGlobal('day2channels', sink.day2channels)

    nick_names = sorted(sink.channel2nicks[querychannel].keys()
                        if querychannel else sink.nicks)

    ctx.addGlobal('users', [{'uri': root + "users/%s#user" % nick,
                             'name': nick}
                            for nick in nick_names])
    ctx.addGlobal('nick2people', get_nick2people())

    expand_template(get_template('index'), ctx)
Example #5
0
def build_company_pages(data):
    """Run ``company_page`` for every entry of ``data.companies``.

    The calls are submitted to a thread pool, and every result is awaited so
    that an exception raised in a worker is re-raised here.

    Args:
        data: Data source exposing ``companies``; also passed to
            ``get_template`` and to each ``company_page`` call.
    """
    companies = data.companies
    page_template = get_template(data, "company-page.html")

    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        jobs = [
            pool.submit(company_page, page_template, company, data)
            for company in companies.values()
        ]
        # result() is called only to surface worker exceptions.
        for job in jobs:
            job.result()

    print_progress(text="Company pages")
Example #6
0
def build_report_pages(data, entity="reports"):
    """Render one HTML page per file found in the ``<entity>/`` directory.

    Each file is run through ``parse`` and rendered with the
    ``report-page.html`` template into
    ``_site/<entity>/<filename>.html``, where ``filename`` is read from the
    parsed report.

    Args:
        data: Passed through to ``get_template``.
        entity: Name of the source directory and of the output
            sub-directory under ``_site``.
    """
    template = get_template(data,
                            "report-page.html",
                            render_markdown=True,
                            path_to_root='..')
    # NOTE(review): unlike build_reports_list, entries are not filtered on
    # their 'publish' value here -- confirm that is intended.
    for f in os.listdir("{}/".format(entity)):
        report = parse(os.path.join(entity, f))
        # Render before opening the target: mode 'w' truncates the file, so
        # a failure while rendering would otherwise leave an empty page.
        page = render_template(path_to_root='..',
                               template=template,
                               report=report)
        target = "_site/{0}/{1}.html".format(entity, report.get("filename"))
        with open(target, 'w') as fp:
            fp.write(page)
    print_progress(text=entity.capitalize())
Example #7
0
    def close(self):
        """Publish the collected state and expand the 'channellog' template.

        Adds the channel (name and URI), time prefix, title and events of
        this instance to ``self.context`` as globals before expanding.
        """
        ctx = self.context

        # Channel name without '#' characters at either end, lower-cased.
        channel_id = self.channel.strip("#").lower()
        channel_info = {
            'name': channel_id,
            'uri': self.root + channel_id + "#channel",
        }
        ctx.addGlobal('channel', channel_info)

        ctx.addGlobal('timeprefix', self.timeprefix)
        ctx.addGlobal('title', self.title)
        ctx.addGlobal('events', self.events)

        expand_template(get_template('channellog'), ctx)
Example #8
0
def build_website_pages(data):
    """Run ``website_page`` for every site, ranked by popularity.

    Sites are ordered by ``overview.popularity`` (descending) and each is
    handed its 1-based rank. The calls run on a thread pool; every result is
    awaited so that worker exceptions are re-raised here.

    Args:
        data: Data source exposing ``sites``; also passed to
            ``get_template`` and to each ``website_page`` call.
    """
    def popularity(item):
        return item[1]['overview']['popularity']

    sites = data.sites
    page_template = get_template(data, "website-page.html", path_to_root='..')
    ranked_sites = sorted(sites.items(), key=popularity, reverse=True)

    # The thread pool is there to speed the build up.
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        jobs = [
            pool.submit(website_page, page_template, site_id, rank, data)
            for rank, (site_id, _site) in enumerate(ranked_sites, start=1)
        ]
        for job in jobs:
            job.result()

    print_progress(text="Website pages")
Example #9
0
def build_tracker_pages(data):
    """Run ``tracker_page`` for every app, submitted in order of reach.

    Apps are ordered by ``overview.reach`` (descending) before submission.
    The calls run on a thread pool; every result is awaited so that worker
    exceptions are re-raised here.

    Args:
        data: Data source exposing ``apps``; also passed to ``get_template``
            and to each ``tracker_page`` call.
    """
    def reach(item):
        return item[1]['overview']['reach']

    apps = data.apps
    page_template = get_template(data, name='tracker-page.html',
                                 path_to_root='..')
    by_reach = sorted(apps.items(), key=reach, reverse=True)

    # The thread pool is there to speed the build up.
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as pool:
        jobs = [
            pool.submit(tracker_page, page_template, app_id, app, data)
            for app_id, app in by_reach
        ]
        for job in jobs:
            job.result()

    print_progress(text="Tracker Pages")
Example #10
0
def render_user_index(sink, format, crumbs, datarooturi, datauri):
    """Emit the index of all known nicks as HTML or Turtle.

    Args:
        sink: Not used by this function.
        format: ``"html"`` expands the 'users' template; ``"turtle"`` writes
            triples through ``TurtleWriter``. Any other value emits nothing.
        crumbs: Passed to the HTML template unchanged.
        datarooturi: Base URI that user URIs and the freenode URI are built
            from.
        datauri: Passed to the HTML template unchanged.
    """
    freenode_uri = datarooturi + "#freenode"
    nicks = get_nicks()
    nick2people = get_nick2people()

    def user_uri(nick):
        return datarooturi + "users/%s#user" % nick

    if format == "html":
        ctx = new_context()
        ctx.addGlobal('crumbs', crumbs)
        ctx.addGlobal('datarooturi', datarooturi)
        ctx.addGlobal('datauri', datauri)
        ctx.addGlobal('users', [{'uri': user_uri(nick), 'nick': nick}
                                for nick in nicks])
        expand_template(get_template('users'), ctx)
        return

    if format != "turtle":
        return

    triples = []
    for nick in nicks:
        account = user_uri(nick)
        # The leading None is presumably a group separator understood by
        # TurtleWriter -- TODO confirm.
        triples.extend([
            None,
            (freenode_uri, SIOC.space_of, account),
            (account, RDFS.label, PlainLiteral(nick)),
            (account, RDF.type, SIOC.User),
        ])
        if nick in nick2people:
            triples.append((nick2people[nick], FOAF.holdsAccount, account))

    writer = TurtleWriter(None, namespaces)
    writer.write([("", RDFS.label, PlainLiteral("User index")),
                  ("", FOAF.primaryTopic, freenode_uri)])
    writer.write(triples)
    writer.close()
Example #11
0
def build_home(data):
    """Render the home page to ``_site/index.html``.

    The template receives the first 20 apps by ``overview.reach``
    (descending), the first 20 apps ordered by ``cat``, the most and least
    tracked sites from ``tracked_by_category`` and a header graph built from
    ``company_reach``. Apps without a ``name`` entry get their
    ``overview.id`` as name; this mutates the app entries in place.

    Args:
        data: Data source exposing ``apps``, ``sites`` and ``companies``;
            also passed to ``get_template``.
    """
    apps = data.apps

    def category_sort_key(app):
        # A missing or None category sorts as the empty string.
        return '' if app.get('cat') is None else app['cat']

    sorted_trackers = sorted(apps.values(),
                             key=lambda a: a['overview']['reach'],
                             reverse=True)
    sorted_trackers_cat = sorted(apps.values(), key=category_sort_key)

    # One pass over both orderings instead of two identical loops.
    for tracker in sorted_trackers + sorted_trackers_cat:
        if 'name' not in tracker:
            tracker['name'] = tracker['overview']['id']

    # most tracked sites by cat
    most_tracked_sites = tracked_by_category(data.sites, worst=True)
    # least tracked sites by cat
    least_tracked_sites = tracked_by_category(data.sites, worst=False)

    top10 = company_reach(data.companies)
    header_graph = Markup(overview_bars(top10))

    # Render before opening the target: mode 'w' truncates the file, so a
    # failure while rendering would otherwise leave an empty page behind.
    content = render_template(template=get_template(data, "index.html"),
                              ts=header_graph,
                              tracker_list=sorted_trackers[:20],
                              trackers_list_cat=sorted_trackers_cat[:20],
                              most_tracked_sites=most_tracked_sites,
                              least_tracked_sites=least_tracked_sites)

    with open('_site/index.html', 'w') as fp:
        fp.write(content)
    print_progress(text="Home page")
Example #12
0
def render_user(sink, format, crumbs, datarooturi, nick, datauri, latestsink):
    """Emit the page describing a single IRC user as HTML or Turtle.

    If ``find_person`` returns a person for ``nick``, the document that
    person URI points at (the part before the last ``#``) is loaded into an
    RDF model. A load failure is not fatal: the message is stored and shown
    on the HTML page.

    Args:
        sink: Object exposing ``nick2channels``, used to list the user's
            channels.
        format: ``"html"`` expands the 'user' template; ``"turtle"`` writes
            triples through ``TurtleWriter``. Any other value emits nothing.
        crumbs: Passed to the HTML template unchanged.
        datarooturi: Base URI that user and channel URIs are built from.
        nick: The IRC nick to describe.
        datauri: Passed to the HTML template unchanged.
        latestsink: Object whose ``events`` are passed to the HTML template.
    """
    userURI = datarooturi + "users/%s#user" % nick

    # The RDF module is imported on first use and published as the
    # module-level name ``Red``.
    global Red
    import RDF as Red

    person = find_person(nick)

    error = None

    model = Red.Model()
    # XXX work around a bug in Redland?
    if person:
        try:
            model.load(person.rsplit('#', 1)[0], name='guess')
        except Exception:
            # Was a bare ``except:``, which also swallowed KeyboardInterrupt
            # and SystemExit. Ordinary load failures are still only reported.
            error = "Error loading the FOAF info: %s" % sys.exc_info()[1]

    channels = sorted(sink.nick2channels.get(nick, {}).keys())

    if format == "html":
        context = new_context()
        context.addGlobal('crumbs', crumbs)
        context.addGlobal('datarooturi', datarooturi)
        context.addGlobal('datauri', datauri)
        context.addGlobal('error', error)

        # Key/value rows shown for the person; stays empty without a person.
        info = []
        if person:
            for name in link_values(model, person, [FOAF.name, FOAF.firstName, FOAF.nick, RDFS.label]):
                info.append({'key': 'Name', 'value': "%s" % name})
            for ircnick in nick_values(datarooturi, model, person, [FOAF.holdsAccount]):
                # Annotate each claimed account with whether it mentions
                # this user's URI.
                # NOTE(review): nick is interpolated into markup unescaped --
                # confirm it cannot contain HTML metacharacters.
                if userURI in ("%s" % ircnick):
                    ircnick = ircnick + " <em>[confirms the Web ID claim]</em>"
                elif ircnick is None:
                    ircnick = """None <em>[can't confirm the Web ID claim, should be <a href="%s">%s</a>]</em>""" % (userURI, nick)
                else:
                    ircnick = ircnick + """ <em>[doesn't confirm the Web ID claim, should be <a href="%s">%s</a>]</em>""" % (userURI, nick)
                info.append({'key': 'IRC account', 'value': "%s" % ircnick})
            for website in link_values(model, person, [FOAF.homepage]):
                info.append({'key': 'Website', 'value': "%s" % website})
            for weblog in link_values(model, person, [FOAF.weblog]):
                info.append({'key': 'Weblog', 'value': "%s" % weblog})
            for img in image_values(model, person, [FOAF.depiction, FOAF.img]):
                info.append({'key': 'Image', 'value': "%s" % img})
            for known in friend_values(datarooturi, model, person, [FOAF.knows]):
                info.append({'key': 'Knows', 'value': "%s" % known})

        context.addGlobal('here', {'nick': nick,
                                   'person': {'webid': person,
                                              'info': info}})

        channeldata = []
        for channel in channels:
            channelURI = datarooturi + "%s#channel" % channel
            channeldata.append({'uri': channelURI, 'name': "#"+channel})
        context.addGlobal('channels', channeldata)

        context.addGlobal('events', latestsink.events)

        template = get_template('user')
        expand_template(template, context)

    elif format == "turtle":
        oldUserURI = "irc://freenode/%s,isuser" % nick
        # The None entries are presumably group separators understood by
        # TurtleWriter -- TODO confirm.
        triples = [None,
                   (datarooturi + "#freenode", SIOC.space_of, userURI),
                   (userURI, OWL.sameAs, oldUserURI),
                   (userURI, RDFS.label, PlainLiteral(nick)),
                   (userURI, RDF.type, SIOC.User),
                   ]
        if person:
            triples += [None, (person, FOAF.holdsAccount, userURI)]
            triples += get_triples(model, person, [FOAF.name, FOAF.firstName, FOAF.nick, RDFS.label])
        for channel in channels:
            channelURI = datarooturi + "%s#channel" % channel
            triples += [None,
                        (channelURI, SIOC.has_subscriber, userURI),
                        (channelURI, RDFS.label, PlainLiteral("#%s" % channel)),
                        ]
        writer = TurtleWriter(None, namespaces)
        title = "About user %s" % nick
        writer.write([("", RDFS.label, PlainLiteral(title)),
                      ("", FOAF.primaryTopic, userURI),
                      ])
        writer.write(triples)
        writer.close()