Ejemplo n.º 1
0
def gen_and_link_groupby(cluster_id, base, raw_fname='data'):
    """Link the group-by rank list into `base` and return its HTML snippet.

    The rank list file of the 'groupby' cluster `cluster_id` is symlinked
    into `base` as `<raw_fname>.json`.

    Returns the output of `prefill_groupby_table` for that file followed by
    the download paragraph produced by `HTML.p_download_json`.
    """
    rank_json = index_rank.fname_rank_list('groupby', cluster_id)
    link_name = raw_fname + '.json'
    mylib.symlink(rank_json, mylib.path_add(base, link_name))
    table_html = prefill_groupby_table(rank_json)
    # The download is offered under a name that identifies the cluster.
    download_html = HTML.p_download_json(
        link_name, 'compare-{}.json'.format(cluster_id))
    return table_html + download_html
Ejemplo n.º 2
0
def gen_results(base_dir, c_apps, c_domains, title):
    """Print the database totals and write the results landing page.

    `c_apps` and `c_domains` are supplied by the caller; the recording and
    log totals are read from `index_rank.get_total_counts()`. After the page
    is written, the app rank file is symlinked into `base_dir` as
    'rank.json'.
    """
    c_recs, c_logs = index_rank.get_total_counts()
    summary = (
        ('    {} apps', c_apps),
        ('    {} domains', c_domains),
        ('    {} recordings', c_recs),
        ('    {} logs', c_logs),
    )
    for line, count in summary:
        print(line.format(count))

    page = '''
<h2>{}</h2>
<p>The appchk database currently contains <b>{:,}&nbsp;apps</b> with a total of <b>{:,} unique domains</b>.</p>
<p>Collected through <b>{:,}&nbsp;recordings</b> with <b>{:,} individual requests</b>.</p>
<ul>
  <li>List of <a href="/index/apps/">Apps</a></li>
  <li>List of <a href="/category/">Categories</a></li>
  <li>List of <a href="/index/domains/all/">All Domains</a>, 
  only <a href="/index/domains/tracker/">Trackers</a>,
  or <a href="/index/domains/highly-used/">Highly-used Domains</a> <br>which appear in at least 5 apps but are not considered tracker <i>yet</i>.</li>
</ul>
<ul>
  <li>Compare <a href="/lists/">App Lists</a></li>
  <li>Compare <a href="/compare/">Group Lists</a></li>
</ul>
'''
    HTML.write(base_dir,
               page.format(title, c_apps, c_domains, c_recs, c_logs),
               title=title)
    # The symlink must be created after HTML.write.
    rank_source = index_rank.fname_app_rank()
    mylib.symlink(rank_source, mylib.path_add(base_dir, 'rank.json'))
Ejemplo n.º 3
0
def process(affected=None, per_page=60):
    """Generate the app pages of each category plus the category overview.

    affected: optional collection of category ids. When it is non-empty,
        only those categories get their pages rewritten; every category is
        still listed on the overview page.
    per_page: forwarded to `HTML.write_app_pages`.
    """
    print('generating html: category-index ...')
    base = mylib.path_out('category')
    parent = 'All Categories'
    overview = []
    for fname, category in mylib.enum_categories():
        cid, cname = category['meta']
        entry = [cid, cname]
        overview.append(entry)
        needs_update = not affected or cid in affected
        if not needs_update:
            continue
        out_dir = mylib.path_add(base, cid)
        # full url since categories can have page 2, 3, etc.
        header = HTML.h2_path_n_rank(cname, [('/category/', parent)],
                                     'ranking/')
        footer = HTML.p_download_json('data.json',
                                      'category-{}.json'.format(cid))
        _, count = HTML.write_app_pages(out_dir, category['apps'], cname,
                                        per_page, pre=header, post=footer)
        # NOTE: write_app_pages breaks html_ranking!!
        # call html_ranking after this!
        print('  {} ({})'.format(cname, count))
        if count > 1:
            # Show the count next to the category name on the overview.
            entry[-1] += ' ({})'.format(count)
        mylib.symlink(fname, mylib.path_add(out_dir, 'data.json'))

    print('  .. {} categories'.format(len(overview)))
    write_overview_page(base, overview, parent)
    print('')
Ejemplo n.º 4
0
def write_ranking_custom_lists(base_dir, list_id, list_name, parent_title):
    """Write the comparison page of one custom list and link its data file.

    The page is written to `<base_dir>/<list_id>` and the 'custom' rank list
    of `list_id` is symlinked next to it as 'data.json'.
    """
    out_dir = mylib.path_add(base_dir, list_id)
    breadcrumbs = [('/results/', 'Results'), ('/lists/', parent_title)]
    parts = [
        html_h2_path(breadcrumbs, list_name),
        html_table(),
        HTML.p_download_json('data.json',
                             'raw-list-{}.json'.format(list_id)),
        html_script_chunk('data.json', 9, 1),  # tracker percent asc
    ]
    HTML.write(out_dir, ''.join(parts), title='Compare: ' + list_name)
    rank_source = index_rank.fname_rank_list('custom', list_id)
    mylib.symlink(rank_source, mylib.path_add(out_dir, 'data.json'))
Ejemplo n.º 5
0
def write_ranking_all(title, base_dir):
    """Write the ranking page covering all apps and link its data file.

    The page is written to `base_dir` with the given `title`; the file named
    by `index_rank.fname_ranking_all()` is symlinked there as 'data.json'.
    """
    # full urls since app index can have page 2, 3, etc.
    heading = html_h2_path([('/results/', 'Results'),
                            ('/index/apps/', 'Apps (A–Z)')])
    description = html_default_description()
    table = html_table()
    download = HTML.p_download_json('data.json', 'raw-apps.json')
    script = html_script_chunk('data.json', 12, -1)  # last update desc
    HTML.write(base_dir,
               heading + description + table + download + script,
               title=title)
    ranking_source = index_rank.fname_ranking_all()
    mylib.symlink(ranking_source, mylib.path_add(base_dir, 'data.json'))
Ejemplo n.º 6
0
def write_ranking_category(cid, category_name):
    """Write the ranking page of one category and link its data file.

    The page goes to the 'category/<cid>/ranking' output directory; the
    'category' rank list of `cid` is symlinked there as 'data.json'.
    """
    out_dir = mylib.path_out('category', cid, 'ranking')
    # full urls since categories can have page 2, 3, etc.
    breadcrumbs = [
        ('/category/', 'All Categories'),
        ('/category/{}/'.format(cid), category_name),
    ]
    download_name = 'raw-category-{}.json'.format(cid)
    parts = [
        html_h2_path(breadcrumbs),
        html_default_description(),
        html_table(),
        HTML.p_download_json('data.json', download_name),
        html_script_chunk('data.json', 12, -1),  # last update desc
    ]
    HTML.write(out_dir, ''.join(parts),
               title='Category Ranking: ' + category_name)
    rank_source = index_rank.fname_rank_list('category', cid)
    mylib.symlink(rank_source, mylib.path_add(out_dir, 'data.json'))
Ejemplo n.º 7
0
def process(bundle_ids):
    """Generate the page of every app id and link its evaluated data file.

    Progress is shown as one dot per app, 50 dots per row, with the first
    dot of each row indented by two spaces.
    """
    print('generating html: apps ...')
    dots_per_row = 50
    column = 0
    for bid in mylib.appids_in_out(bundle_ids):
        gen_page(bid, bundle_combine.get_evaluated(bid))
        evaluated = bundle_combine.fname_evaluated(bid)
        mylib.symlink(evaluated, mylib.path_out_app(bid, 'data.json'))
        if column == 0:
            mylib.printf('  .')
        else:
            mylib.printf('.')
        column += 1
        if column == dots_per_row:
            column = 0
            print('')  # close printf
    print('')  # close printf
    print('')
Ejemplo n.º 8
0
def process():
    """Generate the lookup files and all domain index pages.

    Returns a tuple `(app_count, dom_count)`: the value reported by
    `index_domains.number_of_apps` and the number of 'subdom' entries.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating html: domain-index ...')
    domains = index_domains.loadAll()
    app_count = index_domains.number_of_apps(domains)
    dom_count = len(domains['subdom'])

    # Prepare for lookup
    app_names = []
    for bundle_id in domains['bundle']:
        app_names.append([bundle_id, index_app_names.get_name(bundle_id)])
    results_dir = mylib.path_out('results')
    mylib.mkdir(results_dir)
    mylib.json_write(mylib.path_add(results_dir, 'lookup-apps.json'),
                     app_names)
    subdoms_source = index_domains.fname_dom_subdoms()
    mylib.symlink(subdoms_source,
                  mylib.path_add(results_dir, 'subdoms.json'))
    del app_names  # no longer needed, drop the reference

    print('  Lookup')
    gen_lookup(mylib.path_out('domain'), domains['pardom'], True,
               title='Domain Lookup')
    gen_lookup(mylib.path_out('subdomain'), domains['subdom'], False,
               title='Subdomain Lookup')

    print('  All Domains')
    # The index pages use every entry without its first element.
    for section in ('subdom', 'pardom'):
        table = domains[section]
        for dom in table:
            table[dom] = table[dom][1:]
    all_dir = mylib.path_out('index', 'domains', 'all')
    gen_html_trinity(all_dir, app_count, json=domains,
                     title='Requested Domains',
                     symlink=index_domains.fname_all())
    del domains  # drop the reference before loading the next data set

    print('  Trackers Only')
    tracker_dir = mylib.path_out('index', 'domains', 'tracker')
    gen_html_trinity(tracker_dir, app_count,
                     json=index_domains.loadTracker(), title='Tracker',
                     symlink=index_domains.fname_tracker())

    print('  Highly Used')
    highly_used_dir = mylib.path_out('index', 'domains', 'highly-used')
    gen_html_trinity(highly_used_dir, app_count,
                     json=index_domains.loadNonTracker(),
                     title='Highly Used Domains',
                     symlink=index_domains.fname_no_tracker())
    print('')
    return app_count, dom_count
Ejemplo n.º 9
0
def gen_html_trinity(idx_dir, app_count, json, title, symlink):
    """Write the three index pages of one domain data set and link its data.

    Produces 'by_name.html' and 'by_count.html' in `idx_dir`, hands the 25
    most frequent 'pardom' entries to `gen_html_top_10`, and finally
    symlinks `symlink` into `idx_dir` as 'data.json'.

    idx_dir: output directory of the index pages.
    app_count: forwarded unchanged to `gen_html_top_10`.
    json: mapping with the keys 'subdom' and 'pardom'; each maps a domain
        to a sized collection (only its length is used here).
    title: base title; each page derives its own title from it.
    symlink: source path that 'data.json' will point to.
    """
    list1 = [(dom, len(ids)) for dom, ids in json['subdom'].items()]
    list2 = [(dom, len(ids)) for dom, ids in json['pardom'].items()]

    def write_index(fname, title, button):
        # Reads list1 / list2 in whatever order they have at call time.
        HTML.write(idx_dir, '<h2>{}</h2>{}{}'.format(
            HTML.a_path([('/results/', 'Results')], title),
            dropdown_choose(button), duo_list(list1, list2)
        ), title=title, fname=fname)

    # Full list (A–Z)
    list1.sort(key=lambda x: x[0])
    list2.sort(key=lambda x: x[0])
    write_index('by_name.html', title='{} (A–Z)'.format(title),
                button='Full list (A–Z)')
    # Full list (by count); the sort is stable, so equal counts stay A–Z
    list1.sort(key=lambda x: -x[1])
    list2.sort(key=lambda x: -x[1])
    write_index('by_count.html', title='{} (by count)'.format(title),
                button='Full list (by count)')
    # Top 25
    gen_html_top_10(idx_dir, list2[:25], app_count, 'Top 25 {}'.format(title))
    # idx_dir is already a complete output directory, so it is extended with
    # path_add instead of being passed through path_out a second time.
    mylib.symlink(symlink, mylib.path_add(idx_dir, 'data.json'))