def gen_and_link_groupby(cluster_id, base, raw_fname='data'):
    """Link a group-by ranking file into `base` and return its HTML snippet.

    Args:
        cluster_id: identifier passed to index_rank.fname_rank_list and used
            in the suggested download name ('compare-<cluster_id>.json').
        base: directory in which the symlink is created.
        raw_fname: link name without extension; '.json' is appended.

    Returns:
        The result of prefill_groupby_table() for the ranking file followed
        by the result of HTML.p_download_json() for the created link.
    """
    rank_json = index_rank.fname_rank_list('groupby', cluster_id)
    link_name = raw_fname + '.json'
    mylib.symlink(rank_json, mylib.path_add(base, link_name))

    download_name = 'compare-{}.json'.format(cluster_id)
    table_html = prefill_groupby_table(rank_json)
    download_html = HTML.p_download_json(link_name, download_name)
    return table_html + download_html
def gen_results(base_dir, c_apps, c_domains, title):
    """Print the database totals and write the results landing page.

    Args:
        base_dir: output directory handed to HTML.write; 'rank.json' is
            symlinked into it afterwards.
        c_apps: number of apps shown on the page.
        c_domains: number of unique domains shown on the page.
        title: page title, also used as the <h2> heading.
    """
    c_recs, c_logs = index_rank.get_total_counts()

    summary = (
        (' {} apps', c_apps),
        (' {} domains', c_domains),
        (' {} recordings', c_recs),
        (' {} logs', c_logs),
    )
    for line_fmt, value in summary:
        print(line_fmt.format(value))

    # Adjacent literals are joined by the compiler into a single template.
    # '{:,}' renders the counters with thousands separators.
    template = (
        ' <h2>{}</h2>'
        ' <p>The appchk database currently contains <b>{:,} apps</b>'
        ' with a total of <b>{:,} unique domains</b>.</p>'
        ' <p>Collected through <b>{:,} recordings</b>'
        ' with <b>{:,} individual requests</b>.</p>'
        ' <ul>'
        ' <li>List of <a href="/index/apps/">Apps</a></li>'
        ' <li>List of <a href="/category/">Categories</a></li>'
        ' <li>List of <a href="/index/domains/all/">All Domains</a>,'
        ' only <a href="/index/domains/tracker/">Trackers</a>,'
        ' or <a href="/index/domains/highly-used/">Highly-used Domains</a>'
        ' <br>which appear in at least 5 apps'
        ' but are not considered tracker <i>yet</i>.</li>'
        ' </ul>'
        ' <ul>'
        ' <li>Compare <a href="/lists/">App Lists</a></li>'
        ' <li>Compare <a href="/compare/">Group Lists</a></li>'
        ' </ul> '
    )
    page = template.format(title, c_apps, c_domains, c_recs, c_logs)
    HTML.write(base_dir, page, title=title)

    # Keep this after HTML.write (ordering required by the original author;
    # presumably the write step prepares base_dir -- TODO confirm).
    rank_link = mylib.path_add(base_dir, 'rank.json')
    mylib.symlink(index_rank.fname_app_rank(), rank_link)
def process(affected=None, per_page=60):
    """Generate the per-category app pages and the category overview.

    Args:
        affected: optional collection of category ids. When truthy, pages
            are only regenerated for ids contained in it; every category is
            still listed on the overview page.
        per_page: forwarded to HTML.write_app_pages.
    """
    print('generating html: category-index ...')
    root = mylib.path_out('category')
    parent = 'All Categories'
    overview = []

    for src_file, category in mylib.enum_categories():
        cid, cname = category['meta']
        entry = [cid, cname]
        overview.append(entry)
        if affected and cid not in affected:
            continue

        cat_dir = mylib.path_add(root, cid)
        # full url since categories can have page 2, 3, etc.
        header = HTML.h2_path_n_rank(
            cname, [('/category/', parent)], 'ranking/')
        footer = HTML.p_download_json(
            'data.json', 'category-{}.json'.format(cid))
        _, count = HTML.write_app_pages(
            cat_dir, category['apps'], cname, per_page,
            pre=header, post=footer)
        # write_app_pages breaks html_ranking!! call html_ranking after this!
        print(' {} ({})'.format(cname, count))
        if count > 1:
            # Show the count next to the category name on the overview.
            entry[1] += ' ({})'.format(count)
        mylib.symlink(src_file, mylib.path_add(cat_dir, 'data.json'))

    print(' .. {} categories'.format(len(overview)))
    write_overview_page(root, overview, parent)
    print('')
def write_ranking_custom_lists(base_dir, list_id, list_name, parent_title):
    """Write the comparison page for one custom app list.

    Args:
        base_dir: parent output directory; the page goes into
            <base_dir>/<list_id>.
        list_id: identifier of the list (directory name, ranking lookup key
            and part of the download file name).
        list_name: display name used in the heading and the page title.
        parent_title: label of the '/lists/' breadcrumb.
    """
    out_dir = mylib.path_add(base_dir, list_id)
    breadcrumbs = [('/results/', 'Results'), ('/lists/', parent_title)]
    download_name = 'raw-list-{}.json'.format(list_id)

    parts = [
        html_h2_path(breadcrumbs, list_name),
        html_table(),
        HTML.p_download_json('data.json', download_name),
        html_script_chunk('data.json', 9, 1),  # tracker percent asc
    ]
    HTML.write(out_dir, ''.join(parts), title='Compare: ' + list_name)

    ranking_file = index_rank.fname_rank_list('custom', list_id)
    mylib.symlink(ranking_file, mylib.path_add(out_dir, 'data.json'))
def write_ranking_all(title, base_dir):
    """Write the ranking page covering all apps.

    Args:
        title: page title passed to HTML.write.
        base_dir: output directory for the page and its 'data.json' link.
    """
    # full urls since app index can have page 2, 3, etc.
    breadcrumbs = [('/results/', 'Results'), ('/index/apps/', 'Apps (A–Z)')]

    parts = [
        html_h2_path(breadcrumbs),
        html_default_description(),
        html_table(),
        HTML.p_download_json('data.json', 'raw-apps.json'),
        html_script_chunk('data.json', 12, -1),  # last update desc
    ]
    HTML.write(base_dir, ''.join(parts), title=title)

    data_link = mylib.path_add(base_dir, 'data.json')
    mylib.symlink(index_rank.fname_ranking_all(), data_link)
def write_ranking_category(cid, category_name):
    """Write the ranking page of a single category.

    Args:
        cid: category id; selects the output directory
            (category/<cid>/ranking), the ranking file and the download name.
        category_name: display name used in the breadcrumb and page title.
    """
    out_dir = mylib.path_out('category', cid, 'ranking')
    # full urls since categories can have page 2, 3, etc.
    breadcrumbs = [
        ('/category/', 'All Categories'),
        ('/category/{}/'.format(cid), category_name),
    ]
    download_name = 'raw-category-{}.json'.format(cid)

    parts = [
        html_h2_path(breadcrumbs),
        html_default_description(),
        html_table(),
        HTML.p_download_json('data.json', download_name),
        html_script_chunk('data.json', 12, -1),  # last update desc
    ]
    HTML.write(out_dir, ''.join(parts),
               title='Category Ranking: ' + category_name)

    ranking_file = index_rank.fname_rank_list('category', cid)
    mylib.symlink(ranking_file, mylib.path_add(out_dir, 'data.json'))
def process(bundle_ids):
    """Generate the page and 'data.json' link for each selected app.

    Progress is reported as one dot per app, 50 dots per output line.

    Args:
        bundle_ids: selection forwarded to mylib.appids_in_out, which
            yields the bundle ids to process.
    """
    dots_per_line = 50
    print('generating html: apps ...')

    for pos, bid in enumerate(mylib.appids_in_out(bundle_ids)):
        gen_page(bid, bundle_combine.get_evaluated(bid))
        mylib.symlink(bundle_combine.fname_evaluated(bid),
                      mylib.path_out_app(bid, 'data.json'))

        starts_line = pos % dots_per_line == 0
        mylib.printf(' .' if starts_line else '.')
        if (pos + 1) % dots_per_line == 0:
            print('')  # close printf

    print('')  # close printf
    print('')
def process():
    """Generate the lookup files and all domain index pages.

    Returns:
        Tuple (app_count, dom_count): the value of
        index_domains.number_of_apps() for the full index and the number of
        entries under its 'subdom' key.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating html: domain-index ...')
    data = index_domains.loadAll()
    app_count = index_domains.number_of_apps(data)
    dom_count = len(data['subdom'])

    # Prepare for lookup
    names = [[bid, index_app_names.get_name(bid)] for bid in data['bundle']]
    results_dir = mylib.path_out('results')
    mylib.mkdir(results_dir)
    mylib.json_write(mylib.path_add(results_dir, 'lookup-apps.json'), names)
    mylib.symlink(index_domains.fname_dom_subdoms(),
                  mylib.path_add(results_dir, 'subdoms.json'))
    names = None  # no longer needed, drop the reference

    print(' Lookup')
    gen_lookup(mylib.path_out('domain'), data['pardom'], True,
               title='Domain Lookup')
    gen_lookup(mylib.path_out('subdomain'), data['subdom'], False,
               title='Subdomain Lookup')

    print(' All Domains')
    # Strip the first element of every entry, in place, before the index
    # pages are built (the lookup pages above received the full entries).
    for section in ('subdom', 'pardom'):
        table = data[section]
        for dom in table.keys():
            table[dom] = table[dom][1:]
    gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
                     json=data, title='Requested Domains',
                     symlink=index_domains.fname_all())
    data = None  # drop the reference before the next index is loaded

    print(' Trackers Only')
    gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'),
                     app_count, json=index_domains.loadTracker(),
                     title='Tracker',
                     symlink=index_domains.fname_tracker())

    print(' Highly Used')
    gen_html_trinity(mylib.path_out('index', 'domains', 'highly-used'),
                     app_count, json=index_domains.loadNonTracker(),
                     title='Highly Used Domains',
                     symlink=index_domains.fname_no_tracker())
    print('')
    return app_count, dom_count
def gen_html_trinity(idx_dir, app_count, json, title, symlink):
    """Write the by-name, by-count and top-25 pages of one domain index.

    Args:
        idx_dir: output directory of this index.
        app_count: forwarded unchanged to gen_html_top_10.
        json: mapping with 'subdom' and 'pardom' entries, each mapping a
            domain name to a sized collection (only its len() is used).
        title: base title; each page derives its own title from it.
        symlink: path that 'data.json' inside idx_dir will point to.
    """
    subdomains = [(dom, len(ids)) for dom, ids in json['subdom'].items()]
    domains = [(dom, len(ids)) for dom, ids in json['pardom'].items()]

    def write_index(fname, page_title, button):
        # Reads the two lists when called, so each page reflects the sort
        # order that is in effect at that moment.
        body = '<h2>{}</h2>{}{}'.format(
            HTML.a_path([('/results/', 'Results')], page_title),
            dropdown_choose(button),
            duo_list(subdomains, domains))
        HTML.write(idx_dir, body, title=page_title, fname=fname)

    # Full list (A–Z)
    subdomains.sort(key=lambda x: x[0])
    domains.sort(key=lambda x: x[0])
    write_index('by_name.html', '{} (A–Z)'.format(title),
                'Full list (A–Z)')

    # Full list (by count); the sort is stable, so equal counts keep the
    # alphabetical order established above.
    subdomains.sort(key=lambda x: -x[1])
    domains.sort(key=lambda x: -x[1])
    write_index('by_count.html', '{} (by count)'.format(title),
                'Full list (by count)')

    # Top 25 (from the 'pardom' list, currently sorted by count)
    gen_html_top_10(idx_dir, domains[:25], app_count,
                    'Top 25 {}'.format(title))

    # idx_dir is already a complete output directory, so extend it with
    # path_add instead of running it through path_out a second time.
    mylib.symlink(symlink, mylib.path_add(idx_dir, 'data.json'))