def process(affected=None, per_page=60):
    """Generate the paginated app pages of each category and the overview.

    Every category yielded by ``mylib.enum_categories()`` is listed on the
    overview page. Pages are only (re)written for categories whose id is in
    ``affected``; a falsy ``affected`` rewrites all of them.

    :param affected: optional container of category ids to regenerate.
    :param per_page: number of apps per page, passed to
        ``HTML.write_app_pages``.
    """
    print('generating html: category-index ...')
    root = mylib.path_out('category')
    parent = 'All Categories'
    overview = []
    for json_path, category in mylib.enum_categories():
        cid, cname = category['meta']
        entry = [cid, cname]
        overview.append(entry)
        if affected and cid not in affected:
            continue  # still listed on the overview, but pages stay untouched

        out_dir = mylib.path_add(root, cid)
        # full url since categories can have page 2, 3, etc.
        header = HTML.h2_path_n_rank(
            cname, [('/category/', parent)], 'ranking/')
        footer = HTML.p_download_json('data.json', f'category-{cid}.json')
        _, n_apps = HTML.write_app_pages(
            out_dir, category['apps'], cname, per_page,
            pre=header, post=footer)
        # write_app_pages breaks html_ranking!! call html_ranking after this!
        print(f' {cname} ({n_apps})')
        if n_apps > 1:
            # append count to the display name used on the overview page
            entry[-1] += f' ({n_apps})'
        mylib.symlink(json_path, mylib.path_add(out_dir, 'data.json'))

    print(f' .. {len(overview)} categories')
    write_overview_page(root, overview, parent)
    print('')
def write_ranking_custom_lists(base_dir, list_id, list_name, parent_title):
    """Write the comparison page of one custom app list.

    The page is written to ``<base_dir>/<list_id>`` and a ``data.json``
    symlink to the list's rank file is placed next to it.

    :param base_dir: directory that holds all custom list pages.
    :param list_id: identifier of the list; used as sub-directory name.
    :param list_name: human-readable list name shown in heading and title.
    :param parent_title: label of the ``/lists/`` breadcrumb.
    """
    out_dir = mylib.path_add(base_dir, list_id)
    breadcrumbs = [('/results/', 'Results'), ('/lists/', parent_title)]
    parts = [
        html_h2_path(breadcrumbs, list_name),
        html_table(),
        HTML.p_download_json('data.json', f'raw-list-{list_id}.json'),
        html_script_chunk('data.json', 9, 1),  # tracker percent asc
    ]
    HTML.write(out_dir, ''.join(parts), title='Compare: ' + list_name)
    rank_file = index_rank.fname_rank_list('custom', list_id)
    mylib.symlink(rank_file, mylib.path_add(out_dir, 'data.json'))
def gen_results(base_dir, c_apps, c_domains, title):
    """Write the results landing page with the database totals.

    Prints the four counters, renders the page into ``base_dir`` and then
    links the app rank file as ``rank.json`` in the same directory.

    :param base_dir: output directory of the results page.
    :param c_apps: number of apps shown on the page.
    :param c_domains: number of unique domains shown on the page.
    :param title: used both as ``<h2>`` heading and as page title.
    """
    c_recs, c_logs = index_rank.get_total_counts()
    for amount, label in (
        (c_apps, 'apps'),
        (c_domains, 'domains'),
        (c_recs, 'recordings'),
        (c_logs, 'logs'),
    ):
        print(f' {amount} {label}')

    # Adjacent literals concatenate to a single template string.
    template = (
        ' <h2>{}</h2>'
        ' <p>The appchk database currently contains <b>{:,} apps</b>'
        ' with a total of <b>{:,} unique domains</b>.</p>'
        ' <p>Collected through <b>{:,} recordings</b>'
        ' with <b>{:,} individual requests</b>.</p>'
        ' <ul>'
        ' <li>List of <a href="/index/apps/">Apps</a></li>'
        ' <li>List of <a href="/category/">Categories</a></li>'
        ' <li>List of <a href="/index/domains/all/">All Domains</a>,'
        ' only <a href="/index/domains/tracker/">Trackers</a>,'
        ' or <a href="/index/domains/highly-used/">Highly-used Domains</a>'
        ' <br>which appear in at least 5 apps but are not considered tracker <i>yet</i>.</li>'
        ' </ul>'
        ' <ul>'
        ' <li>Compare <a href="/lists/">App Lists</a></li>'
        ' <li>Compare <a href="/compare/">Group Lists</a></li>'
        ' </ul> '
    )
    page = template.format(title, c_apps, c_domains, c_recs, c_logs)
    HTML.write(base_dir, page, title=title)
    # after HTML.write
    mylib.symlink(index_rank.fname_app_rank(),
                  mylib.path_add(base_dir, 'rank.json'))
def gen_and_link_groupby(cluster_id, base, raw_fname='data'):
    """Link a group-by rank file into ``base`` and return its HTML snippet.

    :param cluster_id: id of the group-by cluster.
    :param base: directory that receives the ``<raw_fname>.json`` symlink.
    :param raw_fname: link name without the ``.json`` extension.
    :return: output of ``prefill_groupby_table`` for the rank file followed
        by the JSON download paragraph.
    """
    rank_file = index_rank.fname_rank_list('groupby', cluster_id)
    link_name = raw_fname + '.json'
    mylib.symlink(rank_file, mylib.path_add(base, link_name))
    download_name = f'compare-{cluster_id}.json'
    table = prefill_groupby_table(rank_file)
    return table + HTML.p_download_json(link_name, download_name)
def write_overview_page(base_dir, category_tuples, title):
    """Write the category overview page and the group-by compare page.

    Categories are grouped by their id divided by 1000. Group 6 is headed
    'Apps', group 7 'Games' and every other group 'Other'.

    :param base_dir: output directory of the overview page.
    :param category_tuples: sequence of ``[category_id, display_name]``;
        each group is sorted via ``mylib.sort_by_name`` before rendering.
    :param title: heading and page title.
    """
    headings = {6: 'Apps', 7: 'Games'}

    groups = {}
    for item in category_tuples:
        group_id = int(int(item[0]) / 1000)
        groups.setdefault(group_id, []).append(item)

    html = HTML.h2_path_n_rank(title, [], 'compare/', 'Compare')
    html += '<div id="categories">'
    for group_id in sorted(groups):
        members = groups[group_id]
        mylib.sort_by_name(members, 1)
        html += '<h3 class="center">{}</h3>'.format(
            headings.get(group_id, 'Other'))
        html += '<div class="tags large center">'
        links = [HTML.a_category(*member) for member in members]
        html += ''.join(links) + '</div>'
    HTML.write(base_dir, html + '</div>', title)

    # make groupby compare html
    compare_dir = mylib.path_add(base_dir, 'compare')
    # NOTE(review): '/category' has no trailing slash, unlike the
    # '/category/' breadcrumbs used elsewhere -- confirm this is intended.
    html_group_compare.write_groupby_multi(
        compare_dir,
        [('/category', 'All Categories')],
        'Compare',
        'Categories',
        [('category-6', 'Category: Apps'),
         ('category-7', 'Category: Games')])
def move_ios14():
    """Reduce data and output to the ``study`` apps and split off iOS 14.

    Steps, in order:

    1. Delete the data directory of every app that is not part of ``study``
       and prune parent directories that became empty.
    2. Delete the output directory of every app that is not part of
       ``study``.
    3. Copy each study app's output directory to ``<dir>.2`` unless that
       copy already exists.
    4. For each study app, copy the meta JSON files into a ``2``
       sub-directory of its data directory and move the iOS 14 recordings
       there.
    5. Call ``write_temporary_lists()``.

    An app id counts as part of the study if either the id itself or the id
    without its last two characters is in ``study`` (presumably to match the
    ``.2`` copies created by an earlier run -- confirm).
    """
    # delete unrelated data
    for bid in mylib.appids_in_data(['*']):
        if bid not in study and bid[:-2] not in study:
            diir = mylib.path_data_app(bid)
            mylib.rm_dir(diir)
            print('del', diir)
            # Walk upwards and remove every parent that is now empty.
            # NOTE(review): the walk has no explicit upper bound; it stops
            # at the first non-empty ancestor.
            diir = os.path.dirname(diir)
            while not os.listdir(diir):
                print('del', diir)
                mylib.rm_dir(diir)
                diir = os.path.dirname(diir)

    # delete unrelated out
    for bid in mylib.appids_in_out(['*']):
        diir = mylib.path_out_app(bid)
        if bid not in study and bid[:-2] not in study:
            print('del', diir)
            mylib.rm_dir(diir)

    for bid in study:
        diir = mylib.path_out_app(bid)
        try:
            shutil.copytree(diir, diir + '.2')
        except FileExistsError:
            pass  # copy was already made

    # copy meta
    for bid in study:
        diir = mylib.path_data_app(bid)
        mylib.mkdir(mylib.path_add(diir, '2'))
        for x in ['info_de', 'info_us', 'combined', 'evaluated']:
            src = mylib.path_add(diir, x + '.json')
            dst = mylib.path_add(diir, '2', x + '.json')
            try:
                shutil.copy(src, dst)
            except OSError:
                # Best effort: a meta file may be missing or unreadable.
                # Only file-system errors are ignored, so KeyboardInterrupt
                # and programming errors are no longer swallowed.
                pass

        # move ios 14
        for fname, json in mylib.enum_jsons(bid):
            fiil = os.path.basename(fname)
            try:
                ios = json['ios'].split('.')[0]
            except KeyError:
                ios = '14'  # recordings without version count as iOS 14
            # 1600258000 is a Unix timestamp (2020-09-16 UTC); only files
            # modified after it are moved.
            if ios == '14' and os.path.getmtime(fname) > 1600258000:
                mylib.mv(fname, mylib.path_add(diir, '2', fiil))

    write_temporary_lists()
def process():
    """Generate the domain lookup pages and the three domain index pages.

    Writes ``lookup-apps.json`` and a ``subdoms.json`` symlink into the
    ``results`` output directory, the domain and subdomain lookup pages, and
    the 'all', 'tracker' and 'highly-used' domain indices.

    :return: tuple ``(app_count, dom_count)`` where ``dom_count`` is the
        number of entries under the ``'subdom'`` key of the loaded index.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating html: domain-index ...')
    index = index_domains.loadAll()
    app_count = index_domains.number_of_apps(index)
    dom_count = len(index['subdom'])

    # Prepare for lookup
    names = [[bid, index_app_names.get_name(bid)] for bid in index['bundle']]
    results_dir = mylib.path_out('results')
    mylib.mkdir(results_dir)
    mylib.json_write(mylib.path_add(results_dir, 'lookup-apps.json'), names)
    mylib.symlink(index_domains.fname_dom_subdoms(),
                  mylib.path_add(results_dir, 'subdoms.json'))
    names = None  # drop the reference, the list is no longer needed

    print(' Lookup')
    gen_lookup(mylib.path_out('domain'), index['pardom'], True,
               title='Domain Lookup')
    gen_lookup(mylib.path_out('subdomain'), index['subdom'], False,
               title='Subdomain Lookup')

    print(' All Domains')
    # Strip the first element of every domain record before building the
    # 'all' index; the lookup pages above were written with the full records.
    for key in ('subdom', 'pardom'):
        table = index[key]
        for dom in table:
            table[dom] = table[dom][1:]
    gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
                     json=index, title='Requested Domains',
                     symlink=index_domains.fname_all())
    index = None  # drop the reference before loading the next index

    print(' Trackers Only')
    gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'), app_count,
                     json=index_domains.loadTracker(), title='Tracker',
                     symlink=index_domains.fname_tracker())

    print(' Highly Used')
    gen_html_trinity(mylib.path_out('index', 'domains', 'highly-used'),
                     app_count, json=index_domains.loadNonTracker(),
                     title='Highly Used Domains',
                     symlink=index_domains.fname_no_tracker())
    print('')
    return app_count, dom_count
def write_app_pages(base, apps, title, per_page=60, pre='', post=''):
    """Write the paginated app tile pages below ``base``.

    ``base`` is removed first. Page 1 is written into ``base`` itself, any
    other page ``i`` into the sub-directory ``base/<i>``.

    :param base: output directory; deleted before writing.
    :param apps: apps handed to ``app_tiles_all``.
    :param title: page title used for every page.
    :param per_page: number of apps per page.
    :param pre: HTML placed before the tiles of every page.
    :param post: HTML placed after the tiles of every page.
    :return: tuple ``(number of pages, total number of entries)``.
    """
    mylib.rm_dir(base)
    n_pages = n_entries = 0
    for page_no, count, tiles in app_tiles_all(apps, per_page):
        n_pages += 1
        n_entries += count
        if page_no == 1:
            page_dir = base
        else:
            page_dir = mylib.path_add(base, str(page_no))
        mylib.mkdir(page_dir)
        write(page_dir, '\n'.join((pre, tiles, post)), title=title)
    return n_pages, n_entries
def write_ranking_all(title, base_dir):
    """Write the ranking page covering all apps.

    The page is written into ``base_dir`` and a ``data.json`` symlink to
    the overall ranking file is placed next to it.

    :param title: page title.
    :param base_dir: output directory.
    """
    # full urls since app index can have page 2, 3, etc.
    breadcrumbs = [('/results/', 'Results'), ('/index/apps/', 'Apps (A–Z)')]
    sections = (
        html_h2_path(breadcrumbs),
        html_default_description(),
        html_table(),
        HTML.p_download_json('data.json', 'raw-apps.json'),
        html_script_chunk('data.json', 12, -1),  # last update desc
    )
    HTML.write(base_dir, ''.join(sections), title=title)
    link = mylib.path_add(base_dir, 'data.json')
    mylib.symlink(index_rank.fname_ranking_all(), link)
def write_ranking_category(cid, category_name):
    """Write the ranking page of a single category.

    The page goes to the ``category/<cid>/ranking`` output directory and a
    ``data.json`` symlink to the category's rank file is placed next to it.

    :param cid: category id.
    :param category_name: human-readable name used in breadcrumb and title.
    """
    out_dir = mylib.path_out('category', cid, 'ranking')
    # full urls since categories can have page 2, 3, etc.
    breadcrumbs = [
        ('/category/', 'All Categories'),
        (f'/category/{cid}/', category_name),
    ]
    html = html_h2_path(breadcrumbs)
    html += html_default_description()
    html += html_table()
    html += HTML.p_download_json('data.json', f'raw-category-{cid}.json')
    html += html_script_chunk('data.json', 12, -1)  # last update desc
    HTML.write(out_dir, html, title='Category Ranking: ' + category_name)

    rank_file = index_rank.fname_rank_list('category', cid)
    mylib.symlink(rank_file, mylib.path_add(out_dir, 'data.json'))
def gen_lookup(html_dir, doms_dict, flag, title):
    """Write a domain lookup page together with its ``doms.json`` data.

    :param html_dir: output directory for ``index.html`` and ``doms.json``.
    :param doms_dict: domain data dumped as JSON next to the page.
    :param flag: when truthy, the page includes the 'Subdomains' section.
    :param title: page title.
    """
    heading = HTML.a_path([('/index/domains/all/', 'All Domains')],
                          '<span id="name"></span>')
    if flag:
        subdomains = '<h3>Subdomains:</h3><div id="subdoms" class="tags"></div>'
    else:
        subdomains = ''
    tile_template = HTML.app_tile_template()

    # Adjacent literals concatenate to a single page body.
    page = (
        f' <h2>{heading}</h2>'
        ' <p>Known Tracker: <b id="known">?</b></p>'
        ' <p>Present in: <b id="num-apps">… applications</b></p>'
        f' {subdomains}'
        ' <h3>Apps containing this domain:</h3>'
        ' <div id="app-toc" class="no-ul-all">'
        f' {tile_template}'
        ' </div>'
        ' <script type="text/javascript" src="/static/lookup-domain.js?2"></script>'
        ' <script type="text/javascript" src="/static/lozad.js"></script>'
        ' <script type="text/javascript">'
        " lookup_domain_js('doms.json', '/results/lookup-apps.json', '/results/subdoms.json');"
        ' </script> '
    )
    HTML.write(html_dir, page, title=title)
    mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict)
def write(path, content, title=None, fname='index.html'):
    """Render ``content`` with the base template and write it to disk.

    The directory ``path`` is created via ``mylib.mkdir`` before the file
    ``path/fname`` is written.

    :param path: target directory.
    :param content: HTML body passed to ``base_template``.
    :param title: optional page title passed to ``base_template``.
    :param fname: file name inside ``path``.
    """
    mylib.mkdir(path)
    # Render before opening so a template error cannot leave a truncated file.
    html = base_template(content, title=title)
    # Explicit UTF-8: pages contain non-ASCII characters and must not depend
    # on the locale's default encoding.
    with open(mylib.path_add(path, fname), 'w', encoding='utf-8') as fp:
        fp.write(html)
def write_groupby_single(base_dir, gid, name, parent):
    """Write the compare page of a single group into ``<base_dir>/<gid>``.

    Delegates to ``write_groupby_multi`` with one ``(gid, '')`` entry and
    uses ``name`` for both of its name arguments.

    :param base_dir: directory that holds all group compare pages.
    :param gid: group id; used as sub-directory name.
    :param name: display name of the group.
    :param parent: label of the ``/compare/`` breadcrumb.
    """
    out_dir = mylib.path_add(base_dir, gid)
    breadcrumbs = [('/results/', 'Results'), ('/compare/', parent)]
    groups = [(gid, '')]
    write_groupby_multi(out_dir, breadcrumbs, name, name, groups)