def print_jaccard_by_url(verbose, filt):
    """Print jaccard statistics for every URL and a summary table sorted by
    the first view's jaccard column.

    verbose -- when truthy, also print per-URL cardinality/jaccard details.
    filt    -- when truthy, pass the raw result through filter_url_result().
    """
    data = jaccard_by_url()
    if filt:
        data = filter_url_result(data)
    views = common.VIEWS
    headers = ["url", "loads"] + views
    headers += ["pair avg {}".format(v) for v in views]
    table = []
    for url in data:
        entry = data[url]
        page_set = entry["page_set"]
        jac = entry["jaccard"]
        pair_jac = entry["pair_jaccard"]
        if verbose:
            # Per-URL drill-down before the summary table.
            print("site: {} : loads: {}".format(url, len(page_set)))
            print_page_set_cardinality(page_set)
            print_jaccard(jac, pair_jac)
            print("#" * 40)
        # Summary row: url, load count, "val (union size)" per view,
        # then the pairwise average per view.
        row = [url, len(page_set)]
        row += ["{:.2f} ({})".format(jac[v]["val"], len(jac[v]["u"])) for v in views]
        row += ["{:.2f}".format(pair_jac[v]) for v in views]
        table.append(row)
    # Sort on the column holding the first view's jaccard string.
    sort_col = headers.index(views[0])
    table.sort(key=operator.itemgetter(sort_col))
    print_tabulated(table, headers)
def print_page_set_view(pages, jac, view):
    """Print, for one view, a presence matrix of union items per page load,
    the indexed union listing, and each load's extras beyond the intersection.

    pages -- mapping of page id -> {view name -> set of items}.
    jac   -- per-view dict with "u" (union) and "i" (intersection) sets.
    view  -- the view name to report on.
    """
    headers = list(jac[view]["u"])
    table = []
    # .iteritems() was Python-2-only; .items() behaves the same here.
    for page_id, view_sets in pages.items():
        if len(view_sets[view]) > 0:
            row = [page_id[:4]]
            for item in headers:
                # "#" marks presence, "0" marks absence of the union item.
                row.append("#" if item in view_sets[view] else "0")
            table.append(row)
    # Column headers become indices into the union listing printed below.
    # list(range(...)) works on both Python 2 and 3 (py3 range is lazy).
    headers = ["page"] + list(range(len(headers)))
    print_tabulated(table, headers)
    print("Union across all loads")
    print(["{} {}".format(i, h) for i, h in enumerate(list(jac[view]["u"]))])
    print("\nVariance from intersection")
    intersection = jac[view]["i"]
    for page_id, view_sets in pages.items():
        if len(view_sets[view]) > 0:
            diff = list(view_sets[view].difference(intersection))
            res = "{}:{}".format(page_id[:4], diff)
            if len(diff) > 0:
                print(res)
def variance_from_intersection(load_list):
    """Print, per load (time-ordered), the items it carries beyond the
    intersection of all loads in load_list."""
    common_items = load_list_to_intersection(load_list)
    ordered_loads = sort_load_list_by_time(load_list)
    rows = [[entry[0][:4], entry[2].difference(common_items)]
            for entry in ordered_loads]
    analysis.print_tabulated(rows, ["load", "extra"])
def variance_from_universe(load_list):
    """Print, per load (time-ordered), the items of the universe (union of
    all loads) that this load is missing."""
    all_items = load_list_to_universe(load_list)
    ordered_loads = sort_load_list_by_time(load_list)
    rows = [[entry[0][:4], all_items.difference(entry[2])]
            for entry in ordered_loads]
    analysis.print_tabulated(rows, ["load", "missing"])
def print_page_set_cardinality(page_sets):
    """Print a table of set cardinalities: one row per view, one column per
    page load (labelled by the first 4 chars of the page id).

    page_sets -- mapping of page id -> {view name -> set of items}.
    """
    views = common.VIEWS
    headers = ["page"]
    # One row per view, seeded with the view name as the row label.
    rows = dict(zip(views, [[v] for v in views]))
    # NOTE: the original loop variable shadowed (and rebound) the `page_sets`
    # parameter mid-iteration; renamed to `view_map` to avoid that hazard.
    # .iteritems() was Python-2-only; .items() behaves the same here.
    for page, view_map in page_sets.items():
        headers.append(page[:4])
        for view, value in view_map.items():
            rows[view].append(len(value))
    table = [rows[view] for view in views]
    print_tabulated(table, headers)
def print_jaccard(jaccard, pairs=None):
    """Print intersection size, union size, and jaccard value per view;
    when `pairs` is given, also print the pairwise-average row.

    jaccard -- per-view dict with "i" (intersection), "u" (union), "val".
    pairs   -- optional per-view pairwise-average jaccard values.
    """
    views = common.VIEWS
    inter_row = ["inter"] + [len(jaccard[v]["i"]) for v in views]
    union_row = ["union"] + [len(jaccard[v]["u"]) for v in views]
    value_row = ["value"] + ["{:.2f}".format(jaccard[v]["val"]) for v in views]
    table = [inter_row, union_row, value_row]
    if pairs:
        table.append(["pair avg"] + ["{:.2f}".format(pairs[v]) for v in views])
    print_tabulated(table, ["measure"] + views)
def print_url_summary(url):
    """Print a full report for one URL: load cardinalities, summary stats,
    the universe of items, variance tables, and the load-to-request mapping.

    url -- the URL to report on; a scheme is prepended when missing.
    """
    if not url.startswith("http"):
        url = "http://{}".format(url)
    load_list = get_url_load_set(url)
    table, headers = gen_load_list_cardinality(load_list)
    analysis.print_tabulated(table, headers)
    summary = sumarize_load_list(load_list)
    # NOTE(review): `calc` is not defined in this function -- presumably a
    # module-level list of summary column names; confirm it exists at runtime.
    table = [summary[h] for h in calc]
    analysis.print_tabulated([table], calc)
    print("Universe across all loads")
    print(sorted(load_list_to_universe(load_list)))
    print("Variance from universe")
    variance_from_universe(load_list)
    print("Variance from intersection")
    variance_from_intersection(load_list)
    print("Load to request")
    table, headers, key = load_list_to_value(load_list)
    # `print zip(...)` was a Python-2-only statement; wrapping in list()
    # prints the same list of tuples on Python 2 and works on Python 3.
    print(list(zip(key, range(len(key)))))
    analysis.print_tabulated(table, headers)
def print_summary():
    """Print the overall jaccard summary table, then chart the data."""
    jac_data, summary_table, summary_headers = gen_jac_table()
    analysis.print_tabulated(summary_table, summary_headers)
    chart_summary(jac_data)