Exemple #1
0
def print_jaccard_by_url(verbose, filt):
    """Print one summary row per URL with its Jaccard figures.

    The data comes from ``jaccard_by_url()``; when ``filt`` is truthy it is
    first passed through ``filter_url_result``. Each URL entry is read via
    its "page_set", "jaccard" and "pair_jaccard" keys.

    Args:
        verbose: when truthy, also print a detail section for every URL
            (load count, page-set cardinalities and the Jaccard table)
            followed by a separator line.
        filt: when truthy, filter the per-URL results before printing.
    """
    results = jaccard_by_url()
    if filt:
        results = filter_url_result(results)

    views = common.VIEWS
    headers = (["url", "loads"] + views
               + ["pair avg {}".format(name) for name in views])

    rows = []
    for url in results:
        entry = results[url]
        page_set = entry["page_set"]
        jac = entry["jaccard"]
        pair_jac = entry["pair_jaccard"]
        load_count = len(page_set)

        # Optional detail section for this URL.
        if verbose:
            print("site: {} : loads: {}".format(url, load_count))
            print_page_set_cardinality(page_set)
            print_jaccard(jac, pair_jac)
            print("#" * 40)

        # Summary row: url, load count, one "value (union size)" cell per
        # view, then one pair-average cell per view.
        row = [url, load_count]
        row.extend(
            "{:.2f} ({})".format(jac[name]["val"], len(jac[name]["u"]))
            for name in views
        )
        row.extend("{:.2f}".format(pair_jac[name]) for name in views)
        rows.append(row)

    # Order rows by the column of the first view; the cells are formatted
    # strings, so this is a string comparison.
    rows.sort(key=operator.itemgetter(headers.index(views[0])))
    print_tabulated(rows, headers)
Exemple #2
0
def print_page_set_view(pages, jac, view):
    """Print a presence matrix of one view's items across page loads.

    Written so that it runs unchanged on both Python 2 and Python 3
    (``print(...)`` with a single argument, ``dict.items``, and an explicit
    ``list(range(...))``).

    Args:
        pages: mapping of page id -> mapping of view name -> collection of
            items (set-like; ``difference`` is called on it).
        jac: mapping of view name -> mapping with at least the keys "u"
            (union of items) and "i" (intersection of items).
        view: the view name to report on.

    Output, in order:
        1. A table with one row per page that has a non-empty collection for
           ``view``; each cell is "#" when the page contains the union item
           for that column and "0" otherwise. Columns are numbered.
        2. The legend mapping column numbers to union items.
        3. For each such page, the items it has beyond the intersection
           (only printed when there are any).
    """
    # Materialize the union once so the table columns and the legend are
    # guaranteed to use the same ordering.
    union_items = list(jac[view]["u"])

    table = []
    for page_id, view_sets in pages.items():
        present = view_sets[view]
        if not present:
            continue
        row = [page_id[:4]]
        row.extend("#" if item in present else "0" for item in union_items)
        table.append(row)

    # Numeric column headers index into the legend printed below.
    headers = ["page"] + list(range(len(union_items)))
    print_tabulated(table, headers)

    print("Union across all loads")
    print(["{} {}".format(i, h) for i, h in enumerate(union_items)])
    print("\nVariance from intersection")

    intersection = jac[view]["i"]
    for page_id, view_sets in pages.items():
        present = view_sets[view]
        if not present:
            continue
        diff = list(present.difference(intersection))
        if diff:
            print("{}:{}".format(page_id[:4], diff))
Exemple #3
0
def variance_from_intersection(load_list):
    """Print, for every load, the items it has beyond the common intersection.

    The intersection comes from ``load_list_to_intersection`` and the loads
    are visited in the order returned by ``sort_load_list_by_time``. Each
    load is indexed positionally: element 0 is sliced to its first four
    characters as the row label, element 2 must support ``difference``.
    """
    shared = load_list_to_intersection(load_list)
    ordered = sort_load_list_by_time(load_list)
    rows = [[entry[0][:4], entry[2].difference(shared)] for entry in ordered]
    analysis.print_tabulated(rows, ["load", "extra"])
Exemple #4
0
def variance_from_universe(load_list):
    """Print, for every load, the items of the universe that it lacks.

    The universe comes from ``load_list_to_universe`` (it must support
    ``difference``) and the loads are visited in the order returned by
    ``sort_load_list_by_time``. Each load is indexed positionally: element 0
    is sliced to its first four characters as the row label, element 2 is
    the collection subtracted from the universe.
    """
    everything = load_list_to_universe(load_list)
    ordered = sort_load_list_by_time(load_list)
    rows = [[entry[0][:4], everything.difference(entry[2])]
            for entry in ordered]
    analysis.print_tabulated(rows, ["load", "missing"])
Exemple #5
0
def print_page_set_cardinality(page_sets):
    """Print a table of item counts per view (rows) and per page (columns).

    Runs on both Python 2 and Python 3 (uses ``dict.items``).

    Args:
        page_sets: mapping of page id -> mapping of view name -> sized
            collection. Page ids are truncated to four characters for the
            column headers.

    Raises:
        KeyError: if a page contains a view name that is not listed in
            ``common.VIEWS``.
    """
    views = common.VIEWS
    headers = ["page"]

    # One row per view, each starting with the view name as its label.
    rows = {view: [view] for view in views}

    # Use a distinct name for the inner mapping so the parameter being
    # iterated is not rebound inside its own loop.
    for page, view_sets in page_sets.items():
        headers.append(page[:4])
        for view, value in view_sets.items():
            rows[view].append(len(value))

    # Emit rows in the canonical view order rather than dict order.
    table = [rows[view] for view in views]
    print_tabulated(table, headers)
Exemple #6
0
def print_jaccard(jaccard, pairs=None):
    """Print intersection size, union size and value for every view.

    Args:
        jaccard: mapping of view name -> mapping with the keys "i", "u"
            (sized collections) and "val" (a number formatted to two
            decimals).
        pairs: optional mapping of view name -> number; when truthy an
            extra "pair avg" row is added, formatted to two decimals.
    """
    views = common.VIEWS

    # Collect one column of cells per view, in view order.
    columns = []
    for name in views:
        stats = jaccard[name]
        cells = [
            len(stats["i"]),
            len(stats["u"]),
            "{:.2f}".format(stats["val"]),
        ]
        if pairs:
            cells.append("{:.2f}".format(pairs[name]))
        columns.append(cells)

    labels = ["inter", "union", "value"]
    if pairs:
        labels.append("pair avg")

    # Transpose the columns into labelled rows.
    table = [
        [label] + [cells[idx] for cells in columns]
        for idx, label in enumerate(labels)
    ]
    print_tabulated(table, ["measure"] + views)
Exemple #7
0
def print_url_summary(url):
    """Print a multi-section report about all recorded loads of one URL.

    Sections, in order: the per-load cardinality table, a one-row summary,
    the universe of items across all loads, the variance of each load from
    the universe and from the intersection, and the load-to-request table
    together with its key legend.

    Args:
        url: the URL to report on; "http://" is prepended when the value
            does not already start with "http".
    """
    if not url.startswith("http"):
        url = "http://{}".format(url)
    load_list = get_url_load_set(url)
    table, headers = gen_load_list_cardinality(load_list)
    analysis.print_tabulated(table, headers)

    # NOTE(review): `calc` is not defined in this function; presumably a
    # module-level sequence of summary keys - confirm.
    summary = sumarize_load_list(load_list)
    table = [summary[h] for h in calc]
    analysis.print_tabulated([table], calc)

    print("Universe across all loads")
    print(sorted(load_list_to_universe(load_list)))

    print("Variance from universe")
    variance_from_universe(load_list)

    print("Variance from intersection")
    variance_from_intersection(load_list)

    print("Load to request")
    table, headers, key = load_list_to_value(load_list)
    # Legend of (key, column index) pairs. Wrapping in list() keeps the
    # printed output identical on Python 2 and Python 3, where zip() is lazy.
    print(list(zip(key, range(len(key)))))
    analysis.print_tabulated(table, headers)
Exemple #8
0
def print_summary():
    """Print the table produced by ``gen_jac_table`` and chart its data.

    ``gen_jac_table`` returns a ``(data, table, headers)`` triple: the table
    and headers are printed via ``analysis.print_tabulated`` and the data is
    handed to ``chart_summary``.
    """
    chart_data, rows, column_names = gen_jac_table()
    analysis.print_tabulated(rows, column_names)
    chart_summary(chart_data)