Example #1
import graph_tool.stats as gt_stats
import powerlaw as pl


def deg_powerlaw_low_high_sat(g):
    # Power-law fit of the normalized total-degree histogram.
    pl_fit = pl.Fit(
        gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices(),
        verbose=False
    )

    return (
        pl_fit.alpha,
        pl_fit.xmin,
        pl_fit.xmax
    )
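A minimal usage sketch, assuming graph-tool's bundled datasets and an installed powerlaw package; the dataset name is illustrative only:

from graph_tool import collection

g = collection.data["karate"]
alpha, xmin, xmax = deg_powerlaw_low_high_sat(g)
print(alpha, xmin, xmax)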
Example #2
from math import isnan

import matplotlib.pyplot as P
from graph_tool import centrality, stats


def plot_centralities(network, title="Centrality measures"):
    g = network.graph
    comm_size = g.num_vertices()

    closeness = centrality.closeness(g).get_array().tolist()

    max_eigenval, eigenvec = centrality.eigenvector(g)
    # eigenvector = [x/max_eigenval for x in eigenvec.get_array().tolist()]  # normalize!
    eigenvector = eigenvec.get_array().tolist()

    betw, _edges = centrality.betweenness(g, norm=True)
    betweenness = betw.get_array().tolist()

    P.suptitle(title)
    # P.figure()
    print("nans", len([x for x in closeness if isnan(x)]))
    closeness = [0 if isnan(x) else x for x in closeness]
    # closeness = [x for x in closeness if not isnan(x)]
    closeness = _filtered(closeness, comm_size)
    print("closeness", closeness)
    print("non zeros", len([x for x in closeness if x != 0]))
    P.subplot(2, 2, 1)

    plot_hist(closeness)
    P.xlabel("Closeness centrality")
    P.ylabel("Number of nodes (total={})".format(len(closeness)))

    counts, degrees = stats.vertex_hist(g, "in", float_count=False)
    print("counts : ", len(counts), counts)
    print("degrees: ", len(degrees), degrees)
    counts = list(counts)
    counts.append(0)  # bin edges outnumber counts by one; pad so bar() lengths match
    P.subplot(2, 2, 2)
    P.bar(degrees, counts, align='center', color="#348ABD")
    # P.hist(counts, bins=degrees)
    P.xlabel("Degree centrality (in)")
    P.ylabel("Number of nodes (total={})".format(sum(counts)))
    P.xlim(0, max(degrees))

    betweenness = _filtered(betweenness, comm_size)
    print("betweenness", betweenness)
    P.subplot(2, 2, 3)
    plot_hist(betweenness)
    P.xlabel("Betweenness centrality")
    P.ylabel("Number of nodes (total={})".format(len(betweenness)))

    eigenvector = _filtered(eigenvector, comm_size)
    print("eigenvector", eigenvector)
    P.subplot(2, 2, 4)
    plot_hist(eigenvector)
    P.xlabel("Eigenvector centrality")
    P.ylabel("Number of nodes (total={})".format(len(eigenvector)))
    P.show()
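_filtered and plot_hist are helpers from the original project that this listing does not include; hypothetical stand-ins such as the following make the function runnable (the real helpers may behave differently):

import matplotlib.pyplot as P


def _filtered(values, comm_size):
    # Hypothetical stub: pass the values through unchanged.
    return values


def plot_hist(values, bins=20):
    # Hypothetical stub: plain matplotlib histogram.
    P.hist(values, bins=bins, color="#348ABD")


# Usage with any object exposing a .graph attribute holding a graph_tool Graph:
from types import SimpleNamespace
from graph_tool import collection

plot_centralities(SimpleNamespace(graph=collection.data["karate"]))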
Example #3
import numpy as np
import graph_tool.stats as gt_stats
import powerlaw as pl


def cum_deg_powerlaw_low_high_sat(g):
    deg_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    # Double flip + cumsum yields the reverse cumulative distribution:
    # entry k is the fraction of vertices with degree >= k.
    pl_fit = pl.Fit(
        np.flip(np.flip(deg_hist, 0).cumsum(), 0),
        verbose=False
    )

    return (
        pl_fit.alpha,
        pl_fit.xmin,
        pl_fit.xmax
    )
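The double np.flip is simply a reversed cumulative sum, turning the histogram into a tail distribution; a quick check with illustrative values:

import numpy as np

h = np.array([0.5, 0.3, 0.2])
tail = np.flip(np.flip(h, 0).cumsum(), 0)   # [1.0, 0.5, 0.2]
assert np.allclose(tail, h[::-1].cumsum()[::-1])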
Example #4
import arrow
import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import Markdown, display
from graph_tool.clustering import local_clustering
from graph_tool.draw import graph_draw
from graph_tool.stats import vertex_average, vertex_hist
from graph_tool.topology import label_largest_component


def user_network_summary(g):
    span = "{:D MMM YYYY, HH:mm} - {:D MMM YYYY, HH:mm}".format(
        arrow.get(g.edge_properties["created_at"].a.min()),
        arrow.get(g.edge_properties["created_at"].a.max())
    )
    largest_component = label_largest_component(g, directed=False).a.sum()

    display(Markdown("### " + g.graph_properties["track"].replace("#", r"\#")))
    display(Markdown("#### " + span))

    graph_draw(g, inline=True, output_size=[1000, 1000],
               vertex_fill_color=[.2, .3, .9, .7], vertex_size=2)
    stats = pd.DataFrame([
        ["Vertices", g.num_vertices()],
        ["Edges", g.num_edges()],
        ["Avg. degree", float(g.num_edges()) / g.num_vertices()],
        ["Avg. clustering", vertex_average(g, local_clustering(g))[0]],
        ["Giant component share",
         "{:.1%}".format(largest_component / g.num_vertices())]
    ], columns=["Metric", "Value"])
    display(stats)

    bins = 20
    counts, _ = vertex_hist(g, "in", range(bins))

    plt.bar(range(1, bins), counts, align="center")
    plt.xticks(range(bins))
    plt.xlim([0.5, bins - 1])
    plt.title("Degree distribution")
    plt.show()
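A hedged sketch of the smallest input the function accepts; the property names "created_at" and "track" come from the snippet above, everything else is illustrative:

import time
from graph_tool import Graph

g = Graph()
g.graph_properties["track"] = g.new_graph_property("string", "#example")
g.edge_properties["created_at"] = g.new_edge_property("int64_t")
v1, v2 = g.add_vertex(), g.add_vertex()
g.edge_properties["created_at"][g.add_edge(v1, v2)] = int(time.time())
user_network_summary(g)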
Example #5
import os
import pickle

import numpy as np

import graph_tool as gt
import graph_tool.clustering as clu
import graph_tool.stats as st
import graph_tool.topology as top

# PLOT_TITLES and shortest_paths are module-level names from the original
# project and are not shown in this listing.


def get_descriptors(network, short_name, nx_network, already_calculated=False):
    def _prefixToTitle(prefix):
        if prefix == 'a':
            return "Artists"
        elif prefix == 't':
            return "Tags"
        elif prefix == 'u':
            return "Users"

    filename = "cache/{}.pickle".format(short_name)
    if os.path.isfile(filename):
        with open(filename, 'rb') as f:
            return pickle.load(f)

    result = {}
    prefix1, prefix2 = short_name[0], short_name[1]
    t1 = _prefixToTitle(prefix1)
    t2 = _prefixToTitle(prefix2)
    result['name'] = short_name
    result['title_dd1'] = PLOT_TITLES[short_name].format(t1, "")
    result['title_dd2'] = PLOT_TITLES[short_name].format(t2, "")
    result['title_dd1_acum'] = PLOT_TITLES[short_name].format(
        t1, " Cumulative")
    result['title_dd2_acum'] = PLOT_TITLES[short_name].format(
        t2, " Cumulative")
    result['title_wd'] = PLOT_TITLES['wd'].format("", t1, t2)
    result['title_wd_acum'] = PLOT_TITLES['wd'].format("Cumulative ", t1, t2)
    result['title_cd'] = PLOT_TITLES['cd'].format(t1, t2)
    result['title_sp'] = PLOT_TITLES['sp'].format(t1, t2)
    result['filename_dd'] = '{}_dd'.format(short_name)  # degree input dist
    result['filename_ddl'] = '{}_dd_log'.format(
        short_name)  # degree dist (log)
    result['filename_dd1'] = '{}_{}_dd'.format(short_name[0],
                                               short_name)  # degree input dist
    result['filename_dd2'] = '{}_{}_dd'.format(short_name[1],
                                               short_name)  # degree input dist
    result['filename_dd1l'] = '{}_{}_dd_log'.format(
        short_name[0], short_name)  # degree input dist
    result['filename_dd2l'] = '{}_{}_dd_log'.format(
        short_name[1], short_name)  # degree input dist
    result['filename_dd1_acum'] = '{}_{}_dd_acum'.format(
        short_name[0], short_name)  # degree input dist
    result['filename_dd2_acum'] = '{}_{}_dd_acum'.format(
        short_name[1], short_name)  # degree input dist
    result['filename_wd'] = '{}_wd'.format(short_name)  # weight distribution
    result['filename_wdl'] = '{}_wd_log'.format(
        short_name)  # weight distribution
    result['filename_wd_acum'] = '{}_wd_acum'.format(
        short_name)  # weight distribution
    result['filename_sp'] = '{}_sp'.format(short_name)  # shortest path
    result['filename_cd'] = '{}_cd'.format(short_name)  # components
    result['filename_cdl'] = '{}_cd_log'.format(short_name)  # components (log)

    nodes = network.get_vertices()
    edges = network.get_edges()
    result['num_nodes'] = {}
    result['num_nodes']['total'] = nodes.shape[0]

    result['num_edges'] = edges.shape[0]

    result['degree'] = {"total": {}, "prefix1": {}, "prefix2": {}}
    result['degree']["total"]['max'] = network.get_out_degrees(nodes).max()
    result['degree']["total"]['min'] = network.get_out_degrees(nodes).min()
    result['degree']["total"]['avg'] = network.get_out_degrees(nodes).mean()
    result['degree']["total"]["counts"], result['degree']["total"][
        "bins"] = st.vertex_hist(network, "out")

    nodes1, nodes2 = [], []
    for node in nodes:
        if prefix1 in network.vp['id'][node]:
            nodes1.append(node)
        elif prefix2 in network.vp['id'][node]:
            nodes2.append(node)

    result['num_nodes']['prefix1'] = len(nodes1)
    result['degree']["prefix1"]['max'] = network.get_out_degrees(nodes1).max()
    result['degree']["prefix1"]['min'] = network.get_out_degrees(nodes1).min()
    result['degree']["prefix1"]['avg'] = network.get_out_degrees(nodes1).mean()
    result['degree']["prefix1"]["counts"], result['degree']["prefix1"][
        "bins"] = np.histogram(
            network.get_out_degrees(nodes1),
            bins=15)  # result['degree']["total"]["bins"].shape[0]
    result['degree']["prefix1"]["d"] = network.get_out_degrees(
        nodes1)  # result['degree']["total"]["bins"].shape[0]
    if prefix1 == prefix2:
        nodes2 = nodes1
    result['num_nodes']['prefix2'] = len(nodes2)
    result['degree']["prefix2"]['max'] = network.get_out_degrees(nodes2).max()
    result['degree']["prefix2"]['min'] = network.get_out_degrees(nodes2).min()
    result['degree']["prefix2"]['avg'] = network.get_out_degrees(nodes2).mean()
    result['degree']["prefix2"]["counts"], result['degree']["prefix2"][
        "bins"] = np.histogram(network.get_out_degrees(nodes2), bins=15)
    result['degree']["prefix2"]["d"] = network.get_out_degrees(
        nodes2)  # result['degree']["total"]["bins"].shape[0]

    result['weights'] = {}
    weights = []

    for v1, v2 in nx_network.edges():
        weight = nx_network.get_edge_data(v1, v2)['weight']
        weights.append(weight)

    # result['weights']['counts'], result['weights']['bins'] = np.histogram(weights, bins=8)
    result['weights']['d'] = weights

    # estimated diameter and its endpoint vertices
    d, (v1, v2) = top.pseudo_diameter(network)
    result['diameter'] = d
    d_path = "{}-{}".format(network.vp['id'][v1], network.vp['id'][v2])
    result['diameter_path'] = d_path

    result['clustering'] = clu.global_clustering(network)

    if not already_calculated:
        net2 = gt.Graph(network)  # undirected version
        net2.set_directed(False)
        result['sp'] = {}
        result['sp']['counts'], result['sp']['bins'] = shortest_paths(net2)
        # connected components

        _, c2 = top.label_components(net2)

        result['components'] = {}
        result['components']['num'] = len(c2)
        result['components']['bins'] = range(len(c2))
        result['components']['counts'] = c2

    with open(filename, "wb") as f:
        pickle.dump(result, f)
    return result
Example #6
import numpy as np
from graph_tool.stats import vertex_hist


def avgSquares(graph):
    # Second moment of the out-degree distribution: sum_k k^2 * count(k) / N.
    counts, degrees = vertex_hist(graph, 'out')
    return np.sum([(degrees[i] ** 2) * counts[i]
                   for i in range(len(counts))]) / graph.num_vertices()
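A quick cross-check that avgSquares equals the mean of the squared out-degrees; the bundled dataset is illustrative only:

import numpy as np
from graph_tool import collection

g = collection.data["karate"]
degs = g.degree_property_map("out").a.astype(float)
assert np.isclose(avgSquares(g), (degs ** 2).mean())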
Example #7
import numpy as np
import graph_tool.stats as gt_stats


def variance(g):
    degree_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    second_m = np.sum(degree_hist * (np.arange(len(degree_hist)) ** 2))
    return second_m - avg_degree(g) ** 2
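variance computes the second moment minus the squared mean degree; avg_degree is not defined in this example, but a hypothetical reconstruction consistent with the histogram convention of the neighbouring snippets would be:

import numpy as np
import graph_tool.stats as gt_stats


def avg_degree(g):
    # Mean total degree: sum over k of k * p(k) on the normalized histogram.
    degree_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    return np.sum(degree_hist * np.arange(len(degree_hist)))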
Example #8
import graph_tool.stats as gt_stats


def max_degree(g):
    return gt_stats.vertex_hist(g, 'total')[1][-2]
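vertex_hist returns [counts, bin_edges]; with the default unit-width bins the last edge is max degree + 1, so the second-to-last entry is the maximum itself. A quick check (dataset illustrative):

from graph_tool import collection

g = collection.data["karate"]
assert max_degree(g) == g.degree_property_map("total").a.max()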
Example #9
import graph_tool.all as gt
import graph_tool.stats as gt_stats


def kcore(g):
    return gt_stats.vertex_hist(g, gt.kcore_decomposition(g))[0]
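vertex_hist also accepts a vertex PropertyMap in place of a degree name, so this returns the number of vertices at each k-core level; illustrative usage:

from graph_tool import collection

core_counts = kcore(collection.data["karate"])
print(core_counts)  # core_counts[i] = number of vertices with core number i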