def deg_powerlaw_low_high_sat(g):
    pl_fit = pl.Fit(
        gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices(),
        verbose=False
    )
    return (pl_fit.alpha, pl_fit.xmin, pl_fit.xmax)
def plot_centralities(network, title="Centrality measures"):
    g = network.graph
    comm_size = g.num_vertices()

    closeness = centrality.closeness(g).get_array().tolist()
    max_eigenval, eigenvec = centrality.eigenvector(g)
    # eigenvector = [x / max_eigenval for x in eigenvec.get_array().tolist()]  # normalize!
    eigenvector = eigenvec.get_array().tolist()
    betw, _edges = centrality.betweenness(g, norm=True)
    betweenness = betw.get_array().tolist()

    P.suptitle(title)

    # Closeness is NaN for vertices that cannot reach the rest of the
    # graph; zero those out so the histogram still covers every node.
    print("nans", len([x for x in closeness if isnan(x)]))
    closeness = [0 if isnan(x) else x for x in closeness]
    closeness = _filtered(closeness, comm_size)
    print("closeness", closeness)
    print("non zeros", len([x for x in closeness if x != 0]))
    P.subplot(2, 2, 1)
    plot_hist(closeness)
    P.xlabel("Closeness centrality")
    P.ylabel("Number of nodes (total={})".format(len(closeness)))

    counts, degrees = stats.vertex_hist(g, "in", float_count=False)
    print("counts : ", len(counts), counts)
    print("degrees: ", len(degrees), degrees)
    # vertex_hist returns one more bin edge than counts; pad counts so
    # both sequences line up for the bar plot.
    counts = list(counts)
    counts.append(0)
    P.subplot(2, 2, 2)
    P.bar(degrees, counts, align='center', color="#348ABD")
    P.xlabel("Degree centrality (in)")
    P.ylabel("Number of nodes (total={})".format(sum(counts)))
    P.xlim(0, max(degrees))

    betweenness = _filtered(betweenness, comm_size)
    print("betweenness", betweenness)
    P.subplot(2, 2, 3)
    plot_hist(betweenness)
    P.xlabel("Betweenness centrality")
    P.ylabel("Number of nodes (total={})".format(len(betweenness)))

    eigenvector = _filtered(eigenvector, comm_size)
    print("eigenvector", eigenvector)
    P.subplot(2, 2, 4)
    plot_hist(eigenvector)
    P.xlabel("Eigenvector centrality")
    P.ylabel("Number of nodes (total={})".format(len(eigenvector)))
    P.show()
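# `plot_hist` and `_filtered` are project helpers defined outside this
# section. A minimal sketch of what `plot_hist` could look like, judging
# only from how it is called above (one list of values, drawn on the
# current subplot); the real helper may well differ:
def plot_hist_sketch(values, bins=30):
    # Hypothetical stand-in: histogram a list of centrality values on
    # the current matplotlib axes.
    P.hist(values, bins=bins, color="#348ABD")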
def cum_deg_powerlaw_low_high_sat(g):
    deg_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    # Reverse-cumulative sum turns the histogram into the complementary
    # cumulative degree distribution P(k' >= k).
    pl_fit = pl.Fit(
        np.flip(np.flip(deg_hist, 0).cumsum(), 0),
        verbose=False
    )
    return (pl_fit.alpha, pl_fit.xmin, pl_fit.xmax)
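# Note on both fitters above: powerlaw.Fit treats its argument as raw
# samples, while deg_powerlaw_low_high_sat and
# cum_deg_powerlaw_low_high_sat hand it (cumulative) histogram values.
# If the intent is the conventional fit of the degree sequence itself,
# a sketch along these lines matches the input powerlaw documents
# (uses the same pl alias as above; not part of the original code):
def deg_powerlaw_from_samples(g):
    degrees = g.get_total_degrees(g.get_vertices())
    fit = pl.Fit(degrees[degrees > 0], verbose=False)  # drop isolated vertices
    return fit.alpha, fit.xmin, fit.xmax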
def user_network_summary(g):
    span = "{:D MMM YYYY, HH:mm} - {:D MMM YYYY, HH:mm}".format(
        arrow.get(g.edge_properties["created_at"].a.min()),
        arrow.get(g.edge_properties["created_at"].a.max())
    )
    largest_component = label_largest_component(g, directed=False).a.sum()

    display(Markdown("### " + g.graph_properties["track"].replace("#", r"\#")))
    display(Markdown("#### " + span))
    graph_draw(g, inline=True, output_size=[1000, 1000],
               vertex_fill_color=[.2, .3, .9, .7], vertex_size=2)

    stats = pd.DataFrame([
        ["Vertices", g.num_vertices()],
        ["Edges", g.num_edges()],
        ["Avg. degree", float(g.num_edges()) / g.num_vertices()],
        ["Avg. clustering", vertex_average(g, local_clustering(g))[0]],
        # Cast to float so the share survives integer division on Python 2.
        ["Giant component share",
         "{:.1%}".format(float(largest_component) / g.num_vertices())]
    ], columns=["Metric", "Value"])
    display(stats)

    bins = 20
    counts, _ = vertex_hist(g, "in", range(bins))
    plt.bar(range(1, bins), counts, align="center")
    plt.xticks(range(bins))
    plt.xlim([0.5, bins - 1])
    plt.title("Degree distribution")
    plt.show()
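def example_summary():
    # Hypothetical usage of user_network_summary, which targets a
    # Jupyter notebook (display/Markdown, inline graph_draw). The path
    # is illustrative, but the graph must carry the "track" graph
    # property and the "created_at" edge property (unix timestamps)
    # that the function reads.
    from graph_tool.all import load_graph
    g = load_graph("cache/example_track.gt")
    user_network_summary(g)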
def get_descriptors(network, short_name, nx_network, already_calculated=False):
    def _prefixToTitle(prefix):
        if prefix == 'a':
            return "Artists"
        elif prefix == 't':
            return "Tags"
        elif prefix == 'u':
            return "Users"

    # Return the cached descriptors if they were computed before.
    filename = "cache/{}.pickle".format(short_name)
    if os.path.isfile(filename):
        return pickle.load(open(filename, 'rb'))

    result = {}
    prefix1, prefix2 = short_name[0], short_name[1]
    t1 = _prefixToTitle(prefix1)
    t2 = _prefixToTitle(prefix2)

    result['name'] = short_name

    # Plot titles.
    result['title_dd1'] = PLOT_TITLES[short_name].format(t1, "")
    result['title_dd2'] = PLOT_TITLES[short_name].format(t2, "")
    result['title_dd1_acum'] = PLOT_TITLES[short_name].format(t1, " Cumulative")
    result['title_dd2_acum'] = PLOT_TITLES[short_name].format(t2, " Cumulative")
    result['title_wd'] = PLOT_TITLES['wd'].format("", t1, t2)
    result['title_wd_acum'] = PLOT_TITLES['wd'].format("Cumulative ", t1, t2)
    result['title_cd'] = PLOT_TITLES['cd'].format(t1, t2)
    result['title_sp'] = PLOT_TITLES['sp'].format(t1, t2)

    # Plot filenames.
    result['filename_dd'] = '{}_dd'.format(short_name)       # degree dist.
    result['filename_ddl'] = '{}_dd_log'.format(short_name)  # degree dist. (log)
    result['filename_dd1'] = '{}_{}_dd'.format(short_name[0], short_name)
    result['filename_dd2'] = '{}_{}_dd'.format(short_name[1], short_name)
    result['filename_dd1l'] = '{}_{}_dd_log'.format(short_name[0], short_name)
    result['filename_dd2l'] = '{}_{}_dd_log'.format(short_name[1], short_name)
    result['filename_dd1_acum'] = '{}_{}_dd_acum'.format(short_name[0], short_name)
    result['filename_dd2_acum'] = '{}_{}_dd_acum'.format(short_name[1], short_name)
    result['filename_wd'] = '{}_wd'.format(short_name)        # weight dist.
    result['filename_wdl'] = '{}_wd_log'.format(short_name)   # weight dist. (log)
    result['filename_wd_acum'] = '{}_wd_acum'.format(short_name)
    result['filename_sp'] = '{}_sp'.format(short_name)        # shortest paths
    result['filename_cd'] = '{}_cd'.format(short_name)        # components
    result['filename_cdl'] = '{}_cd_log'.format(short_name)

    nodes = network.get_vertices()
    edges = network.get_edges()
    result['num_nodes'] = {'total': nodes.shape[0]}
    result['num_edges'] = edges.shape[0]

    # Degree statistics for the whole graph.
    result['degree'] = {"total": {}, "prefix1": {}, "prefix2": {}}
    result['degree']["total"]['max'] = network.get_out_degrees(nodes).max()
    result['degree']["total"]['min'] = network.get_out_degrees(nodes).min()
    result['degree']["total"]['avg'] = network.get_out_degrees(nodes).mean()
    result['degree']["total"]["counts"], result['degree']["total"]["bins"] = \
        st.vertex_hist(network, "out")

    # Split vertices into the two node classes by the prefix of their id.
    nodes1, nodes2 = [], []
    for node in nodes:
        if prefix1 in network.vp['id'][node]:
            nodes1.append(node)
        elif prefix2 in network.vp['id'][node]:
            nodes2.append(node)

    result['num_nodes']['prefix1'] = len(nodes1)
    result['degree']["prefix1"]['max'] = network.get_out_degrees(nodes1).max()
    result['degree']["prefix1"]['min'] = network.get_out_degrees(nodes1).min()
    result['degree']["prefix1"]['avg'] = network.get_out_degrees(nodes1).mean()
    result['degree']["prefix1"]["counts"], result['degree']["prefix1"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes1), bins=15)
    result['degree']["prefix1"]["d"] = network.get_out_degrees(nodes1)

    if prefix1 == prefix2:
        nodes2 = nodes1
    result['num_nodes']['prefix2'] = len(nodes2)
    result['degree']["prefix2"]['max'] = network.get_out_degrees(nodes2).max()
    result['degree']["prefix2"]['min'] = network.get_out_degrees(nodes2).min()
    result['degree']["prefix2"]['avg'] = network.get_out_degrees(nodes2).mean()
    result['degree']["prefix2"]["counts"], result['degree']["prefix2"]["bins"] = \
        np.histogram(network.get_out_degrees(nodes2), bins=15)
    result['degree']["prefix2"]["d"] = network.get_out_degrees(nodes2)

    # Edge weights, taken from the NetworkX version of the graph.
    result['weights'] = {}
    weights = []
    for v1, v2 in nx_network.edges():
        weights.append(nx_network.get_edge_data(v1, v2)['weight'])
    result['weights']['d'] = weights

    # Estimated diameter and its endpoints.
    d, (v1, v2) = top.pseudo_diameter(network)
    result['diameter'] = d
    result['diameter_path'] = "{}-{}".format(network.vp['id'][v1],
                                             network.vp['id'][v2])

    result['clustering'] = clu.global_clustering(network)

    if not already_calculated:
        # Work on an undirected copy for shortest paths and components.
        net2 = gt.Graph(network)
        net2.set_directed(False)
        result['sp'] = {}
        result['sp']['counts'], result['sp']['bins'] = shortest_paths(net2)

        _, c2 = top.label_components(net2)
        result['components'] = {}
        result['components']['num'] = len(c2)
        result['components']['bins'] = range(len(c2))
        result['components']['counts'] = c2

    pickle.dump(result, open(filename, "wb"))
    return result
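def example_descriptors(gt_net, nx_net):
    # Hypothetical usage of get_descriptors: short_name encodes the two
    # node classes by prefix ('a'rtists / 't'ags / 'u'sers), so "ut"
    # would describe a user-tag network. gt_net is the graph_tool
    # graph, nx_net the NetworkX version used for edge weights.
    result = get_descriptors(gt_net, "ut", nx_net)
    print(result['num_nodes'], result['diameter'], result['clustering'])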
def avgSquares(graph):
    # Second moment of the out-degree distribution, <k^2>.
    counts, degrees = vertex_hist(graph, 'out')
    return np.sum([(degrees[i] ** 2) * counts[i]
                   for i in range(len(counts))]) / graph.num_vertices()
def variance(g):
    # Variance of the total-degree distribution: <k^2> - <k>^2.
    degree_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    second_m = np.sum(degree_hist * (np.arange(len(degree_hist)) ** 2))
    return second_m - avg_degree(g) ** 2
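# variance() relies on avg_degree(), which is not shown in this section.
# A minimal sketch, computing the first moment the same way variance()
# computes the second (same np/gt_stats aliases as above); the
# project's own helper may differ:
def avg_degree_sketch(g):
    degree_hist = gt_stats.vertex_hist(g, 'total')[0] / g.num_vertices()
    return np.sum(degree_hist * np.arange(len(degree_hist)))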
def max_degree(g):
    # vertex_hist returns [counts, bin_edges] with unit-width bins, so
    # the last edge is max_degree + 1 and the second-to-last edge is
    # the maximum total degree itself.
    return gt_stats.vertex_hist(g, 'total')[1][-2]
def kcore(g):
    # Histogram of core numbers: vertex_hist also accepts a vertex
    # property map in place of a degree keyword.
    return gt_stats.vertex_hist(g, gt.kcore_decomposition(g))[0]
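def example_kcore(g):
    # Usage sketch for kcore(): with vertex_hist's default unit-width
    # bins, index k of the returned counts is the number of vertices
    # whose core number equals k.
    for k, n in enumerate(kcore(g)):
        if n:
            print("core {}: {} vertices".format(k, int(n)))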