def viz_authorities(g: Graph, in_out, file_name: str, folder: str):
    """Plot ``g`` with every vertex sized by its authority score.

    The scores returned by ``gm.authorities(g)`` are stored on the graph as
    the ``authority_score`` vertex attribute (the graph is mutated) and the
    drawing is written to the target built by ``u.format_file``.

    Args:
        g: Graph to draw. Vertices are read for ``twitter_name`` and
            ``twitter_id``.
        in_out: Not used by this function; kept so existing callers keep
            working.
        file_name: Base file name forwarded to ``u.format_file``.
        folder: Output folder forwarded to ``u.format_file``.
    """
    u.start_time()

    auths = gm.authorities(g)
    g.vs["authority_score"] = auths
    my_layout = g.layout_auto()

    def label_for(vertex):
        # Prefer the readable name; fall back to the raw id only for vertices
        # with a positive authority score, everything else stays unlabeled.
        if vertex["twitter_name"] != "":
            return vertex["twitter_name"]
        return vertex["twitter_id"] if vertex["authority_score"] > 0 else ""

    visual_style = {
        "edge_curved": False,
        "vertex_label": [label_for(v) for v in g.vs],
        "target": u.format_file(folder=folder, file_name=file_name, img_suffix=img_suffix),
        "bbox": (1600, 1600),
        "margin": 10,
        "vertex_size": [score * 50 for score in auths],
        "layout": my_layout,
    }

    # Plot the graph
    drawing.plot(g, **visual_style)
    # The label used to read "generate community viz graph " (copy-pasted from
    # viz_community), which made the timing log ambiguous.
    u.print_delta("generate authorities viz graph ")
def get_top_n_forbetweenness_centrality(g: Graph, directed, top_n: int):
    """Return the first ``top_n`` betweenness entries enriched with twitter data.

    Args:
        g: Graph handed to ``nc.betweenness_centrality``.
        directed: Forwarded unchanged as the second positional argument of
            ``nc.betweenness_centrality``.
        top_n: Number of leading entries to keep from that result.

    Returns:
        Whatever ``fill_twitter_top_n_for_list`` produces for the kept slice.
    """
    u.start_time()
    # presumably already ordered best-first by the helper; verify in ``nc``
    ranking = nc.betweenness_centrality(g, directed)
    leaders = ranking[:top_n]
    u.print_delta(f"get top {top_n} betweenness_centrality ")
    return fill_twitter_top_n_for_list(leaders)
def global_metrics(g: Graph):
    """Print whole-graph statistics for ``g`` to stdout.

    Reports size, connectivity, density, diameters, clustering, degree,
    betweenness, closeness and mean path length. Nothing is returned.

    Args:
        g: Graph to describe. It is only queried, never modified here.
    """
    u.start_time()
    # Indexed below as (min, max, mean, std).
    degree_stats = degree_properties(g)
    summary(g)
    directed = g.is_directed()

    print("======= GLOBAL MEASURES ============")
    print('Number of nodes: %d' % g.vcount())
    print('Number of edges: %d' % g.ecount())
    print('Number of components: %d' % len(g.components()))
    print("Connected:", g.is_connected())
    print("Density:", g.density())
    print("Diameter:", g.diameter(directed=directed))
    print("Diameter un-directed:", g.diameter(directed=False))
    print("Clustering Coefficient:", g.transitivity_undirected())
    print("Average Local Clustering Coefficient:", g.transitivity_avglocal_undirected())
    print('Degree: min = %d, max = %d, mean = %0.2f, std = %0.2f'
          % (degree_stats[0], degree_stats[1], degree_stats[2], degree_stats[3]))
    print("Average Degree:", mean(g.degree()))
    print("Max Degree:", g.maxdegree())

    # Betweenness and closeness are the expensive measures here; compute each
    # once and reuse it for both the mean and the max instead of running the
    # whole computation twice.
    betweenness = g.betweenness()
    print("Average Betweenness:", mean(betweenness))
    print("Max Betweenness:", max(betweenness))

    closeness = g.closeness()
    print("Average Closeness:", mean(closeness))
    print("Max Closeness:", max(closeness))

    print("mean distance directed", g.average_path_length(directed=directed))
    print("mean distance un-directed", g.average_path_length(directed=False))
    u.print_delta("get global metrics ")
def get_top_n_for_closeness_centrality(g: Graph, mode_in_out_all, top_n: int):
    """Return the first ``top_n`` closeness entries enriched with twitter data.

    Args:
        g: Graph handed to ``nc.closeness_centrality``.
        mode_in_out_all: Forwarded unchanged as the second positional
            argument of ``nc.closeness_centrality``.
        top_n: Number of leading entries to keep from that result.

    Returns:
        Whatever ``fill_twitter_top_n_for_list`` produces for the kept slice.
    """
    u.start_time()
    # presumably already ordered best-first by the helper; verify in ``nc``
    ranking = nc.closeness_centrality(g, mode_in_out_all)
    leaders = ranking[:top_n]
    u.print_delta(f"get top {top_n} closeness_centrality ")
    return fill_twitter_top_n_for_list(leaders)
def get_top_n_for_degree(g: Graph, mode_in_out_all, top_n: int):
    """Return the first ``top_n`` degree-centrality entries with twitter data.

    Args:
        g: Graph handed to ``nc.degree_centrality``.
        mode_in_out_all: Forwarded unchanged as the second positional
            argument of ``nc.degree_centrality``.
        top_n: Number of leading entries to keep from that result.

    Returns:
        Whatever ``fill_twitter_top_n_for_list`` produces for the kept slice.
    """
    u.start_time()
    # presumably already ordered best-first by the helper; verify in ``nc``
    ranking = nc.degree_centrality(g, mode_in_out_all)
    leaders = ranking[:top_n]
    u.print_delta(f"get top {top_n} Degree ")
    return fill_twitter_top_n_for_list(leaders)
def get_top_n_for_page_rank(g: Graph, directed: bool, top_n: int):
    """Return the first ``top_n`` PageRank entries enriched with twitter data.

    Args:
        g: Graph handed to ``nc.pagerank_centrality``.
        directed: Accepted but not forwarded; ``nc.pagerank_centrality`` is
            called with the graph only.
        top_n: Number of leading entries to keep from that result.

    Returns:
        Whatever ``fill_twitter_top_n_for_list`` produces for the kept slice.
    """
    u.start_time()
    # NOTE(review): ``directed`` is ignored here - confirm whether
    # ``nc.pagerank_centrality`` should receive it.
    ranking = nc.pagerank_centrality(g)
    leaders = ranking[:top_n]
    u.print_delta(f"get top {top_n} PageRank ")
    return fill_twitter_top_n_for_list(leaders)
def local_metrics(g: Graph):
    """Print which vertices maximise each per-vertex measure of ``g``.

    Degree, betweenness and closeness are always reported; the local
    clustering coefficient is reported only for undirected graphs. For the
    first three measures every vertex tied for the maximum is listed; for the
    clustering coefficient only the first such vertex is.

    Side effect: if the graph has no ``name`` vertex attribute, one is
    created from the vertex indices (as strings).

    Args:
        g: Graph to inspect. Vertices must carry a ``twitter_id`` attribute.
    """
    u.start_time()
    if "name" not in g.vertex_attributes():
        g.vs["name"] = [str(i) for i in range(g.vcount())]

    degrees = g.degree()
    betweenness = g.betweenness()
    closeness = g.closeness()
    undirected = not g.is_directed()
    clustering_coef = g.transitivity_local_undirected() if undirected else None

    print("==============LOCAL MEASURES=======================")

    # Select by index from the lists computed above: this avoids recomputing
    # each measure inside ``select(_measure=...)`` and keeps NaN entries
    # (possible for closeness) from being picked as the "maximum".
    _print_top_vertices("Vertex with highest degree:",
                        g.vs.select(_indices_of_max(degrees)))
    _print_top_vertices("Vertex with highest betweenness:",
                        g.vs.select(_indices_of_max(betweenness)))
    _print_top_vertices("Vertex with highest closeness:",
                        g.vs.select(_indices_of_max(closeness)))

    if undirected:
        best = _indices_of_max(clustering_coef)
        # Skipped when no vertex has a defined (non-NaN) coefficient.
        if best:
            _print_top_vertices("Vertex with highest clustering coefficient:",
                                g.vs[best[0]])

    u.print_delta("get local metrics ")


def _indices_of_max(values):
    """Return every index holding the largest non-NaN entry of ``values``."""
    import math

    finite = [v for v in values if not math.isnan(v)]
    if not finite:
        return []
    top = max(finite)
    # NaN never compares equal, so NaN positions are excluded automatically.
    return [i for i, v in enumerate(values) if v == top]


def _print_top_vertices(title, vertices):
    """Print name, twitter id and twitter profile name for ``vertices``.

    ``vertices`` is either a vertex sequence (attributes come back as lists)
    or a single vertex (attributes come back as scalars); both are printed
    as-is.
    """
    twitter_id = vertices["twitter_id"]
    # NOTE(review): for a vertex sequence this passes a list of ids, while
    # other callers pass a single id - confirm ``u.get_twitter_profile``
    # supports both.
    profile = u.get_twitter_profile(twitter_id)
    display_name = "" if profile is None else profile.loc[0, "name"]
    print(title, vertices["name"], " , twitter id : ", twitter_id,
          "twitter name=", display_name)
def get_max_vertex(g: Graph, mode_in_out_all):
    """Look up the twitter profile of the first vertex with maximal degree.

    Args:
        g: Graph to inspect; vertices are read for ``name`` and
            ``twitter_id``.
        mode_in_out_all: Passed as ``mode`` to ``g.degree``.

    Returns:
        The result of ``u.get_twitter_profile`` for that vertex's
        ``twitter_id``.
    """
    u.start_time()
    degrees = g.degree(mode=mode_in_out_all)
    max_deg = max(degrees)
    # The printed wording says "OUT" regardless of the mode actually used.
    print('found max degree OUT : ', max_deg)

    # Ties are resolved in favour of the lowest vertex index.
    winner = g.vs[degrees.index(max_deg)]
    winner_twitter_id = winner["twitter_id"]
    print('out max degree id : ', winner["name"],
          ' twitter_id : ', winner_twitter_id,
          ' degree : ', max_deg)

    u.print_delta("get max degree ")
    return u.get_twitter_profile(winner_twitter_id)
def viz_community(g: Graph, in_out, file_name: str, folder: str, community_type, show_label_by_degree_percent):
    """Plot the multilevel communities of ``g`` with groups marked.

    Communities are detected on an undirected copy of the graph, the
    modularity of that partition is printed, and the drawing is written to
    the target built by ``u.format_file``.

    Args:
        g: Graph to analyse; vertices are read for ``twitter_name`` and
            ``twitter_id``.
        in_out: ``mode`` used when computing the maximum degree.
        file_name: Base file name forwarded to ``u.format_file``.
        folder: Output folder forwarded to ``u.format_file``.
        community_type: Not used by this function.
        show_label_by_degree_percent: Unnamed vertices get their id as label
            only when ``1 - degree / max_degree`` is at most this value.
    """
    u.start_time()
    max_degree = max(g.degree(mode=in_out))
    my_layout = g.layout_auto()

    def pick_label(vertex):
        name = vertex["twitter_name"]
        if name != "":
            return name
        # NOTE(review): ``vertex.degree()`` uses the default mode while
        # ``max_degree`` uses ``in_out`` - confirm this is intended.
        if (1 - vertex.degree() / max_degree) <= show_label_by_degree_percent:
            return vertex["twitter_id"]
        return None

    labels = [pick_label(v) for v in g.vs]
    target = u.format_file(folder=folder, file_name=file_name, img_suffix=img_suffix)
    visual_style = {
        "edge_curved": False,
        "vertex_label": labels,
        "target": target,
        "bbox": (1600, 1600),
        "margin": 10,
        "layout": my_layout,
    }

    g_undirected: Graph = g.as_undirected()
    communities = g_undirected.community_multilevel()
    # https://stackoverflow.com/questions/43580304/python-modularity-statistic-using-louvain-package#43583445
    modularity_score = g_undirected.modularity(communities.membership)
    print(f"The modularity Q based on igraph is {modularity_score}")

    # Plot the clustering so that each community is outlined
    drawing.plot(communities, mark_groups=True, **visual_style)
    u.print_delta("generate community viz graph ")
def viz_diameter(g: Graph, file_name: str, folder: str):
    """Plot ``g`` with the vertices and edges of one diameter path highlighted.

    The graph is mutated: the ``color`` vertex and edge attributes and the
    ``belongs_to_diameter`` vertex attribute are (re)written on every call.
    Only vertices on the path are labeled (with their ``twitter_id``).

    Args:
        g: Graph to draw.
        file_name: Base file name forwarded to ``u.format_file``.
        folder: Output folder forwarded to ``u.format_file``.
    """
    u.start_time()
    is_directed = g.is_directed()
    diam = g.get_diameter(is_directed)

    base_colour = '#CCCCCC'
    highlight_colour = "#FFCC66"
    on_diameter = g.vs.select(diam)

    # Reset everything first so a previous call on the same graph cannot
    # leave stale highlights or stale ``belongs_to_diameter`` flags behind.
    g.vs["color"] = [base_colour] * g.vcount()
    g.vs["belongs_to_diameter"] = [None] * g.vcount()
    on_diameter["color"] = [highlight_colour] * len(on_diameter)
    on_diameter["belongs_to_diameter"] = 1

    g.es["color"] = [base_colour] * g.ecount()
    # Consecutive vertex pairs describe the same path as ``path=diam`` did;
    # ``pairs=`` is used because the documented signature of ``get_eids`` in
    # current python-igraph releases no longer lists a ``path`` keyword.
    path_pairs = list(zip(diam, diam[1:]))
    for eid in g.get_eids(pairs=path_pairs, directed=is_directed):
        g.es[eid]["color"] = highlight_colour

    my_layout = g.layout_auto()
    visual_style = {
        "edge_curved": False,
        "vertex_label": [
            v["twitter_id"] if v["belongs_to_diameter"] == 1 else ""
            for v in g.vs
        ],
        "target": u.format_file(folder=folder, file_name=file_name, img_suffix=img_suffix),
        "bbox": (1600, 1600),
        "margin": 10,
        "vertex_color": g.vs["color"],
        "edge_color": g.es["color"],
        "layout": my_layout,
    }

    # Plot the graph
    drawing.plot(g, **visual_style)
    # The label used to read "generate community viz graph " (copy-pasted from
    # viz_community), which made the timing log ambiguous.
    u.print_delta("generate diameter viz graph ")
def plot_plot_degree_distribution(g: Graph, file_name='degree_distribution', loglog=True, marker='.', folder: str = u.folder_img):
    """Save a plot of the degree distribution of ``g``.

    Args:
        g: Graph whose degree histogram is plotted.
        file_name: Base file name forwarded to ``u.format_file``.
        loglog: When true, both axes are switched to logarithmic scale.
        marker: Format string passed to ``pylab.plot``.
        folder: Output folder forwarded to ``u.format_file``.
    """
    import pylab

    u.start_time()
    # Build the histogram once; previously it was computed twice and the
    # first result was thrown away.
    histogram = g.degree_distribution(bin_width=1)
    # Each bin unpacks to three values; the first and last are used as the
    # x value (bin start) and y value (count).
    xs, ys = zip(*[(left, count) for left, _, count in histogram.bins()])

    # NOTE(review): show() runs before anything is drawn in this function -
    # kept as-is, but confirm it is still wanted.
    pylab.show()
    if loglog:
        pylab.xscale('log')
        pylab.yscale('log')
    pylab.xlabel('k')
    pylab.ylabel('N')
    pylab.title('Degree distribution')
    pylab.plot(xs, ys, marker)
    pylab.savefig(u.format_file(folder=folder, file_name=file_name))
    u.print_delta(' generate degree_distribution_plot')
def viz_graph(
    g: Graph,
    degree,
    file_name: str,
    folder: str,
    layout: str = "auto",
    show_label_by_degree: float = 0.1,
    label: str = "twitter_id",
):
    """Plot ``g`` with vertices sized and coloured by ``degree``.

    The graph is mutated: when the largest value in ``degree`` is an integer
    of at least 1, the ``color`` vertex attribute is set from a rainbow
    palette; edges then take the colour of the vertex selected by
    ``_source``.

    Args:
        g: Graph to draw; vertices are read for ``twitter_name`` and the
            attribute named by ``label``.
        degree: One numeric value per vertex, in vertex order.
        file_name: Base file name forwarded to ``u.format_file``.
        folder: Output folder forwarded to ``u.format_file``.
        layout: Layout name passed to ``g.layout``.
        show_label_by_degree: Unnamed vertices get a label only when
            ``1 - vertex.degree() / max(degree)`` is at most this value.
        label: Vertex attribute used as the label for unnamed vertices.
    """
    u.start_time()

    max_degree = max(degree)
    # isinstance (rather than ``type(...) == int``) also accepts numpy
    # integers, e.g. when ``degree`` is a numpy array.
    if isinstance(max_degree, (int, np.integer)) and max_degree >= 1:
        # One palette slot per possible value 0..max_degree.
        # https://github.com/igraph/python-igraph/issues/98
        palette = RainbowPalette(n=max_degree + 1)
        g.vs["color"] = [palette.get(d) for d in degree]

    # Avoid ZeroDivisionError when every value is 0 (e.g. no edges).
    scale = max_degree if max_degree else 1

    my_layout = g.layout(layout=layout)

    def pick_label(vertex):
        if vertex["twitter_name"] != "":
            return vertex["twitter_name"]
        if (1 - vertex.degree() / scale) <= show_label_by_degree:
            return vertex[label]
        return ""

    visual_style = {
        # Don't curve the edges
        "edge_curved": False,
        # Scale vertices based on degree
        "vertex_size": [x / scale * 50 + 1 for x in degree],
        "vertex_label": [pick_label(v) for v in g.vs],
        "vertex_label_size": [x / scale * 22 for x in degree],
        "target": u.format_file(folder=folder, file_name=file_name, img_suffix=img_suffix),
        "bbox": (1600, 1600),
        "margin": 10,
        "layout": my_layout,
    }

    # Also color the edges. Guarded because the colour branch above may have
    # been skipped, in which case the attribute might not exist at all.
    if "color" in g.vertex_attributes():
        for ind, color in enumerate(g.vs["color"]):
            edges = g.es.select(_source=ind)
            edges["color"] = [color]

    # Plot the graph
    drawing.plot(g, **visual_style)
    u.print_delta("generate viz graph ")
import igraph
import numpy as np
from utils import utils as u
from utils import metrics as gm

# Profiling run: load the pickled sample graph, then time the summary and
# report helpers from ``utils.metrics``.
print('====== Profiling results =======')

# NOTE(review): Read_Picklez presumably unpickles the file - only point it
# at trusted data.
u.start_time()
g = igraph.Graph.Read_Picklez(fname="./files/twitter_1K_picklez")
u.print_delta("load graph")

print('graph is directed ? : ', g.is_directed())

# Each step is timed on its own so the deltas can be compared.
for label, step in (
    ("get summary graph", gm.summary),
    ("get report graph", gm.report),
):
    u.start_time()
    step(g)
    u.print_delta(label)

# gm.degree_distribution_plot(g, 'dg_distri', loglog=False)
# gm.plot_degree_distribution(g)
# dD = g.degree_distribution(bin_width=10)
# print(dD)
# print(g.degree_distribution().bins())