def core_community(G, country):
    """Draw the main core of each graph in ``G`` on one figure; return the core sizes.

    Does what is explained on slide 51 of
    AVDC_2019-2022_AIAS_Lecture_Graph & Visualisation.pdf.

    Parameters
    ----------
    G : indexable collection of networkx graphs
        Indexed as ``G[0] .. G[len(G) - 1]``. Only three labels/colours are
        defined, so more than three graphs raises ``IndexError``.
    country :
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    list of int
        Number of nodes in the main core of each graph, in index order.
    """
    fig, ax = plt.subplots(1, 1, figsize=(17, 8))
    time_label = ["2003-2009", "2010-2016", "2003-2016"]
    color = ["#4C72B0", "#DD8452", "#55A868"]
    sizes = []
    for k in range(len(G)):
        graph = G[k]
        try:
            core = nx.k_core(graph, core_number=nx.core_number(graph))
        except nx.exception.NetworkXError:
            # Retry without self-loops. Work on a copy so the caller's graph
            # is not silently modified as a side effect of plotting.
            loop_free = graph.copy()
            loop_free.remove_edges_from(list(nx.selfloop_edges(loop_free)))
            core = nx.k_core(loop_free,
                             core_number=nx.core_number(loop_free))
        sizes.append(len(core))
        nx.draw_networkx(core, ax=ax, label=time_label[k], alpha=0.75,
                         node_color=color[k], edge_color=color[k])
    plt.legend(loc="best")
    plt.show()
    return sizes
def _cliques_heuristic(G, H, k, min_density):
    """Yield k-cores of ``G`` induced by dense candidate node sets taken from ``H``.

    For every distinct core number of ``H`` (highest first) the nodes with
    that core number form the candidate set. Below the top level, neighbours
    shared by all candidates are added when there are fewer than ``k`` of
    them. Minimum-degree nodes are then peeled off until ``_same`` accepts
    the core numbers and the density reaches ``min_density``; the resulting
    ``nx.k_core(G.subgraph(SH), k)`` is yielded. A candidate that shrinks to
    ``k`` nodes or fewer yields nothing.
    """
    core_of = nx.core_number(H)
    levels = sorted(set(core_of.values()), reverse=True)
    for rank, level in enumerate(levels):
        cands = {node for node, core in core_of.items() if core == level}
        # Skip checking for overlap for the highest core value
        if rank == 0:
            overlap = False
        else:
            outside_nbrs = [
                {nbr for nbr in H[node] if nbr not in cands}
                for node in cands
            ]
            overlap = set.intersection(*outside_nbrs)
        if overlap and len(overlap) < k:
            SH = H.subgraph(cands | overlap)
        else:
            SH = H.subgraph(cands)
        sh_cnumber = nx.core_number(SH)
        SG = nx.k_core(G.subgraph(SH), k)
        while True:
            if _same(sh_cnumber) and nx.density(SH) >= min_density:
                # Loop finished without hitting the size cut-off below.
                yield SG
                break
            # This subgraph must be writable => .copy()
            SH = H.subgraph(SG).copy()
            if len(SH) <= k:
                break
            sh_cnumber = nx.core_number(SH)
            degree_of = dict(SH.degree())
            lowest = min(degree_of.values())
            SH.remove_nodes_from(
                node for node, deg in degree_of.items() if deg == lowest)
            SG = nx.k_core(G.subgraph(SH), k)
def _cliques_heuristic(G, H, k, min_density):
    """Yield k-cores of ``G`` induced by dense candidate node sets taken from ``H``.

    For every distinct core number of ``H`` (highest first) the nodes with
    that core number form the candidate set; below the top level, neighbours
    common to all candidates are added when there are fewer than ``k`` of
    them. Minimum-degree nodes are removed until ``_same(sh_cnumber)`` holds
    and ``nx.density(SH) >= min_density``, at which point the matching
    k-core of ``G`` is yielded. Candidates that shrink to ``k`` nodes or
    fewer are abandoned without yielding.
    """
    h_cnumber = nx.core_number(H)
    for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
        cands = set(n for n, c in h_cnumber.items() if c == c_value)
        # Skip checking for overlap for the highest core value
        if i == 0:
            overlap = False
        else:
            overlap = set.intersection(*[
                set(x for x in H[n] if x not in cands)
                for n in cands])
        if overlap and len(overlap) < k:
            SH = H.subgraph(cands | overlap)
        else:
            SH = H.subgraph(cands)
        sh_cnumber = nx.core_number(SH)
        SG = nx.k_core(G.subgraph(SH), k)
        while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
            # Subgraph views are read-only in networkx 2.x, and nodes are
            # removed from SH below, so take a writable copy.
            SH = H.subgraph(SG).copy()
            if len(SH) <= k:
                break
            sh_cnumber = nx.core_number(SH)
            # dict() works whether degree() returns a dict (1.x) or a
            # DegreeView of (node, degree) pairs (2.x).
            sh_deg = dict(SH.degree())
            min_deg = min(sh_deg.values())
            SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
            SG = nx.k_core(G.subgraph(SH), k)
        else:
            yield SG
def calculate_existed_ratio(g1, g2):
    """Return, as a string, the share of ``g1``'s main-core nodes also in ``g2``'s main core."""
    core1_nodes = set(nx.k_core(g1).nodes())
    core2_nodes = set(nx.k_core(g2).nodes())
    shared = core1_nodes.intersection(core2_nodes)
    ratio = len(shared) / len(core1_nodes)
    return str(ratio)
def main(argv):
    """Print triangle, clustering and k-core statistics for one weighted edge list.

    Parameters
    ----------
    argv : list of str
        Exactly one element: the path to the edge-list file. Anything else
        prints the usage line and exits.

    The print() calls use a single pre-formatted argument so they behave the
    same under Python 2 and Python 3.
    """
    if len(argv) != 1:
        print("usage: python analyze-centrality.py <path/to/edgelist>")
        sys.exit(0)
    graph_file_path = argv[0]
    graph_name = graph_file_path.split('/')[-1].split('.')[0]
    print("Current file: {}".format(graph_name))
    print("nx version: {}".format(nx.__version__))

    # 1. Read in the weighted edge lists file as an undirected graph.
    g_nx = nx.read_weighted_edgelist(path=graph_file_path,
                                     create_using=nx.Graph())
    # nx.triangles maps each node to the number of triangles through it, so
    # every triangle is counted once per corner: sum and divide by three.
    # (len() of that dict would just be the node count.)
    num_triads = sum(nx.triangles(g_nx).values()) // 3
    print("Number of triads: {}".format(num_triads))

    # A concrete list is needed: node views are not indexable by choice().
    node_list = list(g_nx.nodes())

    # 2. Local clustering coefficient of a randomly selected node
    random_node = choice(node_list)
    lcc = get_local_clustering_coef(g_nx, random_node)
    print("Clustering coefficient of random node {} in {}: {}".format(
        random_node, graph_name, lcc))

    # 3. Number of triads a random node participates in
    random_node = choice(node_list)
    num_triads_for_rand_node = nx.triangles(g_nx, random_node)
    print("Number of triads node {} participates in {} triads".format(
        random_node, num_triads_for_rand_node))

    # 4. Watts-Strogatz (average over local) and global clustering
    # coefficients. The global value is the transitivity; averaging the
    # local coefficients again would only repeat the Watts-Strogatz number.
    avg_clustering = nx.average_clustering(g_nx)
    avg_global_clustering = nx.transitivity(g_nx)
    print("Clustering coefficient of the network: {} (Watts-Strogatz); {} (global)".format(
        avg_clustering, avg_global_clustering))

    # 5. Plot of the k-core edge-size distribution
    k_core_edge_dict = {}
    k_core_node_dict = {}
    core_numbers = nx.core_number(g_nx)
    max_num_cores = max(core_numbers.values())
    # Inclusive upper bound so the innermost (maximum) core is reported too.
    for i in range(0, max_num_cores + 1):
        core = nx.k_core(g_nx, k=i, core_number=core_numbers)
        k_core_edge_dict[i] = core.number_of_edges()
        k_core_node_dict[i] = core.number_of_nodes()
    file_name = plot_distribution(k_core_edge_dict, "core k",
                                  "number of edges in k-core", graph_name)
    print("k-core edge-size distribution is in: {}".format(file_name))

    # 6. Plot of the k-core node-size distribution
    file_name = plot_distribution(k_core_node_dict, "core k",
                                  "number of nodes in k-core", graph_name)
    print("k-core node-size distribution is in: {}".format(file_name))
def test_k_core(self):
    """Check the node sets ``nx.k_core`` returns on ``self.H`` for k = 0, 1, 2."""
    expected_by_k = (
        (0, sorted(self.H.nodes())),  # k=0 keeps every node
        (1, [1, 2, 3, 4, 5, 6]),
        (2, [2, 4, 5, 6]),
    )
    for k, expected_nodes in expected_by_k:
        core = nx.k_core(self.H, k=k)
        assert_equal(sorted(core.nodes()), expected_nodes)
def test_k_core(self):
    """Check the node sets ``nx.k_core`` returns on ``self.H`` for k = 0, 1, 2."""

    def core_nodes(k):
        # Sorted node list of the k-core of the fixture graph.
        return sorted(nx.k_core(self.H, k=k).nodes())

    # k=0: every node survives
    assert core_nodes(0) == sorted(self.H.nodes())
    # k=1
    assert core_nodes(1) == [1, 2, 3, 4, 5, 6]
    # k=2
    assert core_nodes(2) == [2, 4, 5, 6]
def max_k_shell(g1, g2):
    """Print and return the overlap of the two main cores relative to each core's size.

    Returns a pair ``(shared / |core(g1)|, shared / |core(g2)|)``.
    """
    core2_nodes = set(nx.k_core(g2).nodes())
    core1_nodes = set(nx.k_core(g1).nodes())
    shared_count = len(core1_nodes & core2_nodes)

    ratio_vs_g1 = shared_count / len(core1_nodes)
    print(str(ratio_vs_g1))
    ratio_vs_g2 = shared_count / len(core2_nodes)
    print(str(ratio_vs_g2))
    return ratio_vs_g1, ratio_vs_g2
def clickMethodAllie(self):
    """Handle the button click: build the selected graph, optionally reduce it to a k-core, draw it.

    Reads ``self.combo`` to choose between 'All Alliances' and 'All Tanks',
    ``self.year_val`` as an optional minimum alliance start year and
    ``self.core_val`` as an optional core order. Invalid numbers are
    reported through a message box. A bad year aborts; a bad core size
    falls back to drawing the un-reduced graph, as before.
    """
    print('test: ' + self.country_name.text())
    print('test: ' + self.year_val.text())
    print('test: ' + self.combo.currentText())
    print('test: ' + self.core_val.text())

    selection = self.combo.currentText()
    if selection == 'All Alliances':
        start_year = 0
        if self.year_val.text():
            try:
                start_year = int(self.year_val.text())
            except ValueError:
                QMessageBox.about(self, 'Wrong input!',
                                  'Year number has to be an integer.')
                return
        alliances = list(
            session.query(Alliance.name_1, Alliance.name_2).filter(
                Alliance.start_year >= start_year).all())
        G_allies = nx.Graph()
        G_allies.add_edges_from(alliances)
        if self.core_val.text():
            # Only the int() conversion is guarded: a failure inside
            # networkx is not a "wrong input" and must not be reported as one.
            try:
                core_size = int(self.core_val.text())
            except ValueError:
                QMessageBox.about(self, 'Wrong input!',
                                  'Core size has to be an integer.')
            else:
                # Same self-loop clean-up as the tank branch below.
                G_allies.remove_edges_from(
                    list(nx.selfloop_edges(G_allies)))
                G_allies = nx.k_core(G_allies, k=core_size)
        gd.draw_alliance(G_allies)
    elif selection == 'All Tanks':
        seller_buyers = list(
            session.query(Tank.owner_name, Tank.seller_name).all())
        G_sell_buy = nx.Graph()
        G_sell_buy.add_edges_from(seller_buyers)
        if self.core_val.text():
            G_sell_buy.remove_edges_from(
                list(nx.selfloop_edges(G_sell_buy)))
            try:
                core_size = int(self.core_val.text())
            except ValueError:
                QMessageBox.about(self, 'Wrong input!',
                                  'Core size has to be an integer.')
            else:
                G_sell_buy = nx.k_core(G_sell_buy, k=core_size)
        gd.draw_tanks(G_sell_buy)
def subgraph(path, filename, CenterAddress, k=3):
    """Load a pickled graph and return its k-core with self-loops removed.

    Keeps the nodes that have at least ``k`` neighbours inside the result
    (original note: nodes with at least 3 transactions).

    Parameters
    ----------
    path, filename : str
        Concatenated as ``path + filename`` to locate the pickle file.
    CenterAddress :
        Not used by this function; kept for interface compatibility.
    k : int
        Core order, 3 by default.
    """
    # NOTE: read_gpickle unpickles the file, which can execute arbitrary
    # code; only load files from a trusted source.
    G = nx.read_gpickle(path + filename)
    # Materialise the self-loop list before deleting edges from the graph.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    return nx.k_core(G, k)
def neighbors2_community(G, remove_duplicates=True, use_kcore=False):
    """Build one community per seed node from its neighbours and their neighbours.

    Parameters
    ----------
    G : networkx graph
        Neighbourhoods are always taken from ``G`` itself.
    remove_duplicates : bool
        Drop repeated nodes inside each community.
    use_kcore : bool
        Use only the nodes of the 3-core of ``G`` (self-loops removed on a
        copy) as seeds instead of every node.

    Returns
    -------
    list of list
        Distinct communities. Identical node tuples collapse into one.
    """
    if use_kcore:
        Gc = G.copy()
        # Module-level helper: the Graph.selfloop_edges() method no longer
        # exists in current networkx releases.
        Gc.remove_edges_from(list(nx.selfloop_edges(Gc)))
        Gc = nx.k_core(Gc, 3)
    else:
        Gc = G

    communities = set()
    for v in Gc.nodes():
        community = []
        for n in G.neighbors(v):
            community.append(n)
            community.extend(G.neighbors(n))
        if remove_duplicates:
            community = list(set(community))
        communities.add(tuple(community))
    # Convert tuples back into lists
    return [list(members) for members in communities]
def get_kcore_graph(self, input_file, output_dir, sep='\t', core_list=None, degree_list=None):
    """Write the adjacency matrix of every k-core of one graph file to ``output_dir``.

    The graph is read from ``self.origin_base_path / input_file``. For each
    ``k`` from 1 to the maximum core number, the k-core is padded with all
    nodes of ``self.full_node_list`` and saved as ``<formatted k>.npz``.
    ``core_list`` and ``degree_list`` are accepted but not used here.
    """
    source_path = os.path.join(self.origin_base_path, input_file)
    graph = get_nx_graph(source_path, self.full_node_list, sep=sep)

    core_num_dict = nx.core_number(graph)
    core_values = list(core_num_dict.values())
    print("unique core nums: ", len(np.unique(np.array(core_values))))
    max_core_num = max(core_values)
    print('file name: ', input_file, 'max core num: ', max_core_num)

    check_and_make_path(output_dir)
    format_str = get_format_str(max_core_num)

    k = 1
    while k <= max_core_num:
        core_graph = nx.k_core(graph, k=k, core_number=core_num_dict)
        core_graph.add_nodes_from(self.full_node_list)
        # The explicit nodelist is essential: without it the row/column
        # order of the adjacency matrix changes and causes bugs.
        adjacency = nx.to_scipy_sparse_matrix(core_graph,
                                              nodelist=self.full_node_list)
        target = os.path.join(output_dir, format_str.format(k) + '.npz')
        sp.save_npz(target, adjacency)
        k += 1
def get_max_core_degree(G):
    """Return the largest ``k`` for which the k-core of ``G`` is non-empty.

    This is the maximum core number over all nodes. It is read directly
    from ``nx.core_number`` rather than probing ``k_core`` upwards from a
    fixed starting value, which returned that starting value minus one for
    every graph whose true maximum was smaller. Returns 0 for a graph
    without nodes.
    """
    core_numbers = nx.core_number(G)
    if not core_numbers:
        return 0
    return max(core_numbers.values())
def write_timers(graph):
    """Write per-node interval data for a GraphML file to CSV and print loss figures.

    Parameters
    ----------
    graph : str
        Path to a ``.graphml`` file. The CSV is written next to it, with
        ``.graphml`` replaced by ``.csv``.

    The ``ap`` CSV column is 0 for nodes that are in the 2-core and are not
    articulation points, and 1 for every other node.
    """
    out_fn = graph.replace(".graphml", "") + ".csv"
    g = nx.read_graphml(graph)
    intv = __compute_intervals(g, return_bc=True)

    # Sets make the membership tests below constant-time.
    ap = set(nx.articulation_points(g))
    no_leaves_nodes = set(nx.k_core(g, 2).nodes())
    # Kept as a list because it is handed to __get_loss_strategy unchanged.
    cn = [x for x in g.nodes() if x not in ap and x in no_leaves_nodes]
    cn_lookup = set(cn)

    # The context manager closes the file even if a row fails to format.
    with open(out_fn, "w") as out:
        out.write("node,h,tc,hnd,tcnd,hm,tcm,bc,d,ap,mbc\n")
        for n, (h, tc, h2, tc2, bc, d, h3, tc3, mbc) in intv.items():
            out.write(
                "%s,%f,%f,%f,%f,%f,%f,%f,%d,%d,%f\n" %
                (n, h, tc, h2, tc2, h3, tc3, bc, d,
                 0 if n in cn_lookup else 1, mbc))

    print("Rel. GLR                               : %f" %
          __get_loss_strategy(intv, False, False, cn))
    print("Rel. GLR (+5, -5)                      : %f" %
          __get_loss_strategy(intv, False, False, cn, subset=True))
    print("Rel. GLR (d_i = 1)                     : %f" %
          __get_loss_strategy(intv, True, False, cn))
    print("Rel. GLR (+5, -5) (d_i = 1)            : %f" %
          __get_loss_strategy(intv, True, False, cn, subset=True))
    print("Rel. GLR (d_i = 1, modified BC)         : %f" %
          __get_loss_strategy(intv, True, True, cn))
    print("Rel. GLR (+5, -5) (d_i = 1, modified BC): %f" %
          __get_loss_strategy(intv, True, True, cn, subset=True))
def get_kpeak_decomposition(G):
    """Return ``(peak_numbers, orig_core_nums)`` for graph ``G``.

    The main core of the remaining graph (a "k-contour") is removed
    repeatedly; each removed node's peak number is its core number in the
    graph it was removed from. ``orig_core_nums`` holds the core numbers of
    the cleaned input graph.

    Note: self-loops are removed from the graph passed in, in place, before
    ``removeSingletons`` is applied.
    """
    # Module-level helper: the Graph.selfloop_edges() method no longer
    # exists in current networkx releases.
    G.remove_edges_from(list(nx.selfloop_edges(G)))
    G = removeSingletons(G)

    orig_core_nums = nx.core_number(G)
    H = G.copy()
    H_nodes = set(G.nodes())
    current_core_nums = orig_core_nums.copy()
    peak_numbers = {}

    # Each iteration of the while loop finds a k-contour
    while len(H) > 0:
        # Main (degeneracy) core of what is left. The core numbers are
        # already known, so hand them over instead of recomputing them.
        degen_core = nx.k_core(H, core_number=current_core_nums)

        # Nodes in the k-contour: their current core number is their peak
        # number.
        kcontour_nodes = set(degen_core.nodes())
        for n in kcontour_nodes:
            peak_numbers[n] = current_core_nums[n]

        # Remove the k-contour and recompute core numbers on the rest.
        H_nodes -= kcontour_nodes
        H = G.subgraph(list(H_nodes))
        current_core_nums = nx.core_number(H)

    return peak_numbers, orig_core_nums
def get_k_core(reviews_path, k_val):
    """Build the PID graph from a reviews file, extract its k-core and pickle it.

    Parameters
    ----------
    reviews_path :
        Passed to ``get_PID_facts``.
    k_val : int
        Core order passed to ``nx.k_core``.

    Returns
    -------
    The extracted k-core graph, which is also pickled to the file ``graph``
    in the working directory. The previous code tried to pickle an
    undefined name ``graph`` and returned nothing.
    """
    # Report start of process
    print("==================================")
    print("EXTRACTING K-CORE OF PID GRAPH    ")
    print("==================================")

    print("AT STEP #1: Determine which reviewer reviewed which products")
    (PID_to_lines, PID_to_reviewerID) = get_PID_facts(reviews_path)

    print("At STEP #2: Created weighted edges")
    weighted_edges = get_weighted_edges(PID_to_reviewerID)

    print("AT STEP #3: Create PID graph structure")
    PID_graph = create_graph(PID_to_reviewerID, weighted_edges)
    print(nx.info(PID_graph))

    print("AT STEP #4: Extracting K-core")
    k_core_graph = nx.k_core(PID_graph, k_val)
    print(nx.info(k_core_graph))

    # Binary mode and a context manager: pickle data is bytes, and the
    # handle must be closed so the file is flushed to disk.
    with open("graph", 'wb') as handle:
        pickle.dump(k_core_graph, handle)

    print("DONE!")
    return k_core_graph
def get_community(G: nx.Graph, n, hops: int, max, maxCore: bool, visited=None):
    """Extract the community around node ``n`` and return it as node-link data.

    Parameters
    ----------
    G : nx.Graph
        Graph whose node data carries ``'dom'`` and ``'id'`` entries.
    n : mapping
        Seed node record; ``n['id']`` is the ego-graph centre.
    hops : int
        Ego-graph radius.
    max : mapping
        ``max['dom']`` is forwarded to ``get_graph_stats``. (The name shadows
        the builtin but is part of the existing interface.)
    maxCore : bool
        Return the main core of the community instead of the whole community.
    visited : container, optional
        Node ids to exclude. Defaults to an empty dict created per call
        rather than one shared mutable default.

    Returns
    -------
    tuple
        ``(data, graph)``: the node-link dict, extended with a ``'stats'``
        entry, and the graph it was built from.
    """
    if visited is None:
        visited = {}

    ego = nx.ego_graph(G, n['id'], hops)

    # Filter before the k-core: keep nodes that meet the domination
    # requirement (currently dom > 0) and have not been visited yet.
    members = [
        node for node, data in ego.nodes(data=True)
        if data['dom'] > 0 and data['id'] not in visited
    ]
    community = G.subgraph(members)

    k_core = nx.k_core(community)
    # Smallest degree found inside the main core.
    max_k_core = min(degree for _, degree in k_core.degree())

    res = k_core if maxCore else community

    data1 = json_graph.node_link_data(res, {"link": "edges"})
    data1['stats'] = get_graph_stats(res, max['dom'], max_k_core, fast=True)
    data1['stats']['init'] = n['id']
    return (data1, res)
def properties_full_graph(full_g, most_central_characters):
    """Print structural properties of the full entity graph.

    :param full_g: full entity graph; every edge needs a ``'weight'`` attribute
    :param most_central_characters: mapping from the book's main characters
        to their centrality score, most important first
    """
    print('-- FULL ENTITY GRAPH PROPERTIES --')

    # Number of nodes and edges
    print('number of entities: ', full_g.number_of_nodes())
    print('number of interactions:: ', full_g.number_of_edges())

    # Connectedness (computed once; it is a full traversal)
    connected = nx.is_connected(full_g)
    print('is connected ?', connected)
    if connected:
        print(nx.diameter(full_g))

    # NOTE(review): this is total edge weight divided by the number of edges
    # (i.e. mean edge weight), although the label says weighted degree.
    # The value is left as it was; confirm which one is intended.
    n_edges = full_g.number_of_edges()
    total_weight = sum(
        data['weight'] for _, _, data in full_g.edges(data=True))
    print('average weighted degree of the graph',
          total_weight / n_edges if n_edges else 0.0)

    # Isolated nodes - connectedness
    isolated_nodes = list(nx.isolates(full_g))
    print('number of isolated nodes', len(isolated_nodes))

    # Cliques - enumerate the maximal cliques once and reuse the result for
    # both the size and the members of the largest one.
    cliques = list(nx.find_cliques(full_g))
    max_element = max(cliques, key=len) if cliques else []
    print('size largest clique: ', len(max_element))
    print('largest clique: ', max_element)

    # Clustering
    most_important_entities = list(most_central_characters.keys())
    print('clustering coef', nx.average_clustering(full_g))
    clustering = nx.clustering(full_g, nodes=most_important_entities,
                               weight='weight')
    # Rank the characters by coefficient, highest first. Sorting the dict
    # itself would only order the names alphabetically.
    cc = sorted(clustering, key=clustering.get, reverse=True)
    print('characters with highest clustering coef', cc)

    # k core
    print('k core', list(nx.k_core(full_g)))

    # Distance between first and second most important characters
    m = most_central_characters[most_important_entities[0]]
    s = most_central_characters[most_important_entities[1]]
    dist_importance = (m - s) / m
    if dist_importance > 1 / 3:
        print('one-main-character type novel')
    else:
        print('several main characters type novel')
def onionPeeling(self, graph, k, anchors=None, core_numbers=None):
    """Peel the (k-1)-core of ``graph`` into layers of nodes with fewer than ``k`` neighbours.

    Parameters
    ----------
    graph : networkx graph
    k : int
        Nodes with fewer than ``k`` neighbours in the working graph are
        peeled.
    anchors : iterable of nodes, optional
        Nodes left out of the first peel and of every reported layer.
    core_numbers : dict, optional
        Precomputed core numbers. When given, the working graph is built
        from nodes with core number >= k - 1 plus the anchors; otherwise
        ``nx.k_core(graph, k - 1)`` is used.

    Returns
    -------
    (L, L_nodes)
        ``L`` maps layer index to a node list. Layer 0 holds neighbours of
        peeled nodes that are neither peeled, nor in the remaining graph,
        nor anchors. ``L_nodes`` maps every such node to its layer index.
    """
    # Fresh set per call: avoids a shared mutable default and makes the many
    # membership tests below constant-time.
    anchors = set() if anchors is None else set(anchors)

    i = 0
    if core_numbers is None:
        N = nx.k_core(graph, k=k - 1)
    else:
        nodes = [
            u for u in core_numbers
            if core_numbers[u] >= k - 1 or u in anchors
        ]
        N = nx.Graph(graph.subgraph(nodes))

    P = [u for u in N.nodes() if len(N[u]) < k and u not in anchors]
    L = {}
    L_nodes = {}
    while len(P) > 0:
        i += 1
        L[i] = [v for v in P if v not in anchors]
        L_nodes.update({x: i for x in P})
        N.remove_nodes_from(P)
        # NOTE(review): unlike the first pass, this does not skip anchors,
        # so an anchor can be removed here. Kept as is; confirm intent.
        P = [u for u in N.nodes() if len(N[u]) < k]

    N_nodes = set(N.nodes())
    L[0] = []
    for u in L_nodes:
        L[0] += [
            v for v in graph[u]
            if v not in L_nodes and v not in N_nodes and v not in anchors
        ]
    L_nodes.update({u: 0 for u in L[0]})
    return L, L_nodes
def draw_k_cores():
    """Render the main core of the module-level graph ``G`` to ``k_cores_subgraph.html``.

    Each node is labelled by its ``'name'`` attribute and sized by its
    degree inside the core.
    """
    main_core = nx.k_core(G)

    nodes = []
    for node in main_core.nodes:
        degree = main_core.degree[node]
        nodes.append(
            opts.GraphNode(name=main_core.nodes[node]['name'],
                           value=degree,
                           symbol_size=degree))

    links = []
    for src, dst in main_core.edges:
        links.append(
            opts.GraphLink(source=main_core.nodes[src]['name'],
                           target=main_core.nodes[dst]['name']))

    chart = Graph().add(
        series_name="",
        nodes=nodes,
        links=links,
        layout='force',
        is_roam=True,
        is_focusnode=True,
        label_opts=opts.LabelOpts(is_show=False),
        is_draggable=True,
        repulsion=10000,
    )
    chart.render("k_cores_subgraph.html")
def extract_k_cores(self, k=3):
    """Extract the k-core of the network; ``k`` defaults to 3.

    Returns a list with one ``Net`` built from each item obtained by
    iterating over the k-core.
    """
    core = nx.k_core(self.network, k)
    return [
        Net(item, self.net_type, self.weight_type, from_external=False)
        for item in core
    ]
def main(args):
    """Build (or load) the master Reddit graph and save its k-core.

    Reads ``args.save_master``, ``args.filedir``, ``args.fileprefix``,
    ``args.k_core`` and ``args.save_master_k_core``. If the master pickle
    already exists it is loaded. Otherwise the split CSV files are
    processed in parallel with ``process_csv``, composed, and saved.
    """
    save_path_base = "./reddit_data/Reddit_split_2017-11/split_csv/"
    save_path_master = save_path_base + args.save_master

    if os.path.isfile(save_path_master):
        # NOTE: read_gpickle unpickles the file; only load trusted data.
        master_G = nx.read_gpickle(save_path_master)
    else:
        split_files = glob.glob(args.filedir + args.fileprefix)
        G_list = []
        with concurrent.futures.ProcessPoolExecutor() as executor:
            results = zip(split_files, executor.map(process_csv, split_files))
            for file_, sub_G in tqdm(results, total=len(split_files)):
                print("Split file %s" % (file_))
                G_list.append(sub_G)
        master_G = nx.compose_all(G_list)
        nx.write_gpickle(master_G, save_path_master)
        print("Created Master Graph")

    master_G_k_core = nx.k_core(master_G, k=args.k_core)
    print("K-core of Master Graph")
    save_path_k_core = save_path_base + str(args.k_core) + \
        '_' + args.save_master_k_core
    nx.write_gpickle(master_G_k_core, save_path_k_core)
def plot_kcore_networkx(message_adj_list, k):
    """Draw the k-core of the graph described by an adjacency list.

    ``message_adj_list[src]`` is iterated for the destinations of ``src``,
    for ``src`` in ``range(len(message_adj_list))``. Self-loops are dropped
    before the core is computed.
    """
    graph = nx.Graph()
    graph.add_edges_from(
        (src, dest)
        for src in range(len(message_adj_list))
        for dest in message_adj_list[src])
    graph.remove_edges_from(nx.selfloop_edges(graph))

    core = nx.k_core(graph, k)
    print(core.nodes)
    layout = nx.spring_layout(core)
    node_count = len(core.nodes)
    print("Number of k-core Nodes: {0}".format(node_count))

    # One identical colour value per node, mapped through the grey colormap.
    nx.draw_networkx_nodes(core, layout, node_size=30,
                           node_color=[1] * node_count,
                           edgecolors='k', cmap=plt.cm.Greys)
    nx.draw_networkx_edges(core, layout, alpha=0.5)
    plt.show()
def getCoreNodes(self, G='default', k=6):
    """Return the k-core of ``G``, or of ``self.G`` when ``G`` is left as ``'default'``."""
    target = self.G if G == 'default' else G
    return nx.k_core(target, k=k)
def findVerticesEdges(G, max_k=17):
    """Collect node/edge counts and component counts of the k-cores of ``G``.

    Parameters
    ----------
    G : networkx graph
    max_k : int
        Largest core order examined; cores 1..max_k are reported. Defaults
        to 17, the range that used to be hard-coded.

    Returns
    -------
    (vePerCore, SccPerCore)
        ``vePerCore[k] = (number_of_nodes, number_of_edges)`` and
        ``SccPerCore[k] = (k, number_of_connected_components)``.
    """
    vePerCore = {}
    SccPerCore = {}
    # Core numbers do not depend on k, so compute them once and reuse them.
    core_numbers = nx.core_number(G)
    for i in range(1, max_k + 1):
        Gtemp = nx.k_core(G, k=i, core_number=core_numbers)
        vePerCore[i] = (Gtemp.number_of_nodes(), Gtemp.number_of_edges())
        SccPerCore[i] = (i, nx.number_connected_components(Gtemp))
    return vePerCore, SccPerCore
def get_k_core(self, k):
    """Return the k-core of ``self.graph``, or ``None`` if it cannot be computed.

    Any exception is reported on stderr via ``traceback.print_exc()`` and
    swallowed, so callers must be prepared for a ``None`` result.
    """
    try:
        return nx.k_core(self.graph, k=k)
    except Exception:
        # print_exc() writes the traceback itself and returns None, so its
        # result must not be printed again.
        traceback.print_exc()
        return None
def extract_k_core_nodes(self) -> list:
    """Return the nodes of the main core of ``self.directed_graph``.

    :return: list of the nodes in the k-core with the largest ``k``
    """
    core = nx.k_core(self.directed_graph)
    # Materialise the node view so the result really is a list, as the
    # annotation promises, and does not keep the core graph alive.
    return list(core.nodes())
def generateCoreSubgraph(graph, cnumber):
    """Map every distinct core number in ``cnumber`` to the matching k-core of ``graph``.

    ``cnumber`` is passed through to ``nx.k_core`` as its ``core_number``
    argument.
    """
    distinct_levels = set(cnumber.values())
    return {
        level: nx.k_core(graph, k=level, core_number=cnumber)
        for level in distinct_levels
    }
def get_main_cores(G, n):
    """Return the main cores of ``G[0] .. G[n - 1]`` as a list."""
    print("get_main_cores: Computing main cores")
    return [nx.k_core(G[index]) for index in range(n)]
def plot_k_core(self, k=None):
    """Plot the k-core subgraph of ``self.graph``.

    :param k: int, optional
        The order of the core. If not specified, the main core is plotted.
    """
    core = nx.k_core(self.graph, k)
    self.plot_graph(core)
def calculate_k_core(G, K):
    """Draw the ``K``-core of ``G``, save the figure and write its nodes to CSV.

    Prints the core's nodes when the module-level ``verbose`` flag is set.
    """
    core = nx.k_core(G, k=K)
    label = str(K)

    nx.draw(core)
    plt.savefig("./images/kCore" + label + ".png")
    if verbose:
        print("\r\nk-Core: " + label)
        print(core.nodes())
    plt.show()

    csv_path = './data/results/kCore' + label + '.csv'
    write_csv_group(csv_path, core.nodes())
def calculate_main_k_core(G):
    """Draw the main core of ``G``, save the figure and write its nodes to CSV.

    Prints the core's nodes when the module-level ``verbose`` flag is set.
    """
    main_core = nx.k_core(G)

    nx.draw(main_core)
    plt.savefig("./images/kCoreMain.png")
    if verbose:
        print("\r\nk-Core: Main")
        print(main_core.nodes())
    plt.show()

    write_csv_group('./data/results/mainKCore.csv', main_core.nodes())
def find_kcore(self):
    """Compute core numbers of ``self.graph`` and record its main core.

    Sets:
      * ``self.kcore_dict``      - node -> core number
      * ``self.max_kcore``       - largest core number
      * ``self.top_kcore_nodes`` - main-core nodes converted with ``int()``
      * ``self.kcore_length``    - number of main-core nodes
    """
    self.kcore_dict = nx.core_number(self.graph)
    self.max_kcore = max(self.kcore_dict.values())
    # Reuse the core numbers just computed; without the argument k_core
    # would run the whole decomposition a second time.
    kcore_subgraph = nx.k_core(self.graph, core_number=self.kcore_dict)
    self.top_kcore_nodes = [int(x) for x in kcore_subgraph]
    self.kcore_length = len(self.top_kcore_nodes)
def getKCore(undirectedGraph):
    """Draw the graph in black with its main core highlighted in blue; save as ``kcore.eps``."""
    layout = nx.spring_layout(undirectedGraph, k=0.15, iterations=20)
    nx.draw(undirectedGraph, layout, node_size=100, node_color='k')

    main_core = nx.k_core(undirectedGraph)
    core_edges = nx.edges(main_core)

    # Overlay the core on the same layout so it lines up with the base drawing.
    nx.draw_networkx_nodes(undirectedGraph, layout, nodelist=main_core,
                           node_color='b', node_size=100)
    nx.draw_networkx_edges(undirectedGraph, layout, edgelist=core_edges,
                           edge_color='b', width=3)

    plt.savefig('kcore.eps', format='eps', dpi=1000)
    plt.show()
def find_k_cores(self, max_k):
    """Collect non-empty k-cores for k = max_k down to 1 into ``self.k_cores``.

    After each non-empty core the search continues on the k-crust of the
    current graph. Returns the length of ``self.k_cores``.
    """
    remaining = self.G
    if self.verbose:
        print('K-CORES')

    order = max_k
    while order > 0:
        core = nx.k_core(remaining, order)
        if len(core) > 0:
            self.k_cores.append(core.nodes())
            remaining = nx.k_crust(remaining, order)
        order -= 1

    if self.verbose:
        print('Found %s k-cores' % (len(self.k_cores)))
    return len(self.k_cores)
def detect_recover(filename, k):
    """Detect communities on the k-core of a GML graph and extend them to the remaining nodes.

    Reads the network from a GML file, partitions its ``int(k)``-core with
    ``community.best_partition``, then lets ``vote_for_node`` place the
    nodes ordered by ``sort_by_neighbor``. Returns the result of
    ``convert_partition_format``.

    Random graphs may perform poorly: an Erdos-Renyi graph has no true
    community structure.
    """
    full_graph = nx.read_gml(filename)
    core_graph = nx.k_core(full_graph, int(k))

    partition = community.best_partition(core_graph)
    recover_order = sort_by_neighbor(core_graph, full_graph)

    # NOTE(review): vote_for_node is invoked twice and the first result is
    # discarded. Both calls are kept in case the first one updates
    # `partition` in place; confirm whether one call is enough.
    vote_for_node(partition, recover_order, full_graph)
    extended = vote_for_node(partition, recover_order, full_graph)
    return convert_partition_format(extended)
def KCored(G):
    """Write the k-core of ``G`` as a weighted edge list, with k taken from the sorted degrees.

    ``k`` is the element at index ``len // 2`` of the sorted node degrees
    (the upper median). The output path is the module-level
    ``outDirK + clusterFile``.
    """
    # Set k value
    k_values = sorted(G.degree(node) for node in G.nodes())
    # Floor division: a float index raises TypeError on Python 3.
    k = k_values[len(k_values) // 2]

    subG = nx.k_core(G, k=k)  # Returns subgraph
    # The third positional parameter of write_weighted_edgelist is
    # `comments`, not a file mode, so the stray 'w' has been dropped.
    nx.write_weighted_edgelist(subG, outDirK + clusterFile)
def getKCore(graph, worksheet):
    """Draw the main core of ``graph``, save it as a PNG and insert it into ``worksheet``.

    The image is stored in ``wordGraphs/top50/<name>/KCore.png``, where
    ``name`` is a module-level variable, and placed at row 4, column 5 of
    the worksheet at half scale.
    """
    kCore = nx.k_core(graph)
    print('KCore : ')

    directory = os.path.normpath("wordGraphs/top50/" + name)
    if not os.path.exists(directory):
        os.makedirs(directory)
    fileName = os.path.normpath(directory + "/KCore.png")

    nx.draw(kCore, node_size=100)
    # Both fragments are raw strings: '\ ' in a normal literal is an invalid
    # escape sequence that newer Python versions warn about.
    plt.title(r'$\mathrm{K-Core\ for\ }' + name + r'\ $', fontsize=15)
    plt.savefig(fileName, format="PNG")
    plt.show()
    worksheet.insert_image(4, 5, fileName, {'x_scale': 0.5, 'y_scale': 0.5})
def kcore_partition(k, FILE_PATH):
    """Partition the k-core of a GML graph and return ``{community id: [nodes]}``.

    Reads the network from a GML file and runs ``community.best_partition``
    on its k-core. Random graphs may perform poorly: an Erdos-Renyi graph
    has no true community structure.
    """
    full_graph = nx.read_gml(FILE_PATH)
    core_graph = nx.k_core(full_graph, k)
    partition = community.best_partition(core_graph)

    # One empty bucket per distinct community id, then fill the buckets.
    community_ids = list(set(partition.values()))
    new_partition = {community_id: [] for community_id in community_ids}
    for node, community_id in partition.items():
        new_partition[community_id].append(node)
    return new_partition
# SentimentAnalysis_RGO_Belief_Propagation(nxg)
#
# Multiplies SentiWordNet scores (swn.senti_synsets) into four running
# products, all starting at 1.0, and returns them as the tuple
#   (dfs_positive, dfs_negative, core_positive, core_negative).
#   * core_*: one factor per vertex of nxg, visited in descending
#     nx.core_number order.
#   * dfs_*: one factor per edge of nx.dfs_edges over the 6-core of nxg,
#     built from the product of the two endpoint scores.
# Scores are taken from the first synset, scaled by 10.0, and only refreshed
# when more than two synsets are found; a factor of 0.0 is replaced by 1.0
# before multiplying. Both product pairs are printed with "%f".
#
# NOTE(review): the original line structure of this function has been lost,
# so the nesting of the zero checks and multiplications relative to the
# `if len(...) > 2` tests cannot be confirmed from this text - verify
# against the upstream file before restructuring.
# NOTE(review): uses Python 2 print statements and calls len() and [0] on
# the result of swn.senti_synsets, which assumes a list is returned.
# nx.core_number(nxg) is evaluated twice.
def SentimentAnalysis_RGO_Belief_Propagation(nxg): #Bayesian Pearl Belief Propagation is done by #assuming the senti scores as probabilities with positive #and negative signs and the Recursive Gloss Overlap #definition graph being the graphical model. #Sentiment as a belief potential is passed through #the DFS tree of this graph. dfs_positive_belief_propagated=1.0 core_positive_belief_propagated=1.0 dfs_negative_belief_propagated=1.0 core_negative_belief_propagated=1.0 core_xnegscore=core_xposscore=1.0 dfs_knegscore=dfs_kposscore=dfs_vposscore=dfs_vnegscore=1.0 sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True) kcore_nxg=nx.k_core(nxg,6,nx.core_number(nxg)) for x in sorted_core_nxg: xsset = swn.senti_synsets(x[0]) if len(xsset) > 2: core_xnegscore = float(xsset[0].neg_score())*10.0 core_xposscore = float(xsset[0].pos_score())*10.0 if core_xnegscore == 0.0: core_xnegscore = 1.0 if core_xposscore == 0.0: core_xposscore = 1.0 core_positive_belief_propagated *= float(core_xposscore) core_negative_belief_propagated *= float(core_xnegscore) print "Core Number: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(core_positive_belief_propagated), float(core_negative_belief_propagated)) #for k,v in nx.dfs_edges(nxg): for k,v in nx.dfs_edges(kcore_nxg): ksynset = swn.senti_synsets(k) vsynset = swn.senti_synsets(v) if len(ksynset) > 2: dfs_knegscore = float(ksynset[0].neg_score())*10.0 dfs_kposscore = float(ksynset[0].pos_score())*10.0 if len(vsynset) > 2: dfs_vnegscore = float(vsynset[0].neg_score())*10.0 dfs_vposscore = float(vsynset[0].pos_score())*10.0 dfs_kposscore_vposscore = float(dfs_kposscore*dfs_vposscore) dfs_knegscore_vnegscore = float(dfs_knegscore*dfs_vnegscore) if dfs_kposscore_vposscore == 0.0: dfs_kposscore_vposscore = 1.0 if dfs_knegscore_vnegscore == 0.0: dfs_knegscore_vnegscore = 1.0 dfs_positive_belief_propagated *= float(dfs_kposscore_vposscore) dfs_negative_belief_propagated *= 
float(dfs_knegscore_vnegscore) print "K-Core DFS: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(dfs_positive_belief_propagated),float(dfs_negative_belief_propagated)) return (dfs_positive_belief_propagated, dfs_negative_belief_propagated, core_positive_belief_propagated, core_negative_belief_propagated)
# core_topological_sort(vg_en_tn_prdct, threshold=1)
#
# Builds a directed graph from inverse_distance_intrinsic_merit(...)[0]: an
# edge (rowframe, columnframe) is added whenever max(column) exceeds
# `threshold`. Self-loops are removed, the main core of the undirected
# version is taken with nx.k_core, and the result of nx.topological_sort on
# that core is printed and returned.
#
# NOTE(review): the original line structure has been lost, so it is not
# visible where the rowframe/columnframe increments sit relative to the two
# loops; no reset of columnframe appears anywhere in the text - confirm
# against the upstream file.
# NOTE(review): nx.topological_sort is applied to the output of
# k_core(...to_undirected()), i.e. an undirected graph; check that this is
# accepted by the networkx version in use.
# NOTE(review): uses Python 2 print statements.
def core_topological_sort(vg_en_tn_prdct,threshold=1): invdistmerit=inverse_distance_intrinsic_merit(vg_en_tn_prdct) vg_en_tn_prdct_nxg=nx.DiGraph() rowframe=0 columnframe=0 for row in invdistmerit[0]: for column in row: print "column:",column if max(column) > threshold: vg_en_tn_prdct_nxg.add_edge(rowframe, columnframe) columnframe = columnframe + 1 rowframe = rowframe + 1 vg_en_tn_prdct_nxg.remove_edges_from(nx.selfloop_edges(vg_en_tn_prdct_nxg)) video_core=nx.k_core(vg_en_tn_prdct_nxg.to_undirected()) topsorted_video_core=nx.topological_sort(video_core) print "Topological Sorted Core Summary of the Video - Edges:",topsorted_video_core return topsorted_video_core
def week4(path=r"D:\Dropbox\PhD\My Work\Algorithms\@Machine Learning\Lectures\Social Network Analysis\Week 4_Community Structure\wikipedia.gml"):
    """Print clique, k-core and community figures for a GML graph.

    Parameters
    ----------
    path : str
        GML file to load. Defaults to the location that used to be
        hard-coded, now written as a raw string so the backslashes are
        never read as escape sequences.

    The print() calls use a single pre-formatted argument so they behave the
    same under Python 2 and Python 3.
    """
    wiki = nx.read_gml(path)
    wiki = wiki.to_undirected()

    # cliques: node with the largest clique number
    cid, cls = max(nx.node_clique_number(wiki).items(),
                   key=operator.itemgetter(1))
    print('clique {}  size: {}'.format(cid, cls))

    # k-cores: len(graph) is the node count and does not rely on the
    # removed Graph.node attribute.
    kcs = nx.k_core(wiki)
    print('k-core size: {}'.format(len(kcs)))

    # community
    cs = list(nx.k_clique_communities(wiki, 2))
    ratio = float(len(cs[0])) / len(wiki)
    print('community ratio: {}'.format(ratio))
def rank(self, return_type='set'):
    """Return the nodes of the 3-core of the entity graph.

    Parameters
    ----------
    return_type : str
        ``'set'`` returns a set of nodes; any other value returns a dict
        mapping each node to 1.

    The graph comes from ``self.get_graph()`` and is built with
    ``self.build_graph()`` when that returns ``None``.
    """
    # Return value unused; the call is retained in case the getter has
    # side effects.
    self.get_entity()

    graph = self.get_graph()
    if graph is None:
        graph = self.build_graph()

    # The unused connected_component_subgraphs() call was dropped: the
    # function is gone from current networkx and its result was never read.
    result = set(nx.k_core(graph, k=3).nodes())
    if return_type == 'set':
        return result
    return {ite: 1 for ite in result}
def networkxOperations(graph):
    """Compute centrality measures of a directed graph, plot their rank curves and write a CSV.

    Plots the sorted in-degree, out-degree, closeness, betweenness, PageRank
    and clustering sequences on log-log axes, and saves the figure as
    ``<topic>_plot.png``. Prints clustering of three random nodes, the
    number of connected components and the main core of the largest
    component, and writes one row per node to ``<topic>_results.csv``.
    ``topic`` is a module-level variable.

    The print() calls use a single pre-formatted argument so they behave the
    same under Python 2 and Python 3.
    """
    # The undirected projection is needed several times; build it once.
    undirected = graph.to_undirected()
    node_list = list(graph.nodes())

    print("Computing in-Degree of the graph's nodes... \n ")
    # dict(...) accepts both the plain dict of old networkx releases and the
    # (node, degree) view of current ones.
    in_degree = dict(graph.in_degree())
    in_degree_sequence = sorted(in_degree.values(), reverse=True)
    plt.loglog(in_degree_sequence, 'b-', marker='o', label="in-degree")

    print("Computing out-Degree of the graph's nodes... \n ")
    out_degree = dict(graph.out_degree())
    out_degree_sequence = sorted(out_degree.values(), reverse=True)
    plt.loglog(out_degree_sequence, 'b-', marker='*', label="out-degree")

    print("Computing Closeness of the graph's nodes... \n ")
    closeness = nx.closeness_centrality(graph)
    closeness_sequence = sorted(closeness.values(), reverse=True)
    plt.loglog(closeness_sequence, 'r-', marker='s', label="closeness")

    print("Computing Betweenness of the graph's nodes... \n ")
    betweenness = nx.betweenness_centrality(graph)
    betweenness_sequence = sorted(betweenness.values(), reverse=True)
    plt.loglog(betweenness_sequence, 'g-', marker='p', label="betweenness")

    print("Computing Pagerank of the graph's nodes... \n ")
    prank = nx.pagerank(graph, alpha=0.85)
    pagerank_sequence = sorted(prank.values(), reverse=True)
    plt.loglog(pagerank_sequence, 'y-', marker='*', label="pagerank")

    print("Computing Clustering Coefficient of the graph...")
    cc = nx.clustering(undirected)
    cc_sequence = sorted(cc.values(), reverse=True)
    plt.loglog(cc_sequence, 'k-', marker='h', label="cc")
    print("{} {}".format((sum(cc_sequence) * 1.0) / len(graph), "\n"))

    # plotting functions
    plt.title("Graph Properties Rank Plot")
    plt.ylabel("Properties")
    plt.xlabel("Rank")
    plt.legend(loc=3, prop={"size": 8})
    plt.savefig(topic.replace(" ", "") + "_plot.png")
    plt.show()

    sample_nodes = [choice(node_list), choice(node_list), choice(node_list)]
    print("{} {}".format(
        "Computing Clusering coefficient of nodes... \n ",
        nx.clustering(undirected, nodes=sample_nodes)))

    print("{} {}".format(
        "Number of connected components: ",
        sum(1 for x in nx.connected_components(undirected))))

    # Largest component by node count, taken as an induced subgraph;
    # connected_component_subgraphs no longer exists in current networkx.
    largest_component = max(nx.connected_components(undirected), key=len)
    maxSubGraph = undirected.subgraph(largest_component)
    core = nx.k_core(maxSubGraph).nodes()
    print("{} {}".format("k-core", core))

    with open(topic.replace(" ", "") + "_results.csv", "w") as f:
        f.write("NODE" + "," + "IN_DEGREE" + "," + "OUT_DEGREE" + "," +
                "CLOSENESS" + "," + "BETWEENNESS" + "," + "PAGERANK" + "," +
                "CC \n")
        for node in graph.nodes():
            f.write(str(node) + "," + str(in_degree[node]) + "," +
                    str(out_degree[node]) + "," + str(closeness[node]) + "," +
                    str(betweenness[node]) + "," + str(prank[node]) + "," +
                    str(cc[node]) + "\n")
top_percentile+=1 else: break if n[1] > max_core_number: max_core_number=n[1] print "max_core_number",max_core_number print "===================================================================" print "Page Rank of the vertices of RGO Definition Graph" print "===================================================================" print sorted(nx.pagerank(nxg).items(),key=operator.itemgetter(1),reverse=True) print "================================================================================" print "A primitive text generated from traversal of the k-core closure of RGO Definition Graph" print "================================================================================" kcore_nxg=nx.k_core(nxg,10,nx.core_number(nxg)) #kcore_nxg=nx.k_core(nxg) text="" path=[] for k,v in kcore_nxg.edges(): #for k,v in nxg.edges(): #print k, weight_str_map[k+" - "+v], v, ".", path=WordNetPath.path_between(k,v) path_sentence = " is related to ".join(path) text = text + path_sentence + ". " print text print "\n" print "==============================================================================" print "Lambda Composition Closure with Depth First Search of RGO graph edges as relations" print "==============================================================================" print definitiongraphedgelabels
def k_components(G, min_density=0.95):
    r"""Returns the approximate k-component structure of a graph G.

    A `k`-component is a maximal subgraph of a graph G that has, at least,
    node connectivity `k`: we need to remove at least `k` nodes to break it
    into more components. `k`-components have an inherent hierarchical
    structure because they are nested in terms of connectivity: a connected
    graph can contain several 2-components, each of which can contain
    one or more 3-components, and so forth.

    This implementation is based on the fast heuristics to approximate
    the `k`-component structure of a graph [1]_, which, in turn, is based on
    a fast approximation algorithm for finding good lower bounds of the number
    of node independent paths between two nodes [2]_.

    Parameters
    ----------
    G : NetworkX graph
        Undirected graph

    min_density : Float
        Density relaxation threshold. Default value 0.95

    Returns
    -------
    k_components : dict
        Dictionary with connectivity level `k` as key and a list of
        sets of nodes that form a k-component of level `k` as values.
        The object is a ``collections.defaultdict(list)``; it is empty
        when `G` has no nodes.

    Examples
    --------
    >>> # Petersen graph has 10 nodes and it is triconnected, thus all
    >>> # nodes are in a single component on all three connectivity levels
    >>> from networkx.algorithms import approximation as apxa
    >>> G = nx.petersen_graph()
    >>> k_components = apxa.k_components(G)

    Notes
    -----
    The logic of the approximation algorithm for computing the `k`-component
    structure [1]_ is based on repeatedly applying simple and fast algorithms
    for `k`-cores and biconnected components in order to narrow down the
    number of pairs of nodes over which we have to compute White and Newman's
    approximation algorithm for finding node independent paths [2]_. More
    formally, this algorithm is based on Whitney's theorem, which states
    an inclusion relation among node connectivity, edge connectivity, and
    minimum degree for any graph G. This theorem implies that every
    `k`-component is nested inside a `k`-edge-component, which in turn,
    is contained in a `k`-core. Thus, this algorithm computes node independent
    paths among pairs of nodes in each biconnected part of each `k`-core,
    and repeats this procedure for each `k` from 3 to the maximal core number
    of a node in the input graph.

    Because, in practice, many nodes of the core of level `k` inside a
    bicomponent actually are part of a component of level k, the auxiliary
    graph needed for the algorithm is likely to be very dense. Thus, we use
    a complement graph data structure (see `AntiGraph`) to save memory.
    AntiGraph only stores information of the edges that are *not* present
    in the actual auxiliary graph. When applying algorithms to this
    complement graph data structure, it behaves as if it were the dense
    version.

    See also
    --------
    k_components

    References
    ----------
    .. [1]  Torrents, J. and F. Ferraro (2015) Structural Cohesion:
            Visualization and Heuristics for Fast Computation.
            https://arxiv.org/pdf/1503.04476v1

    .. [2]  White, Douglas R., and Mark Newman (2001) A Fast Algorithm for
            Node-Independent Paths. Santa Fe Institute Working Paper #01-07-035
            http://eclectic.ss.uci.edu/~drwhite/working.pdf

    .. [3]  Moody, J. and D. White (2003). Social cohesion and embeddedness:
            A hierarchical conception of social groups.
            American Sociological Review 68(1), 103--28.
            http://www2.asanet.org/journals/ASRFeb03MoodyWhite.pdf

    """
    # Dictionary with connectivity level (k) as keys and a list of
    # sets of nodes that form a k-component as values
    k_components = defaultdict(list)
    # make a few functions local for speed
    node_connectivity = local_node_connectivity
    k_core = nx.k_core
    core_number = nx.core_number
    biconnected_components = nx.biconnected_components
    combinations = itertools.combinations
    # Exact solution for k = {1,2}
    # There is a linear time algorithm for triconnectivity, if we had an
    # implementation available we could start from k = 4.
    for component in nx.connected_components(G):
        # isolated nodes have connectivity 0
        comp = set(component)
        if len(comp) > 1:
            k_components[1].append(comp)
    for bicomponent in biconnected_components(G):
        # avoid considering dyads as bicomponents
        bicomp = set(bicomponent)
        if len(bicomp) > 2:
            k_components[2].append(bicomp)
    # There is no k-component of k > maximum core number
    # \kappa(G) <= \lambda(G) <= \delta(G)
    g_cnumber = core_number(G)
    # A graph without nodes has no core numbers; max() of an empty sequence
    # would raise ValueError, so treat the maximum core as 0 in that case.
    max_core = max(g_cnumber.values()) if g_cnumber else 0
    for k in range(3, max_core + 1):
        C = k_core(G, k, core_number=g_cnumber)
        for nodes in biconnected_components(C):
            # Build a subgraph SG induced by the nodes that are part of
            # each biconnected component of the k-core subgraph C.
            if len(nodes) < k:
                continue
            SG = G.subgraph(nodes)
            # Build auxiliary graph
            H = _AntiGraph()
            H.add_nodes_from(SG.nodes())
            for u, v in combinations(SG, 2):
                K = node_connectivity(SG, u, v, cutoff=k)
                # H is a complement structure: the edges stored here are the
                # pairs whose approximate connectivity falls short of k.
                if k > K:
                    H.add_edge(u, v)
            for h_nodes in biconnected_components(H):
                if len(h_nodes) <= k:
                    continue
                SH = H.subgraph(h_nodes)
                for Gc in _cliques_heuristic(SG, SH, k, min_density):
                    for k_nodes in biconnected_components(Gc):
                        Gk = k_core(SG.subgraph(k_nodes), k)
                        if len(Gk) <= k:
                            continue
                        k_components[k].append(set(Gk))
    return k_components
import community
import networkx as nx
import matplotlib
# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Read the network from an existing GML file into a networkx graph.
# Random graphs may perform poorly here: an Erdos-Renyi graph doesn't have
# true community structure.
H = nx.read_gml('dolphins.gml')
# Keep only the 3-core of the network.
H = nx.k_core(H, 3)

# Compute the best partition by using the Louvain method.
partition = community.best_partition(H)

# Plot the result, colouring every node by its community id.
values = [partition.get(node) for node in H.nodes()]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(H.number_of_nodes())
nx.draw_spring(H, cmap=plt.get_cmap('jet'), node_color=values,
               node_size=30, with_labels=False)
plt.savefig('dolphinKcore3')
import community
import networkx as nx
import matplotlib
# Select the Agg backend before pyplot is imported.
matplotlib.use('Agg')
import matplotlib.pyplot as plt

# Read the network from an existing GML file into a networkx graph.
# Random graphs may perform poorly here: an Erdos-Renyi graph doesn't have
# true community structure.
H = nx.read_gml('netscience.gml')
# Keep only the 2-core of the network.
H = nx.k_core(H, 2)

# Compute the best partition by using the Louvain method.
partition = community.best_partition(H)

# Plot the result, colouring every node by its community id.
values = [partition.get(node) for node in H.nodes()]
# Parenthesised single-argument print behaves the same on Python 2 and 3.
print(H.number_of_nodes())
nx.draw_spring(H, cmap=plt.get_cmap('jet'), node_color=values,
               node_size=30, with_labels=False)
plt.savefig('netscienceKcore2')
def KKD(graph, reduction_list, num_trials, edges, verbose):
    """Write `num_trials` reduced copies of `graph` to 'KKD/KKD-<trial>.gexf'.

    Each trial seeds `random` with the trial index, starts from a k-core of
    `graph`, removes randomly sampled nodes whose degree equals k until the
    node count is no longer above the target, then removes random edges
    until the edge count is no longer above the target, and finally writes
    the result as GEXF.

    Parameters
    ----------
    graph : NetworkX graph
        Input graph; the connectivity helpers used below are the undirected
        ones, so presumably an undirected graph -- TODO confirm.
    reduction_list : sequence
        Only element 0 is read; it is the target number of nodes.
    num_trials : int
        Number of independent reductions (and output files).
    edges : int
        Target number of edges.
    verbose : bool
        When true, progress messages are printed.

    Returns
    -------
    None

    Notes
    -----
    Written for Python 2 (print statements). NOTE(review): it also relies
    on `nx.connected_component_subgraphs` and on `g.edges()` being usable
    with `random.choice`, which looks tied to an older networkx API --
    confirm the pinned version.
    """
    n = reduction_list[0]
    e = edges
    # Output directory for the reduced graphs.
    if not os.path.exists('KKD/'):
        os.mkdir('KKD/')
    for trial in range(num_trials):
        # Seed with the trial index so each trial is reproducible.
        random.seed(trial)
        if verbose:
            print "Starting reduction number: " + str(trial)
        # Not read again in this function.
        num_nodes = graph.number_of_nodes()
        k = 0
        g = nx.k_core(graph,k)
        # Raise k while the next core up still has more than n nodes.
        while (nx.k_core(graph,k+1).number_of_nodes()) > n:
            k +=1
            if verbose:
                print 'Calculating ' + str(k) + '-core...'
            g = nx.k_core(graph, k)
        if verbose:
            print "Removing nodes..."
        # flag switches the batch size from 50 sampled nodes to 1 once a
        # batch removal had to be rolled back.
        flag = False
        last_node_count = g.number_of_nodes()
        while g.number_of_nodes() > n:
            # Candidates are nodes whose current degree is exactly k.
            # NOTE(review): random.sample raises ValueError when there are
            # fewer candidates than the requested sample size.
            if not flag:
                rn= random.sample([x for x in g.nodes() if g.degree(x) == k], 50)
            else:
                rn = random.sample([x for x in g.nodes() if g.degree(x) == k], 1)
            # Rebinds the `edges` parameter name; the edge target was
            # already saved in `e` above. Holds the edges incident to the
            # sampled nodes so the removal can be rolled back.
            edges = []
            for node in rn:
                edges.extend(g.edges(node))
            g.remove_nodes_from(rn)
            if nx.number_connected_components(g) != 1:
                # Graph is no longer a single component: find the component
                # with the most nodes.
                nodes = 0
                for h in nx.connected_component_subgraphs(g):
                    if h.number_of_nodes() > nodes:
                        nodes = h.number_of_nodes()
                        temp = h.copy()
                # Continue with that component only if it still meets both
                # targets; otherwise roll back by re-adding the saved edges.
                if temp.number_of_nodes() >= n and temp.number_of_edges() >= e:
                    g = temp.copy()
                else:
                    flag = True
                    g.add_edges_from(edges)
            else:
                # Still one component, but the removal undershot a target:
                # roll back and continue one node at a time.
                if g.number_of_edges() < e or g.number_of_nodes() < n:
                    g.add_edges_from(edges)
                    flag = True
            if verbose and (g.number_of_nodes() != last_node_count):
                print "Graph node/edge count: ", g.number_of_nodes(), g.number_of_edges()
            last_node_count = g.number_of_nodes()
        if verbose:
            print "Done removing nodes."
            print "Removing edges..."
        while g.number_of_edges() > e:
            edge = None
            while not edge:
                edge = random.choice(g.edges())
            g.remove_edge(*edge)
            # Put the edge back if removing it split the graph.
            if nx.number_connected_components(g) > 1:
                g.add_edge(*edge)
        nx.write_gexf(g, 'KKD/KKD-' + str(trial) + '.gexf')
        if verbose:
            print "Reduced graph written."
def core(net):
    """Summarise the core structure of ``net``.

    Computes the core number of every node, extracts the main core from
    those numbers, and returns the result of ``distri`` over the core
    numbers (labelled 'core_number') concatenated with ``+`` to the result
    of ``setSize`` for the main core (labelled 'main_core').

    NOTE(review): self-loops are not stripped here (that step was disabled
    in the original code); callers presumably pass a graph without
    self-loops -- confirm.
    """
    core_numbers = nx.core_number(net)
    # Reuse the computed core numbers so k_core does not recompute them.
    innermost = nx.k_core(net, core_number=core_numbers)
    number_summary = distri(core_numbers.values(), 'core_number')
    size_summary = setSize(innermost, net.number_of_nodes(), 'main_core')
    return number_summary + size_summary
# Open the spreadsheet used as the run log.
wb = openpyxl.load_workbook(FILE_LOG_NAME)
sheet = wb.get_sheet_by_name('Sheet1')

Kcore_Value = int(sys.argv[2])
# All cells for this run share one row: the k value offset by 3.
_log_row = str(Kcore_Value + 3)
sheet['A' + _log_row] = Kcore_Value
FILE_PATH = sys.argv[1]

# Read the edge list; column B is stamped before the read, column C after.
sheet['B' + _log_row] = time.time()
G = nx.read_edgelist(FILE_PATH)
sheet['C' + _log_row] = time.time()

# k-core search.
H = nx.k_core(G, Kcore_Value)
if not H.nodes():
    # NOTE(review): no workbook save is visible before this exit, so the
    # message below may never reach disk -- confirm.
    sheet['J' + _log_row] = "The community with K of value: " + sys.argv[2] + " is empty; execution stopped"
    sys.exit(0)
# Column D: k-core search finished.
sheet['D' + _log_row] = time.time()

# Partition the k-core.
partition = community.best_partition(H)
# Column E: partition finished.
sheet['E' + _log_row] = time.time()

# Assign the nodes outside the k-core via the voting helpers.
sorted_recover_nodes = sort_by_neighbor(H, G)
new_partition = vote_for_node(partition, sorted_recover_nodes, G)
# Column F: recovery finished.
sheet['F' + _log_row] = time.time()
def test_main_core(self):
    """k_core without an explicit k on self.H should give nodes 2, 4, 5, 6."""
    found_nodes = sorted(nx.k_core(self.H).nodes())
    expected_nodes = [2, 4, 5, 6]
    assert_equal(found_nodes, expected_nodes)
vote_dic = Counter(vote_list) # print vote_dic vote_dic_sorted = sorted(vote_dic.iteritems(), key=lambda d: d[1], reverse=False) # print vote_dic_sorted if not vote_dic_sorted: kcore_partition[node] = -1 else: kcore_partition[node] = vote_dic_sorted[0][0] return kcore_partition if __name__ == "__main__": command2 = sys.argv[1] command3 = sys.argv[2] # Read Network files as gml file type, create a networkx graph and use the eisted graph file # Random graph may have poor performance, erdos renyi graph doesn't have true community structure G = nx.read_gml(command2) H = nx.k_core(G, int(command3)) print len(H.nodes()) # kcore_partition = kcore_partition(H) partition = community.best_partition(H) # print partition sorted_recover_nodes = sort_by_neighbor(H, G) # print sorted_recover_nodes vote_for_node(partition, sorted_recover_nodes, G) new_partition = vote_for_node(partition, sorted_recover_nodes, G) print len(new_partition.keys()) convert = convert_partition_format(new_partition) print len(convert[-1])
LOG_FILE.write(TEMP_INFO) LOG_FILE.write("\n") G = nx.read_edgelist(FILE_PATH) LOG_FILE.write("Transaction: Parse External File Successful. \t") LOG_FILE.write("Finish Time: %f" % time.time()) LOG_FILE.write("\n") if int(kcore_value) == 0: kcore_partition = community.best_partition(G) LOG_FILE.write("Transaction: Partition on K-core Successful.(without recovery) \t") LOG_FILE.write("Finish Time: %f" % time.time()) LOG_FILE.write("\n") else: H = nx.k_core(G, int(kcore_value)) LOG_FILE.write("Transaction: k-core Search Successful. \t") LOG_FILE.write("Finish Time: %f" % time.time()) LOG_FILE.write("\n") tmp_partition = community.best_partition(H) LOG_FILE.write("Transaction: Partition on K-core Successful. \t") LOG_FILE.write("Finish Time: %f" % time.time()) LOG_FILE.write("\n") # print partition sorted_recover_nodes = sort_by_neighbor(H, G) kcore_partition = vote_for_node(tmp_partition, sorted_recover_nodes, G) # cov_partition = convert_partition_format(new_partition) # print new_partition LOG_FILE.write("Transaction: Recovery Process Successful. \t")
import networkx as net
import process_tweets

# Graph object exposed by the process_tweets module as `g`.
retweets=process_tweets.g
# Bare expression: its value only shows up in an interactive session.
len(retweets)
# Drop self-loops before computing cores.
# NOTE(review): `selfloop_edges` is called as a graph method here; confirm
# that the installed networkx version still provides it.
retweets.remove_edges_from(retweets.selfloop_edges())
# Rebuild the graph through to_networkx_graph; the name suggests the intent
# is an undirected copy -- TODO confirm.
undir=net.to_networkx_graph(retweets)
# No k is given to k_core.
core=net.k_core(undir)
# Bare expression, as above.
len(core)
net.draw(core)
def kcores_cdf(graph):
    """Print the k-core of ``graph`` computed without an explicit k.

    Parameters
    ----------
    graph : NetworkX graph
        Graph handed to ``nx.k_core``.

    Returns
    -------
    None
        The subgraph is only printed, not returned.
    """
    max_core_graph = nx.k_core(graph)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(max_core_graph)
def getKCore(UDG, k=10, filename="KCore.png"):
    """Compute, draw and save the k-core of ``UDG``.

    Parameters
    ----------
    UDG : NetworkX graph
        Graph handed to ``nx.k_core``; the name suggests an undirected
        graph -- TODO confirm against callers.
    k : int, optional
        Order of the core to extract. Defaults to 10, the value that was
        previously hard-coded.
    filename : str, optional
        Path the figure is saved to, always in PNG format. Defaults to
        "KCore.png", the name that was previously hard-coded.

    Returns
    -------
    NetworkX graph
        The k-core subgraph that was drawn.
    """
    kCore = nx.k_core(UDG, k=k)
    nx.draw(kCore)
    # Save before show() so the file is written from the drawn figure.
    plt.savefig(filename, format="PNG")
    plt.show()
    return kCore
print(">> Edges: {}".format(g.number_of_edges())) if len(sys.argv) != 3: print("Error: Wrong number of arguments.") sys.exit(-1) _, GRAPH_FILE, OUT_FILE = sys.argv print("Opening {}...".format(GRAPH_FILE)) G = nx.read_gpickle(GRAPH_FILE) print_graph_stats(G) print("Computing k-cores...") core_numbers = nx.core_number(G) max_core = max(core_numbers.values()) max_core_communities = list(nx.connected_component_subgraphs(nx.k_core(G, k=max_core, core_number=core_numbers))) print(">> max-core: {}".format(max_core)) four_cores_communities = list(nx.connected_component_subgraphs(nx.k_core(G, k=4, core_number=core_numbers))) num_4_core = len(four_cores_communities) print(">> num-4-cores: {}".format(num_4_core)) print("Computing modularities...") print(">> max-core communities: {}".format(len(max_core_communities))) modularity_max_core = cm.modularity(G, max_core_communities) print(">> max-core modularity: {}".format(modularity_max_core)) print(">> 4-core communities: {}".format(len(four_cores_communities))) modularity_four_core = cm.modularity(G, four_cores_communities) print(">> 4-core modularity: {}".format(modularity_four_core)) print("Computing wcc...") wcc_max_core = cm.wcc(max_core_communities[0], G)