def addEdges(graph, ne, edges, id=None):
    """Add up to `ne` edges from `edges` to `graph` without changing any
    node's core number.

    Each candidate edge is inserted tentatively; if the core numbers change,
    the edge is removed again.  Tried candidates are popped from `edges`, so
    the untried remainder is returned alongside the graph.

    Parameters
    ----------
    graph : networkx.Graph
        Mutated in place.
    ne : int
        Maximum number of edges to accept.
    edges : list of 2-tuples
        Candidate edges; consumed from the front in place.
    id : unused; kept for backward compatibility with existing callers.

    Returns
    -------
    (graph, edges) : the modified graph and the remaining candidates.
    """
    core = nx.core_number(graph)
    accepted = 0
    while len(edges) > 0:
        u, v = edges.pop(0)
        graph.add_edge(u, v)
        # Fix: the original compared np.array(dict.values()) with
        # np.linalg.norm, which depends on dict ordering and breaks on
        # Python 3 (dict_values is not a sequence).  Comparing the core
        # number dicts directly is order-independent and version-safe.
        if nx.core_number(graph) != core:
            graph.remove_edge(u, v)
        else:
            accepted += 1
            if accepted >= ne:
                break
        # Progress indicator every 1000 consumed candidates.
        if len(edges) % 1000 == 0:
            print('Edges : {}'.format(len(edges)))
    return graph, edges
def _cliques_heuristic(G, H, k, min_density):
    """Heuristic that yields dense k-core candidate subgraphs of G.

    Walks the core levels of H from highest to lowest, seeds a candidate
    subgraph per level, and peels minimum-degree nodes until the candidate
    has uniform core numbers and density >= `min_density`.

    Fixes (aligned with the corrected duplicate of this function elsewhere
    in this file): on networkx >= 2.0, `subgraph()` returns a read-only
    view, so it must be copied before nodes are removed, and `degree()`
    returns a view that must be materialised before `.values()` is used.
    """
    h_cnumber = nx.core_number(H)
    for i, c_value in enumerate(sorted(set(h_cnumber.values()), reverse=True)):
        cands = set(n for n, c in h_cnumber.items() if c == c_value)
        # Skip checking for overlap for the highest core value.
        if i == 0:
            overlap = False
        else:
            overlap = set.intersection(*[
                set(x for x in H[n] if x not in cands)
                for n in cands])
        if overlap and len(overlap) < k:
            SH = H.subgraph(cands | overlap)
        else:
            SH = H.subgraph(cands)
        sh_cnumber = nx.core_number(SH)
        SG = nx.k_core(G.subgraph(SH), k)
        while not (_same(sh_cnumber) and nx.density(SH) >= min_density):
            # This subgraph must be writable => .copy()
            SH = H.subgraph(SG).copy()
            if len(SH) <= k:
                break
            sh_cnumber = nx.core_number(SH)
            sh_deg = dict(SH.degree())
            min_deg = min(sh_deg.values())
            SH.remove_nodes_from(n for n, d in sh_deg.items() if d == min_deg)
            SG = nx.k_core(G.subgraph(SH), k)
        else:
            yield SG
def run(self):
    """Run the sampling-agreement experiment and append results as CSV rows.

    Computes a DMP ranking on the full graph, then for each sample fraction
    measures agreement between the full-graph DMP and DMPs computed on ten
    random samples; writes [tname, name, fraction, resilience, mean, std].
    """
    graph = self.readGraph()
    full_core = self.fractionalRank(nx.core_number(graph))
    full_degree = self.fractionalRank(graph.degree())
    odmp = self.computeDMP(full_degree, full_core)
    resilience = self.computeResilience(50)
    rows = []
    for frac in [0.25, 0.50, 0.75]:
        agreement = []
        for _ in xrange(0, 10):
            sample = self.sample(graph, frac)
            s_core = self.fractionalRank(nx.core_number(sample))
            s_degree = self.fractionalRank(sample.degree())
            ndmp = self.computeDMP(s_degree, s_core)
            score = self.computeAgreement(odmp, ndmp)
            if score is not None:
                agreement.append(score)
        rows.append([frac, resilience, np.mean(agreement), np.std(agreement)])
    # Persist one row per sample size (append mode: accumulates across runs).
    with open(self.sname, 'a') as f:
        writer = csv.writer(f, delimiter=',')
        for row in rows:
            writer.writerow([self.tname, self.name] + row)
            print(row)
def computeResilience(fname):
    """Estimate how resilient the core-number ranking is to edge removal.

    Over ten independent runs, repeatedly detaches a random 10% of nodes
    (removing all their incident edges) and compares the resulting core
    numbers against the originals with compareRanking; returns the grand
    mean of the per-run mean correlations.
    """
    step = 10
    percents = range(0, 51, step)
    run_means = []
    for _ in xrange(0, 10):
        graph = readGraph(fname)
        batch = int(graph.number_of_nodes() * step / 100)
        original_cn = nx.core_number(graph)
        correlations = []
        for _p in percents:
            nodes = graph.nodes()
            random.shuffle(nodes)
            victims = nodes[:batch]
            doomed = []
            for u in victims:
                doomed += list([(u, v) for v in graph.neighbors(u)])
            graph.remove_edges_from(doomed)
            correlations.append(compareRanking(original_cn, nx.core_number(graph)))
        run_means.append(np.mean(correlations))
    return np.mean(run_means)
def get_kpeak_decomposition(G):
    """Compute the k-peak decomposition of G.

    Iteratively peels the degeneracy core ("k-contour") off the graph; a
    node's peak number is its core number at the moment its contour is
    removed.  Returns (peak_numbers, original_core_numbers), both dicts
    keyed by node.
    """
    G.remove_edges_from(G.selfloop_edges())
    G = removeSingletons(G)
    orig_core_nums = nx.core_number(G)
    remaining = set(G.nodes())
    H = G.copy()
    current = orig_core_nums.copy()
    peak_numbers = {}
    # Each pass of this loop removes exactly one k-contour.
    while len(H.nodes()) > 0:
        contour = nx.k_core(H)  # degeneracy core of what remains
        contour_nodes = contour.nodes()
        for n in contour_nodes:
            peak_numbers[n] = current[n]
        # Drop the contour and recompute core numbers on the remainder.
        remaining = remaining.difference(set(contour_nodes))
        H = G.subgraph(list(remaining))
        current = nx.core_number(H)
    return peak_numbers, orig_core_nums
def _cliques_heuristic(G, H, k, min_density):
    """Yield dense k-core candidate subgraphs of G.

    Scans core levels of H from highest to lowest; for each level seeds a
    candidate from that level's nodes (optionally extended by a small
    shared-neighbour overlap) and peels minimum-degree nodes until the
    candidate has a uniform core number and density >= `min_density`.
    """
    core_of = nx.core_number(H)
    for idx, level in enumerate(sorted(set(core_of.values()), reverse=True)):
        cands = {n for n, c in core_of.items() if c == level}
        # The top core level needs no overlap check.
        if idx == 0:
            overlap = False
        else:
            overlap = set.intersection(
                *[{x for x in H[n] if x not in cands} for n in cands])
        seed = cands | overlap if (overlap and len(overlap) < k) else cands
        SH = H.subgraph(seed)
        sh_core = nx.core_number(SH)
        SG = nx.k_core(G.subgraph(SH), k)
        while not (_same(sh_core) and nx.density(SH) >= min_density):
            # subgraph() returns a read-only view; copy so nodes can be removed.
            SH = H.subgraph(SG).copy()
            if len(SH) <= k:
                break
            sh_core = nx.core_number(SH)
            degrees = dict(SH.degree())
            lowest = min(degrees.values())
            SH.remove_nodes_from(n for n, d in degrees.items() if d == lowest)
            SG = nx.k_core(G.subgraph(SH), k)
        else:
            yield SG
def kcore_decomposition(orig_g_M, otherModel_M, name): dorig = pd.DataFrame() for g in orig_g_M: g.remove_edges_from(g.selfloop_edges()) d = nx.core_number(g) df = pd.DataFrame.from_dict(d.items()) df[[0]] = df[[0]].astype(int) gb = df.groupby(by=[1]) dorig = pd.concat([dorig, gb.count()], axis=1) # Appends to bottom new DFs print "orig" if not dorig.empty : zz = len(dorig.mean(axis=1).values) sa = int(math.ceil(zz/75)) if sa == 0: sa=1 for x in range(0, len(dorig.mean(axis=1).values), sa): print "(" + str(dorig.mean(axis=1).index[x]) + ", " + str(dorig.mean(axis=1).values[x]) + ")" dorig = pd.DataFrame() for g in otherModel_M: d = nx.core_number(g) df = pd.DataFrame.from_dict(d.items()) df[[0]] = df[[0]].astype(int) gb = df.groupby(by=[1]) dorig = pd.concat([dorig, gb.count()], axis=1) # Appends to bottom new DFs print "== the other model ==" if not dorig.empty : zz = len(dorig.mean(axis=1).values) sa = int(math.ceil(zz/75)) if sa == 0: sa=1 for x in range(0, len(dorig.mean(axis=1).values), sa): print "(" + str(dorig.mean(axis=1).index[x]) + ", " + str(dorig.mean(axis=1).values[x]) + ")" return
def runExperimentEdges(self, step=5, end=50, mode=0, sname=None):
    """Track core numbers while progressively perturbing the graph's edges.

    mode
        0 -- random edge deletion
        1 -- random edge rewiring

    Returns {percent_perturbed: core_number_dict}; key 0 holds the
    unperturbed baseline.  `sname` is unused but kept for compatibility.
    """
    cnumber = {0: nx.core_number(self.graph)}
    # One batch = `step` percent of the current edge count.
    batch = int(self.graph.number_of_edges() * step * 0.01)
    if mode == 0:
        noise = missing.MissingData()
    elif mode == 1:
        noise = rewire.RewireEdges()
    for i in xrange(1, int(end / step)):
        if mode == 0:
            self.graph = noise.removeRandomEdges(self.graph, batch)
        elif mode == 1:
            self.graph = noise.rewire(self.graph, batch)
        cnumber[i * step] = nx.core_number(self.graph)
    return cnumber
def kcore_ranking(multiplex_structure, N):
    """Rank the N nodes of a two-layer multiplex by per-layer core number.

    Returns {layerID: {node: core_number_or_0}}.  Nodes absent from a layer
    score 0; if no layer has a core number >= 1, every node scores 0.
    """
    total_nodes = [i for i in range(N)]
    G = {0: create_graph_fromedgelist(multiplex_structure[0]),
         1: create_graph_fromedgelist(multiplex_structure[1])}
    core_numbers = {0: nx.core_number(G[0]), 1: nx.core_number(G[1])}
    max_cores = [max(core_numbers[0].values()), max(core_numbers[1].values())]
    ranking_kcore = {}
    if max(max_cores) >= 1:
        for layerID in multiplex_structure:
            # .get collapses the original membership test + lookup.
            ranking_kcore[layerID] = {
                nodei: core_numbers[layerID].get(nodei, 0)
                for nodei in total_nodes}
    else:
        for layerID in multiplex_structure:
            ranking_kcore[layerID] = {nodei: 0 for nodei in total_nodes}
    return (ranking_kcore)
def score_sentences_k_core(sents_A, sents_B, score_sentence_by_word):
    """Score sentences of two texts by differential k-core word importance.

    A word common to both texts scores by the difference of its normalised
    core numbers; a word unique to one text keeps its own normalised score.

    Fix: the second normalisation loop previously read values of
    dict_core_A that the first loop had already overwritten, so B-side
    word scores were computed against transformed rather than raw A-side
    core numbers.  Both sides now derive from snapshots of the raw values.
    """
    graph_A = graph_builder.build_graph_word(sents_A, 3)
    graph_B = graph_builder.build_graph_word(sents_B, 3)
    print("Size of graphs : " + str(len(graph_A.nodes())) + " " +
          str(len(graph_B.nodes())))
    raw_A = nx.core_number(graph_A)
    k_A = max(raw_A.values())
    raw_B = nx.core_number(graph_B)
    k_B = max(raw_B.values())
    dict_core_A = {}
    for word, c in raw_A.items():
        if word in raw_B:
            dict_core_A[word] = c / k_A - raw_B[word] / k_B
        else:
            dict_core_A[word] = c / k_A
    dict_core_B = {}
    for word, c in raw_B.items():
        if word in raw_A:
            dict_core_B[word] = c / k_B - raw_A[word] / k_A
        else:
            dict_core_B[word] = c / k_B
    dict_sent_A = {}
    for i, sent in enumerate(sents_A):
        dict_sent_A[i] = score_sentence_by_word(sent, dict_core_A)
    dict_sent_B = {}
    for i, sent in enumerate(sents_B):
        dict_sent_B[i] = score_sentence_by_word(sent, dict_core_B)
    return dict_sent_A, dict_sent_B
def core_community(G, country):
    """Draw the main core of each graph in G on a shared axis and return
    the core sizes.

    (Implements the procedure on slide 51 of
    "AVDC_2019-2022_AIAS_Lecture_Graph & Visualisation.pdf".)
    `country` is unused but kept for interface compatibility.
    """
    fig, ax = plt.subplots(1, 1, figsize=(17, 8))
    time_label = ["2003-2009", "2010-2016", "2003-2016"]
    color = ["#4C72B0", "#DD8452", "#55A868"]
    sizes = []
    for k in range(len(G)):
        try:
            core = nx.k_core(G[k], core_number=nx.core_number(G[k]))
        except nx.exception.NetworkXError:
            # core_number refuses graphs with self-loops: strip them and retry.
            g = G[k]
            g.remove_edges_from(list(nx.selfloop_edges(g)))
            core = nx.k_core(g, core_number=nx.core_number(g))
        sizes.append(len(core))
        nx.draw_networkx(core, ax=ax, label=time_label[k], alpha=0.75,
                         node_color=color[k], edge_color=color[k])
    plt.legend(loc="best")
    plt.show()
    return sizes
def approximation_k_components_dense(G, max_k=None):
    """Approximate the k-components of G (White & Newman 2001 heuristic).

    Returns (k_components, k_number).
    NOTE(review): Python 2 only -- uses dict.iteritems() below.
    """
    # Compute only until max k
    if max_k is None:
        max_k = float('infinity')
    # Dictionary with connectivity level (k) as keys and a list of
    # sets of nodes that form a k-component as values
    k_components = {}
    # Dictionary with nodes as keys and maximum k of the deepest
    # k-component in which they are embedded
    k_number = dict(((n, 0) for n in G.nodes()))
    # We deal first with k = 1
    k_components[1] = []
    for cc in networkx.connected_components(G):
        for node in cc:
            k_number[node] = 1
        if len(cc) > 2:
            k_components[1].append(set(cc))
    # Start from k_cores: all k-components are also k-cores
    # but not all k-cores are k-components
    core_number = networkx.core_number(G)
    for k in range(2, min(max(core_number.values()) + 1, max_k + 1)):
        k_components[k] = []
        # Build k-core subgraph
        C = G.subgraph((n for n, cnum in core_number.items() if cnum >= k))
        for candidates in networkx.connected_components(C):
            # Compute pairwise vertex connectivity for each connected part
            # of this k-core using White and Newman 2001 algorithm.
            K = all_pairs_vertex_connectivity(G.subgraph(candidates),
                                              max_paths=k, strict=True)
            # Build a graph where two nodes are linked if they have at least k
            # node independent paths between them. Suggested in
            # White & Newman, 2001 (This is a very dense graph, almost complete
            # in many cases)
            H = networkx.Graph()
            # Too slow because we add every edge twice
            #H.add_edges_from(((u,v) for u in K \
            #                    for (v,w) in K[u].iteritems() if w >= k))
            seen = set()
            for u, nbrs in K.items():
                for v, ni_paths in nbrs.iteritems():
                    if v not in seen and ni_paths >= k:
                        H.add_edge(u, v)
                # Mark u done so the symmetric edge is not added again.
                seen.add(u)
            # Compute k-core of H and assume that the core of level k is a good
            # approximation for a component of level k
            core_number_2 = networkx.core_number(H)
            C2 = H.subgraph(
                (n for n, cnum in core_number_2.items() if cnum >= k))
            for k_component in networkx.connected_components(C2):
                if len(k_component) >= k:
                    k_components[k].append(set(k_component))
                    for node in k_component:
                        k_number[node] = k
    return k_components, k_number
def approximation_k_components_dense(G, max_k=None):
    """Approximate the k-components of G (White & Newman 2001 heuristic).

    Returns (k_components, k_number).
    NOTE(review): duplicate of the other approximation_k_components_dense
    in this file; Python 2 only -- uses dict.iteritems() below.
    """
    # Compute only until max k
    if max_k is None:
        max_k = float('infinity')
    # Dictionary with connectivity level (k) as keys and a list of
    # sets of nodes that form a k-component as values
    k_components = {}
    # Dictionary with nodes as keys and maximum k of the deepest
    # k-component in which they are embedded
    k_number = dict(((n, 0) for n in G.nodes()))
    # We deal first with k = 1
    k_components[1] = []
    for cc in networkx.connected_components(G):
        for node in cc:
            k_number[node] = 1
        if len(cc) > 2:
            k_components[1].append(set(cc))
    # Start from k_cores: all k-components are also k-cores
    # but not all k-cores are k-components
    core_number = networkx.core_number(G)
    for k in range(2, min(max(core_number.values()) + 1, max_k + 1)):
        k_components[k] = []
        # Build k-core subgraph
        C = G.subgraph((n for n, cnum in core_number.items() if cnum >= k))
        for candidates in networkx.connected_components(C):
            # Compute pairwise vertex connectivity for each connected part
            # of this k-core using White and Newman 2001 algorithm.
            K = all_pairs_vertex_connectivity(G.subgraph(candidates),
                                              max_paths=k, strict=True)
            # Build a graph where two nodes are linked if they have at least k
            # node independent paths between them. Suggested in
            # White & Newman, 2001 (This is a very dense graph, almost complete
            # in many cases)
            H = networkx.Graph()
            # Too slow because we add every edge twice
            #H.add_edges_from(((u,v) for u in K \
            #                    for (v,w) in K[u].iteritems() if w >= k))
            seen = set()
            for u, nbrs in K.items():
                for v, ni_paths in nbrs.iteritems():
                    if v not in seen and ni_paths >= k:
                        H.add_edge(u, v)
                # Mark u done so the symmetric edge is not added again.
                seen.add(u)
            # Compute k-core of H and assume that the core of level k is a good
            # approximation for a component of level k
            core_number_2 = networkx.core_number(H)
            C2 = H.subgraph((n for n, cnum in core_number_2.items() if cnum >= k))
            for k_component in networkx.connected_components(C2):
                if len(k_component) >= k:
                    k_components[k].append(set(k_component))
                    for node in k_component:
                        k_number[node] = k
    return k_components, k_number
def test_directed_find_cores(self):
    '''core number had a bug for directed graphs found in issue #1959'''
    # Fix: the first parameter was mistyped as `Self`; a test method's
    # first positional argument must be `self`.
    # small example where too timid edge removal can make cn[2] = 3
    G = nx.DiGraph()
    edges = [(1, 2), (2, 1), (2, 3), (2, 4), (3, 4), (4, 3)]
    G.add_edges_from(edges)
    assert_equal(nx.core_number(G), {1: 2, 2: 2, 3: 2, 4: 2})
    # small example where too aggressive edge removal can make cn[2] = 2
    more_edges = [(1, 5), (3, 5), (4, 5), (3, 6), (4, 6), (5, 6)]
    G.add_edges_from(more_edges)
    assert_equal(nx.core_number(G), {1: 3, 2: 3, 3: 3, 4: 3, 5: 3, 6: 3})
def SentimentAnalysis_RGO_Belief_Propagation(nxg):
    # Bayesian Pearl Belief Propagation is done by
    # assuming the senti scores as probabilities with positive
    # and negative signs and the Recursive Gloss Overlap
    # definition graph being the graphical model.
    # Sentiment as a belief potential is passed through
    # the DFS tree of this graph.
    # Returns (dfs_pos, dfs_neg, core_pos, core_neg) accumulated beliefs.
    dfs_positive_belief_propagated = 1.0
    core_positive_belief_propagated = 1.0
    dfs_negative_belief_propagated = 1.0
    core_negative_belief_propagated = 1.0
    # Defaults carry over between iterations when a word has no synsets.
    core_xnegscore = core_xposscore = 1.0
    dfs_knegscore = dfs_kposscore = dfs_vposscore = dfs_vnegscore = 1.0
    sorted_core_nxg = sorted(nx.core_number(nxg).items(),
                             key=operator.itemgetter(1),
                             reverse=True)
    kcore_nxg = nx.k_core(nxg, 6, nx.core_number(nxg))
    # Pass 1: accumulate SentiWordNet scores over nodes ordered by coreness.
    for x in sorted_core_nxg:
        xsset = swn.senti_synsets(x[0])
        if len(xsset) > 2:
            core_xnegscore = float(xsset[0].neg_score())
            core_xposscore = float(xsset[0].pos_score())
        # Clamp zeros to a small epsilon so beliefs stay non-degenerate.
        if core_xnegscore == 0.0:
            core_xnegscore = 0.000001
        if core_xposscore == 0.0:
            core_xposscore = 0.000001
        core_positive_belief_propagated += float(core_xposscore)
        core_negative_belief_propagated += float(core_xnegscore)
    # NOTE(review): print placement relative to the loops is ambiguous in
    # the flattened source; summary placement after each loop assumed.
    print "Core Number: RGO_sentiment_analysis_belief_propagation: %f, %f" % (
        float(core_positive_belief_propagated),
        float(core_negative_belief_propagated))
    #for k,v in nx.dfs_edges(nxg):
    # Pass 2: propagate pairwise beliefs along DFS edges of the 6-core.
    for k, v in nx.dfs_edges(kcore_nxg):
        ksynset = swn.senti_synsets(k)
        vsynset = swn.senti_synsets(v)
        if len(ksynset) > 2:
            dfs_knegscore = float(ksynset[0].neg_score())
            dfs_kposscore = float(ksynset[0].pos_score())
        if len(vsynset) > 2:
            dfs_vnegscore = float(vsynset[0].neg_score())
            dfs_vposscore = float(vsynset[0].pos_score())
        dfs_kposscore_vposscore = float(dfs_kposscore * dfs_vposscore)
        dfs_knegscore_vnegscore = float(dfs_knegscore * dfs_vnegscore)
        # A zero joint score would stall the accumulation; reset to 1.0.
        if dfs_kposscore_vposscore == 0.0:
            dfs_kposscore_vposscore = 1.0
        if dfs_knegscore_vnegscore == 0.0:
            dfs_knegscore_vnegscore = 1.0
        dfs_positive_belief_propagated += float(dfs_kposscore_vposscore)
        dfs_negative_belief_propagated += float(dfs_knegscore_vnegscore)
    print "K-Core DFS: RGO_sentiment_analysis_belief_propagation: %f, %f" % (
        float(dfs_positive_belief_propagated),
        float(dfs_negative_belief_propagated))
    return (dfs_positive_belief_propagated, dfs_negative_belief_propagated,
            core_positive_belief_propagated, core_negative_belief_propagated)
def egodata(graphname): corenumber = {} connectedcomponents = {} triangles = {} coefficient = {} egonetSize = {} cc = list() tri = list() coeff = list() egoSize = list() hasrisk = list() corenumberlist = list() for node in graphname.nodes(): ego_graph = nx.ego_graph(graphname, node) if len(graphname.node[node]) is not 0: hasrisk.append(graphname.node[node]["negemo"]) # Core number corenumber[node] = max(nx.core_number(ego_graph).values()) corenumberlist.append(max(nx.core_number(ego_graph).values())) # egonetSize egonetSize[node] = ego_graph.size() egoSize.append(ego_graph.size()) # Triangle count triangleCount = nx.triangles(ego_graph, node) triangles[node] = triangleCount # adding the count for that node in dictionary tri.append(triangleCount) # Clustering co-efficients coeff_temp = nx.average_clustering(ego_graph) coefficient[node] = coeff_temp # adding the count for that node in dictionary coeff.append(coeff_temp) # Connected components minus ego ego_graph.remove_node(node) number = nx.number_connected_components(ego_graph) # adding the count for that node in dictionary connectedcomponents[node] = number cc.append(number) print corenumber print connectedcomponents print triangles print coefficient print egonetSize
def kshell(G, topk):
    """use the kshell to get topk nodes

    # Arguments
        G: a graph as networkx Graph
        topk: how many nodes will be returned

    Returns
        the topk nodes by kshell, [(node1, ' '), (node2, ' '), ...]

    Fix: when `topk` exceeded the number of nodes the function fell off
    the end and returned None; it now returns every ranked node.
    """
    node_core = nx.core_number(G)
    # Bucket nodes by core number, pairing each node with its degree.
    core_node_list = {}
    for node in node_core:
        if node_core[node] not in core_node_list:
            core_node_list[node_core[node]] = []
        core_node_list[node_core[node]].append((node, nx.degree(G, node)))
    # Within each shell, order nodes by descending degree.
    for core in core_node_list:
        core_node_list[core] = sorted(core_node_list[core],
                                      key=lambda x: x[1], reverse=True)
    # Highest shells first.
    core_node_list = sorted(core_node_list.items(),
                            key=lambda x: x[0], reverse=True)
    kshellrank = []
    for core, node_list in core_node_list:
        kshellrank.extend([n[0] for n in node_list])
    rank = []
    for node in kshellrank:
        rank.append((node, ' '))
        if len(rank) == topk:
            return rank
    return rank
def coreness(G):
    """For every node, sum the core numbers of its neighbours.

    Returns a dict mapping node -> neighbour-coreness sum (as np.uint32).
    """
    # k-shell decomposition first
    core_numbers = nx.core_number(G)
    nodes_list = list(G.nodes())
    n = len(nodes_list)
    sums = np.zeros(shape=(n, 1), dtype=np.uint32)
    for i, node in enumerate(nodes_list):
        # .get() keeps the lookup robust to arbitrary node labels.
        sums[i] = sum(core_numbers.get(nbr) for nbr in G.neighbors(node))
    return dict(zip(nodes_list, sums[:, 0]))
def main(self, dir):
    '''
    Start with the easy case: handle trees first, then move on to general
    graphs.  (Translated from the original Chinese comment.)

    Propagates an infection from randomly chosen sources, then reports the
    core number of the shell that contains a true source node.
    :return: core number of the (last seen) true source in the sorted
             core-number ranking, or 0 if none was found
    '''
    # NOTE(review): `dir` shadows the builtin; kept for interface parity.
    pre = '../../data/'
    last = '.txt'
    self.initG = commons.get_networkByFile(fileName=pre + dir + last)  # load the graph
    max_sub_graph = commons.judge_data(self.initG)
    source_list = commons.product_sourceList(max_sub_graph, self.fix_number_source)
    self.true_Source_list = source_list
    self.infectG = commons.propagation1(max_sub_graph, self.true_Source_list)  # start the infection
    subinfectG = commons.get_subGraph(self.infectG)
    subinfectG.to_undirected()
    subinfectG.remove_edges_from(subinfectG.selfloop_edges())
    data = nx.core_number(subinfectG)  # Graph.to_undirected()
    print('data', data)
    # Rank nodes by core number, highest first.
    data_sort = sorted(data.items(), key=lambda x: x[1], reverse=True)
    print('data_sort', data_sort)
    print('max-core', data_sort[0][1])
    core_number1 = 0
    # Check which shell the true source nodes sit in.
    for node, core_number in data_sort:
        if node in source_list:
            print('node,core_number', [node, core_number])
            core_number1 = core_number
    return core_number1
def _checkIfCoreNumberChange(graph, cnumber, edges):
    """
    Temporarily add `edges`, detect which nodes' core numbers change, and
    return the whitelist of candidate edges that are safe to keep.

    An edge is whitelisted when neither endpoint changed, or when exactly
    one endpoint changed but its original core number dominates the other
    endpoint's.

    Fixes: the docstring claimed a boolean return; an unused `nodes` local
    was removed; `changed` is now a set, making the membership tests in
    the edge loop O(1) instead of O(n).
    """
    graph.add_edges_from(edges)
    tcore = nx.core_number(graph)
    graph.remove_edges_from(edges)
    # Nodes whose core number moved when all candidate edges were present.
    changed = set(u for u in graph.nodes() if cnumber[u] != tcore[u])
    whitelist = []
    for e in edges:
        if e[0] not in changed and e[1] not in changed:
            whitelist.append(e)
        elif e[0] in changed and e[1] not in changed and cnumber[
                e[0]] > cnumber[e[1]]:
            whitelist.append(e)
        elif e[1] in changed and e[0] not in changed and cnumber[
                e[1]] > cnumber[e[0]]:
            whitelist.append(e)
    return whitelist
def generateCoreNumber(kcore):
    """Map each key of `kcore` to the core-number dict of its graph."""
    return {key: nx.core_number(graph) for key, graph in kcore.items()}
def bound_branch(G, k, q_nodes, is_use_cores=False, select_method='rand'):
    '''
    wrapper of branch and bound method

    Restricts G to the k-hop neighbourhood of the query nodes, optionally
    precomputes core numbers, runs BB, and returns the elapsed seconds
    (rounded to 2 decimals).  The best community found is stored in the
    module-level `optimal`.
    '''
    global optimal
    start = time.time()
    optimal = set()
    k_neighbors = k_hop_nbrs_n(G, k, q_nodes)
    region = set(q_nodes)
    region.update(k_neighbors)
    g = nx.subgraph(G, region)
    cores = nx.core_number(g) if is_use_cores else None
    # print('subgraph ', g.nodes())
    print('minimum degree of subgraph', minimum_degree(g))
    print('k neighbors', len(k_neighbors))
    BB(g, k, q_nodes, set(), cores, select_method)
    print('the solution is', optimal)
    return round(time.time() - start, 2)
def getCoreNumber(self):
    """Return the sorted list of core numbers of self.G.

    Multigraphs and digraphs are first collapsed to a simple undirected
    graph; self-loops are removed (core_number rejects them).
    """
    if self.G.is_multigraph() or self.G.is_directed():
        graph = nx.Graph(self.G)
    else:
        graph = self.G
    graph.remove_edges_from(graph.selfloop_edges())
    return sorted(nx.core_number(graph).values())
def test_core_number(self):
    """Nodes grouped by core number match the known fixture layout."""
    core = nx.core_number(self.G)
    grouped = [sorted(n for n in core if core[n] == level)
               for level in range(4)]
    expected = [[21],
                [17, 18, 19, 20],
                [9, 10, 11, 12, 13, 14, 15, 16],
                [1, 2, 3, 4, 5, 6, 7, 8]]
    for got, want in zip(grouped, expected):
        assert nodes_equal(got, want)
def main(): f = open(sys.argv[1]) graphname = pickle.load(f) f.close() corenumber = {} connectedcomponents = {} triangles = {} coefficient = {} egonetSize = {} for node in graphname.nodes(): ego_graph = nx.ego_graph(graphname, node) # Core number corenumber[node] = max(nx.core_number(ego_graph).values()) # egonetSize egonetSize[node] = ego_graph.size() # Triangle count triangleCount = nx.triangles(ego_graph, node) triangles[node] = triangleCount # adding the count for that node in dictionary # Clustering co-efficients coeff_temp = nx.average_clustering(ego_graph) coefficient[node] = coeff_temp # adding the count for that node in dictionary # Connected components minus ego ego_graph.remove_node(node) number = nx.number_connected_components(ego_graph) # adding the count for that node in dictionary connectedcomponents[node] = number print ("***** %s ******" % sys.argv[1]) print "Core Number Avg \t" + str(float(sum(corenumber.values()) / len(corenumber))) print "Connected Component Avg \t" + str(sum(connectedcomponents.values())/len(connectedcomponents)) print "Triangle Avg \t" + str(sum(triangles.values())/len(triangles)) print "Coefficient Avg \t" + str(sum(coefficient.values())/len(coefficient)) print "Ego Net Size Avg \t" + str(sum(egonetSize.values())/len(egonetSize))
def graphical_features(train_data, test_data):
    '''
    Compute some structural features on the graph obtained from the questions.

    Builds a question graph over train+test pairs, then returns two arrays
    (train features, test features) of per-pair min/max k-core, degree
    centrality, closeness centrality, and clique-count features.

    The two previously duplicated 8-tuple comprehensions are factored into
    a single helper.
    '''
    df = pd.concat([train_data[['qid1', 'qid2']], test_data[['qid1', 'qid2']]],
                   axis=0, ignore_index=True)
    g = nx.Graph()
    g.add_nodes_from(df.qid1)
    edges = list(df[["qid1", "qid2"]].to_records(index=False))
    g.add_edges_from(edges)
    g.remove_edges_from(g.selfloop_edges())
    print('Get kcore dict')
    kcore_dict = nx.core_number(g)
    print('Get centrality dict')
    centrality_dict = nx.degree_centrality(g)
    print('Get closeness dict')
    closeness_dict = nx.closeness_centrality(g)
    print('Get cliques dict')
    cliques_dict = nx.number_of_cliques(g)

    def _pair_features(qid1, qid2):
        # min/max pairs make each feature symmetric in (qid1, qid2).
        return (min(kcore_dict[qid1], kcore_dict[qid2]),
                max(kcore_dict[qid1], kcore_dict[qid2]),
                min(centrality_dict[qid1], centrality_dict[qid2]),
                max(centrality_dict[qid1], centrality_dict[qid2]),
                min(closeness_dict[qid1], closeness_dict[qid2]),
                max(closeness_dict[qid1], closeness_dict[qid2]),
                min(cliques_dict[qid1], cliques_dict[qid2]),
                max(cliques_dict[qid1], cliques_dict[qid2]))

    return (np.array([_pair_features(q1, q2)
                      for q1, q2 in zip(train_data.qid1, train_data.qid2)]),
            np.array([_pair_features(q1, q2)
                      for q1, q2 in zip(test_data.qid1, test_data.qid2)]))
def makeDataAll(G, beta, miu, name):
    """Build per-node feature vectors over 30 graph snapshots and save them.

    Each node's vector is [node_id, degree_0, kshell_0, ..., degree_29,
    kshell_29, SIR_label]; the array is written to NewData/ as .npy.
    Returns True on completion.
    """
    features = {}
    for i in range(30):
        snapshot = G[i]
        if i == 0:
            for node in snapshot.nodes():
                features[node] = [node]
        kshell = nx.core_number(snapshot)
        for node in snapshot.nodes():
            features[node].append(float(snapshot.degree(node)))  # degree
            features[node].append(float(kshell[node]))           # k-shell
    # Label: spreading influence from an SIR simulation seeded at the node.
    for node in G[0].nodes():
        features[node].append(SIR(G, node, beta, miu))
    X = np.array([x for x in features.values()], dtype=float)
    np.save(
        "NewData/data_" + name + "_D_" + str(beta) + "_" + str(miu) + ".npy",
        X)
    return True
def get_alias_edge(self, src, dst):
    '''
    Get the alias edge setup lists for a given edge.

    Transition weights out of `dst` (having arrived from `src`) are damped
    by the return parameter p, the in-out parameter q, a constant d, and
    the core-number gap between dst and its neighbour.
    '''
    G = self.G
    core = nx.core_number(G)
    p, q = self.p, self.q
    d = 3.5
    weights = []
    for nbr in sorted(G.neighbors(dst)):
        core_gap = abs(core[nbr] - core[dst]) + 1
        if nbr == src:
            # Stepping back to the source: damp by p and the core gap.
            weights.append(float(G[dst][nbr]['weight']) / (core_gap * p * d))
        elif G.has_edge(nbr, src):
            # Neighbour of both src and dst: keep the raw edge weight.
            weights.append(G[dst][nbr]['weight'])
        else:
            # Stepping outward: damp by q and the core gap.
            weights.append(float(G[dst][nbr]['weight']) / (core_gap * q * d))
    total = sum(weights)
    return alias_setup([float(w) / total for w in weights])
def get_core_number(self):
    """Return the core number of every node in self.graph.

    On failure the traceback is printed and None is returned, matching
    the original best-effort behaviour.

    Fixes: `print traceback.print_exc()` printed the traceback *and* a
    spurious "None" (print_exc returns None); the bound exception variable
    was unused; `except Exception, e` is Python-2-only syntax.
    """
    try:
        return nx.core_number(self.graph)
    except Exception:
        traceback.print_exc()
def core_investigate(G):
    '''Return the number of distinct shell numbers of the input graph G.'''
    # Fix/idiom: the intermediate list and its sort were dead work --
    # set() ignores order and only the count of distinct core values is
    # returned.  (The old `d = c.values(); d.sort()` also breaks on
    # Python 3, where values() is a non-sortable view.)
    return len(set(nx.core_number(G).values()))
def get_centrality_measures(network):
    """
    Calculates five node-level measures for the given network: k-shell
    (core number), clustering coefficient, and degree, betweenness, and
    closeness centrality.  (Eigenvector centrality is present below but
    commented out, and there is no `tol` parameter.)

    Parameters
    ----------
    network: networkx.Graph()

    Returns
    --------
    [kshell, clustering, degree, betweenness, closeness]: list of dicts
        keyed by node (not numpy arrays).
    """
    kshell = nx.core_number(network)
    clustering = nx.clustering(network)
    degree = nx.degree_centrality(network)
    betweenness = nx.betweenness_centrality(network)
    closeness = nx.closeness_centrality(network)
    #eigenvector_centrality = nx.eigenvector_centrality(network, tol=tol).values()
    return [kshell, clustering, degree, betweenness, closeness]
def same_node_degree(g1, g2):
    """For nodes present only in g2, count how many of their neighbours lie
    in g1; print core-number statistics for nodes with exactly 5 such
    neighbours, and return the histogram of shared-neighbour counts.
    """
    g2.remove_edges_from(g2.selfloop_edges())
    g1.remove_edges_from(g1.selfloop_edges())
    g1_nodes = set(g1.nodes())
    g2_nodes = set(g2.nodes())
    new_nodes = g2_nodes - g1_nodes
    shared_counts = []
    five_linked = []
    for node in new_nodes:
        shared = 0
        for nbr in g2.neighbors(node):
            if nbr in g1_nodes:
                shared += 1
        shared_counts.append(shared)
        if shared == 5:
            five_linked.append(node)
    core_g2 = nx.core_number(g2)
    five_cores = [core_g2[w] for w in five_linked]
    if len(five_cores) > 0:
        print(len(five_cores), end="\t")
        print(sum(five_cores) / len(five_cores), end="\t")
        print(variance(five_cores))
    hist = Counter(shared_counts)
    # Slots not covered by the histogram keep their index value,
    # mirroring the original list(range(...)) initialisation.
    out = list(range(max(hist.keys()) + 1))
    for count, freq in hist.items():
        out[count] = freq
    return out
def runExperimentEdges(self, iter):
    """Repeatedly grow a graph from random edge samples and report how well
    its core numbers rank-correlate with the full graph's.

    NOTE(review): `iter` shadows the builtin but is kept for interface
    parity; `end` is computed but never used, preserved as in the original.
    """
    self.readGraph()
    end = int(self.graph.number_of_edges() * 0.95)
    step = int(self.graph.number_of_edges() * 0.01)
    ori_cnumber = nx.core_number(self.graph)
    for _ in xrange(0, iter):
        graph = nx.Graph()
        sample = RandomEdge(self.graph)
        for i in xrange(1, 95):
            graph = sample.getSample(graph, step)
            cor = self.correlation(ori_cnumber, nx.core_number(graph))
            print(i, cor)
def OrigCoreN(self):
    '''
    Return a 2-D array whose (i, j) entry is the core number of origin
    node i, replicated across all columns (one column per target node).

    Fixes: the docstring wrongly said "pagerank" although the computation
    uses core numbers; dict.values() is wrapped in list() so the NumPy
    conversion also works on Python 3 (where values() is a view).
    '''
    core_vals = np.array(list(nx.core_number(self).values()),
                         dtype=float).reshape(-1, 1)
    return np.dot(core_vals, np.ones((1, self.number_of_nodes())))
def SentimentAnalysis_RGO_Belief_Propagation(nxg):
    # Bayesian Pearl Belief Propagation is done by
    # assuming the senti scores as probabilities with positive
    # and negative signs and the Recursive Gloss Overlap
    # definition graph being the graphical model.
    # Sentiment as a belief potential is passed through
    # the DFS tree of this graph.
    # Multiplicative variant (scores scaled by 10, beliefs multiplied);
    # returns (dfs_pos, dfs_neg, core_pos, core_neg).
    dfs_positive_belief_propagated = 1.0
    core_positive_belief_propagated = 1.0
    dfs_negative_belief_propagated = 1.0
    core_negative_belief_propagated = 1.0
    # Defaults carry over between iterations when a word has no synsets.
    core_xnegscore = core_xposscore = 1.0
    dfs_knegscore = dfs_kposscore = dfs_vposscore = dfs_vnegscore = 1.0
    sorted_core_nxg = sorted(nx.core_number(nxg).items(),
                             key=operator.itemgetter(1), reverse=True)
    kcore_nxg = nx.k_core(nxg, 6, nx.core_number(nxg))
    # Pass 1: multiply scaled SentiWordNet scores over nodes by coreness.
    for x in sorted_core_nxg:
        xsset = swn.senti_synsets(x[0])
        if len(xsset) > 2:
            core_xnegscore = float(xsset[0].neg_score()) * 10.0
            core_xposscore = float(xsset[0].pos_score()) * 10.0
        # A zero factor would zero the product forever; reset to 1.0.
        if core_xnegscore == 0.0:
            core_xnegscore = 1.0
        if core_xposscore == 0.0:
            core_xposscore = 1.0
        core_positive_belief_propagated *= float(core_xposscore)
        core_negative_belief_propagated *= float(core_xnegscore)
    # NOTE(review): print placement relative to the loops is ambiguous in
    # the flattened source; summary placement after each loop assumed.
    print "Core Number: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(core_positive_belief_propagated), float(core_negative_belief_propagated))
    #for k,v in nx.dfs_edges(nxg):
    # Pass 2: propagate pairwise beliefs along DFS edges of the 6-core.
    for k, v in nx.dfs_edges(kcore_nxg):
        ksynset = swn.senti_synsets(k)
        vsynset = swn.senti_synsets(v)
        if len(ksynset) > 2:
            dfs_knegscore = float(ksynset[0].neg_score()) * 10.0
            dfs_kposscore = float(ksynset[0].pos_score()) * 10.0
        if len(vsynset) > 2:
            dfs_vnegscore = float(vsynset[0].neg_score()) * 10.0
            dfs_vposscore = float(vsynset[0].pos_score()) * 10.0
        dfs_kposscore_vposscore = float(dfs_kposscore * dfs_vposscore)
        dfs_knegscore_vnegscore = float(dfs_knegscore * dfs_vnegscore)
        # A zero joint factor would zero the product forever; reset to 1.0.
        if dfs_kposscore_vposscore == 0.0:
            dfs_kposscore_vposscore = 1.0
        if dfs_knegscore_vnegscore == 0.0:
            dfs_knegscore_vnegscore = 1.0
        dfs_positive_belief_propagated *= float(dfs_kposscore_vposscore)
        dfs_negative_belief_propagated *= float(dfs_knegscore_vnegscore)
    print "K-Core DFS: RGO_sentiment_analysis_belief_propagation: %f, %f" % (float(dfs_positive_belief_propagated), float(dfs_negative_belief_propagated))
    return (dfs_positive_belief_propagated, dfs_negative_belief_propagated,
            core_positive_belief_propagated, core_negative_belief_propagated)
def anti_kcore(G, k=None, core_number=None):
    """Split G at core level k.

    Returns a tuple: (a copy of G restricted to nodes *below* core level k,
    the list of nodes at or above core level k).  k defaults to the maximum
    core number; core numbers are computed if not supplied.
    """
    cn = nx.core_number(G) if core_number is None else core_number
    level = max(cn.values()) if k is None else k
    in_core = [n for n in cn if cn[n] >= level]
    outside = [n for n in cn if cn[n] < level]
    return (G.subgraph(outside).copy(), in_core)
def TargCoreN(self):
    '''
    Return a 2-D array whose (i, j) entry is the core number of target
    node j, replicated across all rows (one row per origin node).

    Fixes: the docstring wrongly said "pagerank" although the computation
    uses core numbers; dict.values() is wrapped in list() so the NumPy
    conversion also works on Python 3 (where values() is a view).
    '''
    core_vals = np.array(list(nx.core_number(self).values()),
                         dtype=float).reshape(1, -1)
    return np.dot(np.ones((self.number_of_nodes(), 1)), core_vals)
def k_crust(G, k=None, core_number=None):
    """Return the k-crust of G.

    The k-crust is the graph G with the k-core removed.

    Parameters
    ----------
    G : NetworkX graph
       A graph or directed graph.
    k : int, optional
      The order of the shell.  If not specified return the main crust.
    core_number : dictionary, optional
      Precomputed core numbers for the graph G.

    Returns
    -------
    G : NetworkX graph
       The k-crust subgraph

    Raises
    ------
    NetworkXError
        The k-crust is not defined for graphs with self loops or
        parallel edges.

    Notes
    -----
    This definition of k-crust is different than the definition in [1]_.
    The k-crust in [1]_ is equivalent to the k+1 crust of this algorithm.

    Not implemented for graphs with parallel edges or self loops.

    For directed graphs the node degree is defined to be the
    in-degree + out-degree.

    Graph, node, and edge attributes are copied to the subgraph.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] A model of Internet topology using k-shell decomposition
       Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt,
       and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154
       http://www.pnas.org/content/104/27/11150.full
    """
    # HACK These two checks are done in _core_helper, but this function
    # requires k to be one less than the maximum core value instead of
    # just the maximum. Therefore we duplicate the checks here. A better
    # solution should exist...
    if core_number is None:
        core_number = nx.core_number(G)
    if k is None:
        k = max(core_number.values()) - 1

    def in_crust(v, k, core_number):
        # A node belongs to the crust when its core number is at most k.
        return core_number[v] <= k

    return _core_helper(G, in_crust, k, core_number)
def test_white_harary_2():
    """Figure 8 of White and Harary (2001): two K4 cliques joined by one edge.

    http://eclectic.ss.uci.edu/~drwhite/sm-w23.PDF
    Verifies the inequality chain kappa <= lambda <= delta.
    """
    left = nx.complete_graph(4)
    right = nx.complete_graph(4)
    G = nx.disjoint_union(left, right)
    G.add_edge(0,4)
    # delta: minimum core number; kappa/lambda: vertex/edge connectivity.
    minimum_core = min(nx.core_number(G).values())
    assert_equal(3, minimum_core)
    assert_equal(1, nx.node_connectivity(G))
    assert_equal(1, nx.edge_connectivity(G))
def k_shell(G,k=None,core_number=None):
    """Return the k-shell of G: nodes in the k-core but not in the (k+1)-core.

    Parameters
    ----------
    G : NetworkX graph
        A graph or directed graph.
    k : int, optional
        The order of the shell.  Defaults to the main (outermost) shell,
        i.e. the maximum core number.
    core_number : dictionary, optional
        Precomputed core numbers for the graph G.

    Returns
    -------
    NetworkX graph
        The k-shell subgraph.

    Raises
    ------
    NetworkXError
        The k-shell is not defined for graphs with self loops or
        parallel edges.

    Notes
    -----
    Similar to k_corona, except k_corona only counts neighbors inside the
    k-core.  For directed graphs the node degree is in-degree + out-degree.
    Graph, node, and edge attributes are copied to the subgraph.

    See Also
    --------
    core_number
    k_corona

    References
    ----------
    .. [1] A model of Internet topology using k-shell decomposition
       Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt,
       and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154
       http://www.pnas.org/content/104/27/11150.full
    """
    if core_number is None:
        core_number = nx.core_number(G)
    shell_order = max(core_number.values()) if k is None else k
    members = [node for node in core_number if core_number[node] == shell_order]
    return G.subgraph(members).copy()
def real_degeneracy(node):
    """Return the degeneracy (maximum core number) of the friend graph of node.

    Fetches all friends of ``node``, builds the corresponding networkx graph,
    and returns the largest core number found; 0 for an empty graph.
    """
    friends = get_all_friends(node)
    # Parenthesized print works identically under Python 2 and 3 for a
    # single argument.
    print("construct graph")
    G = construct_networkx_graph(friends)
    print("calculate core number")
    core_list = nx.core_number(G)
    # max() over the values replaces the manual accumulation loop; an empty
    # graph has no core numbers, so fall back to 0 as before.
    return max(core_list.values()) if core_list else 0
def k_corona(G, k, core_number=None):
    """Return the k-corona of G.

    The k-corona is the subgraph of nodes in the k-core which have exactly
    k neighbours in the k-core.

    Parameters
    ----------
    G : NetworkX graph
        A graph or directed graph.
    k : int
        The order of the corona.
    core_number : dictionary, optional
        Precomputed core numbers for the graph G.

    Returns
    -------
    NetworkX graph
        The k-corona subgraph.

    Raises
    ------
    NetworkXError
        The k-corona is not defined for graphs with self loops or
        parallel edges.

    Notes
    -----
    For directed graphs the node degree is in-degree + out-degree.
    Graph, node, and edge attributes are copied to the subgraph.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] k-core (bootstrap) percolation on complex networks:
       Critical phenomena and nonlocal effects,
       A. V. Goltsev, S. N. Dorogovtsev, and J. F. F. Mendes,
       Phys. Rev. E 73, 056101 (2006)
       http://link.aps.org/doi/10.1103/PhysRevE.73.056101
    """
    if core_number is None:
        core_number = nx.core_number(G)

    def in_corona(node):
        # Member iff it sits in the k-shell and exactly k of its
        # neighbours are inside the k-core.
        if core_number[node] != k:
            return False
        neighbours_in_core = sum(1 for v in G[node] if core_number[v] >= k)
        return neighbours_in_core == k

    members = [node for node in core_number if in_corona(node)]
    return G.subgraph(members).copy()
def KShell_Centrality(G):
    """Return the k-shell centrality of every node of G.

    The k-core is found by recursively pruning nodes with degree less than
    k; the k-shell is the subgraph of nodes in the k-core but not in the
    (k+1)-core.  A node's k-shell centrality is its core number: the
    largest k of a k-core containing that node.

    Returns
    -------
    dict
        Mapping node -> core number, e.g.
        {'1': 2, '0': 2, '3': 2, '2': 2, '4': 1}.
    """
    # nx.core_number already yields exactly the per-node shell index.  The
    # previous version also grouped nodes by shell into an unused dict
    # (dead work) and bound the result to a local shadowing the function
    # name; both removed.
    return nx.core_number(G)
def k_crust(G,k=None,core_number=None):
    """Return the k-crust of G: the graph G with the k-core removed.

    Parameters
    ----------
    G : NetworkX graph
        A graph or directed graph.
    k : int, optional
        The order of the crust.  Defaults to the main crust
        (maximum core number minus one).
    core_number : dictionary, optional
        Precomputed core numbers for the graph G.

    Returns
    -------
    NetworkX graph
        The k-crust subgraph.

    Raises
    ------
    NetworkXError
        The k-crust is not defined for graphs with self loops or
        parallel edges.

    Notes
    -----
    This definition differs from Carmi et al. (PNAS 2007): their k-crust
    equals the (k+1)-crust of this algorithm.  For directed graphs the node
    degree is in-degree + out-degree.  Graph, node, and edge attributes are
    copied to the subgraph.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] A model of Internet topology using k-shell decomposition
       Shai Carmi, Shlomo Havlin, Scott Kirkpatrick, Yuval Shavitt,
       and Eran Shir, PNAS July 3, 2007 vol. 104 no. 27 11150-11154
       http://www.pnas.org/content/104/27/11150.full
    """
    if core_number is None:
        core_number = nx.core_number(G)
    crust_order = (max(core_number.values()) - 1) if k is None else k
    members = [node for node in core_number if core_number[node] <= crust_order]
    return G.subgraph(members).copy()
def k_core(G,k=None,core_number=None):
    """Return the k-core of G.

    A k-core is a maximal subgraph whose nodes all have degree k or more.

    Parameters
    ----------
    G : NetworkX graph
        A graph or directed graph.
    k : int, optional
        The order of the core.  Defaults to the main core
        (the maximum core number).
    core_number : dictionary, optional
        Precomputed core numbers for the graph G.

    Returns
    -------
    NetworkX graph
        The k-core subgraph.

    Raises
    ------
    NetworkXError
        The k-core is not defined for graphs with self loops or
        parallel edges.

    Notes
    -----
    The main core is the core with the largest degree.  For directed
    graphs the node degree is in-degree + out-degree.  Graph, node, and
    edge attributes are copied to the subgraph.

    See Also
    --------
    core_number

    References
    ----------
    .. [1] An O(m) Algorithm for Cores Decomposition of Networks
       Vladimir Batagelj and Matjaz Zaversnik, 2003.
       http://arxiv.org/abs/cs.DS/0310049
    """
    if core_number is None:
        core_number = nx.core_number(G)
    core_order = max(core_number.values()) if k is None else k
    members = [node for node in core_number if core_number[node] >= core_order]
    return G.subgraph(members).copy()
def calculate(net):
    """Return the mean core number of net, rounded to 7 decimal places.

    Self loops are removed first, because core numbers are undefined for
    graphs with self loops.  Returns 0 for an empty graph or when the
    computation fails, preserving the original best-effort contract.
    """
    if net.number_of_selfloops() > 0:
        try:
            net.remove_edges_from(net.selfloop_edges())
        except Exception:  # narrowed from a bare except; still best-effort
            return 0
    try:
        c = nx.core_number(net).values()
    except Exception:  # e.g. NetworkXError on unsupported graph types
        return 0
    if len(c) == 0:
        return 0
    # float() guards against integer truncation under Python 2 division.
    return round(sum(c) / float(len(c)), 7)
def approximation_k_components(G, max_k=None):
    """Approximate the k-components of G.

    Returns a tuple ``(k_components, k_number)``:
    ``k_components`` maps each connectivity level k to a list of node sets
    forming (approximate) k-components; ``k_number`` maps each node to the
    largest k of a k-component containing it.

    ``max_k`` optionally caps the connectivity level explored.  The result
    is an approximation: candidates are taken from k-cores and filtered by
    pairwise vertex connectivity (White and Newman, 2001) rather than by an
    exact k-component algorithm.
    """
    # Compute only until max k
    if max_k is None:
        max_k = float('infinity')
    # Dictionary with connectivity level (k) as keys and a list of
    # sets of nodes that form a k-component as values
    k_components = {}
    # Dictionary with nodes as keys and maximum k of the deepest
    # k-component in which they are embedded
    k_number = dict(((n,0) for n in G.nodes()))
    # We deal first with k = 1
    k_components[1] = []
    for cc in networkx.connected_components(G):
        for node in cc:
            k_number[node] = 1
        # Only components with more than 2 nodes are recorded at level 1.
        if len(cc) > 2:
            k_components[1].append(set(cc))
    # Start from k_cores: all k-components are also k-cores
    # but not all k-cores are k-components
    core_number = networkx.core_number(G)
    for k in range(2, min(max(core_number.values())+1, max_k + 1)):
        k_components[k] = []
        # Build k-core subgraph
        C = G.subgraph((n for n, cnum in core_number.items() if cnum >= k))
        for candidates in networkx.connected_components(C):
            # Compute pairwise vertex connectivity for each connected part
            # of this k-core using White and Newman (2001) algorithm and build
            # the complement graph of a graph where two nodes are linked if
            # they have at least k node independent paths between them.
            SG = G.subgraph(candidates)
            H = networkx.Graph()
            for u,v in itertools.combinations(SG, 2):
                # NOTE(review): pairwise_vertex_connectivity and
                # anticore_number below are project-local helpers; their
                # exact contracts are not visible here — confirm that
                # NaN signals "connectivity unknown".
                K = pairwise_vertex_connectivity(SG, u, v, max_paths=k,
                                                 strict=True)
                if K < k or math.isnan(K):
                    H.add_edge(u,v)
            # Compute complement k-core (anticore) of H and assume that the
            # core of level k is a good approximation for a component of level k
            acore_number = anticore_number(H)
            A = H.subgraph((n for n, cnum in acore_number.items() if cnum >= k))
            for k_component in networkx.connected_components(A):
                # A k-component must contain at least k nodes.
                if len(k_component) >= k:
                    k_components[k].append(set(k_component))
                    for node in k_component:
                        k_number[node] = k
    return k_components, k_number
def compute_node_measures(ntwk, calculate_cliques=False):
    """Return a dict of per-node network measures for graph ntwk.

    Each entry is a numpy array with one value per node: degree, load /
    betweenness / degree / closeness centrality, triangle counts,
    clustering coefficients, core numbers, and an isolate indicator
    vector.  When ``calculate_cliques`` is True, per-node clique measures
    are added as well (these are expensive).

    NOTE(review): the dict-style ``.values()`` calls assume networkx < 2.0,
    where degree() etc. return dicts — confirm the pinned version.
    """
    iflogger.info('Computing node measures:')
    measures = {}
    iflogger.info('...Computing degree...')
    measures['degree'] = np.array(list(ntwk.degree().values()))
    iflogger.info('...Computing load centrality...')
    measures['load_centrality'] = np.array(
        list(nx.load_centrality(ntwk).values()))
    iflogger.info('...Computing betweenness centrality...')
    measures['betweenness_centrality'] = np.array(
        list(nx.betweenness_centrality(ntwk).values()))
    iflogger.info('...Computing degree centrality...')
    measures['degree_centrality'] = np.array(
        list(nx.degree_centrality(ntwk).values()))
    iflogger.info('...Computing closeness centrality...')
    measures['closeness_centrality'] = np.array(
        list(nx.closeness_centrality(ntwk).values()))
    # iflogger.info('...Computing eigenvector centrality...')
    # measures['eigenvector_centrality'] = np.array(nx.eigenvector_centrality(ntwk, max_iter=100000).values())
    iflogger.info('...Computing triangles...')
    measures['triangles'] = np.array(list(nx.triangles(ntwk).values()))
    iflogger.info('...Computing clustering...')
    measures['clustering'] = np.array(list(nx.clustering(ntwk).values()))
    iflogger.info('...Computing k-core number')
    measures['core_number'] = np.array(list(nx.core_number(ntwk).values()))
    iflogger.info('...Identifying network isolates...')
    isolate_list = nx.isolates(ntwk)
    # One-hot column vector: 1 where the node is an isolate.
    binarized = np.zeros((ntwk.number_of_nodes(), 1))
    for value in isolate_list:
        # assumes node labels are 1-based integers — TODO confirm
        value = value - 1  # Zero indexing
        binarized[value] = 1
    measures['isolates'] = binarized
    if calculate_cliques:
        iflogger.info('...Calculating node clique number')
        measures['node_clique_number'] = np.array(
            list(nx.node_clique_number(ntwk).values()))
        iflogger.info('...Computing number of cliques for each node...')
        measures['number_of_cliques'] = np.array(
            list(nx.number_of_cliques(ntwk).values()))
    return measures
def features(G,normalize_centrality):
    '''
    Return the node features we are interested in within a dict.

    Keys: 'in_degree', 'out_degree', 'load_centrality',
    'betweennes_centrality' (misspelled key kept for backward
    compatibility with existing callers), 'eigenvector_centrality',
    'closeness_centrality', 'core_number'.  ``normalize_centrality``
    is forwarded to the networkx centrality functions.
    '''
    load_centrality=nx.load_centrality(G,normalized=normalize_centrality)
    betweenness_centrality=nx.betweenness_centrality(G,normalized=normalize_centrality)
    eigenvector_centrality=nx.eigenvector_centrality_numpy(G,normalized=normalize_centrality)
    closeness_centrality=nx.closeness_centrality(G,normalized=normalize_centrality)
    in_degree=G.in_degree()
    out_degree=G.out_degree()
    core_number=nx.core_number(G)
    clustering=nx.clustering(G)
    d={}
    d['in_degree']=in_degree
    d['out_degree']=out_degree
    d['load_centrality']=load_centrality
    # BUG FIX: previously assigned the undefined name `betweennes_centrality`
    # (NameError at runtime).  The misspelled dict key itself is preserved so
    # callers that read d['betweennes_centrality'] keep working.
    d['betweennes_centrality']=betweenness_centrality
    d['eigenvector_centrality']=eigenvector_centrality
    d['closeness_centrality']=closeness_centrality
    d['core_number']=core_number
    # NOTE(review): `clustering` is computed but never returned — presumably
    # it should be exposed as d['clustering']; not added here to keep the
    # returned schema unchanged.
    return d
def main():
    """Load a graph from an adjacency-list file and print its metrics.

    Prints global measures (diameter, average shortest path, average
    clustering, efficiency) followed by per-node tables for clustering,
    degree, closeness, betweenness, and core number.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("input", help="input file with the adjacency list",
                        type=str)
    args = parser.parse_args()

    graph = nx.read_adjlist(args.input)

    # Global measures.
    print("\nDiameter: %d" % nx.diameter(graph))
    print("\nAverage shortest path: %.4f" % nx.average_shortest_path_length(graph))
    print("\nAverage clustering: %.4f" % nx.average_clustering(graph))
    print("\nEfficiency of the network: %.4f" % efficiency(graph))

    def report(header, values, row_fmt):
        # Node labels are strings; sort numerically for a stable listing.
        print(header)
        for node in sorted(values, key=int):
            print(row_fmt % (node, values[node]))

    report("\nClustering for each node:",
           nx.clustering(graph), "\tNode %s: %.4f")
    report("\nNode degrees:",
           nx.degree(graph), "\tNode %s: %d")
    report("\nCloseness centrality of nodes:",
           nx.closeness_centrality(graph), "\tNode %s: %.4f")
    report("\nBetweenness centrality of nodes (not normalized):",
           nx.betweenness_centrality(graph, normalized=False),
           "\tNode %s: %.4f")
    report("\nk-core decomposition for each node:",
           nx.core_number(graph), "\tNode %s: %d-core")
# To Run this script: python core_number.py import networkx as nx import matplotlib.pyplot as plt import sys from matplotlib.legend_handler import HandlerLine2D from matplotlib.font_manager import FontProperties import random import math from scipy.stats import ks_2samp import numpy as np year = [] normalisedkcoresize = [] curyear = 2005 endyear = 2005 for x in range(curyear, endyear+1): print "Year", x,":" fh1 = open("../data/adjlistfile_till_year_"+str(x)) G1 = nx.read_adjlist(fh1, create_using=nx.DiGraph()) G1.remove_edges_from(G1.selfloop_edges()) core_numbers = nx.core_number(G1) print core_numbers
def SentimentAnalysis_RGO(text,output):
    """Build a Recursive Gloss Overlap (RGO) definition graph for ``text``,
    classify it by top-percentile core numbers, and run SentiWordNet
    sentiment analysis.  Writes a detailed trace to ``output`` (a writable
    file-like object) and returns the networkx DiGraph.

    NOTE(review): mutates the module-level dicts ``definitiongraphedges`` /
    ``definitiongraphedgelabels`` and reads ``weight_str_map`` — confirm
    they are initialised before this call.
    """
    #---------------------------------------------------------------------------------
    #2.Compute intrinsic merit (either using linear or quadratic overlap)
    #---------------------------------------------------------------------------------
    tokenized = nltk.word_tokenize(text)
    fdist1 = FreqDist(tokenized)
    stopwords = nltk.corpus.stopwords.words('english')
    stopwords = stopwords + [u'why',u'be',u'what',u' ',u'or',u'and',u'who',u'he',u'she',u'whom',u'well',u'is',u'was',u'were',u'are',u'there',u'where',u'when',u'may',u'might',u'would',u'shall',u'will',u'should',u'The', u'the', u'In',u'in',u'A',u'B',u'C',u'D',u'E',u'F',u'G',u'H',u'I',u'J',u'K',u'L',u'M',u'N',u'O',u'P',u'Q',u'R',u'S',u'T',u'U',u'V',u'W',u'X',u'Y',u'Z']
    puncts = [u' ',u'.', u'"', u',', u'{', u'}', u'+', u'-', u'*', u'/', u'%', u'&', u'(', ')', u'[', u']', u'=', u'@', u'#', u':', u'|', u';',u'\'s']
    #at present tfidf filter is not applied
    #freqterms1 = [w for w in fdist1.keys() if w not in stopwords and w not in puncts and (fdist1.freq(w) * compute_idf(corpus, w))]
    freqterms1 = [w.decode("utf-8") for w in fdist1.keys() if w not in stopwords and w not in puncts]
    current_level = 1
    nodewithmaxparents = ''
    noofparents = 0
    maxparents = 0
    relatedness = 0
    first_convergence_level = 1
    tokensofthislevel = []
    convergingterms = []
    convergingparents = []
    tokensofprevlevel = []
    prevlevelsynsets = []
    commontokens = []
    vertices = 0
    edges = 0
    overlap = 0
    iter = 0
    from nltk.corpus import wordnet as wn
    #recurse down to required depth and update intrinsic merit score
    #relatedness is either sum(overlaps) or sum((overlapping_parents)*(overlaps)^2) also called convergence factor
    while current_level < 3:
        #crucial - gather nodes which converge/overlap (have more than 1 parent)
        if current_level > 1:
            print current_level
            # Wire every term to all of its parents' lemma names,
            # labelling the edges in both directions.
            for x in freqterms1:
                for y in parents(x,prevlevelsynsets):
                    ylemmanames=y.lemma_names()
                    for yl in ylemmanames:
                        definitiongraphedges[x].append(yl)
                        definitiongraphedgelabels[x + " - " + yl].append(" is a subinstance of ")
                        definitiongraphedgelabels[yl + " - " + x].append(" is a superinstance of ")
            convergingterms = [w for w in freqterms1 if len(parents(w,prevlevelsynsets)) > 1]
            for kw in freqterms1:
                convergingparents = convergingparents + ([w for w in parents(kw, prevlevelsynsets) if len(parents(kw, prevlevelsynsets)) > 1])
            for kw in freqterms1:
                noofparents = len(parents(kw, prevlevelsynsets))
                if noofparents > maxparents:
                    maxparents = noofparents
                    nodewithmaxparents = kw
        output.write('converging terms(terms with more than 1 parent):\n ')
        #pickle.dump(convergingterms,output)
        output.write('\n')
        output.write('converging parents :\n')
        #pickle.dump(convergingparents,output)
        output.write('\n')
        for keyword in freqterms1:
            #WSD - invokes Lesk's algorithm adapted to recursive gloss overlap- best_matching_synset()
            output.write('===============================================\n')
            #output.write('keyword : ' + keyword.decode("utf-8"))
            #output.write('\n')
            #disamb_synset = best_matching_synset(set(doc1), wn.synsets(keyword))
            disamb_synset = best_matching_synset(freqterms1, wn.synsets(keyword))
            prevlevelsynsets = prevlevelsynsets + [disamb_synset]
            output.write('prevlevelsynsets:\n')
            #pickle.dump(prevlevelsynsets, output)
            output.write('\n')
            output.write('matching synset:\n')
            #pickle.dump( disamb_synset,output)
            output.write('\n')
            if len(wn.synsets(keyword)) != 0:
                # Tokenize the gloss (definition) of the disambiguated
                # synset and accumulate the next level's token pool.
                disamb_synset_def = disamb_synset.definition()
                tokens = nltk.word_tokenize(disamb_synset_def)
                fdist_tokens = FreqDist(tokens)
                #at present frequency filter is not applied
                #if keyword in convergingterms:
                tokensofthislevel = tokensofthislevel + ([w for w in fdist_tokens.keys() if w not in stopwords and w not in puncts and fdist_tokens.freq(w)])
        output.write('At level:\n')
        output.write(str(current_level))
        output.write('\n')
        output.write('tokens grasped at this level:\n')
        #pickle.dump(tokensofthislevel, output)
        output.write('\n')
        listcount = len(tokensofthislevel)
        setcount = len(set(tokensofthislevel))
        overlap = listcount-setcount
        if overlap > 0 and iter == 0 :
            first_convergence_level = current_level
            iter = 1
        #choose between two relatedness/convergence criteria :-
        #1) simple linear overlap or 2) zipf distributed quadratic overlap
        #relatedness = relatedness + len(convergingparents)*overlap
        relatedness = relatedness + overlap + len(convergingparents)
        #relatedness = relatedness + ((len(convergingparents)*overlap*overlap) + 1)
        #find out common tokens of this and previous level so that same token does not get grasped again -
        #relatedness must be increased since repetition of keywords in two successive levels is a sign of
        #interrelatedness(a backedge from child-of-one-of-siblings to one-of-siblings). Remove vertices and edges
        #corresponding to common tokens
        commontokens = set(tokensofthislevel).intersection(set(tokensofprevlevel))
        tokensofthislevel = set(tokensofthislevel).difference(commontokens)
        relatedness = relatedness + len(commontokens)
        output.write('removing tokens already grasped:\n')
        #pickle.dump(commontokens,output)
        output.write('\n')
        output.write('Relatedness:\n')
        output.write(str(relatedness))
        output.write('\n')
        #decrease the vertices count to address common tokens removed above - edges should remain same since they
        #would just point elsewhere
        vertices = vertices + setcount - len(commontokens)
        output.write('Vertices:\n')
        output.write(str(vertices))
        output.write('\n')
        edges = edges + listcount
        output.write('Edges:\n')
        output.write(str(edges))
        output.write('\n')
        current_level = current_level + 1
        freqterms1 = set(tokensofthislevel)
        tokensofprevlevel = tokensofthislevel
        tokensofthislevel = []
    intrinsic_merit = vertices*edges*relatedness / first_convergence_level
    output.write('Intrinsic merit of this document is:\n')
    output.write(str(intrinsic_merit))
    output.write('\n')
    output.write('Node with maximum parents (and hence the most likely class of document) is:\n')
    output.write(nodewithmaxparents)
    output.write('\n')
    print definitiongraphedges
    # Materialize the definition graph as a bidirectional networkx DiGraph
    # and annotate edge strings via WordNet hyponym/hypernym closures.
    nxg=nx.DiGraph()
    #pos=nx.spring_layout(nxg)
    #pos=nx.shell_layout(nxg)
    #pos=nx.random_layout(nxg)
    #weight_str_map={}
    pos=nx.spectral_layout(nxg)
    for k,v in definitiongraphedges.iteritems():
        for l in v:
            nxg.add_edge(k,l)
            nxg.add_edge(l,k)
            ksynset=wn.synsets(k)
            lsynset=wn.synsets(l)
            if ksynset and lsynset:
                print "ksynset=",ksynset[0]
                print "lsynset=",lsynset[0]
                hypoksynsets=set([i for i in ksynset[0].closure(lambda n:n.hyponyms())])
                hyperlsynsets=set([i for i in lsynset[0].closure(lambda n:n.hypernyms())])
                for m in hypoksynsets:
                    try:
                        mlemmanames=m.lemma_names()
                        weight_str_map[k+" - "+l]=weight_str_map[k+" - "+l]+" contains "+mlemmanames[0]
                    except KeyError:
                        weight_str_map[k+" - "+l]=""
                for n in hyperlsynsets:
                    try:
                        nlemmanames=n.lemma_names()
                        weight_str_map[l+" - "+k]=weight_str_map[l+" - "+k]+" is part of "+nlemmanames[0]
                    except KeyError:
                        weight_str_map[l+" - "+k]=""
    #nx.draw_graphviz(nxg,prog="neato")
    nx.draw_networkx(nxg)
    plt.show()
    # Self loops must be removed before computing core numbers.
    nxg.remove_edges_from(nxg.selfloop_edges())
    #print "Core number =",nx.core_number(nxg)
    sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True)
    print "Core number (sorted) :",sorted_core_nxg
    print "============================================================================================================="
    print "Unsupervised Classification based on top percentile Core numbers of the definition graph(subgraph of WordNet)"
    print "============================================================================================================="
    no_of_classes=len(nx.core_number(nxg))
    top_percentile=0
    max_core_number=0
    # Walk the top 10% of nodes by core number; the highest core number
    # seen is the document's dominant class strength.
    for n in sorted_core_nxg:
        print "This document belongs to class:",n[0],",core number=",n[1]
        if top_percentile < no_of_classes*0.10:
            top_percentile+=1
        else:
            break
        if n[1] > max_core_number:
            max_core_number=n[1]
    print " max_core_number",max_core_number
    print "==================================================================="
    print "Page Rank of the vertices of RGO Definition Graph"
    print "==================================================================="
    print sorted(nx.pagerank(nxg).items(),key=operator.itemgetter(1),reverse=True)
    #print "================================================================================"
    #print "A primitive text generated from traversal of the k-core closure of RGO Definition Graph"
    #print "================================================================================"
    #kcore_nxg=nx.k_core(nxg,10,nx.core_number(nxg))
    #kcore_nxg=nx.k_core(nxg)
    #for k,v in kcore_nxg.edges():
    #    print k, weight_str_map[k+" - "+v], v, ".",
    #print "\n"
    #print "=============================================================================="
    #print "Lambda Composition Closure with Depth First Search of RGO graph edges as relations"
    #print "=============================================================================="
    #print definitiongraphedgelabels
    #lambda_vertex_map={}
    #lambda_edge_map={}
    #print composition_lambda(nxg)
    print "=============================================================================="
    print "Sentiment Analysis (Applying SentiWordNet to the tokenized text) of the text"
    print "=============================================================================="
    pos,neg,obj = SentimentAnalysis_SentiWordNet(text)
    print "Positivity = ", pos
    print "Negativity = ", neg
    print "Objectivity = ",obj
    print "=========================================================================================================="
    print "Sentiment Analysis (Applying SentiWordNet to the top core-numbered words in RGO graph of text) of the text"
    print "=========================================================================================================="
    for x in sorted_core_nxg:
        # NOTE(review): newer nltk returns a filter object from
        # senti_synsets(), which has no len() — confirm nltk version.
        xsset = swn.senti_synsets(x[0])
        if len(xsset) > 2:
            xnegscore = xsset[0].neg_score()
            xposscore = xsset[0].pos_score()
            print "negscore of ", x[0], ": ", xnegscore
            print "posscore of ", x[0], ": ", xposscore
    return nxg
def RecursiveGlossOverlap_Classify(text):
    """Classify ``text`` by building a Recursive Gloss Overlap definition
    graph over WordNet glosses and ranking its vertices.

    Returns a tuple ``(sorted_core_nxg, sorted_pagerank_nxg)``: vertices
    sorted by core number and by PageRank, both in decreasing order.
    Also prints betweenness, closeness, and degree centrality tables.
    """
    definitiongraphedges=defaultdict(list)
    definitiongraphedgelabels=defaultdict(list)
    #---------------------------------------------------------------------------------
    #2.Compute intrinsic merit (either using linear or quadratic overlap)
    #---------------------------------------------------------------------------------
    tokenized = nltk.word_tokenize(text)
    fdist1 = FreqDist(tokenized)
    stopwords = nltk.corpus.stopwords.words('english')
    stopwords = stopwords + [u' ',u'or',u'and',u'who',u'he',u'she',u'whom',u'well',u'is',u'was',u'were',u'are',u'there',u'where',u'when',u'may', u'The', u'the', u'In',u'in',u'A',u'B',u'C',u'D',u'E',u'F',u'G',u'H',u'I',u'J',u'K',u'L',u'M',u'N',u'O',u'P',u'Q',u'R',u'S',u'T',u'U',u'V',u'W',u'X',u'Y',u'Z']
    puncts = [u' ',u'.', u'"', u',', u'{', u'}', u'+', u'-', u'*', u'/', u'%', u'&', u'(', ')', u'[', u']', u'=', u'@', u'#', u':', u'|', u';',u'\'s']
    #at present tfidf filter is not applied
    #freqterms1 = [w for w in fdist1.keys() if w not in stopwords and w not in puncts and (fdist1.freq(w) * compute_idf(corpus, w))]
    freqterms1 = [w.decode("utf-8") for w in fdist1.keys() if w not in stopwords and w not in puncts]
    current_level = 1
    nodewithmaxparents = ''
    noofparents = 0
    maxparents = 0
    relatedness = 0
    first_convergence_level = 1
    tokensofthislevel = []
    convergingterms = []
    convergingparents = []
    tokensofprevlevel = []
    prevlevelsynsets = []
    commontokens = []
    vertices = 0
    edges = 0
    overlap = 0
    iter = 0
    from nltk.corpus import wordnet as wn
    #recurse down to required depth and update intrinsic merit score
    #relatedness is either sum(overlaps) or sum((overlapping_parents)*(overlaps)^2) also called convergence factor
    while current_level < 3:
        #crucial - gather nodes which converge/overlap (have more than 1 parent)
        if current_level > 1:
            print current_level
            for x in freqterms1:
                for y in parents(x,prevlevelsynsets):
                    # Only the first lemma name of each parent is wired in.
                    ylemmanames=y.lemma_names()
                    #for yl in ylemmanames:
                    #    definitiongraphedges[x].append(yl)
                    definitiongraphedges[x].append(ylemmanames[0])
                    definitiongraphedgelabels[x + " - " + ylemmanames[0]].append(" is a subinstance of ")
                    definitiongraphedgelabels[ylemmanames[0] + " - " + x].append(" is a superinstance of ")
            convergingterms = [w for w in freqterms1 if len(parents(w,prevlevelsynsets)) > 1]
            for kw in freqterms1:
                convergingparents = convergingparents + ([w for w in parents(kw, prevlevelsynsets) if len(parents(kw, prevlevelsynsets)) > 1])
            for kw in freqterms1:
                noofparents = len(parents(kw, prevlevelsynsets))
                if noofparents > maxparents:
                    maxparents = noofparents
                    nodewithmaxparents = kw
        for keyword in freqterms1:
            #WSD - invokes Lesk's algorithm adapted to recursive gloss overlap- best_matching_synset()
            #disamb_synset = best_matching_synset(set(doc1), wn.synsets(keyword))
            # NOTE(review): when use_pywsd_lesk and use_nltk_lesk are both
            # false the first `if` has no `else`, so the pywsd result can be
            # overwritten by the nltk/else branch — confirm the flag setup.
            if use_pywsd_lesk:
                disamb_synset = simple_lesk(" ".join(freqterms1), keyword)
            if use_nltk_lesk:
                disamb_synset = lesk(freqterms1, keyword)
            else:
                disamb_synset = best_matching_synset(freqterms1, wn.synsets(keyword))
            prevlevelsynsets = prevlevelsynsets + [disamb_synset]
            if len(wn.synsets(keyword)) != 0:
                disamb_synset_def = disamb_synset.definition()
                tokens = nltk.word_tokenize(disamb_synset_def)
                fdist_tokens = FreqDist(tokens)
                #at present frequency filter is not applied
                #if keyword in convergingterms:
                tokensofthislevel = tokensofthislevel + ([w for w in fdist_tokens.keys() if w not in stopwords and w not in puncts and fdist_tokens.freq(w)])
        listcount = len(tokensofthislevel)
        setcount = len(set(tokensofthislevel))
        overlap = listcount-setcount
        if overlap > 0 and iter == 0 :
            first_convergence_level = current_level
            iter = 1
        #choose between two relatedness/convergence criteria :-
        #1) simple linear overlap or 2) zipf distributed quadratic overlap
        #relatedness = relatedness + len(convergingparents)*overlap
        relatedness = relatedness + overlap + len(convergingparents)
        #relatedness = relatedness + ((len(convergingparents)*overlap*overlap) + 1)
        #find out common tokens of this and previous level so that same token does not get grasped again -
        #relatedness must be increased since repetition of keywords in two successive levels is a sign of
        #interrelatedness(a backedge from child-of-one-of-siblings to one-of-siblings). Remove vertices and edges
        #corresponding to common tokens
        commontokens = set(tokensofthislevel).intersection(set(tokensofprevlevel))
        tokensofthislevel = set(tokensofthislevel).difference(commontokens)
        relatedness = relatedness + len(commontokens)
        #decrease the vertices count to address common tokens removed above - edges should remain same since they
        #would just point elsewhere
        vertices = vertices + setcount - len(commontokens)
        edges = edges + listcount
        current_level = current_level + 1
        freqterms1 = set(tokensofthislevel)
        tokensofprevlevel = tokensofthislevel
        tokensofthislevel = []
    intrinsic_merit = vertices*edges*relatedness / first_convergence_level
    print definitiongraphedges
    # Materialize the definition graph with edges in both directions.
    nxg=nx.DiGraph()
    pos=nx.spring_layout(nxg)
    #pos=nx.shell_layout(nxg)
    #pos=nx.random_layout(nxg)
    #pos=nx.spectral_layout(nxg)
    #nx.draw_graphviz(nxg,prog="neato")
    for k,v in definitiongraphedges.iteritems():
        for l in v:
            nxg.add_edge(k,l)
            nxg.add_edge(l,k)
    #nx.draw_networkx(nxg)
    #plt.show()
    # Self loops must be removed before computing core numbers.
    nxg.remove_edges_from(nxg.selfloop_edges())
    #print "Core number =",nx.core_number(nxg)
    sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True)
    print "Core number (sorted) :",sorted_core_nxg
    print "============================================================================================================="
    print "Unsupervised Classification based on top percentile Core numbers of the definition graph(subgraph of WordNet)"
    print "============================================================================================================="
    no_of_classes=len(nx.core_number(nxg))
    top_percentile=0
    max_core_number=0
    max_core_number_class=""
    # Walk the top 50% of nodes by core number and track the strongest class.
    for n in sorted_core_nxg:
        print "This document belongs to class:",n[0],",core number=",n[1]
        if top_percentile < no_of_classes*0.50:
            top_percentile+=1
        else:
            break
        if n[1] > max_core_number:
            max_core_number=n[1]
            max_core_number_class=n[0]
    print " max_core_number",max_core_number
    print "==================================================================="
    print "Betweenness Centrality of Recursive Gloss Overlap graph vertices"
    print "==================================================================="
    bc=nx.betweenness_centrality(nxg)
    sorted_bc=sorted(bc.items(),key=operator.itemgetter(1),reverse=True)
    print sorted_bc
    print "==================================================================="
    print "Closeness Centrality of Recursive Gloss Overlap graph vertices"
    print "==================================================================="
    cc=nx.closeness_centrality(nxg)
    sorted_cc=sorted(cc.items(),key=operator.itemgetter(1),reverse=True)
    print sorted_cc
    print "==================================================================="
    print "Degree Centrality of Recursive Gloss Overlap graph vertices"
    print "==================================================================="
    dc=nx.degree_centrality(nxg)
    sorted_dc=sorted(dc.items(),key=operator.itemgetter(1),reverse=True)
    print sorted_dc
    print "==================================================================="
    print "Page Rank of the vertices of RGO Definition Graph (a form of Eigenvector Centrality)"
    print "==================================================================="
    sorted_pagerank_nxg=sorted(nx.pagerank(nxg).items(),key=operator.itemgetter(1),reverse=True)
    print sorted_pagerank_nxg
    return (sorted_core_nxg, sorted_pagerank_nxg)
def run(self, G):
    """Run the analysis step on graph G.

    Currently only computes core numbers; the result is bound locally and
    discarded (no return value) — presumably a stub, or the remainder of
    the method lives elsewhere. TODO confirm intended use of `cn`.
    """
    cn = networkx.core_number(G)
# --- Per-year bibliography-graph figure layout and export -------------------
# NOTE(review): pos, xmax, xmin, ymax, year_iter, bib_graph, node_weight,
# edge_weight, and html_fig are defined earlier in the script (outside this
# excerpt) — confirm before reusing this section standalone.
ymin=min(yy for xx,yy in pos.values())
dx = xmax - xmin
ddx=0.1*dx  # 10% whitespace margin on each axis
dy = ymax - ymin
ddy=0.1*dy
plt.hold(False)
plt.xlim(xmin-ddx,xmax+ddx)
plt.ylim(ymin-ddy,ymax+ddy)
plt.title('Bibliography Graph %s' %year_iter)
plt.axis('off')
plt.savefig("../output/bib-graph%s.png" %year_iter) # save as png
fig = plt.figure(1, figsize=(10, 10))
plt.axis('off')
# Re-layout with graphviz and colour nodes by core number, scaled x10 so the
# OrRd colormap spreads visibly across shells.
pos=nx.drawing.nx_agraph.graphviz_layout(bib_graph)
node_color=[c*10 for c in nx.core_number(bib_graph).values()]
# for node in bib_graph:
#     pos[node]=(pos[node][0]*10, pos[node][1]*10)
# NOTE(review): `alph=0.5` looks like a typo for `alpha=0.5` — confirm
# whether the transparency setting ever took effect.
nx.draw_networkx(bib_graph, pos, node_size=node_weight, node_color=node_color, edge_color=edge_weight, alph=0.5, cmap=plt.cm.OrRd, font_size=11, font_family='arial')
plt.title('Bibliography Graph %s' %year_iter)
mpld3.plugins.connect(fig)
mpld3.save_html(fig, html_fig)
# Uncomment for more output formats
#nx.write_graphml(bib_graph, './output/biblio-perso-globale%s.graphml' %year_iter)
#nx.write_gml(bib_graph, './output/biblio-perso-globale.gml')
#nx.write_graphml(bib_graph, './output/biblio-perso-globale.graphml')
#plt.show() # display
# Stitch the per-year PNGs into an animated GIF with ImageMagick's convert.
input = "../output/bib-graph????.png"
output = "../output/bib-animation.gif"
os.system("convert -delay 100 -loop 0 %s %s" % (input, output))
def test_core_number(self):
    """Smoke test: nx.core_number runs on the fixture graph without raising."""
    # smoke test real name
    # The returned mapping is intentionally not asserted against here.
    cores = nx.core_number(self.G)
def K_core_graph(R_graph):
    """Return the core number of every node of R_graph.

    NOTE: despite the name, this returns the node -> core-number mapping
    (dict), not a k-core subgraph.
    """
    return nx.core_number(R_graph)
weight_str_map[k+" - "+l]=weight_str_map[k+" - "+l]+" contains "+mlemmanames[0] except KeyError: weight_str_map[k+" - "+l]="" for n in hyperlsynsets: try: nlemmanames=n.lemma_names() weight_str_map[l+" - "+k]=weight_str_map[l+" - "+k]+" is part of "+nlemmanames[0] except KeyError: weight_str_map[l+" - "+k]="" #nx.draw_graphviz(nxg,prog="neato") nx.draw_networkx(nxg) plt.show() nxg.remove_edges_from(nxg.selfloop_edges()) #print "Core number =",nx.core_number(nxg) sorted_core_nxg=sorted(nx.core_number(nxg).items(),key=operator.itemgetter(1), reverse=True) print "Core number (sorted) :",sorted_core_nxg print "=============================================================================================================" print "Unsupervised Classification based on top percentile Core numbers of the definition graph(subgraph of WordNet)" print "=============================================================================================================" no_of_classes=len(nx.core_number(nxg)) top_percentile=0 max_core_number=0 for n in sorted_core_nxg: print "This document belongs to class:",n[0],",core number=",n[1] if top_percentile < no_of_classes*0.10: top_percentile+=1 else: break if n[1] > max_core_number: max_core_number=n[1]