def getDegCentr(graph):
    """Return the Freeman degree centralization of ``graph``.

    The numerator sums, over every node, the gap between the degree
    centrality of the maximum-degree node and the node's own degree
    centrality; the result is that sum divided by ``n - 2``.

    Graphs with two or fewer nodes would make the denominator zero or
    negative, so 0.0 is returned for them instead of raising.
    """
    n = graph.GetNodes()
    if n <= 2:
        return 0.0

    nid = snap.GetMxDegNId(graph)
    CDn = snap.GetDegreeCentr(graph, nid)
    freeman_nom = 0.
    for NI in graph.Nodes():
        freeman_nom += CDn - snap.GetDegreeCentr(graph, NI.GetId())
    return freeman_nom / (n - 2)
def Degree(d, e):
    """Split the weight ``e`` over each edge according to endpoint centrality.

    ``d`` is the path of a text file.  Its first line holds
    ``"<node count> <edge count>"``; each of the following ``<edge count>``
    lines holds one ``"<src> <dst>"`` pair.  Fields are separated by a single
    space and nodes ``0 .. node count - 1`` are created up front.

    Returns a dict keyed by both orientations of every edge.  For an edge
    between ``u`` and ``v`` with degree centralities ``Cu`` and ``Cv``:
    ``(u, v) -> e * Cu / (Cu + Cv)`` and ``(v, u) -> e * Cv / (Cu + Cv)``.
    """
    # Read everything up front so the handle is closed even if parsing fails.
    with open(d) as f:
        lines = f.read().split('\n')

    header = lines[0].split(' ')
    node_count = int(header[0])
    edge_count = int(header[1])

    # Parse each edge line once; indexing raises IndexError on a short file,
    # exactly as before.
    edges = []
    for i in range(1, edge_count + 1):
        fields = lines[i].split(' ')
        edges.append((int(fields[0]), int(fields[1])))

    G1 = snap.PUNGraph.New()
    for i in range(0, node_count):
        G1.AddNode(i)
    for src, dst in edges:
        G1.AddEdge(src, dst)

    DegCentr = dict()
    for NI in G1.Nodes():
        DegCentr[NI.GetId()] = snap.GetDegreeCentr(G1, NI.GetId())

    EdgePara = dict()
    for src, dst in edges:
        total = DegCentr[src] + DegCentr[dst]
        EdgePara[(src, dst)] = e * DegCentr[src] / total
        EdgePara[(dst, src)] = e * DegCentr[dst] / total
    return EdgePara
def degree(rankCommands, Graph, conn, cur):
    """Rank all nodes of ``Graph`` by degree centrality and store the ranking.

    The centralities are collected in a ``snap.TIntFltH`` keyed by node id,
    the elapsed time is printed, and the node ids sorted by descending
    centrality are passed, together with the hash table and the remaining
    arguments, to ``createTable``.
    """
    DegreeH = snap.TIntFltH()
    before_time = time.time()
    for NI in Graph.Nodes():
        DegreeH[NI.GetId()] = snap.GetDegreeCentr(Graph, NI.GetId())
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("Total handling time is:  %s" % (time.time() - before_time))
    slist = sorted(DegreeH, key=lambda key: DegreeH[key], reverse=True)
    createTable(rankCommands, slist, DegreeH, conn, cur)
def calc_DegreeCentrality(Graph, node_to_g):
    """Return a dict from ``node_to_g[node id]`` to that node's degree centrality.

    Every node of ``Graph`` is visited; its id is translated through
    ``node_to_g`` and the translated value becomes the key of the result.
    """
    centrality_by_label = {}
    for node in Graph.Nodes():
        node_id = node.GetId()
        label = node_to_g[node_id]
        centrality_by_label[label] = snap.GetDegreeCentr(Graph, node_id)
    return centrality_by_label
def rank_degree(self):
    """
    Compute the degree centrality of every node in ``self._graph``.

    The scores are gathered in a dict keyed by node ID, checked against
    ``self._num_nodes``, and returned after being passed through
    ``snap_hash_to_dict``.
    NOTE(review): the ordering by score promised by the previous docstring
    is presumably applied by ``snap_hash_to_dict`` - confirm.
    """
    graph = self._graph
    scores = {}
    for node in graph.Nodes():
        node_id = node.GetId()
        scores[node_id] = snap.GetDegreeCentr(graph, node_id)
    assert len(scores) == self._num_nodes, 'Number of nodes must match'
    return snap_hash_to_dict(scores)
def get_node_centrality(snap_graph):
    """Return ``(node ids, degree centralities)`` as parallel NumPy arrays.

    Ids are stored as ``uint32`` and centralities as ``float32``; both follow
    the iteration order of ``snap_graph.Nodes()``.
    """
    scored = [
        (node.GetId(), snap.GetDegreeCentr(snap_graph, node.GetId()))
        for node in snap_graph.Nodes()
    ]
    id_list = [node_id for node_id, _ in scored]
    centr_list = [centr for _, centr in scored]
    id_array = np.asarray(id_list, dtype='uint32')
    centr_array = np.asarray(centr_list, dtype='float32')
    return id_array, centr_array
def degreeCentrality(graph, x):
    """Return the first ``int(x)`` ``[node id, centrality]`` pairs, best first.

    Pairs are ordered by descending degree centrality; the sort is stable, so
    nodes with equal centrality keep the order of ``graph.Nodes()``.
    """
    scored = [
        [node.GetId(), snap.GetDegreeCentr(graph, node.GetId())]
        for node in graph.Nodes()
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    limit = int(x)
    return scored[:limit]
def GetMaxKDegreeCentrality(self, k):
    """Collect every node's degree centrality and delegate to ``self.GetMaxK``.

    Two parallel lists are built from ``self.graph`` (centralities and node
    ids), printed for inspection, and handed to ``self.GetMaxK`` together
    with ``k``; whatever that method returns is returned unchanged.
    """
    lstDeg = []
    nodesId = []
    for NI in self.graph.Nodes():
        nodesId.append(NI.GetId())
        lstDeg.append(snap.GetDegreeCentr(self.graph, NI.GetId()))
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("%s %s" % (lstDeg, nodesId))
    return self.GetMaxK(lstDeg, nodesId, k)
def model_degree(G):
    """Plot the degree centrality of every node of ``G`` through ``picture``.

    Node ids go on the x axis and centralities on the y axis; the title and
    axis labels are passed to ``picture`` as strings.
    """
    node_ids = []
    centralities = []
    for node in G.Nodes():
        centrality = snap.GetDegreeCentr(G, node.GetId())
        node_ids.append(node.GetId())
        centralities.append(centrality)
    picture(node_ids, centralities, 'degree centrality', 'node', 'centrality')
def sample_degree_centrality(self, n_node=100):
    '''
    Degree centrality of a node is defined as its degree/(N-1), where N is
    the number of nodes in the network.

    Draws ``min(self.num_nodes, n_node)`` distinct nodes from ``self.nodes``
    uniformly at random and returns their degree centralities as a list, in
    the order the nodes were drawn.

    :param n_node: number of nodes to sample
    '''
    snap = self.snap
    n_node = min(self.num_nodes, n_node)
    src = np.random.choice(self.nodes, n_node, replace=False)
    # NumPy integers are converted to plain int before being handed to SNAP.
    return [snap.GetDegreeCentr(self.graph, int(nid)) for nid in src]
def get_node_centrality(graph, gtype='snap'):
    """Return ``(node ids, centrality values)`` as parallel NumPy arrays.

    gtype == 'snap': values are ``snap.GetDegreeCentr`` for every node of
        ``graph.Nodes()``.
    gtype == 'nx': values are whatever ``graph.degree(..., weight='weight')``
        yields for the node labels ``0 .. number_of_nodes() - 1``.
    Any other ``gtype`` produces two empty arrays.

    Ids are returned as ``uint32`` and values as ``float32``.
    """
    nids, deg_centr = [], []
    if gtype == 'snap':
        for NI in graph.Nodes():
            nids.append(NI.GetId())
            deg_centr.append(snap.GetDegreeCentr(graph, NI.GetId()))
    elif gtype == 'nx':
        nnodes = graph.number_of_nodes()
        pairs = list(graph.degree(range(nnodes), weight='weight'))
        nids = [nid for nid, _ in pairs]
        deg_centr = [con for _, con in pairs]
    return (np.asarray(nids, dtype='uint32'),
            np.asarray(deg_centr, dtype='float32'))
def GetOpinionLeaders(self, method, proportion, communities):
    """Select opinion leaders by degree centrality and return them.

    ``k`` is ``int(number of nodes * proportion)``.

    method == "W" (whole network): ``self.opinionLeaders`` is replaced by
        ``self.GetMaxKDegree(k)``.
    any other method (per community): community ``i`` may contribute at most
        ``int(len(communities[i]) * proportion)`` leaders.  Nodes are visited
        from highest to lowest degree centrality and added to the existing
        ``self.opinionLeaders`` set while their community still has quota
        left; the scan stops once the set holds ``k`` nodes.  Nodes that
        belong to no community are skipped.  If ``communities`` is empty an
        empty set is returned and ``self.opinionLeaders`` is left untouched.

    Nodes with equal centrality are ranked deterministically: among ties, the
    node that appears later in ``self.graph.Nodes()`` is visited first.
    """
    k = int(self.graph.GetNodes() * proportion)
    if method == "W":  # whole network
        self.opinionLeaders = self.GetMaxKDegree(k)
        return self.opinionLeaders

    # method = "C": each community
    if not communities:
        return set()

    # dictIMN[i] = [quota of community i, leaders picked so far]
    dictIMN = {}
    dictNodeCommunity = {}
    for i, members in enumerate(communities):
        dictIMN[i] = [int(len(members) * proportion), 0]
        for node in members:
            dictNodeCommunity[node] = i

    # Rank all nodes by ascending centrality with the built-in sort instead
    # of a hand-written quadratic exchange sort.
    ranked = sorted(
        ((snap.GetDegreeCentr(self.graph, NI.GetId()), NI.GetId())
         for NI in self.graph.Nodes()),
        key=lambda pair: pair[0])
    nodesId = [nid for _, nid in ranked]
    print(nodesId)

    # Walk from the best node down to the very last one; the previous
    # range(count - 1, 0, -1) never looked at index 0.
    for node in reversed(nodesId):
        communityindex = dictNodeCommunity.get(node)
        if communityindex is None:
            continue  # node is not assigned to any community
        picked = dictIMN[communityindex]
        if picked[1] < picked[0]:
            self.opinionLeaders.add(node)
            picked[1] += 1
            if len(self.opinionLeaders) >= k:
                break
    return self.opinionLeaders
def get_degree_centrality(G, n):
    """Return ``snap.GetDegreeCentr(G, n)``: the degree centrality of node ``n`` in ``G``."""
    centrality = snap.GetDegreeCentr(G, n)
    return centrality
def getAttribute(filename):
    """Load an undirected edge list and tabulate node-level measures.

    ``filename`` is read with ``snap.LoadEdgeList`` (source ids in column 0,
    destination ids in column 1) and the graph is dumped to stdout.

    Returns a ``pandas.DataFrame`` with one row per node and the columns
    Graph, Id, Degree, DegreeCentrality, NodeBetweennessCentrality,
    ClosenessCentrality, FarnessCentrality, PageRank, HubsScore,
    AuthoritiesScore, NodeEccentricity and EigenvectorCentrality.

    Every measure is looked up by the node id stored in the row's ``Id``
    column, so all columns stay aligned no matter in which order SNAP's
    result hash tables happen to iterate.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    UGraph.Dump()
    node_count = UGraph.GetNodes()

    attributes = pd.DataFrame(
        np.zeros(shape=(node_count, 12)),
        columns=['Graph', 'Id', 'Degree', 'DegreeCentrality',
                 'NodeBetweennessCentrality', 'ClosenessCentrality',
                 'FarnessCentrality', 'PageRank', 'HubsScore',
                 'AuthoritiesScore', 'NodeEccentricity',
                 'EigenvectorCentrality'])
    attributes['Graph'] = [filename] * node_count

    # Degree: the (node id, out-degree) pairs define the row order.
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    node_ids = []
    degrees = []
    for item in OutDegV:
        node_ids.append(item.GetVal1())
        degrees.append(item.GetVal2())
    attributes['Id'] = node_ids
    attributes['Degree'] = degrees

    # Degree, Closeness, Farness Centrality, Node Eccentricity
    attributes['DegreeCentrality'] = [
        snap.GetDegreeCentr(UGraph, nid) for nid in node_ids]
    attributes['ClosenessCentrality'] = [
        snap.GetClosenessCentr(UGraph, nid) for nid in node_ids]
    attributes['FarnessCentrality'] = [
        snap.GetFarnessCentr(UGraph, nid) for nid in node_ids]
    attributes['NodeEccentricity'] = [
        snap.GetNodeEcc(UGraph, nid, False) for nid in node_ids]

    # Betweenness Centrality
    Nodes = snap.TIntFltH()
    Edges = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    attributes['NodeBetweennessCentrality'] = [Nodes[nid] for nid in node_ids]

    # PageRank
    PRankH = snap.TIntFltH()
    snap.GetPageRank(UGraph, PRankH)
    attributes['PageRank'] = [PRankH[nid] for nid in node_ids]

    # Hubs, Authorities score
    NIdHubH = snap.TIntFltH()
    NIdAuthH = snap.TIntFltH()
    snap.GetHits(UGraph, NIdHubH, NIdAuthH)
    attributes['HubsScore'] = [NIdHubH[nid] for nid in node_ids]
    attributes['AuthoritiesScore'] = [NIdAuthH[nid] for nid in node_ids]

    # Eigenvector Centrality
    NIdEigenH = snap.TIntFltH()
    snap.GetEigenVectorCentr(UGraph, NIdEigenH)
    attributes['EigenvectorCentrality'] = [NIdEigenH[nid] for nid in node_ids]

    return attributes
# Find the node with the largest eigenvector centrality; every value is also
# collected in eigen1.
maxnode = 0
for node in NIdEigenH:
    if maxd < NIdEigenH[node]:
        maxd = NIdEigenH[node]
        maxnode = node
    eigen1.add(NIdEigenH[node])
# Single-argument print() emits the same text on Python 2 and Python 3.
print("Max Eigen node -> %s" % maxnode)
print("Max Eigen Value -> %s" % maxd)

#Degree Centrality - Graph1
maxd = 0.0
maxnode = 0
for node in Graph1.Nodes():
    #degree centrality
    DegCentr = snap.GetDegreeCentr(Graph1, node.GetId())
    degree1.add(DegCentr)
    if maxd < DegCentr:
        maxd = DegCentr
        maxnode = node.GetId()

# Each line of the names file is "<node id>\t<name>"; report the name of the
# node found above.  The with-block closes the file even if a line is bad.
with open('myrecipe.names.txt') as fp:
    for i in fp:
        ar = i.rstrip('\n').split('\t')
        if int(ar[0]) == maxnode:
            print("Max. Degree Centrality Node in 'My Recipes' network is  "
                  "%s  :  %s  with centrality:  %s" % (maxnode, ar[1], maxd))
            print("i.e., %s ingredient is used  %s %% of the time in "
                  "My Recipes network\n" % (ar[1], round(maxd * 100, 2)))
#Graph2
def degree_centrality(graph):
    """Return the degree centrality of every node, in ``graph.Nodes()`` order."""
    scores = []
    for node in graph.Nodes():
        scores.append(sp.GetDegreeCentr(graph, node.GetId()))
    return scores
# Input format: first line "<node count> <edge count>", followed by one
# "<src> <dst>" line per edge (fields separated by a single space).
with open("Graphdata/retweet.txt") as f:
    s = f.read()
s1 = re.split('\n', s)
a = re.split(' ', s1[0])

# Parse every edge line once and reuse the result below.
edges = []
for i in range(1, int(a[1]) + 1):
    b = re.split(' ', s1[i])
    edges.append((int(b[0]), int(b[1])))

G1 = snap.PUNGraph.New()
for i in range(0, int(a[0])):
    G1.AddNode(i)
for src, dst in edges:
    G1.AddEdge(src, dst)

DegCentr = dict()
for NI in G1.Nodes():
    DegCentr[NI.GetId()] = snap.GetDegreeCentr(G1, NI.GetId())

# Each orientation of an edge gets the share of its first endpoint in the
# combined degree centrality of both endpoints.
EdgePara = dict()
for src, dst in edges:
    total = DegCentr[src] + DegCentr[dst]
    EdgePara[(src, dst)] = DegCentr[src] / total
    EdgePara[(dst, src)] = DegCentr[dst] / total

snap.DrawGViz(G1, snap.gvlNeato, "graph_undirected.png", "graph 2", True)
import sys
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

input_file = sys.argv[1]
Graph = snap.LoadEdgeList(snap.PUNGraph, input_file, 0, 1)

# node id -> degree centrality
degree = dict()
for node in Graph.Nodes():
    #degree centrality
    DegCentr = snap.GetDegreeCentr(Graph, node.GetId())
    degree[node.GetId()] = DegCentr

# distinct centrality values only
dc = set(degree.values())

# One "<node id> : <centrality>" line per node, highest centrality first
# (ties broken by higher node id).  A plain one-argument lambda replaces the
# tuple-parameter form, which is a syntax error on Python 3.
with open(sys.argv[1] + '.degree.txt', 'w+') as fp:
    for p in sorted(degree.items(),
                    key=lambda kv: (kv[1], kv[0]),
                    reverse=True):
        fp.write("%s : %s\n" % p)

dc = sorted(dc, key=float, reverse=True)

#plotting degree centrality
plt.plot(np.arange(1, len(dc) + 1, 1), dc, 'b.')
plt.xlabel('Rank')
plt.ylabel('Degree Centrality')
continue trial = random.random() if trial < 0.01: scheduled.add(neighborNodeId) influenceSet.add(neighborNodeId) return len(influenceSet) nodelist = [] degree_centrality = {} for node in smallWorld.Nodes(): nodelist.append(node.GetId()) for node in nodelist: DegCentr = snap.GetDegreeCentr(smallWorld, node) degree_centrality[node] = DegCentr tdc = sorted(degree_centrality.items(), key=lambda x: x[1], reverse=True) getset = [] some = 5 resultset = [0] while (some <= 30): for i in range(0, some): getset.append(tdc[i][0]) some = some + 5 resultset.append( sum([ getLengthInfluenceSet(smallWorld, set([node])) for node in getset ]))
page_writer.writeheader() for item in PRankH: page_writer.writerow({'node_id': item, 'page_rank': PRankH[item]}) #Eigenvector Centrality #epsilon = 10^-4, max_iters = 100 NIdEigenH = snap.TIntFltH() snap.GetEigenVectorCentr(UGraph, NIdEigenH) with open('na_power_eigcentr.csv', 'w') as csvfile: #print eig centralities to CSV fieldnames = ['node_id', 'eig_centr'] eig_writer = csv.DictWriter(csvfile, fieldnames=fieldnames) eig_writer.writeheader() for item in NIdEigenH: eig_writer.writerow({'node_id': item, 'eig_centr': NIdEigenH[item]}) #Degree Centrality #for some reason I get an error when I try to print all three in a single execution of the code; #works if you comment one block out with open('na_power_degcentr.csv', 'w') as csvfile: #print degree centralities fieldnames = ['node_id', 'deg_centr'] deg_writer = csv.DictWriter(csvfile, fieldnames=fieldnames) deg_writer.writeheader() for NI in UGraph.Nodes(): n_id = NI.GetId() DegCentr = snap.GetDegreeCentr(UGraph, n_id) deg_writer.writerow({'node_id': n_id, 'deg_centr': DegCentr})
dataset['keyword']).get_feature_names() if re.search('[0-9].....', x) == None ] print 'Creating node and edge list' nx_input = output_network_inputs(id_dict, pack='snap') # pp.pprint(nx_input) # print_break('Network Graph: NetworkX') # G=nx.Graph() # G.add_nodes_from(nx_input['nodes']) # G.add_edges_from(nx_input['edges']) # measures = { 'centrality': nx.degree_centrality(G), 'clustering': nx.clustering(G), 'triads': nx.triangles(G) } # pp.pprint(measures) print_break('Network Graph: SNAP') t0 = time.time() G = snap.TUNGraph.New() print 'Adding Nodes' for i in tqdm(nx_input['nodes']): G.AddNode(i) print 'Adding Edges' for x in tqdm(nx_input['edges']): G.AddEdge(x[0], x[1]) print 'Calculating measures' centrality = [snap.GetDegreeCentr(G, n.GetId()) for n in G.Nodes()] measures = { 'centrality': np.mean(centrality), 'clustering': snap.GetClustCf(G), 'triads': snap.GetTriads(G) } pp.pprint(measures) print_break('SNAP Graph Measures Time elapsed: %s' % (time.time() - t0))
import snap import parser, make_graphs filenames = ["0301/{}.txt".format(i) for i in range(0, 3)] data = parser.Data(filenames) graph = make_graphs.make_graph(data) ugraph = snap.ConvertGraph(snap.PUNGraph, graph) mxwcc = snap.GetMxWcc(graph) umxwcc = snap.GetMxWcc(ugraph) N = 20 # === GetDegreeCentr === s = [] for NI in umxwcc.Nodes(): DegCentr = snap.GetDegreeCentr(umxwcc, NI.GetId()) s.append((NI.GetId(), DegCentr)) s.sort(key=lambda x: x[1], reverse=True) # sort with max centrality at front print '=== GetDegreeCentr ===' with open("GetDegreeCentr-0-2.txt", 'w') as f: for x in s: f.write("{} {}\n".format(*x)) # === GetBetweennessCentr === Nodes = snap.TIntFltH() Edges = snap.TIntPrFltH() snap.GetBetweennessCentr(mxwcc, Nodes, Edges, 1.0) s = [(node, Nodes[node]) for node in Nodes] s.sort(key=lambda x: x[1], reverse=True) # sort with max centrality at front print '=== GetBetweennessCentr ===' with open("GetBetweennessCentr-0-2.txt", 'w') as f: for x in s:
def DegreeCentrality(self):
    """Return a dict mapping each node id of ``self.graph`` to its degree centrality."""
    return {
        node.GetId(): snap.GetDegreeCentr(self.graph, node.GetId())
        for node in self.graph.Nodes()
    }
import snap

# Random undirected graph with 100 nodes and 1000 edges.
UGraph = snap.GenRndGnm(snap.PUNGraph, 100, 1000)
for NI in UGraph.Nodes():
    DegCentr = snap.GetDegreeCentr(UGraph, NI.GetId())
    # Single-argument print() emits the same text on Python 2 and Python 3.
    print("node: %d centrality: %f" % (NI.GetId(), DegCentr))
# Map each user id (field 0) to the node id of its name (field 1); lines
# whose third "||"-separated field is empty are ignored.
for line in lines:
    tokens = line.split('||')
    if tokens[2] != '':
        nId = nameToNId[tokens[1]]
        uIdToNId[int(tokens[0])] = nId

graph = snap.ConvertGraph(snap.PUNGraph, network)
degCenters = {}
closeCenters = {}
pageRanks = snap.TIntFltH()
eigenCenters = snap.TIntFltH()
# btwnCenters = snap.TIntFltH()
# edgeHash = snap.TIntPrFltH()

print('Running PageRank...')
snap.GetPageRank(graph, pageRanks)
print('Running Eigenvector centrality...')
snap.GetEigenVectorCentr(graph, eigenCenters)
# print('Running Betweeness...')
# snap.GetBetweennessCentr(graph, btwnCenters, edgeHash)
print('Running Degree and Closeness...')
# .items() works on both Python 2 and Python 3 (iteritems is Python 2 only).
for uId, nId in uIdToNId.items():
    print("%s %s" % (uId, nId))
    degCenters[uId] = snap.GetDegreeCentr(graph, nId)
    closeCenters[uId] = snap.GetClosenessCentr(graph, nId)

# One CSV row per user: user id, PageRank, eigenvector, degree, closeness.
# The with-block closes the file even if a lookup fails part-way through.
with open('csv/centralities.csv', 'w') as outfile:
    for uId, nId in uIdToNId.items():
        row = (uId, pageRanks[nId], eigenCenters[nId],
               degCenters[uId], closeCenters[uId])
        outfile.write(','.join(str(value) for value in row) + '\n')
## calculate indegree and outdegree centrality
# An earlier version of this script called snap.GetDegreeCentr for both
# outputs, so the "indegree" and "outdegree" files received the same value
# per node (and the author reported seeing 0).  The degree vectors filled
# below already hold (node id, in-degree) and (node id, out-degree) pairs, so
# each centrality is computed from them as degree / (n - 1).
import snap

txt_file = "/Users/dukechan/Downloads/sms_sna_oct18_directed.txt"
G = snap.LoadEdgeList(snap.PNGraph, txt_file, 4, 5)

InDegV = snap.TIntPrV()
OutDegV = snap.TIntPrV()
snap.GetNodeInDegV(G, InDegV)
snap.GetNodeOutDegV(G, OutDegV)

# Largest possible degree of a node; clamped so that a graph with at most
# one node does not divide by zero.
max_degree = float(max(G.GetNodes() - 1, 1))

# indegree
with open('/Users/dukechan/Downloads/result4.txt', 'w') as f:
    for item in InDegV:
        DegCentr = item.GetVal2() / max_degree
        f.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))

# outdegree
with open('/Users/dukechan/Downloads/result5.txt', 'w') as f2:
    for item in OutDegV:
        DegCentr = item.GetVal2() / max_degree
        f2.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))
def _print_degree_centrality_summary(graph):
    """Print the node with the largest degree centrality and the mean over all nodes."""
    max_dc = 0.0
    maxId = -1  # stays -1 when no node has a centrality above 0
    all_centr = []
    for NI in graph.Nodes():
        DegCentr = snap.GetDegreeCentr(graph, NI.GetId())
        all_centr.append(DegCentr)
        if DegCentr > max_dc:
            max_dc = DegCentr
            maxId = NI.GetId()
    print("max")
    print("node: %d centrality: %f" % (maxId, max_dc))
    print("average centrality:  %s" % np.mean(all_centr))


def basic_analysis():
    """Print basic statistics of the stored graph next to a random baseline.

    The graph is loaded from ``../graphs/ph_simple.graph``; the baseline is a
    ``snap.GenRndGnm`` graph with the same number of nodes and edges.  For
    both graphs the function reports clustering coefficient, a log-log degree
    distribution plot (shown with ``plt.show()``), maximum degree, BFS
    diameter estimate, triad count and degree centrality (maximum and mean).
    The average degree is printed once, from the loaded graph.

    All print calls use a single pre-formatted argument so the output is the
    same on Python 2 and Python 3.
    """
    FIn = snap.TFIn("../graphs/ph_simple.graph")
    G = snap.TUNGraph.Load(FIn)

    numNodes = G.GetNodes()
    print("num nodes:  %s" % numNodes)
    numEdges = G.GetEdges()
    print("num edges:  %s" % numEdges)

    # clustering coefficient
    print("\nclustering coefficient")
    print("Clustering G:  %s" % snap.GetClustCf(G))
    ER = snap.GenRndGnm(snap.PUNGraph, numNodes, numEdges)
    print("Clustering ER:  %s" % snap.GetClustCf(ER))

    # degree distribution histogram
    print("\ndegree distribution histogram")
    x_erdosRenyi, y_erdosRenyi = getDataPointsToPlot(ER)
    plt.loglog(x_erdosRenyi, y_erdosRenyi, color = 'g',
               label = 'Erdos Renyi Network')
    x_smallWorld, y_smallWorld = getDataPointsToPlot(G)
    plt.loglog(x_smallWorld, y_smallWorld, linestyle = 'dashed', color = 'b',
               label = 'PH Agency Network')
    plt.xlabel('Node Degree (log)')
    plt.ylabel('Proportion of Nodes with a Given Degree (log)')
    plt.title('Degree Distribution of Erdos Renyi and PH Agency Network')
    plt.legend()
    plt.show()

    # degree
    print("\ndegree distribution")
    deg_sum = 0.0
    CntV = snap.TIntPrV()
    snap.GetOutDegCnt(G, CntV)
    for p in CntV:
        # GetVal1() is a degree, GetVal2() the number of nodes having it
        deg_sum += p.GetVal1() * p.GetVal2()
    max_node = G.GetNI(snap.GetMxDegNId(G))
    deg_sum /= float(numNodes)
    print("average degree:  %s" % deg_sum)  # same for G and ER
    print("max degree:  %s , id:  %s" % (max_node.GetOutDeg(), max_node.GetId()))
    max_node = ER.GetNI(snap.GetMxDegNId(ER))
    print("max degree:  %s , id:  %s" % (max_node.GetOutDeg(), max_node.GetId()))

    # diameter
    print("\ndiameter")
    diam = snap.GetBfsFullDiam(G, 10)
    print("Diameter:  %s" % diam)
    print("ER Diameter:  %s" % snap.GetBfsFullDiam(ER, 10))

    # triads
    print("\ntriads")
    print("Triads:  %s" % snap.GetTriads(G))
    print("ER Triads:  %s" % snap.GetTriads(ER))

    # centrality
    print("\ncentrality")
    _print_degree_centrality_summary(G)
    print("ER")
    _print_degree_centrality_summary(ER)
def getFeatures(G_CoSponsor, G_Campaign, bill_node, legislator_node, comm_node,
                legislator_node_from_campaign, G_Campaign_folded):
    '''
    Build pairwise features X and the matching labels Y.

    ``legislator_node`` is pruned IN PLACE to the entries that are nodes of
    ``G_Campaign_folded`` and also appear in ``legislator_node_from_campaign``.
    ``Y`` comes from ``getY(G_CoSponsor, legislator_node)`` and must provide
    the columns ``node_i`` and ``node_j``.  For every row whose two nodes are
    both in ``G_Campaign_folded`` the following features are computed:
    Degree_Diff, Union_of_Neighbors, CommNeighbors, Clustering_Coeff_Diff,
    Clustering_Coeff_Sum, Clustering_Coeff_Avg, Jaccard, Shortest_Dist,
    Deg_Centrality_Diff and FromSameCluster.  Rows for which any feature is
    missing are dropped from both X and Y.

    ``bill_node`` and ``comm_node`` are accepted but not used.

    return two pd: X, Y
    '''
    print("before dropping %s" % len(legislator_node))
    # Build the filtered list first and assign it back through a slice:
    # removing items while iterating the same list skips elements, and the
    # slice assignment keeps the pruning visible to the caller.
    legislator_node[:] = [
        l for l in legislator_node
        if G_Campaign_folded.IsNode(l) and l in legislator_node_from_campaign
    ]
    cluster_0, cluster_1 = loadClusteringAttr()
    print("after dropping %s" % len(legislator_node))

    Y = getY(G_CoSponsor, legislator_node)

    # clustering coefficient of every node in the folded graph
    NIdCCfH = snap.TIntFltH()
    snap.GetNodeClustCf(G_Campaign_folded, NIdCCfH)

    # degree centrality (folded graph) and in-degree (campaign graph)
    node_centrality = {}
    in_deg = {}
    for i in legislator_node:
        if G_Campaign.IsNode(i):
            node_centrality[i] = snap.GetDegreeCentr(G_Campaign_folded, i)
            in_deg[i] = G_Campaign.GetNI(i).GetInDeg()

    print("begin to compute X")
    # Only the node pair is needed as input: apply() below replaces X with
    # the feature columns returned by compute_attri.
    X = Y[['node_i', 'node_j']]

    def compute_attri(x):
        """Return the feature Series of one (node_i, node_j) row, or an empty one."""
        NId_i = int(x['node_i'])
        NId_j = int(x['node_j'])
        if not (G_Campaign_folded.IsNode(NId_i)
                and G_Campaign_folded.IsNode(NId_j)):
            # An empty Series becomes an all-NaN row, which is dropped later.
            return pd.Series({}, name="Attri")

        clustering_cf_i = NIdCCfH[NId_i]
        clustering_cf_j = NIdCCfH[NId_j]
        CommNeighbors = snap.GetCmnNbrs(G_Campaign, NId_i, NId_j)
        NeighborsUnion = float(len(set().union(
            getNeighbors(NId_i, G_Campaign),
            getNeighbors(NId_j, G_Campaign))))

        FromSameCluster = 0
        if NId_i in cluster_0 and NId_j in cluster_0:
            FromSameCluster = 1
        if NId_i in cluster_1 and NId_j in cluster_1:
            FromSameCluster = 1

        # Two nodes without any neighbours share nothing: Jaccard is 0
        # instead of a division by zero.
        if NeighborsUnion:
            jaccard = CommNeighbors * 1.0 / NeighborsUnion
        else:
            jaccard = 0.0

        result = {
            'Degree_Diff': abs(in_deg[NId_i] - in_deg[NId_j]),
            'Union_of_Neighbors': NeighborsUnion,
            'CommNeighbors': CommNeighbors,
            'Clustering_Coeff_Diff': abs(clustering_cf_i - clustering_cf_j),
            'Clustering_Coeff_Sum': clustering_cf_i + clustering_cf_j,
            # parenthesised: without it only clustering_cf_j was halved
            'Clustering_Coeff_Avg': (clustering_cf_i + clustering_cf_j) / 2.0,
            'Jaccard': jaccard,
            'Shortest_Dist': snap.GetShortPath(G_Campaign, NId_i, NId_j),
            'Deg_Centrality_Diff':
                abs(node_centrality[NId_i] - node_centrality[NId_j]),
            'FromSameCluster': FromSameCluster
        }
        return pd.Series(result, name="Attri")

    begin = time.time()
    print("My program took %s to begin compute X" % (time.time() - start_time))
    X = X.apply(compute_attri, axis=1)
    print("before dropping nan from computing attribute %s" % (X.shape,))
    # Index labels of the rows with at least one missing feature; X and Y
    # share the same index, so the labels are valid for both.
    inds = X.index[pd.isnull(X).any(axis=1)]
    print("My program took %s to finish compute X" % (time.time() - start_time))
    end = time.time()
    print("time to compute x %s" % (end - begin))
    X = X.drop(inds)
    Y = Y.drop(inds)
    print("after dropping nan from computing attribute %s" % (X.shape,))
    return X, Y
evCntr_dict[item] = evCntr_Vector[item] evCntr_sort = sorted(evCntr_dict.values(), reverse=True) evCntr_count10 = 0 for value in evCntr_sort: for key1, val1 in evCntr_dict.iteritems(): if val1 == value: print val1, key1 evCntr_count10 += 1 break if evCntr_count10 == 10: break dc_dict = dict() for nex in ugraph.Nodes(): dc_dict[nex.GetId()] = snap.GetDegreeCentr(ugraph, nex.GetId()) dc_sort = sorted(dc_dict.values(), reverse=True) dc_count10 = 0 dc_set = set(dc_sort) dc_sort1 = sorted(list(dc_set), reverse=True) for value in dc_sort1: for key1, val1 in dc_dict.iteritems(): if val1 == value and dc_count10 < 10: print val1, key1 dc_count10 += 1 if dc_count10 == 10: break if dc_count10 == 10: break pg_rank_nodes = [None] * (len(nodes) + 1)