def _initialize(self, mu, sigma_ratio):
    """Attach a random, normalised probability to every in-edge of the graph.

    For each node that has at least one in-edge, one weight per in-edge is
    drawn from a normal distribution centred on ``1 / in_degree`` with
    standard deviation ``sigma_ratio``, clipped to ``[0, 1]`` and rescaled so
    the weights sum to one. Each weight is stored as the float edge attribute
    named by ``self.prob``.

    Args:
        mu: Not used by this method.
        sigma_ratio: Standard deviation handed to ``np.random.normal``.
    """
    in_degrees = snap.TIntPrV()
    snap.GetNodeInDegV(self._graph, in_degrees)

    for pair in in_degrees:
        node_id = pair.GetVal1()
        in_degree = pair.GetVal2()
        if deg_is_zero := (in_degree == 0):
            # Nothing to weight for nodes without incoming edges.
            continue

        target = self._graph.GetNI(node_id)

        # One random weight per incoming link.
        means = np.ones(in_degree, dtype=np.float32) / in_degree
        spreads = sigma_ratio / np.ones(in_degree)
        weights = np.clip(np.random.normal(means, spreads), 0., 1.)

        # Corner case: everything was clipped to zero, fall back to uniform.
        if weights.sum() == 0:
            weights = np.ones(in_degree, dtype=np.float32)
        weights /= weights.sum()

        for slot, weight in enumerate(weights):
            edge = self._graph.GetEI(target.GetInNId(slot), target.GetId())
            self._graph.AddFltAttrDatE(edge, weight, self.prob)
def degree_distribution_graphs():
    """Plot the in-degree distribution of the module-level graph ``G``.

    Only nodes with a positive in-degree are used. Three figures are shown
    one after another: a histogram of the per-degree counts, a log-log plot
    of the number of nodes per in-degree, and a log-log plot of the
    complementary cumulative counts.

    Raises:
        ValueError: If no node has a positive in-degree.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)

    # Collect the degrees directly. Writing them into a pre-sized np.arange
    # buffer (starting at index 1) left placeholder values in the data and
    # could overrun the buffer when the graph contains isolated nodes.
    a = np.array([item.GetVal2() for item in InDegV if item.GetVal2() > 0])
    if a.size == 0:
        raise ValueError("graph has no node with a positive in-degree")

    # NOTE(review): the bin edges stop at max(a) - 1, so the largest degree
    # falls outside the histogram -- confirm whether that is intended.
    bars, bins = np.histogram(a, bins=np.arange(1, a.max()))

    plt.hist(bars, bins)
    plt.grid()
    plt.show()

    plt.loglog(bins[0:-1], bars)
    plt.ylabel('# users per degree')
    plt.xlabel('in-degree')
    plt.grid()
    plt.show()

    plt.loglog(bins[0:-1], sum(bars) - np.cumsum(bars))
    plt.ylabel('# users with degree larger or equal than x')
    plt.xlabel('in-degree')
    plt.grid()
    plt.show()
def in_out_degree_correlation(G):
    """Return the Pearson correlation between in-degree and out-degree.

    Args:
        G: snap graph whose nodes are compared.

    Returns:
        The correlation coefficient only (the p-value computed by
        ``pearsonr`` is discarded).
    """
    # Each vector holds (node id, degree) pairs, including nodes of degree 0.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)
    in_degree = {item.GetVal1(): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    out_degree = {item.GetVal1(): item.GetVal2() for item in OutDegV}

    v_in_d = []
    v_out_d = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for node, degree in in_degree.items():
        if node in out_degree:
            v_in_d.append(degree)
            v_out_d.append(out_degree[node])

    # pearsonr returns a (coefficient, p-value) tuple.
    return pearsonr(v_in_d, v_out_d)[0]
def in_degree_distribution(G):
    """Return the in-, out- and total-degree sequences of ``G``.

    Args:
        G: snap graph to inspect.

    Returns:
        Three parallel lists ``(v_in_d, v_out_d, v_degrees)``: the in-degree,
        the out-degree, and their sum, one entry per node.
    """
    # Each vector holds (node id, degree) pairs, including nodes of degree 0.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)
    in_degree = {item.GetVal1(): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    out_degree = {item.GetVal1(): item.GetVal2() for item in OutDegV}

    v_in_d = []
    v_out_d = []
    v_degrees = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for node, degree in in_degree.items():
        if node in out_degree:
            v_in_d.append(degree)
            v_out_d.append(out_degree[node])
            v_degrees.append(degree + out_degree[node])

    return v_in_d, v_out_d, v_degrees
def _get_degree_in_graph(Graph, H, output_path):
    """Write per-user degree, balance and reciprocity statistics to a CSV.

    Degrees come from ``Graph``; follower/following sets come from the JSON
    profile at ``./data/Users/<username>.json``. Users without a profile file
    are skipped. A mutual follow only counts towards reciprocity when the
    other user is also a node of ``Graph``.

    Args:
        Graph: snap graph of users.
        H: Hash used to translate between node ids and usernames
            (``GetKey`` / ``GetDat``).
        output_path: Destination CSV path.
    """
    columns = ['username', 'in_degree', 'out_degree', 'balance', 'reciprocity']

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    InDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)
    OutDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2() for item in OutDegV}

    dataset = []
    tot = len(InDeg_set)
    num = 0
    for username in InDeg_set:
        in_degree = InDeg_set[username]
        out_degree = OutDeg_set[username]

        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)
        in_set = set(profile['followers'])
        out_set = set(profile['following'])

        # Users who follow nobody are divided by the module-level ``eps``
        # instead of zero.
        denominator = float(out_degree) if out_degree != 0 else eps

        bi = 0
        for out_username in out_set:
            if out_username in in_set:
                try:
                    ID = H.GetDat(out_username)
                    # Compare by value: ``is not -1`` only worked through
                    # CPython's small-integer cache.
                    if ID != -1 and Graph.IsNode(ID):
                        bi += 1
                except Exception as e:
                    # Best effort: report the failed lookup and keep going.
                    print(type(e))
                    print(e.args)
                    print(e)

        dataset.append({
            'username': username,
            'in_degree': in_degree,
            'out_degree': out_degree,
            'balance': float(in_degree) / denominator,
            'reciprocity': float(bi) / denominator,
        })
        num += 1
        print('%d/%d' % (num, tot))

    # Passing ``columns`` fixes the column order and also copes with an empty
    # dataset, where selecting the columns afterwards raises KeyError.
    dataset = pd.DataFrame(dataset, columns=columns)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def indegree(rankCommands, Graph, conn, cur):
    """Rank the nodes of ``Graph`` by in-degree and store the ranking.

    The in-degree computation is timed and the elapsed seconds are printed.
    Sorting and persistence are delegated to ``sortNodes`` and
    ``createTable``.

    Args:
        rankCommands: Passed through to ``createTable``.
        Graph: snap graph to analyse.
        conn: Passed through to ``createTable``.
        cur: Passed through to ``createTable``.
    """
    InDegV = snap.TIntPrV()
    before_time = time.time()
    snap.GetNodeInDegV(Graph, InDegV)
    elapsed = time.time() - before_time
    # Call-form print that emits the same text as the former Python 2
    # statement ``print "...", elapsed`` and also runs on Python 3.
    print("%s %s" % ("Total handling time is: ", elapsed))

    DegH = snap.TIntIntH()
    slist = sortNodes(InDegV, DegH)
    createTable(rankCommands, slist, DegH, conn, cur)
def getNodesByDegree(self):
    """Return ``(node, in_degree)`` pairs ordered by in-degree, highest first.

    Node ids from ``self.rawGraph`` are converted with ``self.node``. Pairs
    with equal in-degree keep the order in which snap reported them.
    """
    in_degrees = snap.TIntPrV()
    snap.GetNodeInDegV(self.rawGraph, in_degrees)

    ranked = [(self.node(pair.GetVal1()), pair.GetVal2())
              for pair in in_degrees]
    ranked.sort(key=lambda entry: entry[1], reverse=True)
    return ranked
def degree_stats():
    """Print the in-degree of the first ten nodes of the module-level ``G``."""
    limit = 10

    in_degrees = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_degrees)

    for shown, pair in enumerate(in_degrees, 1):
        print("node ID %d: in-degree %d" % (pair.GetVal1(), pair.GetVal2()))
        if shown == limit:
            break  # drop this check to print every node
def get_robustness(file_path, LSCC_output_path, LWCC_output_path):
    """Measure how the graph falls apart as its highest-degree nodes are removed.

    Nodes are deleted in order of decreasing total degree (in + out). After a
    fixed set of removal fractions the size of the largest strongly and
    weakly connected components and the share of zero-degree nodes are
    recorded, and the two series are written to CSV.

    Args:
        file_path: Graph file handed to ``load_graph``.
        LSCC_output_path: CSV destination for the strongly-connected series.
        LWCC_output_path: CSV destination for the weakly-connected series.
    """
    frac_list = [
        0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    Graph, _ = load_graph(file_path)

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)

    degree = {}
    for item in InDegV:
        degree[item.GetVal1()] = item.GetVal2()
    for item in OutDegV:
        degree[item.GetVal1()] += item.GetVal2()

    sorted_degree = sorted(degree.items(), key=itemgetter(1), reverse=True)
    tot = len(sorted_degree)
    pos = [int(tot * frac) for frac in frac_list]
    print(pos)

    LSCC_robust = []
    LWCC_robust = []
    removed = 0
    # Walk the checkpoints instead of matching ``i == pos[cur] - 1``: that
    # test never fired when a checkpoint rounded down to 0 or repeated the
    # previous one, so small graphs produced no measurements at all.
    for frac, target in zip(frac_list, pos):
        while removed < target:
            Graph.DelNode(sorted_degree[removed][0])
            removed += 1

        remaining = Graph.GetNodes()
        if remaining == 0:
            # Empty input graph: nothing to measure.
            break

        LSCC_frac = snap.GetMxSccSz(Graph)
        LWCC_frac = snap.GetMxWccSz(Graph)
        singleton_frac = 1.0 - 1.0 * snap.CntNonZNodes(Graph) / remaining
        LSCC_robust.append({
            'removed': frac,
            'singleton': singleton_frac,
            'middle': 1.0 - singleton_frac - LSCC_frac,
            'LSCC': LSCC_frac
        })
        LWCC_robust.append({
            'removed': frac,
            'singleton': singleton_frac,
            'middle': 1.0 - singleton_frac - LWCC_frac,
            'LWCC': LWCC_frac
        })

    # ``columns`` fixes the column order and keeps an empty result valid.
    LSCC_robust = pd.DataFrame(
        LSCC_robust, columns=['removed', 'singleton', 'middle', 'LSCC'])
    LSCC_robust.to_csv(LSCC_output_path, index=False, encoding='utf-8')
    LWCC_robust = pd.DataFrame(
        LWCC_robust, columns=['removed', 'singleton', 'middle', 'LWCC'])
    LWCC_robust.to_csv(LWCC_output_path, index=False, encoding='utf-8')
def _min_max_scale(value, low, high):
    """Scale ``value`` into [0, 1] between ``low`` and ``high``; 0.0 if they are equal."""
    span = float(high) - float(low)
    if span == 0:
        return 0.0
    return (float(value) - float(low)) / span


def computeDegreeCentrality(G, NodeAttributes):
    """Record raw and min-max normalised in/out degree for every node of ``G``.

    For each node id the keys ``InDegree``, ``NormInDegree``, ``OutDegree``
    and ``NormOutDegree`` are written into ``NodeAttributes[node_id]``. A
    missing (or falsy) entry is replaced by a fresh dict first.

    Args:
        G: snap graph to analyse.
        NodeAttributes: Dict keyed by node id; updated in place.

    Returns:
        The same ``NodeAttributes`` dict. When every node has the same
        degree the normalised value is 0.0 instead of a division by zero;
        an empty graph leaves the dict untouched.
    """
    InDegV = snap.TIntPrV()
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    snap.GetNodeInDegV(G, InDegV)

    InDegreeList = [(item.GetVal1(), item.GetVal2()) for item in InDegV]
    OutDegreeList = [(item.GetVal1(), item.GetVal2()) for item in OutDegV]
    if not InDegreeList or not OutDegreeList:
        return NodeAttributes

    # Sorting keeps the order in which new keys are inserted (highest
    # in-degree first) and exposes the extremes at both ends of the list.
    InDegreeList.sort(key=lambda x: x[1], reverse=True)
    maxInDegree = InDegreeList[0][1]
    minInDegree = InDegreeList[-1][1]
    minOutDegree = min(degree for _, degree in OutDegreeList)
    maxOutDegree = max(degree for _, degree in OutDegreeList)

    for nodeId, Degree in InDegreeList:
        if not NodeAttributes.get(nodeId, None):
            NodeAttributes[nodeId] = dict()
        NodeAttributes[nodeId]['InDegree'] = Degree
        NodeAttributes[nodeId]['NormInDegree'] = _min_max_scale(
            Degree, minInDegree, maxInDegree)

    for nodeId, Degree in OutDegreeList:
        NodeAttributes[nodeId]['OutDegree'] = Degree
        NodeAttributes[nodeId]['NormOutDegree'] = _min_max_scale(
            Degree, minOutDegree, maxOutDegree)

    return NodeAttributes
def getMinAvgMax(graph):
    """Return the maximum, minimum and average in-degree of ``graph``.

    Args:
        graph: snap graph to inspect.

    Returns:
        A ``(max_deg, min_deg, avg_deg)`` tuple -- note the order differs
        from the function name. ``avg_deg`` is a float. An empty graph
        yields ``(0, 0, 0.0)``.
    """
    nodes = graph.GetNodes()
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(graph, InDegV)

    degrees = [item.GetVal2() for item in InDegV]
    if not degrees or nodes == 0:
        return (0, 0, 0.0)

    # min()/max() replace the former 10000 sentinel, which was wrong for
    # graphs whose smallest in-degree exceeds it; float() keeps the average
    # from being truncated by Python 2 integer division.
    avg_deg = float(sum(degrees)) / nodes
    return (max(degrees), min(degrees), avg_deg)
def get_in_degree_rank(G):
    """Return ``(node_id, in_degree)`` tuples ranked by in-degree.

    The list is ordered by decreasing in-degree; nodes with the same
    in-degree are ordered by increasing node id. Every node reported by snap
    is listed, including those whose in-degree is 0.
    """
    in_degrees = snap.TIntPrV()
    snap.GetNodeInDegV(G, in_degrees)

    ranking = [(entry.GetVal1(), entry.GetVal2()) for entry in in_degrees]
    # Negating the degree gives "degree descending, id ascending" in a
    # single ascending sort.
    ranking.sort(key=lambda pair: (-pair[1], pair[0]))
    return ranking
def calculate_stats():
    """Precompute per-dataset graph statistics and cache them on disk.

    Depending on the flags in ``config`` this writes, under
    ``config.DATASET_DIR``:

    * ``ego_graphs.txt`` -- JSON mapping each node id to its 1-hop
      neighbours;
    * ``degree_sequence.txt`` -- JSON mapping each node id to its in-degree;
    * ``shortest_path_matrix.npy`` -- stacked results of
      ``get_shortest_path`` for every id in ``node_ids``.

    An existing file is only recomputed when ``config.OVERRIDE`` is set.
    Reads the module-level ``snap_graph`` and ``node_ids``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(config.DATASET_DIR / 'similarities', exist_ok=True)

    if config.CALCULATE_EGO_GRAPHS:
        print(f'Calculating ego graphs for {config.DATASET_DIR }...')
        ego_path = config.DATASET_DIR / 'ego_graphs.txt'
        if not ego_path.exists() or config.OVERRIDE:
            ego_graph_dict = {}
            for node in snap_graph.Nodes():
                node_id = int(node.GetId())
                nodes_vec = snap.TIntV()
                snap.GetNodesAtHop(snap_graph, node_id, 1, nodes_vec, False)
                ego_graph_dict[node_id] = list(nodes_vec)
            with open(str(ego_path), 'w') as f:
                json.dump(ego_graph_dict, f)

    if config.CALCULATE_DEGREE_SEQUENCE:
        print(f'Calculating degree sequences for {config.DATASET_DIR}...')
        degree_path = config.DATASET_DIR / 'degree_sequence.txt'
        if not degree_path.exists() or config.OVERRIDE:
            InDegV = snap.TIntPrV()
            snap.GetNodeInDegV(snap_graph, InDegV)
            # NOTE(review): only the in-degree is stored; the out-degree
            # vector that used to be computed here was never read -- confirm
            # that in-degree alone is what downstream code expects.
            degrees = {item.GetVal1(): item.GetVal2() for item in InDegV}
            with open(str(degree_path), 'w') as f:
                json.dump(degrees, f)

    if config.CALCULATE_SHORTEST_PATHS:
        print(f'Calculating shortest paths for {config.DATASET_DIR}...')
        matrix_path = config.DATASET_DIR / 'shortest_path_matrix.npy'
        if not matrix_path.exists() or config.OVERRIDE:
            with multiprocessing.Pool(processes=config.N_PROCESSSES) as pool:
                shortest_paths = pool.map(get_shortest_path, node_ids)
            all_shortest_paths = np.stack(shortest_paths)
            np.save(str(matrix_path), all_shortest_paths)
def runSimulation(graph, nodeDeg, iterations):
    """Simulate an epidemic and return the average fraction of infected nodes.

    Each iteration picks a random "patient zero" among the nodes whose
    in-degree is at least ``nodeDeg`` and spreads the infection breadth-first;
    every not-yet-infected neighbour is infected with probability
    ``TRANSMISSIBILITY``.

    Args:
        graph: snap graph to simulate on.
        nodeDeg: Minimum in-degree a node needs to be eligible as patient zero.
        iterations: Number of independent simulation runs.

    Returns:
        The mean number of infected nodes per run divided by the node count,
        or 0 when no node is eligible or ``iterations`` is not positive.
    """
    log("Running simulation with params: nodeDeg=%s, iterations=%s ..." %
        (nodeDeg, iterations))

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(graph, InDegV)
    nodeIdSet = set()
    for item in InDegV:
        if item.GetVal2() >= nodeDeg:
            nodeIdSet.add(item.GetVal1())

    if not nodeIdSet or iterations <= 0:
        return 0

    # random.sample() no longer accepts a set on Python 3.11+; a list built
    # once from the set keeps the draw well defined.
    candidates = list(nodeIdSet)

    totalInfected = 0
    for iteration in range(iterations):
        log("Running iteration %s ..." % iteration)

        patientZeroId = random.sample(candidates, 1)[0]
        infected = {patientZeroId}              # everyone infected so far
        infectedQueue = deque([patientZeroId])  # infected but not yet processed

        # Process the queue in FIFO order until every patient had a turn.
        while infectedQueue:
            node = graph.GetNI(infectedQueue.popleft())
            # NOTE(review): GetNbrNId is indexed up to GetOutDeg(); on a
            # directed graph that may not enumerate the out-neighbours --
            # confirm the graph type used by callers.
            for nbrIndex in range(node.GetOutDeg()):
                nbrId = node.GetNbrNId(nbrIndex)
                if nbrId not in infected and random.random() < TRANSMISSIBILITY:
                    infected.add(nbrId)
                    infectedQueue.append(nbrId)

        totalInfected += len(infected)

    # float() keeps Python 2 from truncating both divisions to integers,
    # which collapsed the result to 0 for almost every run.
    avgInfected = float(totalInfected) / iterations
    return avgInfected / graph.GetNodes()
def power_law_fit():
    """Fit a power law to the in-degree distribution of the module-level ``G``.

    Prints the fitted exponent, ``xmin`` and the Kolmogorov-Smirnov distance,
    and draws the empirical PDF/CCDF together with the fitted curves. Only
    nodes with a positive in-degree are used.

    Raises:
        ValueError: If no node has a positive in-degree.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)

    # Fit the observed in-degrees. The data used to be np.arange(1, k), i.e.
    # the integers 1..k-1, and the degree vector above was never read.
    a = np.array([item.GetVal2() for item in InDegV if item.GetVal2() > 0])
    if a.size == 0:
        raise ValueError("graph has no node with a positive in-degree")

    fit = pl.Fit(a)
    pl.plot_pdf(a, color='r')
    fit.plot_pdf(color='b', linewidth=2)

    # Power-law parameters.
    print("Power Law Data\n")
    print("Power Law Exponential:", fit.alpha)
    print("Min value for X:", fit.xmin)
    print("Kolmogorov-Smirnov test:", fit.D)

    # Data versus power-law fit: PDF in blue, CCDF in red; fits are dashed.
    figCCDF = fit.plot_pdf(color='b', linewidth=2)
    fit.power_law.plot_pdf(color='b', linestyle='--', ax=figCCDF)
    fit.plot_ccdf(color='r', linewidth=2, ax=figCCDF)
    fit.power_law.plot_ccdf(color='r', linestyle='--', ax=figCCDF)

    figCCDF.set_ylabel(u"p(X), p(X≥x)")
    figCCDF.set_xlabel(r"in-degree")
def get_in_out_degree_table(self, graph):
    """Return an int32 array with one ``[node_id, in_degree, out_degree]`` row per node.

    Rows are addressed by node id, so the table has ``graph.GetNodes()`` rows
    and node ``k`` is written to row ``k``.
    NOTE(review): this presumes node ids are within ``0 .. GetNodes() - 1``
    -- confirm for graphs with sparse ids.
    """
    # In-degree and out-degree as (node id, degree) pairs.
    in_degree_v = snap.TIntPrV()
    snap.GetNodeInDegV(graph, in_degree_v)
    out_degree_v = snap.TIntPrV()
    snap.GetNodeOutDegV(graph, out_degree_v)

    table = np.zeros((graph.GetNodes(), 3), dtype=np.int32)

    for pair in in_degree_v:
        node_id = pair.GetVal1()
        table[node_id, 0] = node_id
        table[node_id, 1] = pair.GetVal2()

    for pair in out_degree_v:
        table[pair.GetVal1(), 2] = pair.GetVal2()

    return table
r.show("__context__") # load network print time.ctime(), "loading network ..." net = snap.TNEANet.Load(FIn) t.show("loadbin network", net) r.show("__network__") print time.ctime(), "done" # In[2]: print time.ctime(), "computing indegv ..." InDegV = snap.TIntPrV() snap.GetNodeInDegV(net, InDegV) t.show("indegv", InDegV) r.show("__InDegV__") print time.ctime() # In[3]: print time.ctime(), "computing pagerank ..." PRankH = snap.TIntFltH() snap.GetPageRankMP(net, PRankH, 0.85, 1e-4, 100) t.show("prank", PRankH) r.show("__PRankH__") print time.ctime()
def indegree(graph):
    """Return a dict mapping every node id of ``graph`` to its in-degree."""
    pairs = snap.TIntPrV()
    snap.GetNodeInDegV(graph, pairs)

    by_node = {}
    for index in range(pairs.Len()):
        pair = pairs[index]
        by_node[pair.GetVal1()] = pair.GetVal2()
    return by_node
def _rename_plot_files(plotsDir):
    """Keep only the PNG plots in ``plotsDir`` and give them descriptive names."""
    prefixes = {
        "ccf": "clustering_coeff_",
        "outDeg": "deg_dist_",
        "diam": "shortest_path_",
        "scc": "connected_comp_",
    }

    # snap leaves helper files next to every plot; only the images are kept.
    for entry in os.listdir(plotsDir):
        path = os.path.join(plotsDir, entry)
        if not entry.endswith(".png") and os.path.isfile(path):
            os.remove(path)

    for entry in os.listdir(plotsDir):
        nameSplit = entry.split(".")
        # Freshly generated plots are named "<kind>.<graph>.png"; anything
        # shorter was already renamed or is not ours.
        if len(nameSplit) < 3:
            continue
        # Look the prefix up per file so an unknown kind gets the default
        # instead of inheriting the prefix of the previous file.
        filePrefix = prefixes.get(nameSplit[0], "filename")
        os.rename(
            os.path.join(plotsDir, entry),
            os.path.join(plotsDir,
                         filePrefix + nameSplit[1] + "." + nameSplit[2]))


def main():
    """Print structural statistics for one subgraph and write its plots.

    The edge list named by ``sys.argv[1]`` is loaded from ``./subgraphs`` as
    an undirected graph. Node/edge counts, degree information, sampled
    diameters, connectivity and clustering figures are printed, and the
    degree, shortest-path, component-size and clustering-coefficient plots
    are written to ``./plots``.
    """
    parentDir = os.getcwd()
    os.chdir(parentDir + "/subgraphs")
    try:
        sub_graph = snap.LoadEdgeList(snap.PUNGraph, sys.argv[1], 0, 1)
    finally:
        # Always restore the working directory, even if loading fails.
        os.chdir(parentDir)
    subGraphName = sys.argv[1].split(".")[0]

    #### 1 ########
    printWithOutNewLine("Number of nodes:", sub_graph.GetNodes())
    printWithOutNewLine("Number of edges:", snap.CntUniqBiDirEdges(sub_graph))

    #### 2 ########
    printWithOutNewLine("Number of nodes with degree=7:",
                        snap.CntDegNodes(sub_graph, 7))

    # GetMxDegNId returns one maximum-degree node; list every node that
    # shares its degree.
    rndMaxDegNId = snap.GetMxDegNId(sub_graph)
    nodeDegPairs = snap.TIntPrV()
    snap.GetNodeInDegV(sub_graph, nodeDegPairs)
    maxDegVal = 0
    for pair in nodeDegPairs:
        if pair.GetVal1() == rndMaxDegNId:
            maxDegVal = pair.GetVal2()
            break
    maxDegNodes = [pair.GetVal1() for pair in nodeDegPairs
                   if pair.GetVal2() == maxDegVal]
    print("Node id(s) with highest degree:", end=" ")
    print(*maxDegNodes, sep=',')

    #### 3 ########
    sampleSizes = (10, 100, 1000)

    sampledFullDiam = [snap.GetBfsFullDiam(sub_graph, size, False)
                       for size in sampleSizes]
    sampledFullDiamStats = [
        round(statistics.mean(sampledFullDiam), 4),
        round(statistics.variance(sampledFullDiam), 4),
    ]
    for size, diameter in zip(sampleSizes, sampledFullDiam):
        printWithOutNewLine(
            "Approximate full diameter by sampling %d nodes:" % size, diameter)
    print("Approximate full diameter (mean and variance):", end=" ")
    print(*sampledFullDiamStats, sep=',')

    sampledEffDiam = [round(snap.GetBfsEffDiam(sub_graph, size, False), 4)
                      for size in sampleSizes]
    sampledEffDiamStats = [
        round(statistics.mean(sampledEffDiam), 4),
        round(statistics.variance(sampledEffDiam), 4),
    ]
    for size, diameter in zip(sampleSizes, sampledEffDiam):
        printWithOutNewLine(
            "Approximate effective diameter by sampling %d nodes:" % size,
            diameter)
    print("Approximate effective diameter (mean and variance):", end=" ")
    print(*sampledEffDiamStats, sep=',')

    #### 4 ########
    printWithOutNewLine("Fraction of nodes in largest connected component:",
                        round(snap.GetMxSccSz(sub_graph), 4))
    bridgeEdges = snap.TIntPrV()
    snap.GetEdgeBridges(sub_graph, bridgeEdges)
    printWithOutNewLine("Number of edge bridges:", len(bridgeEdges))
    articulationPoints = snap.TIntV()
    snap.GetArtPoints(sub_graph, articulationPoints)
    printWithOutNewLine("Number of articulation points:",
                        len(articulationPoints))

    #### 5 ########
    printWithOutNewLine("Average clustering coefficient:",
                        round(snap.GetClustCf(sub_graph, -1), 4))
    printWithOutNewLine("Number of triads:", snap.GetTriads(sub_graph, -1))

    randomNodeId = sub_graph.GetRndNId()
    nodeIdCcfMap = snap.TIntFltH()
    snap.GetNodeClustCf(sub_graph, nodeIdCcfMap)
    print("Clustering coefficient of random node", end=" ")
    print(randomNodeId, end=": ")
    print(round(nodeIdCcfMap[randomNodeId], 4))
    print("Number of triads random node", end=" ")
    print(randomNodeId, end=" participates: ")
    print(snap.GetNodeTriads(sub_graph, randomNodeId))
    printWithOutNewLine(
        "Number of edges that participate in at least one triad:",
        snap.GetTriadEdges(sub_graph, -1))

    #### plots ########
    plotsDir = os.path.join(parentDir, "plots")
    os.makedirs(plotsDir, exist_ok=True)
    # The snap plot helpers write into the current directory.
    os.chdir(plotsDir)
    try:
        snap.PlotOutDegDistr(sub_graph, subGraphName,
                             subGraphName + " Subgraph Degree Distribution")
        snap.PlotShortPathDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Shortest Path Lengths Distribution")
        snap.PlotSccDistr(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Connected Components Size Distribution")
        snap.PlotClustCf(
            sub_graph, subGraphName,
            subGraphName + " Subgraph Clustering Coefficient Distribution")
        _rename_plot_files(plotsDir)
    finally:
        os.chdir(parentDir)
def getNextAttachment(Graph):
    """Collect the in-degree of every node of ``Graph`` and draw a random number.

    NOTE(review): the visible body stops after the random draw and returns
    None; the remainder of the function appears to be missing from this view
    -- confirm against the full source before relying on it.
    """
    # (node id, in-degree) pairs for every node.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    # Uniform draw in [0, 1); not used within the visible part of the body.
    rand = random.random()
# Collect the (zero-based) node ids that appear in the edge file.
line_num = 0
for line in fil:
    # The first three lines are skipped -- presumably a header; every other
    # line holds two space-separated one-based node ids.
    if line_num > 2:
        a, b = map(int, line.split(" "))
        node_list.add(a - 1)
        node_list.add(b - 1)
    line_num += 1
fil.close()

# Degree centrality: write every node's in-degree, highest first.
D = {}
y = sys.argv[1] + "-degree-centrality-4.txt"
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(Graph, InDegV)
for item in InDegV:
    D[item.GetVal1()] = item.GetVal2()
# .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
D = sorted(D.items(), key=lambda x: x[1], reverse=True)
# The with-block closes the file even if a write fails.
with open(y, "w+") as f:
    f.write("#NId Centrality\r\n")
    for NId, value in D:
        f.write("%d %d\r\n" % (NId, value))

# Closeness Centrality
## Calculate in-degree and out-degree centrality of a directed graph.
import snap

txt_file = "/Users/dukechan/Downloads/sms_sna_oct18_directed.txt"
G = snap.LoadEdgeList(snap.PNGraph, txt_file, 4, 5)

InDegV = snap.TIntPrV()
OutDegV = snap.TIntPrV()
snap.GetNodeInDegV(G, InDegV)
snap.GetNodeOutDegV(G, OutDegV)

# Both loops used to call snap.GetDegreeCentr(G, node), which ignores the
# in/out degrees collected above and, per the original author's note, gave 0
# for this directed graph (snap.py documents that function for undirected
# graphs). The centralities are therefore computed directly as
# degree / (N - 1).
norm = float(max(G.GetNodes() - 1, 1))

# In-degree centrality.
with open('/Users/dukechan/Downloads/result4.txt', 'w') as f:
    for item in InDegV:
        DegCentr = item.GetVal2() / norm
        f.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))

# Out-degree centrality.
with open('/Users/dukechan/Downloads/result5.txt', 'w') as f2:
    for item in OutDegV:
        DegCentr = item.GetVal2() / norm
        f2.write("node: %d centrality: %f\n" % (item.GetVal1(), DegCentr))
def get_in_degrees(Graph):
    """Return a dict mapping every node id of ``Graph`` to its in-degree."""
    pairs = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, pairs)

    in_degrees = {}
    for pair in pairs:
        in_degrees[pair.GetVal1()] = pair.GetVal2()
    return in_degrees
def _get_degree(Graph, H, output_path):
    """Write per-user degree, balance and reciprocity statistics to a CSV.

    For every user the in/out degree is the largest of three sources: the
    degree in ``Graph``, the counters under ``socialStats`` in the user's
    profile (when present), and the size of the profile's follower/following
    lists. Users without a profile file at ``./data/Users/<username>.json``
    are skipped.

    Args:
        Graph: snap graph of users.
        H: Hash translating node ids to usernames (``GetKey``).
        output_path: Destination CSV path.
    """
    columns = ['username', 'in_degree', 'out_degree', 'balance', 'reciprocity']

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    InDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)
    OutDeg_set = {H.GetKey(item.GetVal1()): item.GetVal2() for item in OutDegV}

    dataset = []
    tot = len(InDeg_set)
    num = 0
    for username in InDeg_set:
        in_degree = InDeg_set[username]
        out_degree = OutDeg_set[username]

        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)

        user = profile['profile']['user']
        if 'socialStats' in user:
            in_degree = max(in_degree,
                            user['socialStats']['usersFollowedByCount'])
            out_degree = max(out_degree,
                             user['socialStats']['usersFollowedCount'])

        in_set = set(profile['followers'])
        out_set = set(profile['following'])
        in_degree = max(in_degree, len(in_set))
        out_degree = max(out_degree, len(out_set))

        # Users who follow nobody are divided by the module-level ``eps``
        # instead of zero.
        denominator = float(out_degree) if out_degree != 0 else eps

        # Mutual follows: accounts the user follows that follow back.
        bi = len(out_set & in_set)

        dataset.append({
            'username': username,
            'in_degree': in_degree,
            'out_degree': out_degree,
            'balance': float(in_degree) / denominator,
            'reciprocity': float(bi) / denominator,
        })
        num += 1
        print('%d/%d' % (num, tot))

    # Passing ``columns`` fixes the column order and also copes with an empty
    # dataset, where selecting the columns afterwards raises KeyError.
    dataset = pd.DataFrame(dataset, columns=columns)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
# Number of unique directed edges.
Count = snap.CntUniqDirEdges(graph)
print("Count of directed edges is %d" % Count)

# Number of unique undirected edges.
Count = snap.CntUniqUndirEdges(graph)
print("Count of undirected edges is %d" % Count)

# Number of self edges.
Count = snap.CntSelfEdges(graph)
print("Count of self edges is %d" % Count)

# Number of unique bidirectional (reciprocated) edges.
Count = snap.CntUniqBiDirEdges(graph)
print("Count of unique bidirectional edges is %d" % Count)

# Number of nodes whose out-degree is greater than 10.
OutDegV = snap.TIntPrV()
snap.GetNodeOutDegV(graph, OutDegV)
count_od = 0
for item in OutDegV:
    if item.GetVal2() > 10:
        count_od += 1
print("Count of nodes with more than 10 outgoing edges %d" % count_od)

# Number of nodes whose in-degree is less than 10.
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(graph, InDegV)
count_in = 0
for item in InDegV:
    if item.GetVal2() < 10:
        count_in += 1
print("Count of nodes with fewer than 10 incoming edges %d" % count_in)
def getDirAttribute(filename, node_num, weighted=None, param=1.0):
    """Build a per-node attribute table for the directed graph in ``filename``.

    Args:
        filename: Edge-list file; its base name (without extension) fills the
            ``Graph`` column.
        node_num: Number of rows in the table; node ids are used as row
            indices.
        weighted: When truthy, the weighted attribute columns are added and
            filled by ``getWeightedDegree`` / ``getWeightedEgoAttr``.
        param: Passed to ``snap.GetBetweennessCentr`` as its fourth argument.

    Returns:
        A pandas DataFrame with one row per node id in ``0 .. node_num - 1``.
    """
    Graph = snap.LoadEdgeList(snap.PNGraph, filename, 0, 1)

    columns = [
        'Graph', 'Id', 'Degree', 'InDegree', 'OutDegree',
        'NodeBetweennessCentrality', 'PageRank', 'EgonetDegree',
        'EgonetInDegree', 'EgonetOutDegree', 'AvgNeighborDeg',
        'AvgNeighborInDeg', 'AvgNeighborOutDeg', 'EgonetConnectivity'
    ]
    if weighted:
        columns += [
            'WeightedDegree', 'WeightedInDegree', 'WeightedOutDegree',
            'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity', 'EgoWeightedInDegree',
            'EgoWeightedOutDegree', 'AvgWeightedNeighborInDeg',
            'AvgWeightedNeighborOutDeg'
        ]

    attributes = pd.DataFrame(np.zeros((node_num, len(columns))),
                              columns=columns)
    graph_label = filename.split('/')[-1].split('.')[0]
    attributes['Graph'] = [graph_label] * node_num
    attributes['Id'] = range(0, node_num)

    def dense_degrees(pairs):
        """Spread (node id, degree) pairs into a vector indexed by node id."""
        vector = np.zeros((node_num, ))
        for pair in pairs:
            vector[pair.GetVal1()] = pair.GetVal2()
        return vector

    # Degree = in-degree + out-degree, accumulated one direction at a time.
    in_pairs = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, in_pairs)
    in_degrees = dense_degrees(in_pairs)
    attributes['Degree'] += in_degrees
    attributes['InDegree'] = in_degrees

    out_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, out_pairs)
    out_degrees = dense_degrees(out_pairs)
    attributes['Degree'] += out_degrees
    attributes['OutDegree'] = out_degrees

    getEgoAttr(Graph, node_num, attributes)
    if weighted:
        df = getWeightedDegree(filename, node_num, attributes, directed=True)
        getWeightedEgoAttr(Graph, node_num, attributes, df, directed=True)

    # Betweenness centrality (only the node scores are kept).
    node_scores = snap.TIntFltH()
    edge_scores = snap.TIntPrFltH()
    snap.GetBetweennessCentr(Graph, node_scores, edge_scores, param, True)
    betweenness = np.zeros((node_num, ))
    for node_id in node_scores:
        betweenness[node_id] = node_scores[node_id]
    attributes['NodeBetweennessCentrality'] = betweenness

    # PageRank.
    rank_scores = snap.TIntFltH()
    snap.GetPageRank(Graph, rank_scores)
    page_rank = np.zeros((node_num, ))
    for node_id in rank_scores:
        page_rank[node_id] = rank_scores[node_id]
    attributes['PageRank'] = page_rank

    return attributes