def get_node_degree(UGraph, graph_type, attributes):
    """Store the degree of every node under ``attributes['Degree']``.

    Args:
        UGraph: SNAP graph whose per-node degrees are read.
        graph_type: Not used by this function.
        attributes: Object supporting item assignment; receives the
            ``'Degree'`` array.

    The array has ``UGraph.GetNodes()`` entries and is indexed directly by
    node id, so every node id must be smaller than the node count.
    """
    deg_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, deg_pairs)

    degree_by_id = np.zeros((UGraph.GetNodes(), ))
    for pair in deg_pairs:
        node_id = pair.GetVal1()
        degree_by_id[node_id] = pair.GetVal2()

    attributes['Degree'] = degree_by_id
def main():
    """Plot the out-degree distribution of ``wiki-Vote.txt`` in log-log space.

    Counts how many nodes share each out-degree, takes log10 of both the
    degree and the count (entries with a zero degree are skipped because
    their logarithm is undefined), fits a straight line through the points
    with ``np.polyfit`` and shows the points together with the fitted line.
    """
    G = snap.LoadEdgeList(snap.PNGraph, "wiki-Vote.txt", 0, 1)
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)

    # out-degree -> number of nodes that have exactly this out-degree
    nodes_per_degree = {}
    for item in OutDegV:
        out_deg = item.GetVal2()
        nodes_per_degree[out_deg] = nodes_per_degree.get(out_deg, 0) + 1

    x = []
    y = []
    for out_deg, node_count in nodes_per_degree.items():
        if out_deg > 0 and node_count > 0:
            x.append(math.log(out_deg, 10))
            y.append(math.log(node_count, 10))

    z1 = np.polyfit(x, y, 1)
    p1 = np.poly1d(z1)
    yvals = p1(x)

    plt.plot(x, y, '*', label='original values')
    plt.plot(x, yvals, 'r', label='polyfit values')
    plt.xlabel('x=log(Out-Grades)')
    plt.ylabel('y=log(Sum of the Nodes)')
    # Without a legend the label= arguments above are never rendered.
    plt.legend()
    plt.show()
def in_degree_distribution(G):
    """Return parallel lists of in-degree, out-degree and total degree.

    Args:
        G: SNAP graph to inspect.

    Returns:
        A tuple ``(v_in_d, v_out_d, v_degrees)`` of three lists. Position
        ``i`` in each list refers to the same node: its in-degree, its
        out-degree, and the sum of the two.
    """
    # One (node id, in-degree) pair per node, including nodes of degree 0.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)
    in_degree = {item.GetVal1(): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    out_degree = {item.GetVal1(): item.GetVal2() for item in OutDegV}

    v_in_d = []
    v_out_d = []
    v_degrees = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for node, in_deg in in_degree.items():
        if node in out_degree:
            out_deg = out_degree[node]
            v_in_d.append(in_deg)
            v_out_d.append(out_deg)
            v_degrees.append(in_deg + out_deg)

    return v_in_d, v_out_d, v_degrees
def in_out_degree_correlation(G):
    """Return the Pearson correlation between node in- and out-degrees.

    Args:
        G: SNAP graph to inspect.

    Returns:
        The correlation coefficient, i.e. the first element of the
        ``(coefficient, p-value)`` tuple produced by ``pearsonr``.
    """
    # One (node id, in-degree) pair per node, including nodes of degree 0.
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(G, InDegV)
    in_degree = {item.GetVal1(): item.GetVal2() for item in InDegV}

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    out_degree = {item.GetVal1(): item.GetVal2() for item in OutDegV}

    v_in_d = []
    v_out_d = []
    # .items() works on both Python 2 and 3 (iteritems() is Python 2 only).
    for node, in_deg in in_degree.items():
        if node in out_degree:
            v_in_d.append(in_deg)
            v_out_d.append(out_degree[node])

    coefficient, _p_value = pearsonr(v_in_d, v_out_d)
    return coefficient
def _get_degree_in_graph(Graph, H, output_path):
    """Write per-user degree, balance and reciprocity statistics to a CSV.

    Args:
        Graph: SNAP graph whose node degrees are read.
        H: Hash used in both directions: ``H.GetKey(node_id)`` yields a
            username and ``H.GetDat(username)`` yields a node id.
        output_path: Destination of the CSV file.

    Users without a ``./data/Users/<username>.json`` profile are skipped.
    For the others:

    * ``balance`` is in-degree divided by out-degree;
    * ``reciprocity`` is the number of followed users that also follow back
      and are nodes of ``Graph``, divided by out-degree.

    When the out-degree is zero the module-level ``eps`` is used as the
    divisor instead.
    """
    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    in_deg_by_user = {
        H.GetKey(item.GetVal1()): item.GetVal2() for item in InDegV
    }

    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)
    out_deg_by_user = {
        H.GetKey(item.GetVal1()): item.GetVal2() for item in OutDegV
    }

    columns = ['username', 'in_degree', 'out_degree', 'balance', 'reciprocity']
    dataset = []
    tot = len(in_deg_by_user)
    num = 0
    for username in in_deg_by_user:
        in_deg = in_deg_by_user[username]
        out_deg = out_deg_by_user[username]

        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)
        in_set = set(profile['followers'])
        out_set = set(profile['following'])

        # Shared divisor: eps stands in for a zero out-degree.
        divisor = eps if out_deg == 0 else float(out_deg)

        bi = 0
        for out_username in out_set:
            if out_username not in in_set:
                continue
            # Best effort: a failing lookup is reported and the user is
            # simply not counted as reciprocal.
            try:
                ID = H.GetDat(out_username)
                # Compare by value; `is not -1` tested object identity.
                if ID != -1 and Graph.IsNode(ID):
                    bi += 1
            except Exception as e:
                print(type(e))
                print(e.args)
                print(e)

        dataset.append({
            'username': username,
            'in_degree': in_deg,
            'out_degree': out_deg,
            'balance': float(in_deg) / divisor,
            'reciprocity': float(bi) / divisor,
        })
        num += 1
        print('%d/%d' % (num, tot))

    # Passing columns= keeps the column order and also works when no
    # profile was found (selecting columns from an empty frame raises).
    dataset = pd.DataFrame(dataset, columns=columns)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def outdegree(rankCommands, Graph, conn, cur):
    """Collect node out-degrees and forward their ranking to ``createTable``.

    The time spent in ``snap.GetNodeOutDegV`` is printed. The degree vector
    and a fresh ``TIntIntH`` are passed to ``sortNodes``; its result, the
    hash, and the ``rankCommands`` / ``conn`` / ``cur`` arguments are passed
    on to ``createTable`` (both helpers are defined elsewhere).
    """
    degree_pairs = snap.TIntPrV()

    started_at = time.time()
    snap.GetNodeOutDegV(Graph, degree_pairs)
    elapsed = time.time() - started_at
    # Same output as the two-item print statement: items joined by a space.
    print("%s %s" % ("Total handling time is: ", elapsed))

    degree_hash = snap.TIntIntH()
    ranked = sortNodes(degree_pairs, degree_hash)
    createTable(rankCommands, ranked, degree_hash, conn, cur)
def __init__(self):
    """Load the link graph and derive its out-degree frequency map.

    Sets ``self.G`` (directed graph loaded from the edge list file),
    ``self.out_deg_v`` (vector of node id / out-degree pairs) and
    ``self.deg_freq`` (result of ``self.get_deg_freq_map``).
    """
    edge_list_path = 'data/snap-web-2016-09-links-clean-1.txt'
    self.G = snap.LoadEdgeList(snap.PNGraph, edge_list_path, 0, 1)

    degree_pairs = snap.TIntPrV()
    self.out_deg_v = degree_pairs
    snap.GetNodeOutDegV(self.G, degree_pairs)

    self.deg_freq = self.get_deg_freq_map(degree_pairs)
def get_landmarks_ids(Graph, nL, savedir):
    '''
    Choose landmarks based on weighted distribution

    Landmarks are drawn, without repetition, with probability proportional
    to node degree; nodes of degree <= 2 get probability zero.

    Args:
        Graph: SNAP graph to sample from.
        nL: number of distinct landmarks to draw.
        savedir: directory that is created for the input data.

    Returns:
        (land_ids0, node_list): the sorted landmark ids (at most nL of
        them) and the sorted array of all node ids.

    Raises:
        ValueError: if nL exceeds the number of nodes with non-zero
            sampling probability (the sampling loop could never finish).
    '''
    # get node degree for each node
    # node ids are not in consecutive order
    print("Getting Nodal Degrees...")
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)

    # One row per node: column 0 is the node id, column 1 its degree.
    # The row count comes from the graph itself rather than from an
    # external variable.
    node_degree = zeros((Graph.GetNodes(), 2))
    for i, item in enumerate(OutDegV):
        node_degree[i, 0] = item.GetVal1()
        node_degree[i, 1] = item.GetVal2()
    node_degree = node_degree[node_degree[:, 0].argsort()].astype(int)
    node_list = sort(node_degree[:, 0]).astype(int)

    # sample from nodal degree
    node_deg = node_degree[:, 1].copy()
    node_deg[node_deg <= 2] = 0  # set probab to zero for deg <= n
    candidates = int((node_deg > 0).sum())
    if nL > candidates:
        raise ValueError(
            'cannot draw %d distinct landmarks: only %d nodes have degree > 2'
            % (nL, candidates))
    probabilities = 1.0 * node_deg / sum(node_deg)

    # A disabled variant of this loop additionally required each candidate
    # to be at least 3 hops away from a random subset of the landmarks
    # chosen so far; currently every not-yet-chosen sample is accepted.
    Llist = [weighted_values(node_degree[:, 0], probabilities, 1)[0]]
    while len(Llist) < nL:
        # sample from degree distribution
        curr_land_idx = weighted_values(node_degree[:, 0], probabilities, 1)[0]
        # check if sample is not in current list
        if curr_land_idx not in Llist:
            Llist.append(curr_land_idx)
            print("%s %s" % (len(Llist), " landmarks so far..."))

    # NOTE(review): Llist holds values sampled from the node-id column but
    # is used here as row indices into node_degree; that only agrees when
    # node ids equal row positions - confirm against weighted_values.
    land_ids0 = sort(node_degree[Llist][:, 0])[:nL]
    print(node_degree[Llist][:, 1])

    # create directory
    print("Creating directory for input data...")
    # No shell involved, and an already existing directory is not an error.
    if not os.path.isdir(savedir):
        os.makedirs(savedir)

    return land_ids0, node_list
def get_robustness(file_path, LSCC_output_path, LWCC_output_path):
    """Measure how the largest components shrink as hubs are removed.

    Nodes are deleted in order of decreasing total degree (in + out). After
    each fraction in ``frac_list`` of the original nodes has been removed,
    the sizes reported by ``snap.GetMxSccSz`` / ``snap.GetMxWccSz`` and the
    share of zero-degree nodes among the remaining nodes are recorded.

    Args:
        file_path: Passed to ``load_graph``, which returns the graph and a
            second value that is not used here.
        LSCC_output_path: CSV destination for the strongly connected rows.
        LWCC_output_path: CSV destination for the weakly connected rows.
    """
    frac_list = [
        0.0001, 0.001, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
    ]
    Graph, _ = load_graph(file_path)

    InDegV = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, InDegV)
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, OutDegV)

    degree = {item.GetVal1(): item.GetVal2() for item in InDegV}
    for item in OutDegV:
        degree[item.GetVal1()] += item.GetVal2()

    sorted_degree = sorted(degree.items(), key=itemgetter(1), reverse=True)
    tot = len(sorted_degree)
    # Number of nodes that must be gone at each checkpoint.
    pos = [int(tot * frac) for frac in frac_list]
    print(pos)

    LSCC_robust = []
    LWCC_robust = []
    removed = 0
    # Driving the loop by checkpoint (instead of matching a loop index
    # against pos[cur] - 1) also handles targets of 0 and repeated targets,
    # which occur for graphs with fewer than 10000 nodes.
    # An empty graph has nothing to measure, so no checkpoint is visited.
    checkpoints = zip(frac_list, pos) if tot else []
    for frac, target in checkpoints:
        while removed < target:
            Graph.DelNode(sorted_degree[removed][0])
            removed += 1

        LSCC_frac = snap.GetMxSccSz(Graph)
        LWCC_frac = snap.GetMxWccSz(Graph)
        singleton_frac = 1.0 - 1.0 * snap.CntNonZNodes(
            Graph) / Graph.GetNodes()
        LSCC_robust.append({
            'removed': frac,
            'singleton': singleton_frac,
            'middle': 1.0 - singleton_frac - LSCC_frac,
            'LSCC': LSCC_frac
        })
        LWCC_robust.append({
            'removed': frac,
            'singleton': singleton_frac,
            'middle': 1.0 - singleton_frac - LWCC_frac,
            'LWCC': LWCC_frac
        })

    LSCC_robust = pd.DataFrame(
        LSCC_robust, columns=['removed', 'singleton', 'middle', 'LSCC'])
    LSCC_robust.to_csv(LSCC_output_path, index=False, encoding='utf-8')

    LWCC_robust = pd.DataFrame(
        LWCC_robust, columns=['removed', 'singleton', 'middle', 'LWCC'])
    LWCC_robust.to_csv(LWCC_output_path, index=False, encoding='utf-8')
def getUndirAttribute(filename, node_num, weighted=None, param=1.0):
    """Build a per-node attribute table for an undirected edge-list graph.

    Args:
        filename: Edge-list file; its base name without extension is stored
            in the ``'Graph'`` column.
        node_num: Number of rows of the table. Per-node arrays are indexed
            by node id, so ids must be smaller than ``node_num``.
        weighted: When truthy, four weighted columns are added and the
            weighted helpers are called.
        param: Forwarded as the last argument of
            ``snap.GetBetweennessCentr``.

    Returns:
        A pandas DataFrame with one row per node id. ``Degree``,
        ``NodeBetweennessCentrality`` and ``PageRank`` are filled here; the
        helpers ``getEgoAttr`` / ``getWeightedDegree`` /
        ``getWeightedEgoAttr`` (defined elsewhere) receive the table as
        well.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)

    attributeNames = [
        'Graph', 'Id', 'Degree', 'NodeBetweennessCentrality', 'PageRank',
        'EgonetDegree', 'AvgNeighborDeg', 'EgonetConnectivity'
    ]
    if weighted:
        attributeNames.extend([
            'WeightedDegree', 'EgoWeightedDegree', 'AvgWeightedNeighborDeg',
            'EgonetWeightedConnectivity'
        ])

    blank = np.zeros((node_num, len(attributeNames)))
    attributes = pd.DataFrame(blank, columns=attributeNames)

    graph_label = filename.split('/')[-1].split('.')[0]
    attributes['Graph'] = [graph_label] * node_num
    attributes['Id'] = range(0, node_num)

    # Degree
    deg_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, deg_pairs)
    degree_by_id = np.zeros((node_num, ))
    for pair in deg_pairs:
        degree_by_id[pair.GetVal1()] = pair.GetVal2()
    attributes['Degree'] = degree_by_id

    getEgoAttr(UGraph, node_num, attributes, directed=False)
    if weighted:
        df = getWeightedDegree(filename, node_num, attributes, directed=False)
        getWeightedEgoAttr(UGraph, node_num, attributes, df, directed=False)

    # Betweenness Centrality
    node_btw = snap.TIntFltH()
    edge_btw = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, node_btw, edge_btw, param)
    betweenness_by_id = np.zeros((node_num, ))
    for node_id in node_btw:
        betweenness_by_id[node_id] = node_btw[node_id]
    attributes['NodeBetweennessCentrality'] = betweenness_by_id

    # PageRank
    rank_hash = snap.TIntFltH()
    snap.GetPageRank(UGraph, rank_hash)
    rank_by_id = np.zeros((node_num, ))
    for node_id in rank_hash:
        rank_by_id[node_id] = rank_hash[node_id]
    attributes['PageRank'] = rank_by_id

    return attributes
def top_nodes_by_deg(UGraph, k=10):
    """Return the ids of the ``k`` nodes with the highest degree.

    Args:
        UGraph: undirected graph

    Kwargs:
        k: number of node ids to return

    Return:
        List of node ids ordered by decreasing degree; among nodes of equal
        degree the larger id comes first.
    """
    deg_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, deg_pairs)

    # (degree, id) tuples so that sorting compares the degree first
    ranked = [(pair.GetVal2(), pair.GetVal1()) for pair in deg_pairs]
    ranked.sort(reverse=True)

    return [node_id for _, node_id in ranked[:k]]
def computeDegreeCentrality(G, NodeAttributes):
    """Add raw and min-max normalized in/out degree to ``NodeAttributes``.

    Args:
        G: SNAP graph whose node degrees are read.
        NodeAttributes: dict mapping node id to a dict of attributes; it is
            updated in place. Missing (or falsy) entries are replaced by a
            new dict.

    Returns:
        The same ``NodeAttributes`` object, where every node has the keys
        ``InDegree``, ``NormInDegree``, ``OutDegree`` and ``NormOutDegree``.
        A normalized value is ``(degree - min) / (max - min)``; when all
        nodes share the same degree it is 0.0 instead of raising
        ``ZeroDivisionError``. An empty graph leaves the dict untouched.
    """
    #
    # 1. Degree Centrality
    #    Get In Degree and Out Degree for each node
    #
    InDegV = snap.TIntPrV()
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(G, OutDegV)
    snap.GetNodeInDegV(G, InDegV)

    InDegreeList = [(item.GetVal1(), item.GetVal2()) for item in InDegV]
    OutDegreeList = [(item.GetVal1(), item.GetVal2()) for item in OutDegV]
    if not InDegreeList or not OutDegreeList:
        # Nothing to annotate; min()/max() would raise on an empty list.
        return NodeAttributes

    InDegreeList.sort(key=lambda x: x[1], reverse=True)
    OutDegreeList.sort(key=lambda x: x[1], reverse=True)

    # The lists are sorted by decreasing degree, so the extremes sit at
    # the two ends.
    maxInDegree, minInDegree = InDegreeList[0][1], InDegreeList[-1][1]
    maxOutDegree, minOutDegree = OutDegreeList[0][1], OutDegreeList[-1][1]
    inRange = float(maxInDegree - minInDegree)
    outRange = float(maxOutDegree - minOutDegree)

    for (nodeId, Degree) in InDegreeList:
        if not NodeAttributes.get(nodeId):
            NodeAttributes[nodeId] = dict()
        NodeAttributes[nodeId]['InDegree'] = Degree
        NodeAttributes[nodeId]['NormInDegree'] = (
            (Degree - minInDegree) / inRange if inRange else 0.0)

    for (nodeId, Degree) in OutDegreeList:
        NodeAttributes[nodeId]['OutDegree'] = Degree
        NodeAttributes[nodeId]['NormOutDegree'] = (
            (Degree - minOutDegree) / outRange if outRange else 0.0)

    return NodeAttributes
def calculate_stats():
    """Compute and cache per-dataset graph statistics under ``config.DATASET_DIR``.

    Depending on the ``config.CALCULATE_*`` switches this writes:

    * ``ego_graphs.txt`` - JSON mapping each node id to the ids returned by
      ``snap.GetNodesAtHop`` for hop 1;
    * ``degree_sequence.txt`` - JSON mapping each node id to its in-degree;
    * ``shortest_path_matrix.npy`` - stacked results of
      ``get_shortest_path`` over ``node_ids``, computed in a process pool.

    An existing output file is only recomputed when ``config.OVERRIDE`` is
    set. Reads the module-level ``snap_graph``, ``node_ids`` and
    ``get_shortest_path``.
    """
    # create similarities folder (no error if it is already there)
    os.makedirs(config.DATASET_DIR / 'similarities', exist_ok=True)

    if config.CALCULATE_EGO_GRAPHS:
        print(f'Calculating ego graphs for {config.DATASET_DIR }...')
        if not (config.DATASET_DIR / 'ego_graphs.txt').exists() or config.OVERRIDE:
            ego_graph_dict = {}
            for node in snap_graph.Nodes():
                node_id = int(node.GetId())
                nodes_vec = snap.TIntV()
                snap.GetNodesAtHop(snap_graph, node_id, 1, nodes_vec, False)
                ego_graph_dict[node_id] = list(nodes_vec)

            with open(str(config.DATASET_DIR / 'ego_graphs.txt'), 'w') as f:
                json.dump(ego_graph_dict, f)

    if config.CALCULATE_DEGREE_SEQUENCE:
        print(f'Calculating degree sequences for {config.DATASET_DIR}...')
        if not (config.DATASET_DIR / 'degree_sequence.txt').exists() or config.OVERRIDE:
            # NOTE(review): only the in-degree has ever been stored here;
            # the out-degree vector was computed but never read, so it is
            # no longer built. Confirm in-degree is what consumers expect.
            InDegV = snap.TIntPrV()
            snap.GetNodeInDegV(snap_graph, InDegV)
            degrees = {item.GetVal1(): item.GetVal2() for item in InDegV}

            with open(str(config.DATASET_DIR / 'degree_sequence.txt'), 'w') as f:
                json.dump(degrees, f)

    if config.CALCULATE_SHORTEST_PATHS:
        print(f'Calculating shortest paths for {config.DATASET_DIR}...')
        if not (config.DATASET_DIR / 'shortest_path_matrix.npy').exists() or config.OVERRIDE:
            with multiprocessing.Pool(processes=config.N_PROCESSSES) as pool:
                shortest_paths = pool.map(get_shortest_path, node_ids)

            all_shortest_paths = np.stack(shortest_paths)
            np.save(str(config.DATASET_DIR / 'shortest_path_matrix.npy'), all_shortest_paths)
def _initialize(self, mu, sigma_ratio):
    """
    NodeStat uses out links to initialize popularity, then sample edge
    probabilities using in links

    Each node's initial popularity is drawn from a normal distribution
    whose mean is ``scaled + mu`` and whose standard deviation is
    ``scaled * sigma_ratio``, where ``scaled`` is the node's out-degree
    divided by the largest out-degree in the graph. The value is stored as
    the float node attribute ``self.pop``; afterwards
    ``NodeStat._compute_prob`` is run for ``self.sid``.

    Args:
        mu: offset added to the scaled out-degree to form the mean.
        sigma_ratio: factor applied to the scaled out-degree to form the
            standard deviation.
    """
    outdeg = snap.TIntPrV()
    snap.GetNodeOutDegV(self._graph, outdeg)
    max_out_nid = snap.GetMxOutDegNId(self._graph)
    max_out_deg = self._graph.GetNI(max_out_nid).GetOutDeg()

    for item in outdeg:
        nid, deg = item.GetVal1(), float(item.GetVal2())
        # A graph without edges has max_out_deg == 0; every node then gets
        # a scaled degree of 0 instead of raising ZeroDivisionError.
        scaled_deg = deg / max_out_deg if max_out_deg else 0.0
        init_pop_mu = scaled_deg + mu
        init_pop_sig = scaled_deg * sigma_ratio
        # Initialized according to scaled number of followers
        init_pop = np.random.normal(init_pop_mu, init_pop_sig)
        self._graph.AddFltAttrDatN(nid, init_pop, self.pop)

    NodeStat._compute_prob(self._graph, self.sid)
def get_in_out_degree_table(self, graph):
    """Return an int32 table of node id, in-degree and out-degree.

    The array has ``graph.GetNodes()`` rows and three columns: node id,
    in-degree, out-degree. Rows are addressed by node id, so every id must
    be smaller than the node count.
    """
    in_pairs = snap.TIntPrV()
    snap.GetNodeInDegV(graph, in_pairs)

    out_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(graph, out_pairs)

    table = np.zeros((graph.GetNodes(), 3), dtype=np.int32)

    # Columns 0 and 1: node id and in-degree.
    for pair in in_pairs:
        node_id = pair.GetVal1()
        table[node_id, 0:2] = (node_id, pair.GetVal2())

    # Column 2: out-degree.
    for pair in out_pairs:
        table[pair.GetVal1(), 2] = pair.GetVal2()

    return table
def get_out_degrees(Graph):
    """Return a dict mapping every node id of ``Graph`` to its out-degree."""
    deg_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, deg_pairs)

    out_degrees = {}
    for pair in deg_pairs:
        out_degrees[pair.GetVal1()] = pair.GetVal2()
    return out_degrees
#size = len(self_cite) self_cite_ratio = [] #initializes matrix for row in range(len(self_cite)): if self_cite[row][0] == sum_cite[row][0]: ratio = float(self_cite[row][1]) / float(sum_cite[row][1]) self_cite_ratio.append([nodedict[self_cite[row][0]], ratio]) return self_cite_ratio self_cite_ratio = self_citation_ratio(nodedict, self_cite, sum_cite) #output for graphs os.chdir('C:\\Users\\Owner\\Desktop\\SNA') OutDegV = snap.TIntPrV() snap.GetNodeOutDegV(G1, OutDegV) def printOutDegV(): for item in OutDegV: print "node ID %d: out-degree %d" % (item.GetVal1(), item.GetVal2()) return def printNumNodes(nodeid, reject): print("The number of unique authors is", nodeid, ".") print("The number of non-unique occurences is", reject, ".") return def totalSelfCite(edge):
def getAttribute(filename):
    """Compute a table of centrality measures for an undirected graph.

    Args:
        filename: Edge-list file loaded as an undirected SNAP graph; the
            name is also stored verbatim in the ``'Graph'`` column.

    Returns:
        A pandas DataFrame with one row per node and the columns listed in
        ``column_names``. Every column is filled in the iteration order of
        the SNAP container it comes from, so rows line up only as long as
        those containers enumerate the nodes in the same order.

    Side effect: the loaded graph is dumped via ``UGraph.Dump()``.
    """
    UGraph = snap.LoadEdgeList(snap.PUNGraph, filename, 0, 1)
    UGraph.Dump()

    column_names = [
        'Graph', 'Id', 'Degree', 'DegreeCentrality',
        'NodeBetweennessCentrality', 'ClosenessCentrality',
        'FarnessCentrality', 'PageRank', 'HubsScore', 'AuthoritiesScore',
        'NodeEccentricity', 'EigenvectorCentrality'
    ]
    attributes = pd.DataFrame(np.zeros(shape=(UGraph.GetNodes(), 12)),
                              columns=column_names)
    attributes['Graph'] = [filename] * UGraph.GetNodes()

    # Degree
    deg_pairs = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, deg_pairs)
    attributes['Id'] = [pair.GetVal1() for pair in deg_pairs]
    attributes['Degree'] = [pair.GetVal2() for pair in deg_pairs]

    # Degree, Closeness, Farness Centrality, Node Eccentricity
    node_ids = [NI.GetId() for NI in UGraph.Nodes()]
    attributes['DegreeCentrality'] = [
        snap.GetDegreeCentr(UGraph, node_id) for node_id in node_ids]
    attributes['ClosenessCentrality'] = [
        snap.GetClosenessCentr(UGraph, node_id) for node_id in node_ids]
    attributes['FarnessCentrality'] = [
        snap.GetFarnessCentr(UGraph, node_id) for node_id in node_ids]
    attributes['NodeEccentricity'] = [
        snap.GetNodeEcc(UGraph, node_id, False) for node_id in node_ids]

    # Betweenness Centrality
    node_btw = snap.TIntFltH()
    edge_btw = snap.TIntPrFltH()
    snap.GetBetweennessCentr(UGraph, node_btw, edge_btw, 1.0)
    attributes['NodeBetweennessCentrality'] = [
        node_btw[key] for key in node_btw]

    # PageRank
    rank_hash = snap.TIntFltH()
    snap.GetPageRank(UGraph, rank_hash)
    attributes['PageRank'] = [rank_hash[key] for key in rank_hash]

    # Hubs, Authorities score
    hub_hash = snap.TIntFltH()
    auth_hash = snap.TIntFltH()
    snap.GetHits(UGraph, hub_hash, auth_hash)
    attributes['HubsScore'] = [hub_hash[key] for key in hub_hash]
    attributes['AuthoritiesScore'] = [auth_hash[key] for key in auth_hash]

    # Eigenvector Centrality
    eigen_hash = snap.TIntFltH()
    snap.GetEigenVectorCentr(UGraph, eigen_hash)
    attributes['EigenvectorCentrality'] = [
        eigen_hash[key] for key in eigen_hash]

    return attributes
# Builds a preferential-attachment graph with the same node count and
# (rounded) average out-degree as the input graph, then collects its edges.
inGraph = snap.LoadEdgeList(snap.PUNGraph, file, 0, 1)
nodes = inGraph.GetNodes()
# "edges" is the number of lines in the input file, not a count taken
# from the loaded graph.
edges = 0
with open(file, "r") as f:
    for x in f:
        edges += 1
print "nodes: %d, edges: %d" % (nodes, edges)
path = os.path.join(r"p3_data", filename[3:])
print "starting BA graph at time %s\n" % time.ctime()
edgeList = []
outDegree = 0
OutDegV = snap.TIntPrV()
snap.GetNodeOutDegV(inGraph, OutDegV)
# Sum the out-degrees of all nodes.
for item in OutDegV:
    outDegree += item.GetVal2()
    # print "node ID %d: out-degree %d" % (item.GetVal1(), item.GetVal2())
# Average the out-degree over all nodes and round it to the nearest integer.
# NOTE(review): this code uses print statements, i.e. Python 2, where `/`
# on two ints already floors - so round() has nothing left to round.
# Presumably float(outDegree) / nodes was intended; also raises
# ZeroDivisionError when the graph has no nodes.
avgOutDegree = round(outDegree / nodes)
# print avgOutDegree
Rnd = snap.TRnd()
barabasiAlbertGraph = snap.GenPrefAttach(nodes, int(avgOutDegree), Rnd)
for EI in barabasiAlbertGraph.Edges():
    edgeList.append([EI.GetSrcNId(), EI.GetDstNId()])
# NOTE(review): the body of this loop lies outside this fragment.
with open(path, "w") as f:
    for x in edgeList:
# NOTE(review): this fragment starts in the middle of a call whose beginning
# is not visible here; the next line is the tail of its argument list.
noise_level=0.01, weighted_noise=0, weighted=False, is_perm=False)
node_num, n = multi_graphs['M0'].get_shape()
# att maps each key of multi_graphs to a DataFrame holding the degree and
# eccentricity of every node of the graph loaded from
# "<syn_path>/<key>.edges".
att = {}
for key, a in multi_graphs.iteritems():
    attributeNames = ['Degree', 'NodeEccentricity']
    attributes = pd.DataFrame(np.zeros((node_num, len(attributeNames))), columns=attributeNames)
    UGraph = snap.LoadEdgeList(snap.PUNGraph, syn_path + '/' + key + '.edges', 0, 1)
    # Arrays are indexed by node id, so ids must be smaller than node_num.
    degree = np.zeros((node_num, ))
    OutDegV = snap.TIntPrV()
    snap.GetNodeOutDegV(UGraph, OutDegV)
    for item in OutDegV:
        degree[item.GetVal1()] = item.GetVal2()
    attributes['Degree'] = degree
    nodeEcc = np.zeros((node_num, ))
    for NI in UGraph.Nodes():
        nodeEcc[NI.GetId()] = snap.GetNodeEcc(UGraph, NI.GetId(), False)
    attributes['NodeEccentricity'] = nodeEcc
    att[key] = attributes
print att
for attr in ['Degree', 'NodeEccentricity']:
    plt.figure()
    # 40 evenly spaced bin edges spanning the joint value range of the
    # 'M0' and 'M1' tables for this attribute.
    # NOTE(review): the rest of this loop body lies outside this fragment.
    bins = np.linspace(min(min(att['M0'][attr]), min(att['M1'][attr])), max(max(att['M0'][attr]), max(att['M1'][attr])), 40)
fd_in.close()

# Output sentences
print("Number of nodes: {}".format(G.GetNodes()))
print("Number of edges: {}".format(G.GetEdges()))

# [2] Degree of nodes in the network
# degree_count: out-degree -> number of nodes with that out-degree
DegToCnt = snap.TIntPrV()
snap.GetOutDegCnt(G, DegToCnt)
degree_count = {}
for item in DegToCnt:
    degree_count[item.GetVal1()] = item.GetVal2()

# node_deg: node id -> out-degree
OutDeg = snap.TIntPrV()
snap.GetNodeOutDegV(G, OutDeg)
node_deg = {}
for item in OutDeg:
    node_deg[item.GetVal1()] = item.GetVal2()

# Find the maximum once instead of once per node (the original evaluated
# max() inside the comprehension condition, which is quadratic).
# default=None keeps an empty graph from raising; the list is then empty.
max_deg = max(node_deg.values(), default=None)
max_deg_nodes = [k for k, v in node_deg.items() if v == max_deg]

# Output sentences
print("Number of nodes with degree=7: {}".format(snap.CntOutDegNodes(G, 7)))
print("Node id(s) with highest degree: ", end=" ")
for node in max_deg_nodes:
    # The last id ends the line; the others are followed by a separator.
    if node == max_deg_nodes[-1]:
        print(node)
    else:
        print(str(node) + ", ", end=" ")
# Number of unique directed edges
Count = snap.CntUniqDirEdges(graph)
print("Count of directed edges is %d" % Count)

# Number of unique undirected edges
Count = snap.CntUniqUndirEdges(graph)
print("Count of undirected edges is %d" % Count)

# Number of self edges
Count = snap.CntSelfEdges(graph)
print("Count of self edges is %d" % Count)

# Number of unique bi-directional (reciprocated) edges
Count = snap.CntUniqBiDirEdges(graph)
print("Count of unique bidirectional edges is %d" % Count)

# Number of nodes with out-degree greater than 10
OutDegV = snap.TIntPrV()
snap.GetNodeOutDegV(graph, OutDegV)
count_od = sum(1 for pair in OutDegV if pair.GetVal2() > 10)
print("Count of nodes with more than 10 outgoing edges %d" % count_od)

# Number of nodes with in-degree smaller than 10
InDegV = snap.TIntPrV()
snap.GetNodeInDegV(graph, InDegV)
count_in = sum(1 for pair in InDegV if pair.GetVal2() < 10)
print("Count of nodes with fewer than 10 incoming edges %d" % count_in)
# Assign a consecutive integer id (proxy) to every key of b and to every
# value it maps to; rev_proxy is the inverse lookup.
for item in b:
    target = b[item]
    if item not in proxy:
        proxy[item] = count
        rev_proxy[count] = item
        count += 1
    if target not in proxy:
        proxy[target] = count
        rev_proxy[count] = target
        count += 1

# One graph node per proxy id.
for item in proxy:
    n1.AddNode(proxy[item])

# One edge from b[item] to item, skipping entries that map to 0.
for item in b:
    target = b[item]
    if target != 0:
        n1.AddEdge(proxy[target], proxy[item])

# Colour nodes by out-degree: green for >= 3 (these are also printed),
# black for 0; other nodes get no colour entry.
NIdColorH = snap.TIntStrH()
OutDegV = snap.TIntPrV()
snap.GetNodeOutDegV(n1, OutDegV)
for item in OutDegV:
    node_id = item.GetVal1()
    out_deg = item.GetVal2()
    if out_deg >= 3:
        NIdColorH[node_id] = "green"
        print("%s %s" % (rev_proxy[node_id], out_deg))
    elif out_deg == 0:
        NIdColorH[node_id] = "black"

# Render the same graph with three layouts.
# NOTE(review): graph3.png is titled "graph 1" - possibly a copy/paste
# slip for "graph 3"; confirm before changing the title.
snap.DrawGViz(n1, snap.gvlDot, "graph3.png", "graph 1", False, NIdColorH)
snap.DrawGViz(n1, snap.gvlNeato, "graph1.png", "graph 1", False, NIdColorH)
snap.DrawGViz(n1, snap.gvlCirco, "graph2.png", "graph 2", False, NIdColorH)
def _get_degree(Graph, H, output_path):
    """Write per-user degree, balance and reciprocity statistics to a CSV.

    Args:
        Graph: SNAP graph whose node degrees are read.
        H: Hash whose ``GetKey(node_id)`` yields the username of a node.
        output_path: Destination of the CSV file.

    Users without a ``./data/Users/<username>.json`` profile are skipped.
    For the others the in/out degree is the largest of: the graph degree,
    the ``socialStats`` counters of the profile (when present) and the
    sizes of the profile's ``followers`` / ``following`` lists.

    * ``balance`` is in-degree divided by out-degree;
    * ``reciprocity`` is the number of followed users that also follow
      back, divided by out-degree.

    When the out-degree is zero the module-level ``eps`` is used as the
    divisor instead.
    """
    in_vec = snap.TIntPrV()
    snap.GetNodeInDegV(Graph, in_vec)
    in_by_user = dict()
    for pair in in_vec:
        in_by_user[H.GetKey(pair.GetVal1())] = pair.GetVal2()

    out_vec = snap.TIntPrV()
    snap.GetNodeOutDegV(Graph, out_vec)
    out_by_user = dict()
    for pair in out_vec:
        out_by_user[H.GetKey(pair.GetVal1())] = pair.GetVal2()

    rows = list()
    total = len(in_by_user)
    done = 0
    for username in in_by_user:
        row = {
            'username': username,
            'in_degree': in_by_user[username],
            'out_degree': out_by_user[username],
        }

        profile_path = './data/Users/%s.json' % username
        if not os.path.exists(profile_path):
            continue
        with open(profile_path, 'r') as f:
            profile = json.load(f)

        user_info = profile['profile']['user']
        if 'socialStats' in user_info:
            stats = user_info['socialStats']
            row['in_degree'] = max(row['in_degree'],
                                   stats['usersFollowedByCount'])
            row['out_degree'] = max(row['out_degree'],
                                    stats['usersFollowedCount'])

        in_set = set(profile['followers'])
        out_set = set(profile['following'])
        row['in_degree'] = max(row['in_degree'], len(in_set))
        row['out_degree'] = max(row['out_degree'], len(out_set))

        # Shared divisor: eps stands in for a zero out-degree.
        if row['out_degree'] == 0:
            divisor = eps
        else:
            divisor = float(row['out_degree'])
        row['balance'] = float(row['in_degree']) / divisor

        # Followed users that follow back.
        bi = len(out_set & in_set)
        row['reciprocity'] = float(bi) / divisor

        rows.append(row)
        done += 1
        print('%d/%d' % (done, total))

    frame = pd.DataFrame(rows)
    frame = frame[[
        'username', 'in_degree', 'out_degree', 'balance', 'reciprocity'
    ]]
    frame.to_csv(output_path, index=False, encoding='utf-8')