def get_shortest_path(file_path, output_path):
    """Write the shortest-path length distribution of the largest SCC to CSV.

    The graph is loaded with ``load_graph(file_path)`` and reduced to its
    maximum strongly connected component.  A directed single-source search is
    run from every node of that component, and every distance that SNAP
    stores in the result table is counted (presumably this includes the
    source itself at distance 0 -- confirm against the SNAP documentation).

    Args:
        file_path: Path handed to ``load_graph``.
        output_path: Destination CSV with the columns ``dist`` and ``freq``,
            sorted by ascending ``dist``.
    """
    Graph, _ = load_graph(file_path)
    MxScc = snap.GetMxScc(Graph)
    tot = MxScc.GetNodes()

    path_distr = {}
    for cnt, NI in enumerate(MxScc.Nodes(), 1):
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(MxScc, NI.GetId(), NIdToDistH, True)
        for ID in NIdToDistH:
            dist = NIdToDistH[ID]
            path_distr[dist] = path_distr.get(dist, 0) + 1
        # Progress indicator: sources processed / total sources.
        print('%d/%d' % (cnt, tot))

    rows = [{'dist': dist, 'freq': freq} for dist, freq in path_distr.items()]
    # Explicit columns keep the frame well-formed even when `rows` is empty.
    dataset = pd.DataFrame(rows, columns=['dist', 'freq'])
    # DataFrame.sort() was removed from pandas; sort_values() replaces it.
    dataset = dataset.sort_values('dist', ascending=True)
    dataset.to_csv(output_path, index=False, encoding='utf-8')
def removeLink(G):
    """Delete every node of ``G`` that is neither a community node nor adjacent to one.

    ``plotting(...).hirachical()`` is run first; afterwards ``plot.community``
    is read as the collection of community node ids (assumed to be hashable
    ints, since they are passed to ``snap.GetShortPath`` -- confirm).  A node
    counts as adjacent when ``snap.GetShortPath`` reports a distance of
    exactly 1 to a community node.

    Args:
        G: SNAP graph; modified in place by ``snap.DelNodes``.

    Returns:
        Tuple ``(nodeList, rmList)``: the kept node ids (adjacent nodes first,
        followed by ``plot.community``) and the ``snap.TIntV`` of removed ids.
    """
    plot = plotting(G, snap.gvlNeato, True)
    # 1st layer
    plot.hirachical()
    print('DEBUG1 %s %s' % (len(plot.community), plot.community))

    nodeList = []
    kept = set()  # mirrors nodeList for O(1) membership tests
    for node in plot.community:
        for NI in G.Nodes():
            nid = NI.GetId()
            # Membership is checked first so already-kept nodes skip the BFS.
            if nid not in kept and snap.GetShortPath(G, nid, node) == 1:
                kept.add(nid)
                nodeList.append(nid)
        print('DEBUG2 %s %s' % (node, nodeList))
    nodeList.extend(plot.community)
    kept.update(plot.community)

    rmList = snap.TIntV()
    for NI in G.Nodes():
        if NI.GetId() not in kept:
            rmList.Add(NI.GetId())
    print('DEBUG3 %s' % rmList.Len())

    snap.DelNodes(G, rmList)
    print('\nRemoving Nodes which is not the adjacent ones of CLAIMID')
    print('Graph has %d Nodes and %d Edges\n' % (G.GetNodes(), G.GetEdges()))
    return nodeList, rmList
def output_closeness_centrality(graph, filename):
    """Compute closeness centrality for every node and write it to a file.

    For each node the closeness is ``(N - 1) / sum_of_distances``, where the
    distances come from the table filled by ``snap.GetShortPath`` and ``N`` is
    ``graph.GetNodes()``.  A node whose distance sum is 0 (nothing else was
    reached) gets 0.0 instead of raising ``ZeroDivisionError``.

    Args:
        graph: SNAP graph.
        filename: Output path; one ``"<node id> <closeness>"`` line per node,
            sorted by descending closeness.
    """
    num_nodes = graph.GetNodes()
    t0 = time.time()

    closeness = []
    for start in graph.Nodes():
        # Sum of shortest-path distances from `start` to every node reached.
        dist_table = snap.TIntH()
        snap.GetShortPath(graph, start.GetId(), dist_table)
        total = sum(dist_table[nid] for nid in dist_table)
        # float() forces true division on Python 2 as well.
        value = (num_nodes - 1) / float(total) if total else 0.0
        closeness.append((start.GetId(), value))

    # A list keeps the order; rebuilding a dict would lose it on interpreters
    # whose dicts are unordered.
    closeness.sort(key=lambda item: item[1], reverse=True)

    with open(filename, "w") as f:
        for node_id, value in closeness:
            f.write("{} {:.6f}\n".format(node_id, value))
    print("Time taken for calculation of closeness centrality = {:.6f}".format(time.time()-t0))
def get_shortest_path(node_id):
    """Return hop distances from ``node_id`` as a dense NumPy vector.

    Reads the module-level ``snap_graph`` and ``node_ids``.  The vector has
    ``max(node_ids) + 1`` entries and is indexed by node id; ids that SNAP
    does not put into its distance table keep the initial value 0.
    """
    dist_table = snap.TIntH()
    snap.GetShortPath(snap_graph, int(node_id), dist_table)

    # Sized by the largest node id (an earlier version used the node count).
    dense = np.zeros((max(node_ids) + 1))
    for target in dist_table:
        dense[target] = dist_table[target]
    return dense
def analyzeMisc(FNGraph):
    """Print LCC share, clustering coefficient, diameter and average distance.

    The average distance is accumulated per source node as
    ``(1 / N) * sum_of_distances / (N - 1)`` with ``N = FNGraph.GetNodes()``.
    Nothing is returned; all results go to stdout.
    """
    started = time.time()
    print("Started calculating miscellaneous network statistics:")

    print('\tPercentage of nodes in LCC in Football network: %.3f' % (snap.GetMxWccSz(FNGraph) * 100.0))

    clustering = snap.GetClustCf(FNGraph, -1)
    print("\tClustering coefficient: %.3f" % clustering)

    # 1432 and False are passed straight through to SNAP
    # (presumably: number of BFS test nodes, undirected -- confirm).
    diameter = snap.GetBfsFullDiam(FNGraph, 1432, False)
    print("\tNetwork diameter: %.3f\n" % diameter)

    print("\tCalculating average distance...")
    avgDist = 0
    node_count = FNGraph.GetNodes()
    for done, NI in enumerate(FNGraph.Nodes()):
        if done % 100 == 0:
            print("\t\tCalculated for %d nodes" % done)
        dist_table = snap.TIntH()
        snap.GetShortPath(FNGraph, NI.GetId(), dist_table)
        dist_sum = sum(dist_table[key] for key in dist_table)
        avgDist += (1.0/node_count) * float(dist_sum)/(node_count-1)

    print("\tNetwork average distance: %.3f" % avgDist)
    print("\nFinished calculating in %f seconds\n" % (time.time() - started))
def make_path_graph(Graphs):
    """Plot, per graph, a log-log histogram of each node's mean path length.

    For every node the mean of ``snap.GetShortPath`` over all nodes of the
    same graph is computed, and the number of nodes sharing each mean is
    plotted.  The overall mean per graph is printed next to its label.

    NOTE(review): every pair is included in the mean, i.e. the node itself
    and pairs for which SNAP reports no path (presumably -1) -- confirm that
    this is intended.

    Args:
        Graphs: Iterable of ``(graph, label, color)`` tuples.
    """
    for G, label, color in Graphs:
        # GetShortPath expects node ids, not node iterators.
        node_ids = [node.GetId() for node in G.Nodes()]

        results = {}
        bigtotal = 0.
        bigcount = 0.
        for src in node_ids:
            pathtotal = 0.
            count = 0.
            for dst in node_ids:
                pathtotal += snap.GetShortPath(G, src, dst)
                count += 1
            mean = pathtotal / count
            results[mean] = results.get(mean, 0) + 1
            bigtotal += pathtotal
            bigcount += count  # was the undefined name `pathcount`

        # An empty graph has no pairs; report NaN instead of dividing by zero.
        overall = bigtotal / bigcount if bigcount else float('nan')
        print(label, overall)

        # Keys are unique, so sorting the items orders the points by x.
        points = sorted(results.items())
        x2 = [key for key, _ in points]
        y2 = [freq for _, freq in points]
        plt.loglog(x2, y2, color=color, label=label)
    plt.show()
def getDistances(graph):
    """Compute pairwise shortest-path distances between known DBLP users.

    Users are read from ``csv/dblpusers.csv`` (``||``-separated: id, name,
    third field).  A user is kept when the third field is non-empty and the
    name matches the ``name`` attribute of a node in ``graph``.  Lines with
    fewer than three fields are skipped.  The distances are also written to
    ``csv/distances.csv`` as ``id1||id2||distance`` lines.

    Args:
        graph: SNAP graph whose nodes carry a string attribute ``name``.

    Returns:
        Nested dict ``distances[id1][id2]`` holding the value returned by
        ``snap.GetShortPath`` for each pair, stored in both directions.
    """
    nameToNId = {}
    for n in graph.Nodes():
        nid = n.GetId()
        nameToNId[graph.GetStrAttrDatN(nid, 'name').decode('utf-8')] = nid

    distances = {}
    dblpUsers = []
    # Context managers close the files even if parsing or the search fails.
    with codecs.open('csv/dblpusers.csv', 'r', 'utf-8') as infile:
        for line in infile.read().splitlines():
            tokens = line.split('||')
            if len(tokens) < 3:
                continue  # blank or malformed line
            if tokens[2] != '' and tokens[1] in nameToNId:
                user_id = int(tokens[0])
                distances[user_id] = {}
                dblpUsers.append({'name': tokens[1], 'id': user_id})

    for i in range(len(dblpUsers)):
        n1 = dblpUsers[i]['name']
        i1 = dblpUsers[i]['id']
        for j in range(i + 1, len(dblpUsers)):
            n2 = dblpUsers[j]['name']
            i2 = dblpUsers[j]['id']
            # shortest path behavior is weird if no path exists
            dist = snap.GetShortPath(graph, nameToNId[n1], nameToNId[n2])
            distances[i1][i2] = dist
            distances[i2][i1] = dist

    with open('csv/distances.csv', 'w') as outfile:
        for i1 in distances:
            for i2 in distances[i1]:
                outfile.write(
                    str(i1) + '||' + str(i2) + '||' + str(distances[i1][i2]) + '\n')
    return distances
def compute_closeness_centrality(G, GName, Nodes, nodes, edges): counter = 0 closeness_centralities = [] start_time = time.time() for NI in G.Nodes(): NIdToDistH = snap.TIntH() sum_of_shortest_paths = 0 shortestPath = snap.GetShortPath(G, NI.GetId(), NIdToDistH) for paths in NIdToDistH: sum_of_shortest_paths = sum_of_shortest_paths + NIdToDistH[paths] sum_of_shortest_paths = sum_of_shortest_paths + nodes * ( nodes - len(NIdToDistH)) #incorporating unreachable nodes current_centrality = float(nodes) / sum_of_shortest_paths closeness_centralities.append([current_centrality, NI.GetId()]) time_taken = time.time() - start_time print "Execution for Closeness Centrality completed in ", time_taken // 60, " mins and ", ( time_taken // 1) % 60, "seconds" closeness_centralities.sort() store_in_file(closeness_centralities, "closeness_centrality", GName) closeness_centralities.sort(reverse=True) return closeness_centralities
def get_graph_distance(G, n1, n2, directed=False):
    """Return the negated shortest-path length between two nodes, ignoring a direct edge.

    If the edge ``n1 -> n2`` exists it is removed for the duration of the
    search and re-added afterwards, even when the search raises.

    NOTE(review): the raw SNAP result is multiplied by -1, so if SNAP reports
    "no path" as -1 this function returns +1 -- confirm callers expect that.

    Args:
        G: SNAP graph; temporarily modified.
        n1: Source node id.
        n2: Destination node id.
        directed: Forwarded to ``snap.GetShortPath`` as its direction flag.

    Returns:
        ``-1 * snap.GetShortPath(G, n1, n2, directed)``.
    """
    had_edge = G.IsEdge(n1, n2)
    if had_edge:
        G.DelEdge(n1, n2)
    try:
        return -1 * snap.GetShortPath(G, n1, n2, directed)
    finally:
        # Always restore the edge so a failed search cannot corrupt the graph.
        if had_edge:
            G.AddEdge(n1, n2)
def avgShortestPath(G):
    """Print the average shortest-path length of ``G``, directed and undirected.

    For each node and each direction mode the distance table filled by
    ``snap.GetShortPath`` is summed; the average is the total distance divided
    by the total number of table entries.
    """
    # Keyed by the direction flag passed to GetShortPath.
    dist_sum = {False: 0, True: 0}
    entries = {False: 0, True: 0}

    for src in G.Nodes():
        table = snap.TIntH()
        # Undirected first, then directed; the same table object is reused.
        for is_dir in (False, True):
            snap.GetShortPath(G, src.GetId(), table, is_dir)
            entries[is_dir] += len(table)
            for key in table:
                dist_sum[is_dir] += 1.0*table[key]

    print("Avg. Shortest Path (directed): %f"%(1.0*dist_sum[True]/entries[True]))
    print("Avg. Shortest Path (undirected): %f"%(1.0*dist_sum[False]/entries[False]))
def getAvgEfficiency(Graph):
    """Return the average (global) efficiency of ``Graph``.

    Efficiency is the mean of ``1 / d(i, j)`` over all ordered pairs of
    distinct nodes.  Each unordered pair is searched once and the sum is
    doubled, which relies on ``snap.GetShortPath`` being symmetric in its
    default mode (assumed undirected -- confirm for directed graphs).  Pairs
    without a positive distance (no path) contribute 0.

    Args:
        Graph: SNAP graph.

    Returns:
        Float in ``[0, 1]``; 0.0 for graphs with fewer than two nodes.
    """
    nodes = [node.GetId() for node in Graph.Nodes()]
    n = len(nodes)
    if n < 2:
        return 0.0

    efficiency = 0.0
    for i in range(n):
        for j in range(i + 1, n):
            dist = snap.GetShortPath(Graph, nodes[i], nodes[j])
            if dist > 0:  # unreachable pairs have zero efficiency
                efficiency += 1.0 / dist

    # Normalise by the n * (n - 1) ordered pairs; only i < j was summed above.
    return 2.0 * efficiency / (n * (n - 1))
def getNodeEfficiency(Graph):
    """Return the efficiency ``1 / d(i, j)`` of every unordered node pair.

    Pairs are visited in node-iteration order with ``i < j``.  A pair for
    which ``snap.GetShortPath`` does not return a positive distance (no path)
    gets efficiency 0.0 rather than a negative value.

    Args:
        Graph: SNAP graph.

    Returns:
        List of floats, one per unordered pair of distinct nodes.
    """
    nodes = [node.GetId() for node in Graph.Nodes()]
    n = len(nodes)

    efficiency = []
    for i in range(n):
        for j in range(i + 1, n):
            dist = snap.GetShortPath(Graph, nodes[i], nodes[j])
            efficiency.append(1.0 / dist if dist > 0 else 0.0)
    return efficiency
def closeness_centrality_node(graph, node, sample=None):
    """Return the harmonic closeness of ``node``: mean of ``1 / d`` over target nodes.

    Targets are the graph's actual node ids, so graphs whose ids are not the
    contiguous range ``0..N-1`` work as well.  Targets without a positive
    distance (the node itself, unreachable nodes) contribute 0 to the sum but
    still count in the denominator.

    Args:
        graph: SNAP graph.
        node: Id of the node to score.
        sample: If truthy, ``len(sample)`` targets are drawn at random instead
            of using all nodes.  Only the length is used, not the contents
            (NOTE(review): confirm that this is the intended meaning).

    Returns:
        Float; 0.0 when there are no targets.
    """
    node_ids = [n.GetId() for n in graph.Nodes()]
    targets = random.sample(node_ids, len(sample)) if sample else node_ids
    if not targets:
        return 0.0

    total = 0.0
    for target in targets:
        dist = snap.GetShortPath(graph, node, target)
        if dist > 0:
            total += 1.0 / dist
    return total / len(targets)
def q2_4_aux(name):
    """Estimate the fraction of random node pairs joined by a path.

    Loads the graph with ``load_graph(name)``, draws 1000 random
    (source, destination) pairs and counts those for which
    ``snap.GetShortPath`` does not return -1.
    """
    G = load_graph(name)

    def _connected():
        # The source is drawn before the destination.
        src = G.GetRndNId()
        dst = G.GetRndNId()
        return snap.GetShortPath(G, src, dst) != -1

    hits = sum((1 for _ in range(1000) if _connected()), 0.0)
    return hits / 1000
def calc_path_prob(graph):
    """Estimate the probability that a directed path of positive length joins two random nodes.

    Runs 1000 trials; a trial succeeds when ``snap.GetShortPath`` (directed
    mode) returns a value greater than 0.
    """
    trials = 1000

    def _trial_succeeds():
        src = graph.GetRndNId()
        dst = graph.GetRndNId()
        return snap.GetShortPath(graph, src, dst, True) > 0

    successes = sum(1 for _ in range(trials) if _trial_succeeds())
    return float(successes) / float(trials)
def ProbabilityOfPath(Graph, nSamples=1000):
    '''
    Sample nSamples random (u, v) node pairs and return the percentage
    (0-100) of pairs for which a directed shortest path is reported.

    Uses the module-level random generator Rnd and the NO_PATH sentinel.
    '''
    connected = 0.0
    for _ in xrange(nSamples):
        u = Graph.GetRndNId(Rnd)
        v = Graph.GetRndNId(Rnd)
        if snap.GetShortPath(Graph, u, v, True) == NO_PATH:
            continue
        connected += 1
    return 100 * connected / nSamples
def getHarmonicClosenessCentr(self, G, nodeId):
    """Return the harmonic closeness centrality of ``nodeId`` in ``G``.

    Sums ``1 / distance`` over every entry of SNAP's distance table except
    the node itself, then divides by ``G.GetNodes() - 1``.
    """
    n = G.GetNodes()
    distances = snap.TIntH()
    snap.GetShortPath(G, nodeId, distances)

    reciprocal_sum = sum(
        (1 / float(distances[other]) for other in distances if other != nodeId),
        0.0)
    return reciprocal_sum / (n - 1)
def avg_path_length(self):
    """Return the average shortest-path length over all connected node pairs.

    Brute force: one ``snap.GetShortPath`` call per pair ``(i, j)`` with
    ``1 <= i < j <= self._num_nodes``, i.e. node ids are assumed to be
    ``1..self._num_nodes`` (confirm against how the graph is built).  Pairs
    without a positive distance are ignored.

    Returns:
        Float average; 0.0 when no connected pair exists (previously this
        raised ``ZeroDivisionError``).
    """
    # TODO: Maybe add dynamic programming to speed up the operation
    total_path_length = 0
    num_path = 0
    for i in range(1, self._num_nodes):
        for j in range(i + 1, self._num_nodes + 1):
            length = snap.GetShortPath(self._graph, i, j)
            if length > 0:
                num_path += 1
                total_path_length += length
    if num_path == 0:
        return 0.0
    return 1.0 * total_path_length / num_path
def TestFracPath(Graph, num_test=1000):
    """Print the fraction of random distinct node pairs joined by a directed path.

    Draws pairs until ``num_test`` pairs with different endpoints have been
    tested; a pair counts as reachable when ``snap.GetShortPath`` (directed
    mode) returns a value greater than 0.
    """
    Rnd = snap.TRnd(42)
    Rnd.Randomize()

    reachable = 0
    tested = 0
    while tested < num_test:
        src = Graph.GetRndNId(Rnd)
        dst = Graph.GetRndNId(Rnd)
        if src == dst:
            # Identical endpoints are redrawn and do not count as a test.
            continue
        if snap.GetShortPath(Graph, src, dst, True) > 0:
            reachable += 1
        tested += 1
    print(" fraction of reachable pairs", reachable / num_test)
def GetTrustList(fSourceNode, graph, visitPath, totalNodeList, nodeMapNumDict):
    """Return, for each node of ``visitPath``, the reciprocal of its distance from the source.

    Nodes are translated to graph indices through ``nodeMapNumDict``;
    ``totalNodeList`` is accepted but not used.  The value is
    ``1.0 / snap.GetShortPath(...)``, so a reported distance of 0 raises
    ``ZeroDivisionError``.
    """
    source_index = nodeMapNumDict[fSourceNode]
    target_indices = [nodeMapNumDict[node] for node in visitPath]
    return [
        1.0 / snap.GetShortPath(graph, source_index, target_index)
        for target_index in target_indices
    ]
def harmonic_closeness_centrality():
    """Append the harmonic closeness of every node of ``UGraph`` to ``harmonicList``.

    Works on the module-level ``UGraph`` and ``harmonicList``.  For each node
    the score is ``(1 / (N - 1)) * sum(1 / d)`` over all non-zero distances
    in SNAP's distance table, with ``N = UGraph.GetNodes()``.  Float literals
    keep the arithmetic in true division on Python 2 as well.

    Returns:
        ``harmonicList`` with one ``[node id, score]`` entry appended per node.
    """
    sizeGraph = UGraph.GetNodes()
    # A graph with a single node has no other node to be close to.
    scale = 1.0 / (sizeGraph - 1) if sizeGraph > 1 else 0.0

    NIdToDistH = snap.TIntH()
    for node in UGraph.Nodes():
        snap.GetShortPath(UGraph, node.GetId(), NIdToDistH)
        reciprocal_sum = 0.0
        for x in NIdToDistH:
            dist = NIdToDistH[x]
            if dist != 0:
                reciprocal_sum += 1.0 / dist
        harmonicList.append([node.GetId(), scale * reciprocal_sum])
    return harmonicList
def predictLinksNegatedShortestPath(GCombined, nodesAtHop, itemNodeIds, userNodeIds, directory):
    """Score every (user, item) pair by reciprocal shortest-path length and pickle the result.

    A pair scores 0.0 when either node is missing from ``GCombined`` or the
    two are already linked by an edge; otherwise it scores
    ``1.0 / snap.GetShortPath(...)`` in undirected mode.  The nested dict
    ``scores[user][item]`` is pickled to ``directory + 'NegatedShortestPath'``.
    ``nodesAtHop`` is accepted but not used.
    """
    scores = {}
    for user in userNodeIds:
        for item in itemNodeIds:
            candidate = (GCombined.IsNode(user)
                         and GCombined.IsNode(item)
                         and not GCombined.IsEdge(user, item))
            row = scores.setdefault(user, {})
            if candidate:
                row[item] = 1.0/snap.GetShortPath(GCombined, user, item, False)
            else:
                row[item] = 0.0

    with open(directory + 'NegatedShortestPath', 'wb') as outfile:
        pickle.dump(scores, outfile)
def path_proba(graph, name, n=1000):
    """Estimate the probability that a directed path joins two distinct random nodes.

    Each of the ``n`` simulations draws two different node ids, runs a
    directed single-source search from the first and checks whether the
    second appears in the resulting distance table.

    Args:
        graph: SNAP graph; needs at least two nodes, otherwise the redraw
            loop cannot find a second distinct node.
        name: Network name, used only in the printed message.
        n: Number of simulated pairs.

    Returns:
        Fraction of simulated pairs joined by a path, as a float.
    """
    hits = 0
    for _ in range(n):
        a = graph.GetRndNId()
        b = graph.GetRndNId()
        while a == b:
            b = graph.GetRndNId()
        NIdToDistH = snap.TIntH()
        snap.GetShortPath(graph, a, NIdToDistH, True)
        if b in NIdToDistH:
            hits += 1

    # float() avoids integer (floor) division on Python 2.
    proba = float(hits) / n
    print('Using {} random pairs, the probability that a path exists between two nodes is '
          '{} for the {} network'.format(n, proba, name))
    return proba
def getClosenessCentralities(self):
    """Return, per node of ``self.network``, its mean absolute shortest-path value.

    For each origin node the absolute values returned by
    ``snap.GetShortPath`` to every node are summed and divided by the node
    count.  Results follow the iteration order of ``self.network.Nodes()``.
    """
    network = self.network
    centralities = []
    for origin in network.Nodes():
        distance_total = sum(
            abs(snap.GetShortPath(network, origin.GetId(), other.GetId()))
            for other in network.Nodes())
        centralities.append(float(distance_total) / float(network.GetNodes()))
    return centralities
def get_dist_distribution(filename, sample_count):
    """Estimate the shortest-path length distribution by sampling node pairs.

    Loads an undirected graph from the edge list ``filename``, samples
    ``sample_count`` pairs of distinct nodes and counts every positive
    distance.  The raw counts are passed to ``calculate_spid`` for the printed
    message and are then divided by ``sample_count``.

    Returns:
        ``defaultdict`` mapping distance to its relative frequency.
    """
    distance_dst = collections.defaultdict(int)
    graph = snap.LoadEdgeList(snap.PUNGraph, filename)
    node_list = [node.GetId() for node in graph.Nodes()]

    for _ in range(0, sample_count):
        src, dst = random.sample(node_list, 2)
        dist = snap.GetShortPath(graph, src, dst, False)
        if dist > 0:
            distance_dst[dist] += 1

    print("spid is " + str(calculate_spid(distance_dst)) + " for " + str(
        sample_count) + " samples")

    # Turn the counts into relative frequencies in place.
    for length in distance_dst:
        distance_dst[length] /= float(sample_count)
    return distance_dst
def ssspfun(R0):
    """Return the shortest-path length for every node pair in ``R0``.

    Uses the module-level ``Graph``.  Every 100 pairs a progress line is
    printed and appended to ``percent_done_4testing.txt``.

    Args:
        R0: Sized sequence of pairs; ``r[0]`` and ``r[1]`` are the node ids.

    Returns:
        Array (built with the module-level ``array``) of the values returned
        by ``snap.GetShortPath``, in input order.
    """
    paths = []
    start0 = time.time()
    total = len(R0)
    for i, r in enumerate(R0):
        if i % 100 == 0:
            text_to_append = "%2.2f percent done, node %i out of %i" % (
                100 * float(i) / total, i, total)
            print(text_to_append)
            print(" %2.2f seconds have elapsed so far..." % (time.time() - start0))
            # Append directly instead of spawning a shell for `echo`; the two
            # newlines match what `echo '<text>\n'` used to write.
            with open("percent_done_4testing.txt", "a") as progress_log:
                progress_log.write(text_to_append + "\n\n")
        paths.append(snap.GetShortPath(Graph, r[0], r[1]))
    print("Done!")
    return array(paths)
def get_land_D_mtx(land_ids_temp):
    """Build the landmark-to-node distance matrix.

    Uses the module-level ``Graph``, ``V0`` and ``conn_node_ids``.  Row ``i``
    holds the distances from landmark ``land_ids_temp[i]``; a node id ``k`` is
    stored in column ``k - 1``.  Nodes missing from SNAP's distance table keep
    the initial 0.  Finally only the columns ``conn_node_ids - 1`` are kept.

    NOTE(review): the matrix dtype is int8, so distances above 127 would not
    fit -- confirm that this cannot occur for the graphs used.

    Args:
        land_ids_temp: Sized sequence of landmark node ids.

    Returns:
        int8 matrix of shape ``(len(land_ids_temp), len(conn_node_ids))``.
    """
    print("Getting landmark -> node distance matrix...")
    n_landmarks = len(land_ids_temp)
    L2n = zeros((n_landmarks, V0)).astype('int8')
    for i, l in enumerate(land_ids_temp):
        text_to_append = " getting sssp for node %i out of %i" % (i + 1, n_landmarks)
        print(text_to_append)
        # Append directly instead of spawning a shell for `echo`; the two
        # newlines match what `echo '<text>\n'` used to write.
        with open("percent_done_4landmarks.txt", "a") as progress_log:
            progress_log.write(text_to_append + "\n\n")
        sys.stdout.flush()

        NIdToDistH = snap.TIntH()
        snap.GetShortPath(Graph, int(l), NIdToDistH)
        for item in NIdToDistH:
            L2n[i, item - 1] = NIdToDistH[item]
    L2n = L2n[:, conn_node_ids - 1]
    return L2n
def GetTrustList(fSourceNode, graph, visitPath, totalNodeList, nodeMapNumDict):
    """Keep the nodes of ``visitPath`` that are exactly one hop from the source.

    Nodes are translated to graph indices through ``nodeMapNumDict``;
    ``totalNodeList`` is accepted but not used.

    Returns:
        Tuple ``(newVisitPath, trustList)``: the nodes at distance 1, in their
        original order, and a list with one ``'1'`` string per kept node.
    """
    source_index = nodeMapNumDict[fSourceNode]
    # All nodes are resolved up front, before any search is run.
    target_indices = [nodeMapNumDict[node] for node in visitPath]

    newVisitPath = []
    trustList = []
    for node, target_index in zip(visitPath, target_indices):
        if snap.GetShortPath(graph, source_index, target_index) != 1:
            continue
        newVisitPath.append(node)
        trustList.append('1')
    return newVisitPath, trustList
def sample_shortest_path(self, n_node=100, isDir=False):
    '''
    Sample shortest-path lengths between randomly chosen node pairs.

    Two independent samples (without replacement) of n_node entries are drawn
    from self.nodes; the i-th source is paired with the i-th destination.

    :param n_node: number of pairs to sample, capped at self.num_nodes
    :param isDir: whether edge direction is considered; forwarded to
                  snap.GetShortPath (it was previously ignored)
    :return: list with the value returned by snap.GetShortPath for each pair
    '''
    snap = self.snap
    n_node = min(self.num_nodes, n_node)
    nodes = self.nodes
    src = np.random.choice(nodes, n_node, replace=False)
    dest = np.random.choice(nodes, n_node, replace=False)
    # int() turns the NumPy integers into plain ints before calling SNAP.
    return [
        snap.GetShortPath(self.graph, int(s), int(d), isDir)
        for s, d in zip(src, dest)
    ]
def q2_4_utils(dataset_name): G = load_graph(dataset_name) Rnd = snap.TRnd(42) Rnd.Randomize() count = 0 positive_count = 0 negative_count = 0 while count < 1000: NId_src = G.GetRndNId(Rnd) NId_dst = G.GetRndNId(Rnd) if NId_src != NId_dst: if snap.GetShortPath(G, NId_src, NId_dst, True) > 0: positive_count = positive_count + 1 else: negative_count = negative_count + 1 count = count + 1 # print (snap.GetShortPath(G, NId_src, NId_dst)) print 'positive_count', positive_count print 'negative_count', negative_count