def run_nx(n, niter):
    """Time repeated all-pairs shortest-path-length computations.

    A Barabasi-Albert graph with ``n`` nodes (2 edges attached per new node)
    is built once; the all-pairs computation is then run ``niter`` times
    while a progress bar is advanced.

    Returns
    -------
    tuple
        ``(start, end)`` timestamps taken with ``time.time()`` around the
        timed loop.
    """
    pb = progressbar.ProgressBar(maxval=niter).start()
    g = nx.barabasi_albert_graph(n, 2)
    start = time.time()
    for i in range(niter):
        # networkx >= 2.0 returns a lazy iterator; drain it so the shortest
        # paths are actually computed inside the timed region.  Iterating a
        # networkx 1.x dict result is harmless.
        for _ in nx.all_pairs_shortest_path_length(g):
            pass
        pb.update(i)
    pb.finish()
    end = time.time()
    return (start, end)
def compute(self, dataset_pool):
    """Return income per household aggregated over each zone's neighbourhood.

    networkx is used to find, for every zone, all zones reachable within
    ``self.order`` adjacency steps (the zone itself included).  ``sum_income``
    and ``sum_households`` are summed over that neighbourhood and divided.

    Returns
    -------
    array
        One value per zone, in the order of ``zones['zone_id']``.

    Raises
    ------
    ImportError
        If the module-level ``nx`` is falsy (networkx failed to import).
    """
    if not nx:
        raise ImportError("networkx module is required.")

    zones = self.get_dataset()
    adjacent_zones = dataset_pool.get_dataset('adjacent_zone')
    zone_ids = zones['zone_id']
    id_max = zone_ids.max() + 1

    # W[i, j] == 1 when zone j lies within ``self.order`` steps of zone i;
    # the diagonal is set too because every node is at distance 0 of itself.
    W = zeros((id_max, id_max), dtype='int8')
    sum_income = zeros(id_max, dtype=zones['sum_income'].dtype)
    sum_households = zeros(id_max, dtype=zones['sum_households'].dtype)
    sum_income[zone_ids] = zones['sum_income']
    sum_households[zone_ids] = zones['sum_households']

    G = nx.Graph()
    G.add_nodes_from(zone_ids)
    G.add_edges_from(
        adjacent_zones.get_multiple_attributes(['zone_id', 'adjacent_zone_id']))

    # dict() makes this work with networkx >= 2.0, which yields pairs lazily.
    length = dict(nx.all_pairs_shortest_path_length(G, cutoff=self.order))
    for key, val in length.items():
        # list(): a dict view is not a valid numpy index on Python 3.
        W[key, list(val)] = 1

    sum_income = dot(W, sum_income[:, newaxis])[:, 0]
    sum_households = dot(W, sum_households[:, newaxis])[:, 0]
    results = safe_array_divide(sum_income, sum_households.astype('f'))
    return results[zone_ids]
def local_efficiency(G):
    """Compute array of local efficiency for the given graph.

    For every node, the inverse shortest-path lengths between each ordered
    pair of its distinct direct neighbours are averaged.  A node whose
    neighbours yield no such pair gets 0.

    Returns
    -------
    numpy.ndarray
        One efficiency value per node, in ``G.nodes()`` order.
    """
    # dict() keeps this working with networkx >= 2.0 (lazy iterator result).
    length = dict(nx.all_pairs_shortest_path_length(G))
    nodepaths = []
    for n in G.nodes():
        # Materialise the neighbours: newer networkx returns a one-shot
        # iterator, which repeated membership tests would exhaust.
        nneighb = set(nx.neighbors(G, n))
        # Only neighbour pairs matter, so walk those instead of scanning
        # every (source, target) pair of the whole graph for each node.
        paths = [
            1 / float(length[src][targ])
            for src in nneighb
            for targ in nneighb
            if src != targ and targ in length[src]
        ]
        nodepaths.append(np.mean(paths) if paths else 0.0)
    return np.array(nodepaths)
def first_return_times(self, k, backwards=False):
    """Record first return times of length <= k for the nodes of this DiGraph.

    A k-recurrent vertex is a vertex v for which a path v -> v exists.  The
    return time of node i is 1 + the shortest path length from one of its
    successors back to i, using only shortest paths found with cutoff ``k``.

    Parameters
    ----------
    k : int
        Cutoff passed to ``nx.all_pairs_shortest_path_length(G, k)``.
    backwards : bool, optional
        When true, work on ``self.reverse()`` and store the result in
        ``self.backward_return_times``; otherwise use ``self`` and store it
        in ``self.forward_return_times``.

    Notes
    -----
    The original signature omitted ``self`` although the body uses it, so
    every call failed with a NameError; ``self`` is now the first parameter.
    """
    if backwards:
        G = self.reverse()
        self.backward_return_times = dict()
        rt = self.backward_return_times
    else:
        G = self
        self.forward_return_times = dict()
        rt = self.forward_return_times

    # length[i][j] = length of the shortest path i -> j, present only if <= k.
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    length = dict(nx.all_pairs_shortest_path_length(G, k))
    for i in G:
        # successors j that can get back to i, with the full cycle length
        distances = [length[j][i] + 1
                     for j in G.successors(i) if i in length[j]]
        if distances:
            rt[i] = min(distances)
def nodal_matrix(self):
    """
    Returns a matrix containing the nodal 'distance' between all labelled
    nodes.

    Entry ``[i, j]`` is the shortest-path length from ``LCSA(taxa[i],
    taxa[j])`` to the node labelled ``taxa[i]``; entry ``[j, i]`` is the
    length from the same LCSA node to the node labelled ``taxa[j]``.

    EXAMPLES::

        >>> network = PhyloNetwork(eNewick="(((1,2), 3), 4);")
        >>> network.nodal_matrix()
        array([[0, 1, 2, 3],
               [1, 0, 2, 3],
               [1, 1, 0, 2],
               [1, 1, 1, 0]])
    """
    # Look the taxa and their nodes up once instead of on every iteration.
    taxa = self.taxa()
    nodes = [self.node_by_taxa(taxon) for taxon in taxa]
    n = len(taxa)
    matrix = numpy.zeros((n, n), int)
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    dicdist = dict(all_pairs_shortest_path_length(self))
    for i in range(n):
        for j in range(i, n):
            from_lcsa = dicdist[self.LCSA(taxa[i], taxa[j])]
            matrix[i, j] = from_lcsa[nodes[i]]
            matrix[j, i] = from_lcsa[nodes[j]]
    return matrix
def get_distance_matrix_from_graph(network, filename=None, floyd=True):
    """
    Returns and optionally stores the distance matrix for a given network.

    By default (``floyd=True``) the networkX Floyd-Warshall implementation
    is used; pass ``floyd=False`` to use the BFS implementation instead.

    Parameters
    ----------
    network : a NetworkX graph
        (ATTENTION: for the BFS variant nodes need to be sequentially
        numbered starting at 1!)
    filename : destination for storing the matrix (optional)
    floyd : set to False to use BFS instead of Floyd-Warshall

    Returns
    -------
    A Numpy matrix storing all pairs shortest paths for the given network.
    """
    if floyd:
        D = nx.floyd_warshall_numpy(network)
    else:
        n = nx.number_of_nodes(network)
        D = numpy.zeros((n, n))
        # dict() supports networkx >= 2.0 (lazy iterator of pairs).
        D_dict = dict(nx.all_pairs_shortest_path_length(network))
        for row, col_dict in D_dict.items():
            for col, dist in col_dict.items():
                # node labels are 1-based, matrix indices 0-based
                D[row - 1, col - 1] = dist
    if filename:
        numpy.savetxt(filename, D, fmt='%s', delimiter=",", newline="\n")
    return D
def features_matrix(graph, anchors, use_dist=True, use_pgrs=True,
                    use_pgr=True, use_comm=False, use_comm_centr=False):
    """Build one feature vector per node of ``graph``.

    Depending on the flags, a node's vector is the concatenation of:
    shortest-path distances to every anchor (``use_dist``), anchored
    pagerank scores scaled by the node count (``use_pgrs``), the plain
    pagerank scaled by the node count (``use_pgr``), the communicability
    centrality (``use_comm_centr``) and the communicability with every
    anchor (``use_comm``).

    Nodes must be the integers 0..n-1 in iteration order; this is asserted.

    Returns
    -------
    list of numpy.ndarray
        ``node_feats[node]`` is the feature vector of ``node``.
    """
    node_feats = []
    n = len(graph)
    if use_dist:
        # dict() supports networkx >= 2.0, where the result is a lazy
        # iterator that cannot be subscripted.
        dists = dict(nx.all_pairs_shortest_path_length(graph))
    if use_pgr:
        pageranks = nx.pagerank_numpy(graph)
    if use_pgrs:
        pgr_anchor = [anchored_pagerank(graph, anchor) for anchor in anchors]
    if use_comm_centr:
        communicability_centrality = nx.communicability_centrality(graph)
    if use_comm:
        communicability = nx.communicability(graph)

    for node in graph.nodes():
        # the position in node_feats doubles as the node id
        assert node == len(node_feats)
        feats = []
        if use_dist:
            feats += [dists[node][anchor] for anchor in anchors]
        if use_pgrs:
            feats += [pgr[node] * n for pgr in pgr_anchor]
        if use_pgr:
            feats.append(pageranks[node] * n)
        if use_comm_centr:
            feats.append(communicability_centrality[node])
        if use_comm:
            feats += [communicability[node][anchor] for anchor in anchors]
        node_feats.append(np.array(feats))
    return node_feats
def calc_distance_matrix(G, max_distance=None):
    """Returns a matrix containing the shortest distance between all nodes
    in a network

    Parameters
    ----------
    G : graph
        A NetworkX graph
    max_distance : float or None, optional (default='None')
        The value used for pairs of nodes with no connecting path. If None,
        the largest shortest-path length found in the network is used.

    Returns
    -------
    dist_matrix : pandas DataFrame
        An NxN frame whose index and columns are the nodes of ``G``.

    Notes
    -----
    Along the diagonal, the values are all 0. Unconnected nodes have a
    distance of max_distance to other nodes.
    """
    nodes = list(G.nodes())
    # dict() supports networkx >= 2.0: DataFrame needs a mapping of
    # mappings, not a one-shot iterator of pairs.
    lengths = dict(nx.all_pairs_shortest_path_length(G))
    dist_matrix = DataFrame(lengths, index=nodes, columns=nodes)
    if max_distance is None:
        max_distance = float(dist_matrix.max().max())
    # Pairs without a path are missing (NaN) at this point.
    dist_matrix = dist_matrix.fillna(max_distance)
    diag_idx = np.diag_indices(len(dist_matrix), ndim=2)
    dist_matrix.values[diag_idx] = 0
    return dist_matrix
def first_return_times(self, k):
    """Compute first return times <= k for the nodes of ``self.graph``.

    ``length[i][j]`` is the length of the shortest path i -> j, present only
    when it is <= k (from ``nx.all_pairs_shortest_path_length``).  The return
    time of node i is 1 + the shortest such path from one of i's successors
    back to i.

    Returns
    -------
    (dict, dict)
        The return-times dictionary (only nodes that do return) and the
        ``length`` dictionary described above.
    """
    # The original iterated over an undefined name ``G``.
    G = self.graph
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    length = dict(nx.all_pairs_shortest_path_length(G, k))
    return_times = dict()
    for i in G:
        # successors j that can get back to i, with the full cycle length
        distances = [length[j][i] + 1
                     for j in G.successors(i) if i in length[j]]
        if distances:
            return_times[i] = min(distances)
    return return_times, length
def create_hr(G): """ Create heirarchical cluster of a graph G from distance matrix """ # create shortest path matrix labels=G.nodes() path_length = nx.all_pairs_shortest_path_length(G)
def local_efficiency(G):
    """Compute array of local efficiency for the given graph.

    Local efficiency: for every node, the mean inverse shortest-path length
    between each ordered pair of its distinct direct neighbours; 0.0 for a
    node with fewer than two neighbours.

    Returns
    -------
    numpy.ndarray
        One value per node, in graph iteration order.
    """
    assert_no_selfloops(G)

    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # that cannot be subscripted by node.
    length = dict(nx.all_pairs_shortest_path_length(G))
    nodepaths = []
    for n in G:
        nneighb = set(nx.neighbors(G, n))
        paths = []
        for nei in nneighb:
            other_neighbors = nneighb - {nei}
            nei_len = length[nei]
            paths.extend(nei_len[o] for o in other_neighbors)
        if paths:
            p = 1.0 / np.array(paths, float)
            nodepaths.append(p.mean())
        else:
            nodepaths.append(0.0)
    return np.array(nodepaths)
def get_distance_dict(filename):
    """Read an edge list and compute its all-pairs shortest path lengths.

    Prints the file name, the graph summary, the number of source nodes and
    the full distance dictionary.

    Returns
    -------
    dict
        ``path_length[source][target]`` shortest path lengths.  (The
        original printed the dictionary but returned nothing.)
    """
    g = nx.read_edgelist(filename)
    print("Read in edgelist file ", filename)
    print(nx.info(g))
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    path_length = dict(nx.all_pairs_shortest_path_length(g))
    print(len(path_length))
    print(path_length)
    return path_length
def CheckAllHostConnectivity(pairs, g):
    """Count how many ``(a, b)`` pairs have a path from ``a`` to ``b`` in ``g``.

    Raises ``KeyError`` if the first element of a pair is not a node of ``g``.
    """
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # that cannot be subscripted.
    matrix = dict(nx.all_pairs_shortest_path_length(g))
    return sum(1 for (a, b) in pairs if b in matrix[a])
def create_shortest_path_matrix(weighted=False, discount_highways=False):
    """Build a link-by-link shortest path matrix from the ``microsim_link`` table.

    The road network is loaded into a directed graph whose edge weight is
    either a per-link-type factor (``discount_highways``) or
    ``length / lane_count``.  Entry ``[row, col]`` of the result is the
    shortest path length from the end node of link ``col`` to the end node
    of link ``row``; unreachable pairs get ``N_LINKS`` and the diagonal
    stays 0.

    Parameters
    ----------
    weighted : bool
        Use Dijkstra on the edge weights instead of hop counts.
    discount_highways : bool
        Select the per-link-type weights instead of ``length / lane_count``.

    Returns
    -------
    numpy.ndarray
        ``N_LINKS x N_LINKS`` matrix indexed by ``ROW_NUMBER() - 1``.
    """
    G = nx.DiGraph()
    logging.info("Loading graph to NetworkX from database...")
    c = connection.cursor()
    if discount_highways:
        c.execute(
            "SELECT l.beg_node_id, l.end_node_id, "
            "(CASE WHEN l.link_type='1' THEN 0.5 "
            "WHEN l.link_type='2' THEN 0.5 ELSE 1.0 END) "
            "FROM microsim_link l")
    else:
        c.execute(
            "SELECT l.beg_node_id, l.end_node_id, "
            "l.length/l.lane_count AS resistance FROM microsim_link l")
    G.add_weighted_edges_from(c.fetchall())
    logging.debug("Road network is strongly connected: %s",
                  repr(nx.is_strongly_connected(G)))

    logging.info("Computing shortest paths...")
    # dict() supports networkx >= 2.0, where both calls return lazy
    # iterators that cannot be subscripted by node.
    if weighted:
        sp = dict(nx.all_pairs_dijkstra_path_length(G))
    else:
        sp = dict(nx.all_pairs_shortest_path_length(G))

    logging.info("Converting shortest paths into matrix...")
    c.execute("SELECT ROW_NUMBER() OVER (ORDER BY id), beg_node_id, "
              "end_node_id FROM microsim_link")
    links = c.fetchall()
    N_LINKS = len(links)
    unreachable = float(N_LINKS)
    shortest_paths = np.zeros((N_LINKS, N_LINKS))
    for col_idx, _, col_end_node in links:
        # loop-invariant: distances from this column's end node
        nodes = sp[col_end_node]
        for row_idx, _, row_end_node in links:
            if col_idx == row_idx:
                continue
            shortest_paths[row_idx - 1, col_idx - 1] = nodes.get(
                row_end_node, unreachable)
    logging.info("Shortest path matrix complete.")
    return shortest_paths
def path_lengths(G):
    """Compute array of all shortest path lengths for the given graph.

    The length of the output array is the number of unique pairs of nodes
    that have a connecting path, so in general it is not known in advance.

    This assumes the graph is undirected, as for any pair of reachable
    nodes, once we've seen the pair we do not keep the path length value for
    the inverse path.

    Parameters
    ----------
    G : an undirected graph object.
    """
    assert_no_selfloops(G)

    # dict()/items() work on both Python 2 and 3 and with networkx >= 2.0,
    # where the result is a lazy iterator of (source, targets) pairs.
    length = dict(nx.all_pairs_shortest_path_length(G))
    paths = []
    seen = set()
    for src, targets in length.items():
        seen.add(src)
        # skip the source itself and every pair already counted in reverse
        neigh = set(targets) - seen
        paths.extend(targets[targ] for targ in neigh)
    return np.array(paths)
def path_lengthsSPARSE(G):
    """Compute array of all shortest path lengths for the given graph.

    XXX - implementation using scipy.sparse.  This might be faster for very
    sparse graphs, but so far for our cases the overhead of handling the
    sparse matrices doesn't seem to be worth it.  We're leaving it in for
    now, in case we revisit this later and it proves useful.

    The length of the output array is the number of unique pairs of nodes
    that have a connecting path, so in general it is not known in advance.

    This assumes the graph is undirected, as for any pair of reachable
    nodes, once we've seen the pair we do not keep the path length value for
    the inverse path.

    Parameters
    ----------
    G : an undirected graph object.
    """
    assert_no_selfloops(G)

    # dict()/items() work on both Python 2 and 3 and with networkx >= 2.0.
    length = dict(nx.all_pairs_shortest_path_length(G))
    nnod = G.number_of_nodes()
    paths_mat = sparse.dok_matrix((nnod, nnod))
    for src, targets in length.items():
        for targ, val in targets.items():
            paths_mat[src, targ] = val
    # strict upper triangle: one entry per unordered pair, no diagonal
    return sparse.triu(paths_mat, 1).data
def create_hc(G, t=1.0):
    """
    Creates hierarchical cluster of graph G from distance matrix

    Maksim Tsvetovat ->> Generalized HC pre- and post-processing to work on
    labelled graphs and return labelled clusters.  The threshold value is
    now parameterized; useful range should be determined experimentally with
    each dataset.  Modified from code by Drew Conway.

    Parameters
    ----------
    G : graph whose shortest-path lengths serve as distances
    t : threshold passed to ``hierarchy.fcluster``

    Returns
    -------
    list of lists of node labels, one inner list per cluster.
    """
    # Create a shortest-path distance matrix, while preserving node labels.
    labels = list(G.nodes())
    index = {label: pos for pos, label in enumerate(labels)}
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    path_length = dict(nx.all_pairs_shortest_path_length(G))
    distances = numpy.zeros((len(labels), len(labels)))
    for u, p in path_length.items():
        i = index[u]
        for v, d in p.items():
            # Address the cell by node index.  The original used the running
            # position inside each per-source dict as the column, which does
            # not correspond to a fixed node and scrambled the matrix.
            j = index[v]
            if i != j:
                distances[i][j] = d
                distances[j][i] = d

    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage
    # This partition selection is arbitrary, for illustrative purposes
    membership = list(hierarchy.fcluster(Z, t=t))

    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for label, cluster_id in zip(labels, membership):
        partition[cluster_id].append(label)
    return list(partition.values())
def _distinct_pair_lengths(all_pairs):
    """Flatten an all-pairs result into the lengths between distinct nodes."""
    return [dist
            for source, targets in dict(all_pairs).items()
            for target, dist in targets.items()
            if target != source]


def _mean_and_max(values):
    """Return (average, maximum) of ``values``; (0.0, 0.0) when empty."""
    if not values:
        return 0.0, 0.0
    return np.average(values), np.max(values)


def getGroupMetrics(G, results):
    """Fill ``results`` with size, path-length and group-centrality metrics.

    Sets ``numEdges``, ``numNodes``, the weighted and unweighted average and
    maximum shortest path length (over all ordered pairs of distinct,
    connected nodes) and, for group sizes 4 and 5, the group betweenness and
    closeness values returned by ``computeGroupMetrics``.

    Notes
    -----
    The original sampled only ``x.values()[0]`` for every source node, i.e.
    one arbitrary entry per node (and it only ran on Python 2), so the
    reported average/maximum were not path-length statistics.
    """
    results.numEdges = len(G.edges())
    results.numNodes = len(G.nodes())

    weighted = _distinct_pair_lengths(
        nx.all_pairs_dijkstra_path_length(G, weight="weight"))
    (results.averageShortestPathWeighted,
     results.maxShortestPathWeighted) = _mean_and_max(weighted)

    unweighted = _distinct_pair_lengths(nx.all_pairs_shortest_path_length(G))
    (results.averageShortestPath,
     results.maxShortestPath) = _mean_and_max(unweighted)

    # The last element of each result is fed back as the cache for the next
    # run.
    cache = None
    runResB = {}
    runResC = {}
    for i in range(4, 6):
        res = computeGroupMetrics(G, groupSize=i, weighted=True, cutoff=2,
                                  shortestPathsCache=cache)
        cache = res[-1]
        runResB[i] = [res[0], res[1]]
        runResC[i] = [res[2], res[3]]
    results.groupMetrics['betweenness'] = runResB
    results.groupMetrics['closeness'] = runResC
def closenessCentrality(A):
    """Return the closeness centrality of every node of adjacency matrix ``A``.

    The graph is built with ``nx.from_numpy_matrix``; node ``r`` scores
    ``(n - 1) / sum of shortest path lengths from r to every other node``.
    The raw list of per-source distance dicts is printed along the way.
    Nodes are looked up by the integer labels 0..n-1.
    """
    graph = nx.from_numpy_matrix(A)
    length = list(nx.all_pairs_shortest_path_length(graph))
    print(length)

    size = len(length)
    # length[i] is a (source, {target: distance}) pair; flatten row by row
    flat = [length[i][1][j] for i in range(size) for j in range(size)]
    a = np.array(flat).reshape(size, size)

    rows, cols = a.shape[0], a.shape[1]
    result1 = []
    for r in range(rows):
        total = 0
        for c in range(cols):
            if r != c:
                total += a[r][c]
        result1.append((rows - 1) / total)
    return result1
def inter_node_distances(graph):
    """Compute the shortest path lengths between all nodes in graph.

    This performs the same operation as NetworkX's
    all_pairs_shortest_path_lengths with two exceptions: Here, self paths
    are excluded from the dictionary returned, and the distance between
    disconnected nodes is set to infinity.  The latter difference is
    consistent with the Brain Connectivity Toolbox for Matlab.

    Parameters
    ----------
    graph: networkx Graph
        An undirected graph.

    Returns
    -------
    lengths: dictionary
        Dictionary of shortest path lengths keyed by source and target.
    """
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    lengths = dict(nx.all_pairs_shortest_path_length(graph))
    node_labels = sorted(lengths)
    for src in node_labels:
        from_src = lengths[src]
        from_src.pop(src)
        for targ in node_labels:
            if src != targ:
                # only fills in pairs that have no path
                from_src.setdefault(targ, np.inf)
    return lengths
def hcluster(self):
    """Plot an average-linkage dendrogram of the nodes.

    Distances are the shortest path lengths in the undirected version of
    the graph; pairs without a path keep distance 0.

    .. plot::
        :include-source:
        :width: 50%

        from cno import XCNOGraph, cnodata
        c = XCNOGraph(cnodata("PKN-ToyPB.sif"), cnodata("MD-ToyPB.csv"))
        c.hcluster()

    .. warning:: experimental
    """
    from scipy.cluster import hierarchy
    from scipy.spatial import distance

    nodes = list(self.nodes())
    # O(1) row/column lookup instead of list.index() for every pair
    position = {node: idx for idx, node in enumerate(nodes)}
    n = len(nodes)
    distances = np.zeros((n, n))

    # dict()/items() work on Python 3 and with networkx >= 2.0.
    path_length = dict(nx.all_pairs_shortest_path_length(self.to_undirected()))
    for u, p in path_length.items():
        for v, d in p.items():
            # The original used ``index - 1``, which shifted every row and
            # column by one against the ``nodes`` order used for labelling.
            distances[position[u]][position[v]] = d

    sd = distance.squareform(distances)
    hier = hierarchy.average(sd)
    pylab.clf()
    # Let scipy place the labels: the dendrogram reorders the leaves, so
    # labelling the ticks with ``nodes`` in input order mislabels them.
    hierarchy.dendrogram(hier, labels=nodes)
def od_pairs_from_topology(topology):
    """
    Calculate all possible origin-destination pairs of the topology.

    This function does not simply calculate all possible pairs of the
    topology nodes. Instead, it only returns pairs of nodes connected by at
    least a path.

    Parameters
    ----------
    topology : Topology or DirectedTopology
        The topology whose OD pairs are calculated

    Returns
    -------
    od_pair : list
        List containing all origin destination tuples.

    Examples
    --------
    >>> import fnss
    >>> topology = fnss.ring_topology(3)
    >>> fnss.od_pairs_from_topology(topology)
    [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]
    """
    if topology.is_directed():
        # dict() supports networkx >= 2.0, where the result is a lazy
        # iterator of (origin, {destination: length}) pairs.
        routes = dict(nx.all_pairs_shortest_path_length(topology))
        return [(o, d) for o in routes for d in routes[o] if o != d]
    # Undirected: any two nodes of the same connected component are
    # connected by a path.
    conn_comp = nx.connected_components(topology)
    return [(o, d) for G in conn_comp for o in G for d in G if o != d]
def get_shortest_path(nx_graph):
    """Return the all-pairs shortest path lengths of ``nx_graph`` as a dict.

    Prints a start message and the number of source nodes in the result.
    """
    print('Calculating the All Pairs Shortest Path Lengths')
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # with no len().
    length = dict(nx.all_pairs_shortest_path_length(nx_graph))
    print("Length of Dict", len(length))
    return length
def _diameter(self):
    """Return the largest finite shortest path length in ``self.graph``.

    Pairs without a connecting path are ignored; an empty graph gives 0.
    """
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    lengths = dict(nx.all_pairs_shortest_path_length(self.graph))
    # Every per-source dict contains at least the source itself (distance 0).
    return max(
        (max(from_source.values()) for from_source in lengths.values()),
        default=0,
    )
def wiener_high_speed_high_memory(self):
    """Return the sum of shortest path lengths over all unordered node pairs.

    Nodes of ``self.g2`` are addressed as the integers
    ``0 .. self.number_of_vertex - 1``.  The whole all-pairs table is held
    in memory.  Raises ``KeyError`` if some pair has no connecting path.
    """
    n = self.number_of_vertex
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # that cannot be subscripted.
    d1 = dict(nx.all_pairs_shortest_path_length(self.g2))
    return sum(d1[i][j] for i in range(n) for j in range(i + 1, n))
def calc_broder_values(g):
    """
    calc_broder_values(g)
    calculate values for Broder bow-tie :
    SCC, IN, OUT, IN-tendrils, OUT-tendrils, Tubes, Disconnected

    The largest strongly connected component is taken as the SCC.  IN are
    the other nodes that can reach it, OUT the other nodes reachable from
    it.  Of the remaining nodes, those reachable from IN are tubes (if they
    also reach OUT) or IN-tendrils; those that only reach OUT are
    OUT-tendrils; the rest are disconnected.

    :param g: directed graph as source
    :return: dict mapping each category name to its node count

    NOTE: the original used the generator returned by
    ``nx.strongly_connected_components`` as if it were a node set, which
    raised on every call, and seeded the tendril test from the SCC instead
    of IN.
    """
    logging.info(cs_ref, 'calculate values for Broder bow-tie')
    func_intro = "\nCalculate Broder Bow-tie values ... \n"
    print(func_intro)
    with open(dest_file, "a") as dat_file:
        dat_file.write(func_intro)

    all_nodes = set(g.nodes())
    components = list(nx.strongly_connected_components(g))
    scc = set(max(components, key=len)) if components else set()

    # reach[n] = every node reachable from n (n itself included)
    reach = {n: set(targets)
             for n, targets in nx.all_pairs_shortest_path_length(g)}

    if scc:
        # All SCC members reach, and are reached by, the same nodes, so one
        # representative is enough.
        anchor = next(iter(scc))
        outc = reach[anchor] - scc
        inc = {n for n in all_nodes - scc if anchor in reach[n]}
    else:
        outc = set()
        inc = set()

    remainder = all_nodes - scc - inc - outc

    # nodes hanging off IN that are not part of the bow-tie core
    from_in = set()
    for n in inc:
        from_in |= reach[n]
    from_in = from_in - scc - inc - outc

    tubes = set()
    in_tendrils = set()
    out_tendrils = set()
    disconnected = set()
    for n in remainder:
        reaches_out = bool(reach[n] & outc)
        if n in from_in:
            if reaches_out:
                tubes.add(n)
            else:
                in_tendrils.add(n)
        elif reaches_out:
            out_tendrils.add(n)
        else:
            disconnected.add(n)

    return {
        'IN-tendrils': len(in_tendrils),
        'IN': len(inc),
        'SCC': len(scc),
        'OUT': len(outc),
        'OUT-tendrils': len(out_tendrils),
        'Tubes': len(tubes),
        'Disconnected': len(disconnected),
    }
def get_path_lengths(self):
    """Return the all-pairs shortest path lengths of ``self.G``, cached.

    On the first call this also stores ``avg_shortest_path`` (sum of all
    path lengths divided by ``N * (N - 1)``), ``eccentricity``, ``diameter``
    and ``radius`` on the instance.  Later calls return the cached
    ``self.shortest_paths_lengths``; set it to None to force a
    recomputation.

    Notes
    -----
    The original guard tested the misspelled attribute
    ``shortest_path_lenghts``, which is never assigned here, so everything
    was recomputed on every call.
    """
    if getattr(self, "shortest_paths_lengths", None) is None:
        # dict() supports networkx >= 2.0 (lazy iterator); the result is
        # read twice below, which a one-shot iterator cannot serve.
        self.shortest_paths_lengths = dict(
            nx.all_pairs_shortest_path_length(self.G))
        self.avg_shortest_path = sum(
            length
            for sp in self.shortest_paths_lengths.values()
            for length in sp.values()
        ) / float(self.N * (self.N - 1))
        self.eccentricity = nx.eccentricity(
            self.G, sp=self.shortest_paths_lengths)
        self.diameter = nx.diameter(self.G, e=self.eccentricity)
        self.radius = nx.radius(self.G, e=self.eccentricity)
    return self.shortest_paths_lengths
def harmonic_centrality(G, distance=None):
    r"""Return the harmonic centrality of every node of `G`.

    The harmonic centrality [1]_ of a node `u` is the sum of the reciprocals
    of the shortest-path distances from every other node to `u`

    .. math::

        C(u) = \sum_{v \neq u} \frac{1}{d(v, u)}

    where `d(v, u)` is the shortest-path distance between `v` and `u`.
    Higher values indicate higher centrality.

    Parameters
    ----------
    G : graph
        A NetworkX graph
    distance : edge attribute key, optional (default=None)
        Edge attribute to use as the edge distance.  With `None` every edge
        counts as distance 1.

    Returns
    -------
    nodes : dictionary
        Harmonic centrality keyed by node.

    See Also
    --------
    betweenness_centrality, load_centrality, eigenvector_centrality,
    degree_centrality, closeness_centrality

    Notes
    -----
    When `distance` names an edge attribute, path lengths come from
    Dijkstra's algorithm with that attribute as the edge weight.  A directed
    graph is reversed first.

    References
    ----------
    .. [1] Boldi, Paolo, and Sebastiano Vigna. "Axioms for centrality."
           Internet Mathematics 10.3-4 (2014): 222-262.
    """
    # Zero or one node: there is no other node to measure a distance to.
    if len(G) <= 1:
        trivial = {}
        for singleton in G.nodes():
            trivial[singleton] = 0.0
        return trivial

    graph = G.reverse() if G.is_directed() else G

    if distance is None:
        sp = nx.all_pairs_shortest_path_length(graph)
    else:
        # Dijkstra with the given attribute as edge weight
        sp = nx.all_pairs_dijkstra_path_length(graph, weight=distance)

    centrality = {}
    for n, dd in sp:
        centrality[n] = sum(1 / d if d > 0 else 0 for d in dd.values())
    return centrality
def _annotate(self):
    """Attach shortest-path lengths to every tree node.

    Builds ``self._g`` with ``self._graph()`` and stores, on each tree
    node's ``data.length``, the dict of shortest path lengths from that
    node to every node reachable from it.
    """
    tree = self.tree
    self._g = self._graph()
    import networkx as nx
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # that cannot be subscripted.
    length = dict(nx.all_pairs_shortest_path_length(self._g))
    for node_id in self._g:
        node = tree.node(node_id)
        node.data.length = length[node_id]
def calculate_graph_distance_matrix(self, df, graph):
    """Return the all-pairs shortest path lengths of ``graph``.

    ``df`` is accepted for interface compatibility but not used here.

    Returns
    -------
    dict
        ``path_length[source][target]`` for every connected pair.
    """
    print("Calculating graph distances between nodes...")
    # dict() supports networkx >= 2.0: callers get a reusable mapping
    # instead of a one-shot iterator.
    path_length = dict(nx.all_pairs_shortest_path_length(graph))
    return path_length
def main():
    """Embed the 3-dimensional hypercube graph with Euclidean MDS.

    Prints every (source, distances) entry of the all-pairs shortest path
    lengths, the distance array handed to MDS and the stress after 1000
    solver iterations, then writes the layout with ``output_euclidean``.
    """
    G = nx.hypercube_graph(3)

    for entry in nx.all_pairs_shortest_path_length(G):
        print(entry)

    d = np.asarray(all_pairs_shortest_path(G)) / 1
    print(d)

    Y = MDS(d, geometry='euclidean')
    Y.solve(1000)
    stress = Y.calc_stress()
    print(stress)

    output_euclidean(G, Y.X)
def compute_distance(self):
    """
    Compute the distance function on pairs of nodes.

    The distance map self.dist is computed from the undirected version of
    the graph using all_pairs_shortest_path_length, and is keyed by the
    keys of self.qubits: ``self.dist[i][j]``.

    Raises
    ------
    CouplingError
        If the coupling graph is not connected.
    """
    if not self.connected():
        raise CouplingError("coupling graph not connected")
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # that cannot be subscripted.
    lengths = dict(nx.all_pairs_shortest_path_length(self.G.to_undirected()))
    self.dist = {}
    for i, node_i in self.qubits.items():
        from_i = lengths[node_i]
        self.dist[i] = {j: from_i[node_j]
                        for j, node_j in self.qubits.items()}
def test_average_path_length():
    """Check the average path length estimator against the exact value.

    Uses a 300-node Barabasi-Albert graph; the estimate from at most 200
    sources must be within 3% of the exact average of per-node average
    distances.
    """
    print('Testing avg path length estimator')
    G = nx.barabasi_albert_graph(300, 5)
    estimated_avg = average_all_pairs_shortest_path_estimate(
        G, max_num_sources=200)

    # dict() supports networkx >= 2.0; list() is needed because numpy
    # cannot average a Python 3 dict view.
    true_lengths = dict(nx.all_pairs_shortest_path_length(G))
    true_avg = np.average(
        [np.average(list(true_lengths[node].values())) for node in G])

    print('Estimate: %f' % estimated_avg)
    print('True: %f' % true_avg)
    assert abs(estimated_avg - true_avg) / true_avg < 0.03
    print('PASSED')
def calc_shortest_path(protein_graph, prefix, generate_plots=True):
    """Fill a dense matrix with the shortest path lengths of ``protein_graph``.

    Nodes are addressed as the integers ``0 .. num_nodes - 1``.

    Raises
    ------
    networkx.exception.NetworkXNoPath
        If some pair of nodes has no connecting path (orphan node).

    NOTE(review): ``prefix``, ``generate_plots`` and ``nodes_axis`` are not
    used in the visible body and nothing is returned -- the remainder of the
    function appears to be missing from this excerpt.
    """
    num_nodes = len(protein_graph.nodes())
    nodes_axis = range(1, num_nodes + 1)
    # dict() supports networkx >= 2.0, where the result is a lazy iterator.
    path_dict = dict(nx.all_pairs_shortest_path_length(protein_graph))
    dj_path_matrix = np.zeros((num_nodes, num_nodes))
    for i in range(num_nodes):
        for j in range(num_nodes):
            try:
                dj_path_matrix[i, j] = path_dict[i][j]
            except KeyError as ke:  # Python 2.6+/3 spelling
                raise nx.exception.NetworkXNoPath(
                    "\nERROR::type=orphan_node:message=No link between %d and %d:exception=%s\n"
                    % (i, j, str(ke)))
def shortestPath(modelPath):
    """Return all-pairs shortest path lengths of the graph stored in a model file.

    The first three lines of the file are skipped.  Every following line is
    split on single spaces and its last field dropped; the first remaining
    field is the source node and each further field a target of a directed
    edge from it.

    Returns
    -------
    dict
        ``length[source][target]`` with node names as strings.
    """
    # ``with`` closes the file even if reading fails (the original leaked
    # the handle).
    with open(modelPath, 'r') as f:
        model = f.read().split('\n')

    # all pair shortest path
    edges = []
    for line in model[3:]:
        edgeRecords = line.split(' ')[:-1]
        edges.extend((edgeRecords[0], target) for target in edgeRecords[1:])

    # create directed graph
    G = nx.DiGraph()
    G.add_edges_from(edges)
    return dict(nx.all_pairs_shortest_path_length(G))
def _calc_network_properties(self): if self.dist_matrix is None: self.dist_matrix = dict(nx.all_pairs_shortest_path_length(self.network)) if self.max_dist is None: self.max_dist = max(max(n.values()) for n in self.dist_matrix.values()) if self.nodes is None: self.nodes = list(self.network.nodes()) if self.number_of_nodes is None: self.number_of_nodes = self.network.number_of_nodes() if self.mean_node_weight is None: self.mean_node_weight = sum(self.node_weights.values()) / self.number_of_nodes
def _dump_embedding(embedding, node_count, outfile):
    """Write a 128-column embedding frame: a header line, then one row per node."""
    with open(outfile, "w") as f:
        print(node_count, 128, file=f)
        for n, e in embedding.iterrows():
            print("{} {}".format(n, ' '.join(str(i) for i in e)), file=f)


def executor(algorithm, dataset, infile, outfile):
    """Run the embedding ``algorithm`` on ``infile`` and write ``outfile``.

    ``deepwalk``, ``node2vec``, ``struc2vec`` and ``HARP`` are delegated to
    their scripts in a subprocess (the completed process is printed).
    ``MDS`` and ``SpectralEmbedding`` are computed in-process from the edge
    list and written as 128-dimensional embeddings.  ``LINE``, ``Mdeff``,
    ``Kipf``, ``SAGE`` and ``LGCN`` raise ``NotImplementedError``.  Any
    other name does nothing.  ``infile`` is always printed first.
    """
    print(infile)

    external = (
        ("deepwalk", 'python3', 'deepwalk.py'),
        ("node2vec", 'python3', 'node2vec.py'),
        ("struc2vec", 'python3', 'struc2vec.py'),
        ("HARP", 'python', 'harp.py'),
    )
    for name, interpreter, script in external:
        if algorithm == name:
            print(subprocess.run(
                [interpreter, script, dataset, infile, outfile]))
            return

    if algorithm == "LINE":
        name, ext = os.path.splitext(infile)
        infile = "{}_weighted{}".format(name, ext)
        raise NotImplementedError

    if algorithm == "MDS":
        G = nx.read_edgelist(infile, nodetype=int)
        A = {source: lengths
             for source, lengths in nx.all_pairs_shortest_path_length(G)}
        A = pd.DataFrame(A)
        # same ordering on both axes
        A = A.loc[A.index, A.index]
        mds = sklearn.manifold.MDS(n_components=128, max_iter=300, eps=1e-3,
                                   n_jobs=2, dissimilarity="precomputed")
        mds.fit(A)
        embedding = pd.DataFrame(mds.embedding_, index=G.nodes(data=False))
        _dump_embedding(embedding, len(G), outfile)
        return

    if algorithm == "SpectralEmbedding":
        G = nx.read_edgelist(infile, nodetype=int)
        L = nx.normalized_laplacian_matrix(G)
        eigenvalues, eigenvectors = np.linalg.eig(L.todense())
        eigenvalues = np.real(eigenvalues)
        eigenvectors = np.real(eigenvectors)
        idx = np.argsort(eigenvalues)
        # skip the smallest eigenvalue, keep the next 128
        chosen = idx[1:129]
        scaled = np.multiply(eigenvectors[:, chosen], eigenvalues[chosen])
        embedding = pd.DataFrame(scaled, index=G.nodes(data=False))
        _dump_embedding(embedding, len(G), outfile)
        return

    for unsupported in ("Mdeff", "Kipf", "SAGE", "LGCN"):
        if algorithm == unsupported:
            raise NotImplementedError
def Statistic(adjlist):
    """Compute degree, clustering and shortest-path statistics of a network.

    Parameters
    ----------
    adjlist : 2-D array
        Adjacency matrix; entries > 0 are links.

    Returns
    -------
    tuple
        ``(degree, cluster, shortpath, avecluster, aveshortpath, totallink)``:
        integer column sums, per-node clustering coefficient, matrix of
        shortest path lengths (-1 where there is no path), mean clustering
        coefficient, the sum of positive path lengths divided by the node
        count, and the number of ordered pairs with a positive path length.
    """
    NoLink = 0
    degree = np.sum(adjlist, axis=0).astype(int)
    nn = adjlist.shape[0]
    mm = adjlist.shape[1]
    lenth = nn if nn >= mm else mm

    cluster = np.zeros([1, lenth])[0]
    for i in range(lenth):
        deg = degree[i]
        # indices of the neighbours of i, in increasing order
        temp = np.zeros([1, deg])[0].astype(int)
        filled = 0
        for j in range(lenth):
            if adjlist[i, j] > 0:
                temp[filled] = j
                filled += 1

        triangle = 0
        for first in range(deg - 1):
            for second in range(first + 1, deg):
                if temp[second] == 0:
                    continue
                weight = adjlist[temp[first], temp[second]]
                if weight > 0 and weight != NoLink:
                    triangle += 1

        pair_count = (deg - 1) * deg
        if len(temp) > 1 and pair_count != 0:
            cluster[i] = 2 * triangle / pair_count

    SNet = sparse.csr_matrix(adjlist)
    Graph = nx.from_scipy_sparse_matrix(SNet)
    shortpath_dict = dict(nx.all_pairs_shortest_path_length(Graph))

    shortpath = np.zeros([lenth, lenth])
    shortpath[:, :] = -1
    for key1, targets in shortpath_dict.items():
        for key2 in targets:
            shortpath[int(key1), int(key2)] = targets[key2]

    totalcluster = 0
    totalshortpath = 0
    totallink = 0
    for i in range(lenth):
        totalcluster = totalcluster + cluster[i]
        for value in shortpath[i]:
            # -1 marks "no path", 0 is the node itself
            if value != -1 and value != 0:
                totalshortpath = totalshortpath + value
                totallink = totallink + 1

    avecluster = totalcluster / lenth
    aveshortpath = totalshortpath / lenth
    return degree, cluster, shortpath, avecluster, aveshortpath, totallink
def find_users_distances(G):
    """Return hop-count and weighted shortest-path distances as two frames.

    Both frames have sources as rows and targets as columns
    (``orient="index"``), with labels cast to ``int64``.  The weighted
    variant uses the ``'Weight_reversed'`` edge attribute.  The diagonal of
    both frames is set to NaN.

    Returns
    -------
    (DataFrame, DataFrame)
        ``(df_shortest_path, df_shortest_path_weighted)``
    """
    def _as_frame(pair_lengths):
        # pay attention to orient: rows must be the sources
        frame = pd.DataFrame.from_dict(dict(pair_lengths), orient="index")
        frame.index = frame.index.astype('int64')
        frame.columns = frame.columns.astype('int64')
        return frame

    df_shortest_path = _as_frame(nx.all_pairs_shortest_path_length(G))
    df_shortest_path_weighted = _as_frame(
        nx.all_pairs_dijkstra_path_length(G, weight='Weight_reversed'))

    for user in df_shortest_path_weighted.index:
        df_shortest_path_weighted.loc[user, user] = np.nan
        df_shortest_path.loc[user, user] = np.nan

    return df_shortest_path, df_shortest_path_weighted
def calc_lopacity_matrix(g, L, degs, deg_count, opacity, inv_opacity):
    """
    Calculate opacity matrix with shortest path with level L

    All positive entries of ``opacity`` are reset to 0 in place, then
    ``__opacity_calc`` is called once for every node pair ``i < z`` whose
    shortest path length is at most ``L``.

    :param g: Graph
    :param L: cutoff for the shortest path search
    :param degs: passed through to ``__opacity_calc``
    :param deg_count: passed through to ``__opacity_calc``
    :param opacity: array, modified in place
    :param inv_opacity: passed through to ``__opacity_calc``
    """
    opacity[opacity > 0.0] = 0.0
    # dict() supports networkx >= 2.0, where the result is a lazy iterator
    # of (node, targets) pairs that cannot be subscripted.
    lapsp = dict(nx.all_pairs_shortest_path_length(g, cutoff=L))
    for i, targets in lapsp.items():
        for z in targets:
            # each unordered pair once
            if i < z:
                __opacity_calc(i, z, degs, deg_count, opacity, inv_opacity)
def get_path_lengths(self):
    """Return the all-pairs shortest path lengths of ``self.G``, cached.

    On the first call this also stores ``avg_shortest_path`` (sum of all
    path lengths divided by ``N * (N - 1)``), ``eccentricity``, ``diameter``
    and ``radius`` on the instance.  Later calls return the cached
    ``self.shortest_paths_lengths``; set it to None to force a
    recomputation.

    Notes
    -----
    The original guard tested the misspelled attribute
    ``shortest_path_lenghts``, which is never assigned here, so everything
    was recomputed on every call.
    """
    if getattr(self, "shortest_paths_lengths", None) is None:
        self.shortest_paths_lengths = dict(
            nx.all_pairs_shortest_path_length(self.G))
        self.avg_shortest_path = sum(
            length
            for sp in self.shortest_paths_lengths.values()
            for length in sp.values()
        ) / float(self.N * (self.N - 1))
        self.eccentricity = nx.eccentricity(
            self.G, sp=self.shortest_paths_lengths)
        self.diameter = nx.diameter(self.G, e=self.eccentricity)
        self.radius = nx.radius(self.G, e=self.eccentricity)
    return self.shortest_paths_lengths
def get_katz_score(graph, edges, beta, max_length):
    """Return a numerical ``Column`` holding a truncated Katz score per edge.

    For every edge, ``__number_of_path`` supplies ``count``, where
    ``count[i]`` is weighted by ``beta ** i`` for ``i >= 1``; the score is
    the sum of those terms.

    Parameters
    ----------
    graph : NetworkX graph
    edges : iterable of hashable edges (used as keys of the result)
    beta : damping factor applied once per path-length step
    max_length : passed through to ``__number_of_path``
    """
    c = Column(1, 'numerical')
    value = dict()
    # dict() supports networkx >= 2.0: the helper is called once per edge,
    # and a lazy iterator would be exhausted after the first call.
    shortest_path_length = dict(nx.all_pairs_shortest_path_length(graph))
    for edge in edges:
        count = __number_of_path(graph, edge, max_length,
                                 shortest_path_length)
        score = 0.0
        base = 1.0
        for i in range(1, len(count)):
            base = base * beta
            score += base * count[i]
        value[edge] = score
    c.value = value
    return c
def _compute_distance_matrix(self):
    """Build the dense node-to-node distance matrix of the coupling graph.

    Shortest path lengths are taken on an undirected view of ``self.graph``
    and stored in ``self._dist_matrix``, a square float array indexed by
    node number (nodes are addressed as 0..size-1).

    Raises
    ------
    CouplingError
        If the coupling graph is not connected.
    """
    if not self.is_connected():
        raise CouplingError("coupling graph not connected")

    undirected_view = self.graph.to_undirected(as_view=True)
    lengths = dict(nx.all_pairs_shortest_path_length(undirected_view))

    size = len(lengths)
    cmap = np.zeros((size, size))
    for idx in range(size):
        row = lengths[idx]
        # keys and values of one dict iterate in matching order
        targets = np.fromiter(row.keys(), dtype=int)
        hops = np.fromiter(row.values(), dtype=int)
        cmap[idx, targets] = hops
    self._dist_matrix = cmap
def complexity_b(graff):
    """Return the sum over all nodes of ``degree / total distance``.

    A node's total distance is the sum of its shortest path lengths to
    every node of the graph, so a missing path raises ``KeyError``.
    """
    degrees = dict(graff.degree)
    inter_node_d = dict(nx.all_pairs_shortest_path_length(graff))

    # total shortest-path distance from each node to all nodes
    distance_sums = {}
    for node, row in inter_node_d.items():
        distance_sums[node] = sum(row[other] for other in inter_node_d)

    b = 0
    for node in inter_node_d:
        b += degrees[node] / distance_sums[node]
    return b
def GetSimlarityMatrix(dotpath):
    """Load a DOT file and return its shortest-path-length matrix.

    Returns
    -------
    (numpy.ndarray, networkx.MultiGraph, dict)
        ``M[i][j]`` is the shortest path length between the nodes with
        indices ``i`` and ``j`` (0 where no path was reported), the loaded
        graph, and the node -> matrix index mapping.
    """
    G = nx.MultiGraph(pgv.AGraph(dotpath))
    SP = nx.all_pairs_shortest_path_length(G)
    n = len(G.nodes())

    # matrix position of every node, in graph iteration order
    nodes_index = {}
    for position, node in enumerate(G.nodes()):
        nodes_index[node] = position

    M = np.zeros(shape=(n, n))
    for source, targets in SP:
        for target in targets:
            i = nodes_index[source]
            j = nodes_index[target]
            M[i][j] = targets[target]
    return M, G, nodes_index
def distance_fragmentation(G):
    '''
    Borgatti's distance-based fragmentation measure:
        dF = 1 - sum_ij(1/d_ij)/N(N-1)
    where the sum runs over ordered pairs of distinct, connected nodes.
    A single-node graph gives 1.
    '''
    N = len(G.nodes())

    sum_inv_dij = 0.0
    for source, targets in nx.all_pairs_shortest_path_length(G):
        for target, dist in targets.items():
            if target == source:
                continue
            sum_inv_dij += 1.0 / dist

    if N == 1:
        return 1
    return 1 - sum_inv_dij / (N * (N - 1))
def get_distance(self):
    """Bucket node pairs by shortest-path distance into ``self.nclass`` classes.

    ``self.distance[u][v]`` is the path length minus one for pairs found
    within the cutoff ``nclass - 1``; every entry equal to -1 afterwards
    is assigned the last class, ``nclass - 1``.  ``self.dis_node_pairs[i]``
    is the array of ``(u, v)`` index pairs whose class is ``i``.
    """
    last_class = self.nclass - 1

    G = nx.Graph()
    G.add_edges_from(self.edge_index.cpu().t().numpy())
    path_length = dict(nx.all_pairs_shortest_path_length(G, cutoff=last_class))

    shape = (self.num_nodes, self.num_nodes)
    distance = -np.ones(shape).astype(int)
    for u, reachable in path_length.items():
        for v, hops in reachable.items():
            distance[u][v] = hops - 1
    distance[distance == -1] = last_class
    self.distance = distance

    self.dis_node_pairs = []
    for cls in range(self.nclass):
        pairs = np.array(np.where(distance == cls)).transpose()
        self.dis_node_pairs.append(pairs)
def test_tree_wasserstein(self):
    """Tree-Wasserstein distance must match exact OT on 100 random trees.

    Each round draws a random tree with 10..199 nodes and unit edge
    weights, two random probability vectors over its nodes, and compares
    ``tw.distance`` with ``ot.lp.emd2`` under the tree's shortest-path
    metric.
    """
    def _random_distribution(size):
        weights = np.random.rand(size)
        return weights / weights.sum()

    for i in range(100):
        num_node = np.random.randint(10, 200)
        G = nx.generators.random_tree(num_node)
        edges = [(fr, to, 1) for (fr, to) in list(G.edges())]

        first_prob = _random_distribution(num_node)
        second_prob = _random_distribution(num_node)
        twd = tw.distance(first_prob, second_prob, edges)

        adj_dict = dict(nx.all_pairs_shortest_path_length(G))
        node_range = range(num_node)
        metric = np.array([[adj_dict[col][row] for col in node_range]
                           for row in node_range])
        ans = ot.lp.emd2(first_prob, second_prob, metric)

        assert np.allclose([twd], [ans]), f"i: {i}, TW : {twd}, WD : {ans}"
def n_order_dgl(self, order_num):
    """Return a DGL graph linking every node pair within ``order_num`` hops.

    The new graph has as many nodes as ``self.g``. One edge is added for
    each (source, destination) pair reported by the shortest-path-length
    query with ``cutoff=order_num``; no pair is filtered out here. Each
    edge carries an ``'order'`` feature: a length-1 long tensor holding the
    path length.
    """
    dgl_graph = dgl.DGLGraph()
    dgl_graph.add_nodes(self.g.number_of_nodes())

    all_lengths = nx.all_pairs_shortest_path_length(self.g, cutoff=order_num)
    for src, reachable in all_lengths:
        for dst, hops in reachable.items():
            order = hops * torch.ones(1, dtype=torch.long)
            dgl_graph.add_edge(src, dst, data={'order': order})

    dgl_graph.edata['order'] = dgl_graph.edata['order'].contiguous()
    return dgl_graph
def link_pred():
    """Rank two-hop node pairs of ``PG`` by Jaccard coefficient and report top-k.

    Candidate pairs are all ordered pairs at shortest-path length exactly 2
    in the module-level graph ``PG``. They are scored with
    ``nx.jaccard_coefficient`` and ranked from highest to lowest score.

    For each k in 10, 20, 50, 100 the top-k pairs are printed and written to
    ``./link_pred/link_prediction_values_jaccard<k>.txt``, followed by
    precision@k: the number of top-k pairs present in the module-level
    ``edges`` collection, divided by k.

    If fewer than k pairs were scored, only the available ones are listed
    (previously this raised ``IndexError`` and left the file open).
    """
    splPG = dict(nx.all_pairs_shortest_path_length(PG, cutoff=2))

    friends_PG = []
    for x, lengths in splPG.items():
        for y, hops in lengths.items():
            if hops == 2:
                friends_PG.append([x, y])

    results = list(nx.jaccard_coefficient(PG, friends_PG))
    # Ascending sort followed by reverse() (not sort(reverse=True)) so that
    # equally-scored pairs keep the same relative order as before.
    results.sort(key=lambda x: x[2])
    results.reverse()

    for k in [10, 20, 50, 100]:
        top_k = results[:k]
        out_path = ('./link_pred/link_prediction_values_jaccard' + str(k) +
                    '.txt')
        # The context manager closes the file even if a lookup below raises.
        with open(out_path, 'w') as f:
            for node_a, node_b, score in top_k:
                line = '({}, {}),jaccard: {}'.format(
                    all_actors_id_map[node_a].getName(),
                    all_actors_id_map[node_b].getName(), score)
                print(line)
                f.write(line + '\n')

            count = sum(1 for node_a, node_b, _ in top_k
                        if (node_a, node_b) in edges)
            precision_at_k = count / k
            print('precision @ K{}: {}\n'.format(k, precision_at_k))
            f.write('precision @ K{}: {}'.format(k, precision_at_k))
def global_efficiency_directional(graph: Union[DiGraph, Graph]):
    """Return the average inverse shortest-path length over ordered node pairs.

    Per the original author's note, this mirrors the upstream global
    efficiency routine but does not raise the not-implemented error for
    directed graphs; the author observed that it appears to work for them.

    The sum of ``1 / distance`` over all reported pairs with positive
    distance is divided by ``n * (n - 1)``. Graphs with fewer than two
    nodes yield 0.
    """
    n = len(graph)
    denom = n * (n - 1)
    if denom == 0:
        return 0

    g_eff = 0
    for _source, targets in all_pairs_shortest_path_length(graph):
        for distance in targets.values():
            if distance > 0:
                g_eff += 1 / distance
    return g_eff / denom
def generate_exclusion_idxs(mol, scale12, scale13, scale14):
    """
    Generate exclusions for a mol based on the all pairs shortest path.

    Pairs separated by 1, 2 or 3 bonds receive scale12, scale13 or scale14
    respectively. We always take the convention that exclusions for smaller
    distances override those of longer distances.

    Parameters
    ----------
    mol: Chem.ROMol
        romol

    scale12: float
        bond scales

    scale13: float
        angle scales

    scale14: float
        torsions scales

    Returns
    -------
    tuple of np.ndarray
        int32 array built from the sorted (src, dst) index pairs, and a
        float64 array with the scale of each pair, in matching order.

    """
    # Shortest-path length between two atoms -> scale applied to the pair.
    scale_for_length = {1: scale12, 2: scale13, 3: scale14}

    exclusions = {}
    g = convert_to_nx(mol)
    for src, lengths in nx.all_pairs_shortest_path_length(g, cutoff=3):
        for dst, length in lengths.items():
            if length == 0:
                # an atom paired with itself is not an exclusion
                continue
            assert length in scale_for_length
            exclusions[sort_tuple((src, dst))] = scale_for_length[length]

    return (
        np.array(list(exclusions.keys()), dtype=np.int32),
        np.array(list(exclusions.values()), dtype=np.float64),
    )
def create_hc(G):
    """Creates hierarchical cluster of graph G from distance matrix

    Parameters
    ----------
    G : networkx graph
        Node labels are used directly as row/column indices of the distance
        matrix, so they are expected to be the integers 0 .. len(G) - 1.

    Returns
    -------
    list of lists
        One list of node indices per cluster.
    """
    # networkx >= 2.0 returns an iterator of (node, lengths) pairs here,
    # while 1.x returned a dict; dict() accepts both, so .items() is safe.
    path_length = dict(nx.all_pairs_shortest_path_length(G))
    distances = numpy.zeros((len(G), len(G)))
    for u, p in path_length.items():
        for v, d in p.items():
            distances[u][v] = d

    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage

    # This partition selection is arbitrary, for illustrative purposes
    membership = list(hierarchy.fcluster(Z, t=1.15))

    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for n, p in enumerate(membership):
        partition[p].append(n)
    return list(partition.values())
def create_hc(G):
    """Creates hierarchical cluster of graph G from distance matrix

    Also draws the dendrogram of the clustering and shows it with
    ``plt.show()`` before the partition is computed.

    Parameters
    ----------
    G : networkx graph
        Node labels are used directly as row/column indices of the distance
        matrix, so they are expected to be the integers 0 .. len(G) - 1.

    Returns
    -------
    list of lists
        One list of node indices per cluster.
    """
    # networkx >= 2.0 returns an iterator of (node, lengths) pairs here,
    # while 1.x returned a dict; dict() accepts both, so .items() is safe.
    path_length = dict(nx.all_pairs_shortest_path_length(G))
    distances = numpy.zeros((len(G), len(G)))
    for u, p in path_length.items():
        for v, d in p.items():
            distances[u][v] = d

    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage

    plt.figure(figsize=(25, 10))
    plt.title('Hierarchical Clustering Dendrogram')
    plt.xlabel('sample index')
    plt.ylabel('distance')
    hierarchy.dendrogram(
        Z,
        leaf_rotation=90.,  # rotates the x axis labels
        leaf_font_size=8.,  # font size for the x axis labels
    )
    plt.show()

    # This partition selection is arbitrary, for illustrative purposes
    membership = list(hierarchy.fcluster(Z, t=1.15))

    # Create collection of lists for blockmodel
    partition = defaultdict(list)
    for n, p in enumerate(membership):
        partition[p].append(n)
    return list(partition.values())
def _search_hull(self, n, max_res_nodes, lamb_cv):
    """Score candidate node pairs around ``max_res_nodes[0]`` by negative log-likelihood.

    Two node sets are built from the shortest-path lengths in
    ``self.graph[n]``: the nodes at path length 1 to 3 from
    ``max_res_nodes[0][0]`` (respectively ``max_res_nodes[0][1]``) that also
    pass the ``n_samples > 0`` membership test, plus the endpoint itself.
    Their Cartesian product, minus pairs already present in
    ``self.graph[n].edges``, is stored in a DataFrame with columns
    ``'nodes'`` and ``'nll'``.

    Parameters
    ----------
    n
        Key used to select the graph, ``self.graph[n]``; also forwarded to
        ``self._add_edge_get_nll``.
    max_res_nodes
        Only ``max_res_nodes[0][0]`` and ``max_res_nodes[0][1]`` are read.
    lamb_cv
        Forwarded unchanged to ``self._add_edge_get_nll``.

    Returns
    -------
    The ``.loc`` selection of the table using the row indexer
    ``(0, <index of the minimum 'nll'>)`` and all columns.
    NOTE(review): confirm which rows this tuple indexer is meant to select.
    """
    # TODO: put a progress bar
    # NOTE(review): lengths are computed for all pairs with cutoff=4, but only
    # two sources and lengths below 4 are used further down.
    spl = dict(nx.all_pairs_shortest_path_length(self.graph[n], cutoff=4))
    # get closest (within distance 3) AND sampled nodes to create a set of nodes to search over
    # The `k in np.array(np.where(...))` test compares k against the positions
    # where the "n_samples" attribute is > 0 -- assumes node labels coincide
    # with positions in what query_node_attributes returns; TODO confirm.
    n1 = [
        k for (k, v) in spl[max_res_nodes[0][0]].items()
        if v > 0 and v < 4 and k in np.array(
            np.where(
                query_node_attributes(self.graph[n], "n_samples") > 0))
    ]
    n2 = [
        k for (k, v) in spl[max_res_nodes[0][1]].items()
        if v > 0 and v < 4 and k in np.array(
            np.where(
                query_node_attributes(self.graph[n], "n_samples") > 0))
    ]
    # The endpoints themselves are always candidates.
    n1.append(max_res_nodes[0][0])
    n2.append(max_res_nodes[0][1])
    lr_hull = (tuple(i) for i in it.product(n1, n2))
    # removing nodes that are already connected in the default graph
    final_lr_hull = [
        x for x in list(lr_hull) if x not in list(self.graph[n].edges)
    ]
    df_hull = pd.DataFrame(index=np.arange(len(final_lr_hull)),
                           columns=['nodes', 'nll'])
    df_hull['nodes'] = final_lr_hull
    # The last row gets the negative log-likelihood of an Objective built on
    # self.graph[n] as it currently stands.
    # NOTE(review): that value is computed without adding the last row's own
    # pair -- confirm this is intended.
    obj = Objective(self.graph[n])
    obj._solve_lap_sys()
    obj._comp_mat_block_inv()
    obj._comp_inv_cov()
    df_hull.iloc[len(df_hull) - 1, 1] = obj.neg_log_lik()
    # Fill the remaining rows from the bottom up; each call receives the pair
    # of the row below and the pair of the current row (presumably the edge to
    # undo and the edge to add -- verify against _add_edge_get_nll).
    for idx in np.arange(0, len(df_hull) - 1)[::-1]:
        df_hull.iloc[idx, 1] = self._add_edge_get_nll(n, df_hull.iloc[idx + 1, 0], df_hull.iloc[idx, 0], lamb_cv)
    # print nodes connected by THE edge to give lowest negative log likelihood
    return (df_hull.loc[(0, df_hull['nll'].astype(float).idxmin()), :])
def blockmodel_output(G, t=1.15):
    """Creates hierarchical cluster of graph G from distance matrix

    G is relabelled to consecutive integers, clustered by complete linkage
    on the shortest-path-length matrix, and collapsed into a quotient graph
    with one node per cluster.

    Parameters
    ----------
    G : networkx graph
    t : float
        Threshold handed to ``hierarchy.fcluster``.

    Returns
    -------
    dict
        Keys ``'G'``, ``'H'`` (integer-labelled copy), ``'partitions'``
        (cluster id -> list of integer nodes), ``'BM'`` (quotient graph),
        ``'nm'`` (adjacency DataFrame of G with rows and columns in cluster
        order), ``'label_dict'`` (integer node -> original label),
        ``'order'`` (original labels in cluster order) and ``'distances'``,
        plus every entry of the mapping returned by ``homophily(G, 'type')``.
    """
    # Makes life easier to have consecutively labeled integer nodes
    H = nx.convert_node_labels_to_integers(G, label_attribute='label')

    # Create distance matrix
    path_length = dict(nx.all_pairs_shortest_path_length(H))
    distances = np.zeros((len(H), len(H)))
    for u, p in path_length.items():
        for v, d in p.items():
            distances[u][v] = d

    # Create hierarchical cluster
    Y = distance.squareform(distances)
    Z = hierarchy.complete(Y)  # Creates HC using farthest point linkage

    # This partition selection is arbitrary, for illustrative purposes
    membership = list(hierarchy.fcluster(Z, t=t))

    # Create collection of lists for blockmodel
    partitions = defaultdict(list)
    for n, p in enumerate(membership):
        partitions[p].append(n)

    # Build blockmodel graph (nx.blockmodel was replaced by quotient_graph
    # in networkx 2.0)
    p_values = list(partitions.values())
    BM = nx.quotient_graph(H, p_values, relabel=True)

    # H.nodes[n] is the supported accessor; the old H.node alias has been
    # removed from recent networkx releases.
    label_dict = {n: H.nodes[n]['label'] for n in H}
    order = [label_dict[item] for sublist in p_values for item in sublist]

    # to_pandas_dataframe was renamed to_pandas_adjacency in later networkx
    # releases; use whichever this installation provides.
    to_adjacency_frame = getattr(nx, 'to_pandas_adjacency', None)
    if to_adjacency_frame is None:
        to_adjacency_frame = nx.to_pandas_dataframe
    nm = to_adjacency_frame(G)
    # Reorder rows AND columns. Assigning nm.columns = nm.index only renamed
    # the columns, leaving their data in the original node order.
    nm = nm.reindex(index=order, columns=order)

    ho = homophily(G, 'type')

    output = {
        'G': G,
        'H': H,
        'partitions': partitions,
        'BM': BM,
        'nm': nm,
        'label_dict': label_dict,
        'order': order,
        'distances': distances
    }
    output.update(ho)
    return output
def path_dist(G, sep_dist=3 / 2):
    """Return the symmetric matrix of shortest-path lengths of ``G``.

    G is an undirected nx graph whose nodes are looked up by the integers
    0 .. n - 1.
    sep_dist is the modifier for non-connected nodes: such pairs are set to
    ``int(max distance * sep_dist)``.

    Only the upper triangle (diagonal included) is read from the path
    lengths; the lower triangle is obtained by adding the transpose.
    """
    lengths = dict(nx.all_pairs_shortest_path_length(G))
    size = len(G.nodes)

    rows = []
    for src in range(size):
        from_src = lengths[src]
        row = np.zeros(size)
        for dst in range(src, size):
            # -1 marks a pair with no path; replaced after symmetrising
            row[dst] = from_src.get(dst, -1)
        rows.append(row)

    upper = np.vstack(rows)
    dists = upper + upper.T
    dists[dists == -1] = int(np.max(dists) * sep_dist)
    return dists
def _compute_mapping_tbl(self):
    """
    Compute mapping table from source to target tags

    Entry ``[i, j]`` is the negated shortest-path length in ``self.G`` from
    source tag ``i`` to target tag ``j``, or ``-len(self.G)`` when no length
    is available for that pair within ``self.cutoff``.
    """
    tm = self.tag_manager
    tbl = np.zeros((len(tm.source_tags), len(tm.target_tags)))

    # The original note states that a cutoff of 2 retrieves the direct
    # translation and the neighbours of that translation; the value actually
    # applied is self.cutoff.
    spaths = dict(
        nx.all_pairs_shortest_path_length(self.G, cutoff=self.cutoff))
    unreachable = -len(self.G)

    for i, sg in enumerate(tm.source_tags):
        from_sg = spaths.get(sg, {})
        for j, tg in enumerate(tm.target_tags):
            tbl[i, j] = -from_sg[tg] if tg in from_sg else unreachable
    return tbl
def Calculate_travel_distances(self):
    """Fill ``self.distances`` for every ordered pair of distinct nodes.

    Pairs joined by an edge (in either orientation) get the value of
    ``get_distance`` applied to the two nodes' ``'location'`` attributes;
    that value is also stored on the edge as ``'Travel_time'``. All other
    pairs get the placeholder ``BigM``.

    Also stores the list of all-pairs shortest-path lengths of ``self.G``
    in ``self.shortest_paths``.

    NOTE(review): ``permutations(nodes, 2)`` never yields a node paired with
    itself, so ``self.distances`` has no ``(i, i)`` entries. The former
    ``elif i == j`` branch could never run and has been removed; add the
    self-pairs explicitly if callers need them.
    """
    BigM = 1000000  # depend on locations MaxX*MaxY
    graph = self.G

    self.distances = {}
    self.shortest_paths = list(nx.all_pairs_shortest_path_length(graph))

    for i, j in it.permutations(graph.nodes, 2):
        if (i, j) in graph.edges or (j, i) in graph.edges:
            # graph.nodes[...] replaces the graph.node alias, which has been
            # removed from recent networkx releases.
            pos1 = graph.nodes[i]['location']
            pos2 = graph.nodes[j]['location']
            dis = get_distance(pos1, pos2)
            graph.edges[min(i, j), max(i, j)]['Travel_time'] = dis
            self.distances[(i, j)] = dis
        else:
            self.distances[(i, j)] = BigM
def test_stress_min():
    """Run ``stress_minimization`` on a balanced tree with random 2-D starts.

    The tree has branching factor 2 and height 5. The start positions are
    a random ``(2**6 - 1, 2)`` tensor on the ``cuda`` device with gradients
    enabled, and the target distances are the tree's all-pairs
    shortest-path lengths converted with ``dict2tensor``.

    Earlier experiments (path and cycle graphs with 5 or 10 nodes, height 8)
    are no longer exercised here.
    """
    branching = 2
    height = 5
    n_points = branching**(height + 1) - 1

    X = torch.rand(n_points, 2, requires_grad=True, device=cuda)

    tree = nx.balanced_tree(branching, height)
    D = dict2tensor(dict(nx.all_pairs_shortest_path_length(tree)))

    stress_minimization(X, D, max_iter=1000)