# Common imports used by the implementations collected in this module.
import math
from math import sqrt
from warnings import warn

import networkx as nx
import numpy as np
from sklearn.preprocessing import normalize


def katz_in_query_set(G, Q=None, alpha=None):
    # The same power-iteration idea as eigenvector_centrality, adjusted for
    # the requirements of Katz centrality restricted to a query set Q.
    if Q is None:
        Q = list(G.nodes)
    max_iter = 100
    threshold = 1.0e-6
    nnodes = len(G.nodes)
    if alpha is None:
        # alpha is set as the inverse of the largest eigenvalue of A
        A = nx.adjacency_matrix(G).toarray()
        e = np.linalg.eigvals(A)
        alpha = 1.0 / max(e.real)
    x = dict((n, 1.0) for n in G)
    for i in range(max_iter):
        xlast = x  # save the x(t-1) vector
        x = dict.fromkeys(xlast, 0)
        # do the multiplication x^T = alpha * x^T A_Q + 1
        for n in x:  # n is each node
            for nbr in G[n]:  # nbr iterates over the neighbors of n
                Aij = 0
                if nbr in Q or n in Q:
                    # count the weight only if the edge (n, nbr) touches Q
                    Aij = G[n][nbr].get("weight", 1)
                x[nbr] += xlast[n] * Aij
        for n in x:
            x[n] = alpha * x[n] + 1.0  # add the constant term once per node
        # normalize vector
        try:
            s = 1.0 / sqrt(sum(v ** 2 for v in x.values()))  # this should never be zero?
        except ZeroDivisionError:
            s = 1.0
        for n in x:
            x[n] *= s
        # check convergence
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < nnodes * threshold:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
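
# Usage sketch (hypothetical data, not from the original sources): run
# katz_in_query_set on a small weighted graph, once with the default full
# query set and once restricted to Q = [1, 2]; the edge (3, 4) touches
# neither query node and is ignored in the second call.
def _demo_katz_in_query_set():
    G = nx.Graph()
    G.add_weighted_edges_from(
        [(0, 1, 1.0), (1, 2, 2.0), (2, 0, 1.0), (2, 3, 1.0), (3, 4, 1.0)])
    full = katz_in_query_set(G, alpha=0.1)               # Q defaults to all nodes
    subset = katz_in_query_set(G, Q=[1, 2], alpha=0.1)   # only edges touching Q count
    print(sorted(full.items()))
    print(sorted(subset.items()))
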
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6,
             nstart=None, weight='weight', dangling=None):
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())
    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())
    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T = xlast^T * W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
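
# Usage sketch (hypothetical data, not from the original sources): PageRank
# on a small directed graph, with and without a personalization vector
# biased entirely toward node 0.
def _demo_pagerank():
    G = nx.DiGraph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 0)])
    plain = pagerank(G)
    biased = pagerank(G, personalization={0: 1.0, 1: 0.0, 2: 0.0, 3: 0.0})
    print(sorted(plain.items()))
    print(sorted(biased.items()))
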
def pagerank_edgetypes_indirect(D, edgetype_scale, indirect_nodes,
                                max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    direct_nodes = [a for a in W if a not in indirect_nodes]
    x = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    p = dict.fromkeys(direct_nodes, 1.0 / len(direct_nodes))
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum(xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
                                   for n in x for nbr in W[n])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                if nbr in indirect_nodes:
                    # spread the contribution of an indirect node evenly
                    # over its neighbors
                    contribution = (xlast[n] * W[n][nbr][weight] *
                                    edgetype_scale[W[n][nbr]['type']] / len(W[nbr]))
                    for nbr_adj in W[nbr]:
                        x[nbr_adj] += contribution
                else:
                    x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
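
# Usage sketch (node names, edge types, and scales are hypothetical): edges
# carry a 'type' attribute scaled per type, and node 'm' is treated as an
# indirect hop whose incoming rank is passed straight through to its
# neighbors.
def _demo_pagerank_edgetypes_indirect():
    D = nx.DiGraph()
    D.add_edge('a', 'b', weight=1.0, type='cites')
    D.add_edge('b', 'm', weight=1.0, type='via')
    D.add_edge('m', 'a', weight=1.0, type='cites')
    scale = {'cites': 1.0, 'via': 0.5}
    ranks = pagerank_edgetypes_indirect(D, scale, indirect_nodes=['m'])
    print(sorted(ranks.items()))
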
def salsa_algorithm(graph, max_iter=1000, tolerance=1.0e-7, normalized=True):
    """We used networkx's HITS algorithm, renamed the variables, and made
    the modifications appropriate for SALSA."""
    if len(graph) == 0:
        raise ValueError("Graph has zero nodes")
    adjacency_matrix = nx.to_scipy_sparse_matrix(graph, nodelist=list(graph))  # A from slides
    (number_rows, number_cols) = adjacency_matrix.shape  # should be square
    adjacency_matrix_c = normalize(adjacency_matrix, norm='l1', axis=0)  # column-normalized A
    adjacency_matrix_r = normalize(adjacency_matrix, norm='l1', axis=1)  # row-normalized A
    authority_matrix = adjacency_matrix_r.T * adjacency_matrix_c  # authority matrix, Ar^T * Ac from slides
    a = np.ones((number_rows, 1)) / number_rows  # initial guess
    # power iteration on authority matrix
    iteration_index = 0
    while True:
        last_a = a
        a = authority_matrix * a
        a = a / a.max()
        # check convergence, l1 norm
        err = np.absolute(a - last_a).sum()
        if err < tolerance:
            break
        if iteration_index > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        iteration_index += 1
    a = np.asarray(a).flatten()
    h = np.asarray(adjacency_matrix_c * a).flatten()  # h = Ac * a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(graph, map(float, h)))
    authorities = dict(zip(graph, map(float, a)))
    return hubs, authorities
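
# Usage sketch (hypothetical data, not from the original sources): SALSA
# hub and authority scores on a small directed graph.
def _demo_salsa():
    G = nx.DiGraph([(0, 1), (0, 2), (1, 2), (2, 0), (3, 2)])
    hubs, authorities = salsa_algorithm(G)
    print(sorted(hubs.items()))
    print(sorted(authorities.items()))
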
def eigenvector_in_query_set(G, Q=None):
    # part of this code is from
    # https://networkx.github.io/documentation/stable/_modules/networkx/algorithms/centrality/eigenvector.html
    # reshaped to fit this purpose with a query set Q
    if Q is None:
        Q = list(G.nodes)
    max_iter = 100
    threshold = 1.0e-6
    # x is the starting vector
    x = dict((n, 1.0 / len(G)) for n in G)
    # normalize starting vector
    s = 1.0 / sum(x.values())
    for k in x:
        x[k] *= s
    nnodes = len(G.nodes)
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y^T = x^T A_Q
        for n in x:  # n is each node
            for nbr in G[n]:  # nbr iterates over the neighbors of n
                Aij = 0
                if nbr in Q or n in Q:
                    # count the weight only if the edge (n, nbr) touches Q
                    Aij = G[n][nbr].get("weight", 1)
                x[nbr] += xlast[n] * Aij
        # normalize vector
        try:
            s = 1.0 / sqrt(sum(v ** 2 for v in x.values()))  # this should never be zero?
        except ZeroDivisionError:
            s = 1.0
        for n in x:
            x[n] *= s
        # check convergence
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < nnodes * threshold:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
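
# Usage sketch (hypothetical data): eigenvector centrality restricted to
# edges that touch a query set, compared with the unrestricted score. A
# non-bipartite graph (it contains a triangle) is used so that the plain
# power iteration converges.
def _demo_eigenvector_in_query_set():
    G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3), (3, 4)])
    unrestricted = eigenvector_in_query_set(G)
    restricted = eigenvector_in_query_set(G, Q=[0, 1, 2])  # drops edge (3, 4)
    print(sorted(unrestricted.items()))
    print(sorted(restricted.items()))
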
def my_hits_alg(graph, max_iter=1000, tol=1.0e-6, nstart=None):
    # check if the graph is empty
    if len(graph) == 0:
        return {}, {}
    # if no starting vector is given, use a uniform one
    if nstart is None:
        hub_scores = dict.fromkeys(graph, 1.0 / graph.number_of_nodes())
    else:
        hub_scores = nstart
    # normalize starting vector
    normalizer = 1.0 / sum(hub_scores.values())
    for k in hub_scores:
        hub_scores[k] *= normalizer
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        hlast = hub_scores
        hub_scores = dict.fromkeys(hlast.keys(), 0)
        authority_scores = dict.fromkeys(hlast.keys(), 0)
        # matrix multiplication to calculate scores
        for n in hub_scores:
            for m in graph[n]:
                authority_scores[m] += hlast[n] * graph[n][m].get('weight', 1)
        for n in hub_scores:
            for m in graph[n]:
                hub_scores[n] += authority_scores[m] * graph[n][m].get('weight', 1)
        # normalize scores
        normalizer = 1.0 / max(hub_scores.values())
        for n in hub_scores:
            hub_scores[n] *= normalizer
        normalizer = 1.0 / max(authority_scores.values())
        for n in authority_scores:
            authority_scores[n] *= normalizer
        # check convergence, l1 norm
        err = sum(abs(hub_scores[n] - hlast[n]) for n in hub_scores)
        if err < tol:
            print('Iterations: ', _)
            break
    # if the algorithm fails to converge within the max number of iterations
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    # normalize scores
    normalizer = 1.0 / sum(authority_scores.values())
    for n in authority_scores:
        authority_scores[n] *= normalizer
    normalizer = 1.0 / sum(hub_scores.values())
    for n in hub_scores:
        hub_scores[n] *= normalizer
    return hub_scores, authority_scores
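
# Usage sketch (hypothetical data): hub and authority scores from the
# hand-rolled HITS implementation above on a small directed graph.
def _demo_my_hits_alg():
    G = nx.DiGraph([(0, 1), (0, 2), (1, 2), (2, 0)])
    hubs, authorities = my_hits_alg(G)
    print(sorted(hubs.items()))
    print(sorted(authorities.items()))
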
def katz_centrality(
    G,
    alpha=0.1,
    beta=1.0,
    max_iter=1000,
    tol=1.0e-6,
    nstart=None,
    normalized=True,
    weight=None,
):
    if len(G) == 0:
        return {}
    nnodes = G.number_of_nodes()
    if nstart is None:
        # choose starting vector with entries of 0
        x = {n: 0 for n in G}
    else:
        x = nstart
    try:
        b = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError) as e:
        b = beta
        if set(beta) != set(G):
            raise nx.NetworkXError("beta dictionary "
                                   "must have a value for every node") from e
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y = alpha * A^T x + beta
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        for n in x:
            x[n] = alpha * x[n] + b[n]
        # check convergence
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < nnodes * tol:
            if normalized:
                # normalize vector
                try:
                    s = 1.0 / sqrt(sum(v ** 2 for v in x.values()))  # this should never be zero?
                except ZeroDivisionError:
                    s = 1.0
            else:
                s = 1
            for n in x:
                x[n] *= s
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6,
             nstart=None, weight="weight", dangling=None):
    # PageRank value of each node
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(nstart.values()))
        x = {k: v / s for k, v in nstart.items()}
    if personalization is None:
        p = dict.fromkeys(W, 1.0 / N)
    else:
        s = float(sum(personalization.values()))
        p = {k: v / s for k, v in personalization.items()}
    if dangling is None:
        dangling_weights = p
    else:
        s = float(sum(dangling.values()))
        dangling_weights = {k: v / s for k, v in dangling.items()}
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights.get(n, 0) + (1.0 - alpha) * p.get(n, 0)
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def _hits_python(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        # normalize starting vector
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T = hlast^T * G
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        # now multiply h = Ga
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        # normalize vector
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        # normalize vector
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        # check convergence, l1 norm
        err = sum(abs(h[n] - hlast[n]) for n in h)
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
def katz_centrality(G, alpha, beta=1.0, max_iter=1000, tol=1.0e-6,
                    weight=None, normalized=True):
    if len(G) == 0:
        return {}
    nodes = G.number_of_nodes()
    dict_of_nodes = dict((n, 0) for n in G)
    try:
        beta_dictionary = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError):
        beta_dictionary = beta
        if set(beta) != set(G):
            raise nx.NetworkXError(
                'beta dictionary must have a value for every node')
    for i in range(max_iter):
        dict_of_nodes_helper = dict_of_nodes
        dict_of_nodes = dict.fromkeys(dict_of_nodes_helper, 0)
        for n in dict_of_nodes:
            for nbr in G[n]:
                dict_of_nodes[nbr] += dict_of_nodes_helper[n] * G[n][nbr].get(weight, 1)
        for n in dict_of_nodes:
            dict_of_nodes[n] = alpha * dict_of_nodes[n] + beta_dictionary[n]
        err = sum(abs(dict_of_nodes[n] - dict_of_nodes_helper[n])
                  for n in dict_of_nodes)
        if err < nodes * tol:
            if normalized:
                # rescale: map the smallest value to 0.0 and divide the
                # rest by the maximum
                min_item = min(dict_of_nodes.values())
                max_item = max(dict_of_nodes.values())
                for k, v in dict_of_nodes.items():
                    if dict_of_nodes[k] == min_item:
                        dict_of_nodes[k] = 0.0
                        continue
                    dict_of_nodes[k] = v / max_item
            return dict_of_nodes
    raise nx.PowerIterationFailedConvergence(max_iter)
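
# This module defines several functions named katz_centrality; keep a direct
# reference to this min/max-normalized variant before the name is shadowed
# by a later definition.
_katz_minmax = katz_centrality


# Usage sketch (hypothetical data): with this variant the least central node
# maps to 0.0 and the remaining scores are divided by the maximum.
def _demo_katz_minmax():
    G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
    print(sorted(_katz_minmax(G, alpha=0.1).items()))
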
def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None):
    """Compute the eigenvector centrality."""
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept('cannot compute centrality for the'
                                          ' null graph')
    # If no initial vector is provided, start with the all-ones vector.
    if nstart is None:
        nstart = {v: 1 for v in G}
    if all(v == 0 for v in nstart.values()):
        raise nx.NetworkXError('initial vector cannot have all zero values')
    # Normalize the initial vector so that each entry is in [0, 1]. This is
    # guaranteed to never have a divide-by-zero error by the previous line.
    nstart_sum = sum(nstart.values())
    x = {k: v / nstart_sum for k, v in nstart.items()}
    nnodes = G.number_of_nodes()
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = xlast.copy()  # Start with xlast times I to iterate with (A+I)
        # do the multiplication y^T = x^T A (left eigenvector)
        for n in x:
            for nbr in G[n]:
                w = G[n][nbr].get(weight, 1) if weight else 1
                x[nbr] += xlast[n] * w
        # Normalize the vector. The normalization denominator `norm`
        # should never be zero by the Perron--Frobenius
        # theorem. However, in case it is due to numerical error, we
        # assume the norm to be one instead.
        norm = sqrt(sum(z ** 2 for z in x.values())) or 1
        x = {k: v / norm for k, v in x.items()}
        # Check for convergence (in the L_1 norm).
        if sum(abs(x[n] - xlast[n]) for n in x) < nnodes * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def pagerank_edgetypes(D, edgetype_scale, max_iter=100, tol=1.0e-6, weight='weight'):
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    x = dict.fromkeys(W, 1.0 / N)
    p = dict.fromkeys(W, 1.0 / N)
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0.0)
        weight_to_distribute = sum(xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
                                   for n in x for nbr in W[n])
        undistributed_weight = 1 - weight_to_distribute
        for n in x:
            for nbr in W[n]:
                x[nbr] += xlast[n] * W[n][nbr][weight] * edgetype_scale[W[n][nbr]['type']]
            x[n] += undistributed_weight * p.get(n, 0)
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
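
# Usage sketch (edge types and scales are hypothetical): rank flowing along
# each edge is attenuated by its type's scale, and the scaled-away mass is
# redistributed uniformly.
def _demo_pagerank_edgetypes():
    D = nx.DiGraph()
    D.add_edge('a', 'b', weight=1.0, type='strong')
    D.add_edge('b', 'c', weight=1.0, type='weak')
    D.add_edge('c', 'a', weight=1.0, type='strong')
    ranks = pagerank_edgetypes(D, {'strong': 1.0, 'weak': 0.5})
    print(sorted(ranks.items()))
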
def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    # hub and authority values of each node
    if len(G) == 0:
        return {}, {}
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        err = sum(abs(h[n] - hlast[n]) for n in h)
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
def pagerank_scipy(G, alpha=0.85, personalization=None, max_iter=100,
                   tol=1.0e-6, weight='weight', dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each
      node. By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    weight : key, optional
      Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges. The dict key is the node the outedge points to
      and the dict value is the weight of that outedge. By default, dangling
      nodes are given outedges according to the personalization vector
      (uniform if not specified). This must be selected to result in an
      irreducible transition matrix (see notes under google_matrix). It may
      be common to have the dangling dict to be the same as the
      personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}
    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    # initial vector
    x = np.repeat(1.0 / N, N)
    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        missing = set(nodelist) - set(personalization)
        if missing:
            raise nx.NetworkXError('Personalization vector dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        p = np.array([personalization[n] for n in nodelist], dtype=float)
        p = p / p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        missing = set(nodelist) - set(dangling)
        if missing:
            raise nx.NetworkXError('Dangling node dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling[n] for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + \
            (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
def pagerank(G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-6,
             nstart=None, weight='weight', dangling=None):
    """Return the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector" consisting of a dictionary with a
      key for every graph node and nonzero personalization value for each
      node. By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges. The dict key is the node the outedge points to
      and the dict value is the weight of that outedge. By default, dangling
      nodes are given outedges according to the personalization vector
      (uniform if not specified). This must be selected to result in an
      irreducible transition matrix (see notes under google_matrix). It may
      be common to have the dangling dict to be the same as the
      personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop after
    an error tolerance of ``len(G) * tol`` has been reached. If the
    number of iterations exceed `max_iter`, a
    :exc:`networkx.exception.PowerIterationFailedConvergence` exception
    is raised.

    The PageRank algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs by converting each edge in the
    directed graph to two edges.

    See Also
    --------
    pagerank_numpy, pagerank_scipy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    if len(G) == 0:
        return {}
    if not G.is_directed():
        D = G.to_directed()
    else:
        D = G
    # Create a copy in (right) stochastic form
    W = nx.stochastic_graph(D, weight=weight)
    N = W.number_of_nodes()
    # Choose fixed starting vector if not given
    if nstart is None:
        x = dict.fromkeys(W, 1.0 / N)
    else:
        # Normalized nstart vector
        s = float(sum(nstart.values()))
        x = dict((k, v / s) for k, v in nstart.items())
    if personalization is None:
        # Assign uniform personalization vector if not given
        p = dict.fromkeys(W, 1.0 / N)
    else:
        missing = set(G) - set(personalization)
        if missing:
            raise nx.NetworkXError('Personalization dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        s = float(sum(personalization.values()))
        p = dict((k, v / s) for k, v in personalization.items())
    if dangling is None:
        # Use personalization vector if dangling vector not specified
        dangling_weights = p
    else:
        missing = set(G) - set(dangling)
        if missing:
            raise nx.NetworkXError('Dangling node dictionary '
                                   'must have a value for every node. '
                                   'Missing nodes %s' % missing)
        s = float(sum(dangling.values()))
        dangling_weights = dict((k, v / s) for k, v in dangling.items())
    dangling_nodes = [n for n in W if W.out_degree(n, weight=weight) == 0.0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast.keys(), 0)
        danglesum = alpha * sum(xlast[n] for n in dangling_nodes)
        for n in x:
            # this matrix multiply looks odd because it is
            # doing a left multiply x^T = xlast^T * W
            for nbr in W[n]:
                x[nbr] += alpha * xlast[n] * W[n][nbr][weight]
            x[n] += danglesum * dangling_weights[n] + (1.0 - alpha) * p[n]
        # check convergence, l1 norm
        err = sum(abs(x[n] - xlast[n]) for n in x)
        if err < N * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def eigenvector_centrality(G, max_iter=100, tol=1.0e-6, nstart=None, weight=None):
    r"""Compute the eigenvector centrality for the graph `G`.

    Eigenvector centrality computes the centrality for a node based on the
    centrality of its neighbors. The eigenvector centrality for node $i$ is

    .. math::

        Ax = \lambda x

    where $A$ is the adjacency matrix of the graph `G` with eigenvalue
    $\lambda$. By virtue of the Perron–Frobenius theorem, there is a unique
    and positive solution if $\lambda$ is the largest eigenvalue associated
    with the eigenvector of the adjacency matrix $A$ ([2]_).

    Parameters
    ----------
    G : graph
      A networkx graph

    max_iter : integer, optional (default=100)
      Maximum number of iterations in power method.

    tol : float, optional (default=1.0e-6)
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional (default=None)
      Starting value of eigenvector iteration for each node.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with eigenvector centrality as the value.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> centrality = nx.eigenvector_centrality(G)
    >>> sorted((v, '{:0.2f}'.format(c)) for v, c in centrality.items())
    [(0, '0.37'), (1, '0.60'), (2, '0.60'), (3, '0.37')]

    Raises
    ------
    NetworkXPointlessConcept
        If the graph `G` is the null graph.

    NetworkXError
        If each value in `nstart` is zero.

    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    See Also
    --------
    eigenvector_centrality_numpy
    pagerank
    hits

    Notes
    -----
    The measure was introduced by [1]_ and is discussed in [2]_.

    The power iteration method is used to compute the eigenvector and
    convergence is **not** guaranteed. Our method stops after ``max_iter``
    iterations or when the change in the computed vector between two
    iterations is smaller than an error tolerance of
    ``G.number_of_nodes() * tol``.

    This implementation uses ($A + I$) rather than the adjacency matrix
    $A$ because it shifts the spectrum to enable discerning the correct
    eigenvector even for networks with multiple dominant eigenvalues.

    For directed graphs this is "left" eigenvector centrality which
    corresponds to the in-edges in the graph. For out-edges eigenvector
    centrality first reverse the graph with ``G.reverse()``.

    References
    ----------
    .. [1] Phillip Bonacich.
       "Power and Centrality: A Family of Measures."
       *American Journal of Sociology* 92(5):1170–1182, 1986
       <http://www.leonidzhukov.net/hse/2014/socialnetworks/papers/Bonacich-Centrality.pdf>
    .. [2] Mark E. J. Newman.
       *Networks: An Introduction.*
       Oxford University Press, USA, 2010, pp. 169.
    """
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept('cannot compute centrality for the'
                                          ' null graph')
    # If no initial vector is provided, start with the all-ones vector.
    if nstart is None:
        nstart = {v: 1 for v in G}
    if all(v == 0 for v in nstart.values()):
        raise nx.NetworkXError('initial vector cannot have all zero values')
    # Normalize the initial vector so that each entry is in [0, 1]. This is
    # guaranteed to never have a divide-by-zero error by the previous line.
    x = {k: v / sum(nstart.values()) for k, v in nstart.items()}
    nnodes = G.number_of_nodes()
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = xlast.copy()  # Start with xlast times I to iterate with (A+I)
        # do the multiplication y^T = x^T A (left eigenvector)
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        # Normalize the vector. The normalization denominator `norm`
        # should never be zero by the Perron--Frobenius
        # theorem. However, in case it is due to numerical error, we
        # assume the norm to be one instead.
        norm = sqrt(sum(z ** 2 for z in x.values())) or 1
        x = {k: v / norm for k, v in x.items()}
        # Check for convergence (in the L_1 norm).
        if sum(abs(x[n] - xlast[n]) for n in x) < nnodes * tol:
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def hits(G, max_iter=100, tol=1.0e-8, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-32, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vector if not given
    if nstart is None:
        h = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    else:
        h = nstart
        # normalize starting vector
        s = 1.0 / sum(h.values())
        for k in h:
            h[k] *= s
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        hlast = h
        h = dict.fromkeys(hlast.keys(), 0)
        a = dict.fromkeys(hlast.keys(), 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T = hlast^T * G
        for n in h:
            for nbr in G[n]:
                a[nbr] += hlast[n] * G[n][nbr].get("weight", 1)
        # now multiply h = Ga
        for n in h:
            for nbr in G[n]:
                h[n] += a[nbr] * G[n][nbr].get("weight", 1)
        # normalize vector
        s = 1.0 / max(h.values())
        for n in h:
            h[n] *= s
        # normalize vector
        s = 1.0 / max(a.values())
        for n in a:
            a[n] *= s
        # check convergence, l1 norm
        err = sum(abs(h[n] - hlast[n]) for n in h)
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
def hits_scipy(G, max_iter=100, tol=1.0e-6, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    try:
        import numpy as np
    except ImportError as e:
        raise ImportError("hits_scipy() requires SciPy and NumPy:"
                          "http://scipy.org/ http://numpy.org/") from e
    if len(G) == 0:
        return {}, {}
    M = nx.to_scipy_sparse_matrix(G, nodelist=list(G))
    (n, m) = M.shape  # should be square
    A = M.T * M  # authority matrix
    x = np.ones((n, 1)) / n  # initial guess
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = A * x
        x = x / x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1
    a = np.asarray(x).flatten()
    # h = M * a
    h = np.asarray(M * a).flatten()
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
def katz_centrality(
    G,
    alpha=0.1,
    beta=1.0,
    max_iter=1000,
    tol=1.0e-6,
    nstart=None,
    normalized=True,
    weight=None,
):
    r"""Compute the Katz centrality for the nodes of the graph G.

    Katz centrality computes the centrality for a node based on the
    centrality of its neighbors. It is a generalization of the eigenvector
    centrality. The Katz centrality for node $i$ is

    .. math::

        x_i = \alpha \sum_{j} A_{ij} x_j + \beta,

    where $A$ is the adjacency matrix of graph G with eigenvalues
    $\lambda$.

    The parameter $\beta$ controls the initial centrality and

    .. math::

        \alpha < \frac{1}{\lambda_{\max}}.

    Katz centrality computes the relative influence of a node within a
    network by measuring the number of the immediate neighbors (first
    degree nodes) and also all other nodes in the network that connect
    to the node under consideration through these immediate neighbors.

    Extra weight can be provided to immediate neighbors through the
    parameter $\beta$. Connections made with distant neighbors
    are, however, penalized by an attenuation factor $\alpha$ which
    should be strictly less than the inverse largest eigenvalue of the
    adjacency matrix in order for the Katz centrality to be computed
    correctly. More information is provided in [1]_.

    Parameters
    ----------
    G : graph
      A NetworkX graph.

    alpha : float
      Attenuation factor

    beta : scalar or dictionary, optional (default=1.0)
      Weight attributed to the immediate neighborhood. If not a scalar, the
      dictionary must have a value for every node.

    max_iter : integer, optional (default=1000)
      Maximum number of iterations in power method.

    tol : float, optional (default=1.0e-6)
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of Katz iteration for each node.

    normalized : bool, optional (default=True)
      If True normalize the resulting values.

    weight : None or string, optional (default=None)
      If None, all edge weights are considered equal.
      Otherwise holds the name of the edge attribute used as weight.
      In this measure the weight is interpreted as the connection strength.

    Returns
    -------
    nodes : dictionary
       Dictionary of nodes with Katz centrality as the value.

    Raises
    ------
    NetworkXError
       If the parameter `beta` is not a scalar but lacks a value for at least
       one node

    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    Examples
    --------
    >>> import math
    >>> G = nx.path_graph(4)
    >>> phi = (1 + math.sqrt(5)) / 2.0  # largest eigenvalue of adj matrix
    >>> centrality = nx.katz_centrality(G, 1 / phi - 0.01)
    >>> for n, c in sorted(centrality.items()):
    ...     print(f"{n} {c:.2f}")
    0 0.37
    1 0.60
    2 0.60
    3 0.37

    See Also
    --------
    katz_centrality_numpy
    eigenvector_centrality
    eigenvector_centrality_numpy
    pagerank
    hits

    Notes
    -----
    Katz centrality was introduced by [2]_.

    This algorithm uses the power method to find the eigenvector
    corresponding to the largest eigenvalue of the adjacency matrix of ``G``.
    The parameter ``alpha`` should be strictly less than the inverse of
    largest eigenvalue of the adjacency matrix for the algorithm to converge.
    You can use ``max(nx.adjacency_spectrum(G))`` to get $\lambda_{\max}$ the
    largest eigenvalue of the adjacency matrix.

    The iteration will stop after ``max_iter`` iterations or an error
    tolerance of ``number_of_nodes(G) * tol`` has been reached.

    When $\alpha = 1/\lambda_{\max}$ and $\beta=0$, Katz centrality is the
    same as eigenvector centrality.

    For directed graphs this finds "left" eigenvectors which corresponds
    to the in-edges in the graph. For out-edges Katz centrality
    first reverse the graph with ``G.reverse()``.

    References
    ----------
    .. [1] Mark E. J. Newman:
       Networks: An Introduction.
       Oxford University Press, USA, 2010, p. 720.
    .. [2] Leo Katz:
       A New Status Index Derived from Sociometric Index.
       Psychometrika 18(1):39–43, 1953
       https://link.springer.com/content/pdf/10.1007/BF02289026.pdf
    """
    if len(G) == 0:
        return {}
    nnodes = G.number_of_nodes()
    if nstart is None:
        # choose starting vector with entries of 0
        x = {n: 0 for n in G}
    else:
        x = nstart
    try:
        b = dict.fromkeys(G, float(beta))
    except (TypeError, ValueError, AttributeError) as err:
        b = beta
        if set(beta) != set(G):
            raise nx.NetworkXError("beta dictionary "
                                   "must have a value for every node") from err
    # make up to max_iter iterations
    for i in range(max_iter):
        xlast = x
        x = dict.fromkeys(xlast, 0)
        # do the multiplication y = alpha * A^T x + beta
        for n in x:
            for nbr in G[n]:
                x[nbr] += xlast[n] * G[n][nbr].get(weight, 1)
        for n in x:
            x[n] = alpha * x[n] + b[n]
        # check convergence
        error = sum(abs(x[n] - xlast[n]) for n in x)
        if error < nnodes * tol:
            if normalized:
                # normalize vector
                try:
                    s = 1.0 / math.hypot(*x.values())  # this should never be zero?
                except ZeroDivisionError:
                    s = 1.0
            else:
                s = 1
            for n in x:
                x[n] *= s
            return x
    raise nx.PowerIterationFailedConvergence(max_iter)
def hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    .. deprecated:: 2.6

       hits_scipy is deprecated and will be removed in networkx 3.0

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    import numpy as np
    import warnings

    warnings.warn(
        ("networkx.hits_scipy is deprecated and will be removed "
         "in NetworkX 3.0, use networkx.hits instead."),
        DeprecationWarning,
        stacklevel=2,
    )
    if len(G) == 0:
        return {}, {}
    A = nx.to_scipy_sparse_array(G, nodelist=list(G))
    (n, m) = A.shape  # should be square
    ATA = A.T @ A  # authority matrix
    # choose fixed starting vector if not given
    if nstart is None:
        x = np.ones((n, 1)) / n
    else:
        x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
        x = x / x.sum()
    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = ATA @ x
        x /= x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1
    a = x.flatten()
    h = A @ a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
def _compute_page_rank(self, max_iter=100):
    """Return the PageRank of the nodes in the graph.

    Adapted to return the number of iterations necessary to compute the
    PageRank.

    Source
    ------
    github.com/networkx/networkx/blob/master/networkx/algorithms/link_analysis/pagerank_alg.py
    """
    # Init graph structure
    if self._input_graph is None:
        # Compute input graph if not defined
        self.draw_input_graph(show_graph=False)
    W = nx.stochastic_graph(self._input_graph, weight=None)
    N = W.number_of_nodes()

    # Init fixed-point constants
    d = self._to_fp(self._get_damping_factor())
    tol = self._to_fp(TOL)
    ZERO = self._to_fp(0)
    ONE = self._to_fp(1.)
    N = self._to_fp(N)
    damping_sum = self._to_fp(self._get_damping_sum())

    # Iterate up to max_iter iterations
    x = dict.fromkeys(W, ONE / N)
    for iter in range(max_iter):
        logger.debug('\n===== TIME STEP = {} ====='.format(iter))
        xlast = x
        x = dict.fromkeys(xlast.keys(), ZERO)
        for node in x:
            pkt = xlast[node] / self._to_fp(len(W[node]))
            logger.debug('[t=%04d|#%3s] Sending pkt %f[%s]' %
                         (iter, node, pkt, self._to_hex(pkt)))
            # Exchange ranks
            for conn_node in W[node]:  # edge: node -> conn_node
                prev = x[conn_node]
                # Simulates payload-lossy encoding of the iteration
                # See c_models/src/common/in_spikes.h:in_spikes_payload_format
                x[conn_node] += ((pkt >> ITER_BITS) << ITER_BITS)
                logger.debug("[idx=%3s] %f[%s] + %f[%s] = %f[%s]" %
                             (conn_node, prev, self._to_hex(prev),
                              pkt, self._to_hex(pkt),
                              x[conn_node], self._to_hex(x[conn_node])))
        # Compute dangling factor
        if d != ONE:
            for node in x:
                prev = x[node]
                x[node] = damping_sum + d * x[node]
                logger.debug("[idx=%3s] %f[%s] * %f[%s] + %f[%s] = %f[%s]" %
                             (node, d, self._to_hex(d),
                              prev, self._to_hex(prev),
                              damping_sum, self._to_hex(damping_sum),
                              x[node], self._to_hex(x[node])))
        # Check convergence, l1 norm
        err = sum(abs(x[node] - xlast[node]) for node in x)
        if err < N * tol:
            if self._labels:
                x = np.array([np.float64(x[v]) for v in self._labels])
            return x, iter + 1  # iter t+1 happens at the end of time t
    raise nx.PowerIterationFailedConvergence(max_iter)
def signed_hits(G, max_iter=100, tol=1.0e-8, normalized=True):
    if type(G) == nx.MultiGraph or type(G) == nx.MultiDiGraph:
        raise Exception("hits() not defined for graphs with multiedges.")
    if len(G) == 0:
        return {}, {}
    # choose fixed starting vectors
    h_p = dict.fromkeys(G, 1.0 / G.number_of_nodes())
    h_n = dict.fromkeys(G, -1.0 / G.number_of_nodes())
    # h and a must be distinct dicts; aliasing them to the same object would
    # make the hub and authority outputs overwrite each other
    h = dict.fromkeys(G, 0.0)
    a = dict.fromkeys(G, 0.0)
    for _ in range(max_iter):  # power iteration: make up to max_iter iterations
        h_p_last = h_p
        h_n_last = h_n
        h_p = dict.fromkeys(h_p_last.keys(), 0)
        h_n = dict.fromkeys(h_n_last.keys(), 0)
        a_p = dict.fromkeys(h_p_last.keys(), 0)
        a_n = dict.fromkeys(h_n_last.keys(), 0)
        # this "matrix multiply" looks odd because it is
        # doing a left multiply a^T = hlast^T * G
        for u in h_p:
            for v in G.pred[u]:
                if G[v][u]['weight'] >= 0:
                    a_p[u] += h_p_last[v] * G[v][u]['weight']
                else:
                    a_n[u] -= h_n_last[v] * G[v][u]['weight']
        for u in h_p:
            for v in G.succ[u]:
                if G[u][v]['weight'] >= 0:
                    h_p[u] += a_p[v] * G[u][v]['weight']
                else:
                    h_n[u] -= a_n[v] * G[u][v]['weight']
        # normalize vector
        s = 1.0 / max(h_p.values())
        for n in h_p:
            h_p[n] *= s
        # normalize vector
        s = -1.0 / min(h_n.values())
        for n in h_n:
            h_n[n] *= s
        # normalize vector
        s = 1.0 / max(a_p.values())
        for n in a_p:
            a_p[n] *= s
        # normalize vector
        s = -1.0 / min(a_n.values())
        for n in a_n:
            a_n[n] *= s
        for key in h:
            h[key] = h_p[key] - h_n[key]
            a[key] = a_p[key] - a_n[key]
        # check convergence, l1 norm
        err = sum([abs(h_p[n] - h_p_last[n]) for n in h_p] +
                  [abs(h_n[n] - h_n_last[n]) for n in h_n])
        if err < tol:
            break
    else:
        raise nx.PowerIterationFailedConvergence(max_iter)
    if normalized:
        s = 1.0 / sum(a.values())
        for n in a:
            a[n] *= s
        s = 1.0 / sum(h.values())
        for n in h:
            h[n] *= s
    return h, a
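
# Usage sketch (hypothetical data): signed HITS expects a DiGraph whose
# every edge carries a signed 'weight'; both positive and negative edges
# are included so the negative-part normalizations are well defined.
def _demo_signed_hits():
    G = nx.DiGraph()
    G.add_edge(0, 1, weight=1.0)
    G.add_edge(1, 2, weight=-1.0)
    G.add_edge(2, 0, weight=1.0)
    G.add_edge(0, 2, weight=-0.5)
    hubs, authorities = signed_hits(G)
    print(sorted(hubs.items()))
    print(sorted(authorities.items()))
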
def futureclusterank_CT_scipy(G, alpha=0.2, gamma=0.7, delta=0.1,
                              personalization=None, clusterization=None,
                              max_iter=100, tol=1.0e-6, weight='weight',
                              dangling=None):
    """Return the FutureClusterRank CT of the nodes in the graph.

    It extends PageRank, which computes a ranking of the nodes in the graph
    G based on the structure of the incoming links and was originally
    designed as an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Parameter for FutureClusterRank CT, default=0.2

    gamma : float, optional
      Parameter for FutureClusterRank CT, default=0.7

    delta : float, optional
      Parameter for FutureClusterRank CT, default=0.1

    personalization : dict, optional
      The "personalization vector" consisting of a dictionary with a key for
      each node in some subset of graph nodes and a personalization value for
      each of those. At least one personalization value must be non-zero.
      Nodes not in the dictionary receive a default value of 0.5.
      By default, a uniform distribution is used.

    clusterization : dict, optional
      The "clusterization vector" consisting of a dictionary with a key for
      each node in some subset of graph nodes and a clusterization value for
      each of those. At least one clusterization value must be non-zero.
      Nodes not in the dictionary receive a default value of 0.33.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    weight : key, optional
      Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges. The dict key is the node the outedge points to
      and the dict value is the weight of that outedge. By default, dangling
      nodes are given outedges according to the personalization vector
      (uniform if not specified). This must be selected to result in an
      irreducible transition matrix (see notes under google_matrix). It may
      be common to have the dangling dict to be the same as the
      personalization dict.

    Returns
    -------
    FutureClusterRank CT : dictionary
       Dictionary of nodes with FutureClusterRank as value
    """
    import scipy.sparse

    N = len(G)
    if N == 0:
        return {}
    nodelist = list(G)
    M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
                                  dtype=float)
    S = np.array(M.sum(axis=1)).flatten()
    S[S != 0] = 1.0 / S[S != 0]
    Q = scipy.sparse.spdiags(S.T, 0, *M.shape, format='csr')
    M = Q * M
    # initial vector
    x = np.repeat(1.0 / N, N)
    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0.5) for n in nodelist],
                     dtype=float)
        p = p / p.sum()
    # Clusterization vector
    if clusterization is None:
        q = np.repeat(1.0 / N, N)
    else:
        q = np.array([clusterization.get(n, 0.33) for n in nodelist],
                     dtype=float)
        q = q / q.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x * M + sum(x[is_dangling]) * dangling_weights) + \
            gamma * p + delta * q + \
            (1 - alpha - gamma - delta) * np.repeat(1.0 / N, N)
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
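
# Usage sketch (graph, vectors, and their interpretations are hypothetical):
# FutureClusterRank CT mixes the link structure with a personalization
# vector and a clusterization vector.
def _demo_futureclusterank_CT():
    G = nx.DiGraph([(1, 2), (2, 3), (3, 1), (1, 3)])
    person = {1: 0.7, 2: 0.2, 3: 0.1}   # e.g. per-node recency scores
    cluster = {1: 0.4, 2: 0.4, 3: 0.2}  # e.g. per-node cluster scores
    ranks = futureclusterank_CT_scipy(G, personalization=person,
                                      clusterization=cluster)
    print(sorted(ranks.items()))
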
def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector" consisting of a dictionary with a key for
      each node in some subset of graph nodes and a personalization value for
      each of those. At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges. The dict key is the node the outedge points to
      and the dict value is the weight of that outedge. By default, dangling
      nodes are given outedges according to the personalization vector
      (uniform if not specified). This must be selected to result in an
      irreducible transition matrix (see notes under google_matrix). It may
      be common to have the dangling dict to be the same as the
      personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    msg = ("networkx.pagerank_scipy is deprecated and will be removed in "
           "NetworkX 3.0, use networkx.pagerank instead.")
    warn(msg, DeprecationWarning, stacklevel=2)
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    N = len(G)
    if N == 0:
        return {}
    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight,
                                 dtype=float)
    S = A.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    # TODO: csr_array
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
    A = Q @ A
    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()
    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p = p / p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]
    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)