def test_format_keyword_raise(self):
    with pytest.raises(nx.NetworkXError):
        WP4 = nx.Graph()
        WP4.add_edges_from(
            (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3)
        )
        P4 = path_graph(4)
        nx.to_scipy_sparse_array(P4, format="any_other")
def test_selfloop_digraph(self):
    G = nx.DiGraph([(1, 1)])
    M = nx.to_scipy_sparse_array(G)
    np.testing.assert_equal(M.toarray(), np.array([[1]]))

    G.add_edges_from([(2, 3), (3, 4)])
    M = nx.to_scipy_sparse_array(G, nodelist=[2, 3, 4])
    np.testing.assert_equal(
        M.toarray(), np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]])
    )
def test_weight_keyword(self):
    WP4 = nx.Graph()
    WP4.add_edges_from(
        (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3)
    )
    P4 = path_graph(4)
    A = nx.to_scipy_sparse_array(P4)
    np.testing.assert_equal(
        A.todense(), nx.to_scipy_sparse_array(WP4, weight=None).todense()
    )
    np.testing.assert_equal(
        0.5 * A.todense(), nx.to_scipy_sparse_array(WP4).todense()
    )
    np.testing.assert_equal(
        0.3 * A.todense(), nx.to_scipy_sparse_array(WP4, weight="other").todense()
    )
def test_nodelist(self):
    """Conversion from graph to sparse matrix to graph with nodelist."""
    P4 = path_graph(4)
    P3 = path_graph(3)
    nodelist = list(P3.nodes())
    A = nx.to_scipy_sparse_array(P4, nodelist=nodelist)
    GA = nx.Graph(A)
    assert nx.is_isomorphic(GA, P3)

    pytest.raises(nx.NetworkXError, nx.to_scipy_sparse_array, P3, nodelist=[])
    # Test nodelist duplicates.
    long_nl = nodelist + [0]
    pytest.raises(nx.NetworkXError, nx.to_scipy_sparse_array, P3, nodelist=long_nl)
    # Test nodelist contains non-nodes.
    non_nl = [-1, 0, 1, 2]
    pytest.raises(nx.NetworkXError, nx.to_scipy_sparse_array, P3, nodelist=non_nl)
def __init__(self, transform: Optional[Callable] = None):
    super().__init__('.', transform)

    import networkx as nx

    G = nx.karate_club_graph()

    x = torch.eye(G.number_of_nodes(), dtype=torch.float)

    # `to_scipy_sparse_matrix` was renamed to `to_scipy_sparse_array`
    # in NetworkX 2.7; support both.
    if hasattr(nx, 'to_scipy_sparse_array'):
        adj = nx.to_scipy_sparse_array(G).tocoo()
    else:
        adj = nx.to_scipy_sparse_matrix(G).tocoo()
    row = torch.from_numpy(adj.row.astype(np.int64)).to(torch.long)
    col = torch.from_numpy(adj.col.astype(np.int64)).to(torch.long)
    edge_index = torch.stack([row, col], dim=0)

    # Create communities.
    y = torch.tensor([
        1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1,
        0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0
    ], dtype=torch.long)

    # Select a single training node for each community
    # (we just use the first one).
    train_mask = torch.zeros(y.size(0), dtype=torch.bool)
    for i in range(int(y.max()) + 1):
        train_mask[(y == i).nonzero(as_tuple=False)[0]] = True

    data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask)

    self.data, self.slices = self.collate([data])
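# Usage sketch for the dataset constructor above, assuming it belongs to the
# `KarateClub` class from torch_geometric.datasets (the class name and import
# path are assumptions; they are not shown in the snippet itself).

def demo_karate_club_dataset():
    from torch_geometric.datasets import KarateClub

    dataset = KarateClub()
    data = dataset[0]
    # 34 nodes with identity features and one training node per community.
    print(data.num_nodes, data.x.shape, int(data.train_mask.sum()))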
def test_adjacency_interface_scipy(self):
    A = nx.to_scipy_sparse_array(self.Gs, dtype="d")
    pos = nx.drawing.layout._sparse_fruchterman_reingold(A)
    assert pos.shape == (6, 2)
    pos = nx.drawing.layout._sparse_spectral(A)
    assert pos.shape == (6, 2)
    pos = nx.drawing.layout._sparse_fruchterman_reingold(A, dim=3)
    assert pos.shape == (6, 3)
def test_ordering(self):
    G = nx.DiGraph()
    G.add_edge(1, 2)
    G.add_edge(2, 3)
    G.add_edge(3, 1)
    M = nx.to_scipy_sparse_array(G, nodelist=[3, 2, 1])
    np.testing.assert_equal(
        M.toarray(), np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]])
    )
def MCL(cdr3, edgelist=None, mcl_hyper=[1.2, 2], outfile=None):
    """
    Perform clustering on a network of CDR3 amino acid sequences with
    a known Hamming distance, using the Markov clustering (MCL)
    algorithm. For more info about the inflation and expansion
    parameters, visit: https://micans.org/mcl/

    Parameters
    ----------
    cdr3 : iterable
        CDR3 amino acid sequences to cluster. Used to build the
        edgelist when `edgelist` is None.
    edgelist : set, optional
        Tab-separated edgelist. The default is None.
    mcl_hyper : list, optional
        MCL hyperparameters: inflation and expansion.
        The default is [1.2, 2].
    outfile : str, optional
        Name of outfile. The default is None.

    Returns
    -------
    clusters : pd.DataFrame
        pd.DataFrame containing two columns: 'CDR3' and 'cluster'.
        The first column contains CDR3 sequences, the second column
        contains the corresponding cluster ids.
    """
    if edgelist is None:
        edgelist = create_edgelist(cdr3)

    try:
        G = nx.parse_adjlist(edgelist, nodetype=str)
        m = nx.to_scipy_sparse_array(G)

        # Run MCL
        result = mcl.run_mcl(m, inflation=mcl_hyper[0], expansion=mcl_hyper[1])
        mcl_output = mcl.get_clusters(result)
        identifiers = list(G.nodes())

        # Map cluster ids back to seqs
        cluster_ids = dict()
        for i in range(len(mcl_output)):
            cluster_ids[i] = list(identifiers[i] for i in mcl_output[i])

        # Generate nodelist
        clusters = {"CDR3": [], "cluster": []}
        for c in cluster_ids:
            for seq in cluster_ids[c]:
                clusters["CDR3"].append(seq)
                clusters["cluster"].append(c)
        clusters = pd.DataFrame(data=clusters)

        # Write to file
        if outfile is not None:
            clusters.to_csv(outfile, sep="\t", index=False)
    except nx.NetworkXError:
        clusters = pd.DataFrame({"CDR3": [], "cluster": []})

    return clusters
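# Usage sketch for MCL() above. It relies on this module's `create_edgelist`
# helper and the `markov_clustering` package (imported as `mcl`); the CDR3
# strings below are made-up examples for illustration only.

def demo_mcl_clustering():
    seqs = ["CASSLGTDTQYF", "CASSLGTDTQYA", "CASSPGTDTQYF"]
    clusters = MCL(seqs)  # builds the edgelist from `seqs` internally
    print(clusters.head())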
def laplacian_matrix(G, nodelist=None, weight="weight"):
    """Returns the Laplacian matrix of G.

    The graph Laplacian is the matrix L = D - A, where A is the adjacency
    matrix and D is the diagonal matrix of node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    L : SciPy sparse matrix
      The Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edge weights are summed.

    See Also
    --------
    to_numpy_array
    normalized_laplacian_matrix
    laplacian_spectrum
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr")
    n, m = A.shape
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    import warnings

    warnings.warn(
        "laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm sp.sparse.csr_matrix in version 3.0
    return sp.sparse.csr_matrix(D - A)
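# A quick check of the L = D - A definition above, a minimal sketch separate
# from the library code: every row of a graph Laplacian sums to zero.

def demo_laplacian_matrix():
    import networkx as nx
    import numpy as np

    G = nx.path_graph(4)
    L = nx.laplacian_matrix(G).toarray()
    # Degrees 1, 2, 2, 1 on the diagonal; each row sums to 0.
    print(L)
    assert np.allclose(np.asarray(L.sum(axis=1)).ravel(), 0)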
def from_networkx(cls, graph, weight_col="weight"):
    """Convert a ``networkx`` graph to a PySAL ``W`` object.

    Parameters
    ----------
    graph : networkx.Graph
        The graph to convert to a ``W``.
    weight_col : string
        If the graph is labeled, this should be the name
        of the field to use as the weight for the ``W``.

    Returns
    -------
    w : libpysal.weights.W
        A ``W`` object containing the same graph as the ``networkx`` graph.
    """
    try:
        import networkx as nx
    except ImportError:
        raise ImportError("NetworkX 2.7+ is required to use this function.")

    sparse_array = nx.to_scipy_sparse_array(graph)
    w = WSP(sparse_array).to_W()

    return w
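# Usage sketch for the classmethod above, assuming it is exposed as
# libpysal.weights.W.from_networkx (the attachment point is an assumption,
# not shown in the snippet itself).

def demo_from_networkx():
    import networkx as nx
    from libpysal.weights import W

    G = nx.karate_club_graph()
    w = W.from_networkx(G)
    print(w.n, G.number_of_nodes())  # same number of observations as nodes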
def spectral_layout(G, weight="weight", scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Using the unnormalized Laplacian, the layout shows possible clusters of
    nodes which are an approximation of the ratio cut. If dim is the number of
    dimensions then the positions are the entries of the dim eigenvectors
    corresponding to the ascending eigenvalues starting from the second one.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight. If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    import numpy as np

    G, center = _process_params(G, center, dim)

    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([center])
        else:
            pos = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pos))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_array(G, weight=weight, dtype="d")
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_array(G, weight=weight)
        # Symmetrize directed graphs
        if G.is_directed():
            A += A.T
        pos = _spectral(A, dim)

    pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph. Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization : dict, optional
      The "personalization vector", a dictionary keyed by some subset of
      graph nodes with a personalization value for each of them.
      At least one personalization value must be non-zero.
      If not specified, a node's personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight. If None weights are set to 1.

    dangling : dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes
      without any outedges. The dict key is the node the outedge points to
      and the dict value is the weight of that outedge. By default, dangling
      nodes are given outedges according to the personalization vector
      (uniform if not specified). This must be selected to result in an
      irreducible transition matrix (see notes under google_matrix). It may
      be common to have the dangling dict to be the same as the
      personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    msg = "networkx.pagerank_scipy is deprecated and will be removed in NetworkX 3.0, use networkx.pagerank instead."
    warn(msg, DeprecationWarning, stacklevel=2)
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
    S = A.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    # TODO: csr_array
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
    A = Q @ A

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist], dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p = p / p.sum()

    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist], dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A + sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
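# Since pagerank_scipy is deprecated in favor of nx.pagerank (per the warning
# above), a minimal sketch of the recommended replacement on a small digraph:

def demo_pagerank():
    import networkx as nx

    G = nx.DiGraph(nx.path_graph(4))
    pr = nx.pagerank(G, alpha=0.9)  # recommended replacement
    print(pr)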
def hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    .. deprecated:: 2.6

       hits_scipy is deprecated and will be removed in networkx 3.0

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs, authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence. The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    import numpy as np
    import warnings

    warnings.warn(
        "networkx.hits_scipy is deprecated and will be removed "
        "in NetworkX 3.0, use networkx.hits instead.",
        DeprecationWarning,
        stacklevel=2,
    )

    if len(G) == 0:
        return {}, {}
    A = nx.to_scipy_sparse_array(G, nodelist=list(G))
    (n, m) = A.shape  # should be square
    ATA = A.T @ A  # authority matrix
    # choose fixed starting vector if not given
    if nstart is None:
        x = np.ones((n, 1)) / n
    else:
        x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
        x = x / x.sum()

    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = ATA @ x
        x /= x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1

    a = x.flatten()
    h = A @ a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
def spring_layout(
    G,
    k=None,
    pos=None,
    fixed=None,
    iterations=50,
    threshold=1e-4,
    weight="weight",
    scale=1,
    center=None,
    dim=2,
    seed=None,
):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    The algorithm simulates a force-directed representation of the network
    treating edges as springs holding nodes close, while treating nodes
    as repelling objects, sometimes called an anti-gravity force.
    Simulation continues until the positions are close to an equilibrium.

    There are some hard-coded values: minimal distance between
    nodes (0.01) and "temperature" of 0.1 to ensure nodes don't fly away.
    During the simulation, `k` helps determine the distance between nodes,
    though `scale` and `center` determine the size and place after
    rescaling occurs at the end of the simulation.

    Fixing some nodes doesn't allow them to move in the simulation.
    It also turns off the rescaling feature at the simulation's end.
    In addition, setting `scale` to `None` turns off rescaling.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes. If None the distance is set to
        1/sqrt(n) where n is the number of nodes. Increase this value
        to move nodes farther apart.

    pos : dict or None, optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple. If None, then use
        random initial positions.

    fixed : list or None, optional (default=None)
        Nodes to keep fixed at initial position.
        Nodes not in ``G.nodes`` are ignored.
        ValueError raised if `fixed` specified and `pos` not.

    iterations : int, optional (default=50)
        Maximum number of iterations taken

    threshold : float, optional (default=1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None, optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight. Larger means a stronger attractive force.
        If None, then all edge weights are 1.

    scale : number or None (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.
        If scale is None, no rescaling is performed.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None, optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance
        used by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        if pos is None:
            raise ValueError("nodes are fixed without positions given")
        for node in fixed:
            if node not in pos:
                raise ValueError("nodes are fixed without positions given")
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[node] for node in fixed if node in nfixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # sparse solver for large graphs
            raise ValueError
        A = nx.to_scipy_sparse_array(G, weight=weight, dtype="f")
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(
            A, k, pos_arr, fixed, iterations, threshold, dim, seed
        )
    except ValueError:
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(
            A, k, pos_arr, fixed, iterations, threshold, dim, seed
        )
    if fixed is None and scale is not None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
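# A short sketch of spring_layout with pinned nodes: `pos` seeds the layout
# and `fixed` keeps the listed nodes at their initial coordinates (which,
# per the docstring above, also disables rescaling).

def demo_spring_layout_fixed():
    import networkx as nx

    G = nx.path_graph(4)
    init = {0: (0.0, 0.0), 3: (1.0, 1.0)}
    pos = nx.spring_layout(G, pos=init, fixed=[0, 3], seed=42)
    print(pos[0], pos[3])  # endpoints stay where they were pinned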
def _transition_matrix(G, nodelist=None, weight="weight", walk_type=None, alpha=0.95):
    """Returns the transition matrix of G.

    This is a row stochastic matrix giving the transition probabilities while
    performing a random walk on the graph. Depending on the value of walk_type,
    P can be the transition matrix induced by a random walk, a lazy random walk,
    or a random walk with teleportation (PageRank).

    Parameters
    ----------
    G : DiGraph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    walk_type : string or None, optional (default=None)
       If None, `P` is selected depending on the properties of the
       graph. Otherwise it must be one of 'random', 'lazy', or 'pagerank'.

    alpha : real
       (1 - alpha) is the teleportation probability used with pagerank

    Returns
    -------
    P : NumPy matrix
      transition matrix of G.

    Raises
    ------
    NetworkXError
        If walk_type not specified or alpha not in valid range
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if walk_type is None:
        if nx.is_strongly_connected(G):
            if nx.is_aperiodic(G):
                walk_type = "random"
            else:
                walk_type = "lazy"
        else:
            walk_type = "pagerank"

    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, dtype=float)
    n, m = A.shape
    if walk_type in ["random", "lazy"]:
        # TODO: Rm csr_array wrapper when spdiags array creation becomes available
        DI = sp.sparse.csr_array(sp.sparse.spdiags(1.0 / A.sum(axis=1), 0, n, n))
        if walk_type == "random":
            P = DI @ A
        else:
            # TODO: Rm csr_array wrapper when identity array creation becomes available
            I = sp.sparse.csr_array(sp.sparse.identity(n))
            P = (I + DI @ A) / 2.0

    elif walk_type == "pagerank":
        if not (0 < alpha < 1):
            raise nx.NetworkXError("alpha must be between 0 and 1")
        # this is using a dense representation. NOTE: This should be sparsified!
        A = A.toarray()
        # add constant to dangling nodes' row
        A[A.sum(axis=1) == 0, :] = 1 / n
        # normalize
        A = A / A.sum(axis=1)[np.newaxis, :].T
        P = alpha * A + (1 - alpha) / n
    else:
        raise nx.NetworkXError("walk_type must be random, lazy, or pagerank")

    return P
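# A minimal check of the private helper above: every row of a transition
# matrix should sum to 1. The import path is internal to networkx.linalg
# and may change between versions.

def demo_transition_matrix():
    import networkx as nx
    import numpy as np
    from networkx.linalg.laplacianmatrix import _transition_matrix

    G = nx.DiGraph([(1, 2), (2, 3), (3, 1)])
    P = _transition_matrix(G, walk_type="random")
    assert np.allclose(np.asarray(P.sum(axis=1)).ravel(), 1.0)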
def normalized_laplacian_matrix(G, nodelist=None, weight="weight"):
    r"""Returns the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : SciPy sparse matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edge weights are summed.
    See to_numpy_array for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A, 1)), where A
    is the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix
    normalized_laplacian_spectrum

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr")
    n, m = A.shape
    diags = A.sum(axis=1)
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(sp.sparse.spdiags(diags, 0, m, n, format="csr"))
    L = D - A
    with np.errstate(divide="ignore"):
        diags_sqrt = 1.0 / np.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    DH = sp.sparse.csr_array(sp.sparse.spdiags(diags_sqrt, 0, m, n, format="csr"))
    import warnings

    warnings.warn(
        "normalized_laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm csr_matrix wrapper for NX 3.0
    return sp.sparse.csr_matrix(DH @ (L @ DH))
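# Quick usage sketch for normalized_laplacian_matrix: for an undirected
# graph its eigenvalues always lie in [0, 2], which makes a handy sanity
# check on the construction above.

def demo_normalized_laplacian():
    import networkx as nx
    import numpy as np

    G = nx.path_graph(5)
    N = nx.normalized_laplacian_matrix(G).toarray()
    eigs = np.linalg.eigvalsh(N)
    assert eigs.min() >= -1e-12 and eigs.max() <= 2 + 1e-12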
def test_format_keyword(self):
    WP4 = nx.Graph()
    WP4.add_edges_from(
        (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3)
    )
    P4 = path_graph(4)
    for fmt in ("csr", "csc", "coo", "bsr", "lil", "dia", "dok"):
        A = nx.to_scipy_sparse_array(P4, format=fmt)
        np.testing.assert_equal(
            A.todense(), nx.to_scipy_sparse_array(WP4, weight=None).todense()
        )
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
    P = (D @ X).tolil()
    P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
def test_identity_weighted_digraph_matrix(self):
    """Conversion from weighted digraph to sparse matrix to weighted digraph."""
    A = nx.to_scipy_sparse_array(self.G4)
    self.identity_conversion(self.G4, A, nx.DiGraph())
def test_null_raise(self):
    with pytest.raises(nx.NetworkXError):
        nx.to_scipy_sparse_array(nx.Graph())
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
    P = alpha * ((D2 @ X) @ D2)
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1 - alpha

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
def modularity_matrix(G, nodelist=None, weight=None):
    r"""Returns the modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the average adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as

    .. math::
        B_{ij} = A_{ij} - \frac{k_i k_j}{2 m}

    where k_i is the degree of node i, and where m is the number of edges
    in the graph. When weight is set to the name of an edge attribute,
    Aij, k_i, k_j and m are computed using its value.

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight. If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> B = nx.modularity_matrix(G)

    See Also
    --------
    to_numpy_array
    modularity_spectrum
    adjacency_matrix
    directed_modularity_matrix

    References
    ----------
    .. [1] M. E. J. Newman, "Modularity and community structure in networks",
       Proc. Natl. Acad. Sci. USA, vol. 103, pp. 8577-8582, 2006.
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr")
    k = A.sum(axis=1)
    m = k.sum() * 0.5
    # Expected adjacency matrix
    X = np.outer(k, k) / (2 * m)

    import warnings

    warnings.warn(
        "modularity_matrix will return a numpy array instead of a matrix in NetworkX 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm np.asmatrix for networkx 3.0
    return np.asmatrix(A - X)
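# The configuration-model correction means each row of B sums to zero
# (sum_j A_ij = k_i and sum_j k_i k_j / 2m = k_i). A quick check of the
# function above, using the graph from its docstring example:

def demo_modularity_matrix():
    import networkx as nx
    import numpy as np

    G = nx.havel_hakimi_graph([3, 2, 2, 1, 0])
    B = nx.modularity_matrix(G)
    assert np.allclose(np.asarray(B.sum(axis=1)).ravel(), 0)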
def test_empty(self):
    G = nx.Graph()
    G.add_node(1)
    M = nx.to_scipy_sparse_array(G)
    np.testing.assert_equal(M.toarray(), np.array([[0]]))
def directed_modularity_matrix(G, nodelist=None, weight=None):
    """Returns the directed modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the expected adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as

    .. math::
        B_{ij} = A_{ij} - k_i^{out} k_j^{in} / m

    where :math:`k_i^{out}` is the out degree of node i, and
    :math:`k_j^{in}` is the in degree of node j, with m the number of edges
    in the graph. When weight is set to the name of an edge attribute,
    Aij, k_i, k_j and m are computed using its value.

    Parameters
    ----------
    G : DiGraph
       A NetworkX DiGraph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight. If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> G = nx.DiGraph()
    >>> G.add_edges_from(
    ...     (
    ...         (1, 2),
    ...         (1, 3),
    ...         (3, 1),
    ...         (3, 2),
    ...         (3, 5),
    ...         (4, 5),
    ...         (4, 6),
    ...         (5, 4),
    ...         (5, 6),
    ...         (6, 4),
    ...     )
    ... )
    >>> B = nx.directed_modularity_matrix(G)

    Notes
    -----
    NetworkX defines the element A_ij of the adjacency matrix as 1 if there
    is a link going from node i to node j. Leicht and Newman use the opposite
    definition. This explains the different expression for B_ij.

    See Also
    --------
    to_numpy_array
    modularity_spectrum
    adjacency_matrix
    modularity_matrix

    References
    ----------
    .. [1] E. A. Leicht, M. E. J. Newman,
       "Community structure in directed networks",
       Phys. Rev Lett., vol. 100, no. 11, p. 118703, 2008.
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, weight=weight, format="csr")
    k_in = A.sum(axis=0)
    k_out = A.sum(axis=1)
    m = k_in.sum()
    # Expected adjacency matrix
    X = np.outer(k_out, k_in) / m

    import warnings

    warnings.warn(
        "directed_modularity_matrix will return a numpy array instead of a matrix in NetworkX 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm np.asmatrix for networkx 3.0
    return np.asmatrix(A - X)
def test_identity_graph_matrix(self):
    "Conversion from graph to sparse matrix to graph."
    A = nx.to_scipy_sparse_array(self.G1)
    self.identity_conversion(self.G1, A, nx.Graph())
def test_identity_digraph_matrix(self):
    "Conversion from digraph to sparse matrix to digraph."
    A = nx.to_scipy_sparse_array(self.G2)
    self.identity_conversion(self.G2, A, nx.DiGraph())
def bethe_hessian_matrix(G, r=None, nodelist=None):
    r"""Returns the Bethe Hessian matrix of G.

    The Bethe Hessian is a family of matrices parametrized by r, defined as
    H(r) = (r^2 - 1) I - r A + D where A is the adjacency matrix, D is the
    diagonal matrix of node degrees, and I is the identity matrix. It is
    equal to the graph Laplacian when the regularizer r = 1.

    The default choice of regularizer should be the ratio [2]_

    .. math::
      r_m = \left(\sum k_i \right)^{-1}\left(\sum k_i^2 \right) - 1

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    r : float
       Regularizer parameter

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    Returns
    -------
    H : scipy.sparse.csr_matrix
      The Bethe Hessian matrix of G, with parameter r.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> H = nx.bethe_hessian_matrix(G)

    See Also
    --------
    bethe_hessian_spectrum
    adjacency_matrix
    laplacian_matrix

    References
    ----------
    .. [1] A. Saade, F. Krzakala and L. Zdeborová
       "Spectral clustering of graphs with the bethe hessian",
       Advances in Neural Information Processing Systems. 2014.
    .. [2] C. M. Lee, E. Levina
       "Estimating the number of communities in networks by spectral methods"
       arXiv:1507.00827, 2015.
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    if r is None:
        r = sum(d**2 for v, d in nx.degree(G)) / sum(d for v, d in nx.degree(G)) - 1
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, format="csr")
    n, m = A.shape
    # TODO: Rm csr_array wrapper when spdiags array creation becomes available
    D = sp.sparse.csr_array(sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    # TODO: Rm csr_array wrapper when eye array creation becomes available
    I = sp.sparse.csr_array(sp.sparse.eye(m, n, format="csr"))
    import warnings

    warnings.warn(
        "bethe_hessian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: Remove the csr_matrix wrapper in NetworkX 3.0
    return sp.sparse.csr_matrix((r**2 - 1) * I - r * A + D)
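# The docstring above notes that H(r) equals the graph Laplacian at r = 1,
# since (1 - 1) I - A + D = D - A. A quick numerical check of that identity:

def demo_bethe_hessian():
    import networkx as nx
    import numpy as np

    G = nx.havel_hakimi_graph([3, 2, 2, 1, 0])
    H1 = nx.bethe_hessian_matrix(G, r=1).toarray()
    L = nx.laplacian_matrix(G).toarray()
    assert np.allclose(H1, L)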