def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False) \
        -> Tuple[sparse.csr_matrix, bool]:
    """Validate the input matrix and build the adjacency matrix to work on.

    The input is treated as a biadjacency matrix :math:`B` in any of these cases:

    * ``force_bipartite`` is ``True``,
    * the matrix is not square,
    * ``allow_directed`` is ``False`` and the matrix is not symmetric.

    In every other case the formatted input is returned as the adjacency matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, always return the adjacency matrix of a bipartite graph.
    force_directed :
        Only used in the bipartite case.
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return
        :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency :
        Adjacency matrix to use.
    bipartite : bool
        ``True`` if the input was treated as a biadjacency matrix.
    """
    matrix = check_format(input_matrix)

    # Negation of the "bipartite" condition; the operands are evaluated in the
    # same order, so the square / symmetry checks are skipped when not needed.
    usable_as_adjacency = (not force_bipartite
                           and is_square(matrix)
                           and (allow_directed or is_symmetric(matrix)))
    if usable_as_adjacency:
        return matrix, False

    to_adjacency = bipartite2directed if force_directed else bipartite2undirected
    return to_adjacency(matrix), True
def depth_first_search(adjacency: sparse.csr_matrix, source: int, return_predecessors: bool = True):
    """Depth-first ordering of the nodes reachable from a given node.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.depth_first_order).
    The graph is considered directed unless the adjacency matrix is symmetric.

    Parameters
    ----------
    adjacency :
        The adjacency matrix of the graph.
    source :
        The node from which to start the ordering.
    return_predecessors :
        If ``True`` (default), the array of predecessors is returned as well.

    Returns
    -------
    node_array : np.ndarray
        The depth-first list of nodes, starting with the specified node.
        Its length is the number of nodes reachable from the specified node.
    predecessors : np.ndarray
        Returned only if ``return_predecessors == True``.
        The list of predecessors of each node in a depth-first tree. If node ``i`` is in the tree,
        then its parent is given by ``predecessors[i]``. If node ``i`` is not in the tree
        (and for the parent node) then ``predecessors[i] = -9999``.
    """
    is_directed = not is_symmetric(adjacency)
    return sparse.csgraph.depth_first_order(adjacency, source,
                                            directed=is_directed,
                                            return_predecessors=return_predecessors)
def connected_components(adjacency: sparse.csr_matrix, connection: str = 'weak',
                         return_components: bool = True) -> Union[int, Tuple[int, np.ndarray]]:
    """Count (and optionally label) the connected components of the graph.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.connected_components).
    The graph is considered directed unless the adjacency matrix is symmetric.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.
    return_components :
        If ``True`` (default), also return the label of the connected component of each node.

    Returns
    -------
    n_components : int
        The number of connected components.
    components : np.ndarray
        Returned only if ``return_components == True``.
        The array such that for each node ``i``, ``components[i]`` is the connected component of ``i``.
    """
    is_directed = not is_symmetric(adjacency)
    return sparse.csgraph.connected_components(adjacency,
                                               directed=is_directed,
                                               connection=connection,
                                               return_labels=return_components)
def connected_components(adjacency: sparse.csr_matrix, connection: str = 'weak') -> np.ndarray:
    """Label each node with its connected component.

    * Graphs
    * Digraphs

    Based on SciPy (scipy.sparse.csgraph.connected_components).
    The graph is considered directed unless the adjacency matrix is symmetric.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.

    Returns
    -------
    labels : np.ndarray
        Connected component of each node.

    Raises
    ------
    ValueError
        If the adjacency matrix stores no entry at all.
    """
    adjacency = check_format(adjacency)
    if not len(adjacency.data):
        raise ValueError('The graph is empty (no edge).')

    is_directed = not is_symmetric(adjacency)
    # SciPy returns (n_components, labels); only the labels are exposed here.
    _, labels = sparse.csgraph.connected_components(adjacency, is_directed, connection, True)
    return labels
def is_bipartite(adjacency: sparse.csr_matrix, return_biadjacency: bool = False) \
        -> Union[bool, Tuple[bool, Optional[sparse.csr_matrix], Optional[np.ndarray], Optional[np.ndarray]]]:
    """Check whether an undirected graph is bipartite.

    The nodes are 2-colored by a traversal of each connected component; the graph
    is bipartite if and only if no edge joins two nodes of the same color.

    * Graphs

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (symmetric).
    return_biadjacency :
        If ``True``, return a biadjacency matrix of the graph if bipartite.

    Returns
    -------
    is_bipartite : bool
        A boolean denoting if the graph is bipartite.
    biadjacency : sparse.csr_matrix
        A biadjacency matrix of the graph if bipartite (optional, ``None`` if not bipartite).
    rows : np.ndarray
        Index of rows in the original graph (optional, ``None`` if not bipartite).
    cols : np.ndarray
        Index of columns in the original graph (optional, ``None`` if not bipartite).

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.
    """
    if not is_symmetric(adjacency):
        raise ValueError('The graph must be undirected.')

    # Value returned as soon as the graph is known not to be bipartite.
    not_bipartite = (False, None, None, None) if return_biadjacency else False

    # A self-loop joins a node to itself, so no valid 2-coloring exists.
    if adjacency.diagonal().any():
        return not_bipartite

    indptr, indices = adjacency.indptr, adjacency.indices
    n = indptr.shape[0] - 1
    coloring = np.full(n, -1, dtype=int)  # -1: not visited, 0 / 1: side of the node

    # Single increasing scan over the nodes: every node still uncolored when it is
    # reached seeds a new component. This visits the seeds in the same order as
    # searching for the first uncolored node each time, without rescanning the
    # whole color array once per component.
    for src in range(n):
        if coloring[src] != -1:
            continue
        coloring[src] = 0
        to_visit = [src]
        while to_visit:
            node = to_visit.pop()
            for neighbor in indices[indptr[node]:indptr[node + 1]]:
                if coloring[neighbor] == -1:
                    coloring[neighbor] = 1 - coloring[node]
                    to_visit.append(neighbor)
                elif coloring[neighbor] == coloring[node]:
                    # Edge inside one side: odd cycle.
                    return not_bipartite

    if not return_biadjacency:
        return True
    rows = np.argwhere(coloring == 0).ravel()
    cols = np.argwhere(coloring == 1).ravel()
    return True, adjacency[rows, :][:, cols], rows, cols
def test_bip2undir(self):
    """Check ``bipartite2undirected`` on a sparse biadjacency matrix and on a SparseLR."""
    n_row, n_col = self.biadjacency.shape
    size = n_row + n_col

    # Sparse input: square matrix over rows + columns, symmetric.
    symmetrized = bipartite2undirected(self.biadjacency)
    self.assertEqual(symmetrized.shape, (size, size))
    self.assertTrue(is_symmetric(symmetrized))

    # SparseLR input: the result must be exactly of type SparseLR.
    low_rank_term = (np.ones(n_row), np.ones(n_col))
    slr = SparseLR(self.biadjacency, [low_rank_term])
    symmetrized_slr = bipartite2undirected(slr)
    self.assertIs(type(symmetrized_slr), SparseLR)
def is_bipartite(adjacency: sparse.csr_matrix,
                 return_biadjacency: bool = False) -> Union[bool, Tuple[bool, Optional[sparse.csr_matrix]]]:
    """Check whether an undirected graph is bipartite and optionally return a possible biadjacency.

    The nodes are 2-colored by a traversal of each connected component; the graph
    is bipartite if and only if no edge joins two nodes of the same color.

    * Graphs

    Parameters
    ----------
    adjacency :
        The symmetric adjacency matrix of the graph.
    return_biadjacency :
        If ``True``, a possible biadjacency is returned if the graph is bipartite
        (``None`` is returned otherwise).

    Returns
    -------
    is_bipartite : bool
        A boolean denoting if the graph is bipartite.
    biadjacency : sparse.csr_matrix
        Returned only if ``return_biadjacency == True``.
        A possible biadjacency of the bipartite graph (``None`` if the graph is not bipartite).
        Rows are the nodes colored 0 and columns the nodes colored 1.

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.
    """
    if not is_symmetric(adjacency):
        raise ValueError('The graph must be undirected.')

    # Value returned as soon as the graph is known not to be bipartite.
    not_bipartite = (False, None) if return_biadjacency else False

    # A self-loop joins a node to itself, so no valid 2-coloring exists.
    if adjacency.diagonal().any():
        return not_bipartite

    indptr, indices = adjacency.indptr, adjacency.indices
    n_nodes = indptr.shape[0] - 1
    coloring = np.full(n_nodes, -1, dtype=int)  # -1: not visited, 0 / 1: side of the node

    # Single increasing scan over the nodes: every node still uncolored when it is
    # reached seeds a new component. This visits the seeds in the same order as
    # searching for the first uncolored node each time, without rescanning the
    # whole color array once per component.
    for src in range(n_nodes):
        if coloring[src] != -1:
            continue
        coloring[src] = 0
        to_visit = [src]
        while to_visit:
            node = to_visit.pop()
            for neighbor in indices[indptr[node]:indptr[node + 1]]:
                if coloring[neighbor] == -1:
                    coloring[neighbor] = 1 - coloring[node]
                    to_visit.append(neighbor)
                elif coloring[neighbor] == coloring[node]:
                    # Edge inside one side: odd cycle.
                    return not_bipartite

    if not return_biadjacency:
        return True
    return True, adjacency[coloring == 0, :][:, coloring == 1]
def test_dir2undir(self):
    """Check ``directed2undirected`` on a directed cycle, on the house graph and on a SparseLR."""
    # Directed cycle: the shape is kept and the result is symmetric.
    n = 3
    digraph = cyclic_digraph(n)
    symmetrized = directed2undirected(digraph)
    self.assertEqual(symmetrized.shape, digraph.shape)
    self.assertTrue(is_symmetric(symmetrized))

    # House graph: half of the result minus the input must store no entry.
    adjacency = house()
    n = adjacency.shape[0]
    residual = 0.5 * directed2undirected(adjacency) - adjacency
    self.assertEqual(residual.nnz, 0)

    # SparseLR with a zero low-rank term: same action on a random vector as the sparse matrix.
    zero_term = (np.zeros(n), np.zeros(n))
    halved = 0.5 * directed2undirected(SparseLR(adjacency, [zero_term]))
    self.assertEqual(halved.shape, (n, n))

    x = np.random.randn(n)
    gap = np.linalg.norm(halved.dot(x) - adjacency.dot(x))
    self.assertAlmostEqual(gap, 0)
def is_acyclic(adjacency: sparse.csr_matrix) -> bool:
    """Check whether a graph has no cycle.

    The graph is reported acyclic when every node is its own strongly connected
    component and the diagonal of the adjacency matrix is zero (no self-loop).

    NOTE(review): a symmetric matrix is passed to SciPy as an undirected graph, so
    two distinct nodes joined by an edge share a component and the result is
    ``False``; confirm this is the intended notion of cycle for undirected graphs.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    is_acyclic : bool
        A boolean with value True if the graph has no cycle and False otherwise.
    """
    n_nodes = adjacency.shape[0]
    is_directed = not is_symmetric(adjacency)
    n_components = sparse.csgraph.connected_components(adjacency,
                                                       directed=is_directed,
                                                       connection='strong',
                                                       return_labels=False)
    if n_components != n_nodes:
        # At least two nodes share a component.
        return False
    # Only self-loops (= cycles of length one) remain to be excluded.
    return (adjacency.diagonal() == 0).all()
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'Spring':
    """Compute layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected
        (a non-symmetric matrix is converted with ``directed2undirected``).
    position_init : np.ndarray
        Custom initial positions of the nodes. Shape must be (n, n_components).
        If ``None``, the initial layout depends on ``self.position_init``
        (``'random'`` or ``'spectral'``; zeros for any other value).
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`Spring`

    Raises
    ------
    TypeError
        If ``position_init`` is neither ``None`` nor a numpy array.
    ValueError
        If ``position_init`` does not have shape (n, n_components).
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]
    dim = self.n_components

    # --- initial positions ---
    if position_init is None:
        if self.position_init == 'random':
            position = np.random.randn(n, dim)
        elif self.position_init == 'spectral':
            position = Spectral(n_components=dim, normalized=False).fit_transform(adjacency)
        else:
            position = np.zeros((n, dim))
    elif not isinstance(position_init, np.ndarray):
        raise TypeError('Initial position must be a numpy array.')
    elif position_init.shape != (n, dim):
        raise ValueError('Initial position has invalid shape.')
    else:
        # Work on a copy: positions are updated in place below.
        position = position_init.copy()

    if n_iter is None:
        n_iter = self.n_iter

    strength = np.sqrt((1 / n)) if self.strength is None else self.strength

    # The maximal displacement starts at 10% of the largest extent of the layout
    # and decreases linearly at each iteration.
    step_max: float = 0.1 * (position.max(axis=0) - position.min(axis=0)).max()
    step: float = step_max / (n_iter + 1)

    indptr = adjacency.indptr
    tree = None
    delta = np.zeros((n, dim))
    for _ in range(n_iter):
        delta *= 0
        if self.approx_radius > 0:
            # Spatial index used to restrict repulsion to nearby nodes.
            tree = cKDTree(position)

        for i in range(n):
            start, stop = indptr[i], indptr[i + 1]

            # attraction (neighbors in the graph only)
            indices = adjacency.indices[start:stop]
            attraction = adjacency.data[start:stop] / strength
            grad = position[i] - position[indices]
            attraction *= np.linalg.norm(grad, axis=1)
            attraction = (grad * attraction[:, np.newaxis]).sum(axis=0)

            # repulsion (all nodes, or nodes within approx_radius)
            if tree is None:
                grad = position[i] - position  # shape (n, n_components)
            else:
                neighbors = tree.query_ball_point(position[i], self.approx_radius)
                grad = position[i] - position[neighbors]  # shape (n_neigh, n_components)
            distance = np.linalg.norm(grad, axis=1)
            # Distances are floored to avoid a division by (almost) zero.
            distance = np.where(distance < 0.01, 0.01, distance)
            repulsion = (grad * (strength / distance)[:, np.newaxis] ** 2).sum(axis=0)

            # total force
            delta[i] = repulsion - attraction

        # NOTE(review): axis=0 gives one norm per coordinate column (shape (n_components,)),
        # not one per node -- confirm this is the intended normalisation.
        length = np.linalg.norm(delta, axis=0)
        length = np.where(length < 0.01, 0.1, length)
        delta = delta * step_max / length
        position += delta
        step_max -= step

        err: float = np.linalg.norm(delta) / n
        if err < self.tol:
            break

    self.embedding_ = position
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'ForceAtlas':
    """Compute layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected
        (a non-symmetric matrix is converted with ``directed2undirected``).
    pos_init :
        Position to start with, of shape (n, n_components). Random if not provided.
        The array is copied and left unchanged.
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`ForceAtlas`

    Raises
    ------
    ValueError
        If ``pos_init`` does not have shape (n, n_components).
    """
    # verify the format of the adjacency matrix
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]

    # setting of the tolerance according to the size of the graph
    if n < 5000:
        tolerance = 0.1
    elif 5000 <= n < 50000:  # pragma: no cover
        tolerance = 1
    else:  # pragma: no cover
        tolerance = 10

    if n_iter is None:
        n_iter = self.n_iter

    # initial position of the nodes of the graph
    if pos_init is None:
        position: np.ndarray = np.random.randn(n, self.n_components)
    else:
        if pos_init.shape != (n, self.n_components):
            raise ValueError('The initial position does not have valid dimensions.')
        # Copy: ``position`` is updated in place below and must not overwrite
        # the array owned by the caller.
        position = pos_init.copy()

    # compute the vector with the degree of each node (shifted by one)
    degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

    # initialization of variation of position of nodes
    resultants = np.zeros(n)  # norm of the force applied on each node at the previous iteration
    delta: np.ndarray = np.zeros((n, self.n_components))
    swing_vector: np.ndarray = np.zeros(n)
    global_speed = 1

    for iteration in range(n_iter):
        delta *= 0
        global_swing = 0
        global_traction = 0

        if self.approx_radius > 0:
            # Spatial index used to restrict repulsion to nearby nodes.
            tree = cKDTree(position)
        else:
            tree = None

        for i in range(n):

            # attraction
            indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
            attraction = position[i] - position[indices]

            if self.lin_log:
                attraction = np.sign(attraction) * np.log(1 + np.abs(10 * attraction))
            attraction = attraction.sum(axis=0)

            # repulsion
            if tree is None:
                neighbors = np.arange(n)
            else:
                neighbors = tree.query_ball_point(position[i], self.approx_radius)

            grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
            distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)
            # Distances are floored to avoid a division by (almost) zero.
            distance = np.where(distance < 0.01, 0.01, distance)
            repulsion = grad * (degree[neighbors] / distance)[:, np.newaxis]

            repulsion *= self.repulsive_factor * degree[i]
            repulsion = repulsion.sum(axis=0)

            # gravity
            gravity = self.gravity_factor * degree[i] * grad
            gravity = gravity.sum(axis=0)

            # forces resultant applied on node i for traction, swing and speed computation
            force = repulsion - attraction - gravity
            resultant_new: float = np.linalg.norm(force)
            resultant_old: float = resultants[i]

            swing_node: float = np.abs(resultant_new - resultant_old)  # force variation applied on node i
            swing_vector[i] = swing_node
            global_swing += (degree[i] + 1) * swing_node

            traction: float = np.abs(resultant_new + resultant_old) / 2  # traction force applied on node i
            global_traction += (degree[i] + 1) * traction

            node_speed = self.speed * global_speed / (1 + global_speed * np.sqrt(swing_node))
            if node_speed > self.speed_max / resultant_new:  # pragma: no cover
                node_speed = self.speed_max / resultant_new

            delta[i] = node_speed * force
            resultants[i] = resultant_new
            global_speed = tolerance * global_traction / global_swing

        position += delta  # calculating displacement and final position of points after iteration
        if (swing_vector < 1).all():
            break  # the swing of every node is below 1: stop iterating

    self.embedding_ = position
    return self
def distance(adjacency: sparse.csr_matrix, sources: Optional[Union[int, Iterable]] = None, method: str = 'D',
             return_predecessors: bool = False, unweighted: bool = False, n_jobs: Optional[int] = None):
    """Compute distances between nodes.

    * Graphs
    * Digraphs

    Based on SciPy (scipy.sparse.csgraph.shortest_path).
    The graph is considered directed unless the adjacency matrix is symmetric.

    Parameters
    ----------
    adjacency :
        The adjacency matrix of the graph
    sources :
        If specified, only compute the paths for the points at the given indices.
        Will not work with ``method =='FW'``.
    method :
        The method to be used.

        * ``'D'`` (Dijkstra),
        * ``'BF'`` (Bellman-Ford),
        * ``'J'`` (Johnson).
    return_predecessors :
        If ``True``, the matrix of predecessors is returned as well
    unweighted :
        If ``True``, the weights of the edges are ignored
    n_jobs :
        If an integer value is given, denotes the number of workers to use (-1 means the maximum number will be used).
        If ``None``, no parallel computations are made.

    Returns
    -------
    dist_matrix : np.ndarray
        The matrix of distances between graph nodes. ``dist_matrix[i,j]`` gives the shortest
        distance from point ``i`` to point ``j`` along the graph.
        If no path exists between nodes ``i`` and ``j``, then ``dist_matrix[i, j] = np.inf``.
        With a single source, a 1-dimensional array is returned.
    predecessors : np.ndarray, optional
        Returned only if ``return_predecessors == True``. The matrix of predecessors, which can be used to reconstruct
        the shortest paths. Row i of the predecessor matrix contains information on the shortest paths from point ``i``:
        each entry ``predecessors[i, j]`` gives the index of the previous node in the path from point ``i`` to point
        ``j``. If no path exists between nodes ``i`` and ``j``, then ``predecessors[i, j] = -9999``.

    Raises
    ------
    ValueError
        If ``method == 'FW'`` is requested together with parallel computations.

    Examples
    --------
    >>> from sknetwork.data import cyclic_digraph
    >>> adjacency = cyclic_digraph(3)
    >>> distance(adjacency, sources=0)
    array([0., 1., 2.])
    >>> distance(adjacency, sources=0, return_predecessors=True)
    (array([0., 1., 2.]), array([-9999,     0,     1]))
    """
    n_jobs = check_n_jobs(n_jobs)
    if method == 'FW' and n_jobs != 1:
        raise ValueError('The Floyd-Warshall algorithm cannot be used with parallel computations.')

    if sources is None:
        sources = np.arange(adjacency.shape[0])
    elif np.issubdtype(type(sources), np.integer):
        # A single node: wrap it so that the code below always handles a sequence.
        sources = np.array([sources])
    n = len(sources)
    directed = not is_symmetric(adjacency)

    if n_jobs == 1 or n == 1:
        res = sparse.csgraph.shortest_path(adjacency, method, directed, return_predecessors, unweighted, False,
                                           sources)
    else:
        # One SciPy call per source, dispatched to the workers.
        local_function = partial(sparse.csgraph.shortest_path, adjacency, method, directed, return_predecessors,
                                 unweighted, False)
        with Pool(n_jobs) as pool:
            per_source = pool.map(local_function, sources)
        if return_predecessors:
            # Each worker returns a (distances, predecessors) pair for its source.
            # Stack distances and predecessors separately, so that the result has the
            # same layout as the sequential call: (dist_matrix, predecessors).
            res = (np.array([dist for dist, _ in per_source]),
                   np.array([pred for _, pred in per_source]))
        else:
            res = np.array(per_source)

    if return_predecessors:
        dist_matrix, predecessors = res
        predecessors = predecessors.astype(int)
        if n == 1:
            return dist_matrix.ravel(), predecessors.ravel()
        return dist_matrix, predecessors

    if n == 1:
        return res.ravel()
    return res
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'Spring':
    """Compute a 2-dimensional layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected
        (a non-symmetric matrix is converted with ``directed2undirected``).
    position_init : np.ndarray
        Custom initial positions of the nodes. Shape must be (n, 2).
        If ``None``, the initial layout depends on ``self.position_init``
        (``'random'`` or ``'spectral'``; zeros for any other value).
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`Spring`

    Raises
    ------
    TypeError
        If ``position_init`` is neither ``None`` nor a numpy array.
    ValueError
        If ``position_init`` does not have shape (n, 2).
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]

    # --- initial positions ---
    if position_init is None:
        if self.position_init == 'random':
            position = np.random.randn(n, 2)
        elif self.position_init == 'spectral':
            position = Spectral(n_components=2, normalized=False).fit_transform(adjacency)
        else:
            position = np.zeros((n, 2))
    elif not isinstance(position_init, np.ndarray):
        raise TypeError('Initial position must be a numpy array.')
    elif position_init.shape != (n, 2):
        raise ValueError('Initial position has invalid shape.')
    else:
        # Work on a copy: positions are updated in place below.
        position = position_init.copy()

    if n_iter is None:
        n_iter = self.n_iter

    strength = np.sqrt((1 / n)) if self.strength is None else self.strength

    # The maximal displacement starts at 10% of the largest extent of the layout
    # and decreases linearly at each iteration.
    delta_x: float = position[:, 0].max() - position[:, 0].min()
    delta_y: float = position[:, 1].max() - position[:, 1].min()
    step_max: float = 0.1 * max(delta_x, delta_y)
    step: float = step_max / (n_iter + 1)

    indptr = adjacency.indptr
    delta = np.zeros((n, 2))
    for _ in range(n_iter):
        delta *= 0
        for i in range(n):
            start, stop = indptr[i], indptr[i + 1]
            indices = adjacency.indices[start:stop]
            data = adjacency.data[start:stop]

            grad = position[i] - position  # shape (n, 2)
            distance = np.linalg.norm(grad, axis=1)  # shape (n,)
            # Distances are floored to avoid a division by (almost) zero.
            distance = np.where(distance < 0.01, 0.01, distance)

            # Attraction applies to the neighbors in the graph only, repulsion to all nodes.
            attraction = np.zeros(n)
            attraction[indices] += data * distance[indices] / strength
            repulsion = (strength / distance)**2

            delta[i] = (grad * (repulsion - attraction)[:, np.newaxis]).sum(axis=0)  # shape (2,)

        # NOTE(review): axis=0 gives one norm per coordinate column (shape (2,)),
        # not one per node -- confirm this is the intended normalisation.
        length = np.linalg.norm(delta, axis=0)
        length = np.where(length < 0.01, 0.1, length)
        delta = delta * step_max / length
        position += delta
        step_max -= step

        err: float = np.linalg.norm(delta) / n
        if err < self.tol:
            break

    self.embedding_ = position
    return self