def test_is_bipartite(self):
    """Exercise ``is_bipartite`` on bipartite, non-bipartite and invalid inputs."""

    def check_rejected(graph):
        # A non-bipartite graph yields a False flag and no biadjacency.
        flag, found, _, _ = is_bipartite(graph, return_biadjacency=True)
        self.assertEqual(flag, False)
        self.assertIsNone(found)

    # Graph built from a known biadjacency: the test must succeed and
    # give back the same biadjacency data.
    expected = star_wars(metadata=False)
    graph = bipartite2undirected(expected)
    self.assertTrue(is_bipartite(graph))

    flag, recovered, _, _ = is_bipartite(graph, return_biadjacency=True)
    self.assertEqual(flag, True)
    self.assertEqual(np.all(expected.data == recovered.data), True)

    # Identity matrix (self-loops only).
    check_rejected(sparse.identity(2, format='csr'))

    # Symmetrized 3-cycle.
    check_rejected(directed2undirected(cyclic_digraph(3)))

    # The directed 3-cycle itself is expected to be refused with a ValueError.
    with self.assertRaises(ValueError):
        is_bipartite(cyclic_digraph(3))

    self.assertFalse(is_bipartite(sparse.eye(3)))

    # Same symmetrized 3-cycle, this time asking for the boolean only.
    graph = directed2undirected(cyclic_digraph(3))
    flag = is_bipartite(graph, return_biadjacency=False)
    self.assertEqual(flag, False)
def cyclic_graph(n: int = 3, metadata: bool = False) -> Union[sparse.csr_matrix, Bunch]:
    """Build an undirected cyclic graph.

    The graph is obtained by generating the directed cycle with
    ``cyclic_digraph`` (metadata requested) and symmetrizing its adjacency
    with ``directed2undirected``.

    Parameters
    ----------
    n : int
        Number of nodes.
    metadata : bool
        If ``True``, return the `Bunch` object produced by ``cyclic_digraph``
        with its adjacency replaced by the undirected one.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix, or graph with metadata when ``metadata`` is ``True``.

    Example
    -------
    >>> from sknetwork.data import cyclic_graph
    >>> adjacency = cyclic_graph(5)
    >>> adjacency.shape
    (5, 5)
    """
    bunch = cyclic_digraph(n, True)
    bunch.adjacency = directed2undirected(bunch.adjacency)
    return bunch if metadata else bunch.adjacency
def _optimize(self, n_nodes, adjacency_norm, probs_out, probs_in):
    """Run one local optimization pass of the Louvain algorithm.

    The adjacency is symmetrized with ``directed2undirected`` and halved,
    then its CSR arrays and diagonal are handed to ``fit_core`` together
    with ``self.resolution`` and ``self.tol``.

    Parameters
    ----------
    n_nodes :
        Number of nodes in the adjacency.
    adjacency_norm :
        Adjacency matrix to optimize on (presumably normalized by the caller; not verified here).
    probs_out :
        Per-node weights of the adjacency, forwarded unchanged.
    probs_in :
        Per-node weights of the transposed adjacency, forwarded unchanged.

    Returns
    -------
    Whatever ``fit_core`` returns; per the original documentation, the
    labels after the pass and the increase in modularity.
    """
    symmetric = 0.5 * directed2undirected(adjacency_norm)
    loops = symmetric.diagonal()
    return fit_core(
        self.resolution,
        self.tol,
        n_nodes,
        probs_out,
        probs_in,
        loops,
        symmetric.data,
        symmetric.indices,
        symmetric.indptr,
    )
def _optimize(self, adjacency_norm, probs_ou, probs_in):
    """Run one local optimization pass of the Louvain algorithm.

    The adjacency is symmetrized with ``directed2undirected`` and halved.
    All arrays are cast to 32-bit types (``float32`` for values, ``int32``
    for CSR index arrays) before being passed to ``fit_core``.

    Parameters
    ----------
    adjacency_norm :
        Adjacency matrix to optimize on (presumably normalized by the caller; not verified here).
    probs_ou :
        Per-node weights of the adjacency.
    probs_in :
        Per-node weights of the transposed adjacency.

    Returns
    -------
    Whatever ``fit_core`` returns; per the original documentation, the
    labels after the pass and the increase in modularity.
    """
    in_weights = probs_in.astype(np.float32)
    out_weights = probs_ou.astype(np.float32)

    symmetric = 0.5 * directed2undirected(adjacency_norm)
    loops = symmetric.diagonal().astype(np.float32)
    row_ptr = symmetric.indptr.astype(np.int32)
    col_idx = symmetric.indices.astype(np.int32)
    values = symmetric.data.astype(np.float32)

    return fit_core(self.resolution, self.tol, out_weights, in_weights,
                    loops, values, col_idx, row_ptr)
def make_undirected(self):
    """Symmetrize the stored adjacency when the undirected constraint is set.

    Nothing happens if ``self.adjacency_`` is ``None`` or ``self.undirected``
    is falsy. Otherwise the adjacency is replaced by its unweighted
    symmetrization, cast back to the dtype it had before.

    Returns
    -------
    self
    """
    if self.adjacency_ is None or not self.undirected:
        return self

    original_dtype = self.adjacency_.dtype
    symmetric = directed2undirected(self.adjacency_, weighted=False)
    self.adjacency_ = symmetric.astype(original_dtype)
    return self
def _instantiate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Prepare the objects shared by the metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weighting scheme forwarded to ``get_probs``.

    Returns
    -------
    aggregate_graph :
        ``AggregateGraph`` built from the symmetrized adjacency.
    weights_row :
        Result of ``get_probs`` on the adjacency.
    weights_col :
        Result of ``get_probs`` on the transposed adjacency.
    """
    row_probs = get_probs(weights, adjacency)
    col_probs = get_probs(weights, adjacency.T)

    symmetric = directed2undirected(adjacency)
    values = symmetric.data.astype(float)
    graph = AggregateGraph(row_probs, col_probs, values, symmetric.indices, symmetric.indptr)

    return graph, row_probs, col_probs
def edgelist2adjacency(edge_list: list, undirected: bool = False, weighted: bool = True) \
        -> sparse.csr_matrix:
    """Build an adjacency matrix from a list of edges.

    Parameters
    ----------
    edge_list : list
        List of edges as pairs (i, j) or triplets (i, j, w) for weighted edges.
    undirected : bool
        If ``True``, return a symmetric adjacency matrix.
    weighted : bool
        If ``True``, return a weighted adjacency matrix. If weights are not specified, the weight of each edge
        is equal to its count in the list. When every resulting weight equals 1, the matrix is returned
        with boolean entries.

    Returns
    -------
    adjacency : sparse.csr_matrix

    Examples
    --------
    >>> edge_list = [(0, 1), (1, 2), (2, 0)]
    >>> adjacency = edgelist2adjacency(edge_list)
    >>> adjacency.shape, adjacency.nnz
    ((3, 3), 3)
    >>> adjacency = edgelist2adjacency(edge_list, undirected=True)
    >>> adjacency.shape, adjacency.nnz
    ((3, 3), 6)
    >>> weighted_edge_list = [(0, 1, 0.2), (1, 2, 4), (2, 0, 1.3)]
    >>> adjacency = edgelist2adjacency(weighted_edge_list)
    >>> adjacency.dtype
    dtype('float64')
    """
    edges = np.array(edge_list)
    row = edges[:, 0].astype(np.int32)
    col = edges[:, 1].astype(np.int32)
    n = max(row.max(), col.max()) + 1
    if edges.shape[1] > 2:
        data = edges[:, 2]
    else:
        data = np.ones_like(row, dtype=int)

    # Duplicate (i, j) entries are summed when the CSR matrix is assembled,
    # so repeated edges end up with their count (or total weight).
    adjacency = sparse.csr_matrix((data, (row, col)), shape=(n, n))

    # Decide on the binary representation from the assembled matrix, not from
    # the raw per-edge data: checking only the maximum of the raw data would
    # discard edge counts and weights such as 0.5.
    if not weighted or np.all(adjacency.data == 1):
        adjacency = adjacency.astype(bool)
    if undirected:
        adjacency = directed2undirected(adjacency)
    return adjacency
def _instanciate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Initialize standard variables for metrics.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    weights :
        Weighting scheme forwarded to ``check_probs``.

    Returns
    -------
    aggregate_graph :
        ``AggregateGraph`` built from the symmetrized adjacency.
    height, cluster_weight, edge_sampling : np.ndarray
        Zero-initialized arrays of length ``n - 1``, where ``n`` is the number of rows of the adjacency.
    weights_row, weights_col :
        Results of ``check_probs`` on the adjacency and on its transpose.
    """
    n = adjacency.shape[0]
    weights_row = check_probs(weights, adjacency)
    weights_col = check_probs(weights, adjacency.T)
    sym_adjacency = directed2undirected(adjacency)

    # ``np.float`` was an alias of the builtin ``float`` and has been removed
    # from NumPy; the builtin gives the same float64 cast.
    aggregate_graph = AggregateGraph(weights_row, weights_col, sym_adjacency.data.astype(float),
                                     sym_adjacency.indices, sym_adjacency.indptr)

    height = np.zeros(n - 1)
    cluster_weight = np.zeros(n - 1)
    edge_sampling = np.zeros(n - 1)

    return aggregate_graph, height, cluster_weight, edge_sampling, weights_row, weights_col
def block_model(sizes: Iterable, p_in: Union[float, list, np.ndarray] = .2, p_out: float = .05,
                random_state: Optional[int] = None, metadata: bool = False) \
        -> Union[sparse.csr_matrix, Bunch]:
    """Stochastic block model.

    Parameters
    ----------
    sizes :
         Block sizes.
    p_in :
        Probability of connection within blocks. Either a single number applied to every block
        or one value per block.
    p_out :
        Probability of connection across blocks.
    random_state :
        Seed of the random generator (optional). Note that the seed is applied to NumPy's global
        random state.
    metadata :
        If ``True``, return a `Bunch` object with metadata.

    Returns
    -------
    adjacency or graph : Union[sparse.csr_matrix, Bunch]
        Adjacency matrix or graph with metadata (labels).

    Example
    -------
    >>> import numpy as np
    >>> from sknetwork.data import block_model
    >>> sizes = np.array([4, 5])
    >>> adjacency = block_model(sizes)
    >>> adjacency.shape
    (9, 9)

    References
    ----------
    Airoldi, E., Blei, D., Feinberg, S., Xing, E. (2007).
    `Mixed membership stochastic blockmodels. <https://arxiv.org/pdf/0705.4485.pdf>`_
    Journal of Machine Learning Research.
    """
    np.random.seed(random_state)
    sizes = np.array(sizes)

    # Broadcast a scalar probability to one value per block. Integer scalars
    # (e.g. ``p_in=1``) are accepted too; they would otherwise become a 0-d
    # array and fail on ``p_in[i]`` below.
    if isinstance(p_in, (np.floating, np.integer, float, int)):
        p_in = p_in * np.ones_like(sizes)
    else:
        p_in = np.array(p_in)

    # each edge is considered twice
    p_in = p_in / 2

    # Only the upper block triangle is sampled; the lower one is filled by
    # the symmetrization below.
    matrix = []
    for i, a in enumerate(sizes):
        row = []
        for j, b in enumerate(sizes):
            if j < i:
                row.append(None)
            elif j > i:
                row.append(sparse.random(a, b, p_out, dtype=bool))
            else:
                row.append(sparse.random(a, a, p_in[i], dtype=bool))
        matrix.append(row)
    adjacency = sparse.bmat(matrix)
    adjacency.setdiag(0)
    adjacency = directed2undirected(adjacency.tocsr(), weighted=False)

    if metadata:
        graph = Bunch()
        graph.adjacency = adjacency
        labels = np.repeat(np.arange(len(sizes)), sizes)
        graph.labels = labels
        return graph
    else:
        return adjacency
def from_edge_list(row: np.ndarray, col: np.ndarray, data: np.ndarray, directed: bool = False,
                   bipartite: bool = False, reindex: bool = True, named: Optional[bool] = None) -> Bunch:
    """Turn an edge list given as a triplet of NumPy arrays into a :class:`Bunch`.

    Parameters
    ----------
    row : np.ndarray
        The array of sources in the graph.
    col : np.ndarray
        The array of targets in the graph.
    data : np.ndarray
        The array of weights in the graph. Pass an empty array for unweighted graphs.
    directed : bool
        If ``True``, considers the graph as directed. Otherwise the adjacency is symmetrized.
    bipartite : bool
        If ``True``, returns a biadjacency matrix of shape (n1, n2).
    reindex : bool
        If ``True``, the matrix size is the number of distinct node identifiers. If ``False``,
        it is the largest identifier plus one.
    named : Optional[bool]
        If ``True``, node identifiers are treated as names and renumbered; the names are returned
        in the graph. ``None`` guesses from the dtype of ``row`` and ``col`` (named unless both are ``int``).

    Returns
    -------
    graph: :class:`Bunch`
        With attribute ``biadjacency`` (bipartite case) or ``adjacency``, plus the name arrays when
        nodes were named or renumbered.
    """
    if named is None:
        named = (row.dtype != int) or (col.dtype != int)
    weighted = bool(len(data))
    n_edges = len(row)
    graph = Bunch()

    if bipartite:
        # Rows and columns are numbered independently.
        names_row, row = np.unique(row, return_inverse=True)
        names_col, col = np.unique(col, return_inverse=True)
        if reindex:
            shape = (len(names_row), len(names_col))
        else:
            shape = (names_row.max() + 1, names_col.max() + 1)
        if not weighted:
            data = np.ones(n_edges, dtype=bool)
        graph.biadjacency = sparse.csr_matrix((data, (row, col)), shape=shape)
        if named or reindex:
            graph.names = names_row
            graph.names_row = names_row
            graph.names_col = names_col
        return graph

    # Sources and targets share a single numbering.
    all_nodes = np.concatenate((row, col), axis=None)
    names, new_nodes = np.unique(all_nodes, return_inverse=True)
    n_nodes = len(names) if reindex else names.max() + 1

    reindexed = False
    if not named:
        # Numeric identifiers are renumbered only when requested and when
        # they do not already span 0 .. n_nodes - 1.
        already_compact = names[0] == 0 and names[-1] == n_nodes - 1
        reindexed = bool(reindex and not already_compact)
    if named or reindexed:
        row, col = new_nodes[:n_edges], new_nodes[n_edges:]

    if not weighted:
        data = np.ones(n_edges, dtype=bool)
    adjacency = sparse.csr_matrix((data, (row, col)), shape=(n_nodes, n_nodes))
    if not directed:
        adjacency = directed2undirected(adjacency, weighted=weighted)
    graph.adjacency = adjacency
    if named or reindexed:
        graph.names = names
    return graph
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'Spring':
    """Compute layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected.
    position_init : np.ndarray
        Custom initial positions of the nodes. Shape must be (n, n_components).
        If ``None``, use the value of self.position_init.
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`Spring`

    Raises
    ------
    ValueError
        If ``position_init`` does not have shape (n, n_components).
    TypeError
        If ``position_init`` is neither ``None`` nor a NumPy array.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]

    # Default to all-zero positions when self.position_init is neither
    # 'random' nor 'spectral'.
    position = np.zeros((n, self.n_components))
    if position_init is None:
        if self.position_init == 'random':
            position = np.random.randn(n, self.n_components)
        elif self.position_init == 'spectral':
            position = Spectral(n_components=self.n_components, normalized=False).fit_transform(adjacency)
    elif isinstance(position_init, np.ndarray):
        if position_init.shape == (n, self.n_components):
            # Copy as float: the caller's array is left untouched and integer
            # positions do not break the in-place float update below.
            position = position_init.astype(float)
        else:
            raise ValueError('Initial position has invalid shape.')
    else:
        raise TypeError('Initial position must be a numpy array.')

    if n_iter is None:
        n_iter = self.n_iter

    if self.strength is None:
        strength = np.sqrt((1 / n))
    else:
        strength = self.strength

    # The maximum step starts at 10% of the largest extent of the initial
    # layout and decreases linearly at each iteration.
    pos_max = position.max(axis=0)
    pos_min = position.min(axis=0)
    step_max: float = 0.1 * (pos_max - pos_min).max()
    step: float = step_max / (n_iter + 1)
    tree = None

    delta = np.zeros((n, self.n_components))
    for iteration in range(n_iter):
        delta *= 0
        if self.approx_radius > 0:
            # Restrict repulsion to points within approx_radius.
            tree = cKDTree(position)

        for i in range(n):
            # attraction
            indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
            attraction = adjacency.data[adjacency.indptr[i]:adjacency.indptr[i + 1]] / strength

            grad = position[i] - position[indices]
            attraction *= np.linalg.norm(grad, axis=1)
            attraction = (grad * attraction[:, np.newaxis]).sum(axis=0)

            # repulsion
            if tree is None:
                grad = (position[i] - position)  # shape (n, n_components)
                distance = np.linalg.norm(grad, axis=1)  # shape (n,)
            else:
                neighbors = tree.query_ball_point(position[i], self.approx_radius)
                grad = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
                distance = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)

            # Clamp small distances to avoid division by (near) zero.
            distance = np.where(distance < 0.01, 0.01, distance)
            repulsion = (grad * (strength / distance)[:, np.newaxis] ** 2).sum(axis=0)

            # total force
            delta[i] = repulsion - attraction

        # Rescale the displacement per component so that it is bounded by step_max.
        length = np.linalg.norm(delta, axis=0)
        length = np.where(length < 0.01, 0.1, length)
        delta = delta * step_max / length
        position += delta
        step_max -= step
        err: float = np.linalg.norm(delta) / n
        if err < self.tol:
            break

    self.embedding_ = position
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], pos_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'ForceAtlas':
    """Compute layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected.
    pos_init :
        Position to start with, of shape (n, n_components). Random if not provided.
        The array is copied and is not modified.
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`ForceAtlas`

    Raises
    ------
    ValueError
        If ``pos_init`` does not have shape (n, n_components).
    """
    # verify the format of the adjacency matrix
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]

    # setting of the tolerance according to the size of the graph
    if n < 5000:
        tolerance = 0.1
    elif 5000 <= n < 50000:  # pragma: no cover
        tolerance = 1
    else:  # pragma: no cover
        tolerance = 10

    if n_iter is None:
        n_iter = self.n_iter

    # initial position of the nodes of the graph
    if pos_init is None:
        position: np.ndarray = np.random.randn(n, self.n_components)
    else:
        if pos_init.shape != (n, self.n_components):
            raise ValueError('The initial position does not have valid dimensions.')
        # Work on a float copy: positions are updated in place below, which
        # would otherwise overwrite the caller's array and fail for integer
        # dtypes.
        position = pos_init.astype(float)

    # compute the vector with the degree of each node (plus one)
    degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

    # initialization of variation of position of nodes
    resultants = np.zeros(n)
    delta: np.ndarray = np.zeros((n, self.n_components))
    swing_vector: np.ndarray = np.zeros(n)
    global_speed = 1

    for iteration in range(n_iter):
        delta *= 0
        global_swing = 0
        global_traction = 0

        if self.approx_radius > 0:
            tree = cKDTree(position)
        else:
            tree = None

        for i in range(n):
            # attraction
            indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
            attraction = position[i] - position[indices]

            if self.lin_log:
                attraction = np.sign(attraction) * np.log(1 + np.abs(10 * attraction))
            attraction = attraction.sum(axis=0)

            # repulsion
            if tree is None:
                neighbors = np.arange(n)
            else:
                neighbors = tree.query_ball_point(position[i], self.approx_radius)

            grad: np.ndarray = (position[i] - position[neighbors])  # shape (n_neigh, n_components)
            distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n_neigh,)
            # Clamp small distances to avoid division by (near) zero.
            distance = np.where(distance < 0.01, 0.01, distance)

            repulsion = grad * (degree[neighbors] / distance)[:, np.newaxis]
            repulsion *= self.repulsive_factor * degree[i]
            repulsion = repulsion.sum(axis=0)

            # gravity
            gravity = self.gravity_factor * degree[i] * grad
            gravity = gravity.sum(axis=0)

            # forces resultant applied on node i for traction, swing and speed computation
            force = repulsion - attraction - gravity
            resultant_new: float = np.linalg.norm(force)
            resultant_old: float = resultants[i]

            swing_node: float = np.abs(resultant_new - resultant_old)  # force variation applied on node i
            swing_vector[i] = swing_node
            global_swing += (degree[i] + 1) * swing_node

            traction: float = np.abs(resultant_new + resultant_old) / 2  # traction force applied on node i
            global_traction += (degree[i] + 1) * traction

            node_speed = self.speed * global_speed / (1 + global_speed * np.sqrt(swing_node))
            # Cap the displacement of the node.
            if node_speed > self.speed_max / resultant_new:  # pragma: no cover
                node_speed = self.speed_max / resultant_new

            delta[i] = node_speed * force
            resultants[i] = resultant_new

        global_speed = tolerance * global_traction / global_swing

        position += delta  # calculating displacement and final position of points after iteration

        # Stop as soon as the swing of every node is below 1.
        if (swing_vector < 1).all():
            break

    self.embedding_ = position
    return self
def load_edge_list(file: str, directed: bool = False, bipartite: bool = False, weighted: Optional[bool] = None,
                   named: Optional[bool] = None, comment: str = '%#', delimiter: str = None, reindex: bool = True,
                   fast_format: bool = True) -> Bunch:
    """Parser for Tabulation-Separated, Comma-Separated or Space-Separated (or other) Values datasets in the form of
    edge lists.

    Parameters
    ----------
    file : str
        The path to the dataset in TSV format
    directed : bool
        If ``True``, considers the graph as directed.
    bipartite : bool
        If ``True``, returns a biadjacency matrix of shape (n1, n2).
    weighted : Optional[bool]
        Retrieves the weights in the third field of the file. None makes a guess based on the first lines.
    named : Optional[bool]
        Retrieves the names given to the nodes and renumbers them. Returns an additional array. None makes a guess
        based on the first lines.
    comment : str
        Set of characters denoting lines to ignore.
    delimiter : str
        delimiter used in the file. None makes a guess
    reindex : bool
        If ``True``, the matrix size is the number of distinct node identifiers. If ``False``, it is the
        largest identifier plus one.
    fast_format : bool
        If True, assumes that the file is well-formatted:

        * no comments except for the header
        * only 2 or 3 columns
        * only int or float values

    Returns
    -------
    graph: :class:`Bunch`

    Raises
    ------
    ValueError
        If ``fast_format`` is requested but the file cannot be parsed that way.
    """
    reindexed = False
    header_len, guess_delimiter, guess_weighted, guess_named, guess_string_present, guess_type = \
        scan_header(file, comment)
    if weighted is None:
        weighted = guess_weighted
    if named is None:
        named = guess_named
    if delimiter is None:
        delimiter = guess_delimiter

    with open(file, 'r', encoding='utf-8') as f:
        for _ in range(header_len):
            f.readline()
        if fast_format and not guess_string_present:
            # fromfile raises a DeprecationWarning on fail. This should be changed to ValueError in the future.
            # The warning is promoted to an error only inside this context, so
            # the process-wide warning filters are restored on every exit path.
            with warnings.catch_warnings():
                warnings.simplefilter("error")
                try:
                    # NOTE(review): the guessed delimiter is used here even when
                    # the caller supplies ``delimiter`` - confirm this is intended.
                    parsed = np.fromfile(f, sep=guess_delimiter, dtype=guess_type)
                except (DeprecationWarning, ValueError) as error:
                    raise ValueError('File not suitable for fast parsing. Set fast_format to False.') from error
            n_entries = len(parsed)
            if weighted:
                parsed.resize((n_entries // 3, 3))
                row, col, data = parsed[:, 0], parsed[:, 1], parsed[:, 2]
            else:
                parsed.resize((n_entries // 2, 2))
                row, col = parsed[:, 0], parsed[:, 1]
                data = np.ones(row.shape[0], dtype=bool)
        else:
            row, col, data = [], [], []
            csv_reader = reader(f, delimiter=delimiter)
            for line in csv_reader:
                # Blank lines are parsed as empty rows; skip them.
                if not line:
                    continue
                if line[0] not in comment:
                    if named:
                        row.append(line[0])
                        col.append(line[1])
                    else:
                        row.append(int(line[0]))
                        col.append(int(line[1]))
                    if weighted:
                        data.append(float(line[2]))

    n_edges = len(row)
    graph = Bunch()
    if bipartite:
        # Rows and columns are numbered independently.
        names_row, row = np.unique(row, return_inverse=True)
        names_col, col = np.unique(col, return_inverse=True)
        if not reindex:
            n_row = names_row.max() + 1
            n_col = names_col.max() + 1
        else:
            n_row = len(names_row)
            n_col = len(names_col)
        if not weighted:
            data = np.ones(n_edges, dtype=bool)
        biadjacency = sparse.csr_matrix((data, (row, col)), shape=(n_row, n_col))
        graph.biadjacency = biadjacency
        if named or reindex:
            graph.names = names_row
            graph.names_row = names_row
            graph.names_col = names_col
    else:
        # Sources and targets share a single numbering.
        nodes = np.concatenate((row, col), axis=None)
        names, new_nodes = np.unique(nodes, return_inverse=True)
        if not reindex:
            n_nodes = names.max() + 1
        else:
            n_nodes = len(names)
        if named:
            row = new_nodes[:n_edges]
            col = new_nodes[n_edges:]
        else:
            # Numeric identifiers are renumbered only when they do not already
            # span 0 .. n_nodes - 1.
            should_reindex = not (names[0] == 0 and names[-1] == n_nodes - 1)
            if should_reindex and reindex:
                reindexed = True
                row = new_nodes[:n_edges]
                col = new_nodes[n_edges:]
        if not weighted:
            data = np.ones(n_edges, dtype=bool)
        adjacency = sparse.csr_matrix((data, (row, col)), shape=(n_nodes, n_nodes))
        if not directed:
            adjacency = directed2undirected(adjacency, weighted=weighted)
        graph.adjacency = adjacency
        if named or reindexed:
            graph.names = names

    return graph
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
        n_iter: Optional[int] = None) -> 'Spring':
    """Compute layout.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph, treated as undirected.
    position_init : np.ndarray
        Custom initial positions of the nodes. Shape must be (n, 2).
        If ``None``, use the value of self.position_init.
    n_iter : int
        Number of iterations to update positions.
        If ``None``, use the value of self.n_iter.

    Returns
    -------
    self: :class:`Spring`

    Raises
    ------
    ValueError
        If ``position_init`` does not have shape (n, 2).
    TypeError
        If ``position_init`` is neither ``None`` nor a NumPy array.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    if not is_symmetric(adjacency):
        adjacency = directed2undirected(adjacency)
    n = adjacency.shape[0]

    # Default to all-zero positions when self.position_init is neither
    # 'random' nor 'spectral'.
    position = np.zeros((n, 2))
    if position_init is None:
        if self.position_init == 'random':
            position = np.random.randn(n, 2)
        elif self.position_init == 'spectral':
            position = Spectral(n_components=2, normalized=False).fit_transform(adjacency)
    elif isinstance(position_init, np.ndarray):
        if position_init.shape == (n, 2):
            # Copy as float: the caller's array is left untouched and integer
            # positions do not break the in-place float update below.
            position = position_init.astype(float)
        else:
            raise ValueError('Initial position has invalid shape.')
    else:
        raise TypeError('Initial position must be a numpy array.')

    if n_iter is None:
        n_iter = self.n_iter

    if self.strength is None:
        strength = np.sqrt((1 / n))
    else:
        strength = self.strength

    # The maximum step starts at 10% of the largest extent of the initial
    # layout and decreases linearly at each iteration.
    delta_x: float = position[:, 0].max() - position[:, 0].min()
    delta_y: float = position[:, 1].max() - position[:, 1].min()
    step_max: float = 0.1 * max(delta_x, delta_y)
    step: float = step_max / (n_iter + 1)

    delta = np.zeros((n, 2))
    for iteration in range(n_iter):
        delta *= 0
        for i in range(n):
            indices = adjacency.indices[adjacency.indptr[i]:adjacency.indptr[i + 1]]
            data = adjacency.data[adjacency.indptr[i]:adjacency.indptr[i + 1]]

            grad: np.ndarray = (position[i] - position)  # shape (n, 2)
            distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n,)
            # Clamp small distances to avoid division by (near) zero.
            distance = np.where(distance < 0.01, 0.01, distance)

            # Attraction acts only along the edges of node i; repulsion acts
            # between node i and every node.
            attraction = np.zeros(n)
            attraction[indices] += data * distance[indices] / strength

            repulsion = (strength / distance) ** 2

            delta[i] = (grad * (repulsion - attraction)[:, np.newaxis]).sum(axis=0)  # shape (2,)

        # Rescale the displacement per coordinate so that it is bounded by step_max.
        length = np.linalg.norm(delta, axis=0)
        length = np.where(length < 0.01, 0.1, length)
        delta = delta * step_max / length
        position += delta
        step_max -= step
        err: float = np.linalg.norm(delta) / n
        if err < self.tol:
            break

    self.embedding_ = position
    return self