def get_adjacency_seeds(
        input_matrix: Union[sparse.csr_matrix, np.ndarray],
        allow_directed: bool = True,
        force_bipartite: bool = False,
        force_directed: bool = False,
        seeds: Optional[Union[dict, np.ndarray]] = None,
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        default_value: float = -1,
        which: Optional[str] = None) -> Tuple[sparse.csr_matrix, np.ndarray, bool]:
    """Check the input matrix and return a proper adjacency matrix with seeds.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
        Forced to ``True`` as soon as ``seeds_row`` or ``seeds_col`` is given.
    force_directed :
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
    seeds :
        Values of seed nodes in initial state (dictionary or vector). Negative values ignored.
    seeds_row, seeds_col :
        Values of rows and columns for bipartite graphs. Negative values ignored.
        Only used when the graph is treated as bipartite and ``seeds`` is ``None``.
    default_value :
        Value of non-seed nodes (default = -1).
    which :
        Which seed values.
        If ``'probs'``, return a probability distribution (seeds are divided by their sum
        when this sum is positive).
        If ``'labels'``, return distinct integer values if all are equal (the seed vector is
        replaced by ``0, 1, ..., n - 1`` when the non-negative seeds share a single value).

    Returns
    -------
    adjacency :
        Adjacency matrix returned by ``get_adjacency``.
    seeds :
        Vector of seed values.
    bipartite :
        Bipartite flag returned by ``get_adjacency``.
    """
    # Row/column seeds only make sense on a bipartite graph.
    bipartite_seeds_given = not (seeds_row is None and seeds_col is None)
    if bipartite_seeds_given:
        force_bipartite = True

    adjacency, bipartite = get_adjacency(input_matrix, allow_directed=allow_directed,
                                         force_bipartite=force_bipartite, force_directed=force_directed)

    shape = input_matrix.shape
    if not bipartite:
        seeds = get_seeds(shape, seeds, default_value=default_value)
    elif seeds is None:
        seeds = stack_seeds(shape, seeds_row, seeds_col, default_value=default_value)
    else:
        seeds = stack_seeds(shape, seeds, default_value=default_value)

    if which == 'probs':
        total = seeds.sum()
        if total > 0:
            # in-place normalization of the seed vector
            seeds /= total
    elif which == 'labels':
        distinct_values = set(seeds[seeds >= 0])
        if len(distinct_values) == 1:
            seeds = np.arange(len(seeds))

    return adjacency, seeds, bipartite
def test_stack_seeds(self):
    """Check that ``stack_seeds`` gives the same result for array and dict seeds."""
    n_row, n_col = 4, 3
    rows_as_array = np.array([0, 1, -1, 0])
    rows_as_dict = {0: 0, 1: 1, 3: 0}
    cols_as_array = np.array([0, 1, -1])
    cols_as_dict = {0: 0, 1: 1}

    # Every array/dict combination for rows and columns must agree.
    combinations = (
        (rows_as_array, cols_as_array),
        (rows_as_dict, cols_as_dict),
        (rows_as_array, cols_as_dict),
        (rows_as_dict, cols_as_array),
    )
    stacked = [stack_seeds(n_row, n_col, rows, cols) for rows, cols in combinations]
    for left, right in zip(stacked, stacked[1:]):
        self.assertTrue(np.allclose(left, right))

    # Row seeds only.
    from_array = stack_seeds(n_row, n_col, rows_as_array, None)
    from_dict = stack_seeds(n_row, n_col, rows_as_dict, None)
    self.assertTrue(np.allclose(from_array, from_dict))

    # Column seeds only: dimensions are swapped so that the 4-element seeds fit the columns.
    from_array = stack_seeds(n_col, n_row, None, rows_as_array)
    from_dict = stack_seeds(n_col, n_row, None, rows_as_dict)
    self.assertTrue(np.allclose(from_array, from_dict))
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Union[np.ndarray, dict],
        seeds_col: Union[np.ndarray, dict, None] = None) -> 'RankClassifier':
    """Compute labels.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    seeds_row :
        Seed rows (labels as dictionary or array; negative values ignored).
    seeds_col :
        Seed columns (optional). Same format.

    Returns
    -------
    self :
        This instance, with ``labels_row_``, ``labels_col_``, ``membership_row_`` and
        ``membership_col_`` set; ``labels_`` and ``membership_`` refer to the rows.
    """
    n_row, n_col = biadjacency.shape
    stacked_labels = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    RankClassifier.fit(self, bipartite2undirected(biadjacency), stacked_labels)

    # The first n_row entries belong to rows, the remaining ones to columns.
    self.labels_row_, self.labels_col_ = self.labels_[:n_row], self.labels_[n_row:]
    self.labels_ = self.labels_row_

    self.membership_row_, self.membership_col_ = self.membership_[:n_row], self.membership_[n_row:]
    self.membership_ = self.membership_row_

    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
    """Fit algorithm to data.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix.
    seeds_row, seeds_col :
        Parameter to be used for Personalized BiPageRank.
        Restart distribution as vectors or dicts on rows, columns (node: weight).
        If both seeds_row and seeds_col are ``None`` (default), the uniform distribution on rows is used.

    Returns
    -------
    self: :class:`BiPageRank`
    """
    formatted = check_format(biadjacency)
    n_row, n_col = formatted.shape

    # PageRank runs on the undirected graph with rows stacked before columns.
    PageRank.fit(self,
                 bipartite2undirected(formatted),
                 stack_seeds(n_row, n_col, seeds_row, seeds_col))
    self._split_vars(n_row)

    # Rows and columns are normalized separately (each divided by its own sum, in place).
    row_total = self.scores_row_.sum()
    self.scores_row_ /= row_total
    col_total = self.scores_col_.sum()
    self.scores_col_ /= col_total

    # The generic attribute exposes the row scores.
    self.scores_ = self.scores_row_
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Union[np.ndarray, dict],
        seeds_col: Optional[Union[np.ndarray, dict]] = None) -> 'BiPropagation':
    """Fit the classifier to a bipartite graph.

    The biadjacency matrix is turned into the adjacency matrix of an undirected graph
    (rows first, then columns), the row and column seeds are stacked accordingly, and
    the work is delegated to :meth:`Propagation.fit`.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    seeds_row :
        Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
    seeds_col :
        Seed columns (optional). Same format.

    Returns
    -------
    self: :class:`BiPropagation`
    """
    # Format first, then read the shape, so the dimensions always describe the
    # matrix that is actually used to build the graph.
    biadjacency = check_format(biadjacency)
    n_row, n_col = biadjacency.shape

    adjacency = bipartite2undirected(biadjacency)
    # Labels are cast to integers before being handed to the parent class.
    seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col).astype(int)

    Propagation.fit(self, adjacency, seeds)
    self._split_vars(n_row)

    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
    """Fit algorithm to data.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix.
    seeds_row :
        Seed rows, as a dict or a vector.
    seeds_col :
        Seed columns, as a dict or a vector.
        If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

    Returns
    -------
    self: :class:`BiPageRank`
    """
    matrix = check_format(biadjacency)
    n_row, n_col = matrix.shape

    undirected = bipartite2undirected(matrix)
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    PageRank.fit(self, undirected, stacked_seeds)

    # Split the stacked scores, then rescale each side by its own sum (in place).
    self._split_vars(n_row)
    sum_rows = self.scores_row_.sum()
    self.scores_row_ /= sum_rows
    sum_cols = self.scores_col_.sum()
    self.scores_col_ /= sum_cols

    # Row scores are the default output.
    self.scores_ = self.scores_row_
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'BiDirichlet':
    """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix, shape (n_row, n_col).
    seeds_row :
        Temperatures of seed rows (dictionary or vector of size n_row). Negative temperatures ignored.
    seeds_col :
        Temperatures of seed columns (dictionary or vector of size n_col). Negative temperatures ignored.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`BiDirichlet`
    """
    formatted = check_format(biadjacency)
    n_row, n_col = formatted.shape

    # Seeds are stacked rows-first to match the node order of the undirected graph.
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    undirected = bipartite2undirected(formatted)

    Dirichlet.fit(self, undirected, stacked_seeds, init)
    self._split_vars(n_row)
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        initial_state: Optional = None) -> 'BiDiffusion':
    """Compute the diffusion (temperature at equilibrium).

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix, shape (n_row, n_col).
    seeds_row :
        Temperatures of row border nodes (dictionary or vector of size n_row). Negative temperatures ignored.
    seeds_col :
        Temperatures of column border nodes (dictionary or vector of size n_col). Negative temperatures ignored.
    initial_state :
        Initial state of temperatures.

    Returns
    -------
    self: :class:`BiDiffusion`
    """
    # NOTE(review): ``initial_state`` is accepted but never forwarded to ``Diffusion.fit``;
    # confirm against the parent signature whether it should be passed through.
    formatted = check_format(biadjacency)
    n_row, n_col = formatted.shape

    # Seeds are stacked rows-first to match the node order of the undirected graph.
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    undirected = bipartite2undirected(formatted)

    Diffusion.fit(self, undirected, stacked_seeds)
    self._split_vars(n_row)
    return self
def _instanciate_vars(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
                      seeds_row: Union[np.ndarray, dict],
                      seeds_col: Optional[Union[np.ndarray, dict]] = None):
    """Prepare seed indices, seed labels and the stacked embedding.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    seeds_row :
        Seed rows (dictionary or array; negative values mean no label).
    seeds_col :
        Seed columns (optional). Same format.

    Returns
    -------
    index_seed :
        Indices of nodes with a non-negative label, in the stacked (rows, columns) order.
    index_remain :
        Indices of nodes with a negative label.
    labels_seed :
        Labels of the seed nodes.
    embedding :
        Row embedding stacked on top of the column embedding.
    """
    n_row, n_col = biadjacency.shape
    labels = stack_seeds(n_row, n_col, seeds_row, seeds_col).astype(int)

    # Labels are integers here, so "not a seed" is exactly "label < 0".
    is_seed = labels >= 0
    index_seed = np.argwhere(is_seed).ravel()
    index_remain = np.argwhere(~is_seed).ravel()
    labels_seed = labels[index_seed]

    method = self.embedding_method
    method.fit(biadjacency)
    embedding = np.vstack((method.embedding_row_, method.embedding_col_))

    return index_seed, index_remain, labels_seed, embedding
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'BiDiffusion':
    """Compute the diffusion (temperatures at equilibrium).

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix, shape (n_row, n_col).
    seeds_row :
        Temperatures of seed rows in initial state (dictionary or vector of size n_row).
        Negative temperatures ignored.
    seeds_col :
        Temperatures of seed columns in initial state (dictionary or vector of size n_col).
        Negative temperatures ignored.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`BiDiffusion`
    """
    formatted = check_format(biadjacency)
    n_row, n_col = formatted.shape

    # Seeds are stacked rows-first to match the node order of the undirected graph.
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    undirected = bipartite2undirected(formatted)
    Diffusion.fit(self, undirected, stacked_seeds, init)

    # Average over 2 successive iterations because the graph is bipartite:
    # apply the operator once more and take the mean with the current scores (in place).
    operator = DirichletOperator(undirected, self.damping_factor)
    next_scores = operator.dot(self.scores_)
    self.scores_ += next_scores
    self.scores_ /= 2

    self._split_vars(n_row)
    return self