def stack_seeds(n_row: int, n_col: int, seeds_row: Optional[Union[np.ndarray, dict]],
                seeds_col: Optional[Union[np.ndarray, dict]] = None) -> np.ndarray:
    """Process seeds for rows and columns and stack the results into a single vector.

    Parameters
    ----------
    n_row :
        Number of rows; passed to ``check_seeds`` together with ``seeds_row``.
    n_col :
        Number of columns; passed to ``check_seeds`` together with ``seeds_col``.
    seeds_row :
        Seeds for the rows (array or dict), or ``None``.
    seeds_col :
        Seeds for the columns (array or dict), or ``None``.

    Returns
    -------
    seeds : np.ndarray
        Checked row seeds followed by checked column seeds (``np.hstack``).

    Notes
    -----
    Defaults used for missing seeds:

    * both missing: rows are filled with ``1`` and columns with ``-1``;
    * only rows missing: rows are filled with ``-1``;
    * only columns missing: columns are filled with ``-1``.
    """
    no_row_seeds = seeds_row is None
    no_col_seeds = seeds_col is None

    if no_row_seeds:
        # Rows default to +1 only when no seed is given at all; otherwise to -1.
        seeds_row = np.ones(n_row) if no_col_seeds else -np.ones(n_row)
    if no_col_seeds:
        seeds_col = -np.ones(n_col)

    stacked = [check_seeds(seeds_row, n_row), check_seeds(seeds_col, n_col)]
    return np.hstack(stacked)
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray], seeds_row: Union[np.ndarray, dict],
        seeds_col: Union[np.ndarray, dict, None] = None) -> 'RankClassifier':
    """Compute labels of the rows, then derive labels of the columns.

    Rows are classified by delegating to ``RankBiClassifier.fit`` with the checked row seeds.
    Column memberships are obtained by multiplying the normalized transposed biadjacency
    matrix with the row memberships and normalizing the result; each column is labelled
    with the arg-max of its membership row.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    seeds_row :
        Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
    seeds_col :
        Seed columns (optional). Same format.
        NOTE(review): this argument is not read anywhere in the method body.

    Returns
    -------
    self: :class:`CoPageRankClassifier`
    """
    n_row, _ = biadjacency.shape
    row_seed_labels = check_seeds(seeds_row, n_row).astype(int)

    # Row side: run the parent fit, then copy its results under row-specific names.
    RankBiClassifier.fit(self, biadjacency, row_seed_labels)
    self.labels_row_ = self.labels_
    self.membership_row_ = self.membership_

    # Column side: propagate row memberships through the normalized transposed matrix.
    col_transition = normalize(biadjacency.T).tocsr()
    self.membership_col_ = normalize(col_transition.dot(self.membership_row_))
    dense_col_membership = self.membership_col_.toarray()
    self.labels_col_ = np.argmax(dense_col_membership, axis=1)
    return self
def seeds2probs(n: int, seeds: Union[dict, np.ndarray] = None) -> np.ndarray:
    """Transform seeds into a probability vector.

    Parameters
    ----------
    n : int
        Total number of samples.
    seeds :
        If ``None``, the uniform distribution is used.
        Otherwise, a non-negative, non-zero vector or a dictionary must be provided.

    Returns
    -------
    probs : np.ndarray
        A probability vector: strictly positive seed values divided by their sum,
        all other entries equal to zero.

    Raises
    ------
    ValueError
        If the sum of the strictly positive seed values is not positive.
    """
    if seeds is None:
        return np.ones(n) / n

    seeds = check_seeds(seeds, n)
    positive = seeds > 0

    # Keep only strictly positive entries; everything else stays at zero.
    probs = np.zeros_like(seeds)
    probs[positive] = seeds[positive]

    total: float = probs.sum()
    if not total > 0:
        raise ValueError('At least one seeds must have a positive probability.')
    return probs / total
def _instanciate_vars(adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]):
    """Split node indices into seeds and non-seeds and collect the seed labels.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph; only its first dimension is used.
    seeds :
        Seed labels (array or dict), validated by ``check_seeds``.

    Returns
    -------
    index_seed : np.ndarray
        Indices whose label is ``>= 0``, as ``int32``.
    index_remain : np.ndarray
        Indices whose label is ``< 0``, as ``int32``.
    labels_seed : np.ndarray
        Labels found at ``index_seed``, as ``int32``.
    """
    n_nodes = adjacency.shape[0]
    labels = check_seeds(seeds, n_nodes)

    seed_idx = np.argwhere(labels >= 0).ravel()
    rest_idx = np.argwhere(labels < 0).ravel()
    seed_labels = labels[seed_idx]

    return tuple(part.astype(np.int32) for part in (seed_idx, rest_idx, seed_labels))
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Optional[Union[dict, np.ndarray]] = None,
        initial_state: Optional = None) -> 'Diffusion':
    """Compute the diffusion (temperature at equilibrium).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Temperatures of border nodes (dictionary or vector). Negative temperatures ignored.
        If ``None``, the scores are set to the uniform vector ``1 / n``.
    initial_state :
        Initial state of temperatures. If ``None``, non-border nodes start at the mean
        border temperature (or at zero when all border temperatures are equal) and
        border nodes start at their own temperature.

    Returns
    -------
    self: :class:`Diffusion`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n: int = adjacency.shape[0]

    if seeds is None:
        self.scores_ = np.ones(n) / n
        return self

    b, border = limit_conditions(check_seeds(seeds, n))
    tmin, tmax = np.min(b[border]), np.max(b)
    # When every border temperature is identical, no default mean start and no clipping is applied.
    has_range = tmin != tmax

    # Diagonal 0/1 matrix that zeroes the rows of border nodes in the transition matrix.
    interior_mask: sparse.csr_matrix = sparse.diags(~border, shape=(n, n), format='csr', dtype=float)
    diffusion_matrix = interior_mask.dot(normalize(adjacency))

    if initial_state is None:
        initial_state = b[border].mean() * np.ones(n) if has_range else np.zeros(n)
        initial_state[border] = b[border]

    if self.n_iter > 0:
        # Fixed number of diffusion steps, re-imposing border temperatures after each one.
        scores = initial_state
        for _ in range(self.n_iter):
            scores = diffusion_matrix.dot(scores)
            scores[border] = b[border]
    else:
        # Solve (I - diffusion_matrix) x = b with the iterative solver instead.
        system = sparse.eye(n, format='csr', dtype=float) - diffusion_matrix
        scores, info = bicgstab(system, b, atol=0., x0=initial_state)
        self._scipy_solver_info(info)

    self.scores_ = np.clip(scores, tmin, tmax) if has_range else scores
    return self
def _instanciate_vars(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]):
    """Split node indices into seeds and non-seeds and embed the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed labels (array or dict), validated by ``check_seeds`` and cast to ``int``.

    Returns
    -------
    index_seed : np.ndarray
        Indices whose label is ``>= 0``.
    index_remain : np.ndarray
        Indices whose label is ``< 0``.
    labels_seed : np.ndarray
        Labels found at ``index_seed``.
    embedding :
        Output of ``self.embedding_method.fit_transform(adjacency)``.
    """
    n_nodes = adjacency.shape[0]
    labels = check_seeds(seeds, n_nodes).astype(int)

    seed_idx = np.argwhere(labels >= 0).ravel()
    rest_idx = np.argwhere(labels < 0).ravel()

    node_embedding = self.embedding_method.fit_transform(adjacency)
    return seed_idx, rest_idx, labels[seed_idx], node_embedding
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'Dirichlet':
    """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        If ``None``, the scores are set to the uniform vector ``1 / n``.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`Dirichlet`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n: int = adjacency.shape[0]

    if seeds is None:
        self.scores_ = np.ones(n) / n
        return self

    seeds = check_seeds(seeds, n)
    border = (seeds >= 0)
    border_values = seeds[border]

    # Initial state: a constant everywhere, overwritten by the seed temperatures on the border.
    start_value = border_values.mean() if init is None else init
    scores = start_value * np.ones(n)
    scores[border] = border_values

    if self.n_iter > 0:
        # Fixed number of operator applications, re-imposing seed temperatures each time.
        diffusion = DirichletOperator(adjacency, self.damping_factor, border)
        for _ in range(self.n_iter):
            scores = diffusion.dot(scores)
            scores[border] = border_values
    else:
        # Iterative linear solve; the right-hand side is -seeds on the border and 0 elsewhere.
        a = DeltaDirichletOperator(adjacency, self.damping_factor, border)
        b = -seeds
        b[~border] = 0
        scores, info = bicgstab(a, b, atol=0., x0=scores)
        self._scipy_solver_info(info)

    # Keep the result inside the range spanned by the seed temperatures.
    self.scores_ = np.clip(scores, border_values.min(), border_values.max())
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'Diffusion':
    """Compute the diffusion (temperatures after ``self.n_iter`` steps).

    Unlike a boundary-value formulation, the seed temperatures are only used to build
    the initial state; they are not re-imposed between iterations.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Temperatures of seed nodes in initial state (dictionary or vector).
        Negative temperatures ignored.
        If ``None``, the scores are set to the uniform vector ``1 / n``.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`Diffusion`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n: int = adjacency.shape[0]

    if seeds is None:
        self.scores_ = np.ones(n) / n
        return self

    seeds = check_seeds(seeds, n)
    border = (seeds >= 0)

    # Initial state: a constant everywhere, overwritten by the seed temperatures.
    start_value = seeds[border].mean() if init is None else init
    scores = start_value * np.ones(n)
    scores[border] = seeds[border]

    diffusion = DirichletOperator(adjacency, self.damping_factor)
    for _ in range(self.n_iter):
        scores = diffusion.dot(scores)

    self.scores_ = scores
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) -> 'RankClassifier':
    """Fit algorithm to the data.

    One ranking is computed per entry of ``self._process_seeds(...)`` by mapping
    ``self.algorithm.fit_transform`` over a ``Pool`` of ``self.n_jobs`` workers. The stacked
    scores are transposed, post-processed and normalized; each node receives the class
    with the highest score.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed nodes (labels as dictionary or array; negative values ignored).

    Returns
    -------
    self: :class:`RankClassifier`
    """
    n = adjacency.shape[0]
    seeds_labels = check_seeds(seeds, n).astype(int)
    classes, _ = check_labels(seeds_labels)

    per_class_seeds = self._process_seeds(seeds_labels)
    rank_from_seeds = partial(self.algorithm.fit_transform, adjacency)
    with Pool(self.n_jobs) as pool:
        stacked_scores = np.array(pool.map(rank_from_seeds, per_class_seeds))

    # Transpose so that the arg-max over axis 1 below selects among the per-class rankings.
    scores = normalize(self._process_scores(stacked_scores.T))

    # Relabel the column indices with the actual class values before building the CSR matrix.
    membership = sparse.coo_matrix(scores)
    membership.col = classes[membership.col]

    best_column = np.argmax(scores, axis=1)
    self.labels_ = classes[best_column]
    self.membership_ = sparse.csr_matrix(membership, shape=(n, np.max(seeds_labels) + 1))
    return self