Exemple #1
0
def stack_seeds(n_row: int, n_col: int, seeds_row: Optional[Union[np.ndarray, dict]],
                seeds_col: Optional[Union[np.ndarray, dict]] = None) -> np.ndarray:
    """Build one seed vector covering rows first, then columns.

    Parameters
    ----------
    n_row :
        Number of rows.
    n_col :
        Number of columns.
    seeds_row :
        Seeds for the rows (array or dict), or ``None``.
    seeds_col :
        Seeds for the columns (array or dict), or ``None``.

    Returns
    -------
    np.ndarray
        Horizontal stack of the checked row seeds and the checked column seeds.

    Notes
    -----
    Missing inputs are replaced before validation:

    * both missing: rows become a vector of ones, columns a vector of minus ones;
    * only one side missing: that side becomes a vector of minus ones.
    """
    row_missing = seeds_row is None
    col_missing = seeds_col is None

    if row_missing:
        # Rows default to ones only when no seed at all was supplied.
        seeds_row = np.ones(n_row) if col_missing else -np.ones(n_row)
    if col_missing:
        seeds_col = -np.ones(n_col)

    stacked = [check_seeds(seeds_row, n_row), check_seeds(seeds_col, n_col)]
    return np.hstack(stacked)
    def fit(self,
            biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Union[np.ndarray, dict],
            seeds_col: Union[np.ndarray, dict,
                             None] = None) -> 'RankClassifier':
        """Classify rows from their seeds, then derive column labels from the row memberships.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.
        seeds_row :
            Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
        seeds_col :
            Seed columns (optional). Same format.
            NOTE(review): this argument is accepted but never read in this method body.

        Returns
        -------
        self: :class:`CoPageRankClassifier`
        """
        n_row, _ = biadjacency.shape
        row_labels = check_seeds(seeds_row, n_row).astype(int)

        # Row side: delegate to the parent fit, then expose its results under row-specific names.
        RankBiClassifier.fit(self, biadjacency, row_labels)
        self.labels_row_, self.membership_row_ = self.labels_, self.membership_

        # Column side: push the row memberships through the normalized transposed matrix.
        col_transition = normalize(biadjacency.T).tocsr()
        col_membership = normalize(col_transition.dot(self.membership_row_))
        self.membership_col_ = col_membership
        self.labels_col_ = np.argmax(col_membership.toarray(), axis=1)

        return self
Exemple #3
0
def seeds2probs(n: int, seeds: Union[dict, np.ndarray] = None) -> np.ndarray:
    """Turn seed weights into a probability distribution over ``n`` samples.

    Parameters
    ----------
    n : int
        Total number of samples.
    seeds :
        Seed weights as a vector or a dictionary.
        If ``None``, the uniform distribution is returned.
        Only strictly positive entries contribute; all others get probability zero.

    Returns
    -------
    probs: np.ndarray
        A probability vector.

    Raises
    ------
    ValueError
        If no entry of ``seeds`` is strictly positive.
    """
    # No seeds: every sample is equally likely.
    if seeds is None:
        return np.ones(n) / n

    weights = check_seeds(seeds, n)
    positive = weights > 0
    probs = np.zeros_like(weights)
    probs[positive] = weights[positive]

    total: float = probs.sum()
    if not total > 0:
        raise ValueError('At least one seeds must have a positive probability.')
    return probs / total
 def _instanciate_vars(adjacency: Union[sparse.csr_matrix, np.ndarray],
                        seeds: Union[np.ndarray, dict]):
     """Split node indices into seeded and remaining ones.

     Parameters
     ----------
     adjacency :
         Adjacency matrix of the graph; only its number of rows is used.
     seeds :
         Seed labels (array or dict). Entries ``>= 0`` are treated as seeds,
         entries ``< 0`` as nodes still to be labeled.

     Returns
     -------
     tuple
         ``(index_seed, index_remain, labels_seed)``, each cast to ``np.int32``.
     """
     n_nodes = adjacency.shape[0]
     seed_values = check_seeds(seeds, n_nodes)

     seeded = np.argwhere(seed_values >= 0).ravel()
     unseeded = np.argwhere(seed_values < 0).ravel()

     parts = (seeded, unseeded, seed_values[seeded])
     return tuple(part.astype(np.int32) for part in parts)
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, initial_state: Optional = None) -> 'Diffusion':
        """Compute the diffusion (temperature at equilibrium).

        The result is stored in ``self.scores_``. With ``self.n_iter > 0`` the scores are
        obtained by repeated application of the diffusion matrix; otherwise a linear
        system is solved with ``bicgstab``.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph. Must be square.
        seeds :
            Temperatures of border nodes (dictionary or vector). Negative temperatures ignored.
            If ``None``, ``scores_`` is set to the uniform vector ``1 / n`` and nothing else is computed.
        initial_state :
            Initial state of temperatures.
            If ``None``, non-border nodes start at the mean border temperature
            (or at zero when ``tmin == tmax``) and border nodes at their own temperature.

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n: int = adjacency.shape[0]
        if seeds is None:
            # No border condition: fall back to the uniform vector.
            self.scores_ = np.ones(n) / n
            return self

        seeds = check_seeds(seeds, n)
        # presumably b holds the border temperatures and border is a boolean mask of
        # border nodes — TODO confirm against limit_conditions
        b, border = limit_conditions(seeds)
        # NOTE(review): tmin is taken over border entries only, tmax over all of b —
        # confirm this asymmetry is intended.
        tmin, tmax = np.min(b[border]), np.max(b)

        # Diagonal matrix built from ~border: left-multiplying by it zeroes the rows
        # of border nodes, so only non-border nodes are updated by the diffusion.
        interior: sparse.csr_matrix = sparse.diags(~border, shape=(n, n), format='csr', dtype=float)
        diffusion_matrix = interior.dot(normalize(adjacency))

        if initial_state is None:
            if tmin != tmax:
                initial_state = b[border].mean() * np.ones(n)
            else:
                initial_state = np.zeros(n)
            # Border values are only written into the default state; a caller-supplied
            # initial_state is used as given.
            initial_state[border] = b[border]

        if self.n_iter > 0:
            # Iterative scheme: diffuse, then re-impose the border temperatures.
            scores = initial_state
            for i in range(self.n_iter):
                scores = diffusion_matrix.dot(scores)
                scores[border] = b[border]

        else:
            # Direct scheme: solve (I - diffusion_matrix) x = b, starting from initial_state.
            a = sparse.eye(n, format='csr', dtype=float) - diffusion_matrix
            scores, info = bicgstab(a, b, atol=0., x0=initial_state)
            self._scipy_solver_info(info)

        # Clip to the [tmin, tmax] range, except when the range is degenerate.
        if tmin != tmax:
            self.scores_ = np.clip(scores, tmin, tmax)
        else:
            self.scores_ = scores
        return self
Exemple #6
0
    def _instanciate_vars(self, adjacency: Union[sparse.csr_matrix,
                                                 np.ndarray],
                          seeds: Union[np.ndarray, dict]):
        """Split nodes into seeded / remaining ones and embed the graph.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph.
        seeds :
            Seed labels (array or dict). Entries ``>= 0`` are treated as seeds,
            entries ``< 0`` as nodes still to be labeled.

        Returns
        -------
        tuple
            ``(index_seed, index_remain, labels_seed, embedding)`` where ``embedding``
            is the output of ``self.embedding_method.fit_transform(adjacency)``.
        """
        seed_labels = check_seeds(seeds, adjacency.shape[0]).astype(int)

        seeded = np.argwhere(seed_labels >= 0).ravel()
        unseeded = np.argwhere(seed_labels < 0).ravel()
        known_labels = seed_labels[seeded]

        # The embedding is computed only after the seeds have been validated.
        node_embedding = self.embedding_method.fit_transform(adjacency)

        return seeded, unseeded, known_labels, node_embedding
Exemple #7
0
    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None,
            init: Optional[float] = None) -> 'Dirichlet':
        """Solve the Dirichlet problem and store the temperatures in ``self.scores_``.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph. Must be square.
        seeds :
            Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
            If ``None``, ``scores_`` is set to the uniform vector ``1 / n``.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes: int = adjacency.shape[0]
        if seeds is None:
            self.scores_ = np.ones(n_nodes) / n_nodes
            return self

        temperatures = check_seeds(seeds, n_nodes)
        border = temperatures >= 0
        border_values = temperatures[border]

        # Initial state: constant fill everywhere, then the seed temperatures on the border.
        fill = border_values.mean() if init is None else init
        scores = fill * np.ones(n_nodes)
        scores[border] = border_values

        if self.n_iter > 0:
            # Iterative scheme: apply the operator, then re-impose the border temperatures.
            operator = DirichletOperator(adjacency, self.damping_factor,
                                         border)
            for _ in range(self.n_iter):
                scores = operator.dot(scores)
                scores[border] = border_values
        else:
            # Direct scheme: linear solve with the negated seeds (zero off the border)
            # as right-hand side, starting from the initial state.
            system = DeltaDirichletOperator(adjacency, self.damping_factor, border)
            rhs = -temperatures
            rhs[~border] = 0
            scores, info = bicgstab(system, rhs, atol=0., x0=scores)
            self._scipy_solver_info(info)

        # Keep the result within the range of the seed temperatures.
        self.scores_ = np.clip(scores, border_values.min(), border_values.max())

        return self
Exemple #8
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None) \
            -> 'Diffusion':
        """Run ``self.n_iter`` diffusion steps and store the temperatures in ``self.scores_``.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph. Must be square.
        seeds :
            Temperatures of seed nodes in initial state (dictionary or vector). Negative temperatures ignored.
            If ``None``, ``scores_`` is set to the uniform vector ``1 / n``.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        n_nodes: int = adjacency.shape[0]
        if seeds is None:
            self.scores_ = np.ones(n_nodes) / n_nodes
            return self

        temperatures = check_seeds(seeds, n_nodes)
        border = temperatures >= 0
        border_values = temperatures[border]

        # Initial state: constant fill everywhere, then the seed temperatures on seed nodes.
        fill = border_values.mean() if init is None else init
        state = fill * np.ones(n_nodes)
        state[border] = border_values

        # Seed temperatures are set once above; the loop does not re-impose them.
        operator = DirichletOperator(adjacency, self.damping_factor)
        for _ in range(self.n_iter):
            state = operator.dot(state)

        self.scores_ = state

        return self
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds: Union[np.ndarray, dict]) -> 'RankClassifier':
        """Fit algorithm to the data.

        The ranking algorithm is run once per element of ``self._process_seeds(...)``
        in a process pool; the resulting scores are post-processed and normalized, and
        each node receives the class of its highest score.

        Parameters
        ----------
        adjacency:
            Adjacency matrix of the graph.
        seeds:
            Seed nodes (labels as dictionary or array; negative values ignored).

        Returns
        -------
        self: :class:`RankClassifier`
            With ``labels_`` and ``membership_`` set.
        """
        n_nodes = adjacency.shape[0]
        labels_in = check_seeds(seeds, n_nodes).astype(int)
        classes, _ = check_labels(labels_in)

        # One ranking run per seed vector, distributed over the worker pool.
        per_class_seeds = self._process_seeds(labels_in)
        rank = partial(self.algorithm.fit_transform, adjacency)
        with Pool(self.n_jobs) as workers:
            raw_scores = workers.map(rank, per_class_seeds)

        # Transpose so that rows index nodes, then post-process and normalize.
        scores = normalize(self._process_scores(np.array(raw_scores).T))

        # Relabel score columns with the original class values.
        membership = sparse.coo_matrix(scores)
        membership.col = classes[membership.col]

        self.labels_ = classes[np.argmax(scores, axis=1)]
        n_columns = np.max(labels_in) + 1
        self.membership_ = sparse.csr_matrix(membership,
                                             shape=(n_nodes, n_columns))

        return self