Beispiel #1
0
def get_adjacency_seeds(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                        force_bipartite: bool = False, force_directed: bool = False,
                        seeds: Optional[Union[dict, np.ndarray]] = None,
                        seeds_row: Optional[Union[dict, np.ndarray]] = None,
                        seeds_col: Optional[Union[dict, np.ndarray]] = None,
                        default_value: float = -1,
                        which: Optional[str] = None) \
        -> Tuple[sparse.csr_matrix, np.ndarray, bool]:
    """Check the input matrix and return a proper adjacency matrix with seeds.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
        Forced to ``True`` as soon as ``seeds_row`` or ``seeds_col`` is given.
    force_directed :
        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.
    seeds :
        Values of seed nodes in initial state (dictionary or vector). Negative values ignored.
    seeds_row, seeds_col :
        Values of rows and columns for bipartite graphs. Negative values ignored.
        Only used when the graph is treated as bipartite and ``seeds`` is ``None``.
    default_value :
        Value of non-seed nodes (default = -1).
    which :
        Which seed values.
        If ``'probs'``, the seed vector is divided by its sum when that sum is positive.
        If ``'labels'``, the seed vector is replaced by distinct integers ``0, ..., n - 1``
        when all non-negative seed values are equal.

    Returns
    -------
    adjacency :
        Adjacency matrix returned by ``get_adjacency``.
    seeds :
        Vector of seed values.
    bipartite :
        Flag returned by ``get_adjacency``, telling whether the graph is treated as bipartite.
    """
    # Row / column seeds only make sense on a bipartite graph.
    if seeds_row is not None or seeds_col is not None:
        force_bipartite = True
    adjacency, bipartite = get_adjacency(input_matrix,
                                         allow_directed=allow_directed,
                                         force_bipartite=force_bipartite,
                                         force_directed=force_directed)
    shape = input_matrix.shape
    if not bipartite:
        seeds = get_seeds(shape, seeds, default_value=default_value)
    elif seeds is None:
        seeds = stack_seeds(shape, seeds_row, seeds_col, default_value=default_value)
    else:
        # On a bipartite graph, ``seeds`` alone is passed as the row seeds.
        seeds = stack_seeds(shape, seeds, default_value=default_value)

    if which == 'probs':
        total = seeds.sum()
        if total > 0:
            # Out-of-place division: an in-place ``/=`` raises on integer-valued seed vectors
            # (NumPy cannot cast the float result back) and could modify an array shared
            # with the caller.
            seeds = seeds / total
    elif which == 'labels':
        if len(set(seeds[seeds >= 0])) == 1:
            # A single distinct label carries no information: give each node its own label.
            seeds = np.arange(len(seeds))
    return adjacency, seeds, bipartite
Beispiel #2
0
    def test_stack_seeds(self):
        """Check that ``stack_seeds`` gives the same result for array and dict inputs."""
        n_row, n_col = 4, 3
        # The same seeds expressed as a dense vector (-1 = no seed) and as a dictionary.
        row_vector = np.array([0, 1, -1, 0])
        row_mapping = {0: 0, 1: 1, 3: 0}
        col_vector = np.array([0, 1, -1])
        col_mapping = {0: 0, 1: 1}

        # Every combination of array / dict inputs on rows and columns.
        combinations = (
            (row_vector, col_vector),
            (row_mapping, col_mapping),
            (row_vector, col_mapping),
            (row_mapping, col_vector),
        )
        stacked = [stack_seeds(n_row, n_col, on_rows, on_cols) for on_rows, on_cols in combinations]
        for current, following in zip(stacked, stacked[1:]):
            self.assertTrue(np.allclose(current, following))

        # Seeds on rows only.
        from_vector = stack_seeds(n_row, n_col, row_vector, None)
        from_mapping = stack_seeds(n_row, n_col, row_mapping, None)
        self.assertTrue(np.allclose(from_vector, from_mapping))

        # Seeds on columns only (dimensions swapped so that the 4-long seeds fit the columns).
        from_vector = stack_seeds(n_col, n_row, None, row_vector)
        from_mapping = stack_seeds(n_col, n_row, None, row_mapping)
        self.assertTrue(np.allclose(from_vector, from_mapping))
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray], seeds_row: Union[np.ndarray, dict],
            seeds_col: Union[np.ndarray, dict, None] = None) -> 'RankClassifier':
        """Compute labels on a bipartite graph.

        The row and column seeds are stacked into a single vector and the parent
        ``RankClassifier.fit`` is run on the undirected version of the bipartite graph.
        The results are then split between rows and columns.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.
        seeds_row :
            Seed rows (labels as dictionary or array; negative values ignored).
        seeds_col :
            Seed columns (optional). Same format.

        Returns
        -------
        self
            The fitted object. ``labels_`` and ``membership_`` refer to rows only;
            see ``labels_row_``, ``labels_col_``, ``membership_row_``, ``membership_col_``.
        """
        n_row, n_col = biadjacency.shape
        stacked_labels = stack_seeds(n_row, n_col, seeds_row, seeds_col)
        RankClassifier.fit(self, bipartite2undirected(biadjacency), stacked_labels)

        # The first n_row entries refer to rows, the remaining ones to columns.
        row_part, col_part = slice(None, n_row), slice(n_row, None)
        self.labels_row_, self.labels_col_ = self.labels_[row_part], self.labels_[col_part]
        self.labels_ = self.labels_row_
        self.membership_row_, self.membership_col_ = self.membership_[row_part], self.membership_[col_part]
        self.membership_ = self.membership_row_

        return self
Beispiel #4
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
        """Fit algorithm to data.

        ``PageRank.fit`` is run on the undirected version of the bipartite graph, with the
        row and column seeds stacked into a single vector.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row, seeds_col :
            Parameter to be used for Personalized BiPageRank.
            Restart distribution as vectors or dicts on rows, columns (node: weight).
            If both seeds_row and seeds_col are ``None`` (default), the uniform distribution on rows is used.

        Returns
        -------
        self: :class:`BiPageRank`
        """
        matrix = check_format(biadjacency)
        n_row, n_col = matrix.shape
        PageRank.fit(self, bipartite2undirected(matrix), stack_seeds(n_row, n_col, seeds_row, seeds_col))
        # presumably sets the row / column attributes such as scores_row_ and scores_col_ -- TODO confirm
        self._split_vars(n_row)

        # Rescale row scores and column scores separately, each by its own sum.
        row_total = self.scores_row_.sum()
        self.scores_row_ /= row_total
        col_total = self.scores_col_.sum()
        self.scores_col_ /= col_total
        # The default scores are those of the rows.
        self.scores_ = self.scores_row_

        return self
Beispiel #5
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray], seeds_row: Union[np.ndarray, dict],
            seeds_col: Optional[Union[np.ndarray, dict]] = None) -> 'BiPropagation':
        """Node classification on a bipartite graph.

        ``Propagation.fit`` is run on the undirected version of the bipartite graph, with the
        row and column seeds stacked into a single integer vector.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.
        seeds_row :
            Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
        seeds_col :
            Seed columns (optional). Same format.

        Returns
        -------
        self: :class:`BiPropagation`
        """
        # The shape is read from the raw input, before the format check.
        n_row, n_col = biadjacency.shape
        matrix = check_format(biadjacency)
        graph = bipartite2undirected(matrix)
        # Labels are cast to integers before being passed on.
        stacked_labels = stack_seeds(n_row, n_col, seeds_row, seeds_col).astype(int)

        Propagation.fit(self, graph, stacked_labels)
        self._split_vars(n_row)

        return self
Beispiel #6
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
        """Fit algorithm to data.

        The bipartite graph is turned into an undirected graph on which ``PageRank.fit``
        is applied; row and column scores are then rescaled separately.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix.
        seeds_row :
            Seed rows, as a dict or a vector.
        seeds_col :
            Seed columns, as a dict or a vector.
            If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

        Returns
        -------
        self: :class:`BiPageRank`
        """
        matrix = check_format(biadjacency)
        n_row, n_col = matrix.shape
        graph = bipartite2undirected(matrix)
        stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)

        PageRank.fit(self, graph, stacked_seeds)
        self._split_vars(n_row)

        # Each side of the graph is divided by its own total score.
        self.scores_row_ /= self.scores_row_.sum()
        self.scores_col_ /= self.scores_col_.sum()
        # Row scores are exposed as the default scores.
        self.scores_ = self.scores_row_

        return self
Beispiel #7
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None) -> 'BiDirichlet':
        """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

        ``Dirichlet.fit`` is run on the undirected version of the bipartite graph, with the
        row and column seeds stacked into a single vector.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix, shape (n_row, n_col).
        seeds_row :
            Temperatures of seed rows (dictionary or vector of size n_row). Negative temperatures ignored.
        seeds_col :
            Temperatures of seed columns (dictionary or vector of size n_col). Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`BiDirichlet`
        """
        matrix = check_format(biadjacency)
        n_row, n_col = matrix.shape
        temperatures = stack_seeds(n_row, n_col, seeds_row, seeds_col)
        # ``init`` is forwarded unchanged to the parent solver.
        Dirichlet.fit(self, bipartite2undirected(matrix), temperatures, init)
        self._split_vars(n_row)

        return self
Beispiel #8
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None,
            initial_state: Optional = None) -> 'BiDiffusion':
        """Compute the diffusion (temperature at equilibrium).

        ``Diffusion.fit`` is run on the undirected version of the bipartite graph, with the
        row and column seeds stacked into a single vector.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix, shape (n_row, n_col).
        seeds_row :
            Temperatures of row border nodes (dictionary or vector of size n_row). Negative temperatures ignored.
        seeds_col :
            Temperatures of column border nodes (dictionary or vector, presumably of size n_col).
            Negative temperatures ignored.
        initial_state :
            Initial state of temperatures.
            NOTE(review): this argument is currently not forwarded to ``Diffusion.fit`` -- confirm whether
            this is intended.

        Returns
        -------
        self: :class:`BiDiffusion`
        """
        matrix = check_format(biadjacency)
        n_row, n_col = matrix.shape
        border_temperatures = stack_seeds(n_row, n_col, seeds_row, seeds_col)

        Diffusion.fit(self, bipartite2undirected(matrix), border_temperatures)
        self._split_vars(n_row)

        return self
Beispiel #9
0
    def _instanciate_vars(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
                          seeds_row: Union[np.ndarray, dict],
                          seeds_col: Optional[Union[np.ndarray, dict]] = None):
        """Prepare the seed indices, seed labels and node embedding of a bipartite graph.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix of the graph.
        seeds_row :
            Seed rows (dictionary or array).
        seeds_col :
            Seed columns (optional). Same format.

        Returns
        -------
        index_seed :
            Indices of the stacked label vector with a non-negative label.
        index_remain :
            Indices of the stacked label vector with a negative label.
        labels_seed :
            Labels at the seed indices.
        embedding :
            Row embedding stacked on top of the column embedding.
        """
        n_row, n_col = biadjacency.shape
        stacked_labels = stack_seeds(n_row, n_col, seeds_row, seeds_col).astype(int)

        # Labels are integers here, so "not a seed" is exactly "negative".
        is_seed = stacked_labels >= 0
        index_seed = np.argwhere(is_seed).ravel()
        index_remain = np.argwhere(~is_seed).ravel()
        labels_seed = stacked_labels[index_seed]

        # Embed rows and columns, then stack them in the same order as the labels.
        method = self.embedding_method
        method.fit(biadjacency)
        embedding = np.vstack((method.embedding_row_, method.embedding_col_))

        return index_seed, index_remain, labels_seed, embedding
Beispiel #10
0
    def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None) -> 'BiDiffusion':
        """Compute the diffusion (temperatures at equilibrium).

        ``Diffusion.fit`` is run on the undirected version of the bipartite graph; the resulting
        scores are then averaged with the scores obtained after one more application of the operator.

        Parameters
        ----------
        biadjacency :
            Biadjacency matrix, shape (n_row, n_col).
        seeds_row :
            Temperatures of seed rows in initial state (dictionary or vector of size n_row).
            Negative temperatures ignored.
        seeds_col :
            Temperatures of seed columns in initial state (dictionary or vector of size n_col).
            Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`BiDiffusion`
        """
        matrix = check_format(biadjacency)
        n_row, n_col = matrix.shape
        temperatures = stack_seeds(n_row, n_col, seeds_row, seeds_col)
        graph = bipartite2undirected(matrix)
        Diffusion.fit(self, graph, temperatures, init)

        # The graph is bipartite, so the scores of two successive iterations are averaged:
        # add the scores after one extra step, then halve the sum.
        extra_step = DirichletOperator(graph, self.damping_factor)
        self.scores_ += extra_step.dot(self.scores_)
        self.scores_ /= 2
        self._split_vars(n_row)

        return self