Ejemplo n.º 1
0
    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Union[np.ndarray, dict] = None,
            seeds_row: Union[np.ndarray, dict] = None,
            seeds_col: Union[np.ndarray, dict] = None) -> 'KNN':
        """Classify nodes with a k-nearest-neighbors vote in the embedding space.

        The graph is embedded with ``self.embedding_method``; the seed labels
        and the embedding are then handed to ``self._fit_core``, whose two
        outputs are stored as ``membership_`` and ``labels_``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Seed nodes. Either a dict {node: label} or an array in which "-1" means no label.
        seeds_row, seeds_col :
            Seeds of rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`KNN`
        """
        # Normalize the inputs; the third output flags a bipartite graph.
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix,
            seeds=seeds,
            seeds_row=seeds_row,
            seeds_col=seeds_col,
        )
        index_seed, index_remain, labels_seed = self._instantiate_vars(seeds)
        embedding = self.embedding_method.fit_transform(adjacency)
        n_nodes = adjacency.shape[0]

        self.membership_, self.labels_ = self._fit_core(
            n_nodes, labels_seed, embedding, index_seed, index_remain)

        if not self.bipartite:
            return self

        # Bipartite case: split the results using the shape of the original input.
        self._split_vars(input_matrix.shape)
        return self
Ejemplo n.º 2
0
    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None,
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None,
            init: Optional[float] = None) -> 'Dirichlet':
        """Solve the Dirichlet problem, i.e. find the temperatures at equilibrium.

        When ``self.n_iter`` is positive, the diffusion operator is applied
        ``self.n_iter`` times, resetting the seed (border) temperatures after
        every step. Otherwise the problem is solved as a linear system with
        ``bicgstab``. In both cases the final scores are clipped to the range
        of the seed temperatures and stored in ``scores_``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        seeds_row, seeds_col :
            Temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Dirichlet`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix, seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col)
        # ``border`` is used below as a mask selecting the seed entries.
        scores, border = init_temperatures(seeds, init)

        if self.n_iter > 0:
            # Iterative scheme: diffuse, then re-impose the seed temperatures.
            operator = DirichletOperator(adjacency, self.damping_factor, border)
            for _ in range(self.n_iter):
                scores = operator.dot(scores)
                scores[border] = seeds[border]
        else:
            # Direct scheme: linear system whose right-hand side is the negated
            # seed temperatures on the border and zero everywhere else.
            system = DeltaDirichletOperator(adjacency, self.damping_factor, border)
            rhs = -seeds
            rhs[~border] = 0
            scores, info = bicgstab(system, rhs, atol=0., x0=scores)
            self._scipy_solver_info(info)

        # Keep the solution within the range spanned by the seed temperatures.
        seed_values = seeds[border]
        lowest, highest = seed_values.min(), seed_values.max()
        self.scores_ = np.clip(scores, lowest, highest)

        if self.bipartite:
            self._split_vars(input_matrix.shape)
        return self
Ejemplo n.º 3
0
    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Union[np.ndarray, dict] = None,
            seeds_row: Union[np.ndarray, dict] = None,
            seeds_col: Union[np.ndarray, dict] = None) -> 'RankClassifier':
        """Fit algorithm to data.

        ``self.algorithm.fit_transform`` is run once per entry of the
        processed seeds, in a pool of ``self.n_jobs`` workers. The resulting
        scores are post-processed and normalized; each node receives the
        class with the highest score.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Seed nodes (labels as dictionary or array; negative values ignored).
        seeds_row, seeds_col :
            Seed rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`RankClassifier`
        """
        adjacency, seeds_labels, bipartite = get_adjacency_seeds(
            input_matrix, seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col)
        seeds_labels = seeds_labels.astype(int)
        classes, _ = check_labels(seeds_labels)
        seeds_all = self._process_seeds(seeds_labels)

        # One ranking per element of ``seeds_all``, computed in parallel.
        rank_from_seeds = partial(self.algorithm.fit_transform, adjacency)
        with Pool(self.n_jobs) as pool:
            per_class_scores = pool.map(rank_from_seeds, seeds_all)

        # Transpose so that rows index nodes and columns index classes.
        scores = np.array(per_class_scores).T
        scores = normalize(self._process_scores(scores))

        # Column k of ``scores`` corresponds to the label ``classes[k]``.
        membership = sparse.coo_matrix(scores)
        membership.col = classes[membership.col]

        best_column = np.argmax(scores, axis=1)
        self.labels_ = classes[best_column]

        n_nodes = adjacency.shape[0]
        n_columns = np.max(seeds_labels) + 1
        self.membership_ = sparse.csr_matrix(membership, shape=(n_nodes, n_columns))

        if bipartite:
            self._split_vars(input_matrix.shape)
        return self
Ejemplo n.º 4
0
    def fit(self,
            input_matrix: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
            seeds: Optional[Union[dict, np.ndarray]] = None,
            seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None,
            force_bipartite: bool = False) -> 'PageRank':
        """Fit algorithm to data.

        The inputs are normalized by ``get_adjacency_seeds`` and the scores
        returned by ``get_pagerank`` are stored in ``scores_``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Parameter to be used for Personalized PageRank.
            Restart distribution as a vector or a dict (node: weight).
            If ``None``, the uniform distribution is used (no personalization, default).
        seeds_row, seeds_col :
            Parameter to be used for Personalized PageRank on bipartite graphs.
            Restart distribution as vectors or dicts on rows, columns (node: weight).
            If both seeds_row and seeds_col are ``None`` (default), the uniform distribution on rows is used.
        force_bipartite :
            If ``True``, consider the input matrix as the biadjacency matrix of a bipartite graph.

        Returns
        -------
        self: :class:`PageRank`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix, force_bipartite=force_bipartite,
            seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col,
            default_value=0, which='probs')

        # Solver settings all come from the estimator's own attributes.
        pagerank_scores = get_pagerank(
            adjacency, seeds,
            damping_factor=self.damping_factor, n_iter=self.n_iter,
            solver=self.solver, tol=self.tol)
        self.scores_ = pagerank_scores

        if not self.bipartite:
            return self

        self._split_vars(input_matrix.shape)
        return self
Ejemplo n.º 5
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray],
            seeds: Optional[Union[dict, np.ndarray]] = None, seeds_row: Optional[Union[dict, np.ndarray]] = None,
            seeds_col: Optional[Union[dict, np.ndarray]] = None, init: Optional[float] = None) \
            -> 'Diffusion':
        """Run the diffusion and store the resulting temperatures.

        Starting from the initial temperatures, the diffusion operator is
        applied ``self.n_iter`` times; the result is stored in ``scores_``.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Temperatures of seed nodes in initial state (dictionary or vector). Negative temperatures ignored.
        seeds_row, seeds_col :
            Temperatures of rows and columns for bipartite graphs. Negative temperatures ignored.
        init :
            Temperature of non-seed nodes in initial state.
            If ``None``, use the average temperature of seed nodes (default).

        Returns
        -------
        self: :class:`Diffusion`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix, allow_directed=True, seeds=seeds, seeds_row=seeds_row, seeds_col=seeds_col)

        # Only the initial temperatures are needed; the second output is discarded.
        temperatures, _ = init_temperatures(seeds, init)

        operator = DirichletOperator(adjacency, self.damping_factor)
        for _ in range(self.n_iter):
            temperatures = operator.dot(temperatures)
        self.scores_ = temperatures

        if not self.bipartite:
            return self

        self._split_vars(input_matrix.shape)
        return self
Ejemplo n.º 6
0
    def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None,
            seeds_row: Union[np.ndarray, dict] = None, seeds_col: Union[np.ndarray, dict] = None) \
            -> 'Propagation':
        """Node classification by label propagation.

        Non-seed nodes are visited in the order given by ``self.node_order``
        and relabelled by ``vote_update`` until their labels stop changing or
        ``self.n_iter`` sweeps have been done. Seed nodes keep their labels.

        Parameters
        ----------
        input_matrix :
            Adjacency matrix or biadjacency matrix of the graph.
        seeds :
            Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.
        seeds_row, seeds_col :
            Seeds of rows and columns (for bipartite graphs).

        Returns
        -------
        self: :class:`Propagation`
        """
        adjacency, seeds, self.bipartite = get_adjacency_seeds(
            input_matrix,
            seeds=seeds,
            seeds_row=seeds_row,
            seeds_col=seeds_col,
            which='labels')
        n = adjacency.shape[0]
        index_seed, index_remain, labels_seed = self._instantiate_vars(seeds)

        if self.node_order == 'random':
            np.random.shuffle(index_remain)
        elif self.node_order in ('decreasing', 'increasing'):
            # Column sums of the adjacency matrix, used as the sort key.
            weights = adjacency.T.dot(np.ones(n))
            if self.node_order == 'decreasing':
                weights = -weights
            # Sort the non-seed nodes themselves by weight. Indexing the global
            # ordering with ``index_remain`` (node ids used as rank positions)
            # would select a different node set: seeds could be overwritten and
            # some unlabelled nodes never visited.
            order = np.argsort(weights[index_remain])
            index_remain = index_remain[order].astype(np.int32)

        # -1 marks nodes without a label; seeds receive their known labels.
        labels = -np.ones(n, dtype=np.int32)
        labels[index_seed] = labels_seed
        labels_remain = np.zeros_like(index_remain, dtype=np.int32)

        indptr = adjacency.indptr.astype(np.int32)
        indices = adjacency.indices.astype(np.int32)
        if self.weighted:
            data = adjacency.data.astype(np.float32)
        else:
            # Unit weight per stored edge: ``data`` must be aligned with
            # ``indices`` (one value per non-zero entry), not with the nodes.
            data = np.ones(indices.size, dtype=np.float32)

        # Sweep until the labels of the non-seed nodes are stable or the
        # iteration budget is exhausted.
        t = 0
        while t < self.n_iter and not np.array_equal(labels_remain,
                                                     labels[index_remain]):
            t += 1
            labels_remain = labels[index_remain].copy()
            labels = np.asarray(
                vote_update(indptr, indices, data, labels, index_remain))

        # Membership of a node: normalized aggregation of its neighbors' labels.
        membership = membership_matrix(labels)
        membership = normalize(adjacency.dot(membership))

        self.labels_ = labels
        self.membership_ = membership

        if self.bipartite:
            self._split_vars(input_matrix.shape)

        return self