Ejemplo n.º 1
0
    def _get_refinement_operator(self, name):
        """Build the refinement operator that corresponds to a class name.

        Args:
            name: name of the refinement operator class, given as a string

        Returns:
            a newly constructed operator object from the `refinement` module

        Raises:
            ValueError: if name does not match any known refinement operation
        """
        # Each entry pairs a recognized name with a zero-argument factory.
        # The factories are lambdas so that the configuration attributes on
        # self are only read for the operator that is actually requested.
        factories = (
            ("CropDiagonal", lambda: refinement.CropDiagonal()),
            ("GaussianBlur",
             lambda: refinement.GaussianBlur(self.gaussian_blur_sigma)),
            ("RowWiseThreshold",
             lambda: refinement.RowWiseThreshold(
                 self.p_percentile,
                 self.thresholding_soft_multiplier,
                 self.thresholding_with_row_max)),
            ("Symmetrize", lambda: refinement.Symmetrize()),
            ("Diffuse", lambda: refinement.Diffuse()),
            ("RowWiseNormalize", lambda: refinement.RowWiseNormalize()),
        )
        for known_name, make_operator in factories:
            if name == known_name:
                return make_operator()
        raise ValueError("Unknown refinement operation: {}".format(name))
Ejemplo n.º 2
0
 def test_3by3_matrix(self):
     """Check RowWiseNormalize().refine on a 3x3 matrix.

     Every row of the expected output equals the matching input row
     divided by that row's largest entry. The comparison uses atol=0.001
     on top of np.allclose's default relative tolerance.
     """
     matrix = np.array([
         [0.5, 2.0, 3.0],
         [3.0, 4.0, 5.0],
         [4.0, 2.0, 1.0],
     ])
     normalizer = refinement.RowWiseNormalize()
     refined = normalizer.refine(matrix)
     # Input rows divided by their maxima (3.0, 5.0 and 4.0), to 3 places.
     wanted = np.array([
         [0.167, 0.667, 1.0],
         [0.6, 0.8, 1.0],
         [1.0, 0.5, 0.25],
     ])
     matches = np.allclose(wanted, refined, atol=0.001)
     self.assertTrue(matches)
    def predict(self, X):
        """Perform spectral clustering on data X.

        Steps: compute an affinity matrix from X, apply every operation named
        in `self.refinement_sequence` in order, compute the sorted
        eigenvalues/eigenvectors of the refined matrix, pick the number of
        clusters k, and run K-Means on the first k eigenvector columns.

        Args:
            X: numpy array of shape (n_samples, n_features)

        Returns:
            labels: numpy array of shape (n_samples,)

        Raises:
            TypeError: if X has wrong type
            ValueError: if X has wrong shape, or we see an unknown refinement
                operation
        """
        if not isinstance(X, np.ndarray):
            raise TypeError("X must be a numpy array")
        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional")

        # Compute affinity matrix.
        affinity = utils.compute_affinity_matrix(X)

        # Refinement operations on the affinity matrix.
        # Operators are built by _get_refinement_operator so that this loop
        # and the factory cannot drift apart; in particular RowWiseThreshold
        # now receives self.thresholding_with_row_max, which the previous
        # inline dispatch left out. Unknown names still raise ValueError,
        # and only when that entry of the sequence is reached.
        for op in self.refinement_sequence:
            affinity = self._get_refinement_operator(op).refine(affinity)

        # Perform eigen decomposition.
        (eigenvalues,
         eigenvectors) = utils.compute_sorted_eigenvectors(affinity)

        # Get number of clusters.
        k = utils.compute_number_of_clusters(eigenvalues, self.stop_eigenvalue)
        # Clamp k to the configured bounds. The upper bound is applied last,
        # so it wins if min_clusters and max_clusters conflict.
        if self.min_clusters is not None:
            k = max(k, self.min_clusters)
        if self.max_clusters is not None:
            k = min(k, self.max_clusters)

        # Get spectral embeddings: the first k eigenvector columns.
        spectral_embeddings = eigenvectors[:, :k]

        # Run K-Means++ on spectral embeddings.
        # Note: The correct way should be using a K-Means implementation
        # that supports customized distance measure such as cosine distance.
        # This implementation from scikit-learn does NOT, which is
        # inconsistent with the paper.
        # random_state is fixed so repeated calls give the same labels.
        kmeans_clusterer = KMeans(n_clusters=k,
                                  init="k-means++",
                                  max_iter=300,
                                  random_state=0)
        labels = kmeans_clusterer.fit_predict(spectral_embeddings)
        return labels