Example #1
0
    def fit(self, X, y=None, **fit_params):
        """Fit the self-organizing map and derive cluster labels.

        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Training instances to cluster.

        y : Ignored

        **fit_params : dict
            Extra keyword arguments forwarded to the ``train`` call of the
            underlying Somoclu object.

        Returns
        -------
        self : object
            The estimator itself, with ``labels_`` and ``neighbors_`` set.
        """

        # Validate the input as float32, then rescale it with minmax_scale
        validated = check_array(X, dtype=np.float32)
        X = minmax_scale(validated)

        # The Somoclu object is built only once; later calls reuse it
        if not hasattr(self, 'algorithm_'):

            # Grid dimensions: explicit values unless n_clusters is given,
            # in which case a square grid is derived from the sample count
            if self.n_clusters is None:
                n_columns, n_rows = self.n_columns, self.n_rows
                self.n_columns_, self.n_rows_ = n_columns, n_rows
            else:
                grid_side = int(self.n_clusters * (np.sqrt(len(X)) - 2) + 2)
                self.n_columns_ = self.n_rows_ = grid_side

            # Collect the constructor arguments, then instantiate
            somoclu_params = dict(
                n_columns=self.n_columns_,
                n_rows=self.n_rows_,
                initialcodebook=self.initialcodebook,
                kerneltype=self.kerneltype,
                maptype=self.maptype,
                gridtype=self.gridtype,
                compactsupport=self.compactsupport,
                neighborhood=self.neighborhood,
                std_coeff=self.std_coeff,
                initialization=self.initialization,
                data=None,
                verbose=self.verbose,
            )
            self.algorithm_ = Somoclu(**somoclu_params)

        # Train on the normalized data
        self.algorithm_.train(data=X, **fit_params)

        # One hashable grid position per entry of ``bmus``
        grid_labels = list(map(tuple, self.algorithm_.bmus))

        # Translate grid positions into cluster labels
        labels_mapping = self._generate_labels_mapping(grid_labels)
        cluster_labels = [labels_mapping[position] for position in grid_labels]
        self.labels_ = np.array(cluster_labels)

        # Neighboring relations between the generated labels
        self.neighbors_ = self._generate_neighbors(grid_labels, labels_mapping)

        return self
Example #2
0
 def test_deterministic_codebook(self):
     """Train a 2x2 map from an all-zero codebook and check the result.

     Every element of the codebook must match the reference values within
     the tolerance; the comparison uses absolute differences.
     """
     n_rows, n_columns = 2, 2
     codebook = np.zeros((2*2, 2), dtype=np.float32)
     data = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
     som = Somoclu(n_columns, n_rows, initialcodebook=codebook,
                   compactsupport=False)
     som.train(data)
     correct_codebook = np.array([[[ 0.2       ,  0.30000001],
                                   [ 0.10359724,  0.20359723]],
                                  [[ 0.29640275,  0.39640275],
                                   [ 0.2       ,  0.30000001]]], dtype=np.float32)
     # NOTE(review): the check reads ``codebook`` itself, so it presumably
     # relies on Somoclu updating the initial codebook array in place --
     # confirm against the Somoclu implementation.
     n_values = n_rows*n_columns*2
     deviation = np.abs(codebook.reshape((n_values)) -
                        correct_codebook.reshape((n_values)))
     # Use the largest absolute error: a signed sum lets positive and
     # negative errors cancel and accepts any negative total.
     self.assertTrue(deviation.max() < 10e-8)
Example #3
0
    def fit(self, X, y=None, **fit_params):
        """Train the self-organizing map.

        Parameters
        ----------
        X : array-like or sparse matrix, shape=(n_samples, n_features)
            Training instances to cluster.

        y : Ignored

        **fit_params : dict
            Additional keyword arguments forwarded to the ``train`` call of
            the underlying Somoclu object.

        Returns
        -------
        self : object
            The fitted estimator, with ``algorithm_``, ``labels_mapping_``,
            ``labels_`` and ``neighbors_`` set.
        """

        # Check and normalize input data
        X = minmax_scale(check_array(X, dtype=np.float32))

        # Check random_state
        self.random_state_ = check_random_state(self.random_state)

        # Initialize codebook
        if self.initialcodebook is None:
            if self.random_state is None:
                # No seed requested: let Somoclu draw the random codebook
                initialcodebook = None
                initialization = 'random'
            else:
                # Seeded run: draw the codebook here so that the result is
                # reproducible for a given random_state
                codebook_size = self.n_columns * self.n_rows * X.shape[1]
                initialcodebook = self.random_state_.random_sample(
                    codebook_size).astype(np.float32)
                initialization = None
        elif (isinstance(self.initialcodebook, str)
              and self.initialcodebook == 'pca'):
            # The isinstance guard keeps an array-valued codebook away from
            # the string comparison, which would otherwise be evaluated
            # element-wise and make the condition ambiguous.
            # NOTE(review): 'pca' is passed through as the initialization
            # mode (previously 'random', which ignored the option) --
            # confirm that Somoclu accepts initialization='pca'.
            initialcodebook = None
            initialization = 'pca'
        else:
            # User-supplied codebook is passed through unchanged
            initialcodebook = self.initialcodebook
            initialization = None

        # Create Somoclu object
        self.algorithm_ = Somoclu(
            n_columns=self.n_columns,
            n_rows=self.n_rows,
            initialcodebook=initialcodebook,
            kerneltype=self.kerneltype,
            maptype=self.maptype,
            gridtype=self.gridtype,
            compactsupport=self.compactsupport,
            neighborhood=self.neighborhood,
            std_coeff=self.std_coeff,
            initialization=initialization,
            data=None,
            verbose=self.verbose,
        )

        # Fit Somoclu
        self.algorithm_.train(data=X, **fit_params)

        # Grid labels: one hashable tuple per entry of ``bmus``
        grid_labels = [
            tuple(grid_label) for grid_label in self.algorithm_.bmus
        ]

        # Generate labels mapping
        self.labels_mapping_ = self._generate_labels_mapping(grid_labels)

        # Generate cluster labels
        self.labels_ = np.array(
            [self.labels_mapping_[grid_label] for grid_label in grid_labels])

        # Generate labels neighbors from the distinct grid positions only
        self.neighbors_ = self._generate_neighbors(
            np.unique(grid_labels, axis=0), self.labels_mapping_)

        return self