def fit(self, X, y=None, **fit_params):
    """Fit the self-organizing map and derive cluster labels from it.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training instances to cluster. The data are validated as
        float32 and passed through ``minmax_scale`` before training.
    y : Ignored
    **fit_params : dict
        Forwarded unchanged to the ``train`` method of the Somoclu object.

    Returns
    -------
    self : object
        The estimator, with ``labels_`` and ``neighbors_`` set.
    """
    # NOTE(review): check_array is called without accept_sparse, so sparse
    # input is presumably rejected here -- confirm.
    validated = check_array(X, dtype=np.float32)
    X = minmax_scale(validated)

    # The Somoclu object (and the grid size) is only built on the first
    # call; later calls reuse the existing ``algorithm_``.
    if not hasattr(self, 'algorithm_'):
        if self.n_clusters is None:
            # Use the explicitly requested grid dimensions.
            self.n_columns_, self.n_rows_ = self.n_columns, self.n_rows
        else:
            # Square grid whose side is derived from n_clusters and the
            # square root of the number of samples.
            side = int(self.n_clusters * (np.sqrt(len(X)) - 2) + 2)
            self.n_columns_ = self.n_rows_ = side

        somoclu_kwargs = dict(
            n_columns=self.n_columns_,
            n_rows=self.n_rows_,
            initialcodebook=self.initialcodebook,
            kerneltype=self.kerneltype,
            maptype=self.maptype,
            gridtype=self.gridtype,
            compactsupport=self.compactsupport,
            neighborhood=self.neighborhood,
            std_coeff=self.std_coeff,
            initialization=self.initialization,
            data=None,
            verbose=self.verbose,
        )
        self.algorithm_ = Somoclu(**somoclu_kwargs)

    # Train on the scaled data.
    self.algorithm_.train(data=X, **fit_params)

    # One hashable grid position per sample, taken from the ``bmus``
    # attribute of the trained object.
    grid_labels = list(map(tuple, self.algorithm_.bmus))

    # Translate grid positions into cluster labels.
    labels_mapping = self._generate_labels_mapping(grid_labels)
    self.labels_ = np.array(
        [labels_mapping[position] for position in grid_labels])

    # Neighboring relations between the clusters.
    self.neighbors_ = self._generate_neighbors(grid_labels, labels_mapping)

    return self
def test_deterministic_codebook(self):
    """Training from a fixed initial codebook must give a known codebook.

    A 2x2 map is trained on two 2-dimensional samples starting from an
    all-zero codebook, and the codebook exposed by the trained object is
    compared element-wise against reference values.
    """
    n_rows, n_columns = 2, 2
    codebook = np.zeros((n_rows * n_columns, 2), dtype=np.float32)
    data = np.array([[0.1, 0.2], [0.3, 0.4]], dtype=np.float32)
    som = Somoclu(n_columns, n_rows, initialcodebook=codebook,
                  compactsupport=False)
    som.train(data)
    correct_codebook = np.array([[[0.2, 0.30000001],
                                  [0.10359724, 0.20359723]],
                                 [[0.29640275, 0.39640275],
                                  [0.2, 0.30000001]]], dtype=np.float32)
    # Compare the trained codebook held by the object (not the local input
    # array) and use the largest *absolute* deviation: a signed sum lets
    # positive and negative errors cancel, and any negative total would
    # trivially satisfy a "< tolerance" check.
    max_abs_error = np.abs(
        np.ravel(som.codebook) - np.ravel(correct_codebook)).max()
    # 1e-6 leaves room for float32 rounding while still catching real
    # differences in the codebook values.
    self.assertTrue(max_abs_error < 1e-6)
def fit(self, X, y=None, **fit_params):
    """Train the self-organizing map.

    Parameters
    ----------
    X : array-like, shape=(n_samples, n_features)
        Training instances to cluster. The data are validated as
        float32 and passed through ``minmax_scale`` before training.
    y : Ignored
    **fit_params : dict
        Forwarded unchanged to the ``train`` method of the Somoclu object.

    Returns
    -------
    self : object
        The estimator, with ``random_state_``, ``algorithm_``,
        ``labels_mapping_``, ``labels_`` and ``neighbors_`` set.
    """
    # Check and normalize input data.
    # NOTE(review): check_array is called without accept_sparse, so sparse
    # input is presumably rejected here -- confirm.
    X = minmax_scale(check_array(X, dtype=np.float32))

    # Check random_state
    self.random_state_ = check_random_state(self.random_state)

    # Resolve the codebook / initialization pair handed to Somoclu.
    if self.initialcodebook is None:
        if self.random_state is None:
            # No seed requested: let Somoclu draw its own random codebook.
            initialcodebook, initialization = None, 'random'
        else:
            # Seeded run: draw the flat codebook from our own random
            # state so that results are reproducible.
            codebook_size = self.n_columns * self.n_rows * X.shape[1]
            initialcodebook = self.random_state_.random_sample(
                codebook_size).astype(np.float32)
            initialization = None
    elif (isinstance(self.initialcodebook, str)
            and self.initialcodebook == 'pca'):
        # The isinstance guard keeps a user-supplied array from being
        # compared element-wise against the string, whose truth value
        # would be ambiguous. A 'pca' request is forwarded as PCA
        # initialization rather than silently falling back to 'random'.
        initialcodebook, initialization = None, 'pca'
    else:
        # Anything else is treated as an explicit codebook.
        initialcodebook, initialization = self.initialcodebook, None

    # Create Somoclu object
    self.algorithm_ = Somoclu(
        n_columns=self.n_columns,
        n_rows=self.n_rows,
        initialcodebook=initialcodebook,
        kerneltype=self.kerneltype,
        maptype=self.maptype,
        gridtype=self.gridtype,
        compactsupport=self.compactsupport,
        neighborhood=self.neighborhood,
        std_coeff=self.std_coeff,
        initialization=initialization,
        data=None,
        verbose=self.verbose,
    )

    # Fit Somoclu
    self.algorithm_.train(data=X, **fit_params)

    # One hashable grid position per sample, taken from the ``bmus``
    # attribute of the trained object.
    grid_labels = [
        tuple(grid_label) for grid_label in self.algorithm_.bmus
    ]

    # Generate labels mapping
    self.labels_mapping_ = self._generate_labels_mapping(grid_labels)

    # Generate cluster labels
    self.labels_ = np.array(
        [self.labels_mapping_[grid_label] for grid_label in grid_labels])

    # Generate labels neighbors from the distinct grid positions only.
    self.neighbors_ = self._generate_neighbors(
        np.unique(grid_labels, axis=0), self.labels_mapping_)

    return self