import numpy as np

# Assuming the HyperSpy package layout for the function under test.
from hyperspy.learn.mlpca import mlpca


def test_mlpca(tol, max_iter):
    # Define shape etc.
    m = 100  # Dimensionality
    n = 101  # Number of samples
    r = 3

    rng = np.random.RandomState(101)
    U = rng.uniform(0, 1, size=(m, r))
    V = rng.uniform(0, 10, size=(n, r))
    varX = U @ V.T
    X = rng.poisson(varX)
    rank = r

    U, S, V, Sobj = mlpca(X, varX, output_dimension=rank,
                          tol=tol, max_iter=max_iter)
    Xest = U @ np.diag(S) @ V.T

    # Test tolerance
    tol = 300

    # Check the low-rank reconstruction error
    normX = np.linalg.norm(Xest - X)
    assert normX < tol

    # Check singular values
    S_norm = S / np.sum(S)
    np.testing.assert_allclose(S_norm[:rank].sum(), 1.0)
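
# The tol and max_iter arguments of test_mlpca are presumably supplied by
# a pytest parametrize decorator in the full test module; this direct
# invocation is a hypothetical sketch with illustrative values only.
if __name__ == "__main__":
    test_mlpca(tol=1e-9, max_iter=500)
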
def decomposition(self,
                  normalize_poissonian_noise=False,
                  algorithm='svd',
                  output_dimension=None,
                  centre=None,
                  auto_transpose=True,
                  navigation_mask=None,
                  signal_mask=None,
                  var_array=None,
                  var_func=None,
                  polyfit=None,
                  on_peaks=False,
                  reproject=None,
                  **kwargs):
    """Decomposition with a choice of algorithms

    The results are stored in self.mva_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
        'sparse_pca' | 'mini_batch_sparse_pca'
    output_dimension : None or int
        number of components to keep/calculate
    centre : None | 'variables' | 'trials'
        If None, no centring is applied. If 'variables', the centring
        will be performed in the variable axis. If 'trials', the
        centring will be performed in the 'trials' axis. It only has an
        effect when using the svd or fast_svd algorithms.
    auto_transpose : bool
        If True, automatically transposes the data to boost
        performance. Only has an effect when using the svd or fast_svd
        algorithms.
    navigation_mask : boolean numpy array
    signal_mask : boolean numpy array
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm
    var_func : function or numpy array
        If function, it will apply it to the dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial.
    polyfit :
    reproject : None | 'signal' | 'navigation' | 'both'
        If not None, the results of the decomposition will be projected
        onto the selected masked area.

    See also
    --------
    plot_decomposition_factors, plot_decomposition_scores, plot_lev
    """
    # Backup the original data
    if on_peaks:
        if hasattr(self.mapped_parameters, 'peak_chars'):
            self._data_before_treatments = \
                self.mapped_parameters.peak_chars.copy()
        else:
            print """No peak characteristics found.
            You must run the peak_char_stack function to obtain these
            before you can run PCA or ICA on them."""
    else:
        self._data_before_treatments = self.data.copy()

    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            messages.warning(
                "It makes no sense to do normalize_poissonian_noise "
                "with the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            messages.warning_exit("With the mlpca algorithm the "
                                  "output_dimension must be specified")

    # Apply pre-treatments:
    # transform the data into a line spectrum.
    self._unfolded4decomposition = self.unfold_if_multidim()
    if hasattr(navigation_mask, 'ravel'):
        navigation_mask = navigation_mask.ravel()
    if hasattr(signal_mask, 'ravel'):
        signal_mask = signal_mask.ravel()

    # Normalize the poissonian noise.
    # TODO this function can change the masks and this can cause
    # problems when reprojecting
    if normalize_poissonian_noise is True:
        if reproject is None:
            navigation_mask, signal_mask = \
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,
                    return_masks=True)
        elif reproject == 'both':
            _, _ = self.normalize_poissonian_noise(return_masks=True)
        elif reproject == 'navigation':
            _, signal_mask = self.normalize_poissonian_noise(
                return_masks=True,
                signal_mask=signal_mask)
        elif reproject == 'signal':
            navigation_mask, _ = self.normalize_poissonian_noise(
                return_masks=True,
                navigation_mask=navigation_mask)

    messages.information('Performing decomposition analysis')
    if on_peaks:
        dc = self.mapped_parameters.peak_chars
    else:
        # The data must be transposed both for Images and Spectra
        dc = self.data

    # Set the output target (peak results or not?)
    target = self._get_target(on_peaks)

    # Transform the None masks into slices to get the right behaviour
    if navigation_mask is None:
        navigation_mask = slice(None)
    if signal_mask is None:
        signal_mask = slice(None)

    # Reset the explained_variance, which is not set by all the
    # algorithms
    explained_variance = None
    explained_variance_ratio = None
    mean = None

    if algorithm == 'svd':
        factors, scores, explained_variance, mean = svd_pca(
            dc[:, signal_mask][navigation_mask, :], centre=centre,
            auto_transpose=auto_transpose)
    elif algorithm == 'fast_svd':
        factors, scores, explained_variance, mean = svd_pca(
            dc[:, signal_mask][navigation_mask, :], fast=True,
            output_dimension=output_dimension, centre=centre,
            auto_transpose=auto_transpose)
    elif algorithm == 'sklearn_pca':
        sk = sklearn.decomposition.PCA(**kwargs)
        sk.n_components = output_dimension
        scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
        factors = sk.components_.T
        explained_variance = sk.explained_variance_
        mean = sk.mean_
        centre = 'trials'
    elif algorithm == 'nmf':
        sk = sklearn.decomposition.NMF(**kwargs)
        sk.n_components = output_dimension
        scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
        factors = sk.components_.T
    elif algorithm == 'sparse_pca':
        sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs)
        scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
        factors = sk.components_.T
    elif algorithm == 'mini_batch_sparse_pca':
        sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension,
                                                      **kwargs)
        scores = sk.fit_transform(dc[:, signal_mask][navigation_mask, :])
        factors = sk.components_.T
    elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
        print "Performing the MLPCA training"
        if output_dimension is None:
            messages.warning_exit(
                "For MLPCA it is mandatory to define the "
                "output_dimension")
        if var_array is None and var_func is None:
            messages.information('No variance array provided. '
                                 'Supposing poissonian data')
            var_array = dc[:, signal_mask][navigation_mask, :]
        if var_array is not None and var_func is not None:
            messages.warning_exit(
                "You have defined both the var_func and var_array "
                "keywords. Please, define just one of them")
        if var_func is not None:
            if hasattr(var_func, '__call__'):
                var_array = var_func(
                    dc[signal_mask, ...][:, navigation_mask])
            else:
                try:
                    var_array = np.polyval(
                        polyfit, dc[signal_mask, navigation_mask])
                except:
                    messages.warning_exit(
                        'var_func must be either a function or an array '
                        'defining the coefficients of a polynomial')
        if algorithm == 'mlpca':
            fast = False
        else:
            fast = True
        U, S, V, Sobj, ErrFlag = mlpca(
            dc[:, signal_mask][navigation_mask, :],
            var_array, output_dimension, fast=fast)
        scores = U * S
        factors = V
        explained_variance_ratio = S ** 2 / Sobj
        explained_variance = S ** 2 / len(factors)
    else:
        messages.information('Error: Algorithm not recognised. '
                             'Nothing done')
        return False

    # We must calculate the ratio here because otherwise the sum
    # information can be lost if the user calls
    # crop_decomposition_dimension
    if explained_variance is not None and explained_variance_ratio is None:
        explained_variance_ratio = \
            explained_variance / explained_variance.sum()

    # Store the results in mva_results
    target.factors = factors
    target.scores = scores
    target.explained_variance = explained_variance
    target.explained_variance_ratio = explained_variance_ratio
    target.decomposition_algorithm = algorithm
    target.poissonian_noise_normalized = normalize_poissonian_noise
    target.output_dimension = output_dimension
    target.unfolded = self._unfolded4decomposition
    target.centre = centre
    target.mean = mean

    if output_dimension and factors.shape[1] != output_dimension:
        target.crop_decomposition_dimension(output_dimension)

    # Delete the unmixing information, because it would refer to a
    # previous decomposition
    target.unmixing_matrix = None
    target.ica_algorithm = None

    if self._unfolded4decomposition is True:
        target.original_shape = self._shape_before_unfolding

    # Reproject
    if mean is None:
        mean = 0
    if reproject in ('navigation', 'both'):
        if algorithm not in ('nmf', 'sparse_pca',
                             'mini_batch_sparse_pca'):
            scores_ = np.dot(dc[:, signal_mask] - mean, factors)
        else:
            scores_ = sk.transform(dc[:, signal_mask])
        target.scores = scores_
    if reproject in ('signal', 'both'):
        if algorithm not in ('nmf', 'sparse_pca',
                             'mini_batch_sparse_pca'):
            factors = np.dot(np.linalg.pinv(scores),
                             dc[navigation_mask, :] - mean).T
            target.factors = factors
        else:
            messages.information("Reprojecting the signal is not yet "
                                 "supported for this algorithm")
            if reproject == 'both':
                reproject = 'signal'
            else:
                reproject = None

    # Rescale the results if the noise was normalized
    if normalize_poissonian_noise is True:
        target.factors[:] *= self._root_bH.T
        target.scores[:] *= self._root_aG

    # Set the pixels that were not processed to nan
    if not isinstance(signal_mask, slice):
        target.signal_mask = signal_mask
        if reproject not in ('both', 'signal'):
            factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
            factors[signal_mask == True, :] = target.factors
            factors[signal_mask == False, :] = np.nan
            target.factors = factors
    if not isinstance(navigation_mask, slice):
        target.navigation_mask = navigation_mask
        if reproject not in ('both', 'navigation'):
            scores = np.zeros((dc.shape[0], target.scores.shape[1]))
            scores[navigation_mask == True, :] = target.scores
            scores[navigation_mask == False, :] = np.nan
            target.scores = scores

    # Undo any pre-treatments
    self.undo_treatments(on_peaks)

    if self._unfolded4decomposition is True:
        self.fold()
        self._unfolded4decomposition = False
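
# A minimal usage sketch of the decomposition API above. `s` stands for a
# hypothetical signal object exposing this method; this version stores its
# output in mva_results.
#
#     s.decomposition(algorithm='svd', normalize_poissonian_noise=True)
#     print s.mva_results.explained_variance_ratio
#
#     # MLPCA requires output_dimension; with neither var_array nor
#     # var_func given, the data themselves serve as the Poissonian
#     # variance estimate (see the mlpca branch above).
#     s.decomposition(algorithm='mlpca', output_dimension=4)
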
def decomposition(self,
                  normalize_poissonian_noise=False,
                  algorithm='svd',
                  output_dimension=None,
                  centre=None,
                  auto_transpose=True,
                  navigation_mask=None,
                  signal_mask=None,
                  var_array=None,
                  var_func=None,
                  polyfit=None,
                  reproject=None,
                  **kwargs):
    """Decomposition with a choice of algorithms

    The results are stored in self.learning_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
        'sparse_pca' | 'mini_batch_sparse_pca'
    output_dimension : None or int
        number of components to keep/calculate
    centre : None | 'variables' | 'trials'
        If None, no centring is applied. If 'variables', the centring
        will be performed in the variable axis. If 'trials', the
        centring will be performed in the 'trials' axis. It only has an
        effect when using the svd or fast_svd algorithms.
    auto_transpose : bool
        If True, automatically transposes the data to boost
        performance. Only has an effect when using the svd or fast_svd
        algorithms.
    navigation_mask : boolean numpy array
        The navigation locations marked as True are not used in the
        decomposition.
    signal_mask : boolean numpy array
        The signal locations marked as True are not used in the
        decomposition.
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm
    var_func : function or numpy array
        If function, it will apply it to the dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial.
    polyfit :
    reproject : None | 'signal' | 'navigation' | 'both'
        If not None, the results of the decomposition will be projected
        onto the selected masked area.

    See also
    --------
    plot_decomposition_factors, plot_decomposition_loadings, plot_lev
    """
    # Check if it is the wrong data type
    if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
        messages.warning(
            'To perform a decomposition the data must be of the float '
            'type. You can change the type using the change_dtype '
            'method, e.g. s.change_dtype(\'float64\')\n'
            'Nothing done.')
        return
    if self.axes_manager.navigation_size < 2:
        raise AttributeError("It is not possible to decompose a dataset "
                             "with navigation_size < 2")

    # Backup the original data
    self._data_before_treatments = self.data.copy()

    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            messages.warning(
                "It makes no sense to do normalize_poissonian_noise "
                "with the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            raise ValueError("With the mlpca algorithm the "
                             "output_dimension must be specified")

    # Apply pre-treatments:
    # transform the data into a line spectrum.
    self._unfolded4decomposition = self.unfold()
    try:
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()
        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask)
        messages.information('Performing decomposition analysis')

        # The rest of the code assumes that the first data axis
        # is the navigation axis. We transpose the data if that is not
        # the case.
        dc = (self.data if self.axes_manager[0].index_in_array == 0
              else self.data.T)

        # Set the output target (peak results or not?)
        target = self.learning_results

        # Transform the None masks into slices to get the right
        # behaviour.
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask
        # WARNING: signal_mask and navigation_mask values are now their
        # negations, i.e. True -> False and vice versa. However, the
        # stored value (at the end of the method) coincides with the
        # input masks.

        # Reset the explained_variance, which is not set by all the
        # algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None

        if algorithm == 'svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)
        elif algorithm == 'fast_svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], fast=True,
                output_dimension=output_dimension, centre=centre,
                auto_transpose=auto_transpose)
        elif algorithm == 'sklearn_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'
        elif algorithm == 'nmf':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = import_sklearn.sklearn.decomposition.SparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'mini_batch_sparse_pca':
            if import_sklearn.sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                raise ValueError(
                    "For MLPCA it is mandatory to define the "
                    "output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Supposing poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]
            if var_array is not None and var_func is not None:
                raise ValueError(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except:
                        raise ValueError(
                            'var_func must be either a function or an '
                            'array defining the coefficients of a '
                            'polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            loadings = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            raise ValueError('Algorithm not recognised. Nothing done')

        # We must calculate the ratio here because otherwise the sum
        # information can be lost if the user calls
        # crop_decomposition_dimension
        if explained_variance is not None and \
                explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()

        # Store the results in learning_results
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)

        # Delete the unmixing information, because it would refer to a
        # previous decomposition
        target.unmixing_matrix = None
        target.bss_algorithm = None

        if self._unfolded4decomposition is True:
            folding = self.metadata._HyperSpy.Folding
            target.original_shape = folding.original_shape

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                loadings_ = sk.transform(dc[:, signal_mask])
            target.loadings = loadings_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(loadings),
                                 dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not "
                                     "yet supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.loadings[:] *= self._root_aG

        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            # Store the signal mask (inverted back, as input)
            target.signal_mask = ~signal_mask.reshape(
                self.axes_manager._signal_shape_in_array)
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1],
                                    target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            # Store the navigation mask (inverted back, as input)
            target.navigation_mask = ~navigation_mask.reshape(
                self.axes_manager._navigation_shape_in_array)
            if reproject not in ('both', 'navigation'):
                loadings = np.zeros((dc.shape[0],
                                     target.loadings.shape[1]))
                loadings[navigation_mask, :] = target.loadings
                loadings[~navigation_mask, :] = np.nan
                target.loadings = loadings
    finally:
        # Undo any pre-treatments
        self.undo_treatments()
        if self._unfolded4decomposition is True:
            self.fold()
            self._unfolded4decomposition = False
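
# Mask-semantics sketch for the version above: the input masks mark
# locations to EXCLUDE and are inverted internally before indexing.
# Hypothetical shapes for a signal `s` with 64 navigation positions and
# 1024 signal channels:
#
#     import numpy as np
#     navigation_mask = np.zeros(64, dtype=bool)
#     navigation_mask[:8] = True          # skip the first eight pixels
#     signal_mask = np.zeros(1024, dtype=bool)
#     signal_mask[500:520] = True         # skip a detector artefact
#     s.decomposition(algorithm='svd',
#                     navigation_mask=navigation_mask,
#                     signal_mask=signal_mask)
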
def decomposition(self,
                  normalize_poissonian_noise=False,
                  algorithm='svd',
                  output_dimension=None,
                  centre=None,
                  auto_transpose=True,
                  navigation_mask=None,
                  signal_mask=None,
                  var_array=None,
                  var_func=None,
                  polyfit=None,
                  reproject=None,
                  **kwargs):
    """Decomposition with a choice of algorithms

    The results are stored in self.learning_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
        'sparse_pca' | 'mini_batch_sparse_pca'
    output_dimension : None or int
        number of components to keep/calculate
    centre : None | 'variables' | 'trials'
        If None, no centring is applied. If 'variables', the centring
        will be performed in the variable axis. If 'trials', the
        centring will be performed in the 'trials' axis. It only has an
        effect when using the svd or fast_svd algorithms.
    auto_transpose : bool
        If True, automatically transposes the data to boost
        performance. Only has an effect when using the svd or fast_svd
        algorithms.
    navigation_mask : boolean numpy array
        The navigation locations marked as True are not used in the
        decomposition.
    signal_mask : boolean numpy array
        The signal locations marked as True are not used in the
        decomposition.
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm
    var_func : function or numpy array
        If function, it will apply it to the dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial.
    polyfit :
    reproject : None | 'signal' | 'navigation' | 'both'
        If not None, the results of the decomposition will be projected
        onto the selected masked area.

    See also
    --------
    plot_decomposition_factors, plot_decomposition_loadings, plot_lev
    """
    # Check if it is the wrong data type
    if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
        messages.warning(
            'To perform a decomposition the data must be of the float '
            'type. You can change the type using the change_dtype '
            'method, e.g. s.change_dtype(\'float64\')\n'
            'Nothing done.')
        return

    # Backup the original data
    self._data_before_treatments = self.data.copy()

    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            messages.warning(
                "It makes no sense to do normalize_poissonian_noise "
                "with the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            messages.warning_exit("With the mlpca algorithm the "
                                  "output_dimension must be specified")

    # Apply pre-treatments:
    # transform the data into a line spectrum.
    self._unfolded4decomposition = self.unfold_if_multidim()
    try:
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()
        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask)
        messages.information('Performing decomposition analysis')

        dc = self.data
        # Set the output target (peak results or not?)
        target = self.learning_results

        # Transform the None masks into slices to get the right
        # behaviour.
        if navigation_mask is None:
            navigation_mask = slice(None)
        else:
            navigation_mask = ~navigation_mask
        if signal_mask is None:
            signal_mask = slice(None)
        else:
            signal_mask = ~signal_mask
        # WARNING: signal_mask and navigation_mask values are now their
        # negations, i.e. True -> False and vice versa. However, the
        # stored value (at the end of the method) coincides with the
        # input masks.

        # Reset the explained_variance, which is not set by all the
        # algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None

        if algorithm == 'svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)
        elif algorithm == 'fast_svd':
            factors, loadings, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], fast=True,
                output_dimension=output_dimension, centre=centre,
                auto_transpose=auto_transpose)
        elif algorithm == 'sklearn_pca':
            if sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'
        elif algorithm == 'nmf':
            if sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'sparse_pca':
            if sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = sklearn.decomposition.SparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'mini_batch_sparse_pca':
            if sklearn_installed is False:
                raise ImportError('sklearn is not installed. '
                                  'Nothing done')
            sk = sklearn.decomposition.MiniBatchSparsePCA(
                output_dimension, **kwargs)
            loadings = sk.fit_transform(
                dc[:, signal_mask][navigation_mask, :])
            factors = sk.components_.T
        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                    "For MLPCA it is mandatory to define the "
                    "output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Supposing poissonian data')
                var_array = dc[:, signal_mask][navigation_mask, :]
            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(
                        dc[signal_mask, ...][:, navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except:
                        messages.warning_exit(
                            'var_func must be either a function or an '
                            'array defining the coefficients of a '
                            'polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            loadings = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            raise ValueError('Algorithm not recognised. Nothing done')

        # We must calculate the ratio here because otherwise the sum
        # information can be lost if the user calls
        # crop_decomposition_dimension
        if explained_variance is not None and \
                explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()

        # Store the results in learning_results
        target.factors = factors
        target.loadings = loadings
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)

        # Delete the unmixing information, because it would refer to a
        # previous decomposition
        target.unmixing_matrix = None
        target.bss_algorithm = None

        if self._unfolded4decomposition is True:
            folding = self.mapped_parameters._internal_parameters.folding
            target.original_shape = folding.original_shape

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
            else:
                loadings_ = sk.transform(dc[:, signal_mask])
            target.loadings = loadings_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca',
                                 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(loadings),
                                 dc[navigation_mask, :] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not "
                                     "yet supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None

        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.loadings[:] *= self._root_aG

        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            # Store the signal mask (inverted back, as input)
            target.signal_mask = ~signal_mask.reshape(
                self.axes_manager._signal_shape_in_array)
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1],
                                    target.factors.shape[1]))
                factors[signal_mask == True, :] = target.factors
                factors[signal_mask == False, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            # Store the navigation mask (inverted back, as input)
            target.navigation_mask = ~navigation_mask.reshape(
                self.axes_manager._navigation_shape_in_array)
            if reproject not in ('both', 'navigation'):
                loadings = np.zeros((dc.shape[0],
                                     target.loadings.shape[1]))
                loadings[navigation_mask == True, :] = target.loadings
                loadings[navigation_mask == False, :] = np.nan
                target.loadings = loadings
    finally:
        # Undo any pre-treatments
        self.undo_treatments()
        if self._unfolded4decomposition is True:
            self.fold()
            self._unfolded4decomposition = False
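
# Variance-specification sketch for the MLPCA branch above: a callable
# var_func is applied to the data, while a non-callable var_func falls
# through to np.polyval(polyfit, ...) -- note that branch reads the
# polyfit argument, not var_func itself. `s` is a hypothetical signal.
#
#     # Callable: variance modelled as 0.1*data + 2.0
#     s.decomposition(algorithm='mlpca', output_dimension=3,
#                     var_func=lambda x: 0.1 * x + 2.0)
#
#     # Polynomial coefficients: pass polyfit as well, since the
#     # non-callable branch evaluates np.polyval(polyfit, data)
#     s.decomposition(algorithm='mlpca', output_dimension=3,
#                     var_func=[0.1, 2.0], polyfit=[0.1, 2.0])
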
def principal_components_analysis(self,
                                  normalize_poissonian_noise=False,
                                  algorithm='svd',
                                  output_dimension=None,
                                  navigation_mask=None,
                                  signal_mask=None,
                                  center=False,
                                  variance2one=False,
                                  var_array=None,
                                  var_func=None,
                                  polyfit=None,
                                  on_peaks=False):
    """Principal components analysis.

    The results are stored in self.mva_results

    Parameters
    ----------
    normalize_poissonian_noise : bool
        If True, scale the SI to normalize Poissonian noise
    algorithm : {'svd', 'fast_svd', 'mlpca', 'fast_mlpca', 'mdp',
        'NIPALS'}
    output_dimension : None or int
        number of principal components to keep
    navigation_mask : boolean numpy array
    signal_mask : boolean numpy array
    center : bool
        Perform energy centering before PCA
    variance2one : bool
        Perform whitening before PCA
    var_array : numpy array
        Array of variance for the maximum likelihood PCA algorithm
    var_func : function or numpy array
        If function, it will apply it to the dataset to obtain the
        var_array. Alternatively, it can be an array with the
        coefficients of a polynomial.
    polyfit :

    See also
    --------
    plot_principal_components, plot_principal_components_maps, plot_lev
    """
    # Backup the original data
    if on_peaks:
        self._data_before_treatments = self.peak_chars.copy()
    else:
        self._data_before_treatments = self.data.copy()

    # Check for conflicting options and correct them when possible
    if (algorithm == 'mdp' or algorithm == 'NIPALS') and center is False:
        print \
        """
        The PCA algorithms from the MDP toolkit (mdp and NIPALS)
        do not permit deactivating data centering.
        Therefore, the algorithm will proceed to center the data.
        """
        center = True
    if algorithm == 'mlpca':
        if normalize_poissonian_noise is True:
            messages.warning(
                "It makes no sense to do normalize_poissonian_noise "
                "with the MLPCA algorithm. Therefore, "
                "normalize_poissonian_noise is set to False")
            normalize_poissonian_noise = False
        if output_dimension is None:
            messages.warning_exit(
                "With the mlpca algorithm the output_dimension must be "
                "specified")
    if center is True and normalize_poissonian_noise is True:
        messages.warning(
            "Centering is not compatible with poissonian noise "
            "normalization\nDisabling centering")
        center = False
    if variance2one is True and normalize_poissonian_noise is True:
        messages.warning(
            "Variance normalization is not compatible with poissonian "
            "noise normalization.\nDisabling variance2one")
        variance2one = False

    # Apply pre-treatments.
    # Centering
    if center is True:
        self.energy_center()
    # Variance normalization
    if variance2one is True:
        self.variance2one()
    # Transform the data into a line spectrum
    self._unfolded4pca = self.unfold_if_multidim()

    # Normalize the poissonian noise.
    # Note that this function can change the masks.
    if normalize_poissonian_noise is True:
        navigation_mask, signal_mask = \
            self.normalize_poissonian_noise(
                navigation_mask=navigation_mask,
                signal_mask=signal_mask,
                return_masks=True)

    navigation_mask = self._correct_navigation_mask_when_unfolded(
        navigation_mask)

    messages.information('Performing principal components analysis')

    if on_peaks:
        dc = self.peak_chars
    else:
        # The data must be transposed both for Images and Spectra
        dc = self.data.T.squeeze()

    # Set the output target (peak results or not?)
    target = self._get_target(on_peaks)

    # Transform the None masks into slices to get the right behaviour
    if navigation_mask is None:
        navigation_mask = slice(None)
    if signal_mask is None:
        signal_mask = slice(None)

    if algorithm == 'mdp' or algorithm == 'NIPALS':
        if algorithm == 'mdp':
            target.pca_node = mdp.nodes.PCANode(
                output_dim=output_dimension, svd=True)
        elif algorithm == 'NIPALS':
            target.pca_node = mdp.nodes.NIPALSNode(
                output_dim=output_dimension)
        # Train the node
        print "\nPerforming the PCA node training"
        print "This includes variance normalization"
        target.pca_node.train(dc[signal_mask, :][:, navigation_mask])
        print "Performing PCA projection"
        pc = target.pca_node.execute(dc[:, navigation_mask])
        pca_v = target.pca_node.v
        pca_V = target.pca_node.d
        target.output_dimension = output_dimension
    elif algorithm == 'svd':
        pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask])
        pc = np.dot(dc[:, navigation_mask], pca_v)
    elif algorithm == 'fast_svd':
        pca_v, pca_V = pca(dc[signal_mask, :][:, navigation_mask],
                           fast=True,
                           output_dimension=output_dimension)
        pc = np.dot(dc[:, navigation_mask], pca_v)
    elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
        print "Performing the MLPCA training"
        if output_dimension is None:
            messages.warning_exit(
                "For MLPCA it is mandatory to define the "
                "output_dimension")
        if var_array is None and var_func is None:
            messages.information('No variance array provided. '
                                 'Supposing poissonian data')
            var_array = dc.squeeze()[signal_mask, :][:, navigation_mask]
        if var_array is not None and var_func is not None:
            messages.warning_exit(
                "You have defined both the var_func and var_array "
                "keywords. Please, define just one of them")
        if var_func is not None:
            if hasattr(var_func, '__call__'):
                var_array = var_func(
                    dc[signal_mask, ...][:, navigation_mask])
            else:
                try:
                    var_array = np.polyval(
                        polyfit, dc[signal_mask, navigation_mask])
                except:
                    messages.warning_exit(
                        'var_func must be either a function or an array '
                        'defining the coefficients of a polynomial')
        if algorithm == 'mlpca':
            fast = False
        else:
            fast = True
        target.mlpca_output = mlpca(
            dc.squeeze()[signal_mask, :][:, navigation_mask],
            var_array.squeeze(), output_dimension, fast=fast)
        U, S, V, Sobj, ErrFlag = target.mlpca_output
        print "Performing PCA projection"
        pc = np.dot(dc[:, navigation_mask], V)
        pca_v = V
        pca_V = S ** 2

    if output_dimension:
        print "trimming to %i dimensions" % output_dimension
        pca_v = pca_v[:, :output_dimension]
        pca_V = pca_V[:output_dimension]
        pc = pc[:, :output_dimension]

    target.pc = pc
    target.v = pca_v
    target.V = pca_V
    target.pca_algorithm = algorithm
    target.centered = center
    target.poissonian_noise_normalized = normalize_poissonian_noise
    target.output_dimension = output_dimension
    target.unfolded = self._unfolded4pca
    target.variance2one = variance2one

    if self._unfolded4pca is True:
        target.original_shape = self._shape_before_unfolding

    # Rescale the results if the noise was normalized
    if normalize_poissonian_noise is True:
        target.pc[signal_mask, :] *= self._root_bH
        target.v *= self._root_aG.T

    if isinstance(navigation_mask, slice):
        navigation_mask = None
    if isinstance(signal_mask, slice):
        signal_mask = None

    # Undo any pre-treatments
    self.undo_treatments(on_peaks)

    # Set the pixels that were not processed to nan
    if navigation_mask is not None:
        v = np.zeros((dc.shape[1], target.v.shape[1]),
                     dtype=target.v.dtype)
        v[navigation_mask == False, :] = np.nan
        v[navigation_mask, :] = target.v
        target.v = v

    if self._unfolded4pca is True:
        self.fold()
        self._unfolded4pca = False
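
# Usage sketch of the older PCA API above; `s` is a hypothetical spectrum
# object, and the attribute names follow the assignments above (results
# land in mva_results as pc / v / V).
#
#     s.principal_components_analysis(algorithm='fast_svd',
#                                     output_dimension=10)
#     print s.mva_results.V[:10]   # leading eigenvalues for a scree check
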