Example 1
    def test_solver_auto(self):
        # Uses "full"
        U = self.rng.randn(100, 5)
        V = self.rng.randn(100, 5)
        X = U @ V.T
        factors, loadings, _, _ = svd_pca(X,
                                          output_dimension=5,
                                          svd_solver="auto")
        Y = loadings @ factors.T
        normX = np.linalg.norm(X - Y)
        assert normX < self.tol

        # Uses "randomized"
        U = self.rng.randn(501, 5)
        V = self.rng.randn(100, 5)
        X = U @ V.T
        factors, loadings, _, _ = svd_pca(X,
                                          output_dimension=5,
                                          svd_solver="auto")
        Y = loadings @ factors.T
        normX = np.linalg.norm(X - Y)
        assert normX < self.tol

        # Uses "full"
        U = self.rng.randn(501, 5)
        V = self.rng.randn(100, 5)
        X = U @ V.T
        factors, loadings, _, _ = svd_pca(X,
                                          output_dimension=81,
                                          svd_solver="auto")
        Y = loadings @ factors.T
        normX = np.linalg.norm(X - Y)
        assert normX < self.tol
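
The three cases above exercise the solver selection behind svd_solver="auto": an exact SVD for small arrays, a randomized solver for tall arrays with few requested components, and the exact solver again once the requested dimension approaches the smaller data dimension. A minimal sketch of such a heuristic, assuming it mirrors scikit-learn's PCA rule; it is an illustration, not necessarily the exact logic inside svd_pca:

def _choose_svd_solver(shape, output_dimension):
    # Hypothetical helper: small arrays -> exact LAPACK SVD, large arrays
    # with few requested components -> randomized SVD.
    m, n = shape
    if max(m, n) <= 500:
        return "full"
    if output_dimension >= 0.8 * min(m, n):
        return "full"
    return "randomized"

# Applied to the three cases above:
#   (100, 100), 5 components  -> "full"
#   (501, 100), 5 components  -> "randomized"
#   (501, 100), 81 components -> "full" (81 > 0.8 * 100)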
Example 2
    def test_full(self, output_dimension, auto_transpose, centre,
                  u_based_decision):
        factors, loadings, explained_variance, mean = svd_pca(
            self.X,
            output_dimension=output_dimension,
            svd_solver="full",
            auto_transpose=auto_transpose,
            centre=centre,
            u_based_decision=u_based_decision,
        )
        X = loadings @ factors.T

        # Check the reconstruction error of the low-rank component
        normX = np.linalg.norm(X - self.X)
        assert normX < self.tol

        # Check that the first `rank` components explain all the variance
        explained_variance_norm = explained_variance / np.sum(
            explained_variance)
        np.testing.assert_allclose(explained_variance_norm[:self.rank].sum(),
                                   1.0)

        if centre is None:
            assert mean is None
        elif centre == "features":
            np.testing.assert_allclose(mean, self.X_mean_1)
        elif centre == "samples":
            np.testing.assert_allclose(mean, self.X_mean_0)
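
The fixture behind these assertions (self.X, self.rank, self.tol, self.X_mean_0, self.X_mean_1) is not shown. One plausible setup is a low-rank random matrix with its per-axis means precomputed; the sizes, tolerance and attribute conventions below are assumptions for illustration only:

import numpy as np

class _FixtureSketch:
    def setup_method(self, method):
        self.rng = np.random.RandomState(123)
        m, n, self.rank = 250, 100, 5
        U = self.rng.randn(m, self.rank)
        V = self.rng.randn(n, self.rank)
        self.X = U @ V.T                         # exactly rank-5 data
        self.tol = 1e-9 * np.abs(self.X).sum()   # hypothetical tolerance
        self.X_mean_0 = self.X.mean(axis=0)      # one mean per column (axis 0 collapsed)
        self.X_mean_1 = self.X.mean(axis=1)      # one mean per row (axis 1 collapsed)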
Example 3
 def test_arpack_error(self):
     pytest.importorskip("scipy", minversion="1.4.0")
     with pytest.raises(ValueError,
                        match="requires output_dimension to be strictly"):
         _ = svd_pca(self.X,
                     output_dimension=min(self.X.shape) + 1,
                     svd_solver="arpack")
Example 4
    def test_randomized(self, output_dimension, auto_transpose, centre):
        factors, loadings, explained_variance, mean = svd_pca(
            self.X,
            output_dimension=output_dimension,
            svd_solver="randomized",
            auto_transpose=auto_transpose,
            centre=centre,
        )
        X = loadings @ factors.T

        # Check the reconstruction error of the low-rank component
        normX = np.linalg.norm(X - self.X)
        assert normX < self.tol

        # Check that the first `rank` components explain all the variance
        explained_variance_norm = explained_variance / np.sum(explained_variance)
        np.testing.assert_allclose(explained_variance_norm[: self.rank].sum(), 1.0)
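
test_full and test_randomized receive output_dimension, auto_transpose and centre from a parametrization that is not shown. One plausible way to supply them with pytest; the concrete value lists are illustrative, not taken from the original module:

import pytest

@pytest.mark.parametrize("centre", [None, "features", "samples"])
@pytest.mark.parametrize("auto_transpose", [True, False])
@pytest.mark.parametrize("output_dimension", [None, 5])
def test_randomized_signature(output_dimension, auto_transpose, centre):
    # Stand-in body; the real test calls svd_pca with these arguments.
    assert centre in (None, "features", "samples")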
Example 5
    def decomposition(self, normalize_poissonian_noise=False,
                      algorithm='svd', output_dimension=None, centre=None,
                      auto_transpose=True, navigation_mask=None,
                      signal_mask=None, var_array=None, var_func=None,
                      polyfit=None, on_peaks=False, reproject=None, **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.mva_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring is
            performed along the variables axis. If 'trials', the centring is
            performed along the trials axis. It only has an effect when using
            the svd or fast_svd algorithms.

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            It only has an effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            Boolean mask selecting the navigation locations used in the
            decomposition.

        signal_mask : boolean numpy array
            Boolean mask selecting the signal locations used in the
            decomposition.
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the coefficients
            of a polynomial.
            
        polyfit :
        
        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in 
            the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_scores, plot_lev

        """
        # backup the original data
        if on_peaks:
            if hasattr(self.mapped_parameters, 'peak_chars'):
                self._data_before_treatments = \
                    self.mapped_parameters.peak_chars.copy()
            else:
                print("No peak characteristics found. You must run the "
                      "peak_char_stack function to obtain these before "
                      "you can run PCA or ICA on them.")
        else:
            self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be specified")


        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        if hasattr(navigation_mask, 'ravel'):
            navigation_mask = navigation_mask.ravel()

        if hasattr(signal_mask, 'ravel'):
            signal_mask = signal_mask.ravel()

        # Normalize the poissonian noise
        # TODO this function can change the masks and this can cause
        # problems when reprojecting
        if normalize_poissonian_noise is True:
            if reproject is None:
                navigation_mask, signal_mask = \
                    self.normalize_poissonian_noise(
                        navigation_mask=navigation_mask,
                        signal_mask=signal_mask,
                        return_masks=True)
            elif reproject == 'both':
                _, _ = self.normalize_poissonian_noise(return_masks=True)
            elif reproject == 'navigation':
                _, signal_mask = self.normalize_poissonian_noise(
                    return_masks=True, signal_mask=signal_mask)
            elif reproject == 'signal':
                navigation_mask, _ = self.normalize_poissonian_noise(
                    return_masks=True, navigation_mask=navigation_mask)
            
        messages.information('Performing decomposition analysis')
        if on_peaks:
            dc = self.mapped_parameters.peak_chars
        else:
            # The data must be transposed both for Images and Spectra
            dc = self.data
            
        # set the output target (peak results or not?)
        target = self._get_target(on_peaks)
        
        # Transform the None masks in slices to get the right behaviour
        if navigation_mask is None:
            navigation_mask = slice(None)
        if signal_mask is None:
            signal_mask = slice(None)
        
        # Reset the explained_variance which is not set by all the algorithms
        explained_variance = None
        explained_variance_ratio = None
        mean = None
        
        if algorithm == 'svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :], centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'fast_svd':
            factors, scores, explained_variance, mean = svd_pca(
                dc[:, signal_mask][navigation_mask, :],
                fast=True, output_dimension=output_dimension, centre=centre,
                auto_transpose=auto_transpose)

        elif algorithm == 'sklearn_pca':    
            sk = sklearn.decomposition.PCA(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:]))
            factors = sk.components_.T
            explained_variance = sk.explained_variance_
            mean = sk.mean_
            centre = 'trials'   

        elif algorithm == 'nmf':    
            sk = sklearn.decomposition.NMF(**kwargs)
            sk.n_components = output_dimension
            scores = sk.fit_transform((dc[:,signal_mask][navigation_mask,:]))
            factors = sk.components_.T
            
        elif algorithm == 'sparse_pca':
            sk = sklearn.decomposition.SparsePCA(output_dimension, **kwargs)
            scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:])
            factors = sk.components_.T
            
        elif algorithm == 'mini_batch_sparse_pca':
            sk = sklearn.decomposition.MiniBatchSparsePCA(output_dimension,
                                                            **kwargs)
            scores = sk.fit_transform(dc[:,signal_mask][navigation_mask,:])
            factors = sk.components_.T

        elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
            print "Performing the MLPCA training"
            if output_dimension is None:
                messages.warning_exit(
                "For MLPCA it is mandatory to define the output_dimension")
            if var_array is None and var_func is None:
                messages.information('No variance array provided. '
                                     'Assuming Poissonian data')
                var_array = dc[:,signal_mask][navigation_mask,:]

            if var_array is not None and var_func is not None:
                messages.warning_exit(
                    "You have defined both the var_func and var_array "
                    "keywords. Please, define just one of them")
            if var_func is not None:
                if hasattr(var_func, '__call__'):
                    var_array = var_func(dc[signal_mask,...][:,navigation_mask])
                else:
                    try:
                        var_array = np.polyval(
                            polyfit, dc[signal_mask, navigation_mask])
                    except:
                        messages.warning_exit(
                            'var_func must be either a function or an array '
                            'defining the coefficients of a polynomial')
            if algorithm == 'mlpca':
                fast = False
            else:
                fast = True
            U, S, V, Sobj, ErrFlag = mlpca(
                dc[:, signal_mask][navigation_mask, :],
                var_array, output_dimension, fast=fast)
            scores = U * S
            factors = V
            explained_variance_ratio = S ** 2 / Sobj
            explained_variance = S ** 2 / len(factors)
        else:
            messages.information('Error: Algorithm not recognised. '
                                 'Nothing done')
            return False

        # We must calculate the ratio here because otherwise the sum information
        # can be lost if the user calls crop_decomposition_dimension
        if explained_variance is not None and explained_variance_ratio is None:
            explained_variance_ratio = \
                explained_variance / explained_variance.sum()
                
        # Store the results in mva_results
        target.factors = factors
        target.scores = scores
        target.explained_variance = explained_variance
        target.explained_variance_ratio = explained_variance_ratio
        target.decomposition_algorithm = algorithm
        target.poissonian_noise_normalized = \
            normalize_poissonian_noise
        target.output_dimension = output_dimension
        target.unfolded = self._unfolded4decomposition
        target.centre = centre
        target.mean = mean
        

        if output_dimension and factors.shape[1] != output_dimension:
            target.crop_decomposition_dimension(output_dimension)
        
        # Delete the unmixing information, because it'll refer to a previous
        # decomposition
        target.unmixing_matrix = None
        target.ica_algorithm = None

        if self._unfolded4decomposition is True:
            target.original_shape = self._shape_before_unfolding

        # Reproject
        if mean is None:
            mean = 0
        if reproject in ('navigation', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                scores_ = np.dot(dc[:,signal_mask] - mean, factors)
            else:
                scores_ = sk.transform(dc[:,signal_mask])
            target.scores = scores_
        if reproject in ('signal', 'both'):
            if algorithm not in ('nmf', 'sparse_pca', 'mini_batch_sparse_pca'):
                factors = np.dot(np.linalg.pinv(scores), 
                                 dc[navigation_mask,:] - mean).T
                target.factors = factors
            else:
                messages.information("Reprojecting the signal is not yet "
                                     "supported for this algorithm")
                if reproject == 'both':
                    reproject = 'signal'
                else:
                    reproject = None
        
        # Rescale the results if the noise was normalized
        if normalize_poissonian_noise is True:
            target.factors[:] *= self._root_bH.T
            target.scores[:] *= self._root_aG
            
        # Set the pixels that were not processed to nan
        if not isinstance(signal_mask, slice):
            target.signal_mask = signal_mask
            if reproject not in ('both', 'signal'):
                factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                factors[signal_mask, :] = target.factors
                factors[~signal_mask, :] = np.nan
                target.factors = factors
        if not isinstance(navigation_mask, slice):
            target.navigation_mask = navigation_mask
            if reproject not in ('both', 'navigation'):
                scores = np.zeros((dc.shape[0], target.scores.shape[1]))
                scores[navigation_mask, :] = target.scores
                scores[~navigation_mask, :] = np.nan
                target.scores = scores

        # undo any pre-treatments
        self.undo_treatments(on_peaks)

        if self._unfolded4decomposition is True:
            self.fold()
            self._unfolded4decomposition = False
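
The reprojection branches near the end project the full data back onto the decomposition obtained from the masked subset: scores are recovered by multiplying the (centred) data with the factors, and factors by applying the pseudo-inverse of the scores. A small self-contained numpy illustration of those two projections; shapes and names are arbitrary:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(60, 40)                         # stand-in for dc (trials x variables)
factors = np.linalg.qr(rng.randn(40, 3))[0]   # stand-in for the masked factors

# "navigation" reprojection: scores of every trial on the factors
scores_ = X @ factors                         # np.dot(dc - mean, factors) with mean == 0

# "signal" reprojection: factors re-estimated from the scores
factors_ = (np.linalg.pinv(scores_) @ X).T
print(scores_.shape, factors_.shape)          # (60, 3) (40, 3)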
Example 6
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise

        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'

        output_dimension : None or int
            number of components to keep/calculate

        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring is
            performed along the variables axis. If 'trials', the centring is
            performed along the trials axis. It only has an effect when using
            the svd or fast_svd algorithms.

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            It only has an effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.

        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm

        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the coefficients
            of a polynomial.

        polyfit :

        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in
            the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float '
                'type. You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return

        if self.axes_manager.navigation_size < 2:
            raise AttributeError("It is not possible to decompose a dataset "
                                 "with navigation_size < 2")
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                raise ValueError("With the mlpca algorithm the "
                                 "output_dimension must be expecified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')
            # The rest of the code assumes that the first data axis
            # is the navigation axis. We transpose the data if that is not the
            # case.
            dc = (self.data if self.axes_manager[0].index_in_array == 0
                  else self.data.T)
            # set the output target (peak results or not?)
            target = self.learning_results

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negations, i.e. True -> False and vice versa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'

            elif algorithm == 'nmf':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T

            elif algorithm == 'sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mini_batch_sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                print "Performing the MLPCA training"
                if output_dimension is None:
                    raise ValueError(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Assuming Poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    raise ValueError(
                        "You have defined both the var_func and var_array "
                        "keywords. Please, define just one of them")
                if var_func is not None:
                    if hasattr(var_func, '__call__'):
                        var_array = var_func(
                            dc[signal_mask, ...][:, navigation_mask])
                    else:
                        try:
                            var_array = np.polyval(
                                polyfit, dc[
                                    signal_mask, navigation_mask])
                        except:
                            raise ValueError(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a '
                                'polynomial')
                if algorithm == 'mlpca':
                    fast = False
                else:
                    fast = True
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user calls
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.metadata._HyperSpy.Folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    if reproject == 'both':
                        reproject = 'signal'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as input) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as input) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            # undo any pre-treatments
            self.undo_treatments()

            if self._unfolded4decomposition is True:
                self.fold()
                self._unfolded4decomposition = False
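
In this version the input masks mark locations to exclude, so they are inverted before being used as boolean indices, and the decomposition then runs on dc[:, signal_mask][navigation_mask, :]. A tiny numpy illustration of that inversion and double indexing; the array sizes are arbitrary:

import numpy as np

dc = np.arange(20.0).reshape(4, 5)      # 4 "navigation" rows, 5 "signal" columns
signal_mask_in = np.array([False, True, False, False, True])   # True = exclude
navigation_mask_in = np.array([False, False, True, False])     # True = exclude

signal_mask = ~signal_mask_in           # invert: True now means "keep"
navigation_mask = ~navigation_mask_in

subset = dc[:, signal_mask][navigation_mask, :]
print(subset.shape)                     # (3, 3): three kept rows, three kept columns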
Example 7
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
            
        algorithm : 'svd' | 'fast_svd' | 'mlpca' | 'fast_mlpca' | 'nmf' |
            'sparse_pca' | 'mini_batch_sparse_pca'
        
        output_dimension : None or int
            number of components to keep/calculate
            
        centre : None | 'variables' | 'trials'
            If None, no centring is applied. If 'variables', the centring is
            performed along the variables axis. If 'trials', the centring is
            performed along the trials axis. It only has an effect when using
            the svd or fast_svd algorithms.

        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            It only has an effect when using the svd or fast_svd algorithms.

        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.

        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.
            
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
            
        var_func : function or numpy array
            If a function, it is applied to the dataset to obtain the
            var_array. Alternatively, it can be an array with the coefficients
            of a polynomial.
            
        polyfit :
        
        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in 
            the selected masked area.


        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']: # If not float
            messages.warning(
                'To perform a decomposition the data must be of the float type.'
                ' You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return
        # backup the original data
        self._data_before_treatments = self.data.copy()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                messages.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                messages.warning_exit("With the mlpca algorithm the "
                                      "output_dimension must be specified")


        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold_if_multidim()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                                        navigation_mask=navigation_mask,
                                        signal_mask=signal_mask,)
            messages.information('Performing decomposition analysis')

            dc = self.data
            # set the output target (peak results or not?)
            target = self.learning_results
            
            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask
                
            # WARNING: signal_mask and navigation_mask values are now their
            # negations, i.e. True -> False and vice versa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks
            
            # Reset the explained_variance which is not set by all the 
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None
            
            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:,signal_mask][navigation_mask,:],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'   

            elif algorithm == 'nmf':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:,signal_mask][navigation_mask,:]))
                factors = sk.components_.T
                
            elif algorithm == 'sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T
                
            elif algorithm == 'mini_batch_sparse_pca':
                if sklearn_installed is False:
                    raise ImportError(
                    'sklearn is not installed. Nothing done')
                sk = sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:,signal_mask][navigation_mask,:])
                factors = sk.components_.T

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                print("Performing the MLPCA training")
                if output_dimension is None:
                    messages.warning_exit(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    messages.information('No variance array provided. '
                                         'Assuming Poissonian data')
                    var_array = dc[:,signal_mask][navigation_mask,:]

                if var_array is not None and var_func is not None:
                    messages.warning_exit(
                        "You have defined both the var_func and var_array "
                        "keywords. Please, define just one of them")
                if var_func is not None:
                    if hasattr(var_func, '__call__'):
                        var_array = var_func(
                            dc[signal_mask,...][:,navigation_mask])
                    else:
                        try:
                            var_array = np.polyval(
                                polyfit, dc[signal_mask, navigation_mask])
                        except:
                            messages.warning_exit(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a '
                                'polynomial')
                if algorithm == 'mlpca':
                    fast = False
                else:
                    fast = True
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:,signal_mask][navigation_mask,:],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum 
            # information can be lost if the user calls
            # crop_decomposition_dimension
            if explained_variance is not None and \
            explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()
                    
            # Store the results in learning_results
            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean
            

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)
            
            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.mapped_parameters._internal_parameters.folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca', 
                                      'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:,signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:,signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                      'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings), 
                                     dc[navigation_mask,:] - mean).T
                    target.factors = factors
                else:
                    messages.information("Reprojecting the signal is not yet "
                                         "supported for this algorithm")
                    if reproject == 'both':
                        reproject = 'signal'
                    else:
                        reproject = None
            
            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG
                
            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as input) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as input) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros((dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            # undo any pre-treatments
            self.undo_treatments()

            if self._unfolded4decomposition is True:
                self.fold()
                self._unfolded4decomposition = False
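
The final rescaling by self._root_aG and self._root_bH undoes the weighting applied by normalize_poissonian_noise before the decomposition. A self-contained sketch of that kind of weighting, in the spirit of Keenan and Kotula's approach of dividing the counts by the square roots of their row and column totals; the exact normalisation used by the method may differ:

import numpy as np

rng = np.random.RandomState(1)
dc = rng.poisson(5.0, size=(50, 30)).astype(float)   # counts: trials x channels

root_aG = np.sqrt(dc.sum(axis=1, keepdims=True))     # per-trial weight
root_bH = np.sqrt(dc.sum(axis=0, keepdims=True))     # per-channel weight

dc_weighted = dc / (root_aG * root_bH)   # the decomposition runs on this array
# Afterwards factors and loadings are rescaled back, mirroring
# target.factors[:] *= self._root_bH.T and target.loadings[:] *= self._root_aG above.
assert np.allclose(dc_weighted * (root_aG * root_bH), dc)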
Example 8
 def test_centre_error(self):
     with pytest.raises(ValueError, match="'centre' must be one of"):
         _ = svd_pca(self.X, centre="random")
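
The message being matched suggests an up-front validation of the centre argument. A minimal sketch of such a guard, assuming the allowed values are the ones exercised by the other tests (None, "features", "samples"):

def _check_centre(centre):
    # Hypothetical guard matching the error tested above; the allowed values
    # are inferred from the other tests, not from svd_pca itself.
    allowed = (None, "features", "samples")
    if centre not in allowed:
        raise ValueError(f"'centre' must be one of {allowed}, got {centre!r}")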
Example 9
 def test_arpack_error(self):
     with pytest.raises(ValueError,
                        match="requires output_dimension to be strictly"):
         _ = svd_pca(self.X,
                     output_dimension=min(self.X.shape) + 1,
                     svd_solver="arpack")