Beispiel #1
0
    def test_warnings(self):
        """Passing ``learning_rate`` or ``momentum`` emits a deprecation warning."""
        # The patterns contain no placeholders, so they are plain string
        # literals rather than f-strings.
        with pytest.warns(
                VisibleDeprecationWarning,
                match="The argument `learning_rate` has been deprecated"):
            orpca(self.X, rank=self.rank, learning_rate=0.1)

        with pytest.warns(
                VisibleDeprecationWarning,
                match="The argument `momentum` has been deprecated"):
            orpca(self.X, rank=self.rank, momentum=0.1)
Beispiel #2
0
    def test_init_mat(self):
        """A matrix passed as ``init`` is accepted; bad shapes raise ValueError."""
        # Five-way unpacking is kept so the number of returned values is
        # still checked; only the first output is inspected.
        X, _, _, _, _ = orpca(
            self.X, rank=self.rank, store_error=True, init=self.U)
        compare_norms(X, self.A)

        # Build the invalid inputs outside the ``raises`` context so that
        # only the call under test can satisfy the expected ValueError.
        one_dimensional = np.zeros(self.m)
        with pytest.raises(ValueError,
                           match="has to be a two-dimensional matrix"):
            orpca(self.X, rank=self.rank, init=one_dimensional)

        wrong_shape = np.zeros((self.m, self.rank - 1))
        with pytest.raises(ValueError, match="has to be of shape"):
            orpca(self.X, rank=self.rank, init=wrong_shape)
Beispiel #3
0
    def test_training(self, rank, training_samples):
        """QR initialisation works and too few training samples are rejected."""
        X, _, _, _, _ = orpca(
            self.X,
            rank=rank,
            store_error=True,
            init="qr",
            training_samples=training_samples,
        )
        compare_norms(X, self.A)

        # Plain literal: the pattern has no placeholders to interpolate.
        with pytest.raises(ValueError, match="must be >="):
            orpca(self.X, rank=self.rank, init="qr",
                  training_samples=self.rank - 1)
Beispiel #4
0
    def test_method_MomentumSGD(self, subspace_momentum):
        """MomentumSGD runs; ``subspace_momentum=1.9`` raises ValueError."""
        X, _, _, _, _ = orpca(
            self.X,
            rank=self.rank,
            store_error=True,
            method="MomentumSGD",
            subspace_learning_rate=1.1,
            subspace_momentum=subspace_momentum,
        )
        compare_norms(X, self.A)

        # Plain literal: the pattern has no placeholders to interpolate.
        with pytest.raises(ValueError,
                           match="must be a float between 0 and 1"):
            orpca(
                self.X, rank=self.rank, method="MomentumSGD",
                subspace_momentum=1.9
            )
Beispiel #5
0
    def test_init(self):
        """``init='rand'`` works; malformed init matrices raise ValueError."""
        X, _, _, _, _ = orpca(self.X, rank=self.rank, init='rand')

        # Check the low-rank component MSE
        normX = np.linalg.norm(X - self.A) / (self.m * self.n)
        assert normX < self.tol

        # Build the invalid inputs outside the ``raises`` context so that
        # only the call under test can satisfy the expected ValueError.
        one_dimensional = np.zeros(self.m)
        with pytest.raises(ValueError,
                           match="has to be a two-dimensional matrix"):
            orpca(self.X, rank=self.rank, init=one_dimensional)

        wrong_shape = np.zeros((self.m, self.rank - 1))
        with pytest.raises(ValueError, match="has to be of shape"):
            orpca(self.X, rank=self.rank, init=wrong_shape)
Beispiel #6
0
    def test_method_SGD(self):
        """Check the low-rank reconstruction error with ``method='SGD'``."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            method='SGD',
            learning_rate=self.learning_rate,
        )

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #7
0
    def test_training(self):
        """QR initialisation works and too few training samples are rejected."""
        X, _, _, _, _ = orpca(self.X,
                              rank=self.rank,
                              init='qr',
                              training_samples=self.training_samples)

        # Check the low-rank component MSE
        normX = np.linalg.norm(X - self.A) / (self.m * self.n)
        print(normX)
        assert normX < self.tol

        # Plain literal: the pattern has no placeholders to interpolate.
        with pytest.raises(ValueError, match="must be >="):
            orpca(self.X,
                  rank=self.rank,
                  init='qr',
                  training_samples=self.rank - 1)
Beispiel #8
0
    def test_training(self):
        """Check the reconstruction error when initialising with ``'qr'``."""
        outputs = orpca(
            self.X,
            rank=self.rank,
            init='qr',
            training_samples=self.training_samples,
        )
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        scaled_error = np.linalg.norm(low_rank - self.A) / (self.m * self.n)
        print(scaled_error)
        nt.assert_true(scaled_error < self.tol)
Beispiel #9
0
 def test_fast(self):
     """With ``fast=True`` only the shapes of the five outputs are checked."""
     low_rank, sparse, basis, sing_vals, coeffs = orpca(
         self.X, rank=self.rank, fast=True)

     # Expected shapes expressed through the fixture dimensions.
     full_shape = (self.m, self.n)
     assert low_rank.shape == full_shape
     assert sparse.shape == full_shape
     assert basis.shape == (self.m, self.rank)
     assert sing_vals.shape == (self.rank, )
     assert coeffs.shape == (self.n, self.rank)
Beispiel #10
0
    def test_regularization(self):
        """Check the reconstruction error with explicit ``lambda1``/``lambda2``."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            lambda1=self.lambda1,
            lambda2=self.lambda2,
        )

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #11
0
    def test_method_MomentumSGD(self, subspace_momentum):
        """MomentumSGD runs; ``subspace_momentum=1.9`` raises ValueError."""
        X, _, _, _, _ = orpca(
            self.X,
            rank=self.rank,
            method='MomentumSGD',
            subspace_learning_rate=self.subspace_learning_rate,
            subspace_momentum=subspace_momentum)

        # Check the low-rank component MSE
        normX = np.linalg.norm(X - self.A) / (self.m * self.n)
        assert normX < self.tol

        # Plain literal: the pattern has no placeholders to interpolate.
        with pytest.raises(ValueError,
                           match="must be a float between 0 and 1"):
            orpca(self.X,
                  rank=self.rank,
                  method='MomentumSGD',
                  subspace_momentum=1.9)
Beispiel #12
0
    def test_method_SGD(self, subspace_learning_rate):
        """Check the reconstruction error of ``method='SGD'`` for a given rate."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            method='SGD',
            subspace_learning_rate=subspace_learning_rate,
        )

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        assert scaled_error < self.tol
Beispiel #13
0
 def test_method_SGD(self, subspace_learning_rate):
     """Run ``method="SGD"`` and compare the low-rank output to ``self.A``."""
     outputs = orpca(self.X,
                     rank=self.rank,
                     store_error=True,
                     method="SGD",
                     subspace_learning_rate=subspace_learning_rate)
     # Only the first of the five outputs is inspected.
     low_rank, _sparse, _u, _s, _v = outputs
     compare_norms(low_rank, self.A)
Beispiel #14
0
 def test_regularization(self):
     """Run with ``lambda1=0.01``, ``lambda2=0.02`` and compare to ``self.A``."""
     outputs = orpca(self.X,
                     rank=self.rank,
                     store_error=True,
                     lambda1=0.01,
                     lambda2=0.02)
     # Only the first of the five outputs is inspected.
     low_rank, _sparse, _u, _s, _v = outputs
     compare_norms(low_rank, self.A)
Beispiel #15
0
    def test_method_MomentumSGD(self):
        """Check the reconstruction error with ``method='MomentumSGD'``."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            method='MomentumSGD',
            learning_rate=self.learning_rate,
            momentum=self.momentum,
        )

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        assert scaled_error < self.tol
Beispiel #16
0
    def test_method_SGD(self):
        """Check the low-rank reconstruction error with ``method='SGD'``."""
        outputs = orpca(self.X, rank=self.rank, method='SGD',
                        learning_rate=self.learning_rate)
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #17
0
    def test_regularization(self):
        """Check the reconstruction error with explicit ``lambda1``/``lambda2``."""
        outputs = orpca(self.X, rank=self.rank,
                        lambda1=self.lambda1, lambda2=self.lambda2)
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #18
0
    def test_method_MomentumSGD(self):
        """Check the reconstruction error with ``method='MomentumSGD'``."""
        outputs = orpca(self.X, rank=self.rank, method='MomentumSGD',
                        learning_rate=self.learning_rate,
                        momentum=self.momentum)
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        assert scaled_error < self.tol
Beispiel #19
0
    def test_training(self):
        """Check the reconstruction error when initialising with ``'qr'``."""
        outputs = orpca(self.X, rank=self.rank, init='qr',
                        training_samples=self.training_samples)
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        print(scaled_error)
        nt.assert_true(scaled_error < self.tol)
Beispiel #20
0
    def test_default(self):
        """Check the reconstruction error with only ``rank`` specified."""
        outputs = orpca(self.X, rank=self.rank)
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        assert scaled_error < self.tol
Beispiel #21
0
 def test_method_BCD(self):
     """Run ``method="BCD"`` and compare the low-rank output to ``self.A``."""
     outputs = orpca(
         self.X,
         rank=self.rank,
         store_error=True,
         method="BCD",
     )
     # Only the first of the five outputs is inspected.
     low_rank, _sparse, _u, _s, _v = outputs
     compare_norms(low_rank, self.A)
Beispiel #22
0
    def test_batch_size(self):
        """With ``batch_size=2`` the two returned factors have the expected shapes."""
        left, right = orpca(self.X, rank=self.rank, batch_size=2)

        expected_left = (self.m, self.rank)
        assert left.shape == expected_left
        expected_right = (self.rank, self.n)
        assert right.shape == expected_right
Beispiel #23
0
    def test_project(self):
        """With ``project=True`` the two returned factors have the expected shapes."""
        left, right = orpca(self.X, rank=self.rank, project=True)

        expected_left = (self.m, self.rank)
        assert left.shape == expected_left
        expected_right = (self.rank, self.n)
        assert right.shape == expected_right
Beispiel #24
0
 def test_default(self):
     """Run with ``store_error=True`` and compare the output to ``self.A``."""
     outputs = orpca(self.X, rank=self.rank, store_error=True)
     # Only the first of the five outputs is inspected.
     low_rank, _sparse, _u, _s, _v = outputs
     compare_norms(low_rank, self.A)
Beispiel #25
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      return_info=False,
                      **kwargs):
        """Decomposition with a choice of algorithms.

        The results are stored in ``self.learning_results``.

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise. Ignored
            (with a warning) for the 'mlpca' algorithm.
        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' |
            'RPCA_GoDec' | 'ORPCA'
        output_dimension : None or int
            Number of components to keep/calculate. Mandatory for the MLPCA
            and robust PCA algorithms.
        centre : None | 'variables' | 'trials'
            If None no centring is applied. If 'variables' the centring will
            be performed in the variable axis. If 'trials', the centring
            will be performed in the 'trials' axis. It only has effect when
            using the svd or fast_svd algorithms.
        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.
        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm.
        var_func : function or numpy array
            If function, it will be applied to the (masked) dataset to
            obtain the var_array. Alternatively, it can be an array with
            the coefficients of a polynomial, evaluated on the (masked)
            dataset with ``numpy.polyval``.
        polyfit : None or array
            Legacy keyword. If given together with a non-callable
            ``var_func``, these polynomial coefficients are used instead of
            ``var_func``.
        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected
            in the selected masked area.
        return_info: bool, default False
            The result of the decomposition is stored internally. However,
            some algorithms generate some extra information that is not
            stored. If True (the default is False) return any extra
            information if available.
        **kwargs
            Forwarded to the sklearn estimator constructor (sklearn based
            algorithms) or to the robust PCA routine ('RPCA_GoDec',
            'ORPCA'). Not used by the other algorithms.

        Returns
        -------
        None, sklearn estimator or (X, E)
            None unless 'return_info' is True. With 'return_info' True the
            sklearn based algorithms return the fitted estimator, and
            'RPCA_GoDec' / 'ORPCA' return the low-rank (X) and sparse (E)
            matrices from robust PCA. None is also returned, after logging
            a warning, when the data is not of a float type.

        Raises
        ------
        AttributeError
            If the navigation size is smaller than 2.
        ValueError
            If ``output_dimension`` is missing for an algorithm requiring
            it, if both ``var_array`` and ``var_func`` are given, if
            ``var_func`` cannot be used, or if ``algorithm`` is unknown.
        ImportError
            If an sklearn based algorithm is requested and sklearn is not
            installed.

        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        to_return = None
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            _logger.warning(
                'To perform a decomposition the data must be of the float '
                'type. You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return

        if self.axes_manager.navigation_size < 2:
            raise AttributeError("It is not possible to decompose a dataset "
                                 "with navigation_size < 2")
        # backup the original data
        self._data_before_treatments = self.data.copy()
        # set the output target (peak results or not?)
        target = LearningResults()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                _logger.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                raise ValueError("With the MLPCA algorithm the "
                                 "output_dimension must be specified")
        if algorithm in ('RPCA_GoDec', 'ORPCA'):
            if output_dimension is None:
                raise ValueError(
                    "With the robust PCA algorithms ('RPCA_GoDec' "
                    "and 'ORPCA'), the output_dimension "
                    "must be specified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,
                )
            _logger.info('Performing decomposition analysis')
            # The rest of the code assumes that the first data axis
            # is the navigation axis. We transpose the data if that is not the
            # case.
            dc = (self.data
                  if self.axes_manager[0].index_in_array == 0 else self.data.T)

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negations i.e. True -> False and viceversa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError('sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform(
                    (dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'
                if return_info:
                    to_return = sk

            elif algorithm == 'nmf':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError('sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform(
                    (dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError('sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'mini_batch_sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError('sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                _logger.info("Performing the MLPCA training")
                # 'fast_mlpca' is not covered by the early check above.
                if output_dimension is None:
                    raise ValueError("For MLPCA it is mandatory to define the "
                                     "output_dimension")
                if var_array is None and var_func is None:
                    _logger.info('No variance array provided.'
                                 'Assuming poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    raise ValueError(
                        "You have defined both the var_func and var_array "
                        "keywords."
                        "Please, define just one of them")
                if var_func is not None:
                    # The variance must be computed on exactly the data
                    # handed to mlpca below: navigation on the first axis,
                    # signal on the second one.
                    if callable(var_func):
                        var_array = var_func(
                            dc[:, signal_mask][navigation_mask, :])
                    else:
                        # var_func holds the polynomial coefficients; the
                        # legacy ``polyfit`` keyword takes precedence when
                        # given, as it was the only one used previously.
                        coefficients = (var_func if polyfit is None
                                        else polyfit)
                        try:
                            var_array = np.polyval(
                                coefficients,
                                dc[:, signal_mask][navigation_mask, :])
                        except Exception:
                            raise ValueError(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a polynom')
                fast = algorithm == 'fast_mlpca'
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array,
                    output_dimension,
                    fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S**2 / Sobj
                explained_variance = S**2 / len(factors)
            elif algorithm == 'RPCA_GoDec':
                _logger.info("Performing Robust PCA with GoDec")

                X, E, G, U, S, V = rpca_godec(
                    dc[:, signal_mask][navigation_mask, :],
                    rank=output_dimension,
                    fast=True,
                    **kwargs)

                loadings = U * S
                factors = V
                explained_variance = S**2 / len(factors)

                if return_info:
                    to_return = (X, E)

            elif algorithm == 'ORPCA':
                _logger.info("Performing Online Robust PCA")

                X, E, U, S, V = orpca(dc[:, signal_mask][navigation_mask, :],
                                      rank=output_dimension,
                                      fast=True,
                                      **kwargs)

                loadings = U * S
                factors = V
                explained_variance = S**2 / len(factors)

                if return_info:
                    to_return = (X, E)
            else:
                raise ValueError('Algorithm not recognised. ' 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user call
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results

            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.metadata._HyperSpy.Folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    _logger.info("Reprojecting the signal is not yet "
                                 "supported for this algorithm")
                    # Record what was actually reprojected so that the NaN
                    # padding below is applied to the right array: with
                    # 'both' only the navigation reprojection was done.
                    if reproject == 'both':
                        reproject = 'navigation'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            if self._unfolded4decomposition is True:
                self.fold()
                # Reset the flag; this was previously a no-op comparison
                # (``is False``) instead of an assignment.
                self._unfolded4decomposition = False
            self.learning_results.__dict__.update(target.__dict__)
            # undo any pre-treatments
            self.undo_treatments()

        return to_return
Beispiel #26
0
    def test_method_BCD(self):
        """Check the low-rank reconstruction error with ``method='BCD'``."""
        outputs = orpca(self.X, rank=self.rank, method='BCD')
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #27
0
    def test_init(self):
        """Check the low-rank reconstruction error with ``init='rand'``."""
        outputs = orpca(self.X, rank=self.rank, init='rand')
        low_rank, _sparse, _u, _s, _v = outputs

        # Norm of the residual against self.A, divided by m * n.
        residual = low_rank - self.A
        scaled_error = np.linalg.norm(residual) / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #28
0
 def test_init_rand(self):
     """Run ``init="rand"`` and compare the low-rank output to ``self.A``."""
     outputs = orpca(
         self.X,
         rank=self.rank,
         store_error=True,
         init="rand",
     )
     # Only the first of the five outputs is inspected.
     low_rank, _sparse, _u, _s, _v = outputs
     compare_norms(low_rank, self.A)
Beispiel #29
0
    def test_init(self):
        """Check the low-rank reconstruction error with ``init='rand'``."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            init='rand',
        )

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #30
0
 def test_exception_method(self):
     """An unknown ``method`` value raises ValueError."""
     # Plain literal: the pattern has no placeholders to interpolate.
     with pytest.raises(ValueError, match="'method' not recognised"):
         orpca(self.X, rank=self.rank, method="uniform")
Beispiel #31
0
    def test_method_BCD(self):
        """Check the low-rank reconstruction error with ``method='BCD'``."""
        low_rank, _sparse, _u, _s, _v = orpca(
            self.X,
            rank=self.rank,
            method='BCD',
        )

        # Norm of the residual against self.A, divided by m * n.
        residual_norm = np.linalg.norm(low_rank - self.A)
        scaled_error = residual_norm / (self.m * self.n)
        nt.assert_true(scaled_error < self.tol)
Beispiel #32
0
def VCA(R, p, verbose='on', snr_input=0, compress='svd', **kwargs):
    """Pick ``p`` extreme columns of ``R`` and unmix ``R`` against them.

    NOTE(review): the name and the project-then-argmax loop suggest this is
    Vertex Component Analysis; confirm against the reference implementation
    this was ported from.

    Parameters
    ----------
    R : numpy array
        Two-dimensional data of shape ``(L, N)``. The mean is taken over the
        second axis, i.e. every column is treated as one observation.
    p : int
        Number of components to extract, ``1 <= p <= L``.
    verbose : str or bool
        Progress messages (data shape, execution time) are printed unless
        this is ``'off'`` or ``False``.
    snr_input : float
        Signal-to-noise ratio compared against the threshold
        ``15 + 10 * log10(p)``. If 0 (default) the value is obtained from
        ``estimate_snr`` instead.
    compress : 'svd' | 'orpca'
        How the projection basis is computed from the covariance of the
        mean-removed data. Only used when ``snr_input == 0``.
    **kwargs
        Forwarded to ``orpca`` when ``compress == 'orpca'``.

    Returns
    -------
    Ae : numpy array, shape (L, p)
        The selected columns, mapped back through the projection basis.
    loadings : numpy array, shape (p, N)
        Least-squares coefficients of ``R`` on ``Ae``.
    SNR : float
        The SNR value that was used (estimated or ``snr_input``).

    Raises
    ------
    ValueError
        If ``p`` is not an integer in ``[1, L]`` or, when the SNR has to be
        estimated, ``compress`` is not one of the supported values.
    """
    from math import sqrt, log10
    import time
    import numpy as np
    from scipy.linalg import svd

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    chatty = verbose != 'off' and verbose is not False

    L, N = R.shape
    if chatty:
        print('Shape of R is: ', R.shape)
    # p == 0 used to slip through and blow up later in log10(p).
    if p < 1 or p > L or (p % 1) != 0:
        raise ValueError('ENDMEMBER parameter must be integer between 1 and L')
    p = int(p)  # integral floats (e.g. 2.0) must still work as slice bounds
    if snr_input == 0 and compress not in ('svd', 'orpca'):
        # Previously an unknown value left ``Ud`` unbound (NameError).
        raise ValueError("'compress' must be either 'svd' or 'orpca'")

    if snr_input == 0:
        # Estimate the SNR from the data projected on a p-dimensional basis.
        r_m = np.mean(R, 1)[:, np.newaxis]
        R_o = R - r_m
        covariance = np.dot(R_o, R_o.T) / N
        if compress == 'svd':
            Ud = svd(covariance)[0][:, :p]
        else:
            # Imported lazily so hyperspy is only required for this option.
            from hyperspy.learn.rpca import orpca
            _, _, Ud, _, _ = orpca(covariance, rank=p, fast=True, **kwargs)
        x_p = np.dot(Ud.T, R_o)
        SNR = estimate_snr(R, r_m, x_p)
    else:
        SNR = snr_input

    SNR_th = 15 + 10 * log10(p)
    low_snr = SNR < SNR_th
    if low_snr:
        # Work on the mean-removed data projected on p - 1 directions and
        # append a constant row so that y has p rows.
        d = p - 1
        if snr_input == 0:
            Ud = Ud[:, :d]
        else:
            r_m = np.mean(R, 1)[:, np.newaxis]
            R_o = R - r_m
            Ud = svd(np.dot(R_o, R_o.T) / N)[0][:, :d]
            x_p = np.dot(Ud.T, R_o)
        x = x_p[0:d, :]
        c = np.max(np.sum(x**2, axis=0))**0.5
        y = np.vstack([x, np.full((1, N), c)])
    else:
        # Project the raw data on p directions and rescale every column by
        # its inner product with the mean projected vector.
        d = p
        Ud = svd(np.dot(R, R.T) / N)[0][:, :d]
        x = np.dot(Ud.T, R)
        u = np.mean(x, 1)[:, np.newaxis]
        y = x / np.sum(x * u, axis=0)

    indice = np.zeros(p, dtype=int)
    A = np.zeros((p, p))
    A[p - 1, 0] = 1

    for i in range(p):
        # Random direction with its component inside span(A) removed; the
        # column of y with the largest absolute projection is selected.
        w = np.random.rand(p, 1)
        f = w - np.dot(np.dot(A, np.linalg.pinv(A)), w)
        f = f / sqrt(np.sum(f**2))
        v = np.absolute(np.dot(f.T, y))
        indice[i] = v.argmax()
        A[:, i] = y[:, indice[i]]

    Ae = np.dot(Ud, x[:, indice])
    if low_snr:
        # The mean was removed before projecting in this branch; add it back.
        Ae = Ae + r_m
    loadings = np.dot(np.dot(np.linalg.pinv(np.dot(Ae.T, Ae)), Ae.T), R)
    if chatty:
        print("Execution Time---%s seconds ---"
              % (time.perf_counter() - start_time))
    return Ae, loadings, SNR
Beispiel #33
0
    def decomposition(self,
                      normalize_poissonian_noise=False,
                      algorithm='svd',
                      output_dimension=None,
                      centre=None,
                      auto_transpose=True,
                      navigation_mask=None,
                      signal_mask=None,
                      var_array=None,
                      var_func=None,
                      polyfit=None,
                      reproject=None,
                      return_info=False,
                      **kwargs):
        """Decomposition with a choice of algorithms

        The results are stored in self.learning_results

        Parameters
        ----------
        normalize_poissonian_noise : bool
            If True, scale the SI to normalize Poissonian noise
        algorithm : 'svd' | 'fast_svd' | 'sklearn_pca' | 'mlpca' |
            'fast_mlpca' | 'nmf' | 'sparse_pca' | 'mini_batch_sparse_pca' |
            'RPCA_GoDec' | 'ORPCA'
        output_dimension : None or int
            number of components to keep/calculate
        centre : None | 'variables' | 'trials'
            If None no centring is applied. If 'variable' the centring will be
            performed in the variable axis. If 'trials', the centring will be
            performed in the 'trials' axis. It only has effect when using the
            svd or fast_svd algorithms
        auto_transpose : bool
            If True, automatically transposes the data to boost performance.
            Only has effect when using the svd or fast_svd algorithms.
        navigation_mask : boolean numpy array
            The navigation locations marked as True are not used in the
            decomposition.
        signal_mask : boolean numpy array
            The signal locations marked as True are not used in the
            decomposition.
        var_array : numpy array
            Array of variance for the maximum likelihood PCA algorithm
        var_func : function or numpy array
            If function, it will apply it to the dataset to obtain the
            var_array. Alternatively, it can be an array with the
            coefficients of a polynomial.
        polyfit : None or numpy array
            Legacy keyword. If given and `var_func` is not a function, these
            polynomial coefficients are used instead of `var_func`.
        reproject : None | signal | navigation | both
            If not None, the results of the decomposition will be projected in
            the selected masked area.
        return_info: bool, default False
            The result of the decomposition is stored internally. However,
            some algorithms generate some extra information that is not
            stored. If True (the default is False) return any extra
            information if available
        **kwargs
            Passed to the sklearn estimator, to `rpca_godec` or to `orpca`,
            depending on `algorithm`.

        Returns
        -------
        (X, E) : (numpy array, numpy array)
            If 'algorithm' == 'RPCA_GoDec' or 'ORPCA' and 'return_info' is True,
            returns the low-rank (X) and sparse (E) matrices from robust PCA.
        estimator : sklearn estimator
            If 'algorithm' is 'sklearn_pca', 'nmf', 'sparse_pca' or
            'mini_batch_sparse_pca' and 'return_info' is True, returns the
            fitted sklearn object.
        None
            In every other case.

        Raises
        ------
        AttributeError
            If the navigation size of the dataset is smaller than 2.
        ValueError
            If `algorithm` is not recognised, if `output_dimension` is
            missing for an algorithm that requires it, or if the variance
            keywords for MLPCA are inconsistent.
        ImportError
            If an sklearn based algorithm is requested and sklearn is not
            installed.

        See also
        --------
        plot_decomposition_factors, plot_decomposition_loadings, plot_lev

        """
        to_return = None
        # Check if it is the wrong data type
        if self.data.dtype.char not in ['e', 'f', 'd']:  # If not float
            _logger.warning(
                'To perform a decomposition the data must be of the float '
                'type. You can change the type using the change_dtype method'
                ' e.g. s.change_dtype(\'float64\')\n'
                'Nothing done.')
            return

        if self.axes_manager.navigation_size < 2:
            raise AttributeError("It is not possible to decompose a dataset "
                                 "with navigation_size < 2")
        # backup the original data
        self._data_before_treatments = self.data.copy()
        # set the output target (peak results or not?)
        target = LearningResults()

        if algorithm == 'mlpca':
            if normalize_poissonian_noise is True:
                _logger.warning(
                    "It makes no sense to do normalize_poissonian_noise with "
                    "the MLPCA algorithm. Therefore, "
                    "normalize_poissonian_noise is set to False")
                normalize_poissonian_noise = False
            if output_dimension is None:
                raise ValueError("With the MLPCA algorithm the "
                                 "output_dimension must be specified")
        if algorithm == 'RPCA_GoDec' or algorithm == 'ORPCA':
            if output_dimension is None:
                raise ValueError("With the robust PCA algorithms ('RPCA_GoDec' "
                                 "and 'ORPCA'), the output_dimension "
                                 "must be specified")

        # Apply pre-treatments
        # Transform the data in a line spectrum
        self._unfolded4decomposition = self.unfold()
        try:
            if hasattr(navigation_mask, 'ravel'):
                navigation_mask = navigation_mask.ravel()

            if hasattr(signal_mask, 'ravel'):
                signal_mask = signal_mask.ravel()

            # Normalize the poissonian noise
            # TODO this function can change the masks and this can cause
            # problems when reprojecting
            if normalize_poissonian_noise is True:
                self.normalize_poissonian_noise(
                    navigation_mask=navigation_mask,
                    signal_mask=signal_mask,)
            _logger.info('Performing decomposition analysis')
            # The rest of the code assumes that the first data axis
            # is the navigation axis. We transpose the data if that is not the
            # case.
            dc = (self.data if self.axes_manager[0].index_in_array == 0
                  else self.data.T)

            # Transform the None masks in slices to get the right behaviour
            if navigation_mask is None:
                navigation_mask = slice(None)
            else:
                navigation_mask = ~navigation_mask
            if signal_mask is None:
                signal_mask = slice(None)
            else:
                signal_mask = ~signal_mask

            # WARNING: signal_mask and navigation_mask values are now their
            # negations i.e. True -> False and viceversa. However, the
            # stored value (at the end of the method) coincides with the
            # input masks

            # Reset the explained_variance which is not set by all the
            # algorithms
            explained_variance = None
            explained_variance_ratio = None
            mean = None

            if algorithm == 'svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :], centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'fast_svd':
                factors, loadings, explained_variance, mean = svd_pca(
                    dc[:, signal_mask][navigation_mask, :],
                    fast=True,
                    output_dimension=output_dimension,
                    centre=centre,
                    auto_transpose=auto_transpose)

            elif algorithm == 'sklearn_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.PCA(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                explained_variance = sk.explained_variance_
                mean = sk.mean_
                centre = 'trials'
                if return_info:
                    to_return = sk

            elif algorithm == 'nmf':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.NMF(**kwargs)
                sk.n_components = output_dimension
                loadings = sk.fit_transform((
                    dc[:, signal_mask][navigation_mask, :]))
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.SparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'mini_batch_sparse_pca':
                if import_sklearn.sklearn_installed is False:
                    raise ImportError(
                        'sklearn is not installed. Nothing done')
                sk = import_sklearn.sklearn.decomposition.MiniBatchSparsePCA(
                    output_dimension, **kwargs)
                loadings = sk.fit_transform(
                    dc[:, signal_mask][navigation_mask, :])
                factors = sk.components_.T
                if return_info:
                    to_return = sk

            elif algorithm == 'mlpca' or algorithm == 'fast_mlpca':
                _logger.info("Performing the MLPCA training")
                if output_dimension is None:
                    raise ValueError(
                        "For MLPCA it is mandatory to define the "
                        "output_dimension")
                if var_array is None and var_func is None:
                    _logger.info('No variance array provided.'
                                 'Assuming poissonian data')
                    var_array = dc[:, signal_mask][navigation_mask, :]

                if var_array is not None and var_func is not None:
                    raise ValueError(
                        "You have defined both the var_func and var_array "
                        "keywords."
                        "Please, define just one of them")
                if var_func is not None:
                    # The variance must be evaluated on exactly the data
                    # passed to mlpca below: navigation on the first axis,
                    # signal on the second, both masked.
                    masked_data = dc[:, signal_mask][navigation_mask, :]
                    if callable(var_func):
                        var_array = var_func(masked_data)
                    else:
                        # The documented contract is that var_func holds the
                        # polynomial coefficients. The legacy `polyfit`
                        # keyword still wins when given, so callers that
                        # relied on it keep working.
                        coefficients = (polyfit if polyfit is not None
                                        else var_func)
                        try:
                            var_array = np.polyval(coefficients, masked_data)
                        except Exception as err:
                            # Any evaluation failure means the coefficients
                            # were unusable; keep the original error type
                            # and message but preserve the cause.
                            raise ValueError(
                                'var_func must be either a function or an '
                                'array defining the coefficients of a polynom'
                            ) from err
                fast = algorithm != 'mlpca'
                U, S, V, Sobj, ErrFlag = mlpca(
                    dc[:, signal_mask][navigation_mask, :],
                    var_array, output_dimension, fast=fast)
                loadings = U * S
                factors = V
                explained_variance_ratio = S ** 2 / Sobj
                explained_variance = S ** 2 / len(factors)
            elif algorithm == 'RPCA_GoDec':
                _logger.info("Performing Robust PCA with GoDec")

                X, E, G, U, S, V = rpca_godec(
                    dc[:, signal_mask][navigation_mask, :],
                    rank=output_dimension, fast=True, **kwargs)

                loadings = U * S
                factors = V
                explained_variance = S ** 2 / len(factors)

                if return_info:
                    to_return = (X, E)

            elif algorithm == 'ORPCA':
                _logger.info("Performing Online Robust PCA")

                X, E, U, S, V = orpca(
                    dc[:, signal_mask][navigation_mask, :],
                    rank=output_dimension, fast=True, **kwargs)

                loadings = U * S
                factors = V
                explained_variance = S ** 2 / len(factors)

                if return_info:
                    to_return = (X, E)
            else:
                raise ValueError('Algorithm not recognised. '
                                 'Nothing done')

            # We must calculate the ratio here because otherwise the sum
            # information can be lost if the user call
            # crop_decomposition_dimension
            if explained_variance is not None and \
                    explained_variance_ratio is None:
                explained_variance_ratio = \
                    explained_variance / explained_variance.sum()

            # Store the results in learning_results

            target.factors = factors
            target.loadings = loadings
            target.explained_variance = explained_variance
            target.explained_variance_ratio = explained_variance_ratio
            target.decomposition_algorithm = algorithm
            target.poissonian_noise_normalized = \
                normalize_poissonian_noise
            target.output_dimension = output_dimension
            target.unfolded = self._unfolded4decomposition
            target.centre = centre
            target.mean = mean

            if output_dimension and factors.shape[1] != output_dimension:
                target.crop_decomposition_dimension(output_dimension)

            # Delete the unmixing information, because it'll refer to a
            # previous decomposition
            target.unmixing_matrix = None
            target.bss_algorithm = None

            if self._unfolded4decomposition is True:
                folding = \
                    self.metadata._HyperSpy.Folding
                target.original_shape = folding.original_shape

            # Reproject
            if mean is None:
                mean = 0
            if reproject in ('navigation', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    loadings_ = np.dot(dc[:, signal_mask] - mean, factors)
                else:
                    loadings_ = sk.transform(dc[:, signal_mask])
                target.loadings = loadings_
            if reproject in ('signal', 'both'):
                if algorithm not in ('nmf', 'sparse_pca',
                                     'mini_batch_sparse_pca'):
                    factors = np.dot(np.linalg.pinv(loadings),
                                     dc[navigation_mask, :] - mean).T
                    target.factors = factors
                else:
                    _logger.info("Reprojecting the signal is not yet "
                                 "supported for this algorithm")
                    # Only the navigation reprojection (already done above
                    # for 'both') remains in effect. Recording 'navigation'
                    # makes the NaN-filling below pad the factors, which
                    # were not reprojected, and leave the full-size
                    # loadings alone.
                    if reproject == 'both':
                        reproject = 'navigation'
                    else:
                        reproject = None

            # Rescale the results if the noise was normalized
            if normalize_poissonian_noise is True:
                target.factors[:] *= self._root_bH.T
                target.loadings[:] *= self._root_aG

            # Set the pixels that were not processed to nan
            if not isinstance(signal_mask, slice):
                # Store the (inverted, as inputed) signal mask
                target.signal_mask = ~signal_mask.reshape(
                    self.axes_manager._signal_shape_in_array)
                if reproject not in ('both', 'signal'):
                    factors = np.zeros((dc.shape[-1], target.factors.shape[1]))
                    factors[signal_mask, :] = target.factors
                    factors[~signal_mask, :] = np.nan
                    target.factors = factors
            if not isinstance(navigation_mask, slice):
                # Store the (inverted, as inputed) navigation mask
                target.navigation_mask = ~navigation_mask.reshape(
                    self.axes_manager._navigation_shape_in_array)
                if reproject not in ('both', 'navigation'):
                    loadings = np.zeros(
                        (dc.shape[0], target.loadings.shape[1]))
                    loadings[navigation_mask, :] = target.loadings
                    loadings[~navigation_mask, :] = np.nan
                    target.loadings = loadings
        finally:
            if self._unfolded4decomposition is True:
                self.fold()
                # This used to be a no-op comparison (`is False`); the flag
                # must actually be reset once the data is folded back.
                self._unfolded4decomposition = False
            self.learning_results.__dict__.update(target.__dict__)
            # undo any pre-treatments
            self.undo_treatments()

        return to_return
Beispiel #34
0
 def test_exception_init(self):
     """``orpca`` must reject an unknown ``init`` string with a ValueError."""
     # The pattern has no placeholders, so a plain literal is used rather
     # than an f-string; the regex handed to pytest is unchanged.
     with pytest.raises(ValueError, match="'init' not recognised"):
         _ = orpca(self.X, rank=self.rank, init="uniform")
Beispiel #35
0
    def test_default(self):
        """Run ORPCA with default options and check the low-rank estimate.

        The norm of the difference between the returned low-rank matrix and
        ``self.A``, scaled by ``self.m * self.n``, must be below ``self.tol``.
        """
        low_rank, _sparse, _u, _s, _v = orpca(self.X, rank=self.rank)

        # Scaled residual of the low-rank component
        residual = np.linalg.norm(low_rank - self.A)
        scaled_error = residual / (self.m * self.n)
        assert scaled_error < self.tol