Exemple #1
0
 def figures_of_merit(X, maxPIndex, C, St, j):
     """
     Return the fit statistics obtained once the j-th compound is added.

     `C` and `St` are updated in place: column `j` of `C` receives the
     column of `X` selected by ``maxPIndex[j]``, and the first ``j + 1``
     rows of `St` receive the least-squares solution of ``C . St = X``.

     Returns
     -------
     tuple
         ``(rsquare, stdev_res)``: one minus the ratio of the squared norms
         of the residuals and of `X`, and the standard deviation of the
         residuals.
     """
     n_used = j + 1

     # profile of the newly added compound
     C[:, j] = X[:, maxPIndex[j]]

     # least-squares spectra for the compounds selected so far
     solution = np.linalg.lstsq(C.data[:, 0:n_used], X.data, rcond=None)
     St[0:n_used, :] = solution[0]

     # residuals of the model built from those compounds
     residuals = dot(C[:, 0:n_used], St[0:n_used, :]) - X
     spread = np.std(residuals)
     explained = 1 - np.linalg.norm(residuals)**2 / np.linalg.norm(X)**2
     return explained, spread
Exemple #2
0
def test_npy(ds1):
    """
    Exercise ``diag`` and ``dot`` on datasets, checking units, shapes and types.
    """
    # ---- diag -------------------------------------------------------------
    # diag works only for 1D or 2D datasets, so the full fixture is rejected
    with pytest.raises(ValueError):
        diag(ds1)

    # 2D input -> 1D output
    plane = ds1[0].squeeze()
    assert plane.ndim == 2
    extracted = diag(plane)
    assert extracted.units == ds1.units
    assert extracted.ndim == 1
    assert extracted.size == plane.x.size

    # 1D input -> 2D output
    row = plane[0].squeeze()
    assert row.ndim == 1
    square = diag(row)
    assert square.units == ds1.units
    assert square.ndim == 2
    assert square.size == row.x.size**2

    # a bare array is accepted as well and still gives a dataset back
    from_array = diag(plane.data)
    assert from_array.implements("NDDataset")

    # ---- dot --------------------------------------------------------------
    left = plane  # first 2D dataset
    right = ds1[3].squeeze()  # second 2D dataset
    right.ito("km", force=True)  # give the second operand its own units
    product = dot(left.T, right)
    assert product.units == left.units * right.units
    assert product.shape == (left.x.size, right.x.size)

    # a dataset can be mixed with a numpy object
    product = dot(left.T, right.data)
    assert product.units == left.units

    # without any dataset the result is a plain ndarray, as with np.dot
    product = dot(left.data.T, right.data)
    assert isinstance(product, np.ndarray)
Exemple #3
0
def _generate_2D_spectra(concentrations, spectra):
    """
    Build a fake 2D experimental dataset from concentrations and spectra.

    The result is the product of the transposed `concentrations` with
    `spectra`.

    Parameters
    ----------
    concentrations : |NDDataset|
        Concentration profiles; transposed before the product.
    spectra : |NDDataset|
        Spectra used as the right-hand operand.

    Returns
    -------
    |NDDataset|
        The product ``dot(concentrations.T, spectra)``.
    """
    from spectrochempy.core.dataset.npy import dot

    transposed = concentrations.T
    spectra_2d = dot(transposed, spectra)
    return spectra_2d
Exemple #4
0
def generate_fake():
    """
    Generate a noisy fake 2D experimental dataset.

    Four model peaks and four concentration profiles are combined by a
    matrix product, then Gaussian noise is drawn around the result.

    Returns
    -------
    tuple
        ``(d, specs, concs)``: the noisy 2D dataset, the individual spectra
        and the concentration profiles.
    """
    from spectrochempy.core.dataset.npy import dot

    # peak definitions, one entry per peak (the first one is very broad)
    peak_models = ("gaussian", "voigt", "voigt", "asymmetricvoigt")
    peak_amplitudes = (100.0, 70.0, 10.0, 50.0)
    peak_positions = (6000.0, 4000.0, 2000.0, 2500.0)
    peak_widths = (6000.0, 1000.0, 250.0, 800.0)
    peak_ratios = (0.1, 0.5, 0.2, 1.0)
    peak_asymmetries = (0.0, 0.0, 0, 4)

    # concentration profiles as functions of t
    def C1(t):
        # linear evolution of the baseline
        return t * 0.05 + 0.01

    def C2(t):
        midpoint = max(t) / 2.0
        return scp.sigmoidmodel().f(t, 1.0, midpoint, 1, 2)

    def C3(t):
        fifth = max(t) / 5.0
        return scp.sigmoidmodel().f(t, 1.0, fifth, 1, -2)

    def C4(t):
        # remainder once C2 and C3 are accounted for
        return 1.0 - C2(t) - C3(t)

    spectra = _make_spectra_matrix(
        peak_models,
        peak_amplitudes,
        peak_positions,
        peak_widths,
        peak_ratios,
        peak_asymmetries,
    )
    profiles = _make_concentrations_matrix(C1, C2, C3, C4)

    # 2D dataset = profiles^T . spectra
    noisy = dot(profiles.T, spectra)

    # noise standard deviation is 0.5 % of the largest value
    noise_scale = 0.005 * noisy.data.max()
    noisy.data = np.random.normal(noisy.data, noise_scale)

    return noisy, spectra, profiles
Exemple #5
0
    def reconstruct(self):
        """
        Rebuild the dataset from the concentration and spectra profiles.

        The product of ``self.C`` and ``self.St`` is computed:
        :math:`X'_{hat} = C'.S'^t`

        Returns
        -------
        X_hat
            The dataset reconstructed from the SIMPLISMA analysis, with its
            description and title set.
        """
        conc, spectra = self.C, self.St
        rebuilt = dot(conc, spectra)

        # the analysis logs are appended to the description
        description = "Dataset reconstructed by SIMPLISMA\n" + self.logs
        title = "X_hat: " + self.X.title
        rebuilt.description = description
        rebuilt.title = title
        return rebuilt
Exemple #6
0
    def reconstruct(self):
        """
        Rebuild the dataset from the concentration and spectra profiles.

        The following matrix operation is performed: :math:`X'_{hat} = C'.S'^t`.

        Returns
        -------
        X_hat : |NDDataset|
            The product of ``self.C`` and ``self.St``, with its history and
            title set.
        """
        rebuilt = dot(self.C, self.St)

        rebuilt.history = "Dataset reconstructed by MCS ALS optimization"
        rebuilt.title = "X_hat: " + self.X.title
        return rebuilt
Exemple #7
0
    def reconstruct(self, n_pc=None):
        """
        Transform data back to the original space using a given number of PCs.

        The product :math:`X' = S'.L'^T` is computed from the first `n_pc`
        columns of the scores and the first `n_pc` rows of the transposed
        loadings. The preprocessing steps recorded on the instance (scaling,
        standardization, centering) are then undone in that order.

        Parameters
        ----------
        n_pc : int, optional
            The number of PCs to use; resolved by ``self._get_n_pc``.

        Returns
        -------
        X_reconstructed : |NDDataset|
            The dataset reconstructed from `n_pc` principal components.
        """
        # automatic or user-specified number of components
        n_pc = self._get_n_pc(n_pc)

        scores = self._S[:, :n_pc]
        loadings_t = self._LT[:n_pc]
        rebuilt = dot(scores, loadings_t)

        # undo the preprocessing, last applied step first
        if self._scaled:
            rebuilt *= self._ampl
            rebuilt += self._min
        if self._standardized:
            rebuilt *= self._std
        if self._centered:
            rebuilt += self._center

        rebuilt.history = f"PCA reconstructed Dataset with {n_pc} principal components"
        rebuilt.title = self._X.title
        return rebuilt
Exemple #8
0
    def __init__(self, dataset, centered=True, standardized=False, scaled=False):
        """
        Preprocess `dataset`, decompose it by SVD and store scores and loadings.

        Parameters
        ----------
        dataset : |NDDataset| object
            The input dataset has shape (M, N). M is the number of
            observations (for example a series of IR spectra) while N
            is the number of features (for example the wavenumbers measured
            in each IR spectrum).
        centered : bool, optional, default:True
            If True the mean along axis 0 is subtracted: :math:`X' = X - mean(X)`.
        standardized : bool, optional, default:False
            If True the data are divided by their standard deviation along
            axis 0: :math:`X' = X / \\sigma`.
        scaled : bool, optional, default:False
            If True the data are scaled in the interval [0-1]:
            :math:`X' = (X - min(X)) / (max(X)-min(X))`
        """
        self.prefs = dataset.preferences

        X = dataset
        self._X = X

        # working copy, replaced or modified by each enabled preprocessing step
        prepared = X.copy()

        # mean centering
        self._centered = centered
        if centered:
            center = np.mean(X, axis=0)
            self._center = center
            prepared = X - center
            prepared.title = f"centered {X.title}"

        # standardization
        self._standardized = standardized
        if standardized:
            self._std = np.std(prepared, axis=0)
            prepared /= self._std
            prepared.title = f"standardized {prepared.title}"

        # scaling
        self._scaled = scaled
        if scaled:
            self._min = np.min(prepared, axis=0)
            self._ampl = np.ptp(prepared, axis=0)
            prepared -= self._min
            prepared /= self._ampl
            prepared.title = f"scaled {prepared.title}"

        self._Xscaled = prepared

        # singular value decomposition of the preprocessed data
        svd = SVD(prepared)
        sigma = svd.s.diag()
        U = svd.U

        # loadings are the transposed right singular vectors
        loadings_t = svd.VT
        loadings_t.title = "loadings (L^T) of " + X.name
        loadings_t.history = "Created by PCA"

        # scores are U . sigma, labelled '#1', '#2', ... along x
        scores = dot(U, sigma)
        scores.title = "scores (S) of " + X.name
        pc_labels = [f"#{rank}" for rank in range(1, svd.s.size + 1)]
        scores.set_coordset(
            y=X.y,
            x=Coord(None, labels=pc_labels, title="principal component"),
        )
        scores.description = "scores (S) of " + X.name
        scores.history = "Created by PCA"

        self._LT = loadings_t
        self._S = scores

        # quantities taken from the SVD object, all retitled along x
        for attr in ("sv", "ev", "ev_ratio", "ev_cum"):
            setattr(self, "_" + attr, getattr(svd, attr))
            getattr(self, "_" + attr).x.title = "PC #"
Exemple #9
0
    def __init__(self, dataset, guess, **kwargs):
        """
        Run the alternating least-squares optimization on `dataset`.

        Parameters
        ----------
        dataset : NDDataset
            The 2D data ``X`` to decompose as ``C . St``.
        guess : NDDataset or numpy.ndarray
            Initial profiles. If ``guess.shape[0] == dataset.shape[0]`` it is
            taken as the initial concentrations ``C``; otherwise, if
            ``guess.shape[1] == dataset.shape[1]``, as the initial spectra
            ``St``. A bare ndarray is wrapped in an NDDataset.
        **kwargs
            Optional parameters, see Other Parameters.

        Other Parameters
        ----------------
        tol : float, default 0.1
            The loop stops once the absolute percent change of the standard
            deviation of the residuals falls below this value.
        maxit : int, default 50
            Maximum number of iterations.
        maxdiv : int, default 5
            Maximum number of successive iterations with increasing residuals.
        nonnegConc, unimodConc : "all", None or sequence of int, default "all"
            Species indexes whose concentration profile is clipped to be
            non-negative / forced to be unimodal.
        unimodConcTol : float, default 1.1
        unimodConcMod : str, default "strict"
            Passed as ``tol`` and ``mod`` to ``_unimodal_2D`` for concentrations.
        monoIncConc, monoDecConc : sequence of int or None, default None
            Species indexes forced to a monotonic increase / decrease.
        monoIncTol, monoDecTol : float, default 1.1
            Tolerances of the monotonicity constraints.
        closureConc : sequence of int or None, default None
            Species indexes subjected to closure.
        closureTarget : "default" or array of length ``X.shape[0]``
            Closure target; "default" means ones.
        closureMethod : {"scaling", "constantSum"}, default "scaling"
        hardConc : sequence of int or None, default None
            Columns of ``C`` overwritten by externally computed profiles.
        getConc : callable
            Called as ``getConc(*argsGetConc)``; returns the profiles, or a
            dict with keys "concentrations" and "new_args".
        argsGetConc : tuple, optional
            Arguments for `getConc`; ``None`` is treated as no argument.
        hardC_to_C_idx : "default" or sequence of int
            Columns of the external profiles copied into ``C[:, hardConc]``.
        nonnegSpec : "all", None or sequence of int, default "all"
        unimodSpec : "all", None or sequence of int, default None
            Species indexes whose spectrum is clipped / forced unimodal.
        unimodSpecTol : float, default 1.1
        unimodSpecMod : str, default "strict"
        normSpec : {None, "max", "euclid"}, default None
            Normalization of the spectra, compensated in ``C``.
        unimodTol, unimodMod, verbose
            Deprecated; a DeprecationWarning is issued when they are given.

        Raises
        ------
        ValueError
            If `guess` matches neither dimension of `dataset`, or if a
            constraint refers to more species / observations than available.
        """

        # Optimization parameters
        tol = kwargs.get("tol", 0.1)
        maxit = kwargs.get("maxit", 50)
        maxdiv = kwargs.get("maxdiv", 5)

        # Constraints on concentrations
        nonnegConc = kwargs.get("nonnegConc", "all")

        unimodConc = kwargs.get("unimodConc", "all")
        unimodConcTol = kwargs.get("unimodConcTol", 1.1)
        unimodConcMod = kwargs.get("unimodConcMod", "strict")
        if "unimodTol" in kwargs:
            warnings.warn("unimodTol deprecated, use unimodConcTol instead",
                          DeprecationWarning)
            unimodConcTol = kwargs.get("unimodTol", 1.1)
        if "unimodMod" in kwargs:
            warnings.warn("unimodMod deprecated, use unimodConcMod instead",
                          DeprecationWarning)
            # the deprecated name is only a fallback: an explicit
            # unimodConcMod still takes precedence
            unimodConcMod = kwargs.get("unimodConcMod", kwargs["unimodMod"])

        monoDecConc = kwargs.get("monoDecConc", None)
        monoIncTol = kwargs.get("monoIncTol", 1.1)
        monoIncConc = kwargs.get("monoIncConc", None)
        monoDecTol = kwargs.get("monoDecTol", 1.1)

        closureConc = kwargs.get("closureConc", None)
        closureTarget = kwargs.get("closureTarget", "default")
        closureMethod = kwargs.get("closureMethod", "scaling")

        hardConc = kwargs.get("hardConc", None)
        getConc = kwargs.get("getConc", None)
        argsGetConc = kwargs.get("argsGetConc", None)
        hardC_to_C_idx = kwargs.get("hardC_to_C_idx", "default")

        # Constraints on spectra
        unimodSpec = kwargs.get("unimodSpec", None)
        unimodSpecTol = kwargs.get("unimodSpecTol", 1.1)
        unimodSpecMod = kwargs.get("unimodSpecMod", "strict")

        nonnegSpec = kwargs.get("nonnegSpec", "all")

        normSpec = kwargs.get("normSpec", None)

        if "verbose" in kwargs:
            warnings.warn(
                "verbose deprecated. Instead, use set_loglevel(INFO) before launching MCRALS",
                DeprecationWarning,
            )
            set_loglevel(INFO)

        # Check initial data
        # ------------------------------------------------------------------------

        initConc, initSpec = False, False

        if type(guess) is np.ndarray:
            guess = NDDataset(guess)

        X = dataset

        if X.shape[0] == guess.shape[0]:
            initConc = True
            C = guess.copy()
            C.name = "Pure conc. profile, mcs-als of " + X.name
            nspecies = C.shape[1]

        elif X.shape[1] == guess.shape[1]:
            initSpec = True
            St = guess.copy()
            St.name = "Pure spectra profile, mcs-als of " + X.name
            nspecies = St.shape[0]

        else:
            raise ValueError("the dimensions of guess do not match the data")

        ny, _ = X.shape

        # makes a PCA with same number of species for further comparison
        Xpca = PCA(X).reconstruct(n_pc=nspecies)

        # reset default text to indexes
        # ------------------------------

        def _resolve_indexes(value, name):
            # "all" -> every species, None -> no species, anything else is
            # validated against nspecies. The isinstance/len tests avoid
            # comparing an ndarray with a str or with [] (ambiguous, or an
            # error, with numpy arrays).
            if isinstance(value, str) and value == "all":
                return np.arange(nspecies)
            if value is None:
                return []
            if len(value) > 0 and (len(value) > nspecies
                                   or max(value) + 1 > nspecies):
                raise ValueError(
                    f"The guess has only {nspecies} species, please check {name}"
                )
            return value

        nonnegConc = _resolve_indexes(nonnegConc, "nonnegConc")
        unimodConc = _resolve_indexes(unimodConc, "unimodConc")

        if isinstance(closureTarget, str) and closureTarget == "default":
            closureTarget = np.ones(ny)
        elif len(closureTarget) != ny:
            raise ValueError(
                f"The data contain only {ny} observations, please check closureTarget"
            )
        else:
            # the closure code below uses ndarray attributes and indexing
            closureTarget = np.asarray(closureTarget)

        if isinstance(hardC_to_C_idx, str) and hardC_to_C_idx == "default":
            hardC_to_C_idx = np.arange(nspecies)
        elif len(hardC_to_C_idx
                 ) > nspecies or max(hardC_to_C_idx) + 1 > nspecies:
            raise ValueError(
                f"The guess has only {nspecies} species, please check hardC_to_C_idx"
            )

        if hardConc is not None and argsGetConc is None:
            # no extra argument given: call getConc without arguments
            argsGetConc = ()

        # constraints on spectra
        unimodSpec = _resolve_indexes(unimodSpec, "unimodSpec")
        nonnegSpec = _resolve_indexes(nonnegSpec, "nonnegSpec")

        # Compute initial spectra or concentrations   (first iteration...)
        # ------------------------------------------------------------------------

        if initConc:
            if C.coordset is None:
                C.set_coordset(y=X.y, x=C.x)
            St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
            St.name = "Pure spectra profile, mcs-als of " + X.name
            St.title = X.title
            cy = C.x.copy() if C.x else None
            cx = X.x.copy() if X.x else None
            St.set_coordset(y=cy, x=cx)

        if initSpec:
            if St.coordset is None:
                St.set_coordset(y=St.y, x=X.x)
            Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
            C = NDDataset(Ct.T)
            C.name = "Pure conc. profile, mcs-als of " + X.name
            C.title = "concentration"
            cx = St.y.copy() if St.y else None
            cy = X.y.copy() if X.y else None
            C.set_coordset(y=cy, x=cx)

        change = tol + 1
        stdev = X.std()
        niter = 0
        ndiv = 0

        # defined up front so that they exist even if the loop body never runs
        Chard = None
        Stsoft = None
        fixedC = None
        extOutput = None

        log = "*** ALS optimisation log***\n"
        log += "#iter     Error/PCA        Error/Exp      %change \n"
        log += "------------------------------------------------- \n"
        info_(log)

        while change >= tol and niter < maxit and ndiv < maxdiv:

            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
            niter += 1

            # Force non-negative concentration
            # --------------------------------
            for s in nonnegConc:
                C.data[:, s] = C.data[:, s].clip(min=0)

            # Force unimodal concentration
            # ----------------------------
            if len(unimodConc) > 0:
                C.data = _unimodal_2D(
                    C.data,
                    idxes=unimodConc,
                    axis=0,
                    tol=unimodConcTol,
                    mod=unimodConcMod,
                )

            # Force monotonic increase
            # ------------------------
            if monoIncConc is not None:
                for s in monoIncConc:
                    for curid in range(ny - 1):
                        if C.data[curid + 1,
                                  s] < C.data[curid, s] / monoIncTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Force monotonic decrease
            # ----------------------------------------------
            if monoDecConc is not None:
                for s in monoDecConc:
                    for curid in range(ny - 1):
                        if C.data[curid + 1,
                                  s] > C.data[curid, s] * monoDecTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Closure
            # ------------------------------------------
            if closureConc is not None:
                if closureMethod == "scaling":
                    # one scaling factor per closed species
                    Q = np.linalg.lstsq(C.data[:, closureConc],
                                        closureTarget.T,
                                        rcond=None)[0]
                    C.data[:, closureConc] = np.dot(C.data[:, closureConc],
                                                    np.diag(Q))
                elif closureMethod == "constantSum":
                    # rescale each observation so the closed species sum to
                    # the target
                    totalConc = np.sum(C.data[:, closureConc], axis=1)
                    C.data[:, closureConc] = (C.data[:, closureConc] *
                                              closureTarget[:, None] /
                                              totalConc[:, None])

            # external concentration profiles
            # ------------------------------------------
            if hardConc is not None:
                extOutput = getConc(*argsGetConc)
                if isinstance(extOutput, dict):
                    fixedC = extOutput["concentrations"]
                    # the function may hand back the arguments of its next call
                    argsGetConc = extOutput["new_args"]
                else:
                    fixedC = extOutput

                C.data[:, hardConc] = fixedC[:, hardC_to_C_idx]

            # stores C in C_hard
            Chard = C.copy()

            # compute St
            St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

            # stores St in Stsoft
            Stsoft = St.copy()

            # Force non-negative spectra
            # --------------------------
            if len(nonnegSpec) > 0:
                St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

            # Force unimodal spectra
            # ----------------------------
            if len(unimodSpec) > 0:
                St.data = _unimodal_2D(
                    St.data,
                    idxes=unimodSpec,
                    axis=1,
                    tol=unimodSpecTol,
                    mod=unimodSpecMod,
                )

            # recompute C for consistency(soft modeling)
            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T

            # rescale spectra & concentrations
            if normSpec == "max":
                alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T
            elif normSpec == "euclid":
                alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T

            # compute residuals
            # -----------------
            X_hat = dot(C, St)
            stdev2 = (X_hat - X.data).std()
            change = 100 * (stdev2 - stdev) / stdev
            stdev = stdev2

            stdev_PCA = (X_hat - Xpca.data).std()

            logentry = "{:3d}      {:10f}      {:10f}      {:10f}".format(
                niter, stdev_PCA, stdev2, change)
            log += logentry + "\n"
            info_(logentry)

            # a positive change means the residuals increased
            if change > 0:
                ndiv += 1
            else:
                ndiv = 0
                change = -change

            if change < tol:
                logentry = "converged !"
                log += logentry + "\n"
                info_(logentry)

            if ndiv == maxdiv:
                logline = (
                    f"Optimization not improved since {maxdiv} iterations... unconverged "
                    f"or 'tol' set too small ?\n")
                logline += "Stop ALS optimization"
                log += logline + "\n"
                info_(logline)

            if niter == maxit:
                logline = "Convergence criterion ('tol') not reached after {:d} iterations.".format(
                    maxit)
                logline += "Stop ALS optimization"
                log += logline + "\n"
                info_(logline)

        self._X = X
        self._params = {
            "tol": tol,
            "maxit": maxit,
            "maxdiv": maxdiv,
            "nonnegConc": nonnegConc,
            "unimodConc": unimodConc,
            "unimodConcTol": unimodConcTol,
            "unimodConcMod": unimodConcMod,
            "closureConc": closureConc,
            "closureTarget": closureTarget,
            "closureMethod": closureMethod,
            "monoDecConc": monoDecConc,
            "monoDecTol": monoDecTol,
            "monoIncConc": monoIncConc,
            "monoIncTol": monoIncTol,
            "hardConc": hardConc,
            "getConc": getConc,
            "argsGetConc": argsGetConc,
            "hardC_to_C_idx": hardC_to_C_idx,
            "nonnegSpec": nonnegSpec,
            "unimodSpec": unimodSpec,
            "unimodSpecTol": unimodSpecTol,
            "unimodSpecMod": unimodSpecMod,
            "normSpec": normSpec,
        }

        self._C = C
        # both stay None when no external concentration profile is used
        self._fixedC = fixedC
        self._extOutput = extOutput

        self._St = St
        self._log = log

        self._Stsoft = Stsoft
        self._Chard = Chard
Exemple #10
0
    def __init__(self,
                 dataset,
                 centered=True,
                 standardized=False,
                 scaled=False):
        """
        Preprocess `dataset`, decompose it by SVD and store scores and loadings.

        Parameters
        ----------
        dataset : |NDDataset|
            The dataset to analyse.
        centered : bool, optional, default:True
            If True, the mean along dimension 0 is subtracted.
        standardized : bool, optional, default:False
            If True, the data are divided by their standard deviation along
            dimension 0.
        scaled : bool, optional, default:False
            If True, the minimum along dimension 0 is subtracted and the
            result divided by the peak-to-peak amplitude.
        """
        super().__init__()

        self.prefs = dataset.preferences

        X = dataset
        self._X = X

        # working copy, replaced or modified by each enabled preprocessing step
        prepared = X.copy()

        # mean centering
        self._centered = centered
        if centered:
            center = X.mean(dim=0)
            self._center = center
            prepared = X - center
            prepared.name = f"centered {X.name}"

        # standardization
        self._standardized = standardized
        if standardized:
            self._std = prepared.std(dim=0)
            prepared /= self._std
            prepared.name = f"standardized {prepared.name}"

        # scaling
        self._scaled = scaled
        if scaled:
            self._min = prepared.min(dim=0)
            self._ampl = prepared.ptp(dim=0)
            prepared -= self._min
            prepared /= self._ampl
            prepared.name = f"scaled {prepared.name}"

        self._Xscaled = prepared

        # singular value decomposition of the preprocessed data
        svd = SVD(prepared)
        sigma = svd.s.diag()
        U = svd.U

        # loadings are the transposed right singular vectors
        loadings_t = svd.VT
        loadings_t.title = "loadings (L^T) of " + X.name
        loadings_t.history = "Created by PCA"

        # scores are U . sigma, labelled '#1', '#2', ... along x
        scores = dot(U, sigma)
        scores.title = "scores (S) of " + X.name
        pc_labels = [f"#{rank}" for rank in range(1, svd.s.size + 1)]
        pc_coord = Coord(None, labels=pc_labels, title="principal component")
        scores.set_coordset(y=X.y, x=pc_coord)

        scores.description = "scores (S) of " + X.name
        scores.history = "Created by PCA"

        self._LT = loadings_t
        self._S = scores

        # quantities taken from the SVD object, all retitled along x
        for attr in ("sv", "ev", "ev_ratio", "ev_cum"):
            setattr(self, "_" + attr, getattr(svd, attr))
            getattr(self, "_" + attr).x.title = "PC #"
Exemple #11
0
    def __init__(self, dataset, guess, **kwargs):   # lgtm [py/missing-call-to-init]
        """
        Parameters
        ----------
        dataset : |NDDataset|
            The dataset on which to perform the MCR-ALS analysis
        guess : |NDDataset|
            Initial concentration or spectra
        verbose : bool
            If set to True, prints a summary of residuals and residuals change at each iteration. default = False.
            In any case, the same information is returned in self.logs
        **kwargs : dict
            Optimization parameters : See Other Parameters.

        Other Parameters
        ----------------
        tol : float, optional,  default=0.1
            Convergence criterion on the change of resisuals.
            (percent change of standard deviation of residuals).
        maxit : int, optional, default=50
            Maximum number of ALS minimizations.
        maxdiv : int, optional, default=5.
            Maximum number of successive non-converging iterations.
        nonnegConc : list or tuple, default=Default [0, 1, ...] (only non-negative concentrations)
            Index of species having non-negative concentration profiles. For instance [0, 2] indicates that species
            #0 and #2 have non-negative conc profiles while species #1 can have negative concentrations.
        unimodConc : list or tuple, Default=[0, 1, ...] (only unimodal concentration profiles)
            index of species having unimodal concentrationsprofiles.
        closureConc : list or tuple, Default=None  (no closure)
            Index of species subjected to a closure constraint.
        externalConc: list or tuple, Default None (no external concentration).
            Index of species for which a concentration profile is provided by an external function.
        getExternalConc : callable
            An external function that will provide `n_ext` concentration profiles:

            getExternalConc(C, extConc, ext_to_C_idx, *args) -> extC

            or

            etExternalConc(C, extConc, ext_to_C_idx, *args) -> (extC, out2, out3, ...)

            where C is the current concentration matrix, *args are the parameters needed to completely
            specify the function, extC is a  nadarray or NDDataset of shape (C.y, n_ext), and out1, out2, ... are
            supplementary outputs returned by the function (e.g. optimized rate parameters)
        args : tuple, optional.
            Extra arguments passed to the external function
        external_to_C_idx : array or tuple, Default=np.arange(next)
            Indicates the correspondence between the indexes of external chemical
            profiles and the columns of the C matrix. [1, None, 0] indicates that the first external profile is the
            second pure species (index 1).
        nonnegSpec : list or tuple, Default [1, ..., 1]  (only non-negative spectra)
            Indicates species having non-negative spectra
        unimodSpec : list or tuple, Default [0, ..., 0]  (no unimodal concentration profiles)
            Indicates species having unimodal spectra
        """

        verbose = kwargs.pop('verbose', False)
        if verbose:
            set_loglevel(INFO)

        # Check initial data
        # ------------------------------------------------------------------------

        initConc, initSpec = False, False

        if type(guess) is np.ndarray:
            guess = NDDataset(guess)

        X = dataset

        if X.shape[0] == guess.shape[0]:
            initConc = True
            C = guess.copy()
            C.name = 'Pure conc. profile, mcs-als of ' + X.name
            nspecies = C.shape[1]

        elif X.shape[1] == guess.shape[1]:
            initSpec = True
            St = guess.copy()
            St.name = 'Pure spectra profile, mcs-als of ' + X.name
            nspecies = St.shape[0]

        else:
            raise ValueError('the dimensions of initial concentration '
                             'or spectra dataset do not match the data')

        ny, nx = X.shape

        # makes a PCA with same number of species
        Xpca = PCA(X).reconstruct(n_pc=nspecies)

        # Get optional parameters in kwargs or set them to their default
        # ------------------------------------------------------------------------

        # TODO: make a preference  file to set this kwargs
        # optimization

        tol = kwargs.get('tol', 0.1)
        maxit = kwargs.get('maxit', 50)
        maxdiv = kwargs.get('maxdiv', 5)

        # constraints on concentrations
        nonnegConc = kwargs.get('nonnegConc', np.arange(nspecies))
        unimodConc = kwargs.get('unimodConc', np.arange(nspecies))
        unimodTol = kwargs.get('unimodTol', 1.1)
        unimodMod = kwargs.get('unimodMod', 'strict')
        closureConc = kwargs.get('closureConc', None)
        if closureConc is not None:
            closureTarget = kwargs.get('closureTarget', np.ones(ny))
            closureMethod = kwargs.get('closureMethod', 'scaling')
        monoDecConc = kwargs.get('monoDecConc', None)
        monoDecTol = kwargs.get('monoDecTol', 1.1)
        monoIncConc = kwargs.get('monoIncConc', None)
        monoIncTol = kwargs.get('monoIncTol', 1.1)
        externalConc = kwargs.get('externalConc', None)
        if externalConc is not None:
            external_to_C_idx = kwargs.get('external_to_C_idx', np.arange(nspecies))
        if externalConc is not None:
            try:
                getExternalConc = kwargs.get('getExternalConc')
            except Exception:
                raise ValueError('A function must be given to get the external concentration profile(s)')
            external_to_C_idx = kwargs.get('external_to_C_idx', externalConc)
            args = kwargs.get('args', ())

        # constraints on spectra
        nonnegSpec = kwargs.get('nonnegSpec', np.arange(nspecies))
        normSpec = kwargs.get('normSpec', None)

        # TODO: add unimodal constraint on spectra

        # Compute initial spectra or concentrations   (first iteration...)
        # ------------------------------------------------------------------------

        if initConc:
            if C.coordset is None:
                C.set_coordset(y=X.y, x=C.x)
            St = NDDataset(np.linalg.lstsq(C.data, X.data, rcond=None)[0])
            St.name = 'Pure spectra profile, mcs-als of ' + X.name
            St.title = X.title
            cy = C.x.copy() if C.x else None
            cx = X.x.copy() if X.x else None
            St.set_coordset(y=cy, x=cx)

        if initSpec:
            if St.coordset is None:
                St.set_coordset(y=St.y, x=X.x)
            Ct = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0]
            C = NDDataset(Ct.T)
            C.name = 'Pure conc. profile, mcs-als of ' + X.name
            C.title = 'concentration'
            cx = St.y.copy() if St.y else None
            cy = X.y.copy() if X.y else None
            C.set_coordset(y=cy, x=cx)

        change = tol + 1
        stdev = X.std()  # .data[0]
        niter = 0
        ndiv = 0

        logs = '*** ALS optimisation log***\n'
        logs += '#iter     Error/PCA        Error/Exp      %change\n'
        logs += '---------------------------------------------------'
        info_(logs)

        while change >= tol and niter < maxit and ndiv < maxdiv:

            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T
            niter += 1

            # Force non-negative concentration
            # --------------------------------
            if nonnegConc is not None:
                for s in nonnegConc:
                    C.data[:, s] = C.data[:, s].clip(min=0)

            # Force unimodal concentration
            # ----------------------------
            if unimodConc is not None:
                for s in unimodConc:
                    maxid = np.argmax(C.data[:, s])
                    curmax = C.data[maxid, s]
                    curid = maxid

                    while curid > 0:
                        curid -= 1
                        if C.data[curid, s] > curmax * unimodTol:
                            if unimodMod == 'strict':
                                C.data[curid, s] = C.data[curid + 1, s]
                            if unimodMod == 'smooth':
                                C.data[curid, s] = (C.data[curid, s] + C.data[
                                    curid + 1, s]) / 2
                                C.data[curid + 1, s] = C.data[curid, s]
                                curid = curid + 2
                        curmax = C.data[curid, s]

                    curid = maxid
                    while curid < ny - 1:
                        curid += 1
                        if C.data[curid, s] > curmax * unimodTol:
                            if unimodMod == 'strict':
                                C.data[curid, s] = C.data[curid - 1, s]
                            if unimodMod == 'smooth':
                                C.data[curid, s] = (C.data[curid, s] + C.data[
                                    curid - 1, s]) / 2
                                C.data[curid - 1, s] = C.data[curid, s]
                                curid = curid - 2
                        curmax = C.data[curid, s]

            # Force monotonic increase
            # ------------------------
            if monoIncConc is not None:
                for s in monoIncConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1, s] < C.data[curid, s] / monoIncTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Force monotonic decrease
            # ----------------------------------------------
            if monoDecConc is not None:
                for s in monoDecConc:
                    for curid in np.arange(ny - 1):
                        if C.data[curid + 1, s] > C.data[curid, s] * monoDecTol:
                            C.data[curid + 1, s] = C.data[curid, s]

            # Closure
            # ------------------------------------------
            if closureConc is not None:
                if closureMethod == 'scaling':
                    Q = np.linalg.lstsq(C.data[:, closureConc], closureTarget.T, rcond=None)[0]
                    C.data[:, closureConc] = np.dot(C.data[:, closureConc], np.diag(Q))
                elif closureMethod == 'constantSum':
                    totalConc = np.sum(C.data[:, closureConc], axis=1)
                    C.data[:, closureConc] = C.data[:, closureConc] * closureTarget[:, None] / totalConc[:, None]

            # external concentration profiles
            # ------------------------------------------
            if externalConc is not None:
                extOutput = getExternalConc(*((C, externalConc, external_to_C_idx,) + args))
                if isinstance(extOutput, dict):
                    extC = extOutput['concentrations']
                    args = extOutput['new_args']
                else:
                    extC = extOutput
                if type(extC) is NDDataset:
                    extC = extC.data
                C.data[:, externalConc] = extC[:, external_to_C_idx]

            # store a copy of C in Chard
            Chard = C.copy()

            # compute St
            St.data = np.linalg.lstsq(C.data, X.data, rcond=None)[0]

            # store a copy of St in Stsoft
            Stsoft = St.copy()

            # Force non-negative spectra
            # --------------------------
            if nonnegSpec is not None:
                St.data[nonnegSpec, :] = St.data[nonnegSpec, :].clip(min=0)

            # recompute C for consistency (soft modeling)
            C.data = np.linalg.lstsq(St.data.T, X.data.T, rcond=None)[0].T


            # rescale spectra & concentrations
            if normSpec == 'max':
                alpha = np.max(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T
            elif normSpec == 'euclid':
                alpha = np.linalg.norm(St.data, axis=1).reshape(nspecies, 1)
                St.data = St.data / alpha
                C.data = C.data * alpha.T

            # compute residuals
            # -----------------
            X_hat = dot(C, St)
            stdev2 = (X_hat - X.data).std()
            change = 100 * (stdev2 - stdev) / stdev
            stdev = stdev2

            stdev_PCA = (X_hat - Xpca.data).std()  # TODO: Check PCA : values are different from the Arnaud version ?

            logentry = '{:3d}      {:10f}      {:10f}      {:10f}'.format(niter, stdev_PCA, stdev2, change)
            logs += logentry + '\n'
            info_(logentry)


            if change > 0:
                ndiv += 1
            else:
                ndiv = 0
                change = -change

            if change < tol:
                logentry = 'converged !'
                logs += logentry + '\n'
                info_(logentry)

            if ndiv == maxdiv:
                logline = f"Optimization not improved since {maxdiv} iterations... unconverged " \
                          f"or 'tol' set too small ?\n"
                logline += 'Stop ALS optimization'
                logs += logline + '\n'
                info_(logline)

            if niter == maxit:
                logline = 'Convergence criterion (\'tol\') not reached after {:d} iterations.'.format(maxit)
                logline += 'Stop ALS optimization'
                logs += logline + '\n'
                info_(logline)

        self._X = X
        self._params = kwargs

        self._C = C
        if externalConc is not None:
            self._extC = extC
            self._extOutput = extOutput
        else:
            self._extC = None
            self._extOutput = None

        self._St = St
        self._logs = logs

        self._Stsoft = Stsoft
        self._Chard = Chard