Beispiel #1
0
def _plvstat(xp1, xp2, plv, n_perm, n_jobs, perm):
    """Evaluate the significance of a plv using permutations.

    The plv is recomputed between xp1 and xp2 for every entry of perm, each
    entry being used to index the last axis of xp2. The surrogate values
    are then converted into p-values by perm_2pvalue (tail=1).
    """
    # One delayed job per permutation of the last axis of xp2 :
    jobs = (delayed(_plv)(xp1, xp2[..., idx]) for idx in perm)
    surrogates = np.array(Parallel(n_jobs=n_jobs)(jobs))

    # Compare the true plv to its surrogates :
    return perm_2pvalue(plv, surrogates, n_perm, tail=1)
Beispiel #2
0
def _plvstat(xp1, xp2, plv, n_perm, n_jobs, perm):
    """Permutation statistics for the plv.

    Each element of perm re-indexes the last axis of xp2; _plv is then
    evaluated against xp1 for every such re-indexing (in parallel, using
    n_jobs). The returned value is the output of perm_2pvalue applied to
    the true plv and the stacked surrogate plv (tail=1).
    """
    pool = Parallel(n_jobs=n_jobs)
    plv_job = delayed(_plv)

    # Surrogate plv, one per permutation of xp2 :
    plv_perm = np.array(pool(plv_job(xp1, xp2[..., order])
                             for order in perm))

    # P-values deduced from the surrogate distribution :
    pvalues = perm_2pvalue(plv, plv_perm, n_perm, tail=1)
    return pvalues
Beispiel #3
0
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ across subjects and must match the number
                of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep, is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered :
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy :

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels (permutations)

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels.
                Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutations of
                shape n_perm x n_features (None for the 'bino' method)

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Keep an untouched copy of the input list: it is re-checked against
        # each set of shuffled labels in the permutation branch.
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, _, _ = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == 'label_rnd':
            # Seeded generator so that rndstate actually controls the
            # shuffling (the global numpy generator is left untouched).
            rng = np.random.RandomState(rndstate)
            y_sh = [_checkXY(xbk, [rng.permutation(i) for i in self._y],
                             mf, grp, center, self)[1] for _ in range(n_perm)]
            # Each permutation runs on a single job; the parallelism is
            # across permutations.
            cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(
                    x, y_sh[k], train, test, self, 1)
                    for k in range(n_perm))

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Try to get featinfo (best effort: a failure here must not prevent
        # the results from being returned):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                     da[:, np.newaxis], grp=grp,
                                                     pperm=pperm)
        except Exception:
            # Only regular errors are ignored; KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return da, pvalue, daperm
Beispiel #4
0
    def fit(self,
            x,
            mf=False,
            center=False,
            grp=None,
            method='bino',
            n_perm=200,
            rndstate=0,
            n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), N is the
                number of trials (which should be the length of y). M, the
                number of columns, is a supplementary dimension for
                classifying data. If M = 1, the data is considered as a single
                feature. If M > 1, use the parameter mf to say if x should be
                considered as a single feature (mf=False) or multi-features
                (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep, is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered :
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy :

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations. The methods 2
                and 3 should provide similar results. But 4 should be more
                conservative. The names of the permutation methods are
                matched case-insensitively.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Fix the random state of the machine. Useful to reproduce
                results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutations of
                shape n_perm x n_features (None for the 'bino' method)

        Raises:
            ValueError: if method is not one of the four supported methods.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                                  self._cv.cvr, mf, grp,
                                                  center, n_jobs)
        nfeat = len(x)
        rndstate = np.random.RandomState(rndstate)
        score = np.array([np.mean(k) for k in da])

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        elif method.lower() in ('label_rnd', 'full_rnd', 'intra_rnd'):
            # Dispatch on the lowered name so that the test above and the
            # branches below always agree (an unknown '*_rnd' name now ends
            # in the ValueError below instead of an unbound `cvs`).
            perm_method = method.lower()

            # (permutation, feature) index pairs, permutation-major, so the
            # flat list of results reshapes to (n_perm, nfeat):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if perm_method == 'label_rnd':
                y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                    x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                                              for i, k in iteract)

            # -> Full randomization :
            elif perm_method == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(rndstate.permutation(x[k]), y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # -> Shuffle intra-class (seeded by the permutation index) :
            else:
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(x[k][permIntraClass(y, rnd=i), :], y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method ' + method + ' found')

        # Get features informations (best effort: a failure here must not
        # prevent the results from being returned):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf,
                                                     self._cv,
                                                     da,
                                                     grp=grp,
                                                     pperm=pperm)
        except Exception:
            # Only regular errors are ignored; KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return da.T, pvalue, daPerm
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), N is the
                number of trials (which should be the length of y). M, the
                number of columns, is a supplementary dimension for
                classifying data. If M = 1, the data is considered as a single
                feature. If M > 1, use the parameter mf to say if x should be
                considered as a single feature (mf=False) or multi-features
                (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep, is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered :
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy :

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations. The methods 2
                and 3 should provide similar results. But 4 should be more
                conservative. The names of the permutation methods are
                matched case-insensitively.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Fix the random state of the machine. Useful to reproduce
                results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutations of
                shape n_perm x n_features (None for the 'bino' method)

        Raises:
            ValueError: if method is not one of the four supported methods.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf, self._cv.cvr,
                                                  mf, grp, center, n_jobs)
        nfeat = len(x)
        rndstate = np.random.RandomState(rndstate)
        score = np.array([np.mean(k) for k in da])

        # Names of the permutation-based methods handled below:
        perm_methods = ('label_rnd', 'full_rnd', 'intra_rnd')

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        elif method.lower() in perm_methods:
            # The lowered name is used for the dispatch too, so a name that
            # passes the test above can never leave `cvs` undefined; any
            # other '*_rnd' name reaches the ValueError below.
            kind = method.lower()
            clf, cvr = self._clf, self._cv.cvr[0]

            # (permutation, feature) index pairs, permutation-major, so the
            # flat list of results reshapes to (n_perm, nfeat):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if kind == 'label_rnd':
                y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k], y_sh[i], clone(clf), cvr)
                        for i, k in iteract)

            # -> Full randomization :
            elif kind == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        rndstate.permutation(x[k]), y, clone(clf), cvr)
                        for i, k in iteract)

            # -> Shuffle intra-class (seeded by the permutation index) :
            else:
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k][permIntraClass(y, rnd=i), :], y, clone(clf), cvr)
                        for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Get features informations (best effort: a failure here must not
        # prevent the results from being returned):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                     grp=grp, pperm=pperm)
        except Exception:
            # Only regular errors are ignored; KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return da.T, pvalue, daPerm
Beispiel #6
0
    def get(self, xpha, xamp, n_perm=200, p=0.05, matricial=False, n_jobs=-1):
        """Get the normalized cfc measure between the xpha and xamp signals.

        Args:
            xpha: array
                Signal for phase. The shape of xpha should be :
                (n_electrodes x n_pts x n_trials)

            xamp: array
                Signal for amplitude. The shape of xamp should be :
                (n_electrodes x n_pts x n_trials)

        Kargs:
            n_perm: integer, optional, [def: 200]
                Number of permutations for normalizing the cfc measure.

            p: float, optional, [def: 0.05]
                p-value for the statistical method of Ozkurt 2012.

            matricial: bool, optional, [def: False]
                Some methods can work in matricial computation. This can lead
                to a 10x or 30x time faster. But, please, monitor your RAM
                usage because this parameter can use a lot of RAM. So, turn
                this parameter on in case of small computation.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs for parallel computing. Use 1, 2,
                .. depending on your number of cores. -1 for all the cores.
                The jobs are given to the surrogates when there are fewer
                electrodes than cores, to the electrodes otherwise.

            If the same signal is used (example : xpha=x and xamp=x), this
            means the program computes a local cfc.

        Returns:
            ncfc: array
                The cfc measure of size :
                (n_amplitude x n_phase x n_electrodes x n_windows x n_trials)

            pvalue: array
                The associated p-values of size :
                (n_amplitude x n_phase x n_electrodes x n_windows)
                None when no permutation statistics are computed (n_perm
                equal to 0, self.Id[1] equal to '0' or self.Id[0] equal to
                '5').
        """
        # Check the inputs variables :
        xpha, xamp = _cfcCheck(xpha, xamp, self._npts)
        self.n_perm = n_perm
        self._matricial = matricial
        if n_perm != 0:
            self.p = 1 / n_perm
        else:
            self.p = None
        N = xpha.shape[0]

        # Manage jobs repartition : only one of the two levels (surrogates
        # or electrodes) runs in parallel.
        if (N < cpu_count()) and (n_jobs != 1):
            surJob = n_jobs
            elecJob = 1
        elif (N >= cpu_count()) and (n_jobs != 1):
            surJob = 1
            elecJob = n_jobs
        else:
            surJob, elecJob = 1, 1

        # Get the unnormalized cfc and surrogates:
        cfcsu = Parallel(n_jobs=elecJob)(
            delayed(_cfcFiltSuro)(xpha[k, ...], xamp[k, ...], surJob, self)
            for k in range(N))
        uCfc, Suro, mSuro, stdSuro = zip(*cfcsu)
        uCfc = np.array(uCfc)

        # Permutations and stat. Values are compared with == / != : an
        # identity test (`is`) against a str or int literal is not reliable.
        if self.Id[0] != '5':
            # Compute permutations :
            if (self.n_perm != 0) and (self.Id[1] != '0'):
                Suro = np.array(Suro)
                mSuro = np.array(mSuro)
                stdSuro = np.array(stdSuro)

                # Normalize each cfc:
                _, _, Norm, _, _, _ = CfcSettings(self.Id)
                nCfc = Norm(uCfc, mSuro, stdSuro)

                # Confidence interval :
                pvalue = perm_2pvalue(uCfc.mean(2),
                                      np.rollaxis(Suro.mean(2), 4),
                                      self.n_perm,
                                      tail=1)

                return nCfc.transpose(3, 4, 0, 1,
                                      2), pvalue.transpose(2, 3, 0, 1)

            return uCfc.transpose(3, 4, 0, 1, 2), None

        # Ozkurt threshold :
        xlim = (erfinv(1 - p)**2)
        # Set to zero non-significant values:
        idxUn = np.where(uCfc <= 2 * xlim)
        uCfc[idxUn] = 0
        return uCfc.transpose(3, 4, 0, 1, 2), None
Beispiel #7
0
def _evalstat(self, x, bsl):
    """Statistical evaluation of features against a baseline.

    [x] = [xn] = (nFce, npts, nTrials)

    Args:
        self: object providing the settings read here (_statmeth, _n_perm,
            _2t, _mxst, _fSplitIndex, _window and, for permutations,
            _metric).
        x: array of shape (nFce, npts, nTrials).
        bsl: pair (start, end) of indices delimiting the baseline period on
            the time axis of x.

    Returns:
        pvalues: array of shape (nf, npts), where nf and npts are the sizes
            of the first two axes of x after binning.

    Raises:
        ValueError: if self._statmeth is not 'permutation', 'wilcoxon' or
            'kruskal'.
    """
    # Unpack variables:
    statmeth = self._statmeth
    n_perm = self._n_perm
    tail = self._2t
    maxst = self._mxst

    # Mean Frequencies :
    x, _ = binArray(x, self._fSplitIndex, axis=0)

    # Mean of the baseline period across time (computed before any time
    # binning of x):
    xFm = np.mean(x[:, bsl[0]:bsl[1], :], 1)

    # Mean time :
    if self._window is not None:
        x, _ = binArray(x, self._window, axis=1)

    # Repeat baseline along the time axis to match the shape of x:
    baseline = np.tile(xFm[:, np.newaxis, :], [1, x.shape[1], 1])

    # Get shape of x:
    nf, npts, _ = x.shape
    pvalues = np.ones((nf, npts))

    # Switch between methods:
    #   -> Permutations
    # Loop over time points and process all the frequencies at once, which
    # limits the RAM usage.
    if statmeth == 'permutation':
        # Get metric:
        fcn = perm_metric(self._metric)
        # Apply metric to x and baseline:
        xN = fcn(x, baseline).mean(axis=2)
        # For each time points:
        for pts in range(npts):
            # Randomly swap x // baseline (one seed per time point):
            perm = perm_swap(x[:, pts, :],
                             baseline[:, pts, :],
                             n_perm=n_perm,
                             axis=1,
                             rndstate=0 + pts)[0]
            # Normalize permutations by baseline:
            perm = fcn(perm, baseline[:, pts, :]).mean(2)
            # Maximum stat (correct through frequencies):
            if maxst:
                perm = maxstat(perm, axis=1)
            # Get pvalues :
            pvalues[:, pts] = perm_2pvalue(xN[:, pts], perm, n_perm, tail=tail)

    #   -> Wilcoxon // Kruskal-Wallis:
    else:
        # Get the method (both return the p-value only):
        if statmeth == 'wilcoxon':

            def fcn(a, b):
                return wilcoxon(a, b)[1]
        elif statmeth == 'kruskal':

            def fcn(a, b):
                return kruskal(a, b)[1]
        else:
            # Fail explicitly instead of hitting an undefined `fcn` below.
            raise ValueError('No statistical method ' + str(statmeth) +
                             ' found')

        # Apply to each (frequency, time point) pair:
        for k, i in product(range(nf), range(npts)):
            pvalues[k, i] = fcn(x[k, i, :], xFm[k, :])

    return pvalues
Beispiel #8
0
    def get(self, xpha, xamp, n_perm=200, p=0.05, matricial=False, n_jobs=-1):
        """Get the normalized cfc measure between the xpha and xamp signals.

        Args:
            xpha: array
                Signal for phase. The shape of xpha should be :
                (n_electrodes x n_pts x n_trials)

            xamp: array
                Signal for amplitude. The shape of xamp should be :
                (n_electrodes x n_pts x n_trials)

        Kargs:
            n_perm: integer, optional, [def: 200]
                Number of permutations for normalizing the cfc measure.

            p: float, optional, [def: 0.05]
                p-value for the statistical method of Ozkurt 2012.

            matricial: bool, optional, [def: False]
                Some methods can work in matricial computation. This can lead
                to a 10x or 30x time faster. But, please, monitor your RAM
                usage because this parameter can use a lot of RAM. So, turn
                this parameter on in case of small computation.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs for parallel computing. Use 1, 2,
                .. depending on your number of cores. -1 for all the cores.
                The jobs are given to the surrogates when there are fewer
                electrodes than cores, to the electrodes otherwise.

            If the same signal is used (example : xpha=x and xamp=x), this
            means the program computes a local cfc.

        Returns:
            ncfc: array
                The cfc measure of size :
                (n_amplitude x n_phase x n_electrodes x n_windows x n_trials)

            pvalue: array
                The associated p-values of size :
                (n_amplitude x n_phase x n_electrodes x n_windows)
                None when no permutation statistics are computed (n_perm
                equal to 0, self.Id[1] equal to '0' or self.Id[0] equal to
                '5').
        """
        # Check the inputs variables :
        xpha, xamp = _cfcCheck(xpha, xamp, self._npts)
        self.n_perm = n_perm
        self._matricial = matricial
        if n_perm != 0:
            self.p = 1/n_perm
        else:
            self.p = None
        N = xpha.shape[0]

        # Manage jobs repartition : only one of the two levels (surrogates
        # or electrodes) runs in parallel.
        if (N < cpu_count()) and (n_jobs != 1):
            surJob = n_jobs
            elecJob = 1
        elif (N >= cpu_count()) and (n_jobs != 1):
            surJob = 1
            elecJob = n_jobs
        else:
            surJob, elecJob = 1, 1

        # Get the unnormalized cfc and surrogates:
        cfcsu = Parallel(n_jobs=elecJob)(delayed(_cfcFiltSuro)(
            xpha[k, ...], xamp[k, ...], surJob, self) for k in range(N))
        uCfc, Suro, mSuro, stdSuro = zip(*cfcsu)
        uCfc = np.array(uCfc)

        # Ozkurt method : no permutation, values under the threshold are
        # set to zero. Compared by value (==), not by identity (`is`),
        # which is unreliable against a str literal.
        if self.Id[0] == '5':
            # Ozkurt threshold :
            xlim = (erfinv(1-p)**2)
            # Set to zero non-significant values:
            idxUn = np.where(uCfc <= 2*xlim)
            uCfc[idxUn] = 0
            return uCfc.transpose(3, 4, 0, 1, 2), None

        # No permutation requested or no normalization : raw cfc only.
        if (self.n_perm == 0) or (self.Id[1] == '0'):
            return uCfc.transpose(3, 4, 0, 1, 2), None

        # Compute permutations :
        Suro, mSuro, stdSuro = np.array(
            Suro), np.array(mSuro), np.array(stdSuro)

        # Normalize each cfc:
        _, _, Norm, _, _, _ = CfcSettings(self.Id)
        nCfc = Norm(uCfc, mSuro, stdSuro)

        # Confidence interval :
        pvalue = perm_2pvalue(uCfc.mean(2), np.rollaxis(Suro.mean(2), 4),
                              self.n_perm, tail=1)

        return nCfc.transpose(3, 4, 0, 1, 2), pvalue.transpose(2, 3, 0, 1)
Beispiel #9
0
    def fit(self, x, mf=False, center=False, grp=None, method="bino", n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ across subjects and must match the number
                of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep, is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered :
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy :

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels (permutations)

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels.
                Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutations of
                shape n_perm x n_features (None for the 'bino' method)

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Keep an untouched copy of the input list: it is re-checked against
        # each set of shuffled labels in the permutation branch.
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, _, _ = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == "bino":
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == "label_rnd":
            # Seeded generator so that rndstate actually controls the
            # shuffling (the global numpy generator is left untouched).
            rng = np.random.RandomState(rndstate)
            y_sh = [
                _checkXY(xbk, [rng.permutation(i) for i in self._y], mf, grp, center, self)[1]
                for _ in range(n_perm)
            ]
            # Each permutation runs on a single job; the parallelism is
            # across permutations.
            cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(x, y_sh[k], train, test, self, 1) for k in range(n_perm))

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError("No statistical method " + method + " found")

        # Try to get featinfo (best effort: a failure here must not prevent
        # the results from being returned):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da[:, np.newaxis], grp=grp, pperm=pperm)
        except Exception:
            # Only regular errors are ignored; KeyboardInterrupt and
            # SystemExit still propagate.
            pass

        return da, pvalue, daperm
Beispiel #10
0
def _evalstat(self, x, bsl):
    """Statistical evaluation of features against a baseline.

    [x] = [xn] = (nFce, npts, nTrials)

    Args:
        self: object providing the settings read here (_statmeth, _n_perm,
            _2t, _mxst, _fSplitIndex, _window and, for permutations,
            _metric).
        x: array of shape (nFce, npts, nTrials).
        bsl: pair (start, end) of indices delimiting the baseline period on
            the time axis of x.

    Returns:
        pvalues: array of shape (nf, npts), where nf and npts are the sizes
            of the first two axes of x after binning.

    Raises:
        ValueError: if self._statmeth is not 'permutation', 'wilcoxon' or
            'kruskal'.
    """
    # Unpack variables:
    statmeth = self._statmeth
    n_perm = self._n_perm
    tail = self._2t
    maxst = self._mxst

    # Mean Frequencies :
    x, _ = binArray(x, self._fSplitIndex, axis=0)

    # Mean of the baseline period across time (computed before any time
    # binning of x):
    xFm = np.mean(x[:, bsl[0]:bsl[1], :], 1)

    # Mean time :
    if self._window is not None:
        x, _ = binArray(x, self._window, axis=1)

    # Repeat baseline along the time axis to match the shape of x:
    baseline = np.tile(xFm[:, np.newaxis, :], [1, x.shape[1], 1])

    # Get shape of x:
    nf, npts, _ = x.shape
    pvalues = np.ones((nf, npts))

    # Switch between methods:
    #   -> Permutations
    # Loop over time points and process all the frequencies at once, which
    # limits the RAM usage.
    if statmeth == 'permutation':
        # Get metric:
        fcn = perm_metric(self._metric)
        # Apply metric to x and baseline:
        xN = fcn(x, baseline).mean(axis=2)
        # For each time points:
        for pts in range(npts):
            # Randomly swap x // baseline (one seed per time point):
            perm = perm_swap(x[:, pts, :], baseline[:, pts, :],
                             n_perm=n_perm, axis=1, rndstate=0+pts)[0]
            # Normalize permutations by baseline:
            perm = fcn(perm, baseline[:, pts, :]).mean(2)
            # Maximum stat (correct through frequencies):
            if maxst:
                perm = maxstat(perm, axis=1)
            # Get pvalues :
            pvalues[:, pts] = perm_2pvalue(xN[:, pts], perm, n_perm, tail=tail)

    #   -> Wilcoxon // Kruskal-Wallis:
    else:
        # Select the test; only its p-value (second output) is kept:
        if statmeth == 'wilcoxon':
            test = wilcoxon
        elif statmeth == 'kruskal':
            test = kruskal
        else:
            # Fail explicitly instead of hitting an undefined function in
            # the loop below.
            raise ValueError('No statistical method ' + str(statmeth) +
                             ' found')

        # Apply to each (frequency, time point) pair:
        for k, i in product(range(nf), range(npts)):
            pvalues[k, i] = test(x[k, i, :], xFm[k, :])[1]

    return pvalues