Esempio n. 1
0
 def __init__(self, y, Id='0', p=0.05, n_perm=200, stat='bino',
              threshold=None, nbest=10, direction='forward', occurence='i%',
              clfIn=None, clfOut=None, cvIn=None, cvOut=None):
     """Store the settings and build the ``setup`` summary dictionary.

     Args:
         y: array
             Label vector. It is stored flattened (``np.ravel``) in
             ``self._y``; the original ``y`` is the one handed to
             ``bino_da2p``, ``defClf`` and ``defCv``.

     Kargs:
         Id, stat, n_perm, nbest, direction, occurence:
             Stored unchanged on the instance and in ``setup``. Their
             meaning is defined by the code that reads them, which is
             not visible from this method.

         p: float, optional, [def: 0.05]
             Stored as ``self._p``. Ignored when ``threshold`` is given
             (see below).

         threshold: optional, [def: None]
             When not None, ``p`` is replaced by
             ``bino_da2p(y, threshold)``.

         clfIn, clfOut: dict, optional, [def: {'clf': 'lda'}]
             Keyword arguments forwarded to ``defClf``.

         cvIn: dict, optional,
               [def: {'cvtype': 'skfold', 'n_folds': 10, 'rep': 1}]
             Keyword arguments forwarded to ``defCv``.

         cvOut: dict, optional,
                [def: {'cvtype': 'skfold', 'n_folds': 10, 'rep': 10}]
             Keyword arguments forwarded to ``defCv``.
     """
     # The dictionaries default to None and are built here, so that every
     # instance gets its own object instead of sharing one mutable
     # default across all calls.
     if clfIn is None:
         clfIn = {'clf': 'lda'}
     if clfOut is None:
         clfOut = {'clf': 'lda'}
     if cvIn is None:
         cvIn = {'cvtype': 'skfold', 'n_folds': 10, 'rep': 1}
     if cvOut is None:
         cvOut = {'cvtype': 'skfold', 'n_folds': 10, 'rep': 10}

     self._Id = Id
     self._stat = stat
     self._y = np.ravel(y)
     # A threshold takes precedence over the p-value passed by the caller.
     if threshold is not None:
         p = bino_da2p(y, threshold)
     self._p = p
     self._nbest = nbest
     self._n_perm = n_perm
     self._direction = direction
     self._threshold = threshold
     self._occurence = occurence
     self._clfIn = clfIn
     self._clfOut = clfOut
     self._cvIn = cvIn
     self._cvOut = cvOut
     # Summary of the configuration; classifiers and cross-validations are
     # represented by the `lgStr` attribute of the objects built from the
     # dictionaries.
     self.setup = {'Id': Id, 'p': p, 'n_perm': n_perm, 'stat': stat,
                   'nbest': nbest, 'threshold': threshold,
                   'direction': direction, 'occurence': occurence,
                   'clfIn': defClf(y, **clfIn).lgStr,
                   'clfOut': defClf(y, **clfOut).lgStr,
                   'cvIn': defCv(y, **cvIn).lgStr,
                   'cvOut': defCv(y, **cvOut).lgStr}
Esempio n. 2
0
 def __init__(self,
              y,
              Id='0',
              p=0.05,
              n_perm=200,
              stat='bino',
              threshold=None,
              nbest=10,
              direction='forward',
              occurence='i%',
              clfIn=None,
              clfOut=None,
              cvIn=None,
              cvOut=None):
     """Store the settings and build the ``setup`` summary dictionary.

     Args:
         y: array
             Label vector. It is stored flattened (``np.ravel``) in
             ``self._y``; the original ``y`` is the one handed to
             ``bino_da2p``, ``defClf`` and ``defCv``.

     Kargs:
         Id, stat, n_perm, nbest, direction, occurence:
             Stored unchanged on the instance and in ``setup``. Their
             meaning is defined by the code that reads them, which is
             not visible from this method.

         p: float, optional, [def: 0.05]
             Stored as ``self._p``. Ignored when ``threshold`` is given
             (see below).

         threshold: optional, [def: None]
             When not None, ``p`` is replaced by
             ``bino_da2p(y, threshold)``.

         clfIn, clfOut: dict, optional, [def: {'clf': 'lda'}]
             Keyword arguments forwarded to ``defClf``.

         cvIn: dict, optional,
               [def: {'cvtype': 'skfold', 'n_folds': 10, 'rep': 1}]
             Keyword arguments forwarded to ``defCv``.

         cvOut: dict, optional,
                [def: {'cvtype': 'skfold', 'n_folds': 10, 'rep': 10}]
             Keyword arguments forwarded to ``defCv``.
     """
     # The dictionaries default to None and are built here, so that every
     # instance gets its own object instead of sharing one mutable
     # default across all calls.
     if clfIn is None:
         clfIn = {'clf': 'lda'}
     if clfOut is None:
         clfOut = {'clf': 'lda'}
     if cvIn is None:
         cvIn = {
             'cvtype': 'skfold',
             'n_folds': 10,
             'rep': 1
         }
     if cvOut is None:
         cvOut = {
             'cvtype': 'skfold',
             'n_folds': 10,
             'rep': 10
         }

     self._Id = Id
     self._stat = stat
     self._y = np.ravel(y)
     # A threshold takes precedence over the p-value passed by the caller.
     if threshold is not None:
         p = bino_da2p(y, threshold)
     self._p = p
     self._nbest = nbest
     self._n_perm = n_perm
     self._direction = direction
     self._threshold = threshold
     self._occurence = occurence
     self._clfIn = clfIn
     self._clfOut = clfOut
     self._cvIn = cvIn
     self._cvOut = cvOut
     # Summary of the configuration; classifiers and cross-validations are
     # represented by the `lgStr` attribute of the objects built from the
     # dictionaries.
     self.setup = {
         'Id': Id,
         'p': p,
         'n_perm': n_perm,
         'stat': stat,
         'nbest': nbest,
         'threshold': threshold,
         'direction': direction,
         'occurence': occurence,
         'clfIn': defClf(y, **clfIn).lgStr,
         'clfOut': defClf(y, **clfOut).lgStr,
         'cvIn': defCv(y, **cvIn).lgStr,
         'cvOut': defCv(y, **cvOut).lgStr
     }
Esempio n. 3
0
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ between subjects and must match the
                number of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels of each
                      subject and run the classification again, n_perm times

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels.
                Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Number of jobs used to compute the decoding accuracy. If
                n_jobs = -1, all the jobs are used. With 'label_rnd', the
                permutations are distributed over n_jobs and each of them
                runs with a single job.

        Return:
            da: array
                The decoding accuracy, as returned by the internal fitting
                routine.

            pvalue: array
                Array of associated p-values.

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method).

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Check x, y. A copy of the original x is kept because the check is
        # run again on it for every set of shuffled labels:
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, ytrue, ypred = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == 'label_rnd':
            # Dedicated, seeded generator so that rndstate really controls
            # the shuffling (the global numpy state is left alone):
            rng = np.random.RandomState(rndstate)
            y_sh = [_checkXY(xbk, [rng.permutation(i) for i in self._y],
                             mf, grp, center, self)[1] for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(
                    x, y_sh[k], train, test, self, 1)
                    for k in range(n_perm))

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Try to get featinfo. This is best effort: a failure here must not
        # prevent the results from being returned. Only Exception is caught
        # so that KeyboardInterrupt / SystemExit still propagate.
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                     da[:, np.newaxis], grp=grp,
                                                     pperm=pperm)
        except Exception:
            pass

        return da, pvalue, daperm
Esempio n. 4
0
    def fit(self,
            x,
            mf=False,
            center=False,
            grp=None,
            method='bino',
            n_perm=200,
            rndstate=0,
            n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), N is the
                number of trials (which should be the length of y). M, the
                number of columns, is an additional dimension for
                classifying data. If M = 1, the data is considered as a
                single feature. If M > 1, use the parameter mf to say if x
                should be considered as a single feature (mf=False) or
                multi-features (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations. Methods 2 and 3
                should provide similar results, but 4 should be more
                conservative.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used by 'label_rnd' and
                'full_rnd'. Useful to reproduce results. For 'intra_rnd',
                permIntraClass is called with rnd set to the permutation
                index instead.

            n_jobs: integer, optional, [def: -1]
                Number of jobs used to compute the decoding accuracy. If
                n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated p-values of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is not one of the four names listed above.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                                  self._cv.cvr, mf, grp,
                                                  center, n_jobs)
        nfeat = len(x)
        rng = np.random.RandomState(rndstate)
        # One score per feature: mean of the decoding over repetitions.
        score = np.array([np.mean(k) for k in da])

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # Only the exact known names enter this branch, so that any other
        # string (even one containing '_rnd') ends in the ValueError below
        # instead of failing later on an undefined variable.
        elif method in ('label_rnd', 'full_rnd', 'intra_rnd'):

            # Every (permutation, feature) pair, permutation-major so that
            # the results can be reshaped to (n_perm, nfeat):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if method == 'label_rnd':
                y_sh = [rng.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                    x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                                              for i, k in iteract)

            # -> Full randomization :
            elif method == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(rng.permutation(x[k]), y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # -> Shuffle intra-class :
            else:
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(x[k][permIntraClass(y, rnd=i), :], y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method ' + method + ' found')

        # Get features informations. This is best effort: a failure here
        # must not prevent the results from being returned. Only Exception
        # is caught so that KeyboardInterrupt / SystemExit still propagate.
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf,
                                                     self._cv,
                                                     da,
                                                     grp=grp,
                                                     pperm=pperm)
        except Exception:
            pass

        return da.T, pvalue, daPerm
Esempio n. 5
0
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), N is the
                number of trials (which should be the length of y). M, the
                number of columns, is an additional dimension for
                classifying data. If M = 1, the data is considered as a
                single feature. If M > 1, use the parameter mf to say if x
                should be considered as a single feature (mf=False) or
                multi-features (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations. Methods 2 and 3
                should provide similar results, but 4 should be more
                conservative.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used by 'label_rnd' and
                'full_rnd'. Useful to reproduce results. For 'intra_rnd',
                permIntraClass is called with rnd set to the permutation
                index instead.

            n_jobs: integer, optional, [def: -1]
                Number of jobs used to compute the decoding accuracy. If
                n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated p-values of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is not one of the four names listed above.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf, self._cv.cvr,
                                                  mf, grp, center, n_jobs)
        nfeat = len(x)
        rng = np.random.RandomState(rndstate)
        # One score per feature: mean of the decoding over repetitions.
        score = np.array([np.mean(k) for k in da])

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # Only the exact known names enter this branch, so that any other
        # string (even one containing '_rnd') ends in the ValueError below
        # instead of failing later on an undefined variable.
        elif method in ('label_rnd', 'full_rnd', 'intra_rnd'):

            # Every (permutation, feature) pair, permutation-major so that
            # the results can be reshaped to (n_perm, nfeat):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if method == 'label_rnd':
                y_sh = [rng.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                        for i, k in iteract)

            # -> Full randomization :
            elif method == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        rng.permutation(x[k]), y, clone(self._clf),
                        self._cv.cvr[0]) for i, k in iteract)

            # -> Shuffle intra-class :
            else:
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                        self._cv.cvr[0]) for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Get features informations. This is best effort: a failure here
        # must not prevent the results from being returned. Only Exception
        # is caught so that KeyboardInterrupt / SystemExit still propagate.
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                     grp=grp, pperm=pperm)
        except Exception:
            pass

        return da.T, pvalue, daPerm
Esempio n. 6
0
    def fit(self, x, mf=False, center=False, grp=None, method="bino", n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ between subjects and must match the
                number of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep) where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels of each
                      subject and run the classification again, n_perm times

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels.
                Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Number of jobs used to compute the decoding accuracy. If
                n_jobs = -1, all the jobs are used. With 'label_rnd', the
                permutations are distributed over n_jobs and each of them
                runs with a single job.

        Return:
            da: array
                The decoding accuracy, as returned by the internal fitting
                routine.

            pvalue: array
                Array of associated p-values.

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method).

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Check x, y. A copy of the original x is kept because the check is
        # run again on it for every set of shuffled labels:
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, ytrue, ypred = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == "bino":
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == "label_rnd":
            # Dedicated, seeded generator so that rndstate really controls
            # the shuffling (the global numpy state is left alone):
            rng = np.random.RandomState(rndstate)
            y_sh = [
                _checkXY(xbk, [rng.permutation(i) for i in self._y], mf, grp, center, self)[1]
                for _ in range(n_perm)
            ]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(x, y_sh[k], train, test, self, 1) for k in range(n_perm))

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError("No statistical method " + method + " found")

        # Try to get featinfo. This is best effort: a failure here must not
        # prevent the results from being returned. Only Exception is caught
        # so that KeyboardInterrupt / SystemExit still propagate.
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da[:, np.newaxis], grp=grp, pperm=pperm)
        except Exception:
            pass

        return da, pvalue, daperm