Esempio n. 1
0
def checkXY(x, y, mf, grp, center):
    """Shape the data x and the labels y before classification.

    x is converted to a 2-D ``np.matrix`` and y is flattened. If the number
    of rows of x differs from ``len(y)``, x is transposed so that, for
    well-formed inputs, x.shape = (ntrials, nfeat).

    Args:
        x: array-like
            Data to prepare.
        y: array-like
            Label vector.
        mf: bool
            When false (and grp is None), every column of x is returned as
            its own (ntrials, 1) array. When true (and grp is None), x is
            returned whole as a single-element list.
        grp: array-like or None
            Group label of each column. When given, mf is forced to True and
            one array per unique group is returned.
        center: bool
            If true, subtract the column mean from each column, then divide
            by that mean.

    Returns:
        x: list
            The prepared pieces of x (see mf / grp above).
        y: array
            The flattened labels.

    Raises:
        ValueError: if grp does not hold one entry per column of x.
    """
    x = np.matrix(x)
    y = np.ravel(y)

    # Orient x so that its rows line up with the labels.
    if len(y) != x.shape[0]:
        x = x.T

    # Normalize features (broadcast the per-column mean over the rows):
    if center:
        col_mean = np.mean(x, 0)
        x = (x - col_mean) / col_mean

    # Group parameter (giving grp implies multi-features):
    grouped = grp is not None
    if grouped:
        grp = np.ravel(grp)

    # One array per column:
    if not (mf or grouped):
        return [np.array(x[:, col]) for col in range(x.shape[1])], y

    # All the columns kept together:
    if not grouped:
        return [x], y

    # One array per group of columns:
    if grp.size != x.shape[1]:
        raise ValueError('The grp parameter must have the same size as the'
                         ' number of features ('+str(x.shape[1])+')')
    pieces = [np.array(x[:, np.where(grp == label)[0]])
              for label in uorderlst(grp)]
    return pieces, y
Esempio n. 2
0
def _checkXY(x, y, mf, grp, center, self):
    """Validate the per-subject datasets and build the train / test sets.

    Args:
        x: list
            One 2-D dataset per subject; ``shape[1]`` is read as the number
            of features and must be identical for every subject.
        y: list
            One entry per subject (presumably its label vector).
        mf: bool
            When false (and grp is None), each subject's row holds one list
            per feature column. When true, the whole dataset is stored in a
            single cell.
        grp: array-like or None
            Group label of each feature. When given, mf is forced to True
            and each subject's row holds one sub-array per unique group.
        center: bool
            If true, subtract the column mean from each column, then divide
            by that mean, subject by subject.
        self: object
            Must provide ``_cv`` (with a ``split`` method) and ``_nsuj``.

    Returns:
        x: pandas.DataFrame with one row per subject.
        y: pandas.DataFrame with one row and one cell per subject.
        train, test: lists of index lists, one pair per split yielded by
            ``self._cv.split``.

    Raises:
        ValueError: if the subjects do not share the same number of
            features, or if grp does not hold one entry per feature.
    """
    # Size checking:
    if [subj for subj in x if subj.shape[1] != x[0].shape[1]]:
        raise ValueError('All features across subjects should have '
                         'the same number of features')

    # Center data:
    if center:
        centered = []
        for subj in x:
            mu = subj.mean(0)
            centered.append((subj - np.tile(mu, (subj.shape[0], 1))) / mu)
        x = centered

    # Manage MF (giving grp implies multi-features):
    grouped = grp is not None
    if grouped:
        grp = np.ravel(grp)
        if [subj for subj in x if subj.shape[1] != len(grp)]:
            raise ValueError('The length of the grp parameter must be equal '
                             'to the number of features for each subject.')

    if grouped:
        labels = uorderlst(grp)
        rows = [[subj[:, np.where(grp == lab)[0]] for lab in labels]
                for subj in x]
    elif mf:
        rows = [[subj] for subj in x]
    else:
        rows = [np.ndarray.tolist(subj.T) for subj in x]
    x = pd.DataFrame(rows)

    # Create training and testing set (the split is made across subjects):
    splitter = self._cv
    folds = splitter.split(np.random.rand(self._nsuj),
                           np.arange(self._nsuj),
                           np.arange(self._nsuj))
    pairs = [(list(tr_idx), list(te_idx)) for tr_idx, te_idx in folds]
    train = [pair[0] for pair in pairs]
    test = [pair[1] for pair in pairs]

    y = pd.DataFrame([[labels_suj] for labels_suj in y])
    return x, y, train, test
Esempio n. 3
0
def _checkXY(x, y, mf, grp, center, self):
    """Check the datasets of every subject and prepare the train / test sets.

    Args:
        x: list
            One 2-D dataset per subject; ``shape[1]`` is read as the number
            of features and must be the same for all subjects.
        y: list
            One entry per subject (presumably its label vector).
        mf: bool
            When false (and grp is None), a subject's row holds one list per
            feature column. When true, the whole dataset sits in one cell.
        grp: array-like or None
            Group label of each feature. When given, mf is forced to True
            and a subject's row holds one sub-array per unique group.
        center: bool
            If true, each column has its mean subtracted and is then divided
            by that mean, subject by subject.
        self: object
            Must provide ``_cv`` (with a ``split`` method) and ``_nsuj``.

    Returns:
        x: pandas.DataFrame with one row per subject.
        y: pandas.DataFrame with one row and one cell per subject.
        train, test: lists of index lists, one pair per split yielded by
            ``self._cv.split``.

    Raises:
        ValueError: if the subjects do not share the same number of
            features, or if grp does not hold one entry per feature.
    """
    # Size checking:
    odd_width = [data for data in x if data.shape[1] != x[0].shape[1]]
    if odd_width:
        raise ValueError("All features across subjects should have " "the same number of features")

    # Center data:
    if center:
        centered = []
        for data in x:
            col_mean = data.mean(0)
            centered.append((data - np.tile(col_mean, (data.shape[0], 1))) / col_mean)
        x = centered

    # Manage MF (giving grp implies multi-features):
    has_grp = grp is not None
    if has_grp:
        grp = np.ravel(grp)
        bad_len = [data for data in x if data.shape[1] != len(grp)]
        if bad_len:
            raise ValueError(
                "The length of the grp parameter must be equal " "to the number of features for each subject."
            )

    if has_grp:
        unique_grp = uorderlst(grp)
        cells = [[data[:, np.where(grp == g)[0]] for g in unique_grp] for data in x]
    elif mf:
        cells = [[data] for data in x]
    else:
        cells = [np.ndarray.tolist(data.T) for data in x]
    x = pd.DataFrame(cells)

    # Create training and testing set (the split is made across subjects):
    cv = self._cv
    splits = cv.split(np.random.rand(self._nsuj), np.arange(self._nsuj), np.arange(self._nsuj))
    folds = [(list(training), list(testing)) for training, testing in splits]
    train = [fold[0] for fold in folds]
    test = [fold[1] for fold in folds]

    y = pd.DataFrame([[suj_y] for suj_y in y])
    return x, y, train, test
Esempio n. 4
0
def checkXY(x, y, mf, grp, center):
    """Prepare the inputs x and y for the classification routines.

    x becomes a 2-D ``np.matrix`` and y a flat array. x is transposed when
    its number of rows differs from ``len(y)`` so that, for well-formed
    inputs, x.shape = (ntrials, nfeat).

    Args:
        x: array-like
            Data to prepare.
        y: array-like
            Label vector.
        mf: bool
            When false (and grp is None), each column of x is returned as a
            separate (ntrials, 1) array. When true (and grp is None), x is
            returned whole inside a single-element list.
        grp: array-like or None
            Group label of each column. When given, mf is forced to True and
            one array per unique group is returned.
        center: bool
            If true, subtract the column mean from each column, then divide
            by that mean.

    Returns:
        x: list
            The prepared pieces of x (see mf / grp above).
        y: array
            The flattened labels.

    Raises:
        ValueError: if grp does not hold one entry per column of x.
    """
    x = np.matrix(x)
    y = np.ravel(y)

    # Rows of x must match the labels; flip x otherwise.
    if len(y) != x.shape[0]:
        x = x.T

    # Normalize features (the per-column mean broadcasts over the rows):
    if center:
        feat_mean = np.mean(x, 0)
        x = (x - feat_mean) / feat_mean

    # Group parameter (giving grp implies multi-features):
    use_grp = grp is not None
    if use_grp:
        grp = np.ravel(grp)

    if use_grp:
        if grp.size != x.shape[1]:
            raise ValueError('The grp parameter must have the same size as the'
                             ' number of features (' + str(x.shape[1]) + ')')
        out = [np.array(x[:, np.where(grp == g)[0]]) for g in uorderlst(grp)]
    elif mf:
        out = [x]
    else:
        out = [np.array(x[:, j]) for j in range(x.shape[1])]

    return out, y
Esempio n. 5
0
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ between subjects and must match the
                number of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep), where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels

                'label_rnd' is based on permutations.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels with
                the 'label_rnd' method. Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Check x, y. A (shallow) copy of the input list is kept because
        # _checkXY is called again on it for every permutation:
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, _, _ = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == 'label_rnd':
            # Seeded generator: rndstate was previously ignored, which made
            # the shuffles depend on the global numpy random state.
            rng = np.random.RandomState(rndstate)
            y_sh = [_checkXY(xbk, [rng.permutation(i) for i in self._y],
                             mf, grp, center, self)[1]
                    for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(
                delayed(_fit)(x, y_perm, train, test, self, 1)
                for y_perm in y_sh)

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Try to get featinfo. This stays best-effort, but only regular
        # errors are ignored (KeyboardInterrupt / SystemExit propagate):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                     da[:, np.newaxis], grp=grp,
                                                     pperm=pperm)
        except Exception:
            pass

        return da, pvalue, daperm
Esempio n. 6
0
def permIntraClass(y, rnd=0):
    """Generate an intra-class permutation of the trial indices.

    Each position of the result holds the index of a trial that belongs to
    the same class as the trial at that position in y, so indexing the data
    with the result shuffles trials inside every class while staying aligned
    with y. Classes may have different numbers of trials.

    Args:
        y: array-like
            Label vector (flattened before use).
        rnd: integer, optional, [def: 0]
            Seed of the ``np.random.RandomState`` used for the shuffling.

    Returns:
        perm: array
            Integer array of ``len(y)`` permuted indices.
    """
    y = np.ravel(y)
    rng = np.random.RandomState(rnd)
    perm = np.arange(len(y))
    for k in uorderlst(y):
        # Shuffle the indices of class k among the positions of class k.
        idx = np.where(y == k)[0]
        perm[idx] = rng.permutation(idx)
    return perm
Esempio n. 7
0
    def fit(self,
            x,
            mf=False,
            center=False,
            grp=None,
            method='bino',
            n_perm=200,
            rndstate=0,
            n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), where N is
                the number of trials (which should be the length of y) and M,
                the number of columns, is a supplementary dimension for
                classifying data. If M = 1, the data is considered as a
                single feature. If M > 1, use the parameter mf to say whether
                x should be considered as a single feature (mf=False) or as
                multi-features (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep), where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations (their names are
                matched case-insensitively). Methods 2 and 3 should provide
                similar results, but 4 should be more conservative.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used by the 'label_rnd' and
                'full_rnd' methods. Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is not one of the four supported methods.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                                  self._cv.cvr, mf, grp,
                                                  center, n_jobs)
        nfeat = len(x)
        rndstate = np.random.RandomState(rndstate)
        score = np.array([np.mean(k) for k in da])

        # Permutation methods are matched on the lower-cased name. Any other
        # name (even one containing '_rnd') ends in the ValueError below
        # instead of failing later on an undefined variable.
        perm_method = method.lower()

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        elif perm_method in ('label_rnd', 'full_rnd', 'intra_rnd'):

            # Generate idx tricks (i: permutation index, k: feature index):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if perm_method == 'label_rnd':
                y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                    x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                                              for i, k in iteract)

            # -> Full randomization :
            elif perm_method == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(rndstate.permutation(x[k]), y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # -> Shuffle intra-class (seeded by the permutation index):
            else:
                cvs = Parallel(n_jobs=n_jobs)(
                    delayed(_cvscore)(x[k][permIntraClass(y, rnd=i), :], y,
                                      clone(self._clf), self._cv.cvr[0])
                    for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method ' + method + ' found')

        # Get features informations. This stays best-effort, but only regular
        # errors are ignored (KeyboardInterrupt / SystemExit propagate):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf,
                                                     self._cv,
                                                     da,
                                                     grp=grp,
                                                     pperm=pperm)
        except Exception:
            pass

        return da.T, pvalue, daPerm
Esempio n. 8
0
    def fit(self, x, mf=False, center=False, grp=None,
            method='bino', n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: array
                Data to classify. Consider that x.shape = (N, M), where N is
                the number of trials (which should be the length of y) and M,
                the number of columns, is a supplementary dimension for
                classifying data. If M = 1, the data is considered as a
                single feature. If M > 1, use the parameter mf to say whether
                x should be considered as a single feature (mf=False) or as
                multi-features (mf=True)

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep), where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Four methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels
                    - 'full_rnd': randomly shuffle the whole array x
                    - 'intra_rnd': randomly shuffle x inside each class and
                      each feature

                Methods 2, 3 and 4 are based on permutations (their names are
                matched case-insensitively). Methods 2 and 3 should provide
                similar results, but 4 should be more conservative.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the methods 2, 3 and 4

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used by the 'label_rnd' and
                'full_rnd' methods. Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is not one of the four supported methods.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Get the true decoding accuracy:
        da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf, self._cv.cvr,
                                                  mf, grp, center, n_jobs)
        nfeat = len(x)
        rndstate = np.random.RandomState(rndstate)
        score = np.array([np.mean(k) for k in da])

        # Permutation methods are matched on the lower-cased name. Any other
        # name (even one containing '_rnd') ends in the ValueError below
        # instead of failing later on an undefined variable.
        perm_method = method.lower()

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == 'bino':
            pvalue = bino_da2p(y, score)
            daPerm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        elif perm_method in ('label_rnd', 'full_rnd', 'intra_rnd'):

            # Generate idx tricks (i: permutation index, k: feature index):
            iteract = product(range(n_perm), range(nfeat))

            # -> Shuffle the labels :
            if perm_method == 'label_rnd':
                y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                        for i, k in iteract)

            # -> Full randomization :
            elif perm_method == 'full_rnd':
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        rndstate.permutation(x[k]), y, clone(self._clf),
                        self._cv.cvr[0]) for i, k in iteract)

            # -> Shuffle intra-class (seeded by the permutation index):
            else:
                cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                        x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                        self._cv.cvr[0]) for i, k in iteract)

            # Reconstruct daPerm and get the associated p-value:
            daPerm, _, _ = zip(*cvs)
            daPerm = np.array(daPerm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError('No statistical method '+method+' found')

        # Get features informations. This stays best-effort, but only regular
        # errors are ignored (KeyboardInterrupt / SystemExit propagate):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                     grp=grp, pperm=pperm)
        except Exception:
            pass

        return da.T, pvalue, daPerm
Esempio n. 9
0
def permIntraClass(y, rnd=0):
    """Generate an intra-class permutation of the trial indices.

    The returned index array only swaps trials that share the same label:
    position p always receives the index of a trial whose class is the class
    of y[p]. Data indexed with it therefore stays aligned with y, and classes
    with different numbers of trials are supported.

    Args:
        y: array-like
            Label vector (flattened before use).
        rnd: integer, optional, [def: 0]
            Seed of the ``np.random.RandomState`` used for the shuffling.

    Returns:
        perm: array
            Integer array of ``len(y)`` permuted indices.
    """
    y = np.ravel(y)
    rng = np.random.RandomState(rnd)
    perm = np.arange(len(y))
    for k in uorderlst(y):
        # Positions of class k receive a shuffled copy of their own indices.
        where_k = np.where(y == k)[0]
        perm[where_k] = rng.permutation(where_k)
    return perm
Esempio n. 10
0
    def fit(self, x, mf=False, center=False, grp=None, method="bino", n_perm=200, rndstate=0, n_jobs=-1):
        """Apply the classification and cross-validation objects to the array x.

        Args:
            x: list
                List of datasets, one per subject. All the datasets in the
                list should have the same number of columns, but the number
                of lines can differ between subjects and must match the
                number of lines of the corresponding label vector of y.

        Kargs:
            mf: bool, optional, [def: False]
                If mf=False, the returned decoding accuracy (da) will have a
                shape of (1, rep), where rep is the number of repetitions.
                This means that all the features are used together. If
                mf=True, da.shape = (M, rep), where M is the number of
                columns of x.

            center: optional, bool, [def: False]
                Normalize features with a zero mean by subtracting then
                dividing by the mean. The center parameter should be set to
                True if the classifier is a svm.

            grp: array, optional, [def: None]
                If mf=True, the grp parameter allows to define groups of
                features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
                this means that 3 groups of features will be considered:
                (0,1,2)

            method: string, optional, [def: 'bino']
                Two methods are implemented to test the statistical
                significance of the decoding accuracy:

                    - 'bino': binomial test
                    - 'label_rnd': randomly shuffle the labels

                'label_rnd' is based on permutations.

            n_perm: integer, optional, [def: 200]
                Number of permutations for the 'label_rnd' method.

            rndstate: integer, optional, [def: 0]
                Seed of the random generator used to shuffle the labels with
                the 'label_rnd' method. Useful to reproduce results.

            n_jobs: integer, optional, [def: -1]
                Control the number of jobs to compute the decoding accuracy.
                If n_jobs = -1, all the jobs are used.

        Return:
            da: array
                The decoding accuracy of shape n_repetitions x n_features

            pvalue: array
                Array of associated pvalue of shape n_features

            daPerm: array
                Array of all the decodings obtained for each permutation, of
                shape n_perm x n_features (None with the 'bino' method)

        Raises:
            ValueError: if method is neither 'bino' nor 'label_rnd'.

        .. rubric:: Footnotes
        .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
        .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
        """
        # Check x, y. A (shallow) copy of the input list is kept because
        # _checkXY is called again on it for every permutation:
        xbk = x.copy()
        x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
        _, nfeat = x.shape

        # Run classification:
        da, _, _ = _fit(x, y, train, test, self, n_jobs)

        # Get statistics:
        # -------------------------------------------------------------
        # Binomial :
        # -------------------------------------------------------------
        if method == "bino":
            pvalue = bino_da2p(self._ry, da)
            daperm = None
            pperm = None
        # -------------------------------------------------------------
        # Permutations :
        # -------------------------------------------------------------
        # -> Shuffle the labels :
        elif method == "label_rnd":
            # Seeded generator: rndstate was previously ignored, which made
            # the shuffles depend on the global numpy random state.
            rng = np.random.RandomState(rndstate)
            y_sh = [
                _checkXY(xbk, [rng.permutation(i) for i in self._y], mf, grp, center, self)[1]
                for _ in range(n_perm)
            ]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(x, y_perm, train, test, self, 1) for y_perm in y_sh)

            # Reconstruct daperm and get the associated p-value:
            daperm, _, _ = zip(*cvs)
            daperm = np.array(daperm).reshape(n_perm, nfeat)
            pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
            pperm = pvalue

        else:
            raise ValueError("No statistical method " + method + " found")

        # Try to get featinfo. This stays best-effort, but only regular
        # errors are ignored (KeyboardInterrupt / SystemExit propagate):
        try:
            if grp is not None:
                grp = uorderlst(grp)
            else:
                grp = np.arange(nfeat)
            self.info.featinfo = self.info._featinfo(self._clf, self._cv, da[:, np.newaxis], grp=grp, pperm=pperm)
        except Exception:
            pass

        return da, pvalue, daperm