def __init__(self, y, Id='0', p=0.05, n_perm=200, stat='bino', threshold=None,
             nbest=10, direction='forward', occurence='i%',
             clfIn={'clf': 'lda'}, clfOut={'clf': 'lda'},
             cvIn={'cvtype': 'skfold', 'n_folds': 10, 'rep': 1},
             cvOut={'cvtype': 'skfold', 'n_folds': 10, 'rep': 10}):
    """Store the statistical, classifier and cross-validation settings.

    Args:
        y: array
            Label vector. A flattened version (np.ravel) is kept as _y.

    Kargs:
        Id: string, optional, [def: '0']
            Identifier, stored unchanged.

        p: float, optional, [def: 0.05]
            p-value stored as _p. It is overridden when threshold is
            not None (see threshold).

        n_perm: integer, optional, [def: 200]
            Stored unchanged (presumably a number of permutations).

        stat: string, optional, [def: 'bino']
            Stored unchanged (presumably the name of the statistical
            test to use).

        threshold: optional, [def: None]
            If not None, p is replaced by bino_da2p(y, threshold).

        nbest: integer, optional, [def: 10]
        direction: string, optional, [def: 'forward']
        occurence: string, optional, [def: 'i%']
            Stored unchanged. Their meaning is defined by the methods
            that read them, not by this constructor.

        clfIn, clfOut: dict, optional
            Keyword arguments forwarded to defClf(y, **...).

        cvIn, cvOut: dict, optional
            Keyword arguments forwarded to defCv(y, **...).

    The setup attribute is a dictionary summarising all of the above;
    the classifier / cross-validation entries hold the lgStr attribute
    of the objects built by defClf and defCv.
    """
    # The dict defaults of the signature are created once and shared by
    # every call. Keep private copies so that a later in-place change of
    # one instance's settings cannot leak into the defaults (and hence
    # into every other instance), nor into a dict owned by the caller.
    clfIn, clfOut = dict(clfIn), dict(clfOut)
    cvIn, cvOut = dict(cvIn), dict(cvOut)

    self._Id = Id
    self._stat = stat
    self._y = np.ravel(y)
    # A threshold takes precedence over the p-value given by the user:
    if threshold is not None:
        p = bino_da2p(y, threshold)
    self._p = p
    self._nbest = nbest
    self._n_perm = n_perm
    self._direction = direction
    self._threshold = threshold
    self._occurence = occurence
    self._clfIn = clfIn
    self._clfOut = clfOut
    self._cvIn = cvIn
    self._cvOut = cvOut
    self.setup = {
        'Id': Id,
        'p': p,
        'n_perm': n_perm,
        'stat': stat,
        'nbest': nbest,
        'threshold': threshold,
        'direction': direction,
        'occurence': occurence,
        'clfIn': defClf(y, **clfIn).lgStr,
        'clfOut': defClf(y, **clfOut).lgStr,
        'cvIn': defCv(y, **cvIn).lgStr,
        'cvOut': defCv(y, **cvOut).lgStr,
    }
def __init__(self, y, Id='0', p=0.05, n_perm=200, stat='bino', threshold=None,
             nbest=10, direction='forward', occurence='i%',
             clfIn={'clf': 'lda'}, clfOut={'clf': 'lda'},
             cvIn={'cvtype': 'skfold', 'n_folds': 10, 'rep': 1},
             cvOut={'cvtype': 'skfold', 'n_folds': 10, 'rep': 10}):
    """Store the statistical, classifier and cross-validation settings.

    Args:
        y: array
            Label vector. A flattened version (np.ravel) is kept as _y.

    Kargs:
        Id: string, optional, [def: '0']
            Identifier, stored unchanged.

        p: float, optional, [def: 0.05]
            p-value stored as _p. It is overridden when threshold is
            not None (see threshold).

        n_perm: integer, optional, [def: 200]
            Stored unchanged (presumably a number of permutations).

        stat: string, optional, [def: 'bino']
            Stored unchanged (presumably the name of the statistical
            test to use).

        threshold: optional, [def: None]
            If not None, p is replaced by bino_da2p(y, threshold).

        nbest: integer, optional, [def: 10]
        direction: string, optional, [def: 'forward']
        occurence: string, optional, [def: 'i%']
            Stored unchanged. Their meaning is defined by the methods
            that read them, not by this constructor.

        clfIn, clfOut: dict, optional
            Keyword arguments forwarded to defClf(y, **...).

        cvIn, cvOut: dict, optional
            Keyword arguments forwarded to defCv(y, **...).

    The setup attribute is a dictionary summarising all of the above;
    the classifier / cross-validation entries hold the lgStr attribute
    of the objects built by defClf and defCv.
    """
    # The dict defaults of the signature are created once and shared by
    # every call. Keep private copies so that a later in-place change of
    # one instance's settings cannot leak into the defaults (and hence
    # into every other instance), nor into a dict owned by the caller.
    clfIn, clfOut = dict(clfIn), dict(clfOut)
    cvIn, cvOut = dict(cvIn), dict(cvOut)

    self._Id = Id
    self._stat = stat
    self._y = np.ravel(y)
    # A threshold takes precedence over the p-value given by the user:
    if threshold is not None:
        p = bino_da2p(y, threshold)
    self._p = p
    self._nbest = nbest
    self._n_perm = n_perm
    self._direction = direction
    self._threshold = threshold
    self._occurence = occurence
    self._clfIn = clfIn
    self._clfOut = clfOut
    self._cvIn = cvIn
    self._cvOut = cvOut
    self.setup = {
        'Id': Id,
        'p': p,
        'n_perm': n_perm,
        'stat': stat,
        'nbest': nbest,
        'threshold': threshold,
        'direction': direction,
        'occurence': occurence,
        'clfIn': defClf(y, **clfIn).lgStr,
        'clfOut': defClf(y, **clfOut).lgStr,
        'cvIn': defCv(y, **cvIn).lgStr,
        'cvOut': defCv(y, **cvOut).lgStr,
    }
def fit(self, x, mf=False, center=False, grp=None, method='bino', n_perm=200,
        rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: list
            List of datasets, one per subject. All the datasets in the
            list should have the same number of columns, but the number
            of rows can differ between subjects and must match the
            length of the corresponding label vector of y.

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If
            mf=True, da.shape = (M, rep), where M is the number of
            columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then
            dividing by the mean. The center parameter should be set to
            True if the classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Two methods are implemented to test the statistical
            significance of the decoding accuracy :

            - 'bino': binomial test
            - 'label_rnd': randomly shuffle the labels (permutations)

        n_perm: integer, optional, [def: 200]
            Number of permutations for the 'label_rnd' method

        rndstate: integer, optional, [def: 0]
            Seed of the random generator used to shuffle the labels.
            Useful to reproduce results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy.
            If n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is neither 'bino' nor 'label_rnd'.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Check x, y (keep the untouched input for the permutations):
    xbk = x.copy()
    x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
    _, nfeat = x.shape

    # Run classification:
    da, _, _ = _fit(x, y, train, test, self, n_jobs)

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == 'bino':
        pvalue = bino_da2p(self._ry, da)
        daperm = None
        pperm = None
    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # -> Shuffle the labels :
    elif method == 'label_rnd':
        # Seeded generator, so that rndstate really makes the shuffles
        # (and therefore the p-values) reproducible:
        rng = np.random.RandomState(rndstate)
        y_sh = [_checkXY(xbk, [rng.permutation(i) for i in self._y],
                         mf, grp, center, self)[1] for _ in range(n_perm)]
        # Each permutation runs on a single job; the parallelism is
        # across permutations.
        cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(
            x, y_sh[k], train, test, self, 1) for k in range(n_perm))

        # Reconstruct daperm and get the associated p-value:
        daperm, _, _ = zip(*cvs)
        daperm = np.array(daperm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
        pperm = pvalue
    else:
        raise ValueError('No statistical method '+method+' found')

    # Try to get featinfo (best effort: a failure here must not prevent
    # the decoding results from being returned):
    try:
        if grp is not None:
            grp = uorderlst(grp)
        else:
            grp = np.arange(nfeat)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                 da[:, np.newaxis],
                                                 grp=grp, pperm=pperm)
    except Exception:
        # Unlike a bare except, this lets KeyboardInterrupt / SystemExit
        # propagate.
        pass

    return da, pvalue, daperm
def fit(self, x, mf=False, center=False, grp=None, method='bino', n_perm=200,
        rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: array
            Data to classify. Consider that x.shape = (N, M), N is the
            number of trials (which should be the length of y). M, the
            number of columns, is a supplementary dimension for
            classifying data. If M = 1, the data is considered as a
            single feature. If M > 1, use the parameter mf to say if x
            should be considered as a single feature (mf=False) or
            multi-features (mf=True)

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If
            mf=True, da.shape = (M, rep), where M is the number of
            columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then
            dividing by the mean. The center parameter should be set to
            True if the classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Four methods are implemented to test the statistical
            significance of the decoding accuracy (the name is not case
            sensitive) :

            - 'bino': binomial test
            - 'label_rnd': randomly shuffle the labels
            - 'full_rnd': randomly shuffle the whole array x
            - 'intra_rnd': randomly shuffle x inside each class and each
              feature

            Methods 2, 3 and 4 are based on permutations. The method 2
            and 3 should provide similar results. But 4 should be more
            conservative.

        n_perm: integer, optional, [def: 200]
            Number of permutations for the methods 2, 3 and 4

        rndstate: integer, optional, [def: 0]
            Fix the random state of the machine. Useful to reproduce
            results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy.
            If n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is not one of the four names above.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Get the true decoding accuracy:
    da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                              self._cv.cvr, mf, grp,
                                              center, n_jobs)
    nfeat = len(x)
    rng = np.random.RandomState(rndstate)
    score = np.array([np.mean(k) for k in da])

    # Normalise the name once so that the outer test and the dispatch
    # below agree. Previously any name containing '_rnd' entered the
    # permutation branch, and an unknown or differently-cased one crashed
    # on an undefined variable instead of raising the ValueError.
    method_key = method.lower()

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method_key == 'bino':
        pvalue = bino_da2p(y, score)
        daPerm = None
        pperm = None
    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    elif method_key in ('label_rnd', 'full_rnd', 'intra_rnd'):
        # (permutation, feature) index pairs, permutation-major so that
        # the flat result list reshapes to (n_perm, nfeat):
        iteract = product(range(n_perm), range(nfeat))
        # -> Shuffle the labels :
        if method_key == 'label_rnd':
            y_sh = [rng.permutation(y) for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                for i, k in iteract)
        # -> Full randomization :
        elif method_key == 'full_rnd':
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                rng.permutation(x[k]), y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)
        # -> Shuffle intra-class :
        else:
            # NOTE(review): the permutation index is what is passed as
            # `rnd`; rndstate is not used by this method - confirm that
            # this is intended.
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # Reconstruct daPerm and get the associated p-value:
        daPerm, _, _ = zip(*cvs)
        daPerm = np.array(daPerm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
        pperm = pvalue
    else:
        raise ValueError('No statistical method ' + method + ' found')

    # Get features informations (best effort: a failure here must not
    # prevent the decoding results from being returned):
    try:
        if grp is not None:
            grp = uorderlst(grp)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                 grp=grp, pperm=pperm)
    except Exception:
        # Unlike a bare except, this lets KeyboardInterrupt / SystemExit
        # propagate.
        pass

    return da.T, pvalue, daPerm
def fit(self, x, mf=False, center=False, grp=None, method='bino', n_perm=200,
        rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: array
            Data to classify. Consider that x.shape = (N, M), N is the
            number of trials (which should be the length of y). M, the
            number of columns, is a supplementary dimension for
            classifying data. If M = 1, the data is considered as a
            single feature. If M > 1, use the parameter mf to say if x
            should be considered as a single feature (mf=False) or
            multi-features (mf=True)

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If
            mf=True, da.shape = (M, rep), where M is the number of
            columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then
            dividing by the mean. The center parameter should be set to
            True if the classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Four methods are implemented to test the statistical
            significance of the decoding accuracy (the name is not case
            sensitive) :

            - 'bino': binomial test
            - 'label_rnd': randomly shuffle the labels
            - 'full_rnd': randomly shuffle the whole array x
            - 'intra_rnd': randomly shuffle x inside each class and each
              feature

            Methods 2, 3 and 4 are based on permutations. The method 2
            and 3 should provide similar results. But 4 should be more
            conservative.

        n_perm: integer, optional, [def: 200]
            Number of permutations for the methods 2, 3 and 4

        rndstate: integer, optional, [def: 0]
            Fix the random state of the machine. Useful to reproduce
            results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy.
            If n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is not one of the four names above.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Get the true decoding accuracy:
    da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                              self._cv.cvr, mf, grp,
                                              center, n_jobs)
    nfeat = len(x)
    rng = np.random.RandomState(rndstate)
    score = np.array([np.mean(k) for k in da])

    # Normalise the name once so that the outer test and the dispatch
    # below agree. Previously any name containing '_rnd' entered the
    # permutation branch, and an unknown or differently-cased one crashed
    # on an undefined variable instead of raising the ValueError.
    method_key = method.lower()

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method_key == 'bino':
        pvalue = bino_da2p(y, score)
        daPerm = None
        pperm = None
    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    elif method_key in ('label_rnd', 'full_rnd', 'intra_rnd'):
        # (permutation, feature) index pairs, permutation-major so that
        # the flat result list reshapes to (n_perm, nfeat):
        iteract = product(range(n_perm), range(nfeat))
        # -> Shuffle the labels :
        if method_key == 'label_rnd':
            y_sh = [rng.permutation(y) for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                for i, k in iteract)
        # -> Full randomization :
        elif method_key == 'full_rnd':
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                rng.permutation(x[k]), y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)
        # -> Shuffle intra-class :
        else:
            # NOTE(review): the permutation index is what is passed as
            # `rnd`; rndstate is not used by this method - confirm that
            # this is intended.
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # Reconstruct daPerm and get the associated p-value:
        daPerm, _, _ = zip(*cvs)
        daPerm = np.array(daPerm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
        pperm = pvalue
    else:
        raise ValueError('No statistical method '+method+' found')

    # Get features informations (best effort: a failure here must not
    # prevent the decoding results from being returned):
    try:
        if grp is not None:
            grp = uorderlst(grp)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                 grp=grp, pperm=pperm)
    except Exception:
        # Unlike a bare except, this lets KeyboardInterrupt / SystemExit
        # propagate.
        pass

    return da.T, pvalue, daPerm
def fit(self, x, mf=False, center=False, grp=None, method="bino", n_perm=200,
        rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: list
            List of datasets, one per subject. All the datasets in the
            list should have the same number of columns, but the number
            of rows can differ between subjects and must match the
            length of the corresponding label vector of y.

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If
            mf=True, da.shape = (M, rep), where M is the number of
            columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then
            dividing by the mean. The center parameter should be set to
            True if the classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Two methods are implemented to test the statistical
            significance of the decoding accuracy :

            - 'bino': binomial test
            - 'label_rnd': randomly shuffle the labels (permutations)

        n_perm: integer, optional, [def: 200]
            Number of permutations for the 'label_rnd' method

        rndstate: integer, optional, [def: 0]
            Seed of the random generator used to shuffle the labels.
            Useful to reproduce results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy.
            If n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is neither 'bino' nor 'label_rnd'.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Check x, y (keep the untouched input for the permutations):
    xbk = x.copy()
    x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
    _, nfeat = x.shape

    # Run classification:
    da, _, _ = _fit(x, y, train, test, self, n_jobs)

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == "bino":
        pvalue = bino_da2p(self._ry, da)
        daperm = None
        pperm = None
    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # -> Shuffle the labels :
    elif method == "label_rnd":
        # Seeded generator, so that rndstate really makes the shuffles
        # (and therefore the p-values) reproducible:
        rng = np.random.RandomState(rndstate)
        y_sh = [
            _checkXY(xbk, [rng.permutation(i) for i in self._y],
                     mf, grp, center, self)[1]
            for _ in range(n_perm)
        ]
        # Each permutation runs on a single job; the parallelism is
        # across permutations.
        cvs = Parallel(n_jobs=n_jobs)(
            delayed(_fit)(x, y_sh[k], train, test, self, 1)
            for k in range(n_perm))

        # Reconstruct daperm and get the associated p-value:
        daperm, _, _ = zip(*cvs)
        daperm = np.array(daperm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
        pperm = pvalue
    else:
        raise ValueError("No statistical method " + method + " found")

    # Try to get featinfo (best effort: a failure here must not prevent
    # the decoding results from being returned):
    try:
        if grp is not None:
            grp = uorderlst(grp)
        else:
            grp = np.arange(nfeat)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                 da[:, np.newaxis],
                                                 grp=grp, pperm=pperm)
    except Exception:
        # Unlike a bare except, this lets KeyboardInterrupt / SystemExit
        # propagate.
        pass

    return da, pvalue, daperm