def checkXY(x, y, mf, grp, center):
    """Prepare the inputs x and y.

    Args:
        x: array-like
            Data, converted to a 2-D array. It is transposed when its first
            dimension does not match the length of y, so that
            x.shape = (ntrials, nfeat).

        y: array-like
            Label vector. It is flattened to one dimension.

        mf: bool
            If True (and grp is None), all the columns of x are kept
            together in a single array. If False, each column of x is
            returned as a separate (ntrials, 1) array. Ignored when grp is
            given.

        grp: array-like or None
            One label per column of x. When given, the columns sharing the
            same label are gathered in the same array, one array per label
            in the order returned by uorderlst.

        center: bool
            If True, each column has its mean subtracted and is then divided
            by that mean.

    Return:
        x: list
            List of 2-D numpy arrays (always plain ndarrays).

        y: array
            The flattened label vector.

    Raises:
        ValueError: if x has more than two dimensions, or if the size of grp
            differs from the number of columns of x.
    """
    # Plain ndarray instead of the deprecated np.matrix. ndmin=2 keeps the
    # matrix convention that a 1-D input is a row vector, and the default
    # copy keeps the caller's data untouched.
    x, y = np.array(x, ndmin=2), np.ravel(y)
    if x.ndim != 2:
        raise ValueError('matrix must be 2-dimensional')
    if x.shape[0] != len(y):
        x = x.T
    # Normalize features :
    if center:
        x_m = np.mean(x, 0, keepdims=True)
        x = (x - x_m) / x_m
    # Group parameter (it always implies multi-features) :
    if grp is not None:
        grp = np.ravel(grp)
        if grp.size != x.shape[1]:
            raise ValueError('The grp parameter must have the same size as the'
                             ' number of features ('+str(x.shape[1])+')')
        x = [x[:, np.where(grp == k)[0]] for k in uorderlst(grp)]
    elif mf:
        x = [x]
    else:
        # Index with a list so that every feature stays a 2-D column.
        x = [x[:, [k]] for k in range(x.shape[1])]
    return x, y
def _checkXY(x, y, mf, grp, center, self):
    """Prepare multi-subject data, labels and cross-validation indices.

    Args:
        x: list
            One 2-D array per subject. All of them must have the same
            number of columns.

        y: list
            One entry per subject; each entry becomes one row of the
            returned y.

        mf: bool
            If True (and grp is None), each subject array is kept whole.
            If False, each subject array is split column by column.
            Forced to True when grp is given.

        grp: array-like or None
            One label per column. Columns sharing a label are gathered, one
            group per label in the order returned by uorderlst.

        center: bool
            If True, subtract the column mean then divide by it, for each
            subject.

        self: object
            Must expose _cv (with a split method) and _nsuj.

    Return:
        x, y: pandas DataFrames with one row per subject.
        train, test: lists of index lists yielded by self._cv.split.

    Raises:
        ValueError: if subjects disagree on the number of columns, or if
            grp does not have one entry per column.
    """
    # Every subject must have as many columns as the first one.
    n_cols = [subject.shape[1] for subject in x]
    if any(n != x[0].shape[1] for n in n_cols):
        raise ValueError('All features across subjects should have '
                         'the same number of features')

    # Center data:
    if center:
        centered = []
        for subject in x:
            tiled_mean = np.tile(subject.mean(0), (subject.shape[0], 1))
            centered.append((subject - tiled_mean) / subject.mean(0))
        x = centered

    # A group vector always implies the multi-features layout.
    grouped = grp is not None
    if grouped:
        grp = np.ravel(grp)
        if any(subject.shape[1] != len(grp) for subject in x):
            raise ValueError('The length of the grp parameter must be equal '
                             'to the number of features for each subject.')

    # One DataFrame row per subject:
    if grouped:
        labels = uorderlst(grp)
        rows = [[subject[:, np.where(grp == lab)[0]] for lab in labels]
                for subject in x]
    elif mf:
        rows = [[subject] for subject in x]
    else:
        rows = [np.ndarray.tolist(subject.T) for subject in x]
    x = pd.DataFrame(rows)

    # Training / testing indices: the split is made across subjects, the
    # random vector only serves as a placeholder of the right length.
    splits = self._cv.split(np.random.rand(self._nsuj),
                            np.arange(self._nsuj), np.arange(self._nsuj))
    train, test = [], []
    for train_idx, test_idx in splits:
        train.append(list(train_idx))
        test.append(list(test_idx))

    y = pd.DataFrame([[labels_k] for labels_k in y])
    return x, y, train, test
def _checkXY(x, y, mf, grp, center, self):
    """Prepare multi-subject data, labels and cross-validation indices.

    Args:
        x: list
            One 2-D array per subject. All of them must have the same
            number of columns.

        y: list
            One entry per subject; each entry becomes one row of the
            returned y.

        mf: bool
            If True (and grp is None), each subject array is kept whole.
            If False, each subject array is split column by column.
            Forced to True when grp is given.

        grp: array-like or None
            One label per column. Columns sharing a label are gathered, one
            group per label in the order returned by uorderlst.

        center: bool
            If True, subtract the column mean then divide by it, for each
            subject.

        self: object
            Must expose _cv (with a split method) and _nsuj.

    Return:
        x, y: pandas DataFrames with one row per subject.
        train, test: lists of index lists yielded by self._cv.split.

    Raises:
        ValueError: if subjects disagree on the number of columns, or if
            grp does not have one entry per column.
    """
    # Every subject must have as many columns as the first one.
    n_cols = [subject.shape[1] for subject in x]
    if any(n != x[0].shape[1] for n in n_cols):
        raise ValueError("All features across subjects should have "
                         "the same number of features")

    # Center data:
    if center:
        centered = []
        for subject in x:
            tiled_mean = np.tile(subject.mean(0), (subject.shape[0], 1))
            centered.append((subject - tiled_mean) / subject.mean(0))
        x = centered

    # A group vector always implies the multi-features layout.
    grouped = grp is not None
    if grouped:
        grp = np.ravel(grp)
        if any(subject.shape[1] != len(grp) for subject in x):
            raise ValueError(
                "The length of the grp parameter must be equal "
                "to the number of features for each subject."
            )

    # One DataFrame row per subject:
    if grouped:
        labels = uorderlst(grp)
        rows = [[subject[:, np.where(grp == lab)[0]] for lab in labels]
                for subject in x]
    elif mf:
        rows = [[subject] for subject in x]
    else:
        rows = [np.ndarray.tolist(subject.T) for subject in x]
    x = pd.DataFrame(rows)

    # Training / testing indices: the split is made across subjects, the
    # random vector only serves as a placeholder of the right length.
    splits = self._cv.split(np.random.rand(self._nsuj),
                            np.arange(self._nsuj), np.arange(self._nsuj))
    train, test = [], []
    for train_idx, test_idx in splits:
        train.append(list(train_idx))
        test.append(list(test_idx))

    y = pd.DataFrame([[labels_k] for labels_k in y])
    return x, y, train, test
def checkXY(x, y, mf, grp, center):
    """Prepare the inputs x and y.

    Args:
        x: array-like
            Data, converted to a 2-D array. It is transposed when its first
            dimension does not match the length of y, so that
            x.shape = (ntrials, nfeat).

        y: array-like
            Label vector. It is flattened to one dimension.

        mf: bool
            If True (and grp is None), all the columns of x are kept
            together in a single array. If False, each column of x is
            returned as a separate (ntrials, 1) array. Ignored when grp is
            given.

        grp: array-like or None
            One label per column of x. When given, the columns sharing the
            same label are gathered in the same array, one array per label
            in the order returned by uorderlst.

        center: bool
            If True, each column has its mean subtracted and is then divided
            by that mean.

    Return:
        x: list
            List of 2-D numpy arrays (always plain ndarrays).

        y: array
            The flattened label vector.

    Raises:
        ValueError: if x has more than two dimensions, or if the size of grp
            differs from the number of columns of x.
    """
    # Plain ndarray instead of the deprecated np.matrix. ndmin=2 keeps the
    # matrix convention that a 1-D input is a row vector, and the default
    # copy keeps the caller's data untouched.
    x, y = np.array(x, ndmin=2), np.ravel(y)
    if x.ndim != 2:
        raise ValueError('matrix must be 2-dimensional')
    if x.shape[0] != len(y):
        x = x.T
    # Normalize features :
    if center:
        x_m = np.mean(x, 0, keepdims=True)
        x = (x - x_m) / x_m
    # Group parameter (it always implies multi-features) :
    if grp is not None:
        grp = np.ravel(grp)
        if grp.size != x.shape[1]:
            raise ValueError('The grp parameter must have the same size as the'
                             ' number of features (' + str(x.shape[1]) + ')')
        x = [x[:, np.where(grp == k)[0]] for k in uorderlst(grp)]
    elif mf:
        x = [x]
    else:
        # Index with a list so that every feature stays a 2-D column.
        x = [x[:, [k]] for k in range(x.shape[1])]
    return x, y
def fit(self, x, mf=False, center=False, grp=None, method='bino',
        n_perm=200, rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: list
            List of datasets, one per subject. All the datasets in the list
            should have the same number of columns, but the number of lines
            can differ across subjects and must match the number of lines
            of the corresponding label vector of y.

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If mf=True,
            da.shape = (M, rep), where M is the number of columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then dividing
            by the mean. The center parameter should be set to True if the
            classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Two methods are implemented to test the statistical
            significance of the decoding accuracy :

                - 'bino': binomial test
                - 'label_rnd': randomly shuffle the labels (permutations)

        n_perm: integer, optional, [def: 200]
            Number of permutations for the 'label_rnd' method

        rndstate: integer, optional, [def: 0]
            Seed of the random generator used to shuffle the labels.
            Useful to reproduce results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy. If
            n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is neither 'bino' nor 'label_rnd'.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Check x, y (keep the raw list: it is re-checked for each permutation):
    xbk = x.copy()
    x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
    nfeat = x.shape[1]
    # Run classification:
    da, _, _ = _fit(x, y, train, test, self, n_jobs)

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == 'bino':
        pvalue = bino_da2p(self._ry, da)
        daperm = None
        pperm = None

    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # -> Shuffle the labels :
    elif method == 'label_rnd':
        # Dedicated generator seeded with rndstate, so that the shuffled
        # labels are reproducible as documented.
        rng = np.random.RandomState(rndstate)
        y_sh = [_checkXY(xbk, [rng.permutation(i) for i in self._y],
                         mf, grp, center, self)[1] for _ in range(n_perm)]
        cvs = Parallel(n_jobs=n_jobs)(delayed(_fit)(
            x, y_sh[k], train, test, self, 1) for k in range(n_perm))

        # Reconstruct daperm and get the associated p-value:
        daperm, _, _ = zip(*cvs)
        daperm = np.array(daperm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
        pperm = pvalue

    else:
        raise ValueError('No statistical method '+method+' found')

    # Try to get featinfo. This is best-effort, so ordinary errors are
    # ignored, but KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        if grp is not None:
            grp = uorderlst(grp)
        else:
            grp = np.arange(nfeat)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                 da[:, np.newaxis],
                                                 grp=grp, pperm=pperm)
    except Exception:
        pass

    return da, pvalue, daperm
def permIntraClass(y, rnd=0):
    """Generate intra-class permutations.

    Args:
        y: array-like
            Label vector. It is flattened to one dimension.

    Kargs:
        rnd: integer, optional, [def: 0]
            Seed of the random generator.

    Return:
        perm: array
            Integer index vector of the same length as y. Indexing the
            trials with perm shuffles them inside each class only: every
            position receives the index of a trial that has the same label.
    """
    y = np.ravel(y)
    rng = np.random.RandomState(rnd)
    perm = np.arange(len(y))
    # Classes are visited in the order returned by uorderlst, one draw per
    # class.
    for k in uorderlst(y):
        pos = np.where(y == k)[0]
        # Write the shuffled indices back to the positions of this class.
        # This stays a true intra-class permutation even when y is not
        # grouped by class, and it works with unbalanced classes.
        perm[pos] = rng.permutation(pos)
    return perm
def fit(self, x, mf=False, center=False, grp=None, method='bino',
        n_perm=200, rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: array
            Data to classify. Consider that x.shape = (N, M), N is the
            number of trials (which should be the length of y). M, the
            number of columns, is a supplementary dimension for classifying
            data. If M = 1, the data is considered as a single feature.
            If M > 1, use the parameter mf to say if x should be considered
            as a single feature (mf=False) or multi-features (mf=True)

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If mf=True,
            da.shape = (M, rep), where M is the number of columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then dividing
            by the mean. The center parameter should be set to True if the
            classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Four methods are implemented to test the statistical
            significance of the decoding accuracy :

                - 'bino': binomial test
                - 'label_rnd': randomly shuffle the labels
                - 'full_rnd': randomly shuffle the whole array x
                - 'intra_rnd': randomly shuffle x inside each class and
                  each feature

            Methods 2, 3 and 4 are based on permutations and their name is
            matched case-insensitively. The methods 2 and 3 should provide
            similar results. But 4 should be more conservative.

        n_perm: integer, optional, [def: 200]
            Number of permutations for the methods 2, 3 and 4

        rndstate: integer, optional, [def: 0]
            Fix the random state of the machine. Useful to reproduce
            results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy. If
            n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is not one of the four methods above.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Get the true decoding accuracy:
    da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                              self._cv.cvr, mf, grp,
                                              center, n_jobs)
    nfeat = len(x)
    rndstate = np.random.RandomState(rndstate)
    score = np.array([np.mean(k) for k in da])
    perm_method = method.lower()

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == 'bino':
        pvalue = bino_da2p(y, score)
        daPerm = None
        pperm = None

    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # Only the three known names enter this branch. Any other '*_rnd'
    # string now reaches the ValueError below instead of failing on an
    # unbound variable.
    elif perm_method in ('label_rnd', 'full_rnd', 'intra_rnd'):
        # One job per (permutation, feature) pair:
        iteract = product(range(n_perm), range(nfeat))

        # -> Shuffle the labels :
        if perm_method == 'label_rnd':
            y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                for i, k in iteract)

        # -> Full randomization :
        elif perm_method == 'full_rnd':
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                rndstate.permutation(x[k]), y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # -> Shuffle intra-class (the permutation index is the seed) :
        else:
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # Reconstruct daPerm and get the associated p-value:
        daPerm, _, _ = zip(*cvs)
        daPerm = np.array(daPerm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
        pperm = pvalue

    else:
        raise ValueError('No statistical method ' + method + ' found')

    # Get features informations. This is best-effort, so ordinary errors
    # are ignored, but KeyboardInterrupt / SystemExit are no longer
    # swallowed.
    try:
        if grp is not None:
            grp = uorderlst(grp)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                 grp=grp, pperm=pperm)
    except Exception:
        pass

    return da.T, pvalue, daPerm
def fit(self, x, mf=False, center=False, grp=None, method='bino',
        n_perm=200, rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: array
            Data to classify. Consider that x.shape = (N, M), N is the
            number of trials (which should be the length of y). M, the
            number of columns, is a supplementary dimension for classifying
            data. If M = 1, the data is considered as a single feature.
            If M > 1, use the parameter mf to say if x should be considered
            as a single feature (mf=False) or multi-features (mf=True)

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If mf=True,
            da.shape = (M, rep), where M is the number of columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then dividing
            by the mean. The center parameter should be set to True if the
            classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Four methods are implemented to test the statistical
            significance of the decoding accuracy :

                - 'bino': binomial test
                - 'label_rnd': randomly shuffle the labels
                - 'full_rnd': randomly shuffle the whole array x
                - 'intra_rnd': randomly shuffle x inside each class and
                  each feature

            Methods 2, 3 and 4 are based on permutations and their name is
            matched case-insensitively. The methods 2 and 3 should provide
            similar results. But 4 should be more conservative.

        n_perm: integer, optional, [def: 200]
            Number of permutations for the methods 2, 3 and 4

        rndstate: integer, optional, [def: 0]
            Fix the random state of the machine. Useful to reproduce
            results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy. If
            n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is not one of the four methods above.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Get the true decoding accuracy:
    da, x, y, self._ytrue, self._ypred = _fit(x, self._y, self._clf,
                                              self._cv.cvr, mf, grp,
                                              center, n_jobs)
    nfeat = len(x)
    rndstate = np.random.RandomState(rndstate)
    score = np.array([np.mean(k) for k in da])
    perm_method = method.lower()

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == 'bino':
        pvalue = bino_da2p(y, score)
        daPerm = None
        pperm = None

    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # Only the three known names enter this branch. Any other '*_rnd'
    # string now reaches the ValueError below instead of failing on an
    # unbound variable.
    elif perm_method in ('label_rnd', 'full_rnd', 'intra_rnd'):
        # One job per (permutation, feature) pair:
        iteract = product(range(n_perm), range(nfeat))

        # -> Shuffle the labels :
        if perm_method == 'label_rnd':
            y_sh = [rndstate.permutation(y) for _ in range(n_perm)]
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k], y_sh[i], clone(self._clf), self._cv.cvr[0])
                for i, k in iteract)

        # -> Full randomization :
        elif perm_method == 'full_rnd':
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                rndstate.permutation(x[k]), y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # -> Shuffle intra-class (the permutation index is the seed) :
        else:
            cvs = Parallel(n_jobs=n_jobs)(delayed(_cvscore)(
                x[k][permIntraClass(y, rnd=i), :], y, clone(self._clf),
                self._cv.cvr[0]) for i, k in iteract)

        # Reconstruct daPerm and get the associated p-value:
        daPerm, _, _ = zip(*cvs)
        daPerm = np.array(daPerm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(score, daPerm, n_perm, tail=1)
        pperm = pvalue

    else:
        raise ValueError('No statistical method '+method+' found')

    # Get features informations. This is best-effort, so ordinary errors
    # are ignored, but KeyboardInterrupt / SystemExit are no longer
    # swallowed.
    try:
        if grp is not None:
            grp = uorderlst(grp)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv, da,
                                                 grp=grp, pperm=pperm)
    except Exception:
        pass

    return da.T, pvalue, daPerm
def fit(self, x, mf=False, center=False, grp=None, method="bino",
        n_perm=200, rndstate=0, n_jobs=-1):
    """Apply the classification and cross-validation objects to the array x.

    Args:
        x: list
            List of datasets, one per subject. All the datasets in the list
            should have the same number of columns, but the number of lines
            can differ across subjects and must match the number of lines
            of the corresponding label vector of y.

    Kargs:
        mf: bool, optional, [def: False]
            If mf=False, the returned decoding accuracy (da) will have a
            shape of (1, rep) where rep, is the number of repetitions.
            This means that all the features are used together. If mf=True,
            da.shape = (M, rep), where M is the number of columns of x.

        center: optional, bool, [def: False]
            Normalize features with a zero mean by subtracting then dividing
            by the mean. The center parameter should be set to True if the
            classifier is a svm.

        grp: array, optional, [def: None]
            If mf=True, the grp parameter allows to define groups of
            features. If x.shape = (N, 5) and grp=np.array([0,0,1,2,1]),
            this means that 3 groups of features will be considered :
            (0,1,2)

        method: string, optional, [def: 'bino']
            Two methods are implemented to test the statistical
            significance of the decoding accuracy :

                - 'bino': binomial test
                - 'label_rnd': randomly shuffle the labels (permutations)

        n_perm: integer, optional, [def: 200]
            Number of permutations for the 'label_rnd' method

        rndstate: integer, optional, [def: 0]
            Seed of the random generator used to shuffle the labels.
            Useful to reproduce results.

        n_jobs: integer, optional, [def: -1]
            Control the number of jobs to compute the decoding accuracy. If
            n_jobs = -1, all the jobs are used.

    Return:
        da: array
            The decoding accuracy of shape n_repetitions x n_features

        pvalue: array
            Array of associated pvalue of shape n_features

        daPerm: array
            Array of all the decodings obtained for each permutations of
            shape n_perm x n_features (None for the 'bino' method)

    Raises:
        ValueError: if method is neither 'bino' nor 'label_rnd'.

    .. rubric:: Footnotes
    .. [#f8] `Ojala and Garriga, 2010 <http://www.jmlr.org/papers/volume11/ojala10a/ojala10a.pdf>`_
    .. [#f9] `Combrisson and Jerbi, 2015 <http://www.ncbi.nlm.nih.gov/pubmed/25596422/>`_
    """
    # Check x, y (keep the raw list: it is re-checked for each permutation):
    xbk = x.copy()
    x, y, train, test = _checkXY(x, self._y, mf, grp, center, self)
    nfeat = x.shape[1]
    # Run classification:
    da, _, _ = _fit(x, y, train, test, self, n_jobs)

    # Get statistics:
    # -------------------------------------------------------------
    # Binomial :
    # -------------------------------------------------------------
    if method == "bino":
        pvalue = bino_da2p(self._ry, da)
        daperm = None
        pperm = None

    # -------------------------------------------------------------
    # Permutations :
    # -------------------------------------------------------------
    # -> Shuffle the labels :
    elif method == "label_rnd":
        # Dedicated generator seeded with rndstate, so that the shuffled
        # labels are reproducible as documented.
        rng = np.random.RandomState(rndstate)
        y_sh = [
            _checkXY(xbk, [rng.permutation(i) for i in self._y],
                     mf, grp, center, self)[1]
            for _ in range(n_perm)
        ]
        cvs = Parallel(n_jobs=n_jobs)(
            delayed(_fit)(x, y_sh[k], train, test, self, 1)
            for k in range(n_perm)
        )

        # Reconstruct daperm and get the associated p-value:
        daperm, _, _ = zip(*cvs)
        daperm = np.array(daperm).reshape(n_perm, nfeat)
        pvalue = perm_2pvalue(da, daperm, n_perm, tail=1)
        pperm = pvalue

    else:
        raise ValueError("No statistical method " + method + " found")

    # Try to get featinfo. This is best-effort, so ordinary errors are
    # ignored, but KeyboardInterrupt / SystemExit are no longer swallowed.
    try:
        if grp is not None:
            grp = uorderlst(grp)
        else:
            grp = np.arange(nfeat)
        self.info.featinfo = self.info._featinfo(self._clf, self._cv,
                                                 da[:, np.newaxis],
                                                 grp=grp, pperm=pperm)
    except Exception:
        pass

    return da, pvalue, daperm