# NOTE: the selection helpers (select_all, select_bino, select_perm,
# select_nbest, sequence_inner) and classifier_choice/classifier_string are
# expected to be provided by the surrounding package.


def submeth(Idx):
    """Return the feature-selection method and its name for a given index.

    This helper closes over names from the enclosing scope (y, p, clfIn,
    kwargs, direction, display, nbest) and is therefore meant to be defined
    inside the function that provides them.
    """
    if Idx == '0':  # select all features
        def MFmeth(x):
            return select_all(x)
        StrMeth = 'SelectAll'
    elif Idx == '1':  # use a binomial law to select features
        def MFmeth(x):
            return select_bino(x, y, p=p, classifier=clfIn, **kwargs)
        StrMeth = 'Binomial selection at p<' + str(p)
    elif Idx == '2':  # use permutations to select features
        def MFmeth(x):
            return select_perm(x, y, p=p, classifier=clfIn, **kwargs)
        StrMeth = 'Permutation selection at p<' + str(p)
    elif Idx == '3':  # use 'forward'/'backward'/'exhaustive' to select features
        clf = classifier_choice(clfIn, n_tree=kwargs['n_tree'],
                                n_knn=kwargs['n_knn'], kern=kwargs['kern'])

        def MFmeth(x):
            return sequence_inner(clf, x, y, direction=direction,
                                  inner_folds=kwargs['n_folds'],
                                  display=display)
        StrMeth = direction + ' feature selection'
    elif Idx == '4':  # keep the nbest features
        def MFmeth(x):
            return select_nbest(x, y, nbest=nbest, classifier=clfIn, **kwargs)
        StrMeth = str(nbest) + ' best features'
    else:  # guard against returning an undefined MFmeth for unknown indices
        raise ValueError('Unknown method index: ' + str(Idx))
    return MFmeth, StrMeth
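# Usage sketch for submeth (illustration only): the caller picks a selection
# strategy by string index and gets back both the callable and a readable
# label. Because of the closed-over names, this only works from a scope that
# defines them (e.g. inside MFcv), as in:
#
#     MFmeth, StrMeth = submeth('1')    # binomial selection at p < 0.05
#     xsel = MFmeth(x)                  # apply the selection to the features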
def mf(x, y, Id='0', featList=None, clfIn=0, clfOut=0, p=0.05,
       combineGroup=False, n_knn=3, n_tree=100, kern='rbf', cvIn='sss',
       n_foldsIn=10, repIn=1, cvOut='skfold', n_foldsOut=3, repOut=10,
       probOccur='rpercent', display=True, threshold=None,
       direction='forward', nbest=10):
    """Run a multi-features (MF) analysis on an array x, using the target
    vector y. The Id parameter serves to select and combine MF methods.

    Parameters
    ----------
    x : array-like
        The features, of shape [nfeat x ntrial]
    y : array-like
        The target variable to try to predict in the case of supervised
        learning
    Id : string
        '0': No selection. All the features are used
        '1': Select the <p significant features using a binomial law
        '2': Select the <p significant features using permutations
        '3': Use 'forward'/'backward'/'exhaustive' to select features
        '4': Select the nbest best features
    clfIn, clfOut : int, default: 0
        The inner and outer classifiers to use to fit the data (passed to
        classifier_choice)
    p : float < 1, default: 0.05
        The significance level to select features
    threshold : float, default: None
        Variable equivalent to p. If threshold is defined, the program
        will search for the associated p value
    display : bool, default: True
        Display information
    direction : string, optional, default: 'forward'
        Use 'forward', 'backward' or 'exhaustive'
    nbest : int, optional, default: 10
        For Id '4', use this parameter to control the number of features
        to select

    Returns
    -------
    da : list
        The decoding accuracy of each group of features
    prob : list
        The occurrence probability of the features in each group
    MFstr : string
        The name of the selected methods
    groupinfo : pandas.DataFrame
        Info about each group of features, completed with the mean decoding
        accuracy and the occurrence of each group
    setup : dict
        The parameters used to run the model
    """
    # Get size elements :
    nfeat, ntrial = x.shape
    if featList is None:
        featList, combineGroup = [0] * nfeat, False
    if threshold is not None:
        p = binostatinv(y, threshold)

    # Manage group of features :
    groupinfo = combineInfo(x, featList, combineGroup=combineGroup)

    # Define classifier option :
    clfOutMod = classifier_choice(clfOut, n_tree=n_tree, n_knn=n_knn,
                                  kern=kern)

    # Keep the info :
    setup = {'p': p, 'cvOut': cvOut, 'n_foldsOut': n_foldsOut,
             'repOut': repOut, 'cvIn': cvIn, 'n_foldsIn': n_foldsIn,
             'repIn': repIn, 'n_tree': n_tree, 'kern': kern,
             'probOccur': probOccur, 'direction': direction, 'nbest': nbest}

    # Run the MF model for each combination :
    da, prob = [], []
    for k in range(len(groupinfo)):
        if display:
            print('=> Group : ' + groupinfo['feature'].iloc[k], end='\r')
        daComb, probComb, MFstr = MFcv(x[groupinfo['idx'].iloc[k], :], y, Id,
                                       clfOutMod, clfIn=clfIn,
                                       display=display, **setup)
        da.append(daComb)
        prob.append(probComb)

    # Get final info on the classifier used :
    setup['clfIn'], _ = classifier_string(clfIn, n_tree=n_tree, n_knn=n_knn,
                                          kern=kern)
    setup['clfOut'], _ = classifier_string(clfOut, n_tree=n_tree, n_knn=n_knn,
                                           kern=kern)

    # Complete the pandas DataFrame about group decoding :
    groupinfo['da'] = [sum(k) / len(k) for k in da]
    groupinfo['occurrence'] = prob

    return da, prob, MFstr, groupinfo, setup
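if __name__ == '__main__':
    # Minimal smoke-test sketch (an assumption, not shipped with the module):
    # it presumes the helpers used above (combineInfo, classifier_choice,
    # MFcv, ...) are importable in this file. Classify 15 random features
    # across 40 trials with a binary target, keeping every feature (Id='0').
    import numpy as np

    rnd = np.random.RandomState(0)
    x = rnd.rand(15, 40)                  # features, [nfeat x ntrial]
    y = rnd.randint(0, 2, 40)             # binary target vector
    da, prob, MFstr, groupinfo, setup = mf(x, y, Id='0', display=False)
    print('Method :', MFstr)
    print('Decoding accuracy per group :', list(groupinfo['da']))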