Example 1
    def submeth(Idx):
        # Map a method index (given as a string) to a feature-selection
        # function and a human-readable label. The selection helpers and
        # the variables y, p, clfIn, kwargs, direction, display and nbest
        # come from the enclosing scope.
        if Idx == '0':  # select all features
            def MFmeth(x): return select_all(x)
            StrMeth = 'SelectAll'
        elif Idx == '1':  # use a binomial law to select features
            def MFmeth(x): return select_bino(
                x, y, p=p, classifier=clfIn, **kwargs)
            StrMeth = 'Binomial selection at p<' + str(p)
        elif Idx == '2':  # use permutations to select features
            def MFmeth(x): return select_perm(
                x, y, p=p, classifier=clfIn, **kwargs)
            StrMeth = 'Permutation selection at p<' + str(p)
        elif Idx == '3':  # 'forward'/'backward'/'exhaustive' selection
            clf = classifier_choice(clfIn, n_tree=kwargs['n_tree'],
                                    n_knn=kwargs['n_knn'],
                                    kern=kwargs['kern'])

            def MFmeth(x): return sequence_inner(clf, x, y,
                                                 direction=direction,
                                                 inner_folds=kwargs['n_folds'],
                                                 display=display)
            StrMeth = direction + ' feature selection'
        elif Idx == '4':  # keep only the nbest features
            def MFmeth(x): return select_nbest(
                x, y, nbest=nbest, classifier=clfIn, **kwargs)
            StrMeth = str(nbest) + ' best features'
        else:
            raise ValueError('Unknown method index: ' + repr(Idx))

        return MFmeth, StrMeth
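
submeth closes over variables from its enclosing scope (y, p, clfIn, kwargs, direction, display, nbest) and over the library's selection helpers (select_all, select_bino, select_perm, sequence_inner, select_nbest), so it cannot run in isolation. A minimal self-contained sketch of the same dispatch pattern, with a hypothetical variance-based stand-in for the real selectors, might look like this:

import numpy as np


def make_selector(Idx, y, p=0.05, nbest=10):
    """Return a (selection function, label) pair; the selectors below
    are hypothetical stand-ins for the library's real ones."""
    if Idx == '0':
        def MFmeth(x): return x  # keep every feature
        StrMeth = 'SelectAll'
    elif Idx == '4':
        def MFmeth(x):
            # Keep the nbest features with the largest variance
            # (an illustrative criterion only).
            order = np.argsort(x.var(axis=1))[::-1]
            return x[order[:nbest], :]
        StrMeth = str(nbest) + ' best features'
    else:
        raise ValueError('Unknown method index: ' + repr(Idx))
    return MFmeth, StrMeth


x = np.random.rand(20, 50)          # [nfeat x ntrial]
y = np.random.randint(0, 2, 50)     # one binary label per trial
MFmeth, StrMeth = make_selector('4', y, nbest=5)
print(StrMeth, MFmeth(x).shape)     # -> 5 best features (5, 50)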
Example 2
def mf(x, y, Id=0, featList=None, clfIn=0, clfOut=0, p=0.05, combineGroup=False,
        n_knn=3, n_tree=100, kern='rbf', cvIn='sss', n_foldsIn=10, repIn=1,
        cvOut='skfold', n_foldsOut=3, repOut=10, probOccur='rpercent',
        display=True, threshold=None, direction='forward', nbest=10):
    """Compute the multi-features contain in an array x, using the target
    vector y. The Id serve to combine MF methods.

    Parameters
    ----------

    x : array-like
        The features, of shape [nfeat x ntrial]

    y : array-like
        The target variable to try to predict in the case of
        supervised learning

    Id : string
        '0': no selection, all the features are used
        '1': select <p significant features using a binomial law
        '2': select <p significant features using permutations
        '3': use 'forward'/'backward'/'exhaustive' to select features
        '4': select the nbest best features

    clfIn, clfOut : int, default: 0
        The classifiers to use for the inner and outer loops (passed to
        classifier_choice)

    p : float < 1, default: 0.05
        The significance level to select features

    threshold : float, default: None
        Variable equivalent to p. If threshold is defined, the program
        will search for the associated p value

    display : bool, default: True
        Display information

    direction : string, optional, default: 'forward'
        Use 'forward', 'backward' or 'exhaustive'

    nbest : int, optional, default: 10
        For Id '4', use this parameter to control the number of
        features to select

    **kwargs : dictionary
        Optional parameters for the classify function

    Returns
    -------
    da : list
        Decoding accuracy for each group of features

    prob : list
        Feature occurrence probability for each group

    MFstr : list
        Names of the selected MF methods

    groupinfo : pandas DataFrame
        Information about each group of features, completed with its
        mean decoding accuracy and feature occurrence

    setup : dict
        The parameters used to fit the model

    """
    # Get size elements :
    nfeat, ntrial = x.shape
    if featList is None:
        featList, combineGroup = [0] * nfeat, False
    if threshold is not None:
        p = binostatinv(y, threshold)

    # Manage group of features :
    groupinfo = combineInfo(x, featList, combineGroup=combineGroup)

    # Define classifier option :
    clfOutMod = classifier_choice(
        clfOut, n_tree=n_tree, n_knn=n_knn, kern=kern)

    # Keep the info :
    setup = {'p': p, 'cvOut': cvOut, 'n_foldsOut': n_foldsOut,
             'repOut': repOut, 'cvIn': cvIn, 'n_foldsIn': n_foldsIn,
             'repIn': repIn, 'n_tree': n_tree, 'kern': kern,
             'probOccur': probOccur, 'direction': direction, 'nbest': nbest}

    # Run the MF model for each combination:
    da, prob = [], []
    for k in range(len(groupinfo)):
        if display:
            print('=> Group : ' + groupinfo['feature'].iloc[k], end='\r')
        daComb, probComb, MFstr = MFcv(
            x[groupinfo['idx'].iloc[k], :], y, Id, clfOutMod, clfIn=clfIn,
            display=display, **setup)
        da.append(daComb)
        prob.append(probComb)

    # Get final info on the classifier used :
    setup['clfIn'], _ = classifier_string(
        clfIn, n_tree=n_tree, n_knn=n_knn, kern=kern)
    setup['clfOut'], _ = classifier_string(
        clfOut, n_tree=n_tree, n_knn=n_knn, kern=kern)

    # Complete the pandas DataFrame about group decoding
    groupinfo['da'] = [sum(k) / len(k) for k in da]
    groupinfo['occurrence'] = prob

    return da, prob, MFstr, groupinfo, setup
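
mf relies on helpers defined in its own module (classifier_choice, combineInfo, binostatinv, MFcv, classifier_string), so it only runs inside that library. Assuming the module is importable, a call might look like the sketch below; the shapes follow the docstring, and Id is passed as a string because the dispatcher in Example 1 compares it against string indices:

import numpy as np

# Hypothetical usage sketch; assumes mf() and its helpers are importable
# from the module these examples were taken from.
x = np.random.rand(20, 50)         # features, [nfeat x ntrial]
y = np.random.randint(0, 2, 50)    # binary target, one label per trial

# Request a single method ('1': binomial selection at p < 0.05); the
# docstring says Id can also combine several MF methods.
da, prob, MFstr, groupinfo, setup = mf(x, y, Id='1', p=0.05, display=False)
print(MFstr)
print(groupinfo[['feature', 'da']])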