コード例 #1
0
ファイル: 8.py プロジェクト: mobiiin/statistical-learning
def forward_selection_regression(data, target, k_features=3):
    """Fit a linear regression on features chosen by forward selection.

    The data is split into a train part and a test part (``test_size=.3``).
    A forward, non-floating ``SFS`` selector scored with ``'r2'`` and
    ``cv=5`` picks ``k_features`` columns using the train part only, and a
    linear regression is then fitted on those columns.

    :param data: pandas dataframe of input data
    :param target: pandas dataframe of input data's corresponding target
    :param k_features: number of features the selector should keep
    :return: None; the regression coefficients, the intercept and the
        train/test mean squared errors are printed
    """
    model = LinearRegression()
    selector = SFS(model, k_features, forward=True, floating=False,
                   verbose=0, scoring='r2', cv=5)

    X_train, X_test, y_train, y_test = train_test_split(
        data, target, test_size=.3)

    # The selector only ever sees the train part; both parts are then
    # reduced to the chosen columns.
    selector = selector.fit(X_train, y_train)
    train_selected = selector.transform(X_train)
    test_selected = selector.transform(X_test)

    model = model.fit(train_selected, y_train)

    print('estimated coefficients for the linear regression:', model.coef_)
    print('interception coefficient b_0:', model.intercept_)
    for label, features, truth in (('MSE_train:', train_selected, y_train),
                                   ('MSE_test:', test_selected, y_test)):
        print(label,
              metrics.mean_squared_error(truth, model.predict(features)))
コード例 #2
0
def vote(X_train, Y_train, X_test, Y_test, voting_type, feature_selection,
         k_features):
    """Run a soft voting / majority rule classification.

    This is a wrapper around `sklearn.ensemble.VotingClassifier` which
    automatically uses all classifiers that are known to `gumpy` in
    `gumpy.classification.available_classifiers`.

    Args:
        X_train: training data (values)
        Y_train: training data (labels)
        X_test: evaluation data (values)
        Y_test: evaluation data (labels)
        voting_type (str): either of 'soft' or 'hard'. See the
            sklearn.ensemble.VotingClassifier documentation for more details
        feature_selection: when truthy, a floating forward `SFS` selector
            (accuracy scoring, 10-fold cross-validation, all cores) is
            fitted on the training data and both data sets are reduced to
            the selected features before the final fit
        k_features: forwarded to `SFS` as the number of features to select;
            only used when `feature_selection` is truthy

    Returns:
        2-element tuple containing

        - **ClassificationResult**: The result of the classification.
        - **Classifier**:  The instance of `sklearn.ensemble.VotingClassifier`
          that was used during the classification.

    """
    cv_folds = 10
    n_jobs = -1

    # Build one (name, estimator) pair per registered classifier. The
    # 'vote' options make every classifier initialise with plain default
    # settings, which avoids, for instance, nested cross-validation.
    estimators = []
    for name in available_classifiers:
        classifier_type = available_classifiers[name]
        init_kwargs = classifier_type.static_opts('vote', X_train=X_train)
        estimators.append((name, classifier_type(**init_kwargs).clf))

    soft_vote_clf = VotingClassifier(estimators=estimators,
                                     voting=voting_type)

    if feature_selection:
        selector = SFS(soft_vote_clf, k_features,
                       forward=True, floating=True, verbose=2,
                       scoring='accuracy', cv=cv_folds, n_jobs=n_jobs)
        selector = selector.fit(X_train, Y_train)
        X_train = selector.transform(X_train)
        X_test = selector.transform(X_test)

    soft_vote_clf.fit(X_train, Y_train)
    predictions = soft_vote_clf.predict(X_test)
    return ClassificationResult(Y_test, predictions), soft_vote_clf
def step_feature_selection(keras_est,
                           x_train,
                           y_train,
                           x_test,
                           y_test,
                           features_lower_bound,
                           features_upper_bound,
                           *,
                           scoring='accuracy',
                           cv=0,
                           n_jobs=-1):
    """Select features with forward SFS and reduce both data sets to them.

    Side effects: the module-level ``feature_names`` list is replaced by the
    names of the selected features, those names are written to
    ``RUNDIR + "feature_names_SFS_<average_runs>.csv"``, and
    ``k.clear_session()`` is called (``k`` is presumably the Keras backend
    module -- confirm against the file's imports).

    Args:
        keras_est: estimator handed to ``SFS``.
        x_train: training features; used to fit the selector.
        y_train: training labels.
        x_test: test features; only transformed.
        y_test: not used by this function.
        features_lower_bound: lower end of the ``k_features`` range.
        features_upper_bound: upper end of the ``k_features`` range.
        scoring: scoring name forwarded to ``SFS``.
        cv: cross-validation setting forwarded to ``SFS``.
        n_jobs: parallelism setting forwarded to ``SFS``.

    Returns:
        Tuple ``(x_train_sfs, x_test_sfs)`` holding only the selected
        features.
    """
    global feature_names

    selector = SFS(keras_est,
                   k_features=(features_lower_bound, features_upper_bound),
                   forward=True, floating=False, verbose=2,
                   scoring=scoring, cv=cv, n_jobs=n_jobs)
    selector = selector.fit(x_train, y_train)

    # Keep only the columns the selector settled on.
    x_train_sfs = selector.transform(x_train)
    x_test_sfs = selector.transform(x_test)

    # Narrow the shared name list to the surviving features and persist it.
    feature_names = [feature_names[idx] for idx in selector.k_feature_idx_]
    names_frame = pd.DataFrame(feature_names)
    names_frame.to_csv(
        RUNDIR + "feature_names_SFS_{}.csv".format(average_runs),
        index=False)
    k.clear_session()

    return x_train_sfs, x_test_sfs
コード例 #4
0
ファイル: rice_ml_PL3.py プロジェクト: JRLi/untitled
def select_r2(df_in, ss_label, f_n, eps):
    """Pick ``f_n`` columns of ``df_in`` by forward SFS with a random forest.

    When the frame has no more than ``f_n`` columns nothing is selected and
    all columns are returned unchanged.

    Args:
        df_in: pandas DataFrame of features (copied, never modified).
        ss_label: labels; ``ss_label.values`` is passed to the selector.
        f_n: number of features to keep.
        eps: used as ``n_estimators`` of the random forest.

    Returns:
        Tuple ``(values, names, count)``: the feature matrix restricted to
        the kept columns, the kept column names joined with ``','``, and how
        many columns were kept.
    """
    frame = df_in.copy()

    # Not enough columns to choose from: hand everything back as is.
    if len(frame.columns) <= f_n:
        all_names = frame.columns.tolist()
        return frame.values, ','.join(all_names), len(all_names)

    forest = RandomForestClassifier(n_estimators=eps, random_state=1)
    selector = SFS(forest, k_features=f_n, forward=True, floating=False,
                   scoring='accuracy', cv=4, n_jobs=3)
    selector.fit(frame.values, ss_label.values)

    chosen_idx = list(selector.k_feature_idx_)
    x_sfs = selector.transform(frame.values)
    chosen_names = frame.columns[chosen_idx]
    return x_sfs, ','.join(chosen_names), len(chosen_names)
コード例 #5
0
def test_check_pandas_dataframe_transform():
    """SFS must accept a pandas DataFrame in both ``fit`` and ``transform``."""
    iris = load_iris()
    column_names = ['sepal length', 'sepal width',
                    'petal length', 'petal width']
    frame = pd.DataFrame(iris.data, columns=column_names)
    labels = iris.target

    selector = SFS(SoftmaxRegression(random_seed=1),
                   k_features=2, forward=True, floating=False,
                   scoring='accuracy', cv=0, verbose=0, n_jobs=1)
    selector = selector.fit(frame, labels)

    # Two features are expected to survive, leaving a 150 x 2 matrix.
    assert selector.k_feature_idx_ == (1, 3)
    assert (150, 2) == selector.transform(frame).shape
コード例 #6
0
def test_check_pandas_dataframe_transform():
    """Check that SFS fits on and transforms a pandas DataFrame."""
    dataset = load_iris()
    features, target = dataset.data, dataset.target
    softmax = SoftmaxRegression(random_seed=1)
    sfs_options = dict(k_features=2, forward=True, floating=False,
                       scoring='accuracy', cv=0, verbose=0, n_jobs=1)
    selector = SFS(softmax, **sfs_options)

    iris_frame = pd.DataFrame(
        features,
        columns=['sepal length', 'sepal width',
                 'petal length', 'petal width'])
    selector = selector.fit(iris_frame, target)

    assert selector.k_feature_idx_ == (1, 3)
    reduced = selector.transform(iris_frame)
    assert (150, 2) == reduced.shape
コード例 #7
0
def fs_rfe(df_in, ss_label, f_n, tp=0):
    """Pick ``f_n`` columns of ``df_in`` by forward SFS with a linear model.

    Despite the name, the selection is done with ``SFS`` (scored with
    ``'neg_mean_squared_error'``, ``cv=4``). When the frame has no more than
    ``f_n`` columns nothing is selected and all columns are returned.

    Args:
        df_in: pandas DataFrame of features (copied, never modified).
        ss_label: targets; ``ss_label.values`` is passed to the selector.
        f_n: number of features to keep.
        tp: estimator choice: ``0`` -> ``Ridge(alpha=0.1)``,
            ``1`` -> ``Lasso(alpha=0.1)``; any other value falls back to
            the ridge estimator.

    Returns:
        Tuple ``(values, names)``. After a selection ``names`` is the pandas
        Index of kept columns; in the pass-through case it is a plain list
        of all column names.
    """
    frame = df_in.copy()

    # Not enough columns to choose from: hand everything back as is.
    if len(frame.columns) <= f_n:
        return frame.values, frame.columns.tolist()

    # es3 = SVR(C=1.0, epsilon=0.2, kernel='linear', cache_size=3000)
    estimators = {0: Ridge(alpha=0.1), 1: Lasso(alpha=0.1)}
    selector = SFS(estimators.get(tp, estimators[0]),
                   k_features=f_n, forward=True, floating=False, verbose=1,
                   scoring='neg_mean_squared_error', cv=4, n_jobs=3)
    selector.fit(frame.values, ss_label.values)

    chosen_idx = selector.k_feature_idx_
    print(chosen_idx)
    x_rfe = selector.transform(frame.values)
    return x_rfe, frame.columns[list(chosen_idx)]
def step_feature_selection(keras_est, x_train, y_train, x_test, y_test,
                           features_lower_bound, features_upper_bound, *, scoring='accuracy', cv=0, n_jobs=-1):
    """Run forward SFS and return train/test data cut down to the picks.

    Side effects: the module-level ``feature_names`` list is replaced by the
    names of the selected features, those names are written to
    ``DATADIR + "feature_names_SFS.csv"``, and ``k.clear_session()`` is
    called (``k`` is presumably the Keras backend module -- confirm against
    the file's imports).

    Args:
        keras_est: estimator handed to ``SFS``.
        x_train: training features; used to fit the selector.
        y_train: training labels.
        x_test: test features; only transformed.
        y_test: not used by this function.
        features_lower_bound: lower end of the ``k_features`` range.
        features_upper_bound: upper end of the ``k_features`` range.
        scoring: scoring name forwarded to ``SFS``.
        cv: cross-validation setting forwarded to ``SFS``.
        n_jobs: parallelism setting forwarded to ``SFS``.

    Returns:
        Tuple ``(x_train_sfs, x_test_sfs)`` holding only the selected
        features.
    """
    global feature_names

    k_range = (features_lower_bound, features_upper_bound)
    selector = SFS(keras_est, k_features=k_range, forward=True,
                   floating=False, verbose=2, scoring=scoring, cv=cv,
                   n_jobs=n_jobs)
    selector = selector.fit(x_train, y_train)

    # Reduce both data sets to the chosen columns.
    x_train_sfs = selector.transform(x_train)
    x_test_sfs = selector.transform(x_test)

    # Narrow the shared name list to the surviving features and persist it.
    feature_names = [feature_names[idx] for idx in selector.k_feature_idx_]
    names_frame = pd.DataFrame(feature_names)
    names_frame.to_csv(DATADIR+"feature_names_SFS.csv", index=False)
    k.clear_session()

    return x_train_sfs, x_test_sfs
コード例 #9
0
ファイル: model-knn.py プロジェクト: ltmolinar/prueba
        random_state=10,
        stratify=df3["descCanalRadicacion"])

    # k-nearest-neighbours classifier: 50 neighbours, votes weighted by
    # distance.
    knn = KNeighborsClassifier(n_neighbors=50, weights='distance')

    # Forward, non-floating sequential selection of 11 features, scored with
    # the weighted F1 score under 5-fold cross-validation.
    sfs1 = SFS(knn,
               k_features=11,
               forward=True,
               floating=False,
               verbose=1,
               scoring=make_scorer(f1_score, average='weighted'),
               cv=5)

    # The selector is fitted on the training split only.
    sfs1 = sfs1.fit(X_train, y_train)

    # Reduce both splits to the selected features.
    X_train_sfs = sfs1.transform(X_train)
    X_test_sfs = sfs1.transform(X_test)

    # Fit the classifier itself on the reduced training data.
    clfKnn_sfs = knn.fit(X_train_sfs, y_train)

    # Predictions on the reduced test split
    predictions = clfKnn_sfs.predict(X_test_sfs)

    # Weighted F1 score on the test split (predictions are cast to int64
    # before scoring)
    print(f1_score(y_test, predictions.astype('int64'), average='weighted'))

    # Selected features
    print(sfs1.k_feature_names_)

    # Persist the fitted classifier; open() does not create the ./outputs
    # directory, so it has to exist already.
    with open('./outputs/model.pkl', 'wb') as model_pkl:
        pickle.dump(clfKnn_sfs, model_pkl)
コード例 #10
0
           cv=5)

# Fit the selector (sfs1 is built just above this fragment) on the scaled
# training data; the result is bound to a second name, sfs.
sfs = sfs1.fit(X3_train_scaled, y_train)

# Plot the score of every selection step with standard-deviation bands and
# save the figure to a hard-coded absolute path before showing it.
fig1 = plot_sfs(sfs.get_metric_dict(), kind='std_dev')
plt.ylim([0, 0.3])
plt.title('Sequential Forward Selection (w. StdDev)')
plt.ylabel("Cross validation f1-score")
plt.grid()
plt.savefig("/Users/quan/Documents/Sinto_Project/report/selection.png")
plt.show()

# Report the chosen feature indices and the score of the chosen subset.
print('Selected features:', sfs1.k_feature_idx_)
print(sfs.k_score_)

# Reduce train and test data to the selected features.
X_train_sfs = sfs1.transform(X3_train_scaled)
X_test_sfs = sfs1.transform(X3_test_scaled)

# Fit the estimator using the new feature subset
# and make a prediction on the test data
svc.fit(X_train_sfs, y_train)
y_pred = svc.predict(X_test_sfs)
y_probs=svc.predict_proba(X_test_sfs)

# Recall for class 1: among samples whose true label is 1, the fraction that
# was predicted as 1. The denominator is the number of true positives in
# y_test, so this raises ZeroDivisionError when y_test contains no 1.
recall = float(sum(y_test[np.where(y_test==1)] == y_pred[np.where(y_test==1)])) / len(np.where(y_test==1)[0])
print('Test set recall: %.2f %%' % (recall * 100))

# Precision for class 1: among samples predicted as 1, the fraction whose
# true label is 1. Raises ZeroDivisionError when nothing is predicted as 1.
precision = float(sum(y_test[np.where(y_pred==1)] == y_pred[np.where(y_pred==1)])) / len(np.where(y_pred==1)[0])
print('Test set precision: %.2f %%' % (precision * 100))
コード例 #11
0
                                gamma=gamma_curr,
                                C=c_curr,
                                class_weight=curr_class_weight,
                                decision_function_shape=dfs)

                        # Forward selection of 20 features around the
                        # classifier configured just above; whether the
                        # search is floating, the scoring method and the
                        # number of CV folds come from the enclosing scope.
                        # NOTE(review): print_progress is not passed by any
                        # other SFS call in this file -- confirm the
                        # installed mlxtend version still accepts it.
                        sffs = SFS(classifier,
                                   k_features=20,
                                   forward=True,
                                   floating=floatingEnabled,
                                   scoring=scoreMethod,
                                   print_progress=False,
                                   cv=cv_folds)

                        # Select the features on the training data and reduce
                        # the training and validation sets to them
                        sffs = sffs.fit(data_train, target_train)
                        data_sffs_train = sffs.transform(data_train)
                        data_sffs_val = sffs.transform(data_val)

                        # Fit the classifier to the reduced training data
                        classifier.fit(data_sffs_train, target_train)

                        # Print the selected feature names (looked up in
                        # currentHeader by index) to stdout and mirror the
                        # same text into the `output` file object
                        if printSelectedFeats:
                            print('Selected features: ', end="")
                            output.write('Selected features: ')
                            for feat in sffs.k_feature_idx_:
                                print(currentHeader[feat], end=",")
                                output.write(currentHeader[feat] + ",")
                            print()
                            output.write("\n")
コード例 #12
0
  
# NOTE: this snippet is Python 2 (print statements).
# Cross-validated logistic regression used both as the estimator inside the
# feature selector and as the final model.
lr=LogisticRegressionCV(Cs=Cs,fit_intercept=True)
# lr.fit(train,train_y)
# Choose the selector from the command line: floating forward SFS, or an
# exhaustive search (EFS) over subsets of at most 8 features. Both are scored
# with negative log loss.
if args.fs=='sfs': fs=SFS(lr,k_features=k_features,forward=True,floating=True,scoring='neg_log_loss',cv=cv,verbose=2)
else:              fs=EFS(lr,min_features=1,max_features=min(train.shape[1],8),scoring='neg_log_loss',cv=cv)
# The selector is fitted on the whole of `train`, i.e. including the rows
# after `split` that serve as the validation slice below.
fs.fit(train.values,train_y.values)
print
# One row per evaluated subset / selection step.
print pd.DataFrame.from_dict(fs.get_metric_dict()).T
if args.fs=='sfs':
  print 'SFS best score:', fs.k_score_
  print len(fs.k_feature_idx_),'features:',fs.k_feature_idx_
else:
  print 'EFS best score:', fs.best_score_
  print len(fs.best_idx_),'features:',fs.best_idx_
  
# 1) Fit on the rows before `split`, evaluate on the rows from `split` on.
lr.fit(fs.transform(train.iloc[:split].values),train_y.iloc[:split])
print
print 'Regularization C:', lr.C_
print 'validation error fitting on train:', log_loss(train_y.iloc[split:],lr.predict_proba(fs.transform(train.iloc[split:].values))[:,1])

# 2) Fit on the validation slice itself; the reported loss is therefore
#    in-sample. Optionally write test-set probabilities to ~/val.csv.
lr.fit(fs.transform(train.iloc[split:].values),train_y.iloc[split:])
print 'validation error fitting on val:', log_loss(train_y.iloc[split:],lr.predict_proba(fs.transform(train.iloc[split:].values))[:,1])
if args.out:
  test1['probability']=lr.predict_proba(fs.transform(test[train.columns].values))[:,1]
  test1.to_csv('~/val.csv',index=True,columns=['probability'],float_format='%.6f')

# 3) Fit on all rows; the reported loss is again in-sample for the
#    validation slice. Optionally write test-set probabilities to
#    ~/trainval.csv.
lr.fit(fs.transform(train.values),train_y)
print 'validation error fitting on trainval:', log_loss(train_y.iloc[split:],lr.predict_proba(fs.transform(train.iloc[split:].values))[:,1])
if args.out:
  test1['probability']=lr.predict_proba(fs.transform(test[train.columns].values))[:,1]
  test1.to_csv('~/trainval.csv',index=True,columns=['probability'],float_format='%.6f')
コード例 #13
0
# Fit the scaler on the training data only and reuse the learned parameters
# for the test data. Re-fitting on the test set (as the previous
# ``fit_transform(test_data)`` did) scales the two sets differently and lets
# test-set statistics leak into preprocessing.
train_data_scaled = scaler.fit_transform(train_data)
test_data_scaled = scaler.transform(test_data)

# Linear-activation MLP used both inside the selector and as final model.
mlp = MLPRegressor(max_iter=200,
                   solver='lbfgs',
                   hidden_layer_sizes=(50, 50),
                   activation='identity')

# Forward, non-floating selection of 3 features, scored with R^2 under
# 10-fold cross-validation.
sfs = SFS(mlp, k_features=3, forward=True, floating=False, scoring='r2', cv=10)

sfs = sfs.fit(train_data_scaled, train_target)

print(sfs.k_feature_idx_)

# Reduce both sets to the selected features.
train_sfs = sfs.transform(train_data_scaled)
test_sfs = sfs.transform(test_data_scaled)

# Refit the MLP 100 times and accumulate its score on each set. The printed
# values are the sums over the 100 fits, not the averages.
acc_train = 0
acc_test = 0
for _ in range(100):
    mlp.fit(train_sfs, train_target)

    acc_train = acc_train + mlp.score(train_sfs, train_target)
    acc_test = acc_test + mlp.score(test_sfs, test_target)

print(acc_test)
print(acc_train)

# NOTE(review): the snippet is cut off here; these two assignments appear to
# start a following section that is not visible.
datanow = np.zeros(11)
test_data = []
コード例 #14
0
def ExecuteSFFS(x, y, featureNames, featureList, clusters, clusterNames, svc,
                kFolds, nbOfSplit, featMaxNbrSFFS, standardizationType,
                removedData, permutation_flag, nbPermutation, balance_flag,
                currentDateTime, resultDir, debug_flag, verbose):
    """Repeated train/test evaluation of a classifier with optional SFS.

    For each of ``nbOfSplit`` random stratified splits (33 % test) the
    training set is standardized, ``svc`` is tuned with a randomized search,
    and the tuned classifier is either fitted directly or wrapped in a
    forward sequential feature selection. Test accuracy, confusion matrices
    and (optionally) permutation results are collected, figures are written
    under ``resultDir``, and everything is gathered in one DataFrame.

    Args:
        x: feature table; indexed with column labels (``x[featureList]``).
        y: label table with a ``'Cluster'`` column.
        featureNames: feature names, indexed by column position; its length
            sizes the selection-count histogram.
        featureList: selects the mode. Empty: select among all features.
            First element is a float: select among all features, then refit
            using only features selected in more than
            ``featureList[0] * nbOfSplit`` splits. Otherwise: treated as
            column labels; ``x`` is restricted to them and feature selection
            is skipped.
        clusters: string used in output file names.
        clusterNames: class names for the confusion matrix; its length also
            sets the chance probability on the binomial branch.
        svc: estimator tuned over ``C``, ``kernel`` and ``class_weight``.
        kFolds: cross-validation setting for the feature selector.
        nbOfSplit: number of random train/test splits.
        featMaxNbrSFFS: upper bound of the selector's ``k_features`` range.
        standardizationType: forwarded to ``Standardize`` and ``Permute``.
        removedData: concatenated as-is into the returned table.
        permutation_flag: run the permutation assessment when truthy,
            otherwise compute a binomial chance level.
        nbPermutation: number of permutations, forwarded to ``Permute``.
        balance_flag: balance classes with ``BalanceClasses`` when truthy.
        currentDateTime: forwarded to ``PlotPermHist``.
        resultDir: prefix for every output path; it is concatenated
            directly, so it should end with a path separator.
        debug_flag: forwarded to ``Standardize``.
        verbose: not used by this function.

    Returns:
        pandas DataFrame concatenating (column-wise) the per-split metrics,
        the test accuracies, the permutation results or chance level, the
        feature-selection summary, ``removedData`` and the subject count.
    """
    # Import the submodule explicitly: scipy.stats is used below and a bare
    # ``import scipy`` does not guarantee that it is loaded.
    import scipy.stats
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.model_selection import train_test_split as tts
    from sklearn.metrics import confusion_matrix
    from mlxtend.feature_selection import SequentialFeatureSelector as SFS
    from mlxtend.plotting import plot_sequential_feature_selection as plot_sfs
    from sklearn.model_selection import RandomizedSearchCV

    from slpClass_toolbox import BalanceClasses
    from slpClass_toolbox import Standardize
    from slpClass_toolbox import Permute
    from slpClass_toolbox import ComputePermutationAvgDA
    from slpClass_toolbox import PlotPermHist
    from slpClass_toolbox import ApplyStandardization
    from slpClass_toolbox import plot_confusion_matrix

    # Many figures are opened and closed below; silence the warning.
    plt.rcParams.update({'figure.max_open_warning': 0})

    # Get features values since SFFS works only with numpy array!
    bestFeaturesHist = np.zeros([len(featureNames)])
    CvResult = pd.DataFrame()
    permResults = pd.DataFrame()
    tmpBest = []
    DA = []
    avg_perm_DA = []
    skipFS = False  # flag to skip feature selection
    fitFeatOverTresh = False  # fit classifier with most frequent features in best set

    #********************** TRAIN pre-procesing *******************************
    for it in range(nbOfSplit):
        print('\nSplit #{}'.format(str(it)))

        # Use all features or given ones only
        if len(featureList) == 0:
            xx = x
        elif isinstance(featureList[0], float):
            xx = x
            fitFeatOverTresh = True
        else:
            xx = x[featureList]
            skipFS = True

        # Balance the number of old woman and old man or not
        if balance_flag:
            X, Y = BalanceClasses(xx, y)
        else:
            X, Y = xx, y

        # split dataset into train and test random subset
        X_train, X_test, y_train, y_test = tts(X,
                                               Y['Cluster'],
                                               test_size=0.33,
                                               stratify=Y['Cluster'])
        # Data z-score standardisation
        xTrainSet, zPrm = Standardize(X_train, y_train, standardizationType,
                                      debug_flag)

        #**************************** SVM optimisation ************************
        params_dict = {
            'C': scipy.stats.expon(scale=100),
            'kernel': ['linear'],
            'class_weight': ['balanced', None]
        }

        n_iter_search = 20
        random_search = RandomizedSearchCV(svc,
                                           param_distributions=params_dict,
                                           n_iter=n_iter_search)

        random_search.fit(xTrainSet, y_train)
        optimClf = random_search.best_estimator_

        #*************************** TRAIN ************************************
        print('Fitting...')
        if skipFS:
            # ``.values`` replaces DataFrame.as_matrix(), which was removed
            # in pandas 1.0.
            optimClf = optimClf.fit(xTrainSet.values, y_train)

            yPred = optimClf.predict(xTrainSet.values)

            # Compute the accuracy of the train prediction
            acc = float((y_train == yPred).sum()) / yPred.shape[0]
            print('Train predicted accuracy: %.2f %%' % (acc * 100))
            fitRes = pd.DataFrame(data=[acc], columns=['CV_DA_' + str(it + 1)])

        else:
            # set k_features = (1,X.shape[1]) to test all possible combinations
            sffs = SFS(optimClf,
                       k_features=(1, featMaxNbrSFFS),
                       forward=True,
                       floating=False,
                       scoring='accuracy',
                       cv=kFolds,
                       n_jobs=-1)
            sffs = sffs.fit(xTrainSet.values, y_train)

            print('Best combination for fit #%d (ACC: %.3f): %s' % \
                  (it,sffs.k_score_, sffs.k_feature_idx_))

            # Fit the estimator using the new feature subset and make a
            # prediction on the test data
            X_train_sfs = sffs.transform(xTrainSet.values)
            optimClf.fit(X_train_sfs, y_train)

            fitRes = pd.DataFrame.from_dict(sffs.get_metric_dict()).T
            fitRes['avg_over_std'] = fitRes['avg_score'] / fitRes['std_dev']

            # Record the best subset of this split unconditionally. This
            # used to live inside the plotting guard below, which left
            # tmpBest empty (and the summary step crashing) whenever
            # featMaxNbrSFFS was 1.
            tmpBest.append(sffs.k_feature_idx_)
            bestFeaturesHist[list(tmpBest[-1])] += 1

            if featMaxNbrSFFS > 1:
                # plot feature selection process metrics
                fig1 = plot_sfs(sffs.get_metric_dict(), kind='std_err')
                savedPlotName = resultDir+'Decoding_accuracy_'+clusters+'_'+\
                                str(it)+'_'+str(nbOfSplit)+'.png'

                fig1.set_dpi(300)
                plt.tight_layout()
                plt.savefig(savedPlotName, bbox_inches='tight')
                plt.clf()
                plt.close(fig1)

                # plot mean / std
                plt.figure(dpi=300)
                plt.title('Moyenne sur ecart-type')
                plt.xlabel("nb attributs dans combinaison")
                plt.xticks(range(featMaxNbrSFFS))
                plt.ylabel("Moyenne sur ecart-type")
                plt.plot(list(range(1, featMaxNbrSFFS + 1)),
                         fitRes['avg_over_std'])
                figName = resultDir+'SFFS_'+clusters+'_bestSet_metric_'+ \
                          str(it)+'_'+str(nbOfSplit)
                plt.savefig(figName, bbox_inches='tight')
                plt.clf()
                plt.close()

        # add metrics iteration identifier
        fitRes = fitRes.add_suffix('_' + str(it + 1))

        CvResult = pd.concat([CvResult, fitRes], axis=1)

        #***************************** TEST ***********************************
        print('Testing...')
        # standardize test set using trainset standardization parameters
        xTestSet = ApplyStandardization(X_test, zPrm)

        # prepare test data
        if skipFS:
            xTest = xTestSet
            savedPlotName = resultDir+clusters+'_ConfusionMatrix_'+str(it+1)+ \
                            '_'+str(nbOfSplit)
        else:
            # Generate a new subset of data according to selected features
            xTest = sffs.transform(xTestSet.values)
            savedPlotName = resultDir+'SFFS_'+clusters+'_ConfusionMatrix_'+ \
                        str(it+1)+'_'+str(nbOfSplit)

        # actually test classifier and compute decoding accuracy on predictions
        y_pred = optimClf.predict(xTest)
        acc = float((y_test == y_pred).sum()) / y_pred.shape[0]
        print('Test set accuracy: %.2f %%' % (acc * 100))
        DA.append(acc)  # stack test DA for further use

        # plot confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        fig_CM = plt.figure(dpi=300)
        plot_confusion_matrix(cm,
                              clusterNames,
                              title=savedPlotName,
                              normalize=True,
                              precision=2)
        plt.clf()
        plt.close(fig_CM)

        #**************** STATISTICAL ASSESSMENT (PERMUTATION) ****************
        if permutation_flag:
            permResults['permutation_DA_' + str(it)] = Permute(
                clusters,
                xTrainSet,
                xTestSet,
                y_train,
                y_test,
                nbPermutation,
                standardizationType,
                debug_flag=0)
            avg_perm_DA.append(
                np.mean(permResults['permutation_DA_' + str(it)]))

    dfDA = pd.DataFrame(data=DA, columns=['DA_test'])
    #    CvResult = pd.concat([CvResult, dfDA[:]], axis=1)
    CvResult = pd.concat([
        CvResult, dfDA[:],
        pd.DataFrame(data=[np.mean(DA)], columns=['avg_DA'])
    ],
                         axis=1)

    #***************** COMPUTE STATISTICAL ASSESSMENT RESULTS *****************
    if permutation_flag:
        # compute permutation DA average and keep results in a dataframe
        print('\nAverage permutation DA')
        for avg in avg_perm_DA:
            print('\t' + str(avg))

        savedHistName = resultDir + 'Average_Permutation_hist_' + clusters + '.png'
        PlotPermHist(permResults, CvResult['avg_DA'].iloc[0], currentDateTime,
                     savedHistName)
        # formatting permutation results to save in excel file
        permResults = pd.concat(
            [permResults, ComputePermutationAvgDA(avg_perm_DA)], axis=1)
        print('Mean permutation decoding accuracy : {}'.format(
            np.mean(permResults['Avg_Permutation_DA_per_epoch'])))
    else:  # binomial law
        from scipy.stats import binom
        q = 0.001  # p value
        n = X.shape[0] + 1  # number of observations (subjects)
        p = 1 / len(clusterNames)  # probability of getting one trial right
        # Keyword was misspelled ``date=``, which raised a TypeError.
        luckLvl = pd.DataFrame(data=[binom.isf(q, n, p) / n],
                               columns=['Chance_Level'])

    #**************************** Compute results *****************************
    if not skipFS:
        # Build structure of histogram data to save in excel
        hist = pd.DataFrame(data=featureNames, columns=['Features_Name'])
        hist['Occurence_Best'] = bestFeaturesHist
        # Search best set across every iteration best set
        best_Combination = tmpBest[np.argmax(DA)]
        # Compute average size of best combination
        totalBestSize = sum(len(comb) for comb in tmpBest)
        avgBestCombSize = pd.DataFrame(
            data=[np.ceil(totalBestSize / len(tmpBest))],
            columns=['avgBestCombSize'])

        #    subsetHist = GetSubsetOccurence(tmpBest)
        #    PlotHist(subsetHist[1],'Subsets occurences',subsetHist[0],'Comb_Hist.png')

        # Get best set's feature names; the first row holds the best test DA
        tmp = []
        tmp.append(np.max(DA))
        for i in best_Combination:
            tmp.append(featureNames[i])
            print('\t' + featureNames[i])
        bestFeatNames = pd.DataFrame(data=tmp, columns=['Best_Features_Set'])

        sffsRes = pd.concat([hist, bestFeatNames, avgBestCombSize], axis=1)

        # Plot best combination custom metric (mean / std_dev)
        from slpClass_toolbox import PlotBestCombinationMetrics
        filteredData = CvResult.filter(regex=r'avg_over_std_', axis=1)
        metrics = pd.DataFrame(data=filteredData)
        metrics.dropna(inplace=True)
        figName = resultDir + 'SFFS_' + clusters + '_bestSet_metric_aggreg.png'
        PlotBestCombinationMetrics(metrics, figName)

    # save training and permutation results in an excel file
    nbSubject = pd.DataFrame(data=[len(X)], columns=['Number_Of_Subjects'])

    #************************ Build results structure *************************
    excelResults = pd.concat([
        CvResult, permResults if permutation_flag else luckLvl,
        sffsRes if not skipFS else None, removedData, nbSubject
    ],
                             axis=1)

    print('Mean Decoding accuracy :{}'.format(np.mean(DA)))

    # compute occurence of every subset in bestsets of every iteration
    #    from slpClass_toolbox import GetSubsetOccurence
    #    subsetHist = GetSubsetOccurence(tmpBest)
    #    excelResults = pd.concat([excelResults, subsetHist], axis=1)
    #    excelResults.to_excel(saveTo, sheet_name=xlSheetName)

    if fitFeatOverTresh:
        # Keep only features selected in more than `tresh` splits and
        # re-evaluate the last tuned classifier on that reduced table.
        tresh = featureList[0] * nbOfSplit
        bestFeatColumns = hist.iloc[:, 0][hist.iloc[:, 1] > tresh]
        bestDataSet = xx[bestFeatColumns]
        classes = y
        DABestFeat = []
        print('Fitting with features occuring over %d times in best sets' %
              tresh)
        for i in range(nbOfSplit):
            print('\rFit #{} of {}\n'.format(i + 1, nbOfSplit),
                  end='\r',
                  flush=True)
            # Balance the number of old woman and old man or not
            if balance_flag:
                XX, YY = BalanceClasses(bestDataSet, classes)
            else:
                XX, YY = bestDataSet, classes

            # split dataset into train and test random subset
            XXtrain, XXtest, yytrain, yytest = tts(XX,
                                                   YY['Cluster'],
                                                   test_size=0.33,
                                                   stratify=YY['Cluster'])
            # Data z-score standardisation
            xxTrainSet, zzPrm = Standardize(XXtrain, yytrain,
                                            standardizationType, debug_flag)

            # fit and predict on training data
            optimClf = optimClf.fit(xxTrainSet.values, yytrain)
            yPred = optimClf.predict(xxTrainSet.values)
            # Compute accuracy of prediction on training set
            acc = float((yytrain == yPred).sum()) / yPred.shape[0]
            print('Train predicted accuracy: %.2f %%' % (acc * 100))
            # NOTE(review): labelled with the last value of `it` from the
            # main loop and not used afterwards -- confirm intent.
            fitRes = pd.DataFrame(data=[acc], columns=['CV_DA_' + str(it + 1)])

            # test classifier and compute decoding accuracy on predictions
            xxTestSet = ApplyStandardization(XXtest, zzPrm)
            yypred = optimClf.predict(xxTestSet)
            acc = float((yytest == yypred).sum()) / yypred.shape[0]
            print('Test set accuracy: %.2f %%' % (acc * 100))
            DABestFeat.append(acc)  # stack test DA for further use
            # plot confusion matrix (title reuses the last savedPlotName)
            cm = confusion_matrix(yytest, yypred)
            fig_CM = plt.figure(dpi=300)
            plot_confusion_matrix(cm,
                                  clusterNames,
                                  title=savedPlotName,
                                  normalize=True,
                                  precision=2)
            plt.clf()
            plt.close(fig_CM)
        df = pd.DataFrame(data=DABestFeat, columns=['optim DA'])
        df = pd.concat([
            df,
            pd.DataFrame(data=[np.mean(DABestFeat)], columns=['optim avg DA'])
        ],
                       axis=1)
        print('Classifier trained with best features (occ > %d) only' % tresh)
        print(df)
        excelResults = pd.concat([excelResults, df], axis=1)

    return excelResults