Exemple #1
0
def mifs():
    """Time a MIFS run on the module-level data and optionally save it.

    Reads the module-level ``data``, ``labels``, ``treshold`` and ``header``.
    Prints the method name, the length of the MIFS result and the elapsed
    wall-clock time. The result is handed to ``transform_and_save`` only when
    it is shorter than ``header``.
    """
    started = datetime.datetime.now()
    selected = MIFS.mifs(data, labels, mode="index", n_selected_features=treshold)
    elapsed = datetime.datetime.now() - started

    print("MIFS")
    print(len(selected))
    print("cas: " + str(elapsed))
    print('\n')

    # Nothing to save unless the selection is shorter than the header.
    if len(selected) >= len(header):
        return
    transform_and_save(selected, "MIFS")
Exemple #2
0
 def runMIFS(self):
     """Run MIFS (10 features) on every dataset held in ``self.data``.

     For each dataset key a progress message is emitted through
     ``self.log``, MIFS is run on the dataset's ``'X'``/``'y'`` entries, and
     the outcome is registered via ``self.addToSelectedFeatures`` together
     with the original feature list ``'f'`` and the entries of ``'f'``
     picked out by the MIFS result.
     """
     for datasetKey in self.data.keys():
         self.log.emit(
             'MIFS feature selection on {} dataset...'.format(datasetKey),
             indents=1)

         dataset = self.data[datasetKey]
         featureNames = dataset['f']
         selectedIdxs = MIFS.mifs(dataset['X'],
                                  dataset['y'],
                                  n_selected_features=10)
         # Look up each MIFS result entry in the original feature list.
         rankedNames = [featureNames[idx] for idx in selectedIdxs]

         self.addToSelectedFeatures('MIFS',
                                    datasetKey,
                                    fOrig=featureNames,
                                    fIdxs=selectedIdxs,
                                    fRank=rankedNames)
def run_feature_selection(X, Y, n_selected_features):
    """Run JMI, MIM, MRMR and MIFS on ``(X, Y)`` and collect their results.

    Each selector is called with ``n_selected_features`` and only the
    ``FEAT_IDX`` entry of its return value is kept. When the module-level
    ``PARALLEL`` flag is truthy the four selectors are submitted to a
    4-worker ``ProcessPoolExecutor``; otherwise they run one after another.

    Returns
    -------
    list
        The ``FEAT_IDX`` entries, ordered JMI, MIM, MRMR, MIFS.
    """
    selectors = (JMI.jmi, MIM.mim, MRMR.mrmr, MIFS.mifs)

    if not PARALLEL:
        return [
            select(X, Y, n_selected_features=n_selected_features)[FEAT_IDX]
            for select in selectors
        ]

    # Submit everything first; leaving the ``with`` block waits for the
    # workers, after which the results are gathered in submission order.
    with ProcessPoolExecutor(max_workers=4) as executor:
        pending = [
            executor.submit(select,
                            X,
                            Y,
                            n_selected_features=n_selected_features)
            for select in selectors
        ]
    return [future.result()[FEAT_IDX] for future in pending]
Exemple #4
0
def main():
    """Evaluate MIFS + linear SVM on BASEHOCK with shuffled 10-fold splits.

    For every fold, MIFS is run on the training rows only, a LinearSVC is
    fitted on the selected columns and its accuracy on the held-out rows is
    printed. The mean accuracy over the 10 folds is printed at the end.
    """
    # load the data matrix and the first label column
    dataset = scipy.io.loadmat('../data/BASEHOCK.mat')
    X = dataset['X'].astype(float)
    y = dataset['Y'][:, 0]
    n_samples, _ = X.shape

    # 10 shuffled folds over the sample indices
    folds = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    num_fea = 10  # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    accuracy_sum = 0
    for train, test in folds:
        # feature selection sees the training rows only
        idx = MIFS.mifs(X[train], y[train], n_selected_features=num_fea)

        # restrict the full matrix to the selected columns
        selected = X[:, idx[0:num_fea]]

        # fit on the training rows, score on the held-out rows
        clf.fit(selected[train], y[train])
        fold_accuracy = accuracy_score(y[test], clf.predict(selected[test]))
        print(fold_accuracy)
        accuracy_sum = accuracy_sum + fold_accuracy

    # average classification accuracy over all 10 folds
    print('Accuracy:', old_div(float(accuracy_sum), 10))
Exemple #5
0
def main():
    """Evaluate MIFS + linear SVM on BASEHOCK with shuffled 10-fold splits.

    For every fold, MIFS is run on the training rows only, a LinearSVC is
    fitted on the selected columns and its accuracy on the held-out rows is
    printed. The mean accuracy over the 10 folds is printed at the end.

    Output is produced with ``print(...)`` calls on a single pre-formatted
    string, so the function parses and prints the same text on Python 2 and
    Python 3.
    """
    # load data
    mat = scipy.io.loadmat('../data/BASEHOCK.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = MIFS.mifs(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        print(acc)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy: {}'.format(float(correct) / 10))
Exemple #6
0
# Class labels as a flat array.
y_data = ad['Label']
y = pd.DataFrame(y_data).values.ravel()

# Load the resampled data previously stored as .npy files.
X_resampled = np.load('cervical_x.npy')
y_resampled = np.load('Cervical_y.npy')

cv = StratifiedKFold(n_splits=10)
from skfeature.function.information_theoretical_based import MIFS

# NOTE(review): `idx` is overwritten on every fold, so only the selection
# from the last training split is used below -- confirm this is intended.
for train, test in cv.split(X_resampled, y_resampled):
    idx = MIFS.mifs(X_resampled[train],
                    y_resampled[train],
                    n_selected_features=11)

#FCBF.fcbf(X_resampled, y_resampled)

#print(score)
# Wrap the resampled matrix in a DataFrame that reuses X's column names.
X_resampled = pd.DataFrame(X_resampled, columns=X.columns.values)

# Keep the columns named by the first 11 entries of `idx`.
X1 = X_resampled.iloc[:, [idx[k] for k in range(11)]]

#X1 = X_resampled.iloc[:, [idx[0], idx[1], idx[2], idx[3], idx[4],idx[5]]]
Exemple #7
0
def MIFS_featureSelection(x, y):
    """Run MIFS on ``(x, y)`` and return ``feature_ranking`` of its result."""
    return feature_ranking(MIFS.mifs(x, y))
from skfeature.function.statistical_based import f_score



if __name__ == "__main__":
    # Script entry point: load the train/test CSVs, encode the 'Activity'
    # labels, run MIFS on the training set and write the sorted indices of
    # the selected features to a text file (space separated).
    train_data = pd.read_csv("train.csv")
    test_data = pd.read_csv("test.csv")
    train_label = train_data['Activity']
    test_label = test_data['Activity']

    # Everything except the 'subject' and 'Activity' columns is a feature.
    train_x = np.array(train_data.drop(['subject', 'Activity'], axis=1))
    test_x = np.array(test_data.drop(['subject', 'Activity'], axis=1))

    # Fit the label encoder on the training labels only, then apply to both.
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_label)
    classes = list(encoder.classes_)
    train_y = np.array(encoder.transform(train_label))
    test_y = np.array(encoder.transform(test_label))

    print("start feature selection")
    index = MIFS.mifs(train_x, train_y, n_selected_features=400)
    #score = f_score.f_score(train_x, train_y)
    #index = f_score.feature_ranking(score)
    print("end feature selection")

    index_select = sorted(index[:400])

    # NOTE(review): the selection above is MIFS but the file name says
    # f_score -- confirm which output name is intended.
    # The context manager closes the file even if a write fails.
    with open("selected feature f_score.txt", 'w') as out_file:
        #file = open("selected feature MIFS.txt", 'w')
        for i in index_select:
            out_file.write("%d " % (i))
    
def experiment(data, box, cv, output):
    """
    Run the poisoning experiment for one dataset / bounding box and save it.

        The clean and adversarial CSV files are loaded, then `cv` randomized
        train/test splits are drawn (these are random splits, not true
        cross-validation). For every split and every poisoning ratio in the
        module-level POI_RNG, part of the training set is replaced with
        adversarial samples, JMI / MIM / MRMR / MIFS feature selection is run
        on the poisoned training set, and KNN accuracy on the clean test set
        is accumulated. The accuracies are averaged over the `cv` runs,
        printed, and written with np.savez.

        Relies on the module-level names SEL_PERCENT, NPR, POI_RNG, FEAT_IDX
        and KNN_classification.

        Parameters
        ----------
        data : string
            Dataset name.

        box : string
            Bounding box on the file name.

        cv : int
            Number of randomized runs.

        output : string
            Path handed to np.savez; the averaged accuracy matrix is stored
            under the key `acc_KNN`.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If more poison samples are requested than the adversarial data
            contains.
    """
    #data, box, cv, output = 'conn-bench-sonar-mines-rocks', '1', 5, 'results/test.npz'

    # load normal and adversarial data
    path_adversarial_data = 'data/attacks/' + data + '_[xiao][' + box + '].csv'
    df_normal = pd.read_csv('data/clean/' + data + '.csv', header=None).values
    df_adversarial = pd.read_csv(path_adversarial_data, header=None).values

    # separate features (all but the last column) from labels (last column)
    Xn, yn = df_normal[:, :-1], df_normal[:, -1]
    Xa, ya = df_adversarial[:, :-1], df_adversarial[:, -1]

    # change the labels from +/-1 to [0,1]
    ya[ya == -1], yn[yn == -1] = 0, 0

    # calculate the ratios of data that would be used for training and hold out
    p0, p1 = 1./cv, (1. - 1./cv)
    N = len(Xn)
    # calculate the total number of training and testing samples and set the
    # number of features that are going to be selected.
    # NOTE(review): the original author flagged this line with "[OBS]: Losing
    # one feature in the process" -- presumably the int() truncation; confirm.
    Ntr, Nte = int(p1*N), int(p0*N)
    n_selected_features = int(Xn.shape[1]*SEL_PERCENT)+1

    # zero the results out: one row per poisoning ratio, one column per setup
    acc_KNN = np.zeros((NPR, 6))

    ####################################
    # CLASSIFICATION
    ####################################

    # run `cv` randomized experiments. note this is not performing
    # cross-validation, rather we are going to use randomized splits of the data.
    for _ in range(cv):
        # shuffle up the data for the experiment then split the data into a
        # training and testing dataset
        i = np.random.permutation(N)
        Xtrk, ytrk, Xtek, ytek = Xn[i][:Ntr], yn[i][:Ntr], Xn[i][-Nte:], yn[i][-Nte:]

        ####### Classification on Normal Data with no FS #######################
        yn_allfeature_KNN = KNN_classification(Xtrk, ytrk, Xtek, ytek)

        ####### Classification on JMI-based features on Normal data #############
        # NOTE(review): yn_JMI_KNN is computed but never written into acc_KNN.
        sf_base_jmi = JMI.jmi(Xtrk, ytrk, n_selected_features=n_selected_features)[FEAT_IDX]
        #print("\nNOR: JMI features", sf_base_jmi)
        Xtr_jmi = Xtrk[:, sf_base_jmi]
        Xte_jmi = Xtek[:, sf_base_jmi]
        yn_JMI_KNN = KNN_classification(Xtr_jmi, ytrk, Xte_jmi, ytek)

        for n in range(NPR):

            # calculate the number of poisoned data that we are going to need
            # to make sure that the poisoning ratio is correct in the training
            # data. e.g., if you have N=100 samples and you want to poison by
            # 20% then the 20% needs to be from the training size. hence it is
            # not 20.
            Np = int(len(ytrk)*POI_RNG[n]+1)
            if Np > len(ya):
                # shouldn't happen but catch the case where we are requesting
                # more poison data samples than are available. Without the
                # raise the slice below would silently return fewer samples
                # and the poisoning ratio would be wrong.
                raise ValueError('Number of poison data requested is larger than the available data.')

            # find the number of normal samples (i.e., not poisoned) samples in
            # the training data. then create the randomized data set that has
            # Nn normal data samples and Np adversarial samples in the training
            # data
            Nn = len(ytrk) - Np
            idx_normal, idx_adversarial = np.random.permutation(len(ytrk))[:Nn], \
                                            np.random.permutation(len(ya))[:Np]
            Xtrk_poisoned, ytrk_poisoned = np.concatenate((Xtrk[idx_normal], Xa[idx_adversarial])), \
                                            np.concatenate((ytrk[idx_normal], ya[idx_adversarial]))

            # KNN on the poisoned training data with no feature selection
            ya_allfeature_KNN = KNN_classification(Xtrk_poisoned, ytrk_poisoned, Xtek, ytek)

            # run feature selection with the training data that has adversarial samples
            sf_adv_jmi = JMI.jmi(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_mim = MIM.mim(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_mrmr = MRMR.mrmr(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_misf = MIFS.mifs(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]

            # KNN Classification on JMI selected features
            Xtrk_poisoned_JMI = Xtrk_poisoned[:, sf_adv_jmi]
            Xtest_JMI = Xtek[:, sf_adv_jmi]
            ya_JMI_KNN = KNN_classification(Xtrk_poisoned_JMI, ytrk_poisoned, Xtest_JMI, ytek)
            # KNN Classification on MIM selected features
            Xtrk_poisoned_MIM = Xtrk_poisoned[:, sf_adv_mim]
            Xtest_MIM = Xtek[:, sf_adv_mim]
            ya_MIM_KNN = KNN_classification(Xtrk_poisoned_MIM, ytrk_poisoned, Xtest_MIM, ytek)
            # KNN Classification on MRMR selected features
            Xtrk_poisoned_MRMR = Xtrk_poisoned[:, sf_adv_mrmr]
            Xtest_MRMR = Xtek[:, sf_adv_mrmr]
            ya_MRMR_KNN = KNN_classification(Xtrk_poisoned_MRMR, ytrk_poisoned, Xtest_MRMR, ytek)
            # KNN Classification on MISF selected features
            Xtrk_poisoned_MISF = Xtrk_poisoned[:, sf_adv_misf]
            Xtest_MISF = Xtek[:, sf_adv_misf]
            ya_MISF_KNN = KNN_classification(Xtrk_poisoned_MISF, ytrk_poisoned, Xtest_MISF, ytek)

            # Accumulate accuracy in the NPR x 6 matrix (row = poisoning ratio)
            acc_KNN[n, 0] += accuracy_score(ytek, yn_allfeature_KNN)    # Acc score of normal data without Feature Selection
            acc_KNN[n, 1] += accuracy_score(ytek, ya_allfeature_KNN)    # Acc score of adversarial data without Feature Selection
            acc_KNN[n, 2] += accuracy_score(ytek, ya_JMI_KNN)    # Acc score of adversarial data with JMI Feature Selection algo
            acc_KNN[n, 3] += accuracy_score(ytek, ya_MIM_KNN)    # Acc score of adversarial data with MIM Feature Selection algo
            acc_KNN[n, 4] += accuracy_score(ytek, ya_MRMR_KNN)    # Acc score of adversarial data with MRMR Feature Selection algo
            acc_KNN[n, 5] += accuracy_score(ytek, ya_MISF_KNN)    # Acc score of adversarial data with MISF Feature Selection algo

    #print(acc_KNN)
    # scale the accuracy statistics by 1.0/cv then write the output file
    acc_KNN = acc_KNN/cv
    print("\n Accuracy matrix of KNN")
    print("[COL]: Norm_noFS, Adv_noFS, Adv_JMI, Adv_MIM, Adv_MRMR, Adv_MISF")
    print("[ROW]: Poisoning ratios: 0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2")
    print("\n", acc_KNN)

    np.savez(output, acc_KNN=acc_KNN)
    return None
Exemple #10
0
def MIFS_FS(k, X_train, y_train):
    """Return the result of MIFS on the training data, asking for ``k`` features."""
    return MIFS.mifs(X_train, y_train, n_selected_features=k)
        print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred)))
        print("AUC" + repr(roc_auc_score(Y_test, Y_pred)))
        print("Sensitivity" + repr(recall_score(Y_test, Y_pred)))
        tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel()
        print("Specificity" + repr(tn / (tn + fp)))

        sheet_test.write(r, c, roc_auc_score(Y_test, Y_pred))

        r = r + 1
    c = c + 1
    r = 0

# Run every multivariate selector on the training split. Each result is
# stored as a (label, result) pair and the label is printed once the
# selector has finished, as a progress marker.
MV_sel = []
for _label, _selector in (
        ('MIM', MIM.mim),
        ('MIFS', MIFS.mifs),
        ('MRMR', MRMR.mrmr),
        ('CIFE', CIFE.cife),
        ('JMI', JMI.jmi),
        ('CMIM', CMIM.cmim),
        ('ICAP', ICAP.icap),
):
    MV_sel.append(
        (_label, _selector(X_train, Y_train, n_selected_features=num_fea)))
    print(_label)
Exemple #12
0
        x = data[:, num]
        pval.append([num, wilcoxon(x, target)[1]])
    pval.sort(key=takeSecond)
    idx = []
    for i in range(n_selected_features):
        idx.append(pval[i][0])
    return idx


# MULTIVARIATE FEATURE SELECTION X CLASSIFICATION (10 fold CV)

# print('BEFORE')
# Each selector result is stored as a (label, result) pair; the label is
# printed after the selector finishes, as a progress marker.
MV_sel = []
for _label, _selector in (
        ('WLCX', WLCX),
        ('MIFS', MIFS.mifs),
        ('MRMR', MRMR.mrmr),
        ('CIFE', CIFE.cife),
        ('JMI', JMI.jmi),
        ('CMIM', CMIM.cmim),
        ('ICAP', ICAP.icap),
):
    MV_sel.append((_label, _selector(X, Y, n_selected_features=num_fea)))
    print(_label)
# DISR is the last selector and is not followed by a progress print.
MV_sel.append(('DISR', DISR.disr(X, Y, n_selected_features=num_fea)))
for name, model in models:
    for kind, idx in MV_sel:
        # X_sel = X[:, idx[0:num_fea]]
    result = pymrmr.mRMR(X, 'MIQ', 10)
    print(result)


def import_Data():
    """Load ``Disease_Data_BiGram.csv`` and split it into features and targets.

    Returns
    -------
    tuple
        ``(X, Y, Y_)`` where ``X`` holds every column except the last two,
        ``Y`` is the ``'Class'`` column and ``Y_`` is the ``'Subject'`` column.
    """
    frame = pd.read_csv('Disease_Data_BiGram.csv')
    # print(frame.shape)

    # Everything but the two trailing columns is treated as a feature.
    features = frame.iloc[:, :-2]
    return features, frame['Class'], frame['Subject']


X, Y, Y_ = import_Data()

# One table column per selector: the column labels of X picked out by the
# first 15 entries of that selector's result, with 'Subject' as the target.
FS = {}
for _label, _selector in (
        ('MRMR', MRMR.mrmr),
        ('JMI', JMI.jmi),
        ('MIFS', MIFS.mifs),
        ('MIM', MIM.mim),
):
    _picked = _selector(np.array(X), Y_, n_selected_features=15)[:15]
    FS[_label] = X.columns[_picked]

# Show the table of selected feature names and save it as CSV.
FS = pd.DataFrame(FS)
print(FS)
FS.to_csv('Selected_Features_MultiVar_BiG.csv')

#print(pd.DataFrame(FS))
#model = apply_Model(X,Y_)
# Finish the current figure: axis range, labels, then save and display it.
plt.ylim([0, 1])
plt.title("SVC, Test balanced accuracy")
plt.xlabel("Class")
plt.ylabel("Balanced accuracy")
plt.savefig("/media/yannick/MANAGE/BraTS20/results/svc_cing_test_balacc.png")
plt.show()

# CIFE and MIFS feature selection on the filtered data: everything except
# the ID and the target column is a candidate feature.
X_inp = cind_filtered.drop(columns=["ID", "Survival_days"])
y_inp = cind_filtered["Survival_days"]

# normalize
# X_inp_norm =

# Both selectors return three values; the first is used as the column
# indexer below and the third is discarded.
selidx, selscore, _ = CIFE.cife(X_inp.values, y_inp.values, n_selected_features=5)
selidx_mifs, selscore_mifs, _ = MIFS.mifs(X_inp.values, y_inp.values, n_selected_features=5)

# Each reduced feature set must be built from its own selector's indices;
# the MIFS set uses selidx_mifs, not the CIFE indices.
X_cife5 = X_inp.iloc[:, selidx]
X_mifs5 = X_inp.iloc[:, selidx_mifs]

clf = SVC(kernel='rbf')
# scoring = ['accuracy', 'balanced_accuracy', 'average_precision', 'recall', 'f1', 'roc_auc']
scoring = ['accuracy', 'balanced_accuracy']

# Result table: one row per CV run ("run_0", "run_1", ...), one column per
# class (STS / MTS / LTS), initialised to zero.
n_cvruns = 50
resultsdat = np.zeros((n_cvruns, 3+1))
resultsdf_train = pd.DataFrame(data=resultsdat, columns=["Run", "STS", "MTS", "LTS"])
resultsdf_train["Run"] = ["run_" + str(elem) for elem in np.arange(n_cvruns)]
resultsdf_train.set_index("Run", inplace=True)

# Independent copy with the same layout, named for the accuracy results.
resultsdf_train_acc = resultsdf_train.copy(deep=True)
X = np.array(train_data)
y = np.array(train_label)

# Shuffled subset of 10000 samples (fixed seed), used for ReliefF and MIFS.
X_relief, y_relief = shuffle(X, y, n_samples=10000, random_state=0)

# Filter methods:
#   Distance:    RelieF
#   Dependence:  Chi-squared
#   Information: MIFS (Mutual Information Feature Selection)
#
# Relief and Chi both give one score per feature. MIFS is slightly different,
# but the second row of its result can also be treated as a score. All three
# scores are normalised to the range 0-1 and then averaged.
RelieF_score = reliefF.reliefF(X_relief, y_relief[:, 0], k=n_features)  # RelieF
Chi = chi_square.chi_square(X, y[:, 0])
# MIFS return value: the first row is the ranked features, the second row is
# the objective function, the third row is the mutual information between
# each feature and the response variable.
Mifs = MIFS.mifs(X_relief, y_relief[:, 0], n_selected_features=n_features)

# Fuse the three selections with the mean method.
# Sorting by feature index lines the MIFS rows up with the Relief / Chi
# columns that are assigned next.
scores = pd.DataFrame({
    'Feature': list(Mifs[0]),
    'MIFS': list(Mifs[1]),
}).sort_values(by=['Feature'])
scores['Relief'] = RelieF_score
scores['Chi'] = Chi

# Normalise each score column to 0-1 (the scaler is refitted per column).
min_max_scaler = preprocessing.MinMaxScaler()
for _column in ('MIFS', 'Relief', 'Chi'):
    scores[_column + '_scaler'] = min_max_scaler.fit_transform(
        scores.loc[:, [_column]])

scores['mean'] = (scores['MIFS_scaler'] + scores['Relief_scaler'] + scores['Chi_scaler']) / 3
scores['feature_name'] = train_data.columns
        np.logical_or(
            np.isinf(np.ravel(kd[month])), np.isnan(np.ravel(kd[month]))
        )
    )
    kdata = np.ravel(kd[month])[filter0]
    fdata = fd[month][filter0]
    print(fdata.shape, kdata.shape)

    fdata = np.nanmean(fdata, axis=2)
    nan_num = np.sum(np.isnan(fdata), axis=0)
    filter1 = nan_num < 0.3 * fdata.shape[0]
    factor_name = np.array(select_list)
    fdata = fdata[:, filter1]
    factor_name = factor_name[filter1]
    print(fdata.shape, kdata.shape)

    filter0 = np.sum(np.isnan(fdata), axis=1) == 0
    fdata = fdata[filter0]
    kdata = kdata[filter0]
    print(fdata.shape, kdata.shape)

    F, J_CMI, MIfy = MIFS.mifs(fdata, kdata)
    select = factor_name[F[:]]
    print(select)
    for j in range(len(select)):
        select_factor[select[j]] = select_factor.get(select[j], 0) + 1

# Turn the {factor name: selection count} dict into a two-row table: row
# 'name' holds the factor names, row 'freq' holds how often each was selected.
select_factor = pd.DataFrame(
    [list(select_factor.keys()), list(select_factor.values())],
    index=['name', 'freq'])
# Order the columns by descending frequency. sort_values returns a new frame,
# so the result has to be assigned back to take effect.
select_factor = select_factor.sort_values(by='freq', axis=1, ascending=False)
print('\n', select_factor.values)