import datetime

from skfeature.function.information_theoretical_based import MIFS


# `data`, `labels`, `treshold`, and `header` are module-level globals in the
# original script.
def mifs():
    before = datetime.datetime.now()
    result = MIFS.mifs(data, labels, mode="index", n_selected_features=treshold)
    after = datetime.datetime.now()
    print("MIFS")
    print(len(result))
    print("time: " + str(after - before))
    print('\n')
    if len(result) < len(header):
        transform_and_save(result, "MIFS")
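# transform_and_save is not defined in the snippet above. A minimal,
# hypothetical stand-in consistent with the call site: keep only the selected
# feature columns of the module-level `data` array and persist them under the
# method's name.
import numpy as np

def transform_and_save(selected_idx, method_name):
    reduced = data[:, selected_idx]  # assumes `data` is a samples-by-features NumPy array
    np.save('selected_{}.npy'.format(method_name), reduced)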
def runMIFS(self):
    datasetKeys = self.data.keys()
    for datasetKey in datasetKeys:
        self.log.emit('MIFS feature selection on {} dataset...'.format(datasetKey), indents=1)
        f = self.data[datasetKey]['f']
        X = self.data[datasetKey]['X']
        y = self.data[datasetKey]['y']
        # rank the features, then map the selected indices back to feature names
        fIdxs = MIFS.mifs(X, y, n_selected_features=10)
        fRank = [f[i] for i in fIdxs]
        self.addToSelectedFeatures('MIFS', datasetKey, fOrig=f, fIdxs=fIdxs, fRank=fRank)
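# A sketch of the per-dataset layout that runMIFS assumes (the keys 'f', 'X',
# and 'y' come from the snippet; the example values are hypothetical): each
# entry maps a dataset name to its feature names, a samples-by-features
# matrix, and a label vector.
import numpy as np

example_data = {
    'demo': {
        'f': ['age', 'bmi', 'bp'],              # feature names
        'X': np.array([[63.0, 28.1, 120.0],
                       [41.0, 22.4, 110.0]]),   # samples x features
        'y': np.array([1, 0]),                  # class labels
    },
}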
from concurrent.futures import ProcessPoolExecutor

from skfeature.function.information_theoretical_based import JMI, MIFS, MIM, MRMR


def run_feature_selection(X, Y, n_selected_features):
    lst = []
    if PARALLEL:
        # Alternative with multiprocessing.Pool:
        # with multiprocessing.Pool(processes=4) as pool:
        #     lst.append(pool.apply(JMI.jmi, args=(X, Y), kwds={'n_selected_features': n_selected_features}))
        #     lst.append(pool.apply(MIM.mim, args=(X, Y), kwds={'n_selected_features': n_selected_features}))
        #     lst.append(pool.apply(MRMR.mrmr, args=(X, Y), kwds={'n_selected_features': n_selected_features}))
        #     lst.append(pool.apply(MIFS.mifs, args=(X, Y), kwds={'n_selected_features': n_selected_features}))
        #     lst = [l[FEAT_IDX] for l in lst]
        with ProcessPoolExecutor(max_workers=4) as executor:
            lst.append(executor.submit(JMI.jmi, X, Y, n_selected_features=n_selected_features))
            lst.append(executor.submit(MIM.mim, X, Y, n_selected_features=n_selected_features))
            lst.append(executor.submit(MRMR.mrmr, X, Y, n_selected_features=n_selected_features))
            lst.append(executor.submit(MIFS.mifs, X, Y, n_selected_features=n_selected_features))
        lst = [l.result()[FEAT_IDX] for l in lst]
    else:
        lst.append(JMI.jmi(X, Y, n_selected_features=n_selected_features)[FEAT_IDX])
        lst.append(MIM.mim(X, Y, n_selected_features=n_selected_features)[FEAT_IDX])
        lst.append(MRMR.mrmr(X, Y, n_selected_features=n_selected_features)[FEAT_IDX])
        lst.append(MIFS.mifs(X, Y, n_selected_features=n_selected_features)[FEAT_IDX])
    return lst
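# A minimal driver for run_feature_selection on synthetic data. PARALLEL and
# FEAT_IDX are module-level assumptions taken from the snippet: PARALLEL
# toggles the executor path, and FEAT_IDX picks the ranked-index array out of
# each method's return value (some skfeature versions return a
# (F, J_CMI, MIfy) tuple, others just F).
import numpy as np

PARALLEL = False  # assumption: flag read by run_feature_selection
FEAT_IDX = 0      # assumption: position of the ranked-index array in each result

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    X_demo = rng.integers(0, 4, size=(100, 20)).astype(float)  # discrete-valued features
    Y_demo = rng.integers(0, 2, size=100)
    for name, idx in zip(('JMI', 'MIM', 'MRMR', 'MIFS'),
                         run_feature_selection(X_demo, Y_demo, n_selected_features=5)):
        print(name, idx)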
import scipy.io
from sklearn import model_selection, svm
from sklearn.metrics import accuracy_score
from skfeature.function.information_theoretical_based import MIFS


def main():
    # load data
    mat = scipy.io.loadmat('../data/BASEHOCK.mat')
    X = mat['X']  # data
    X = X.astype(float)
    y = mat['Y']  # label
    y = y[:, 0]
    n_samples, n_features = X.shape  # number of samples and number of features

    # split data into 10 folds (KFold moved from sklearn.cross_validation to
    # sklearn.model_selection)
    ss = model_selection.KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10           # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the index of each feature on the training set
        idx = MIFS.mifs(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        print(acc)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
# get classes
y_data = ad['Label']
y = pd.DataFrame(y_data)
y = y.values.ravel()

# load the resampled data from .npy files
X_resampled = np.load('cervical_x.npy')
y_resampled = np.load('Cervical_y.npy')

cv = StratifiedKFold(n_splits=10)

from skfeature.function.information_theoretical_based import MIFS

# note: idx is recomputed on each fold, so only the selection from the last
# fold is used below
for train, test in cv.split(X_resampled, y_resampled):
    idx = MIFS.mifs(X_resampled[train], y_resampled[train], n_selected_features=11)
    # FCBF.fcbf(X_resampled, y_resampled)
    # print(score)

X_resampled = pd.DataFrame(X_resampled)
X_resampled.columns = X.columns.values
X1 = X_resampled.iloc[:, list(idx[:11])]
# X1 = X_resampled.iloc[:, list(idx[:6])]
from skfeature.function.information_theoretical_based import MIFS


def MIFS_featureSelection(x, y):
    idx = MIFS.mifs(x, y)
    rank = feature_ranking(idx)
    return rank
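# feature_ranking is not defined in the snippet above and is not part of
# skfeature's information-theoretic module. Because mifs already returns
# features in selection order, a stand-in consistent with the call site could
# be the identity (hypothetical; the author's helper may differ):
def feature_ranking(idx):
    # mifs yields indices already ranked by the MIFS criterion
    return idx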
import numpy as np
import pandas as pd
from sklearn import preprocessing
from skfeature.function.information_theoretical_based import MIFS
from skfeature.function.statistical_based import f_score

if __name__ == "__main__":
    train_data = pd.read_csv("train.csv")
    test_data = pd.read_csv("test.csv")
    train_label = train_data['Activity']
    test_label = test_data['Activity']
    train_x = np.array(train_data.drop(['subject', 'Activity'], axis=1))
    test_x = np.array(test_data.drop(['subject', 'Activity'], axis=1))

    # encode the string activity labels as integers
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_label)
    classes = list(encoder.classes_)
    train_y = np.array(encoder.transform(train_label))
    test_y = np.array(encoder.transform(test_label))

    print("start feature selection")
    index = MIFS.mifs(train_x, train_y, n_selected_features=400)
    # score = f_score.f_score(train_x, train_y)
    # index = f_score.feature_ranking(score)
    print("end feature selection")

    index_select = sorted(index[:400])
    # the output file name should match the active selector (MIFS here)
    file = open("selected feature MIFS.txt", 'w')
    # file = open("selected feature f_score.txt", 'w')
    for i in index_select:
        file.write("%d " % (i))
    file.close()
def experiment(data, box, cv, output):
    """
    Write the results of an experiment.

    This function will run an experiment for a specific dataset with a
    bounding box. There will be `cv` runs of randomized experiments, and the
    outputs will be written to a file.

    Parameters
    ----------
    data : string
        Dataset name.
    box : string
        Bounding box on the file name.
    cv : int
        Number of randomized runs (randomized splits, not true cross-validation).
    output : string
        Path of the .npz file the accuracy results are written to.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the percent poison exceeds the number of samples in the requested data.
    """
    # data, box, cv, output = 'conn-bench-sonar-mines-rocks', '1', 5, 'results/test.npz'

    # load normal and adversarial data
    path_adversarial_data = 'data/attacks/' + data + '_[xiao][' + box + '].csv'
    df_normal = pd.read_csv('data/clean/' + data + '.csv', header=None).values
    df_adversarial = pd.read_csv(path_adversarial_data, header=None).values

    # separate out the normal and adversarial data
    Xn, yn = df_normal[:, :-1], df_normal[:, -1]
    Xa, ya = df_adversarial[:, :-1], df_adversarial[:, -1]

    # change the labels from +/-1 to [0, 1]
    ya[ya == -1], yn[yn == -1] = 0, 0

    # calculate the ratios of data that would be used for training and hold out
    p0, p1 = 1. / cv, (1. - 1. / cv)
    N = len(Xn)

    # calculate the total number of training and testing samples and set the
    # number of features that are going to be selected
    Ntr, Nte = int(p1 * N), int(p0 * N)
    # [OBS]: losing one feature in the process
    n_selected_features = int(Xn.shape[1] * SEL_PERCENT) + 1

    # zero the results out
    acc_KNN = np.zeros((NPR, 6))

    ####################################
    # CLASSIFICATION
    ####################################
    # run `cv` randomized experiments. note this is not performing cross-validation;
    # rather, we are going to use randomized splits of the data.
    for _ in range(cv):
        # shuffle up the data for the experiment, then split the data into a
        # training and a testing dataset
        i = np.random.permutation(N)
        Xtrk, ytrk, Xtek, ytek = Xn[i][:Ntr], yn[i][:Ntr], Xn[i][-Nte:], yn[i][-Nte:]

        # classification on normal data with no FS
        yn_allfeature_KNN = KNN_classification(Xtrk, ytrk, Xtek, ytek)

        # classification on JMI-based features on normal data
        sf_base_jmi = JMI.jmi(Xtrk, ytrk, n_selected_features=n_selected_features)[FEAT_IDX]
        # print("\nNOR: JMI features", sf_base_jmi)
        Xtr_jmi = Xtrk[:, sf_base_jmi]
        Xte_jmi = Xtek[:, sf_base_jmi]
        yn_JMI_KNN = KNN_classification(Xtr_jmi, ytrk, Xte_jmi, ytek)

        for n in range(NPR):
            # calculate the number of poisoned samples needed so that the
            # poisoning ratio is correct in the training data. e.g., if you have
            # N=100 samples and you want to poison by 20%, then the 20% needs to
            # be of the training size, hence it is not 20.
            Np = int(len(ytrk) * POI_RNG[n] + 1)
            if Np >= len(ya):
                # shouldn't happen, but catch the case where more poison samples
                # are requested than are available. NEED TO BE CAREFUL WHEN WE
                # ARE CREATING THE ADVERSARIAL DATA
                raise ValueError('Number of poison data requested is larger than the available data.')

            # find the number of normal (i.e., not poisoned) samples in the
            # training data, then create the randomized dataset with Nn normal
            # samples and Np adversarial samples in the training data
            Nn = len(ytrk) - Np
            idx_normal, idx_adversarial = np.random.permutation(len(ytrk))[:Nn], \
                                          np.random.permutation(len(ya))[:Np]
            Xtrk_poisoned, ytrk_poisoned = np.concatenate((Xtrk[idx_normal], Xa[idx_adversarial])), \
                                           np.concatenate((ytrk[idx_normal], ya[idx_adversarial]))
            ya_allfeature_KNN = KNN_classification(Xtrk_poisoned, ytrk_poisoned, Xtek, ytek)

            # run feature selection with the training data that has adversarial samples
            sf_adv_jmi = JMI.jmi(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_mim = MIM.mim(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_mrmr = MRMR.mrmr(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]
            sf_adv_misf = MIFS.mifs(Xtrk_poisoned, ytrk_poisoned, n_selected_features=n_selected_features)[FEAT_IDX]

            # KNN classification on JMI-selected features
            Xtrk_poisoned_JMI = Xtrk_poisoned[:, sf_adv_jmi]
            Xtest_JMI = Xtek[:, sf_adv_jmi]
            ya_JMI_KNN = KNN_classification(Xtrk_poisoned_JMI, ytrk_poisoned, Xtest_JMI, ytek)

            # KNN classification on MIM-selected features
            Xtrk_poisoned_MIM = Xtrk_poisoned[:, sf_adv_mim]
            Xtest_MIM = Xtek[:, sf_adv_mim]
            ya_MIM_KNN = KNN_classification(Xtrk_poisoned_MIM, ytrk_poisoned, Xtest_MIM, ytek)

            # KNN classification on MRMR-selected features
            Xtrk_poisoned_MRMR = Xtrk_poisoned[:, sf_adv_mrmr]
            Xtest_MRMR = Xtek[:, sf_adv_mrmr]
            ya_MRMR_KNN = KNN_classification(Xtrk_poisoned_MRMR, ytrk_poisoned, Xtest_MRMR, ytek)

            # KNN classification on MIFS-selected features
            Xtrk_poisoned_MISF = Xtrk_poisoned[:, sf_adv_misf]
            Xtest_MISF = Xtek[:, sf_adv_misf]
            ya_MISF_KNN = KNN_classification(Xtrk_poisoned_MISF, ytrk_poisoned, Xtest_MISF, ytek)

            # accumulate accuracy in a matrix of size 9x6
            acc_KNN[n, 0] += accuracy_score(ytek, yn_allfeature_KNN)  # normal data, no FS
            acc_KNN[n, 1] += accuracy_score(ytek, ya_allfeature_KNN)  # adversarial data, no FS
            acc_KNN[n, 2] += accuracy_score(ytek, ya_JMI_KNN)         # adversarial data, JMI FS
            acc_KNN[n, 3] += accuracy_score(ytek, ya_MIM_KNN)         # adversarial data, MIM FS
            acc_KNN[n, 4] += accuracy_score(ytek, ya_MRMR_KNN)        # adversarial data, MRMR FS
            acc_KNN[n, 5] += accuracy_score(ytek, ya_MISF_KNN)        # adversarial data, MISF FS
            # print(acc_KNN)

    # scale the accuracy statistics by 1.0/cv, then write the output file
    acc_KNN = acc_KNN / cv
    print("\n Accuracy matrix of KNN")
    print("[COL]: Norm_noFS, Adv_noFS, Adv_JMI, Adv_MIM, Adv_MRMR, Adv_MISF")
    print("[ROW]: Poisoning ratios: 0.01, 0.025, 0.05, 0.075, 0.1, 0.125, 0.15, 0.175, 0.2")
    print("\n", acc_KNN)
    np.savez(output, acc_KNN=acc_KNN)
    return None
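# Invoking the experiment, mirroring the commented example in the docstring
# (dataset name, bounding box, number of randomized runs, output path):
if __name__ == '__main__':
    experiment('conn-bench-sonar-mines-rocks', '1', 5, 'results/test.npz')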
from skfeature.function.information_theoretical_based import MIFS


def MIFS_FS(k, X_train, y_train):
    idx = MIFS.mifs(X_train, y_train, n_selected_features=k)
    # print(idx)
    return idx
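# A short usage sketch for MIFS_FS on synthetic data (shapes and values are
# illustrative, and it assumes mifs returns the ranked index array): select k
# indices on the training split only, then slice both splits with them so the
# test data never influences the selection.
import numpy as np

rng = np.random.default_rng(1)
X_tr = rng.integers(0, 3, size=(80, 12)).astype(float)
X_te = rng.integers(0, 3, size=(20, 12)).astype(float)
y_tr = rng.integers(0, 2, size=80)

k = 5
idx = MIFS_FS(k, X_tr, y_tr)
X_tr_sel = X_tr[:, idx[:k]]
X_te_sel = X_te[:, idx[:k]]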
print("Hamming Loss: " + repr(hamming_loss(Y_test, Y_pred))) print("AUC" + repr(roc_auc_score(Y_test, Y_pred))) print("Sensitivity" + repr(recall_score(Y_test, Y_pred))) tn, fp, fn, tp = confusion_matrix(Y_test, Y_pred).ravel() print("Specificity" + repr(tn / (tn + fp))) sheet_test.write(r, c, roc_auc_score(Y_test, Y_pred)) r = r + 1 c = c + 1 r = 0 MV_sel = [] MV_sel.append(('MIM', MIM.mim(X_train, Y_train, n_selected_features=num_fea))) print('MIM') MV_sel.append(('MIFS', MIFS.mifs(X_train, Y_train, n_selected_features=num_fea))) print('MIFS') MV_sel.append(('MRMR', MRMR.mrmr(X_train, Y_train, n_selected_features=num_fea))) print('MRMR') MV_sel.append(('CIFE', CIFE.cife(X_train, Y_train, n_selected_features=num_fea))) print('CIFE') MV_sel.append(('JMI', JMI.jmi(X_train, Y_train, n_selected_features=num_fea))) print('JMI') MV_sel.append(('CMIM', CMIM.cmim(X_train, Y_train, n_selected_features=num_fea))) print('CMIM') MV_sel.append(('ICAP', ICAP.icap(X_train, Y_train, n_selected_features=num_fea))) print('ICAP')
        x = data[:, num]
        pval.append([num, wilcoxon(x, target)[1]])
    pval.sort(key=takeSecond)
    idx = []
    for i in range(n_selected_features):
        idx.append(pval[i][0])
    return idx


# MULTIVARIATE FEATURE SELECTION X CLASSIFICATION (10-fold CV)
# print('BEFORE')
MV_sel = []
MV_sel.append(('WLCX', WLCX(X, Y, n_selected_features=num_fea)))
print('WLCX')
MV_sel.append(('MIFS', MIFS.mifs(X, Y, n_selected_features=num_fea)))
print('MIFS')
MV_sel.append(('MRMR', MRMR.mrmr(X, Y, n_selected_features=num_fea)))
print('MRMR')
MV_sel.append(('CIFE', CIFE.cife(X, Y, n_selected_features=num_fea)))
print('CIFE')
MV_sel.append(('JMI', JMI.jmi(X, Y, n_selected_features=num_fea)))
print('JMI')
MV_sel.append(('CMIM', CMIM.cmim(X, Y, n_selected_features=num_fea)))
print('CMIM')
MV_sel.append(('ICAP', ICAP.icap(X, Y, n_selected_features=num_fea)))
print('ICAP')
MV_sel.append(('DISR', DISR.disr(X, Y, n_selected_features=num_fea)))

for name, model in models:
    for kind, idx in MV_sel:
        # X_sel = X[:, idx[0:num_fea]]
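# The inner loop body above is truncated in the source. One plausible
# completion (hypothetical, not the author's code) scores each model on each
# selected feature subset with 10-fold cross-validation:
#
#     for name, model in models:
#         for kind, idx in MV_sel:
#             X_sel = X[:, idx[0:num_fea]]
#             scores = cross_val_score(model, X_sel, Y, cv=10)
#             print(name, kind, scores.mean())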
import numpy as np
import pandas as pd
import pymrmr
from skfeature.function.information_theoretical_based import JMI, MIFS, MIM, MRMR

result = pymrmr.mRMR(X, 'MIQ', 10)
print(result)


def import_Data():
    Data = pd.read_csv('Disease_Data_BiGram.csv')
    # print(Data.shape)
    X = Data.iloc[:, 0:Data.shape[1] - 2]
    Y = Data['Class']
    Y_ = Data['Subject']
    return X, Y, Y_


FS = {}
X, Y, Y_ = import_Data()
# map the top-15 indices from each selector back to column names
FS['MRMR'] = X.columns[MRMR.mrmr(np.array(X), Y_, n_selected_features=15)[:15]]
FS['JMI'] = X.columns[JMI.jmi(np.array(X), Y_, n_selected_features=15)[:15]]
FS['MIFS'] = X.columns[MIFS.mifs(np.array(X), Y_, n_selected_features=15)[:15]]
FS['MIM'] = X.columns[MIM.mim(np.array(X), Y_, n_selected_features=15)[:15]]
FS = pd.DataFrame(FS)
print(FS)
FS.to_csv('Selected_Features_MultiVar_BiG.csv')
# print(pd.DataFrame(FS))
# model = apply_Model(X, Y_)
plt.ylim([0, 1])
plt.title("SVC, Test balanced accuracy")
plt.xlabel("Class")
plt.ylabel("Balanced accuracy")
plt.savefig("/media/yannick/MANAGE/BraTS20/results/svc_cing_test_balacc.png")
plt.show()

# CIFE
X_inp = cind_filtered.drop(columns=["ID", "Survival_days"])
y_inp = cind_filtered["Survival_days"]

# normalize
# X_inp_norm =

selidx, selscore, _ = CIFE.cife(X_inp.values, y_inp.values, n_selected_features=5)
selidx_mifs, selscore_mifs, _ = MIFS.mifs(X_inp.values, y_inp.values, n_selected_features=5)

X_cife5 = X_inp.iloc[:, selidx]
X_mifs5 = X_inp.iloc[:, selidx_mifs]

clf = SVC(kernel='rbf')
# scoring = ['accuracy', 'balanced_accuracy', 'average_precision', 'recall', 'f1', 'roc_auc']
scoring = ['accuracy', 'balanced_accuracy']
n_cvruns = 50

resultsdat = np.zeros((n_cvruns, 3 + 1))
resultsdf_train = pd.DataFrame(data=resultsdat, columns=["Run", "STS", "MTS", "LTS"])
resultsdf_train["Run"] = ["run_" + str(elem) for elem in np.arange(n_cvruns)]
resultsdf_train.set_index("Run", inplace=True)
resultsdf_train_acc = resultsdf_train.copy(deep=True)
X = np.array(train_data)
y = np.array(train_label)
X_relief, y_relief = shuffle(X, y, n_samples=10000, random_state=0)

'''
Filter methods:
    Distance:    ReliefF
    Dependence:  Chi-squared
    Information: MIFS (Mutual Information Feature Selection)
'''
# ReliefF and Chi both return one score per feature; MIFS is slightly
# different in that its second return value can also be treated as a score.
# Normalize all three scores to [0, 1] and average them.
RelieF_score = reliefF.reliefF(X_relief, y_relief[:, 0], k=n_features)  # ReliefF
Chi = chi_square.chi_square(X, y[:, 0])
# MIFS returns: first, the ranked feature indices; second, the
# objective-function values; third, the mutual information between each
# feature and the response variable.
Mifs = MIFS.mifs(X_relief, y_relief[:, 0], n_selected_features=n_features)

'''
Fuse the selections with the mean method.
'''
scores = pd.DataFrame({'Feature': list(Mifs[0]), 'MIFS': list(Mifs[1])})
scores = scores.sort_values(by=['Feature'])
scores['Relief'] = RelieF_score
scores['Chi'] = Chi

# normalize each score column to [0, 1]
min_max_scaler = preprocessing.MinMaxScaler()
scores['MIFS_scaler'] = min_max_scaler.fit_transform(scores.loc[:, ['MIFS']])
scores['Relief_scaler'] = min_max_scaler.fit_transform(scores.loc[:, ['Relief']])
scores['Chi_scaler'] = min_max_scaler.fit_transform(scores.loc[:, ['Chi']])
scores['mean'] = (scores['MIFS_scaler'] + scores['Relief_scaler'] + scores['Chi_scaler']) / 3
scores['feature_name'] = train_data.columns
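# A possible next step (hypothetical): rank the features by the fused 'mean'
# score computed above and keep the names of the top k.
top_k = 20  # illustrative cutoff
top_features = (scores.sort_values(by='mean', ascending=False)
                      .head(top_k)['feature_name']
                      .tolist())
print(top_features)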
    np.logical_or(
        np.isinf(np.ravel(kd[month])),
        np.isnan(np.ravel(kd[month]))
    )
)
kdata = np.ravel(kd[month])[filter0]
fdata = fd[month][filter0]
print(fdata.shape, kdata.shape)

# average over the third axis, then drop factors with too many missing values
fdata = np.nanmean(fdata, axis=2)
nan_num = np.sum(np.isnan(fdata), axis=0)
filter1 = nan_num < 0.3 * fdata.shape[0]
factor_name = np.array(select_list)
fdata = fdata[:, filter1]
factor_name = factor_name[filter1]
print(fdata.shape, kdata.shape)

# keep only the rows with no missing values left
filter0 = np.sum(np.isnan(fdata), axis=1) == 0
fdata = fdata[filter0]
kdata = kdata[filter0]
print(fdata.shape, kdata.shape)

# MIFS returns the ranked indices F, the objective values J_CMI, and the
# per-feature mutual information MIfy
F, J_CMI, MIfy = MIFS.mifs(fdata, kdata)
select = factor_name[F]
print(select)
for j in range(len(select)):
    select_factor[select[j]] = select_factor.get(select[j], 0) + 1

select_factor = pd.DataFrame([list(select_factor.keys()), list(select_factor.values())],
                             index=['name', 'freq'])
select_factor = select_factor.sort_values(by='freq', axis=1, ascending=False)
print('\n', select_factor.values)