def cmim():
    """Run CMIM feature selection on the module-level dataset, print the
    result size and elapsed time, and persist the selection when it is a
    strict subset of the available features."""
    started = datetime.datetime.now()
    selected = CMIM.cmim(data, labels, mode="index", n_selected_features=treshold)
    finished = datetime.datetime.now()
    print("CMIM")
    print(len(selected))
    print("cas: " + str(finished - started))
    print('\n')
    # Only save when selection actually reduced the feature set.
    if len(selected) < len(header):
        transform_and_save(selected, "CMIM")
def cmim(data):
    """Score features with CMIM on each of six datasets and aggregate.

    Each entry of *data* is a 2-D array whose last column holds the labels
    and whose remaining columns are the features.  The per-dataset rankings
    are combined with ``rankaggregate`` and the aggregate is returned.
    """
    per_dataset_ranks = []
    for fold in range(6):
        features = data[fold][:, :-1]
        targets = data[fold][:, -1]
        scores, _, _ = CMIM.cmim(features, targets)
        per_dataset_ranks.append(samp(scores[:-1].tolist()))
    return rankaggregate(per_dataset_ranks)
def feature_conditional_mutual_info_maximisation(x_data, y_data):
    """Select the 20 best features of *x_data* by Conditional Mutual
    Information Maximisation (CMIM) against the labels *y_data*.

    Parameters
    ----------
    x_data : pandas.DataFrame of predictor columns
    y_data : pandas.Series/DataFrame of labels

    Returns
    -------
    pandas.DataFrame with columns ['Specs', 'Score'] holding the top-20
    feature names and their CMIM objective values, highest score first.
    """
    # CMIM.cmim returns a 3-tuple (F, J_CMIM, MIfy): the selected feature
    # indices in selection order, the CMIM objective per selected feature,
    # and the mutual information with y (see the other call sites in this
    # file, which unpack it as `idx, _, _`).  The original code iterated
    # over that tuple as if it were a list of (index, score) pairs, which
    # paired the wrong values; unpack it explicitly instead.
    selected_idx, j_cmim, _ = CMIM.cmim(x_data.values, y_data.values,
                                        n_selected_features=20)
    feat_names = x_data.columns.values[[int(i) for i in selected_idx]]
    feat_list_with_imp = [(feat_names[i], j_cmim[i])
                          for i in range(len(feat_names))]
    featureScores = pd.DataFrame(feat_list_with_imp)
    featureScores.columns = ['Specs', 'Score']  # naming the dataframe columns
    top_20_features = featureScores.nlargest(20, 'Score')
    return top_20_features
def main():
    """Evaluate CMIM feature selection on the colon dataset: 10-fold CV
    with a linear SVM on the top-10 selected features, printing the mean
    classification accuracy."""
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)   # data matrix
    y = mat['Y'][:, 0]           # label vector
    n_samples, n_features = X.shape
    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    # perform evaluation on classification task
    num_fea = 10                 # number of selected features
    clf = svm.LinearSVC()        # linear SVM
    correct = 0
    for train, test in ss:
        # CMIM.cmim returns (F, J_CMIM, MIfy); the original bound the whole
        # tuple to `idx`, so `idx[0:num_fea]` sliced the tuple instead of
        # the feature-index array.  Unpack the indices explicitly, matching
        # the sibling main() in this file.
        idx, _, _ = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)
        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]
        # train on the training split, then score the held-out split
        clf.fit(features[train], y[train])
        y_predict = clf.predict(features[test])
        correct += accuracy_score(y[test], y_predict)
    # Python-3 print call (the original used the Python-2 print statement).
    print('Accuracy:', float(correct) / 10)
def main():
    """10-fold cross-validated accuracy of a linear SVM trained on the
    top-10 CMIM-selected features of the colon dataset."""
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)   # data
    y = mat['Y'][:, 0]           # label
    n_samples, n_features = X.shape
    # split data into 10 folds
    folds = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)
    # perform evaluation on classification task
    num_fea = 10                 # number of selected features
    clf = svm.LinearSVC()        # linear SVM
    total_acc = 0
    for train, test in folds:
        # rank features on the training split only
        idx, _, _ = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)
        selected = X[:, idx[0:num_fea]]
        # fit on the training rows of the selected columns, score the rest
        clf.fit(selected[train], y[train])
        predictions = clf.predict(selected[test])
        total_acc += accuracy_score(y[test], predictions)
    # average classification accuracy over all 10 folds
    print('Accuracy:', float(total_acc) / 10)
def CMIM_FS(X_train, y_train, num_fea):
    """Return the indices of the top *num_fea* features selected by CMIM
    on the given training data (discarding the J_CMIM and MIfy scores)."""
    selected_indices, _, _ = CMIM.cmim(X_train, y_train,
                                       n_selected_features=num_fea)
    return selected_indices
# Multivariate feature-selection baselines: each entry pairs a method name
# with the selector's result on the training split.  The print after each
# append is a progress marker (selection can be slow).
MV_sel = []
MV_sel.append(('MIM', MIM.mim(X_train, Y_train, n_selected_features=num_fea)))
print('MIM')
MV_sel.append(('MIFS', MIFS.mifs(X_train, Y_train, n_selected_features=num_fea)))
print('MIFS')
MV_sel.append(('MRMR', MRMR.mrmr(X_train, Y_train, n_selected_features=num_fea)))
print('MRMR')
MV_sel.append(('CIFE', CIFE.cife(X_train, Y_train, n_selected_features=num_fea)))
print('CIFE')
MV_sel.append(('JMI', JMI.jmi(X_train, Y_train, n_selected_features=num_fea)))
print('JMI')
MV_sel.append(('CMIM', CMIM.cmim(X_train, Y_train, n_selected_features=num_fea)))
print('CMIM')
MV_sel.append(('ICAP', ICAP.icap(X_train, Y_train, n_selected_features=num_fea)))
print('ICAP')
MV_sel.append(('DISR', DISR.disr(X_train, Y_train, n_selected_features=num_fea)))
# NOTE(review): other call sites in this file unpack these selectors as
# `idx, _, _` (a (F, J_CMIM, MIfy) tuple).  If that is the return shape
# here too, `idx[0:num_fea]` below slices the 3-tuple, not the index
# array — confirm against the installed skfeature version.
for name, model in models:
    for kind, idx in MV_sel:
        #print(idx[0:num_fea][0])
        # X_sel = X[:, idx[0:num_fea]]
        # Restrict every split to the selected feature columns.
        X_test_ = X_test[:, idx[0:num_fea]]
        X_validate_ = X_validate[:, idx[0:num_fea]]
        X_train_ = X_train[:, idx[0:num_fea]]
        # X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X_sel, Y, test_size=validation_size, random_state=seed)
# MULTIVARIATE FEATURE SELECTION X CLASSIFICATION (10 fold CV) # print('BEFORE') MV_sel = [] MV_sel.append(('WLCX', WLCX(X, Y, n_selected_features=num_fea))) print('WLCX') MV_sel.append(('MIFS', MIFS.mifs(X, Y, n_selected_features=num_fea))) print('MIFS') MV_sel.append(('MRMR', MRMR.mrmr(X, Y, n_selected_features=num_fea))) print('MRMR') MV_sel.append(('CIFE', CIFE.cife(X, Y, n_selected_features=num_fea))) print('CIFE') MV_sel.append(('JMI', JMI.jmi(X, Y, n_selected_features=num_fea))) print('JMI') MV_sel.append(('CMIM', CMIM.cmim(X, Y, n_selected_features=num_fea))) print('CMIM') MV_sel.append(('ICAP', ICAP.icap(X, Y, n_selected_features=num_fea))) print('ICAP') MV_sel.append(('DISR', DISR.disr(X, Y, n_selected_features=num_fea))) for name, model in models: for kind, idx in MV_sel: # X_sel = X[:, idx[0:num_fea]] # X_test_ = X_test[:,idx[0:num_fea]] X_train_ = X_train[:, idx[0:num_fea]] # X_validation_ = X_validation[:, idx[0:num_fea]] # X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X_sel, Y, test_size=validation_size, random_state=seed) # kfold = model_selection.KFold(n_splits=10, random_state=seed) # cv_results = model_selection.cross_val_score(model, X_train_, Y_train, cv=kfold) # msg = "%s %s: %f (%f)\n" % (kind, name, cv_results.mean(), cv_results.std())
# Final evaluation: stratified k-fold CV of the chosen classifier over an
# increasing number of CMIM-selected features (10, 20, ..., 100), keeping
# a running accuracy total per feature-count setting.
print(x.shape, y.shape)
clf = load_clf(FINAL_CLASSIFIER)
perfs = np.zeros(10)  # accumulated accuracy for each of the 10 k-values
# Bug fix: scikit-learn rejects StratifiedKFold(random_state=...) when
# shuffle is False (the seed has no effect without shuffling); enable
# shuffling so the fixed seed actually makes the folds reproducible.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_index = 0
for train_index, test_index in skf.split(x, y):
    print("fold:", fold_index + 1)
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    for i, k in enumerate(np.arange(10, 101, 10)):
        # Rank features on the training split only, then keep the top k.
        idx, _, _ = CMIM.cmim(x_train, y_train, n_selected_features=k)
        x_train_selected = x_train[:, idx[0:k]]
        x_test_selected = x_test[:, idx[0:k]]
        clf.fit(x_train_selected, y_train)
        y_pred = clf.predict(x_test_selected)
        accu = accuracy_score(y_test, y_pred)
        print("selected features:", k, "accu:", accu)
        perfs[i] += accu
    fold_index += 1
print("n_splits:", n_splits)
print("cmim", DATASET, FINAL_CLASSIFIER)
# perfs /= n_splits