Example #1
0
def cmim():
    """Run CMIM feature selection on the module-level dataset and report timing.

    Uses module-level globals: data, labels, treshold (feature count), header.
    Prints the method name, number of selected features, and elapsed time,
    then saves the reduced dataset when fewer features than columns remain.
    """
    start = datetime.datetime.now()
    selected = CMIM.cmim(data, labels, mode="index", n_selected_features=treshold)
    end = datetime.datetime.now()
    print("CMIM")
    print(len(selected))
    print("cas: " + str(end - start))
    print('\n')
    if len(selected) < len(header):
        transform_and_save(selected, "CMIM")
def cmim(data):
    """Aggregate CMIM feature rankings computed over six data partitions.

    Each data[i] is a 2-D array whose last column holds the label and the
    remaining columns hold features. Returns the rank-aggregated result.
    """
    per_split_ranks = []
    for split_idx in range(6):
        features = data[split_idx][:, :-1]
        labels = data[split_idx][:, -1]
        scores, _, _ = CMIM.cmim(features, labels)
        # drop the last score entry before sampling the ranking
        per_split_ranks.append(samp(scores[:-1].tolist()))
    return rankaggregate(per_split_ranks)
def feature_conditional_mutual_info_maximisation(x_data, y_data):
    """Select the 20 top-scoring features via CMIM.

    x_data and y_data are pandas objects (features / target). Returns a
    DataFrame with columns 'Specs' (feature name) and 'Score', holding the
    20 rows with the largest Score.
    """
    scores = CMIM.cmim(x_data.values, y_data.values, n_selected_features=20)
    # Each entry appears to pair a feature index with its CMIM score.
    selected_idx = [int(entry[0]) for entry in scores]
    selected_names = x_data.columns.values[selected_idx]
    named_scores = [(name, entry[1]) for name, entry in zip(selected_names, scores)]
    ranked = pd.DataFrame(named_scores)
    ranked.columns = ['Specs', 'Score']  # naming the dataframe columns
    return ranked.nlargest(20, 'Score')
Example #4
0
def main():
    """Evaluate CMIM feature selection on the colon dataset with 10-fold CV.

    Loads ../data/colon.mat, selects the top 10 CMIM features on each
    training fold, trains a linear SVM on them, and prints the average
    test accuracy over the 10 folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    n_samples, n_features = X.shape    # number of samples and number of features

    # split data into 10 folds
    ss = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 10    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the index of each feature on the training set
        idx = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # obtain the dataset on the selected features
        features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    # (fixed: was a Python 2 print statement, which is a SyntaxError on
    # Python 3; the rest of the file already uses the print function)
    print('Accuracy:', float(correct) / 10)
def main():
    """10-fold cross-validated evaluation of CMIM feature selection.

    Reads the colon dataset from ../data/colon.mat, picks the 10 best CMIM
    features on each training fold, fits a linear SVM, and reports the mean
    accuracy across folds.
    """
    # load data
    mat = scipy.io.loadmat('../data/colon.mat')
    X = mat['X'].astype(float)  # feature matrix
    y = mat['Y'][:, 0]  # label vector
    n_samples, n_features = X.shape

    # split data into 10 folds
    folds = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    num_fea = 10  # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    total_acc = 0
    for train, test in folds:
        # rank features using the training portion only
        idx, _, _ = CMIM.cmim(X[train], y[train], n_selected_features=num_fea)

        # keep only the selected feature columns
        selected = X[:, idx[0:num_fea]]

        # fit on the training fold, then score on the held-out fold
        clf.fit(selected[train], y[train])
        predictions = clf.predict(selected[test])
        total_acc += accuracy_score(y[test], predictions)

    # average classification accuracy over all 10 folds
    print('Accuracy:', float(total_acc) / 10)
Example #6
0
def CMIM_FS(X_train, y_train, num_fea):
    """Return the CMIM feature-index ranking for the top num_fea features.

    Discards the J_CMIM and MIfy score arrays that CMIM.cmim also returns.
    """
    ranking, _, _ = CMIM.cmim(X_train, y_train, n_selected_features=num_fea)
    return ranking
# Run each multivariate information-theoretic feature-selection method on the
# training split; collect (method-name, selected-index-array) pairs so the
# loop below can evaluate every classifier on every selection.
MV_sel = []
MV_sel.append(('MIM', MIM.mim(X_train, Y_train, n_selected_features=num_fea)))
print('MIM')
MV_sel.append(('MIFS', MIFS.mifs(X_train, Y_train,
                                 n_selected_features=num_fea)))
print('MIFS')
MV_sel.append(('MRMR', MRMR.mrmr(X_train, Y_train,
                                 n_selected_features=num_fea)))
print('MRMR')
MV_sel.append(('CIFE', CIFE.cife(X_train, Y_train,
                                 n_selected_features=num_fea)))
print('CIFE')
MV_sel.append(('JMI', JMI.jmi(X_train, Y_train, n_selected_features=num_fea)))
print('JMI')
MV_sel.append(('CMIM', CMIM.cmim(X_train, Y_train,
                                 n_selected_features=num_fea)))
print('CMIM')
MV_sel.append(('ICAP', ICAP.icap(X_train, Y_train,
                                 n_selected_features=num_fea)))
print('ICAP')
MV_sel.append(('DISR', DISR.disr(X_train, Y_train,
                                 n_selected_features=num_fea)))

# For every (classifier, selection-method) combination, slice the train /
# validate / test matrices down to the first num_fea selected columns.
# NOTE(review): most of the evaluation body is commented out, so this loop
# currently only builds the sliced matrices — confirm against the full file.
for name, model in models:
    for kind, idx in MV_sel:
        #print(idx[0:num_fea][0])
        # X_sel = X[:, idx[0:num_fea]]
        X_test_ = X_test[:, idx[0:num_fea]]
        X_validate_ = X_validate[:, idx[0:num_fea]]
        X_train_ = X_train[:, idx[0:num_fea]]
        # X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X_sel, Y, test_size=validation_size, random_state=seed)
Example #8
0
# MULTIVARIATE FEATURE SELECTION X CLASSIFICATION (10 fold CV)

# print('BEFORE')
# Apply each multivariate feature-selection method to the full X/Y dataset
# and collect (method-name, selected-index-array) pairs for evaluation below.
MV_sel = []
MV_sel.append(('WLCX', WLCX(X, Y, n_selected_features=num_fea)))
print('WLCX')
MV_sel.append(('MIFS', MIFS.mifs(X, Y, n_selected_features=num_fea)))
print('MIFS')
MV_sel.append(('MRMR', MRMR.mrmr(X, Y, n_selected_features=num_fea)))
print('MRMR')
MV_sel.append(('CIFE', CIFE.cife(X, Y, n_selected_features=num_fea)))
print('CIFE')
MV_sel.append(('JMI', JMI.jmi(X, Y, n_selected_features=num_fea)))
print('JMI')
MV_sel.append(('CMIM', CMIM.cmim(X, Y, n_selected_features=num_fea)))
print('CMIM')
MV_sel.append(('ICAP', ICAP.icap(X, Y, n_selected_features=num_fea)))
print('ICAP')
MV_sel.append(('DISR', DISR.disr(X, Y, n_selected_features=num_fea)))
# For every (classifier, selection) pair, reduce the training matrix to the
# first num_fea selected columns. NOTE(review): the cross-validation scoring
# below is commented out, so only the slicing currently executes — confirm
# against the full file.
for name, model in models:
    for kind, idx in MV_sel:
        # X_sel = X[:, idx[0:num_fea]]
        # X_test_ = X_test[:,idx[0:num_fea]]
        X_train_ = X_train[:, idx[0:num_fea]]
        # X_validation_ = X_validation[:, idx[0:num_fea]]
        # X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X_sel, Y, test_size=validation_size, random_state=seed)
        # kfold = model_selection.KFold(n_splits=10, random_state=seed)

        # cv_results = model_selection.cross_val_score(model, X_train_, Y_train, cv=kfold)
        # msg = "%s %s: %f (%f)\n" % (kind, name, cv_results.mean(), cv_results.std())
# Stratified k-fold evaluation of CMIM feature selection: for each fold,
# select k = 10..100 features (step 10), train the final classifier on the
# selected columns, and accumulate per-k test accuracy into perfs.
print(x.shape, y.shape)
clf = load_clf(FINAL_CLASSIFIER)

# perfs[i] accumulates accuracy across folds for k = 10 * (i + 1) features.
perfs = np.zeros(10)

# Fixed: shuffle=True is required for random_state to take effect; recent
# scikit-learn versions raise ValueError when random_state is set while
# shuffle is False, and older ones silently ignore the seed.
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
fold_index = 0
for train_index, test_index in skf.split(x, y):
    print("fold:", fold_index + 1)

    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    for i, k in enumerate(np.arange(10, 101, 10)):
        # rank features on the training fold only, then keep the top k
        idx, _, _ = CMIM.cmim(x_train, y_train, n_selected_features=k)
        x_train_selected = x_train[:, idx[0:k]]
        x_test_selected = x_test[:, idx[0:k]]

        clf.fit(x_train_selected, y_train)
        y_pred = clf.predict(x_test_selected)
        accu = accuracy_score(y_test, y_pred)

        print("selected features:", k, "accu:", accu)
        perfs[i] += accu

    fold_index += 1

print("n_splits:", n_splits)
print("cmim", DATASET, FINAL_CLASSIFIER)
# perfs /= n_splits