예제 #1
0
def UF(X, y):
    """Run a univariate (chi-squared) feature-selection sweep.

    Forward-style selection: for k = 50, 100, 150 the ``k`` best features
    according to ``chi2`` are kept with ``SelectKBest``, the reduced matrix
    is re-split through ``pre_process(..., bReset=True)``, and the result is
    evaluated with ``SVM_recommend_run`` under the ``F_UF`` tag.

    Args:
        X: Feature matrix passed to ``SelectKBest.fit_transform``.
        y: Targets passed to the selector and to ``pre_process``.
    """
    for k in range(50, 200, 50):
        reduced = SelectKBest(chi2, k=k).fit_transform(X, y)
        train_X, test_X, train_y, test_y = pre_process(reduced,
                                                       y,
                                                       bReset=True)
        # Record the number of retained features alongside this run.
        run_params = {'k-best': k}
        SVM_recommend_run(F_UF,
                          train_X,
                          test_X,
                          train_y,
                          test_y,
                          paras=run_params)
예제 #2
0
def SFM(X, y):
    """Run a model-based feature-selection sweep with ``SelectFromModel``.

    Backward-style selection: features are ranked by the importance the
    estimator assigns them and the budget shrinks step by step
    (200, 150, 100, 50), dropping the least important ones.

    Args:
        X: Feature matrix; converted with ``np.asarray`` before fitting.
        y: Targets; converted with ``np.asarray`` before fitting.
    """
    # Initial split on the full feature set. The four results are rebound
    # inside the loop before they are ever read.
    train_X, test_X, train_y, test_y = pre_process(X, y)
    clf = SVM_recommend()
    for m in range(200, 0, -50):
        # threshold=-inf disables the importance cut-off, so max_features
        # alone decides how many features are kept.
        selector = SelectFromModel(clf, threshold=-np.inf, max_features=m)
        features = np.asarray(X)
        targets = np.asarray(y)
        reduced = selector.fit_transform(features, targets)
        train_X, test_X, train_y, test_y = pre_process(reduced,
                                                       y,
                                                       bReset=True)
        # clf is rebound to whatever SVM_recommend_run returns; that object
        # is the estimator given to the selector on the next pass.
        clf = SVM_recommend_run(B_SFM,
                                train_X,
                                test_X,
                                train_y,
                                test_y,
                                paras={'max-features': m})
예제 #3
0
def VT(X, y):
    """Run a variance-threshold feature-selection sweep.

    Backward-style selection: the threshold grows as ``0.03 * i`` for
    ``i`` in 1..49. Each pass filters ``X`` with ``VarianceThreshold``,
    re-splits the result through ``pre_process(..., bReset=True)``, and
    evaluates it with ``SVM_recommend_run`` under the ``B_VT`` tag.

    Args:
        X: Feature matrix passed to ``VarianceThreshold.fit_transform``.
        y: Targets passed to ``pre_process``.
    """
    for step in range(1, 50):
        var = 0.03 * step
        reduced = VarianceThreshold(threshold=var).fit_transform(X)
        train_X, test_X, train_y, test_y = pre_process(reduced,
                                                       y,
                                                       bReset=True)
        # reduced.shape[1] is the number of features that survived the filter.
        run_params = {
            'variance': var,
            'feature-num': reduced.shape[1],
        }
        # NOTE: the original author flagged this call as problematic without
        # giving details.
        SVM_recommend_run(B_VT,
                          train_X,
                          test_X,
                          train_y,
                          test_y,
                          paras=run_params)
예제 #4
0
            if feature not in good_features:
                selected_features = list(good_features) + [feature]
                Xts = np.column_stack(X[:, j] for j in selected_features)
                score = evaluateScore(Xts, y)
                scores.append((score, feature))
                print("Current AUC : ", np.mean(score))
        good_features.add(sorted(scores)[-1][1])
        score_history.append(sorted(scores)[-1])
        print("Current Features : ", sorted(list(good_features)))

    # Remove last added feature
    good_features.remove(score_history[-1][1])
    good_features = sorted(list(good_features))
    print("Selected Features : ", good_features)
    return good_features


def transform(X, y):
    """Return ``X`` restricted to the columns chosen by ``selectionLoop``.

    Args:
        X: Feature matrix; must support ``X[:, columns]`` indexing.
        y: Targets forwarded to ``selectionLoop``.

    Returns:
        The column subset ``X[:, good_features]``.
    """
    chosen_columns = selectionLoop(X, y)
    reduced = X[:, chosen_columns]
    return reduced


if __name__ == "__main__":
    # Script entry point: select features on the small dataset, then run the
    # SVM evaluation on the reduced matrix under the AUC tag.
    # Work from the parent directory (presumably so relative data paths
    # resolve -- confirm against load_data_small).
    os.chdir('..')

    X, y = load_data_small()
    print(X.shape)

    X_ = transform(X, y)
    train_X, test_X, train_y, test_y = pre_process(X_, y, bReset=True)

    # Record how many features survived the selection.
    run_params = {'feature-num': X_.shape[1]}
    SVM_recommend_run(AUC, train_X, test_X, train_y, test_y, run_params)
예제 #5
0
import sys
sys.path.append('..')

import os
import numpy as np
from lib.genetic import *
from configs import *
from load_data import load_data_small
from pre_process import pre_process
from baseline import SVM_recommend, SVM_recommend_run

if __name__ == "__main__":
    # Script entry point: sweep the value passed to the GA selector's
    # generate() from 200 to 1800 in steps of 200 and evaluate each result
    # with the SVM baseline under the GA tag.
    os.chdir('..')
    clf = SVM_recommend()
    X, y = load_data_small()

    for feature_num in range(200, 2000, 200):
        # A fresh selector is built for every setting.
        ga_selector = FeatureSelectionGA(clf, X, y, verbose=1)
        pop = ga_selector.generate(feature_num)
        # NOTE(review): pop is used directly as a column index for X;
        # confirm that generate() returns something suitable for that.
        X_ = X[:, pop]
        train_X, test_X, train_y, test_y = pre_process(X_, y, bReset=True)
        run_params = {'feature_num': feature_num}
        SVM_recommend_run(GA, train_X, test_X, train_y, test_y, run_params)
예제 #6
0
                print("t-SNE: %.2g sec" % (t1 - t0))
                x_min, x_max = X_.min(0), X_.max(0)
                X_norm = (X_ - x_min) / (x_max - x_min)  # 归一化
                plt.figure(figsize=(8, 8))
                for i in range(X_norm.shape[0]):
                    #plt.text(X_norm[i, 0], X_norm[i, 1], str(y[i]), color=plt.cm.Set1(y[i]),
                    # fontdict={'weight': 'bold', 'size': 9})
                    plt.scatter(X_norm[i, 0],
                                X_norm[i, 1],
                                color=cm(1. * y[i] / NUM_COLORS))
                plt.xticks([])
                plt.yticks([])
                name = str(n_components) + "  " + str(perplexity) + " " + str(
                    random_state) + ".png"
                plt.savefig(name)
                plt.show()
                #X_ = transform(X, y)
                X_train, X_test, y_train, y_test = pre_process(X_,
                                                               y,
                                                               bReset=True)
                SVM_recommend_run(tSNE,
                                  X_train,
                                  X_test,
                                  y_train,
                                  y_test,
                                  paras={
                                      'n_cp': n_components,
                                      'ppl': perplexity,
                                      'rd': random_state
                                  })
예제 #7
0
import sys
sys.path.append('..')

from boruta import Boruta
import os

from configs import *
from baseline import SVM_recommend, SVM_recommend_run
from load_data import load_data
from pre_process import pre_process

if __name__ == "__main__":
    # Script entry point: select features with Boruta, then evaluate the
    # reduced matrix with the SVM baseline under the BORUTA tag.

    # The file-level import pulls in `Boruta`, but the class used below is
    # `BorutaPy`; import it here so the name actually resolves.
    from boruta import BorutaPy

    os.chdir('..')

    # X and y were previously used without ever being loaded.
    # NOTE(review): assumes load_data() returns (X, y) like load_data_small()
    # does in the sibling scripts -- confirm.
    X, y = load_data()

    clf = SVM_recommend()
    # NOTE(review): BorutaPy relies on the estimator exposing feature
    # importances; confirm that SVM_recommend() provides them.
    feat_selector = BorutaPy(clf, n_estimators='auto')
    feat_selector.fit(X, y)
    print(feat_selector.support_)

    # support_ is used as a column index to keep only the selected features.
    selected = X[:, feat_selector.support_]
    print("")
    print("Selected Feature Matrix Shape")
    print(selected.shape)

    # Split the *selected* matrix: splitting the full X discarded the Boruta
    # result while still reporting selected.shape[1] as the feature count.
    # bReset=True matches how the sibling scripts split a reduced matrix.
    X_train, X_test, y_train, y_test = pre_process(selected, y, bReset=True)

    # Pass X_train as the training features (X_test was passed twice before).
    SVM_recommend_run(BORUTA, X_train, X_test, y_train, y_test,
                      {'feature_num': selected.shape[1]})