Exemple #1
0
        def get_cascaded_sel_idx(high_th_year,
                                 low_th_year,
                                 feature_list,
                                 set_feature,
                                 sel_feature_num,
                                 div_ratio=4):
            """Pick the leading entries of a trace-ratio ordering of the features.

            The year thresholds are converted to days (x365) and handed to
            ``helper.get_risk_group`` together with ``x``, ``c`` and ``s`` taken
            from the enclosing scope. A training set (no validation split) is
            then built from the two risk groups.

            Args:
                high_th_year: High-risk threshold, in years.
                low_th_year: Low-risk threshold, in years.
                feature_list: Not used by this function.
                set_feature: Column selector applied to the training matrix
                    when it is non-empty.
                sel_feature_num: How many entries to keep. ``0`` means
                    "derive the count from ``div_ratio``".
                div_ratio: When ``sel_feature_num`` is 0, the number of kept
                    entries is ``int(column_count / div_ratio)``.

            Returns:
                The first ``sel_gene_num`` items of what
                ``trace_ratio.trace_ratio(..., mode='index')`` returns
                (presumably ranked column indices -- confirm against the
                trace_ratio implementation in use). The indices refer to the
                columns of the training matrix *after* ``set_feature`` has
                been applied.
            """
            days_high = high_th_year * 365
            days_low = low_th_year * 365
            group_high, group_low = helper.get_risk_group(
                x, c, s, days_high, days_low)

            # Training data only; no validation set is produced here.
            train_x, train_y = helper.get_train(group_high,
                                                group_low,
                                                is_categori_y=False,
                                                seed=self.random_seed)

            # Restrict to a pre-chosen column subset when one was supplied.
            if len(set_feature) > 0:
                train_x = train_x[:, set_feature]
            column_count = train_x.shape[1]

            if sel_feature_num != 0:
                keep = sel_feature_num
            else:
                # No explicit count requested: keep a 1/div_ratio share.
                keep = int(max(sel_feature_num, column_count / div_ratio))

            ranking = trace_ratio.trace_ratio(train_x, train_y, mode='index')
            return ranking[:keep]
Exemple #2
0
def trace():
    """Run trace-ratio selection on the module-level data and report timing.

    Reads the globals ``data``, ``labels``, ``treshold`` and ``header``.
    Prints the length of the result and the elapsed wall-clock time, then
    passes the result to ``transform_and_save`` only when it is shorter than
    ``header``.
    """
    started = datetime.datetime.now()
    result = trace_ratio.trace_ratio(
        data, labels, mode="index", n_selected_features=treshold)
    elapsed = datetime.datetime.now() - started

    print("Trace ratio")
    print(len(result))
    print("cas: " + str(elapsed))
    print('\n')

    # Nothing to save unless the selection is shorter than the header.
    if len(result) >= len(header):
        return
    transform_and_save(result, "Trace_ratio")
Exemple #3
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the first ``sel_feature_num`` entries of a trace-ratio ordering.

            The year thresholds are converted to days (x365) and passed to
            ``helper.get_risk_group`` together with ``x``, ``c`` and ``s`` from
            the enclosing scope. The two resulting groups are turned into a
            training set (no validation split).

            Args:
                high_th_year: High-risk threshold, in years.
                low_th_year: Low-risk threshold, in years.
                feature_list: Not used by this function.
                sel_feature_num: Number of leading entries to return.

            Returns:
                ``trace_ratio.trace_ratio(trn_x, trn_y, mode='index')`` sliced
                to its first ``sel_feature_num`` items (presumably ranked
                feature indices -- confirm against the trace_ratio
                implementation in use).
            """
            high_risk_th = high_th_year * 365
            low_risk_th = low_th_year * 365
            high_risk_group, low_risk_group = helper.get_risk_group(
                x, c, s, high_risk_th, low_risk_th)
            # Training data only; no validation set is produced here.
            trn_x, trn_y = helper.get_train(
                high_risk_group,
                low_risk_group,
                is_categori_y=False,
                seed=self.random_seed)

            # The former ``feature_num`` local was never read and is dropped.
            sort_idx = trace_ratio.trace_ratio(trn_x, trn_y, mode='index')

            return sort_idx[:sel_feature_num]
Exemple #4
0
def test_trace_ratio():
    """Smoke-test ``trace_ratio.trace_ratio`` on a synthetic two-class dataset.

    Builds a 200x20 classification problem with scikit-learn and checks that
    the selector runs to completion and returns something.
    """
    from sklearn.datasets import make_classification
    X, y = make_classification(n_samples=200,
                               n_features=20,
                               n_informative=5,
                               n_redundant=5,
                               n_classes=2)
    X = X.astype(float)

    num_fea = 5  # number of features requested from the selector

    # The previous ``assert (call, True)`` asserted a non-empty tuple, which
    # is always truthy, so nothing was ever checked. Bind the result and
    # assert on it directly instead.
    # NOTE(review): the exact return shape depends on the trace_ratio
    # implementation in use; tighten this assertion once that is confirmed.
    result = trace_ratio.trace_ratio(X, y, n_selected_features=num_fea)
    assert result is not None
def main():
    """Evaluate trace-ratio feature selection with a linear SVM.

    Loads ``../data/COIL20.mat``, splits the samples into 10 shuffled folds,
    and for each fold selects features on the training part, fits a
    ``LinearSVC`` on those columns and scores it on the held-out part. The
    mean accuracy over the folds is printed.
    """
    # Load the dataset: 'X' holds the samples, the first column of 'Y' the labels.
    dataset = scipy.io.loadmat('../data/COIL20.mat')
    X = dataset['X'].astype(float)
    y = dataset['Y'][:, 0]
    n_samples, n_features = X.shape

    # 10 shuffled folds over the sample indices.
    folds = cross_validation.KFold(n_samples, n_folds=10, shuffle=True)

    num_fea = 100  # number of selected features
    clf = svm.LinearSVC()  # linear SVM

    fold_accuracies = []
    for train, test in folds:
        # Select features using the training part of the fold only.
        idx, feature_score, subset_score = trace_ratio.trace_ratio(
            X[train], y[train], num_fea, style='fisher')

        # Keep the first num_fea returned columns for every sample.
        reduced = X[:, idx[0:num_fea]]

        # Fit on the training rows, then predict the held-out rows.
        clf.fit(reduced[train], y[train])
        predicted = clf.predict(reduced[test])

        fold_accuracies.append(accuracy_score(y[test], predicted))

    # Average accuracy over the 10 folds.
    print('Accuracy:', float(sum(fold_accuracies)) / 10)
Exemple #6
0
def trace_ratio_FS(X, train_index, y_train):
    """Run trace-ratio selection (``style='fisher'``) on the training rows of ``X``.

    The number of features requested equals the number of columns of ``X``.

    Args:
        X: Two-dimensional data matrix; rows are indexed by ``train_index``.
        train_index: Row selector for the training samples.
        y_train: Labels passed alongside ``X[train_index]``.

    Returns:
        A ``(feature_idx, feature_score)`` tuple: the first two of the three
        values returned by ``trace_ratio.trace_ratio``.
    """
    _, column_count = X.shape
    outcome = trace_ratio.trace_ratio(
        X[train_index], y_train, column_count, style='fisher')
    # The third returned value (the subset score) is not passed on.
    feature_idx, feature_score, _subset_score = outcome
    return feature_idx, feature_score
Exemple #7
0
    elif fsMethod == 'JMI':
        featSelected, J_CMI, MIfy = jmi(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'MRMR':
        featSelected, J_CMI, MIfy = mrmr(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'MIM':
        featSelected, J_CMI, MIfy = mim(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'MRI':
        featSelected = mri(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'MIFS':
        featSelected, J_CMI, MIfy = mifs(X_dis,Y,n_selected_features=maxNumSelFeatures,beta=1)
    elif fsMethod == 'CIFE':
        featSelected, J_CMI, MIfy = cife(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'CMIM':
        featSelected, J_CMI, MIfy = cmim(X_dis,Y,n_selected_features=maxNumSelFeatures)
    elif fsMethod == 'trace_ratio':
        featSelected, feature_score, subset_score = trace_ratio(X_dis,Y,n_selected_features=maxNumSelFeatures)
    else:
        print('The feature selection method %s is not supported' %fsMethod)
        assert(False)
    time1 = time.time()


    filename = "results/sel_features/selFeatures_%s_dataset_%s.csv" %(fsMethod,datName)
    if fsMethod == 'VMIrm' or fsMethod == 'VMIgm' or fsMethod == 'VMIin' \
            or fsMethod == 'JMIrm' or fsMethod == 'MRMRrm' or fsMethod == 'RMRMRrm':
        featSelected = genfromtxt(filename, delimiter=',',dtype=int)
        print(featSelected)
    else:
        fileSaving(filename, featSelected, 'w')
        print('Features selected by %s on dataset %s:' % (fsMethod, datName))
        print(featSelected)
Exemple #8
0
def trace(train, test, K):
    """Return the first value produced by trace-ratio selection on ``train``.

    Args:
        train: Indexable pair; ``train[0]`` and ``train[1]`` are passed as
            the first two arguments of ``trace_ratio.trace_ratio``.
        test: Not used by this function.
        K: Passed as the third positional argument of
            ``trace_ratio.trace_ratio`` (presumably the number of features
            to select -- confirm against the implementation in use).

    Returns:
        The first of the three values returned by ``trace_ratio.trace_ratio``
        called with ``style='fisher'``.
    """
    samples, targets = train[0], train[1]
    selected, _feature_score, _subset_score = trace_ratio.trace_ratio(
        samples, targets, K, style='fisher')
    return selected