Beispiel #1
0
def ll_l21_FS(X_train, y, train_index):
    """Rank features using ``ll_l21.proximal_gradient_descent``.

    Args:
        X_train: Training samples, passed unchanged to the solver.
        y: Class labels for the whole data set. They are expanded with
            ``construct_label_matrix_pan`` before being subset, so the label
            matrix is built from every label, not only the training ones.
        train_index: Index selecting the rows of the label matrix that
            belong to ``X_train``.

    Returns:
        A ``(idx, Weight)`` tuple: the ranking returned by
        ``feature_ranking`` and the weight matrix it was computed from.
    """
    # Build the label matrix once and keep only the training rows.
    Y_train = construct_label_matrix_pan(y)[train_index]

    # The third positional argument is the solver's `z` setting
    # (presumably the l2,1 penalty strength -- confirm against the library).
    # The solver's other two return values are not needed here.
    Weight, _, _ = ll_l21.proximal_gradient_descent(
        X_train, Y_train, 0.1, verbose=False)

    return (feature_ranking(Weight), Weight)
def main():
    """Cross-validate ll_l21 feature selection with a linear SVM on COIL20.

    Loads ``../data/COIL20.mat``, keeps the first 200 samples, and for each
    of the K folds: learns a feature weight matrix on the training part,
    keeps the ``num_fea`` top-ranked features, trains a ``LinearSVC`` on
    them and scores it on the held-out part. The mean accuracy over all
    folds is printed.
    """
    n_folds = 10     # number of cross-validation folds
    n_subset = 200   # only the first n_subset samples are evaluated
    num_fea = 100    # number of selected features

    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)  # data
    y = mat['Y'][:, 0]          # label

    # The label matrix is built from all labels and only then truncated,
    # so its columns do not depend on which classes occur in the subset.
    Y = construct_label_matrix_pan(y)[:n_subset]
    X = X[:n_subset]
    y = y[:n_subset]  # keep the label vector aligned with X and Y

    # split data into folds
    ss = KFold(n_splits=n_folds)

    # perform evaluation on classification task
    clf = svm.LinearSVC()  # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the feature weight matrix from the training part only
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X[train], Y[train], 0.1, verbose=False)

        # rank the features by their weights; the leading entries of the
        # ranking are the ones kept below
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test data
        correct = correct + accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds; the divisor
    # is tied to the splitter's fold count instead of a separate literal
    print('Accuracy:', float(correct) / n_folds)
Beispiel #3
0
        def get_sel_idx(high_th_year, low_th_year, feature_list,
                        sel_feature_num):
            """Return the indices of the top-ranked features for one risk split.

            Both thresholds are multiplied by 365 (years to days, going by
            the parameter names) and handed to ``helper.get_risk_group``
            together with ``x``, ``c`` and ``s`` from the enclosing scope.
            The resulting two groups are turned into a training set without
            a validation split, a weight matrix is fitted on it with
            ``ll_l21.proximal_gradient_descent`` and the first
            ``sel_feature_num`` entries of its feature ranking are returned.

            ``feature_list`` is accepted but not used by this function.
            """
            days_per_year = 365
            upper_cutoff = high_th_year * days_per_year
            lower_cutoff = low_th_year * days_per_year

            group_high, group_low = helper.get_risk_group(
                x, c, s, upper_cutoff, lower_cutoff)

            # without validation set
            features, targets = helper.get_train(group_high,
                                                 group_low,
                                                 is_categori_y=False,
                                                 seed=self.random_seed)

            weights, _, _ = ll_l21.proximal_gradient_descent(
                features, targets, z=0.01, mode='raw')

            return feature_ranking(weights)[:sel_feature_num]
Beispiel #4
0
def main():
    """Cross-validate ll_l21 feature selection with a linear SVM on COIL20.

    Loads ``../data/COIL20.mat`` and, for each shuffled fold, learns a
    feature weight matrix on the training part, keeps the ``num_fea``
    top-ranked features, trains a ``LinearSVC`` on them and scores it on the
    held-out part. The mean accuracy over all folds is printed.
    """
    n_folds = 10     # number of cross-validation folds
    num_fea = 100    # number of selected features

    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X'].astype(float)    # data
    y = mat['Y'][:, 0]            # label
    Y = construct_label_matrix_pan(y)
    n_samples = X.shape[0]        # number of samples

    # split data into folds
    ss = cross_validation.KFold(n_samples, n_folds=n_folds, shuffle=True)

    # perform evaluation on classification task
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss:
        # obtain the feature weight matrix from the training part only
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X[train], Y[train], 0.1, verbose=False)

        # rank the features by their weights; the leading entries of the
        # ranking are the ones kept below
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # accumulate the classification accuracy on the test data
        correct = correct + accuracy_score(y[test], y_predict)

    # output the average classification accuracy over all folds.
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Accuracy: %s' % (float(correct) / n_folds))
Beispiel #5
0
    def fit(self, X, y):
        """Run the configured feature-selection method and return its indices.

        The method family comes from ``self.tp`` and the concrete algorithm
        within a family from ``self.name``; hyper-parameters are looked up
        in ``self.params``.

        * ``'ITB'``: ``'MRMR'``.
        * ``'filter'``: ``'Relief'``, ``'Fisher'``, ``'MI'``.
        * ``'wrapper'``: fits ``self.model`` and asks ``SelectFromModel`` for
          the indices of the supported features.
        * ``'SLB'``: ``'SMBA'``, ``'RFS'``, ``'ll_l21'``, ``'ls_l21'``,
          ``'LASSO'``, ``'EN'``. ``y`` is converted with
          ``construct_label_matrix`` before any of them runs.

        Args:
            X: Sample matrix handed to the underlying selector.
            y: Target labels.

        Returns:
            The indices produced by the chosen method, or an empty list when
            ``self.tp`` / ``self.name`` match none of the cases above.
        """

        def _rank_by_coefficients(fitted):
            # Order features by decreasing coefficient; for a 2-D
            # coefficient array only the first row is considered.
            weights = fitted.coef_
            if weights.ndim != 1:
                weights = np.asarray(weights[0, :])
            return np.argsort(-weights)

        family = self.tp

        if family == 'ITB':
            if self.name == 'MRMR':
                return MRMR.mrmr(
                    X, y, n_selected_features=self.params['num_feats'])
            return []

        if family == 'filter':
            method = self.name
            if method == 'Relief':
                relief_score = reliefF.reliefF(X, y, k=self.params['k'])
                return reliefF.feature_ranking(relief_score)
            if method == 'Fisher':
                # score every feature, then let the module rank the scores
                fisher = fisher_score.fisher_score(X, y)
                return fisher_score.feature_ranking(fisher)
            if method == 'MI':
                mutual_info = mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors'])
                # reverse the ascending argsort: largest value first
                return np.argsort(mutual_info)[::-1]
            return []

        if family == 'wrapper':
            fitted_model = self.model.fit(X, y)
            selector = SelectFromModel(fitted_model, prefit=True)
            return selector.get_support(indices=True)

        if family != 'SLB':
            return []

        # one-hot-encode on target (applies to every method of this family)
        y = construct_label_matrix(y)
        method = self.name

        if method == 'SMBA':
            cfg = self.params
            option_names = ('alpha', 'norm_type', 'verbose', 'thr',
                            'max_iter', 'affine', 'normalize', 'step', 'PCA',
                            'GPU', 'device')
            scba = fs.SCBA(data=X, **{key: cfg[key] for key in option_names})

            nrmInd, sInd, repInd, _ = scba.admm()
            wanted = self.params['type_indices']
            if wanted == 'nrmInd':
                return nrmInd
            if wanted == 'repInd':
                return repInd
            return sInd

        if method == 'RFS':
            return feature_ranking(RFS.rfs(X, y, gamma=self.params['gamma']))

        if method in ('ll_l21', 'ls_l21'):
            solver = ll_l21 if method == 'll_l21' else ls_l21
            # obtain the feature weight matrix, then rank features by it
            weight_matrix, _, _ = solver.proximal_gradient_descent(
                X, y, z=self.params['z'], verbose=False)
            return feature_ranking(weight_matrix)

        if method == 'LASSO':
            lasso = Lasso(alpha=self.params['alpha'], positive=True)
            return _rank_by_coefficients(lasso.fit(X, y))

        if method == 'EN':  # elastic net L1
            enet = ElasticNet(alpha=self.params['alpha'],
                              l1_ratio=1,
                              positive=True)
            return _rank_by_coefficients(enet.fit(X, y))

        return []
Beispiel #6
0
def main():
    """Evaluate ll_l21 feature selection with logistic regression over 5 folds.

    Reads the image features (space separated) and labels (tab separated,
    first column used) for sequence ``'D3'`` / feature model ``0`` from the
    hard-coded data directory. For each fold a feature weight matrix is
    learned on the training part, the ``num_fea`` top-ranked features are
    kept, a class-balanced logistic regression is trained on them and the
    confusion matrix on the held-out part is printed.
    """
    # Imported once up front; the result below gets its own name so the
    # function is not shadowed by its own return value.
    from sklearn.metrics import confusion_matrix

    sequence_name = 'D3'
    feature_model_num = 0
    num_fea = 100  # number of selected features

    img_feature_path = '/usr/luopengting/shareHoldersWithGPU_a/luopengting/workplace/python/pytorch/breast_cancer_lymph/data/' \
                       + sequence_name + '/model_' + str(feature_model_num) + '_img_feature.txt'
    img_label_path = '/usr/luopengting/shareHoldersWithGPU_a/luopengting/workplace/python/pytorch/breast_cancer_lymph/data/' \
                       + sequence_name + '/labels.txt'
    feature_frame = pd.read_csv(img_feature_path, sep=' ', header=None)
    label_frame = pd.read_csv(img_label_path, sep='\t', header=None)

    # `.values` replaces DataFrame.as_matrix(), which newer pandas no
    # longer provides.
    X_train = feature_frame.values
    y_train = label_frame.values[:, 0]

    ss = KFold(n_splits=5)
    for train_idx, test_idx in ss.split(X_train):
        X = X_train[train_idx]
        y = y_train[train_idx]

        # obtain the feature weight matrix
        # NOTE(review): the raw label vector is passed as the solver's second
        # argument; confirm it does not expect a label matrix (e.g. one built
        # with construct_label_matrix_pan).
        Weight, _, _ = ll_l21.proximal_gradient_descent(
            X, y, 0.1, verbose=False)

        # rank the features by their weights and keep the leading num_fea
        top_features = feature_ranking(Weight)[0:num_fea]

        classifier = LogisticRegression(penalty='l2',
                                        dual=False,
                                        tol=0.0001,
                                        C=1.0,
                                        fit_intercept=True,
                                        intercept_scaling=1,
                                        class_weight='balanced',
                                        random_state=None,
                                        solver='liblinear',
                                        max_iter=100,
                                        multi_class='ovr',
                                        verbose=0,
                                        warm_start=False,
                                        n_jobs=1)
        # train on the selected features of the training part
        classifier.fit(X[:, top_features], y)

        # evaluate on the held-out part, restricted to the same features
        X_test = X_train[test_idx][:, top_features]
        Y_test = y_train[test_idx]
        Y_pred = classifier.predict(X_test)
        fold_confusion = confusion_matrix(Y_test, Y_pred)
        print(fold_confusion)
Beispiel #7
0
    def fit(self, X, y):
        """Run the feature-selection method named by ``self.name``.

        Supported names: ``'LASSO'``, ``'EN'``, ``'RFS'``, ``'ll_l21'``,
        ``'ls_l21'``, ``'Relief'``, ``'MI'`` and, only when ``self.tp`` is
        ``'ITB'``, ``'MRMR'``. Hyper-parameters are read from
        ``self.params``.

        Args:
            X: Sample matrix handed to the underlying selector.
            y: Target labels. For ``'RFS'``, ``'ll_l21'`` and ``'ls_l21'``
                they are converted with ``construct_label_matrix`` first.

        Returns:
            The indices produced by the chosen method, or an empty list when
            the configuration matches none of the supported methods.
        """
        # Start from an empty selection so that an unrecognised configuration
        # returns [] instead of failing on an unbound local at `return`.
        idx = []
        name = self.name

        if name in ('LASSO', 'EN'):
            if name == 'LASSO':
                model = Lasso(alpha=self.params['alpha'], positive=True)
            else:  # elastic net L1
                model = ElasticNet(alpha=self.params['alpha'],
                                   l1_ratio=1,
                                   positive=True)

            coeff = model.fit(X, y).coef_
            # for a 2-D coefficient array only the first row is used
            if coeff.ndim != 1:
                coeff = np.asarray(coeff[0, :])

            # order features by decreasing coefficient
            idx = np.argsort(-coeff)

        elif name == 'RFS':
            W = RFS.rfs(X,
                        construct_label_matrix(y),
                        gamma=self.params['gamma'])
            idx = feature_ranking(W)

        elif name in ('ll_l21', 'ls_l21'):
            solver = ll_l21 if name == 'll_l21' else ls_l21
            # obtain the feature weight matrix
            W, _, _ = solver.proximal_gradient_descent(
                X,
                construct_label_matrix(y),
                z=self.params['z'],
                verbose=False)
            # rank the features according to the weight matrix
            idx = feature_ranking(W)

        elif name == 'MRMR':
            # MRMR is only run for the 'ITB' method type
            if self.tp == 'ITB':
                idx = MRMR.mrmr(X,
                                y,
                                n_selected_features=self.params['num_feats'])

        elif name == 'Relief':
            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)

        elif name == 'MI':
            # reverse the ascending argsort: largest value first
            idx = np.argsort(
                mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors']))[::-1]

        return idx