import scipy.io
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from skfeature.function.sparse_learning_based import RFS
from skfeature.utility.sparse_learning import construct_label_matrix, feature_ranking


def main():
    # load data
    mat = scipy.io.loadmat('../data/COIL20.mat')
    X = mat['X']    # data
    X = X.astype(float)
    y = mat['Y']    # label
    y = y[:, 0]
    Y = construct_label_matrix(y)
    n_samples, n_features = X.shape

    # split data into 10 folds
    ss = KFold(n_splits=10, shuffle=True)

    # perform evaluation on classification task
    num_fea = 100    # number of selected features
    clf = svm.LinearSVC()    # linear SVM

    correct = 0
    for train, test in ss.split(X):
        # obtain the feature weight matrix
        Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)

        # rank features in descending order according to their weights
        idx = feature_ranking(Weight)

        # obtain the dataset on the selected features
        selected_features = X[:, idx[0:num_fea]]

        # train a classification model with the selected features on the training dataset
        clf.fit(selected_features[train], y[train])

        # predict the class labels of test data
        y_predict = clf.predict(selected_features[test])

        # obtain the classification accuracy on the test data
        acc = accuracy_score(y[test], y_predict)
        print(acc)
        correct = correct + acc

    # output the average classification accuracy over all 10 folds
    print('Accuracy:', float(correct) / 10)
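This example depends on feature_ranking from skfeature.utility.sparse_learning. For reference, a minimal stand-in with the same behavior (it mirrors the scoring used in the rfs implementation at the end of this page; treat it as a sketch, not the library's exact code) is:

import numpy as np

def feature_ranking(W):
    # score each feature by the squared l2-norm of its row in the weight
    # matrix, then return feature indices from highest to lowest score
    scores = (W * W).sum(axis=1)
    return np.argsort(scores)[::-1]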
Example #2
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

        # lasso
        lasso = Lasso(alpha=0.01, random_state=random_state)
        lasso.fit(X_train, y_train)
        weights = lasso.coef_.T
        idx = chi_square.feature_ranking(abs(weights))
        selected_fea_train = X_train[:, idx[0:num_features]]
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

        # rfs
        weights = RFS.rfs(X_train, construct_label_matrix(y_train), gamma=0.01)
        idx = sparse_learning.feature_ranking(weights)
        selected_fea_train = X_train[:, idx[0:num_features]]
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

        # sgl
        # group structure for sparse group lasso: after the transpose, each
        # column holds [start index, end index, sqrt(group size)] for one group
        idx_group = np.array([[1, 16, np.sqrt(16)],
                              [17, 28, np.sqrt(12)],
                              [29, 60, np.sqrt(32)],
                              [61, 160, np.sqrt(100)]]).T
        idx_group = idx_group.astype(int)
        # the original call was truncated mid-invocation; the z2 value and the
        # evaluation lines below are assumptions that mirror z1=0.01 and the
        # pattern of the other selector blocks above
        weights, _, _ = group_fs.group_fs(X_train,
                                          y_train,
                                          0.01,
                                          0.01,
                                          idx_group)
        idx = chi_square.feature_ranking(abs(weights))
        selected_fea_train = X_train[:, idx[0:num_features]]
        selected_fea_test = X_test[:, idx[0:num_features]]
        clf.fit(selected_fea_train, y_train)
        acc.append(accuracy_score(y_test, clf.predict(selected_fea_test)))

Example #3
import numpy as np
import pandas as pd
from sklearn.preprocessing import scale
from feature_selection.mfsicc_plot import MFSICC
from skfeature.utility.sparse_learning import construct_label_matrix
from feature_selection.RFS import rfs
import warnings
warnings.filterwarnings("ignore")

dataset = 'PHM09_Low_COMB.csv'
'''
CWRU_HP1_COMB.csv
CWRU_HP2_COMB.csv
CWRU_HP3_COMB.csv
PHM09_High_COMB.csv
PHM09_Low_COMB.csv
'''
file_group = '..//features//Groups.csv'
groups = np.array(pd.read_csv(file_group))[:, 0]

file_path = '..//features//' + dataset
data = pd.read_csv(file_path)
features = np.array(data.iloc[:, :-1])
features = scale(features)
labels = np.array(data.iloc[:, -1])
labels = labels.astype(int)

mfsicc = MFSICC(lamb1=0.1, alpha=0.5, lamb2=0.1, random_state=1, n_iter=30)
mfsicc.fit(features, labels, groups)
W, objs = rfs(features, construct_label_matrix(labels), gamma=0.1)
print(mfsicc.get_objectives())
Example #4
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from skfeature.function.sparse_learning_based import RFS
from skfeature.utility.sparse_learning import construct_label_matrix, feature_ranking

# load data
X = np.load('Lymph_x.npy')
y = np.load('Lymph_y.npy')

X = X.astype(float)

Y = construct_label_matrix(y)
n_samples, n_features = X.shape

# split data into 13 stratified folds
cv = StratifiedKFold(n_splits=13)

# perform evaluation on classification task
num_fea = 11  # number of selected features

for train, test in cv.split(X, y):
    # obtain the feature weight matrix
    Weight = RFS.rfs(X[train, :], Y[train, :], gamma=0.1)
    # rank features in descending order according to their weights
    idx = feature_ranking(Weight)

# keep the top num_fea features, ranked on the last fold
X_resampled = X[:, idx[0:num_fea]]
Example #5
    def fit(self, X, y):

        idx = []

        if self.tp == 'ITB':

            if self.name == 'MRMR':
                idx = MRMR.mrmr(X,
                                y,
                                n_selected_features=self.params['num_feats'])

        elif self.tp == 'filter':

            if self.name == 'Relief':
                score = reliefF.reliefF(X, y, k=self.params['k'])
                idx = reliefF.feature_ranking(score)

            if self.name == 'Fisher':
                # obtain the score of each feature on the training set
                score = fisher_score.fisher_score(X, y)

                # rank features in descending order according to score
                idx = fisher_score.feature_ranking(score)

            if self.name == 'MI':
                idx = np.argsort(
                    mutual_info_classif(
                        X, y, n_neighbors=self.params['n_neighbors']))[::-1]

        elif self.tp == 'wrapper':

            model_fit = self.model.fit(X, y)
            model = SelectFromModel(model_fit, prefit=True)
            idx = model.get_support(indices=True)
        elif self.tp == 'SLB':

            # one-hot-encode on target
            y = construct_label_matrix(y)

            if self.name == 'SMBA':
                scba = fs.SCBA(data=X,
                               alpha=self.params['alpha'],
                               norm_type=self.params['norm_type'],
                               verbose=self.params['verbose'],
                               thr=self.params['thr'],
                               max_iter=self.params['max_iter'],
                               affine=self.params['affine'],
                               normalize=self.params['normalize'],
                               step=self.params['step'],
                               PCA=self.params['PCA'],
                               GPU=self.params['GPU'],
                               device=self.params['device'])

                nrmInd, sInd, repInd, _ = scba.admm()
                if self.params['type_indices'] == 'nrmInd':
                    idx = nrmInd
                elif self.params['type_indices'] == 'repInd':
                    idx = repInd
                else:
                    idx = sInd

            if self.name == 'RFS':
                W = RFS.rfs(X, y, gamma=self.params['gamma'])
                idx = feature_ranking(W)

            if self.name == 'll_l21':
                # obtain the feature weight matrix
                W, _, _ = ll_l21.proximal_gradient_descent(X,
                                                           y,
                                                           z=self.params['z'],
                                                           verbose=False)
                # rank features in descending order according to their weights
                idx = feature_ranking(W)
            if self.name == 'ls_l21':
                # obtain the feature weight matrix
                W, _, _ = ls_l21.proximal_gradient_descent(X,
                                                           y,
                                                           z=self.params['z'],
                                                           verbose=False)

                # rank features in descending order according to their weights
                idx = feature_ranking(W)

            if self.name == 'LASSO':

                lasso_model = Lasso(alpha=self.params['alpha'],
                                    positive=True).fit(X, y)

                if lasso_model.coef_.ndim == 1:
                    coeff = lasso_model.coef_
                else:
                    # multi-output fit: use the coefficients of the first output
                    coeff = np.asarray(lasso_model.coef_[0, :])

                idx = np.argsort(-coeff)

            if self.name == 'EN':  # elastic net (l1_ratio=1 makes the penalty pure L1)

                enet_model = ElasticNet(alpha=self.params['alpha'],
                                        l1_ratio=1,
                                        positive=True).fit(X, y)

                if enet_model.coef_.ndim == 1:
                    coeff = enet_model.coef_
                else:
                    # multi-output fit: use the coefficients of the first output
                    coeff = np.asarray(enet_model.coef_[0, :])

                idx = np.argsort(-coeff)

        return idx
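A usage sketch for the wrapper above. The class name FeatureSelector and its constructor signature are assumptions inferred from the name, tp, and params attributes referenced in fit; only the fit call itself appears in the source.

# hypothetical instantiation; the constructor signature is assumed
selector = FeatureSelector(name='RFS', tp='SLB', params={'gamma': 1.0})
idx = selector.fit(X_train, y_train)   # ranked feature indices
X_train_sel = X_train[:, idx[:50]]     # keep the top 50 ranked features
X_test_sel = X_test[:, idx[:50]]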
Example #6
    def fit(self, X, y):

        if self.name == 'LASSO':

            lasso_model = Lasso(alpha=self.params['alpha'],
                                positive=True).fit(X, y)

            if lasso_model.coef_.ndim == 1:
                coeff = lasso_model.coef_
            else:
                # multi-output fit: use the coefficients of the first output
                coeff = np.asarray(lasso_model.coef_[0, :])

            idx = np.argsort(-coeff)

        if self.name == 'EN':  # elastic net (l1_ratio=1 makes the penalty pure L1)

            enet_model = ElasticNet(alpha=self.params['alpha'],
                                    l1_ratio=1,
                                    positive=True).fit(X, y)

            if enet_model.coef_.ndim == 1:
                coeff = enet_model.coef_
            else:
                # multi-output fit: use the coefficients of the first output
                coeff = np.asarray(enet_model.coef_[0, :])

            idx = np.argsort(-coeff)

        if self.name == 'RFS':

            W = RFS.rfs(X,
                        construct_label_matrix(y),
                        gamma=self.params['gamma'])
            idx = feature_ranking(W)

        if self.name == 'll_l21':
            # obtain the feature weight matrix
            W, _, _ = ll_l21.proximal_gradient_descent(
                X,
                construct_label_matrix(y),
                z=self.params['z'],
                verbose=False)
            # rank features in descending order according to their weights
            idx = feature_ranking(W)

        if self.name == 'ls_l21':
            # obtain the feature weight matrix
            W, _, _ = ls_l21.proximal_gradient_descent(
                X,
                construct_label_matrix(y),
                z=self.params['z'],
                verbose=False)

            # rank features in descending order according to their weights
            idx = feature_ranking(W)

        if self.tp == 'ITB':

            if self.name == 'MRMR':
                idx = MRMR.mrmr(X,
                                y,
                                n_selected_features=self.params['num_feats'])

        if self.name == 'Relief':

            score = reliefF.reliefF(X, y, k=self.params['k'])
            idx = reliefF.feature_ranking(score)

        if self.name == 'MI':
            idx = np.argsort(
                mutual_info_classif(
                    X, y, n_neighbors=self.params['n_neighbors']))[::-1]

        return idx
Example #7
import math

import numpy as np
from numpy import linalg as LA
from skfeature.utility.sparse_learning import (calculate_obj,
                                               construct_label_matrix,
                                               generate_diagonal_matrix)


def rfs(X, y, **kwargs):
    """
    This function implements efficient and robust feature selection via joint l2,1-norms minimization:
    min_W ||X W - Y||_2,1 + gamma ||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    kwargs: {dictionary}
        gamma: {float}
            parameter in RFS
        n_selected_features: {int}
            maximum number of selected features, default is the number of input features (accepted but unused in this variant)
        verbose: {boolean}
            True to display the objective function value at each iteration, False otherwise

    Output
    ------
    scores: {numpy array}, shape (n_features,)
        per-feature score, the squared l2-norm of the corresponding row of the weight matrix W

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization" NIPS 2010.
    """

    # default gamma is 0.1
    gamma = kwargs.get('gamma', 0.1)
    verbose = kwargs.get('verbose', False)

    n_samples, n_features = X.shape

    n_selected_features = kwargs.get('n_selected_features', n_features)  # currently unused

    Y = construct_label_matrix(y)
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features + n_samples] = gamma * np.eye(n_samples)
    D = np.eye(n_features + n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^-1 A^T)^-1 Y
        D_inv = LA.inv(D)
        temp = LA.inv(
            np.dot(np.dot(A, D_inv), A.T) +
            1e-6 * np.eye(n_samples))  # (A D^-1 A^T)^-1
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / 2 / ||U(i,:)||
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))
        if iter_step >= 1 and math.fabs(obj[iter_step] -
                                        obj[iter_step - 1]) < 1e-3:
            break

    # the first d rows of U are the feature weights
    W = U[0:n_features, :]
    scores = (W * W).sum(1)
    return scores
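A minimal usage sketch for this variant, assuming X with shape (n_samples, n_features) and integer labels y are already loaded; the top-100 cutoff is an arbitrary example value.

scores = rfs(X, y, gamma=0.1, verbose=True)
idx = np.argsort(scores)[::-1]   # rank features from highest to lowest score
X_selected = X[:, idx[:100]]     # keep the 100 highest-scoring features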