コード例 #1
def cfs(X, y, mode="index", **kwargs):
    """
    This function uses a correlation based heuristic to evaluate the worth of features which is called CFS.
    Features are added greedily: at each step the candidate whose addition gives the
    largest merit (as reported by merit_calculation) is selected.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices in selection order;
        any other value returns reverse_argsort(F, n_features)
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select; 0 (the default) selects every feature

    Output
    ------
    F: {numpy array}
        index of selected features

    Reference
    ---------
    Zhao, Zheng et al. "Advancing Feature Selection Research - ASU Feature Selection Repository" 2010.
    """
    n_samples, n_features = X.shape
    n_selected_features = kwargs.get('n_selected_features', 0)
    # 0 means "no limit"; never try to select more features than exist,
    # otherwise the search would keep appending the -1 placeholder index
    if n_selected_features == 0:
        target = n_features
    else:
        target = min(n_selected_features, n_features)

    F = []
    while len(F) < target:
        merit = -100000000000
        idx = -1
        for i in range(n_features):
            if i in F:
                continue
            # calculate the merit of the current selection extended by feature i
            try:
                t = merit_calculation(X[:, F + [i]], y)
            except ZeroDivisionError:
                # a degenerate candidate must never win the comparison
                t = -10000000000
            if t > merit:
                merit = t
                idx = i
        if idx == -1:
            # no candidate produced a usable merit; stop instead of storing -1
            break
        F.append(idx)

    if mode == "index":
        return np.array(F)
    return reverse_argsort(F, X.shape[1])
コード例 #2
def t_score(X, y, mode='rank'):
    """
    This function calculates t_score for each feature, where t_score is only used for binary problem
    t_score = |mean1-mean2|/sqrt(((std1^2)/n1)+((std2^2)/n2)))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index"           -> the absolute t-score of every feature
        "feature_ranking" -> feature indices sorted by descending t-score
        anything else     -> reverse_argsort of that ranking

    Output
    ------
    F: {numpy array}, shape (n_features,)
        t-score for each feature, or a ranking derived from it (see mode)

    Raises
    ------
    ValueError
        if y does not contain exactly two distinct class labels
    """
    def feature_ranking(F):
        """
        Rank features in descending order according to t-score, the higher the t-score, the more important the feature is
        """
        return np.argsort(F)[::-1]

    n_samples, n_features = X.shape
    y = np.asarray(y)
    c = np.unique(y)
    if len(c) != 2:
        # the score is undefined for anything but a two-class problem
        raise ValueError('y should be guaranteed to a binary class vector')

    # class0 contains instances belonging to the first class
    # class1 contains instances belonging to the second class
    class0 = X[y == c[0]]
    class1 = X[y == c[1]]
    mean_diff = np.mean(class0, axis=0) - np.mean(class1, axis=0)
    t0 = np.true_divide(np.std(class0, axis=0) ** 2, len(class0))
    t1 = np.true_divide(np.std(class1, axis=0) ** 2, len(class1))
    F = np.abs(np.true_divide(mean_diff, (t0 + t1) ** 0.5))

    if mode == "index":
        return F
    elif mode == 'feature_ranking':
        return feature_ranking(F)
    else:
        return reverse_argsort(feature_ranking(F), X.shape[1])
コード例 #3
def decision_tree_backward(X, y, mode="rank", n_selected_features=None):
    """
    This function implements the backward feature selection algorithm based on decision tree

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the surviving feature indices; any other value returns
        reverse_argsort(F, n_features)
    n_selected_features : {int}
        number of selected features; None keeps every feature

    Output
    ------
    F: {numpy array}, shape (n_features, )
        index of selected features
    """
    n_samples, n_features = X.shape
    if n_selected_features is None:
        n_selected_features = n_features

    # selected feature set, initialized to contain all features
    F = list(range(n_features))

    kfold = None
    clf = None
    while len(F) > n_selected_features:
        if clf is None:
            # using 10 fold cross validation
            kfold = KFold(n_splits=10, shuffle=True)
            # choose decision tree as the classifier
            clf = DecisionTreeClassifier()

        # start below any reachable accuracy so that a feature is always chosen,
        # even when every cross-validated accuracy is 0
        max_acc = -np.inf
        idx = None
        for i in list(F):
            # evaluate the feature set with feature i left out
            remaining = [j for j in F if j != i]
            results = cross_val_score(clf, X[:, remaining], y, cv=kfold)
            acc = results.mean()
            # record the feature whose removal results in the largest accuracy
            if acc > max_acc:
                max_acc = acc
                idx = i
        if idx is None:
            # no comparable accuracy (e.g. all NaN); stop rather than remove at random
            break
        # delete the feature whose removal results in the largest accuracy
        F.remove(idx)

    if mode == "index":
        return np.array(F)
    # pass the total number of features explicitly, as the sibling selectors do
    return reverse_argsort(F, X.shape[1])
コード例 #4
def svm_forward(X, y, mode="rank", n_selected_features=None):
    """
    This function implements the forward feature selection algorithm based on SVM

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices in selection order; any
        other value returns reverse_argsort(F, n_features)
    n_selected_features: {int}
        number of selected features; None selects every feature

    Output
    ------
    F: {numpy array}, shape (n_features, )
        index of selected features
    """
    n_samples, n_features = X.shape
    if n_selected_features is None:
        n_selected_features = n_features
    # exactly n_selected_features features are selected (the previous loop bound
    # stopped one short), but never more than exist
    target = min(n_selected_features, n_features)

    # selected feature set, initialized to be empty
    F = []
    kfold = None
    clf = None
    while len(F) < target:
        candidates = [i for i in range(n_features) if i not in F]
        if len(candidates) == 1:
            # only one choice left: no need to cross-validate it
            F.append(candidates[0])
            continue
        if clf is None:
            # using 10 fold cross validation
            kfold = KFold(n_splits=10, shuffle=True)
            # choose SVM as the classifier
            clf = SVC()

        # start below any reachable accuracy so that a feature is always chosen
        max_acc = -np.inf
        idx = None
        for i in candidates:
            results = cross_val_score(clf, X[:, F + [i]], y, cv=kfold)
            acc = results.mean()
            # record the feature which results in the largest accuracy
            if acc > max_acc:
                max_acc = acc
                idx = i
        if idx is None:
            # no comparable accuracy (e.g. all NaN); stop rather than re-add a stale index
            break
        # add the feature which results in the largest accuracy
        F.append(idx)

    if mode == "index":
        return np.array(F)
    return reverse_argsort(F, X.shape[1])
コード例 #5
def cfs(X, y, mode="rank"):
    """
    This function uses a correlation based heuristic to evaluate the worth of features which is called CFS.
    Features are added greedily until the merit has failed to increase over the
    last four additions (checked once more than five features are selected) or
    no candidate is left.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices in selection order; any
        other value returns reverse_argsort(F, n_features)

    Output
    ------
    F: {numpy array}
        index of selected features

    Reference
    ---------
    Zhao, Zheng et al. "Advancing Feature Selection Research - ASU Feature Selection Repository" 2010.
    """
    n_samples, n_features = X.shape
    F = []
    # M stores the merit values
    M = []
    while len(F) < n_features:
        merit = -100000000000
        idx = -1
        for i in range(n_features):
            if i in F:
                continue
            # calculate the merit of the current selection extended by feature i
            t = merit_calculation(X[:, F + [i]], y)
            if t > merit:
                merit = t
                idx = i
        if idx == -1:
            # nothing selectable; stop instead of storing the -1 placeholder
            break
        F.append(idx)
        M.append(merit)
        # stop once the merit has not increased for four consecutive additions
        if len(M) > 5 and all(M[-k] <= M[-k - 1] for k in range(1, 5)):
            break

    if mode == "index":
        return np.array(F)
    return reverse_argsort(F, X.shape[1])
コード例 #6
ファイル: MIFS.py プロジェクト: EricSchles/scikit-feature-1
def mifs(X, y, mode="rank", **kwargs):
    """
    This function implements the MIFS feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices; any other value returns
        reverse_argsort(F, size=n_features)
    kwargs: {dictionary}
        beta: {float}
            weight forwarded to LCSI.lcsi, 0.5 when omitted
        n_selected_features: {int}
            number of features to select; forwarded to LCSI.lcsi only when given

    Output
    ------
    F: {numpy array}
        index of selected features as produced by LCSI.lcsi (mode "index"), or
        the result of reverse_argsort on it. The objective values and mutual
        information that LCSI.lcsi also returns are discarded.

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    lcsi_kwargs = {'beta': kwargs.get('beta', 0.5), 'gamma': 0}
    if 'n_selected_features' in kwargs:
        lcsi_kwargs['n_selected_features'] = kwargs['n_selected_features']
    F, J_CMI, MIfy = LCSI.lcsi(X, y, **lcsi_kwargs)

    if mode == "index":
        return np.array(F, dtype=int)
    # expand the selection to a vector with one entry per original feature
    return reverse_argsort(F, size=X.shape[1])
コード例 #7
def icap(X, y, mode="rank", **kwargs):
    """
    This function implements the ICAP feature selection.
    The scoring criteria is calculated based on the formula j_icap = I(f;y) - max_j(0,(I(fj;f)-I(fj;f|y)))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be a discrete data matrix
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices; any other value returns
        reverse_argsort(F, size=n_features)
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select; when omitted, selection continues
            until the best remaining score is not positive

    Output
    ------
    F: {numpy array}
        index of selected features, F[0] is the most important feature
        (mode "index"), or the result of reverse_argsort on it
    """
    n_samples, n_features = X.shape
    # indicate whether the user specifies the number of features
    is_n_selected_features_specified = 'n_selected_features' in kwargs
    n_selected_features = kwargs.get('n_selected_features')

    # index of selected features, initialized to be empty
    F = []
    nothing_to_select = n_features == 0 or (
        is_n_selected_features_specified and n_selected_features <= 0)

    if not nothing_to_select:
        # t1 contains I(f;y) for each feature f
        t1 = np.zeros(n_features)
        for i in range(n_features):
            t1[i] = midd(X[:, i], y)
        # redundancy contains max_j(0,(I(fj;f)-I(fj;f|y))) for each feature f
        redundancy = np.zeros(n_features)

        # select the feature whose mutual information is the largest
        idx = np.argmax(t1)
        F.append(idx)
        f_select = X[:, idx]

        # make sure that j_icap is positive at the very beginning
        j_icap = 1
        while True:
            if is_n_selected_features_specified:
                if len(F) >= n_selected_features:
                    break
            elif j_icap <= 0:
                break

            candidates = [i for i in range(n_features) if i not in F]
            if not candidates:
                # every feature is already selected; appending again would
                # only duplicate the previous index
                break

            # we assign an extreme small value to j_icap to ensure it is smaller than all possible values of j_icap
            j_icap = -1000000000000
            for i in candidates:
                f = X[:, i]
                t2 = midd(f_select, f)
                t3 = cmidd(f_select, f, y)
                if t2 - t3 > redundancy[i]:
                    redundancy[i] = t2 - t3
                # calculate j_icap for feature i (not in F)
                t = t1[i] - redundancy[i]
                # record the largest j_icap and the corresponding feature index
                if t > j_icap:
                    j_icap = t
                    idx = i
            F.append(idx)
            f_select = X[:, idx]

    if mode == "index":
        return np.array(F, dtype=int)
    # expand the selection to a vector with one entry per original feature
    return reverse_argsort(F, size=X.shape[1])
コード例 #8
ファイル: FCBF.py プロジェクト: EricSchles/scikit-feature-1
def fcbf(X, y, mode="rank", **kwargs):
    """
    This function implements Fast Correlation Based Filter algorithm

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be discrete
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the selected feature indices; any other value returns
        reverse_argsort(F, size=n_features)
    kwargs: {dictionary}
        delta: {float}
            delta is a threshold parameter, the default value of delta is 0

    Output
    ------
    F: {numpy array}
        index of selected features, F[0] is the most important feature
        (mode "index"), or the result of reverse_argsort on it

    Reference
    ---------
        Yu, Lei and Liu, Huan. "Feature Selection for High-Dimensional Data: A Fast Correlation-Based Filter Solution." ICML 2003.
    """
    n_samples, n_features = X.shape
    # the default value of delta is 0
    delta = kwargs.get('delta', 0)

    # symmetrical uncertainty between every feature and the class labels;
    # kept as floats (an integer cast would truncate the values)
    su_with_y = np.zeros(n_features)
    for i in range(n_features):
        su_with_y[i] = su_calculation(X[:, i], y)

    # candidate features, in index order, whose relevance exceeds the threshold
    candidates = [i for i in range(n_features) if su_with_y[i] > delta]

    # index of selected features, initialized to be empty
    F = []
    while candidates:
        # select the candidate with the largest su (first one wins on ties)
        best = max(candidates, key=lambda i: su_with_y[i])
        F.append(best)
        fp = X[:, best]
        # drop the selected feature itself and every candidate that is at least
        # as correlated with it as with the class labels
        candidates = [
            i for i in candidates
            if i != best and su_calculation(fp, X[:, i]) < su_with_y[i]
        ]

    if mode == "index":
        return np.array(F, dtype=int)
    # expand the selection to a vector with one entry per original feature
    return reverse_argsort(F, size=X.shape[1])
コード例 #9
def disr(X, y, mode="rank", **kwargs):
    """
    This function implement the DISR feature selection.
    The scoring criteria is calculated based on the formula j_disr=sum_j(I(f,fj;y)/H(f,fj,y))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be a discrete data matrix
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns the list of selected feature indices; any other value
        returns reverse_argsort(F, n_features)
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select; when omitted, selection continues
            until the best remaining score is not positive

    Output
    ------
    F: {list}
        index of selected features, F[0] is the most important feature
        (mode "index"), or the result of reverse_argsort on it

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """
    n_samples, n_features = X.shape
    # indicate whether the user specifies the number of features
    is_n_selected_features_specified = 'n_selected_features' in kwargs
    n_selected_features = kwargs.get('n_selected_features')

    # index of selected features, initialized to be empty
    F = []
    nothing_to_select = n_features == 0 or (
        is_n_selected_features_specified and n_selected_features <= 0)

    if not nothing_to_select:
        # t1 stores I(f;y) for each feature f
        t1 = np.zeros(n_features)
        for i in range(n_features):
            t1[i] = midd(X[:, i], y)
        # score_sum stores sum_j(I(f,fj;y)/H(f,fj,y)) for each feature f
        score_sum = np.zeros(n_features)

        # select the feature whose mutual information is the largest
        idx = np.argmax(t1)
        F.append(idx)
        f_select = X[:, idx]

        # make sure that j_disr is positive at the very beginning
        j_disr = 1
        while True:
            if is_n_selected_features_specified:
                if len(F) >= n_selected_features:
                    break
            elif j_disr <= 0:
                break

            candidates = [i for i in range(n_features) if i not in F]
            if not candidates:
                # every feature is already selected; appending again would
                # only duplicate the previous index
                break

            # these terms depend only on the last selected feature, so they are
            # computed once per round (assumes the estimators are deterministic)
            mi_select_y = midd(f_select, y)
            h_y_given_select = conditional_entropy(y, f_select)

            # we assign an extreme small value to j_disr to ensure that it is smaller than all possible value of j_disr
            j_disr = -1E30
            for i in candidates:
                f = X[:, i]
                t2 = mi_select_y + cmidd(f, y, f_select)
                t3 = entropyd(f) + conditional_entropy(f_select, f) + (h_y_given_select - cmidd(y, f, f_select))
                score_sum[i] += np.true_divide(t2, t3)
                # record the largest j_disr and the corresponding feature index
                if score_sum[i] > j_disr:
                    j_disr = score_sum[i]
                    idx = i
            F.append(idx)
            f_select = X[:, idx]

    if mode == "index":
        return F
    return reverse_argsort(F, X.shape[1])
コード例 #10
def lap_score(X, y=None, mode="rank", **kwargs):
    """
    This function implements the laplacian score feature selection, steps are as follows:
    1. Construct the affinity matrix W if it is not specified
    2. For the r-th feature, we define fr = X(:,r), D = diag(W*ones), ones = [1,...,1]', L = D - W
    3. Let fr_hat = fr - (fr'*D*ones)*ones/(ones'*D*ones)
    4. Laplacian score for the r-th feature is score = (fr_hat'*L*fr_hat)/(fr_hat'*D*fr_hat)

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: unused, accepted so that the signature matches the supervised selectors
    mode: {str}
        "index" returns feature indices sorted by ascending laplacian score;
        any other value returns reverse_argsort of that ranking
    kwargs: {dictionary}
        W: {sparse matrix}, shape (n_samples, n_samples)
            input affinity matrix; built with construct_W(X) when omitted

    Output
    ------
    F: {numpy array}, shape (n_features,)
        feature ranking derived from the laplacian score of each feature

    Reference
    ---------
    He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
    """
    def feature_ranking(score):
        """
        Rank features in ascending order according to their laplacian scores, the smaller the laplacian score is, the more
        important the feature is
        """
        return np.argsort(score, 0)

    # if 'W' is not specified, use the default W
    if 'W' in kwargs:
        W = kwargs['W']
    else:
        W = construct_W(X)

    X = np.asarray(X)
    # diagonal of the degree matrix D = diag(W * ones)
    degree = np.asarray(W.sum(axis=1), dtype=float).ravel()
    # fr' * D * ones for every feature, and the shared centering correction
    weighted_sum = degree.dot(X)
    correction = weighted_sum ** 2 / degree.sum()
    # W * X without building dense n_samples x n_samples matrices
    WX = np.asarray(W.dot(X))

    # fr_hat' * D * fr_hat for every feature
    D_prime = np.sum(X * X * degree[:, np.newaxis], axis=0) - correction
    # fr_hat' * W * fr_hat for every feature
    L_prime = np.sum(X * WX, axis=0) - correction
    # avoid dividing by (almost) zero
    D_prime[D_prime < 1e-12] = 10000

    # compute laplacian score for all features
    score = 1 - L_prime / D_prime
    F = feature_ranking(score)

    if mode == "index":
        return np.array(F, dtype=int)
    # expand the ranking to a vector with one entry per original feature
    return reverse_argsort(F, size=X.shape[1])
コード例 #11
ファイル: SPEC.py プロジェクト: EricSchles/scikit-feature-1
def spec(X, y=None, mode='rank', **kwargs):
    """
    This function implements the SPEC feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        optional targets, only used to build W when W is not supplied
    mode: {str}
        'raw'   -> the SPEC score of every feature
        'index' -> feature indices ranked according to style
        other   -> reverse_argsort of that ranking
    kwargs: {dictionary}
        style: {int}
            style == -1, the first feature ranking function, use all eigenvalues
            style == 0, the second feature ranking function, use all except the 1st eigenvalue
            style >= 2, the third feature ranking function, use the first k except 1st eigenvalue
        W: {sparse matrix or numpy array}, shape (n_samples, n_samples)
            input affinity matrix
        is_classification: {boolean}
            chooses between the classification and regression similarity when
            W has to be built from y (default True)
        n_neighbors: {int}
            forwarded to similarity_regression

    Output
    ------
    w_fea: {numpy array}, shape (n_features,)
        SPEC feature score for each feature, or a ranking derived from it (see mode)

    Reference
    ---------
    Zhao, Zheng and Liu, Huan. "Spectral Feature Selection for Supervised and Unsupervised Learning." ICML 2007.
    """
    def feature_ranking(score, **kwargs):
        style = kwargs.get('style', 0)
        order = np.argsort(score, 0)
        # if style = -1 or 0, ranking features in descending order, the higher the score, the more important the feature is
        if style == -1 or style == 0:
            return order[::-1]
        # otherwise ranking features in ascending order, the lower the score, the more important the feature is
        return order

    style = kwargs.get('style', 0)
    if 'W' in kwargs:
        W = kwargs['W']
    elif y is None:
        W = rbf_kernel(X, gamma=1)
    elif kwargs.get('is_classification', True):
        # if y is available then we do supervised SPEC algo.
        W = similiarity_classification(X, y)
    else:
        W = similarity_regression(X, y, kwargs.get('n_neighbors', None))

    # work on a plain dense array whatever container W arrived in
    if hasattr(W, 'toarray'):
        W = W.toarray()
    W = np.asarray(W, dtype=float)

    n_samples, n_features = X.shape

    # build the degree vector and the laplacian matrix
    degree = W.sum(axis=1)
    L = np.diag(degree) - W
    with np.errstate(divide='ignore'):
        d1 = np.power(degree, -0.5)
    # isolated samples have zero degree
    d1[np.isinf(d1)] = 0
    d2 = np.power(degree, 0.5)
    v = d2 / np.linalg.norm(d2)

    # build the normalized laplacian matrix D^-1/2 * L * D^-1/2
    L_hat = d1[:, np.newaxis] * L * d1[np.newaxis, :]

    # calculate and construct spectral information (largest eigenvalue first)
    s, U = np.linalg.eigh(L_hat)
    s = np.flipud(s)
    U = np.fliplr(U)

    # begin to select features; 1000 marks features that could not be scored
    w_fea = np.ones(n_features) * 1000

    for i in range(n_features):
        F_hat = d2 * X[:, i]
        l = np.linalg.norm(F_hat)
        if l < 100 * np.spacing(1):
            # (numerically) all-zero feature: keep the placeholder score
            w_fea[i] = 1000
            continue
        F_hat = F_hat / l
        a = np.dot(F_hat, U) ** 2

        # use f'Lf formulation
        if style == -1:
            w_fea[i] = np.sum(a * s)
        # using all eigenvalues except the 1st
        elif style == 0:
            a1 = a[0:n_samples - 1]
            w_fea[i] = np.sum(a1 * s[0:n_samples - 1]) / (
                1 - np.power(np.dot(F_hat, v), 2))
        # use first k except the 1st
        else:
            a1 = a[n_samples - style:n_samples - 1]
            w_fea[i] = np.sum(a1 * (2 - s[n_samples - style:n_samples - 1]))

    if style != -1 and style != 0:
        w_fea[w_fea == 1000] = -1000

    if mode == 'raw':
        return w_fea
    # the ranking direction depends on the style, so it must be passed along
    ranking = feature_ranking(w_fea, style=style)
    if mode == 'index':
        return ranking
    return reverse_argsort(ranking)
コード例 #12
def fisher_score(X, y, mode="rank"):
    """
    This function implements the fisher score feature selection, steps are as follows:
    1. Construct the affinity matrix W in fisher score way
    2. For the r-th feature, we define fr = X(:,r), D = diag(W*ones), ones = [1,...,1]', L = D - W
    3. Let fr_hat = fr - (fr'*D*ones)*ones/(ones'*D*ones)
    4. Fisher score for the r-th feature is score = (fr_hat'*D*fr_hat)/(fr_hat'*L*fr_hat)-1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    mode: {str}
        "index" returns feature indices sorted by descending fisher score;
        any other value returns reverse_argsort of that ranking

    Output
    ------
    F: {numpy array}, shape (n_features,)
        feature ranking derived from the fisher score of each feature

    Reference
    ---------
    He, Xiaofei et al. "Laplacian Score for Feature Selection." NIPS 2005.
    Duda, Richard et al. "Pattern classification." John Wiley & Sons, 2012.
    """
    def feature_ranking(score):
        """
        Rank features in descending order according to fisher score, the larger the fisher score, the more important the
        feature is
        """
        return np.argsort(score, 0)[::-1]

    # Construct weight matrix W in a fisherScore way
    kwargs = {"neighbor_mode": "supervised", "fisher_score": True, 'y': y}
    W = construct_W(X, **kwargs)

    X = np.asarray(X)
    # diagonal of the degree matrix D = diag(W * ones)
    degree = np.asarray(W.sum(axis=1), dtype=float).ravel()
    # fr' * D * ones for every feature, and the shared centering correction
    weighted_sum = degree.dot(X)
    correction = weighted_sum ** 2 / degree.sum()
    # W * X without building dense n_samples x n_samples matrices
    WX = np.asarray(W.dot(X))

    # fr_hat' * D * fr_hat for every feature
    D_prime = np.sum(X * X * degree[:, np.newaxis], axis=0) - correction
    # fr_hat' * W * fr_hat for every feature
    L_prime = np.sum(X * WX, axis=0) - correction
    # avoid dividing by (almost) zero
    D_prime[D_prime < 1e-12] = 10000
    laplacian = 1 - L_prime / D_prime

    # compute fisher score from laplacian score, where fisher_score = 1/lap_score - 1
    score = 1.0 / laplacian - 1

    F = feature_ranking(score)
    if mode == "index":
        return F
    return reverse_argsort(F, X.shape[1])
コード例 #13
def proximal_gradient_descent(X, Y_flat, z, mode="rank", **kwargs):
    """
    This function implements supervised sparse feature selection via l2,1 norm, i.e.,
    min_{W} sum_{i}log(1+exp(-yi*(W'*x+C))) + z*||W||_{2,1}

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y_flat: {numpy array}
        input class labels; converted to a label matrix Y of shape
        (n_samples, n_classes) with construct_label_matrix_pan
    z: {float}
        regularization parameter
    mode: {str}
        "raw" returns (W, obj, value_gamma); "rank" returns
        reverse_argsort(feature_ranking(W), size=n_features)
    kwargs: {dictionary}
        verbose: {boolean}
            True if user want to print out the objective function value in each iteration, false if not

    Output
    ------
    W: {numpy array}, shape (n_features, n_classes)
        weight matrix
    obj: {numpy array}, shape (max_iter,)
        objective function value during iterations (zero past the last iteration run)
    value_gamma: {numpy array}, shape (max_iter,)
        suitable step size during iterations (zero past the last iteration run)

    Raises
    ------
    ValueError
        if mode is neither "raw" nor "rank"; checked before any work is done

    Reference
    ---------
        Liu, Jun, et al. "Multi-Task Feature Learning Via Efficient l2,1-Norm Minimization." UAI. 2009.
    """
    if mode not in ("raw", "rank"):
        raise ValueError(
            "Invalid mode {} selected, should be one of \"raw\" or \"rank\"".format(mode))

    verbose = kwargs.get('verbose', False)

    # Starting point initialization #

    # convert Y_flat to one hot encoded
    Y = construct_label_matrix_pan(Y_flat)
    n_samples, n_features = X.shape
    n_samples, n_classes = Y.shape

    # the indices of positive samples
    p_flag = (Y == 1)
    # the total number of positive samples
    n_positive_samples = np.sum(p_flag, 0)
    # the total number of negative samples
    n_negative_samples = n_samples - n_positive_samples
    n_positive_samples = n_positive_samples.astype(float)
    n_negative_samples = n_negative_samples.astype(float)

    # initialize a starting point
    W = np.zeros((n_features, n_classes))
    C = np.log(np.divide(n_positive_samples, n_negative_samples))

    # compute XW = X*W
    XW = np.dot(X, W)

    # starting the main program, the Armijo Goldstein line search scheme + accelerated gradient descent
    # the initial guess of the Lipschitz continuous gradient
    gamma = 1.0 / (n_samples * n_classes)

    # assign Wp with W, and XWp with XW
    XWp = XW
    WWp = np.zeros((n_features, n_classes))
    CCp = np.zeros((1, n_classes))

    alphap = 0
    alpha = 1

    # indicates whether the gradient step only changes a little
    flag = False

    max_iter = 1000
    value_gamma = np.zeros(max_iter)
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # step1: compute search point S based on Wp and W (with beta)
        beta = (alphap - 1) / alpha
        S = W + beta * WWp
        SC = C + beta * CCp

        # step2: line search for gamma and compute the new approximation solution W
        XS = XW + beta * (XW - XWp)
        aa = -np.multiply(Y, XS + np.tile(SC, (n_samples, 1)))
        # fun_S is the logistic loss at the search point
        bb = np.maximum(aa, 0)
        fun_S = np.sum(np.log(np.exp(-bb) + np.exp(aa - bb)) +
                       bb) / (n_samples * n_classes)
        # compute prob = [p_1;p_2;...;p_m]
        prob = 1.0 / (1 + np.exp(aa))

        b = np.multiply(-Y, (1 - prob)) / (n_samples * n_classes)
        # compute the gradient of C
        GC = np.sum(b, 0)
        # compute the gradient of W as X'*b
        G = np.dot(np.transpose(X), b)

        # copy W and XW to Wp and XWp
        Wp = W
        XWp = XW
        Cp = C

        while True:
            # let S walk in a step in the antigradient of S to get V and then do the L1/L2-norm regularized projection
            V = S - G / gamma
            C = SC - GC / gamma
            W = euclidean_projection(V, n_features, n_classes, z, gamma)

            # the difference between the new approximate solution W and the search point S
            V = W - S
            # compute XW = X*W
            XW = np.dot(X, W)
            aa = -np.multiply(Y, XW + np.tile(C, (n_samples, 1)))
            # fun_W is the logistic loss at the new approximate solution
            bb = np.maximum(aa, 0)
            fun_W = np.sum(np.log(np.exp(-bb) + np.exp(aa - bb)) +
                           bb) / (n_samples * n_classes)

            r_sum = (LA.norm(V, 'fro')**2 + LA.norm(C - SC, 2)**2) / 2
            # summed element-wise so that l_sum (and therefore gamma) stays a scalar
            l_sum = fun_W - fun_S - np.sum(np.multiply(V, G)) - np.sum(
                np.multiply(C - SC, GC))

            # determine whether the gradient step makes little improvement
            if r_sum <= 1e-20:
                flag = True
                break

            # the condition is fun_W <= fun_S + <V, G> + <C ,GC> + gamma/2 * (<V,V> + <C-SC,C-SC> )
            if l_sum < r_sum * gamma:
                break
            # step too large: raise gamma and retry the line search
            gamma = max(2 * gamma, l_sum / r_sum)
        value_gamma[iter_step] = gamma

        # step3: update alpha and alphap, and check whether converge
        alphap = alpha
        alpha = (1 + math.sqrt(4 * alpha * alpha + 1)) / 2

        WWp = W - Wp
        CCp = C - Cp

        # calculate obj
        obj[iter_step] = fun_W
        obj[iter_step] += z * calculate_l21_norm(W)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        if flag is True:
            break

        # determine whether converge
        if iter_step >= 1 and math.fabs(obj[iter_step] -
                                        obj[iter_step - 1]) < 1e-3:
            break

    if mode == "raw":
        return W, obj, value_gamma
    # mode == "rank": the ranking is derived from the weights by feature_ranking
    idx = feature_ranking(W).tolist()
    return reverse_argsort(idx, size=X.shape[1])
コード例 #14
def proximal_gradient_descent(X, Y_flat, z, mode="rank", **kwargs):
    """
    This function implements supervised sparse feature selection via l2,1 norm, i.e.,
    min_{W} ||XW-Y||_F^2 + z*||W||_{2,1}

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data, guaranteed to be a numpy array
    Y_flat: {numpy array}
        input class labels; converted by construct_label_matrix_pan into the
        label matrix Y of shape (n_samples, n_classes)
    z: {float}
        regularization parameter
    mode: {string}
        "raw" returns (W, obj, value_gamma); "rank" returns reverse_argsort
        applied to the feature ranking of W
    kwargs: {dictionary}
        verbose: {boolean}
            True if user want to print out the objective function value in each iteration, false if not

    Output
    ------
    mode == "raw":
        W: {numpy array}, shape (n_features, n_classes)
            weight matrix
        obj: {numpy array}, shape (n_iterations,)
            objective function value during iterations
        value_gamma: {numpy array}, shape (n_iterations,)
            suitable step size during iterations
    mode == "rank":
        result of reverse_argsort(feature_ranking(W).tolist(), size=n_features)
    any other mode:
        a message is printed and None is returned

    Reference
    ---------
        Liu, Jun, et al. "Multi-Task Feature Learning Via Efficient l2,1-Norm Minimization." UAI. 2009.
    """

    def init_factor(W_norm, XW, Y, z):
        """
        Initialize the starting point of W, according to the author's code
        """
        n_samples, n_classes = XW.shape
        a = np.inner(np.reshape(XW, n_samples * n_classes),
                     np.reshape(Y, n_samples * n_classes)) - z * W_norm
        b = LA.norm(XW, 'fro')**2
        ratio = a / b
        return ratio

    verbose = kwargs.get('verbose', False)

    # convert Y_flat to one hot encoded
    Y = construct_label_matrix_pan(Y_flat)
    # starting point initialization
    n_samples, n_features = X.shape
    n_samples, n_classes = Y.shape

    # compute X'Y
    XtY = np.dot(np.transpose(X), Y)

    # initialize a starting point
    W = XtY

    # compute XW = X*W
    XW = np.dot(X, W)

    # compute l2,1 norm of W
    W_norm = calculate_l21_norm(W)

    if W_norm >= 1e-6:
        ratio = init_factor(W_norm, XW, Y, z)
        W = ratio * W
        XW = ratio * XW

    # starting the main program, the Armijo Goldstein line search scheme + accelerated gradient descent
    # initialize step size gamma = 1
    gamma = 1

    # assign Wp with W, and XWp with XW
    XWp = XW
    WWp = np.zeros((n_features, n_classes))
    alphap = 0
    alpha = 1

    # indicate whether the gradient step only changes a little
    flag = False

    max_iter = 1000
    value_gamma = np.zeros(max_iter)
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # step1: compute search point S based on Wp and W (with beta)
        beta = (alphap - 1) / alpha
        S = W + beta * WWp

        # step2: line search for gamma and compute the new approximation solution W
        XS = XW + beta * (XW - XWp)
        # compute X'* XS
        XtXS = np.dot(np.transpose(X), XS)
        # obtain the gradient g
        G = XtXS - XtY
        # copy W and XW to Wp and XWp
        Wp = W
        XWp = XW

        while True:
            # let S walk in a step in the antigradient of S to get V and then do the L1/L2-norm regularized projection
            V = S - G / gamma
            W = euclidean_projection(V, n_features, n_classes, z, gamma)
            # the difference between the new approximate solution W and the search point S
            V = W - S
            # compute XW = X*W
            XW = np.dot(X, W)
            XV = XW - XS
            r_sum = LA.norm(V, 'fro')**2
            l_sum = LA.norm(XV, 'fro')**2

            # determine whether the gradient step makes little improvement
            if r_sum <= 1e-20:
                flag = True
                break

            # the condition is ||XV||_2^2 <= gamma * ||V||_2^2
            if l_sum < r_sum * gamma:
                break
            # step size too optimistic: enlarge gamma and retry the projection
            gamma = max(2 * gamma, l_sum / r_sum)
        value_gamma[iter_step] = gamma

        # step3: update alpha and alphap, and check whether converge
        alphap = alpha
        alpha = (1 + math.sqrt(4 * alpha * alpha + 1)) / 2

        WWp = W - Wp
        XWY = XW - Y

        # calculate obj
        obj[iter_step] = LA.norm(XWY, 'fro')**2 / 2
        obj[iter_step] += z * calculate_l21_norm(W)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        # the last gradient step barely moved W: nothing more to gain
        if flag is True:
            break

        # determine whether converge
        if iter_step >= 1 and math.fabs(obj[iter_step] -
                                        obj[iter_step - 1]) < 1e-3:
            break

    if mode == "raw":
        return W, obj, value_gamma
    elif mode == "rank":
        # feature_ranking orders the features by their weights in W
        idx = feature_ranking(W).tolist()
        return reverse_argsort(idx, size=X.shape[1])
    else:
        print("Invalid mode {} selected, should be one of \"raw\" or \"rank\"".
              format(mode))
        return None
コード例 #15
def gini_index(X, y, mode="index"):
    """
    This function implements the gini index feature selection.

    For every feature, each unique feature value is tried as a split
    threshold (<= threshold goes left, > threshold goes right) and the
    smallest size-weighted gini impurity over all thresholds is kept as the
    feature's gini index.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels (any dtype supported by np.unique: integers,
        floats, strings, ...)
    mode: {string}
        "index" returns the feature indices sorted by ascending gini index;
        any other value returns reverse_argsort of that ordering

    Output
    ------
    mode == "index":
        {numpy array of int}, shape (n_features,)
            feature indices, the most important (smallest gini index) first
    otherwise:
        result of reverse_argsort(ranking, size=n_features)
    """

    def feature_ranking(W):
        """
        Rank features in ascending order according to their gini index values, the smaller the gini index,
        the more important the feature is
        """
        return np.argsort(W)

    def impurity(labels, classes):
        """
        Gini impurity 1 - sum_k p_k^2 of `labels`; an empty side yields 1,
        which is harmless because it is weighted by its size (0).
        """
        if len(labels) == 0:
            return 1
        squared_sum = 0
        for k in classes:
            # probability of occurrence of class k in labels
            p_k = np.true_divide(len(labels[labels == k]), len(labels))
            squared_sum += np.power(p_k, 2)
        return 1 - squared_sum

    n_samples, n_features = X.shape

    # iterate over the classes that actually occur instead of an integer
    # range(min, max + 1): same result for integer labels, and it also works
    # for float / string labels
    classes = np.unique(y)

    # initialize gini_index for all features to be 1
    gini = np.ones(n_features)

    # For i-th feature we define fi = x[:,i], v includes all unique values in fi
    for i in range(n_features):
        column = X[:, i]
        for threshold in np.unique(column):
            # labels of instances whose i-th feature value is <= threshold
            left_y = y[column <= threshold]
            # labels of instances whose i-th feature value is > threshold
            right_y = y[column > threshold]

            # impurity of both sides weighted by the side sizes
            weighted = (len(left_y) * impurity(left_y, classes) +
                        len(right_y) * impurity(right_y, classes))

            # compute the gini_index for the i-th feature
            value = np.true_divide(weighted, len(y))

            if value < gini[i]:
                gini[i] = value

    F = feature_ranking(gini)
    if mode == "index":
        return np.array(F, dtype=int)
    else:
        return reverse_argsort(F, size=X.shape[1])
コード例 #16
def trace_ratio(X, y, n_selected_features=None, mode='rank', **kwargs):
    """
    This function implements the trace ratio criterion for feature selection

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    n_selected_features: {int}
        number of features to select (defaults to all features)
    mode: {string}
        'raw' returns (feature_idx, feature_score, subset_score), 'index'
        returns feature_idx, anything else returns reverse_argsort(feature_idx)
    kwargs: {dictionary}
        style: {string}
            style == 'fisher', build between-class matrix and within-class affinity matrix in a fisher score way
            style == 'laplacian', build between-class matrix and within-class affinity matrix in a laplacian score way
            (default is 'fisher')
        verbose: {boolean}
            True if user want to print out the objective function value in each iteration, False if not

    Output
    ------
    feature_idx: {numpy array}
        the ranked (descending order) feature index based on subset-level score
    feature_score: {numpy array}
        the feature-level score
    subset_score: {float}
        the subset-level score

    Raises
    ------
    ValueError
        if style is neither 'fisher' nor 'laplacian'

    Reference
    ---------
    Feiping Nie et al. "Trace Ratio Criterion for Feature Selection." AAAI 2008.
    """
    if n_selected_features is None:
        n_selected_features = X.shape[1]
    # if 'style' is not specified, use the fisher score way to build the two affinity matrices
    style = kwargs.get('style', 'fisher')
    # if 'verbose' is not specified, do not output the value of objective function
    verbose = kwargs.get('verbose', False)
    n_samples, n_features = X.shape

    # strings are compared with ==; identity (`is`) only works by accident of interning
    if style == 'fisher':
        kwargs_within = {"neighbor_mode": "supervised", "fisher_score": True, 'y': y}
        # build within class and between class laplacian matrix L_w and L_b
        W_within = construct_W(X, **kwargs_within)
        L_within = np.eye(n_samples) - W_within
        L_tmp = np.eye(n_samples) - np.ones([n_samples, n_samples])/n_samples
        L_between = L_within - L_tmp
    elif style == 'laplacian':
        kwargs_within = {"metric": "euclidean", "neighbor_mode": "knn", "weight_mode": "heat_kernel", "k": 5, 't': 1}
        # build within class and between class laplacian matrix L_w and L_b
        W_within = construct_W(X, **kwargs_within)
        D_within = np.diag(np.array(W_within.sum(1))[:, 0])
        L_within = D_within - W_within
        W_between = np.dot(np.dot(D_within, np.ones([n_samples, n_samples])), D_within)/np.sum(D_within)
        D_between = np.diag(np.array(W_between.sum(1)))
        L_between = D_between - W_between
    else:
        raise ValueError(
            "style should be 'fisher' or 'laplacian', got {!r}".format(style))

    # build X'*L_within*X and X'*L_between*X
    L_within = (np.transpose(L_within) + L_within)/2
    L_between = (np.transpose(L_between) + L_between)/2
    S_within = np.array(np.dot(np.dot(np.transpose(X), L_within), X))
    S_between = np.array(np.dot(np.dot(np.transpose(X), L_between), X))

    # reflect the within-class or local affinity relationship encoded on graph, Sw = X*Lw*X'
    S_within = (np.transpose(S_within) + S_within)/2
    # reflect the between-class or global affinity relationship encoded on graph, Sb = X*Lb*X'
    S_between = (np.transpose(S_between) + S_between)/2

    # take the absolute values of diagonal
    s_within = np.absolute(S_within.diagonal())
    s_between = np.absolute(S_between.diagonal())
    s_between[s_between == 0] = 1e-14  # this number is from authors' code

    # preprocessing: order features by their individual between/within ratio
    fs_idx = np.argsort(np.divide(s_between, s_within), 0)[::-1]
    # NOTE(review): the initial k uses the first n_selected_features entries
    # in original feature order, not in fs_idx order; kept as in the source
    # because it is only the starting value of the iteration below.
    k = np.sum(s_between[0:n_selected_features])/np.sum(s_within[0:n_selected_features])
    s_within = s_within[fs_idx[0:n_selected_features]]
    s_between = s_between[fs_idx[0:n_selected_features]]

    # iterate until the subset-level score k converges
    count = 0
    while True:
        feature_level = s_between - k*s_within
        I = np.argsort(feature_level)[::-1]
        score = feature_level[I]
        idx = I[0:n_selected_features]
        old_k = k
        k = np.sum(s_between[idx])/np.sum(s_within[idx])
        if verbose:
            print('obj at iter {0}: {1}'.format(count+1, k))
        count += 1
        if abs(k - old_k) < 1e-3:
            break

    # get feature index, feature-level score and subset-level score
    feature_idx = fs_idx[I]
    feature_score = score
    subset_score = k
    if mode == 'raw':
        return feature_idx, feature_score, subset_score
    elif mode == 'index':
        return feature_idx
    else:
        return reverse_argsort(feature_idx)
コード例 #17
ファイル: RFS.py プロジェクト: EricSchles/scikit-feature-1
def rfs(X, Y_flat, mode='rank', **kwargs):
    """
    This function implements efficient and robust feature selection via joint l21-norms minimization
    min_W||X^T W - Y||_2,1 + gamma||W||_2,1

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    Y_flat: {numpy array}
        input class labels; converted by construct_label_matrix_pan into the
        label matrix Y of shape (n_samples, n_classes)
    mode: {string}
        "raw" returns W, "index" returns feature_ranking(W), "rank" returns
        reverse_argsort(feature_ranking(W)); any other value returns None
    kwargs: {dictionary}
        gamma: {float}
            parameter in RFS (default is 1)
        verbose: boolean
            True if want to display the objective function value, false if not

    Output
    ------
    W: {numpy array}, shape(n_features, n_classes)
        feature weight matrix (the first n_features rows of U)

    Reference
    ---------
    Nie, Feiping et al. "Efficient and Robust Feature Selection via Joint l2,1-Norms Minimization" NIPS 2010.
    """

    def calculate_obj(X, Y, W, gamma):
        """
        This function calculates the objective function of rfs
        """
        temp = np.dot(X, W) - Y
        return calculate_l21_norm(temp) + gamma*calculate_l21_norm(W)

    # convert Y_flat to one hot encoded
    Y = construct_label_matrix_pan(Y_flat)
    # default gamma is 1
    gamma = kwargs.get('gamma', 1)
    verbose = kwargs.get('verbose', False)

    n_samples, n_features = X.shape
    # A = [X, gamma * I], shape (n_samples, n_features + n_samples)
    A = np.zeros((n_samples, n_samples + n_features))
    A[:, 0:n_features] = X
    A[:, n_features:n_features+n_samples] = gamma*np.eye(n_samples)
    D = np.eye(n_features+n_samples)

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update U as U = D^{-1} A^T (A D^-1 A^T)^-1 Y
        D_inv = LA.inv(D)
        # small ridge term keeps A D^-1 A^T invertible
        temp = LA.inv(np.dot(np.dot(A, D_inv), A.T) + 1e-6*np.eye(n_samples))  # (A D^-1 A^T)^-1
        U = np.dot(np.dot(np.dot(D_inv, A.T), temp), Y)
        # update D as D_ii = 1 / 2 / ||U(i,:)||
        D = generate_diagonal_matrix(U)

        obj[iter_step] = calculate_obj(X, Y, U[0:n_features, :], gamma)

        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step+1, obj[iter_step]))
        # stop once the objective changes by less than 1e-3
        if iter_step >= 1 and math.fabs(obj[iter_step] - obj[iter_step-1]) < 1e-3:
            break

    # the first d rows of U are the feature weights
    W = U[0:n_features, :]
    if mode == "raw":
        return W
    elif mode == "index":
        return feature_ranking(W)
    elif mode == "rank":
        return reverse_argsort(feature_ranking(W))
コード例 #18
ファイル: MCFS.py プロジェクト: EricSchles/scikit-feature-1
def mcfs(X, y=None, n_selected_features=None, mode="rank", **kwargs):
    """
    This function implements unsupervised feature selection for multi-cluster data.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: unused; accepted so the signature matches the supervised selectors
    n_selected_features: {int}
        number of features to select (defaults to all features); used as the
        cardinality constraint of each LARS regression
    mode: {string}
        "raw" returns W, "index" returns the feature ranking, "rank" returns
        reverse_argsort of the ranking; any other value returns None
    kwargs: {dictionary}
        W: {sparse matrix}, shape (n_samples, n_samples)
            affinity matrix (default is construct_W(X))
        n_clusters: {int}
            number of clusters (default is 5)

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Cai, Deng et al. "Unsupervised Feature Selection for Multi-Cluster Data." KDD 2010.
    """

    def feature_ranking(W):
        """
        This function computes MCFS score and ranking features according to feature weights matrix W
        (score of a feature = max of its row in W, ranked in descending order)
        """
        mcfs_score = W.max(1)
        idx = np.argsort(mcfs_score, 0)
        idx = idx[::-1]
        return idx

    if n_selected_features is None:
        n_selected_features = int(X.shape[1])

    # use the default affinity matrix unless the caller supplies one
    if 'W' not in kwargs:
        W = construct_W(X)
    else:
        W = kwargs['W']
    # default number of clusters is 5
    n_clusters = kwargs.get('n_clusters', 5)

    # solve the generalized eigen-decomposition problem and get the top K
    # eigen-vectors with respect to the smallest eigenvalues
    W = W.toarray()
    W = (W + W.T) / 2
    W_norm = np.diag(np.sqrt(1 / W.sum(1)))
    W = np.dot(W_norm, np.dot(W, W_norm))
    # force exact symmetry by taking the element-wise max of W and W^T
    WT = W.T
    W[W < WT] = WT[W < WT]
    eigen_value, ul = scipy.linalg.eigh(a=W)
    # eigh returns eigenvalues in ascending order: take the n_clusters
    # eigenvectors just below the largest one (the largest is skipped)
    Y = np.dot(W_norm, ul[:, -1 * n_clusters - 1:-1])

    # solve K L1-regularized regression problem using LARs algorithm with cardinality constraint being d
    n_sample, n_feature = X.shape
    W = np.zeros((n_feature, n_clusters))
    for i in range(n_clusters):
        clf = linear_model.Lars(n_nonzero_coefs=n_selected_features)
        clf.fit(X, Y[:, i])
        W[:, i] = clf.coef_

    if mode == "raw":
        return W
    elif mode == "index":
        return feature_ranking(W)
    elif mode == "rank":
        W_idx = feature_ranking(W)
        return reverse_argsort(W_idx, X.shape[1])
コード例 #19
def cmim(X, y, mode="rank", **kwargs):
    """
    This function implements the CMIM feature selection.
    The scoring criteria is calculated based on the formula j_cmim=I(f;y)-max_j(I(fj;f)-I(fj;f|y))

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        Input data, guaranteed to be a discrete numpy array
    y: {numpy array}, shape (n_samples,)
        guaranteed to be a numpy array
    mode: {string}
        "index" returns the selected feature indices; any other value
        returns reverse_argsort of them
    kwargs: {dictionary}
        n_selected_features: {int}
            number of features to select; when omitted, selection continues
            until the best remaining j_cmim is no longer positive

    Output
    ------
    mode == "index":
        F: {numpy array}
            index of selected features, F[0] is the most important feature
    otherwise:
        result of reverse_argsort(F)

    Reference
    ---------
    Brown, Gavin et al. "Conditional Likelihood Maximisation: A Unifying Framework for Information Theoretic Feature Selection." JMLR 2012.
    """

    n_samples, n_features = X.shape
    # index of selected features, initialized to be empty
    F = []
    # boolean mask mirroring F for O(1) membership tests
    is_selected = np.zeros(n_features, dtype=bool)

    # indicate whether the user specifies the number of features
    is_n_selected_features_specified = 'n_selected_features' in kwargs
    if is_n_selected_features_specified:
        n_selected_features = kwargs['n_selected_features']

    # t1 stores I(f;y) for each feature f
    t1 = np.zeros(n_features)
    for i in range(n_features):
        t1[i] = midd(X[:, i], y)

    # max_redundancy stores max(I(fj;f)-I(fj;f|y)) for each feature f; it
    # starts at an extremely small value so that any real value replaces it
    max_redundancy = -10000000*np.ones(n_features)

    # make sure that j_cmim is positive at the very beginning
    j_cmim = 1

    while True:
        if len(F) == 0:
            # select the feature whose mutual information is the largest
            idx = np.argmax(t1)
            F.append(idx)
            is_selected[idx] = True
            f_select = X[:, idx]

        if is_n_selected_features_specified:
            if len(F) >= n_selected_features:
                break
        else:
            if j_cmim <= 0:
                break

        # every feature is already selected: stop instead of appending a
        # stale index a second time
        if len(F) >= n_features:
            break

        # we assign an extreme small value to j_cmim to ensure it is smaller than all possible values of j_cmim
        j_cmim = -1000000000000
        for i in range(n_features):
            if is_selected[i]:
                continue
            f = X[:, i]
            t2 = midd(f_select, f)
            t3 = cmidd(f_select, f, y)
            if t2-t3 > max_redundancy[i]:
                max_redundancy[i] = t2-t3
            # calculate j_cmim for feature i (not in F)
            t = t1[i] - max_redundancy[i]
            # record the largest j_cmim and the corresponding feature index
            if t > j_cmim:
                j_cmim = t
                idx = i
        F.append(idx)
        is_selected[idx] = True
        f_select = X[:, idx]

    if mode == "index":
        return np.array(F)
    else:
        return reverse_argsort(F)
コード例 #20
def udfs(X, y=None, mode='rank', **kwargs):
    """
    This function implements l2,1-norm regularized discriminative feature
    selection for unsupervised learning, i.e., min_W Tr(W^T M W) + gamma ||W||_{2,1}, s.t. W^T W = I

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: unused; accepted so the signature matches the supervised selectors
    mode: {string}
        'raw' returns W, 'index' returns feature_ranking(W), 'rank' returns
        reverse_argsort(feature_ranking(W)); any other value returns None
    kwargs: {dictionary}
        gamma: {float}
            parameter in the objective function of UDFS (default is 0.1)
        n_clusters: {int}
            Number of clusters (default is 5)
        k: {int}
            number of nearest neighbor (default is 5)
        verbose: {boolean}
            True if want to display the objective function value, false if not

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix

    Reference
    ---------
    Yang, Yi et al. "l2,1-Norm Regularized Discriminative Feature Selection for Unsupervised Learning." AAAI 2012.
    """

    def construct_M(X, k, gamma):
        """
        This function constructs the M matrix described in the paper
        """
        n_sample, n_feature = X.shape
        Xt = X.T
        D = pairwise_distances(X)
        # sort the distance matrix D in ascending order
        idx = np.argsort(D, axis=1)
        # choose the k-nearest neighbors for each instance (first k+1 columns of the sorted order)
        idx_new = idx[:, 0:k + 1]
        # centering matrix of size k+1
        H = np.eye(k + 1) - 1.0 / (k + 1) * np.ones((k + 1, k + 1))
        I = np.eye(k + 1)
        Mi = np.zeros((n_sample, n_sample))
        for i in range(n_sample):
            neighbors = idx_new[i, :]
            Xi = Xt[:, neighbors]
            Xi_tilde = np.dot(Xi, H)
            Bi = np.linalg.inv(np.dot(Xi_tilde.T, Xi_tilde) + gamma * I)
            # Si is the selection matrix with Si[neighbors[q], q] = 1, so
            # Si (H Bi H) Si^T scatters the local block onto the rows and
            # columns of sample i's neighbours.  (The source indexed
            # idx_new[q] here, which ignored i altogether.)
            Mi[np.ix_(neighbors, neighbors)] += np.dot(np.dot(H, Bi), H)
        M = np.dot(np.dot(X.T, Mi), X)
        return M

    def calculate_obj(X, W, M, gamma):
        """
        This function calculates the objective function of ls_l21 described in the paper
        """
        return np.trace(np.dot(np.dot(W.T, M),
                               W)) + gamma * calculate_l21_norm(W)

    # default gamma is 0.1
    gamma = kwargs.get('gamma', 0.1)
    # default k is set to be 5
    k = kwargs.get('k', 5)
    n_clusters = kwargs.get('n_clusters', 5)
    verbose = kwargs.get('verbose', False)

    # construct M
    n_sample, n_feature = X.shape
    M = construct_M(X, k, gamma)

    D = np.eye(n_feature)
    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W as the eigenvectors of P corresponding to the first n_clusters
        # smallest eigenvalues
        P = M + gamma * D
        eigen_value, eigen_vector = scipy.linalg.eigh(a=P)
        W = eigen_vector[:, 0:n_clusters]
        # update D as D_ii = 1 / 2 / ||W(i,:)||
        D = generate_diagonal_matrix(W)

        obj[iter_step] = calculate_obj(X, W, M, gamma)
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))

        # stop once the objective changes by less than 1e-3
        if iter_step >= 1 and math.fabs(obj[iter_step] -
                                        obj[iter_step - 1]) < 1e-3:
            break

    if mode == 'raw':
        return W
    elif mode == 'index':
        return feature_ranking(W)
    elif mode == 'rank':
        return reverse_argsort(feature_ranking(W))
コード例 #21
def ndfs(X, y=None, mode="rank", **kwargs):
    """
    This function implement unsupervised feature selection using nonnegative spectral analysis, i.e.,
    min_{F,W} Tr(F^T L F) + alpha*(||XW-F||_F^2 + beta*||W||_{2,1}) + gamma/2 * ||F^T F - I||_F^2
    s.t. F >= 0

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: unused; accepted so the signature matches the supervised selectors
    mode: {string}
        "index" returns the feature ranking as an int array, "raw" returns W,
        anything else returns reverse_argsort of the ranking
    kwargs: {dictionary}
        W: {sparse matrix}, shape {n_samples, n_samples}
            affinity matrix (default is construct_W(X))
        alpha: {float}
            Parameter alpha in objective function (default is 1)
        beta: {float}
            Parameter beta in objective function (default is 1)
        gamma: {float}
            a very large number used to force F^T F = I (default is 10e8)
        F0: {numpy array}, shape (n_samples, n_clusters)
            initialization of the pseudo label matrix F; if not provided,
            n_clusters must be given and F is initialised with k-means
        n_clusters: {int}
            number of clusters (taken from F0.shape[1] when F0 is provided)
        verbose: {boolean}
            True if user want to print out the objective function value in each iteration, false if not

    Output
    ------
    W: {numpy array}, shape(n_features, n_clusters)
        feature weight matrix

    Raises
    ------
    Exception
        if neither F0 nor n_clusters is provided

    Reference
    ---------
        Li, Zechao, et al. "Unsupervised Feature Selection Using Nonnegative Spectral Analysis." AAAI. 2012.
    """

    def kmeans_initialization(X, n_clusters):
        """
        This function uses kmeans to initialize the pseudo label

        Input
        -----
        X: {numpy array}, shape (n_samples, n_features)
            input data
        n_clusters: {int}
            number of clusters

        Output
        ------
        F: {numpy array}, shape (n_samples, n_clusters)
            scaled cluster-indicator matrix with a small positive offset
        """
        n_samples, n_features = X.shape
        # NOTE(review): the keyword arguments after n_clusters were lost in
        # the source text; the remaining settings are left at sklearn's
        # defaults - confirm against the upstream file.
        kmeans = sklearn.cluster.KMeans(n_clusters=n_clusters)
        kmeans.fit(X)
        labels = kmeans.labels_
        Y = np.zeros((n_samples, n_clusters))
        for row in range(0, n_samples):
            Y[row, labels[row]] = 1
        # T = Y^T Y is diagonal (cluster sizes); scale each column by 1/sqrt(size)
        T = np.dot(Y.transpose(), Y)
        F = np.dot(Y, np.sqrt(np.linalg.inv(T)))
        # small offset keeps every entry strictly positive for the
        # multiplicative update of F
        F = F + 0.02 * np.ones((n_samples, n_clusters))
        return F

    # default gamma is 10e8
    gamma = kwargs.get('gamma', 10e8)
    # use the default affinity matrix unless the caller supplies one
    if 'W' not in kwargs:
        affinity = construct_W(X)
    else:
        affinity = kwargs['W']
    alpha = kwargs.get('alpha', 1)
    beta = kwargs.get('beta', 1)
    if 'F0' not in kwargs:
        if 'n_clusters' not in kwargs:
            raise Exception("either F0 or n_clusters should be provided")
        else:
            # initialize F
            n_clusters = kwargs['n_clusters']
            F = kmeans_initialization(X, n_clusters)
    else:
        F = kwargs['F0']
        # n_clusters is needed below for the orthogonality term
        n_clusters = F.shape[1]
    verbose = kwargs.get('verbose', False)

    n_samples, n_features = X.shape

    # initialize D as identity matrix
    D = np.identity(n_features)
    I = np.identity(n_samples)

    # build laplacian matrix L = diag(degree) - W; subtracting W from the
    # bare degree vector would broadcast it over rows instead
    if hasattr(affinity, 'toarray'):
        affinity = affinity.toarray()
    affinity = np.asarray(affinity)
    L = np.diag(affinity.sum(axis=1)) - affinity

    max_iter = 1000
    obj = np.zeros(max_iter)
    for iter_step in range(max_iter):
        # update W
        T = np.linalg.inv(
            np.dot(X.transpose(), X) + beta * D + 1e-6 * np.eye(n_features))
        W = np.dot(np.dot(T, X.transpose()), F)
        # update D as D_ii = 1 / (2 * ||W(i,:)||), guarding against zero rows
        temp = np.sqrt((W * W).sum(1))
        temp[temp < 1e-16] = 1e-16
        temp = 0.5 / temp
        D = np.diag(temp)
        # update M
        M = L + alpha * (I - np.dot(np.dot(X, T), X.transpose()))
        M = (M + M.transpose()) / 2
        # update F (multiplicative update, then column normalisation)
        denominator = np.dot(M,
                             F) + gamma * np.dot(np.dot(F, F.transpose()), F)
        temp = np.divide(gamma * F, denominator)
        F = F * np.array(temp)
        temp = np.diag(np.sqrt(np.diag(1 /
                                       (np.dot(F.transpose(), F) + 1e-16))))
        F = np.dot(F, temp)

        # calculate objective function
        obj[iter_step] = np.trace(np.dot(np.dot(
            F.transpose(), M), F)) + gamma / 4 * np.linalg.norm(
                np.dot(F.transpose(), F) - np.identity(n_clusters), 'fro')
        if verbose:
            print('obj at iter {0}: {1}'.format(iter_step + 1, obj[iter_step]))
        # stop once the objective changes by less than 1e-3
        if iter_step >= 1 and math.fabs(obj[iter_step] -
                                        obj[iter_step - 1]) < 1e-3:
            break

    ranking = feature_ranking(W)

    if mode == "index":
        return np.array(ranking, dtype=int)
    elif mode == "raw":
        return W
    else:
        return reverse_argsort(ranking, size=X.shape[1])
コード例 #22
    def reliefF(self, X, y, **kwargs):
        """
        This function implements the reliefF feature selection

        Input
        -----
        X: {numpy array}, shape (n_samples, n_features)
            input data
        y: {numpy array}, shape (n_samples,)
            input class labels
        kwargs: {dictionary}
            parameters of reliefF:
            k: {int}
                choices for the number of neighbors (default k = 5)

        Output
        ------
        Depends on self.mode:
        'raw': score, {numpy array}, shape (n_features,)
            reliefF score for each feature
        'index': features ranked by descending score
        'rank': reverse_argsort of that ranking
        any other value: None

        Reference
        ---------
        Robnik-Sikonja, Marko et al. "Theoretical and empirical analysis of relieff and rrelieff." Machine Learning 2003.
        Zhao, Zheng et al. "On Similarity Preserving Feature Selection." TKDE 2013.
        """

        def feature_ranking(score):
            """
            Rank features in descending order according to reliefF score, the higher the reliefF score, the more
            important the feature is
            """
            idx = np.argsort(score, 0)
            return idx[::-1]

        k = kwargs.get("k", 5)
        n_samples, n_features = X.shape

        # calculate pairwise distances between instances
        distance = pairwise_distances(X, metric='manhattan')

        # the score accumulates over ALL sampled instances, so it is
        # initialised once, before the loop
        score = np.zeros(n_features)

        # the number of sampled instances is equal to the number of total instances
        for idx in range(n_samples):
            near_hit = []
            near_miss = dict()

            self_fea = X[idx, :]
            c = np.unique(y).tolist()

            # stop_dict[label] becomes 1 once k neighbours of that label are found
            stop_dict = dict()
            for label in c:
                stop_dict[label] = 0
            del c[c.index(y[idx])]

            # prior of each other class, renormalised without the instance's own class
            p_dict = dict()
            p_label_idx = float(len(y[y == y[idx]]))/float(n_samples)

            for label in c:
                p_label_c = float(len(y[y == label]))/float(n_samples)
                p_dict[label] = p_label_c/(1-p_label_idx)
                near_miss[label] = []

            distance_sort = []
            # push the instance itself to the far end of its own neighbour list
            distance[idx, idx] = np.max(distance[idx, :])

            for i in range(n_samples):
                distance_sort.append([distance[idx, i], int(i), y[i]])
            distance_sort.sort(key=lambda x: x[0])

            for i in range(n_samples):
                neighbor_label = distance_sort[i][2]
                if neighbor_label == y[idx]:
                    # find k nearest hit points
                    if len(near_hit) < k:
                        near_hit.append(distance_sort[i][1])
                    elif len(near_hit) == k:
                        stop_dict[y[idx]] = 1
                else:
                    # find k nearest miss points for each label
                    if len(near_miss[neighbor_label]) < k:
                        near_miss[neighbor_label].append(distance_sort[i][1])
                    else:
                        if len(near_miss[neighbor_label]) == k:
                            stop_dict[neighbor_label] = 1
                # stop scanning as soon as every label has its k neighbours
                stop = True
                for (key, value) in list(stop_dict.items()):
                    if value != 1:
                        stop = False
                if stop:
                    break

            # update reliefF score
            near_hit_term = np.zeros(n_features)
            for ele in near_hit:
                near_hit_term = np.array(abs(self_fea-X[ele, :]))+np.array(near_hit_term)

            near_miss_term = dict()
            for (label, miss_list) in list(near_miss.items()):
                near_miss_term[label] = np.zeros(n_features)
                for ele in miss_list:
                    near_miss_term[label] = np.array(abs(self_fea-X[ele, :]))+np.array(near_miss_term[label])
                score += near_miss_term[label]/(k*p_dict[label])
            score -= near_hit_term/k

        if self.mode == 'raw':
            return score
        elif self.mode == 'index':
            return feature_ranking(score)
        elif self.mode == 'rank':
            return reverse_argsort(feature_ranking(score), X.shape[1])
def reliefF(X, y, dist_params, mode="rank", **kwargs):
    """
    This function implements the reliefF feature selection.

    Input
    -----
    X: {numpy array}, shape (n_samples, n_features)
        input data
    y: {numpy array}, shape (n_samples,)
        input class labels
    dist_params: {dictionary}
        extra keyword arguments forwarded to ``cdist`` (and from there to the
        ``partial_distance`` metric) when the pairwise distances are computed
    mode: {str}
        'raw'   -> return the reliefF score of every feature
        'index' -> return the feature indices sorted by descending score
        'rank'  -> return ``reverse_argsort`` applied to that index ordering
        any other value falls through and the function returns None
    kwargs: {dictionary}
        parameters of reliefF:
        k: {int}
            choices for the number of neighbors (default k = 5)

    Output
    ------
    score: {numpy array}, shape (n_features,)
        reliefF score for each feature (or its ranking, depending on ``mode``)

    Reference
    ---------
    Robnik-Sikonja, Marko et al. "Theoretical and empirical analysis of relieff and rrelieff." Machine Learning 2003.
    Zhao, Zheng et al. "On Similarity Preserving Feature Selection." TKDE 2013.
    """

    def feature_ranking(score):
        """
        Rank features in descending order according to reliefF score, the higher the reliefF score, the more
        important the feature is
        """
        idx = np.argsort(score, 0)
        return idx[::-1]

    def feature_diff(a, b):
        """
        Per-feature difference between two instances: a 0/1 mismatch indicator for str-valued
        features, the absolute difference for everything else.
        """
        diff = np.zeros(n_features)
        for j in range(n_features):
            if isinstance(a[j], str):
                diff[j] = 0 if a[j] == b[j] else 1
            else:
                diff[j] = abs(a[j] - b[j])
        return diff

    k = kwargs.get("k", 5)
    n_samples, n_features = X.shape

    # calculate pairwise distances between instances
    distance = cdist(X, X, metric=partial_distance, **dist_params)
    # clamp every pairwise distance into [0, n_features]
    distance = np.clip(distance, 0, X.shape[1])
    # NaNs are replaced only after the distances are computed, so the metric sees the raw values
    X = np.nan_to_num(X)

    score = np.zeros(n_features)

    # class labels and their prior probabilities do not depend on the sampled instance
    classes = np.unique(y).tolist()
    prior = {label: float(len(y[y == label])) / float(n_samples) for label in classes}

    # the number of sampled instances is equal to the number of total instances
    for idx in range(n_samples):
        own_label = y[idx]
        self_fea = X[idx, :]

        near_hit = []
        # one neighbour list per class other than the instance's own class
        near_miss = {label: [] for label in classes if label != own_label}
        # stop_dict[label] becomes 1 once k neighbours of that class have been collected
        stop_dict = {label: 0 for label in classes}

        # prior of each miss class, renormalised over the classes other than the own class
        p_label_idx = prior[own_label]
        p_dict = {label: prior[label] / (1 - p_label_idx) for label in near_miss}

        # push the instance itself to the far end of its own neighbour ordering
        distance[idx, idx] = np.max(distance[idx, :])

        distance_sort = sorted(
            ([distance[idx, i], int(i), y[i]] for i in range(n_samples)),
            key=lambda x: x[0],
        )

        for _, neighbor, label in distance_sort:
            if label == own_label:
                # find k nearest hit points
                if len(near_hit) < k:
                    near_hit.append(neighbor)
                elif len(near_hit) == k:
                    stop_dict[own_label] = 1
            else:
                # find k nearest miss points for each label
                if len(near_miss[label]) < k:
                    near_miss[label].append(neighbor)
                elif len(near_miss[label]) == k:
                    stop_dict[label] = 1
            # every class has its k neighbours: no need to scan farther instances
            if all(value == 1 for value in stop_dict.values()):
                break

        # update reliefF score
        near_hit_term = np.zeros(n_features)
        for ele in near_hit:
            near_hit_term += feature_diff(self_fea, X[ele, :])

        for label, miss_list in near_miss.items():
            miss_term = np.zeros(n_features)
            for ele in miss_list:
                miss_term += feature_diff(self_fea, X[ele, :])
            # misses raise the score, weighted by the renormalised class prior
            score += miss_term / (k * p_dict[label])
        # hits lower the score
        score -= near_hit_term / k

    if mode == 'raw':
        return score
    elif mode == 'index':
        return feature_ranking(score)
    elif mode == 'rank':
        return reverse_argsort(feature_ranking(score), X.shape[1])