Example #1
def var():
    # Feature selection: remove low-variance features, i.e. features whose
    # variance falls below the given threshold.
    from sklearn.feature_selection import VarianceThreshold
    data = [[0,2,0,3],
            [0,1,4,3],
            [0,1,1,3]]

    va = VarianceThreshold(threshold=0.5)
    data = va.fit_transform(data)
    print(data)
    data = va.inverse_transform(data)
    print(data)
Example #2
def test_VarianceThreshold():
    '''
    test the method of VarianceThreshold
    :return:  None
    '''
    X = [[100, 1, 2, 3], [100, 4, 5, 6], [100, 7, 8, 9], [101, 11, 12, 13]]
    selector = VarianceThreshold(1)
    selector.fit(X)
    print("Variances is %s" % selector.variances_)
    print("After transform is %s" % selector.transform(X))
    print("The surport is %s" % selector.get_support(True))
    print("After reverse transform is %s" %
          selector.inverse_transform(selector.transform(X)))
Example #3
def varianceFilter(train_data, train_classes, threshold):
    '''
    Variance filter: vectorize the training data, drop low-variance features,
    and map the result back to dict form.
    '''
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.feature_selection import VarianceThreshold

    vectorizer = DictVectorizer()
    # Fit and transform the train data.
    x_train = vectorizer.fit_transform(train_data)

    sel = VarianceThreshold(threshold=(threshold * (1 - threshold)))
    x_new = sel.fit_transform(x_train)
    return vectorizer.inverse_transform(sel.inverse_transform(x_new))
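The threshold * (1 - threshold) expression above follows the rule of thumb that a Boolean (0/1) feature which takes the value 1 with probability p has variance p * (1 - p). A minimal sketch with made-up data (not part of the example above) showing how such a threshold drops near-constant Boolean columns:

import numpy as np
from sklearn.feature_selection import VarianceThreshold

# Made-up Boolean data: column 0 is 1 in 9 of 10 samples (p = 0.9), so its
# variance is 0.9 * 0.1 = 0.09; column 1 is balanced, with variance 0.25.
X = np.array([[1, 0], [1, 1], [1, 0], [1, 1], [1, 0],
              [1, 1], [1, 0], [1, 1], [1, 0], [0, 1]])
print(np.var(X, axis=0))                         # [0.09 0.25]

# With threshold = .8 * (1 - .8) = 0.16, columns whose majority value covers
# roughly 80% of the samples or more are dropped.
sel = VarianceThreshold(threshold=.8 * (1 - .8))
print(sel.fit_transform(X))                      # only the balanced column survives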
Example #4
def test_varianceThreshold():
    from sklearn.feature_selection import VarianceThreshold
    x = [
        [100, 1, 2, 3],
        [100, 4, 5, 6],
        [100, 7, 8, 9],
        [101, 11, 12, 13]
    ]
    selector = VarianceThreshold(1)
    selector.fit(x)
    print("Variances is %s" % selector.variances_)
    print("After transform is %s" % selector.transform(x))
    print("The support is %s" % selector.get_support(True))
    print("After reverse transform is %s" % selector.inverse_transform(selector.transform(x)))
Example #5
def test_VarianceThreshold():
    '''
    Test the usage of VarianceThreshold.
    :return: None
    '''
    X = [[100, 1, 2, 3],
         [100, 4, 5, 6],
         [100, 7, 8, 9],
         [101, 11, 12, 13]]
    selector = VarianceThreshold(threshold=1)   # features with variance below threshold are removed
    selector.fit(X)
    print("Variances is %s" % selector.variances_)
    print("After transform is %s" % selector.transform(X))
    print("The surport is %s" % selector.get_support(True))
    print("After reverse transform is %s" %
          selector.inverse_transform(selector.transform(X)))
Example #6
def test_VarianceThreshold():
    '''
    Test the usage of VarianceThreshold.

    :return: None
    '''
    X = [[100, 1, 2, 3],
         [100, 4, 5, 6],
         [100, 7, 8, 9],
         [101, 11, 12, 13]]  # four features in total
    selector = VarianceThreshold(1)
    selector.fit(X)
    print("Variances is %s" % selector.variances_)  # Variances is [ 0.1875 13.6875 13.6875 13.6875]
    print("After transform is %s" % selector.transform(X))  # the first feature has been removed
    print("The support is %s" % selector.get_support(True))  # indices of the retained features
    print("After reverse transform is %s" %
          selector.inverse_transform(selector.transform(X)))  # removed features are filled with 0
Example #7
def variance_threshold():
    x = [[1, -2, 3, 4, 5.], [3, 4, -5, 6, 7], [1, 7, 2, -6, 2],
         [3, 8, 6, 2, -8]]
    print(x)

    selector = VarianceThreshold(threshold=2)
    selector.fit(x)

    print()
    print(selector.variances_)
    print()
    print(selector.transform(x))
    print()
    print(selector.get_support(True))
    print()
    print(selector.inverse_transform(selector.transform(x)))
Example #8
def variance(train, validate, test):
    '''
    Fit a VarianceThreshold selector on the training set and apply the same
    feature selection to the validation and test sets.
    :return: the transformed train, validate and test sets
    '''

    selector = VarianceThreshold(1)
    selector.fit(train)
    train1 = selector.transform(train)

    print("Variances is %s" % selector.variances_)
    print("The support is %s" % selector.get_support(True))
    print("After transform is %s" % train1)
    print("After reverse transform is %s" % selector.inverse_transform(selector.transform(train)))

    validate = selector.transform(validate)
    test = selector.transform(test)

    return train1, validate, test
Example #9
print('First white-matter anatomy image (3D) is located at: %s' %
      oasis_dataset.white_matter_maps[0])  # 3D data

#############################################################################
# Preprocess data
# ----------------
nifti_masker = NiftiMasker(standardize=False,
                           smoothing_fwhm=2,
                           memory='nilearn_cache')  # cache options
gm_maps_masked = nifti_masker.fit_transform(gm_imgs_train)

# The features with too low between-subject variance are removed using
# :class:`sklearn.feature_selection.VarianceThreshold`.
from sklearn.feature_selection import VarianceThreshold
variance_threshold = VarianceThreshold(threshold=.01)
gm_maps_thresholded = variance_threshold.fit_transform(gm_maps_masked)
gm_maps_masked = variance_threshold.inverse_transform(gm_maps_thresholded)

# Then we convert the variance-threshold support mask back to a mask image so
# that it can be used in the decoding process
mask = nifti_masker.inverse_transform(variance_threshold.get_support())

############################################################################
# Prediction pipeline with ANOVA and SVR using
# :class:`nilearn.decoding.DecoderRegressor` Object

# In nilearn we can benefit from the built-in DecoderRegressor object to
# do ANOVA with SVR instead of manually defining the whole pipeline.
# This estimator also uses cross-validation to select the best models and
# ensemble them. Furthermore, you can pass n_jobs=<some_high_value> to the
# DecoderRegressor class to take advantage of a multi-core system.
# To save time (because these are anat images with many voxels), we include
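# The snippet is cut off at this point; below is a minimal sketch of the
# DecoderRegressor step described above, not the original code. It assumes
# nilearn's DecoderRegressor API and the nifti_masker / gm_imgs_train
# variables defined earlier; age_train, gm_imgs_test and the parameter
# values are illustrative assumptions.
from nilearn.decoding import DecoderRegressor

decoder = DecoderRegressor(estimator='svr', mask=nifti_masker,
                           scoring='neg_mean_absolute_error',
                           screening_percentile=1,  # keep only the most relevant voxels
                           n_jobs=1)                # increase on a multi-core system
decoder.fit(gm_imgs_train, age_train)               # cross-validated fit
age_pred = decoder.predict(gm_imgs_test)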
Example #10
            ('anova', feature_selection),
            ('svr', svr)])

### Fit and predict
anova_svr.fit(gm_maps_masked, age)
age_pred = anova_svr.predict(gm_maps_masked)

#############################################################################
# Visualization
# --------------
# Look at the SVR's discriminating weights
coef = svr.coef_
# reverse feature selection
coef = feature_selection.inverse_transform(coef)
# reverse variance threshold
coef = variance_threshold.inverse_transform(coef)
# reverse masking
weight_img = nifti_masker.inverse_transform(coef)

# Create the figure
from nilearn.plotting import plot_stat_map, show
bg_filename = gray_matter_map_filenames[0]
z_slice = 0


fig = plt.figure(figsize=(5.5, 7.5), facecolor='k')
# Hard setting vmax to highlight weights more
display = plot_stat_map(weight_img, bg_img=bg_filename,
                        display_mode='z', cut_coords=[z_slice],
                        figure=fig, vmax=1)
display.title('SVM weights', y=1.2)
Example #11
gm_maps_masked = NiftiMasker().fit_transform(gray_matter_map_filenames)
data = variance_threshold.fit_transform(gm_maps_masked)

# Statistical inference
from nilearn.mass_univariate import permuted_ols

neg_log_pvals, t_scores_original_data, _ = permuted_ols(
    age,
    data,  # + intercept as a covariate by default
    n_perm=2000,  # 2,000 permutations in the interest of time; 10,000 would be better
    verbose=1,  # display progress bar
    n_jobs=1)  # can be changed to use more CPUs
signed_neg_log_pvals = neg_log_pvals * np.sign(t_scores_original_data)
signed_neg_log_pvals_unmasked = nifti_masker.inverse_transform(
    variance_threshold.inverse_transform(signed_neg_log_pvals))

# Show results
threshold = -np.log10(0.1)  # 10% corrected

fig = plt.figure(figsize=(5.5, 7.5), facecolor='k')

display = plot_stat_map(signed_neg_log_pvals_unmasked,
                        bg_img=bg_filename,
                        threshold=threshold,
                        cmap=plt.cm.RdBu_r,
                        display_mode='z',
                        cut_coords=[z_slice],
                        figure=fig)
title = ('Negative $\\log_{10}$ p-values'
         '\n(Non-parametric + max-type correction)')
Example #12
print(df.head())
print(df.shape)
print(df.columns)

array = df.values
X = array[:, :13]
Y = array[:, 13]

# VarianceThreshold is a feature selector that removes all low-variance features.
# This feature selection algorithm looks only at the features (X), not the desired
# outputs (y), and can thus be used for unsupervised learning.

from sklearn.feature_selection import VarianceThreshold
sel = VarianceThreshold(threshold=(.9 * (1 - .9)))  # for a 90% threshold
varTh = sel.fit(X, Y)

numpy.set_printoptions(precision=3)
print(sel.variances_)

for i in range(len(sel.variances_)):
    print(sel.variances_[i], end=" ")

print(X.shape)
featTransformed = sel.transform(X)
print(df.columns)
print(featTransformed.shape)

#print(featTransformed[0:5,:])
featBack = sel.inverse_transform(featTransformed)
#print(featBack[0:5,:])
Example #13
# -*- coding: utf-8 -*-
# @auth ivan
# @time 20200611
# @goal test 054.Test_Feature_selection

from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import SelectKBest, f_classif
X = [[100, 1, 2, 3], [100, 4, 5, 6], [100, 7, 8, 9], [101, 11, 12, 13]]
selector = VarianceThreshold(1)
selector.fit(X)
print('Variances is %s' % selector.variances_)
print('After transform is \n%s' % selector.transform(X))
print('The support is %s' % selector.get_support(True))
print('The support is %s' % selector.get_support(False))
print('After reverse transform is \n%s' %
      selector.inverse_transform(selector.transform(X)))
# Variances is [ 0.1875 13.6875 13.6875 13.6875]
# After transform is
# [[ 1  2  3]
#  [ 4  5  6]
#  [ 7  8  9]
#  [11 12 13]]
# The support is [1 2 3]
# The support is [False  True  True  True]
# After reverse transform is
# [[ 0  1  2  3]
#  [ 0  4  5  6]
#  [ 0  7  8  9]
#  [ 0 11 12 13]]

X = [[1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [3, 3, 3, 3, 3], [1, 1, 1, 1, 1]]
Example #14
__author__ = 'ctiwary'
import numpy as np
from sklearn.feature_selection import VarianceThreshold

# http://scikit-learn.org/stable/modules/classes.html#module-sklearn.feature_selection
# user guide http://scikit-learn.org/stable/modules/feature_selection.html#feature-selection

# feature selection based on VarianceThreshold
X = [[0, 0, 1], [0, 1, 0], [1, 0, 0], [0, 1, 1], [0, 1, 0], [0, 1, 1]]
print(np.shape(X))
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
features_list = sel.fit_transform(X)
print(features_list)
print(sel.inverse_transform(features_list))
print(sel.get_support())
print(dir(sel))

# Univariate feature selection

from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
iris = load_iris()
X, y = iris.data, iris.target
print(X.shape)
X_new = SelectKBest(chi2, k=2).fit_transform(X, y)
print(X_new.shape)
Example #15
from sklearn.feature_selection import VarianceThreshold
import DataReader
import pandas as pd

control_data = DataReader.DataShow("./data/control_rawdata.npy")
control_data.load_data()

features = ['C', 'I', 'O', 'F', 'M', 'LTXE', 'CIOFM', 'CIOFMLTXE', 'Scorr', 'Var', 'EyeMvt']

data = pd.DataFrame(control_data.data[0][0][17:, 3:14], columns=features)
selector = VarianceThreshold(1)
X = data.to_numpy()
selector.fit(X)

print('Variances is %s' % selector.variances_)
print('After transform is \n%s' % selector.transform(X))
print('The support is %s' % selector.get_support(True))  # True: return the indices of the selected features
print('The support is %s' % selector.get_support(False))  # False: return a boolean mask showing which features are selected
print('The feature is %s' % [x for (index, x) in enumerate(features) if index in selector.get_support(True)])
print('After reverse transform is \n%s' % selector.inverse_transform(selector.transform(X)))
Example #16
y = boston.target
from sklearn.linear_model import LinearRegression
p = df['CHAS'].sum() / df.shape[0]
p * (1 - p)
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt = VarianceThreshold(50)
x_lt = vt.fit_transform(X)
x_lt
x_lt.shape
lr = LinearRegression()
lr.fit(x_lt, y)
get_ipython().run_line_magic('pinfo', 'lr.score')
lr.score(x_lt, y)
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt.variances_
x_lv = vt.inverse_transform(X)
X.shape
vt.fit_transform(X)
x_lv = vt.inverse_transform(X)
get_ipython().run_line_magic('pinfo', 'vt.inverse_transform')
get_ipython().run_line_magic('ls', '')
get_ipython().run_line_magic('pinfo', 'VarianceThreshold')
vt
vt.get_support
vt.get_support(np.arange(X.shape[1]))
import numpy as np
vt.get_support(np.arange(X.shape[1]))
vt.get_support()
x_lv = X[:, _]
x_lv.shape
get_ipython().run_line_magic('whos', '')
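The interactive session above takes a couple of wrong turns (inverse_transform is called on the original X rather than on the reduced matrix, and get_support is passed an array instead of the indices flag). A minimal sketch of the intended sequence, assuming X and y are the feature matrix and target loaded earlier in that session:

from sklearn.feature_selection import VarianceThreshold
from sklearn.linear_model import LinearRegression

vt = VarianceThreshold(threshold=50)     # drop features with variance below the threshold (50)
x_lt = vt.fit_transform(X)               # reduced matrix: only the high-variance columns
print(vt.variances_)                     # per-feature variances
print(vt.get_support())                  # boolean mask of the retained columns
print(vt.get_support(indices=True))      # ... or their integer indices

x_back = vt.inverse_transform(x_lt)      # back to the original width, dropped columns zero-filled
x_manual = X[:, vt.get_support()]        # same columns as x_lt, selected by the mask

lr = LinearRegression().fit(x_lt, y)
print(lr.score(x_lt, y))                 # R^2 on the retained features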