Esempio n. 1
0
# Walk through the missing-value, single-unique and collinear checks of a
# FeatureSelector instance `fs`, printing each result and showing each plot.
# NOTE(review): assumes `fs` exists and fs.identify_missing(...) already ran
# earlier in the file (not visible in this section) - confirm.

# The features identified for removal can be accessed through the ops
# dictionary of the FeatureSelector object.
missing_features = fs.ops['missing']
print(missing_features[:20])

fs.plot_missing()  # add plt.show() after every plotting call to display it
plt.show()
print(fs.missing_stats.head(20))

#   2   Single Unique Value

fs.identify_single_unique()

single_unique = fs.ops['single_unique']
print(single_unique)

fs.plot_unique()  # original author's note: the plots do not work well
plt.show()
print(fs.unique_stats.sample(5))

#   3   Collinear (highly correlated) Feature

fs.identify_collinear(correlation_threshold=0.975)
correlated_features = fs.ops['collinear']
# Print explicitly: a bare expression statement is silently discarded when
# this runs as a script, unlike the other results above which are printed.
print(correlated_features[:5])

fs.plot_collinear()
plt.show()

fs.plot_collinear(plot_all=True)
plt.show()
Esempio n. 2
0

if TEST_FEATURE_SELECTION:
    # Run each FeatureSelector check in turn and report what it flagged.
    # Intermediate results are printed explicitly: a bare expression nested
    # inside an `if` body is discarded both in scripts and in notebooks.

    # 1. Features whose missing fraction exceeds the threshold.
    fs.identify_missing(missing_threshold=0.6)
    missing_features = fs.ops['missing']
    print(missing_features[:10])

    fs.plot_missing()

    print(fs.missing_stats.head(10))

    # 2. Features with a single unique value.
    fs.identify_single_unique()
    single_unique = fs.ops['single_unique']
    print(single_unique)
    fs.plot_unique()

    # 3. Collinear (highly correlated) features.
    fs.identify_collinear(correlation_threshold=0.975)
    correlated_features = fs.ops['collinear']
    print(correlated_features[:5])
    fs.plot_collinear()
    print(fs.record_collinear.head())

    # 4. Zero-importance features.
    fs.identify_zero_importance(task='classification', eval_metric='auc',
                                n_iterations=10, early_stopping=True)
    one_hot_features = fs.one_hot_features
    base_features = fs.base_features
    print('There are %d original features' % len(base_features))
    print('There are %d one-hot features' % len(one_hot_features))
Esempio n. 3
0
from feature_selector import FeatureSelector
# Features are in train and labels are in train_labels
fs = FeatureSelector(data=train, labels=train_labels)

# Missing-value statistics
fs.identify_missing(missing_threshold=0.5)
df_miss_value = fs.missing_stats.sort_values('missing_fraction',
                                             ascending=False)
print('df_miss_value', df_miss_value.head(15))
missing_features = fs.ops['missing']
print('missing_features to remove', missing_features[:20])

# Single-unique-value feature statistics
fs.identify_single_unique()
# The plot_* methods are called for their drawing side effect only; they
# return nothing, so printing their result would just emit "None".
fs.plot_unique()

fs.identify_collinear(correlation_threshold=0.95)
fs.plot_collinear()

# list of collinear features to remove
collinear_features = fs.ops['collinear']
print('collinear_features', collinear_features)

# dataframe of collinear features
df_collinear_features = fs.record_collinear.sort_values('corr_value',
                                                        ascending=False)
print('df_collinear_features', df_collinear_features.head(50))

# Zero-importance feature statistics
# Pass in the appropriate parameters