예제 #1
0
    # Walk-through of the selector's checks on `fs`, which is created outside this
    # fragment (presumably a FeatureSelector instance -- confirm against the caller).
    # NOTE(review): several lines below are bare expressions whose value is
    # discarded; they only display something in an interactive/notebook session.
    fs.plot_missing()
    
    # Bare expression: first 10 rows of the missing-value statistics (result unused).
    fs.missing_stats.head(10)
    
    
    # Single-unique-value check: run it, then read the result stored under
    # the 'single_unique' key of fs.ops.
    fs.identify_single_unique()
    single_unique = fs.ops['single_unique']
    single_unique
    fs.plot_unique()
    
    
    
    # Collinearity check with a correlation threshold of 0.975; the result is
    # read from the 'collinear' key of fs.ops.
    fs.identify_collinear(correlation_threshold=0.975)
    correlated_features = fs.ops['collinear']
    # Bare expression: first five entries of the collinear result (result unused).
    correlated_features[:5]
    fs.plot_collinear()
    # Bare expression: head of the collinear record (result unused).
    fs.record_collinear.head()
    
    # Zero-importance check configured for a classification task scored with AUC,
    # 10 iterations, early stopping enabled.
    fs.identify_zero_importance(task = 'classification', eval_metric = 'auc', 
                                n_iterations = 10, early_stopping = True)
    # Feature-name collections exposed by the selector after the call above.
    one_hot_features = fs.one_hot_features
    base_features = fs.base_features
    print('There are %d original features' % len(base_features))
    print('There are %d one-hot features' % len(one_hot_features))
    fs.plot_feature_importances(threshold = 0.99, plot_n = 12)
    # Bare expression: first 10 rows of the feature-importance table (result unused).
    fs.feature_importances.head(10)




예제 #2
0
# Single-unique-value check on `fs` (created outside this fragment; presumably a
# FeatureSelector instance -- confirm against the caller).
fs.identify_single_unique()

# The result is read from the 'single_unique' key of fs.ops.
single_unique = fs.ops['single_unique']
print(single_unique)

fs.plot_unique()  # original author's note: the plots do not work well
plt.show()
# Print five randomly sampled rows of the unique-value statistics.
print(fs.unique_stats.sample(5))

#   3   Collinear (highly correlated) Feature

fs.identify_collinear(correlation_threshold=0.975)
correlated_features = fs.ops['collinear']
# NOTE(review): bare expression -- the slice is computed and discarded, so it
# shows nothing when this runs as a script.
correlated_features[:5]

fs.plot_collinear()
plt.show()

# Same plot again with plot_all=True.
fs.plot_collinear(plot_all=True)
plt.show()

# Re-run the collinearity check with a threshold of 0.98 and plot again.
fs.identify_collinear(correlation_threshold=0.98)
fs.plot_collinear()
plt.show()

print(fs.record_collinear.head())

#   4. Zero Importance Features: one-hot encoding is mainly used here for identifying features with zero relevance

fs.identify_zero_importance(task='classification',
                            eval_metric='auc',
예제 #3
0
# Subset of data_new2 restricted to the important features, extended with the
# k-means cluster labels.  An explicit copy is taken so the column assignment
# below writes to an independent frame rather than to a selection of
# data_new2, which is the pattern that raises pandas' SettingWithCopyWarning.
test = data_new2[important_features].copy()
test["Is_Male"] = y_pred_kmeans

# Building ANN now
# Work on a copy of the source frame so `data` itself is left untouched.
data_ANN = data.copy()
data_ANN["Is_Male"] = y_pred_kmeans
data_ANN.drop(columns="customer_id", inplace=True)
# NOTE(review): positional split -- the first 42 columns are used as features
# and the column at position 42 as the label.  This presumably is the
# "Is_Male" column appended above; confirm against the actual column count.
X = data_ANN.iloc[:, :42]
y = data_ANN.iloc[:, 42]
# Step 1 - Feature selection
from feature_selector import FeatureSelector
fts = FeatureSelector(X, y)
fts.identify_missing(missing_threshold=0.9)

# Collinearity check with a correlation threshold of 0.7; the result is read
# from the 'collinear' key of fts.ops.
fts.identify_collinear(correlation_threshold=0.7)
fts.plot_collinear()
collinear_features = fts.ops['collinear']

# Zero-importance check: classification task scored with AUC, 30 iterations,
# early stopping enabled.
fts.identify_zero_importance(task='classification',
                             eval_metric='auc',
                             n_iterations=30,
                             early_stopping=True)
zero_importance_features = fts.ops['zero_importance']

fts.plot_feature_importances(threshold=0.99, plot_n=12)
# Keep the names in the first 28 rows of the feature-importance table.
# NOTE(review): this relies on the table already being ordered by importance
# -- confirm.
Most_important_Features = list(fts.feature_importances["feature"].head(28))

# Rebuild the feature matrix from the selected columns; the label is taken
# from the same positional column as before.
Data_ANN_2 = data_ANN[Most_important_Features]
X = Data_ANN_2.iloc[:, :]
y = data_ANN.iloc[:, 42]
예제 #4
0
# Build the selector from the training frame and its labels (both defined
# outside this fragment).
fs = FeatureSelector(data=train, labels=train_labels)

# Missing-value statistics
# NOTE(review): 0.5 is passed positionally; presumably the missing-fraction
# threshold -- confirm against the library signature.
fs.identify_missing(0.5)
# Sort the per-feature missing statistics by 'missing_fraction', largest first.
df_miss_value = fs.missing_stats.sort_values('missing_fraction',
                                             ascending=False)
print('df_miss_value', df_miss_value.head(15))
missing_features = fs.ops['missing']
print('missing_features to remove', missing_features[:20])

# Single-unique-value feature statistics
fs.identify_single_unique()
# NOTE(review): this prints the return value of plot_unique(); if the plotting
# method returns nothing, the output is just the label followed by "None".
print('fs.plot_unique()', fs.plot_unique())

# NOTE(review): 0.95 is passed positionally; presumably the correlation
# threshold -- confirm.  The print has the same caveat as the one above.
fs.identify_collinear(0.95)
print('plot_collinear()', fs.plot_collinear())

# list of collinear features to remove
collinear_features = fs.ops['collinear']
print('collinear_features', collinear_features)

# dataframe of collinear features
# Sorted by 'corr_value', largest first; the first 50 rows are printed.
df_collinear_features = fs.record_collinear.sort_values('corr_value',
                                                        ascending=False)
print('df_collinear_features', df_collinear_features.head(50))

# Zero-importance feature statistics
# Pass in the appropriate parameters
fs.identify_zero_importance(task='classification',
                            eval_metric=tpr_weight_funtion_lc,
                            n_iterations=10,
from feature_selector import FeatureSelector
import matplotlib.pyplot as plt
import pandas as pd 

if __name__ == '__main__':
    # Script entry point: load the cleaned flight data, run the collinearity
    # check and report/plot the result.
    # The CSV path is relative to the current working directory.
    model = pd.read_csv('../Data/FlightClassificationCleaned.csv')

    # Separate the label column from the predictors.  'ARR_DELAY_BIN' is
    # dropped as well.
    # NOTE(review): presumably because it is derived from the label -- confirm.
    target = model['ARR_DELAY']
    model = model.drop(columns=['ARR_DELAY', 'ARR_DELAY_BIN'])

    fs = FeatureSelector(data=model, labels=target)

    fs.identify_collinear(correlation_threshold=0.9)

    # The collinear result is read from the 'collinear' key of fs.ops.
    correlated_features = fs.ops['collinear']
    print(correlated_features[:5])
    # Print the head of the collinear record; as a bare expression it was
    # computed and silently discarded when run as a script.
    print(fs.record_collinear.head())

    # Draw the plot and display it explicitly, instead of printing the plot
    # method's return value.
    fs.plot_collinear()
    plt.show()