Ejemplo n.º 1
0
                                    random_state=42, verbosity=2)

# Fit the optimizer on the feature matrix X and the target y_int.
# NOTE(review): pipeline_optimizer is constructed above this excerpt; the
# export file name suggests it is a TPOT estimator -- confirm.
pipeline_optimizer.fit(X, y_int)

# Export the result of the fit to 'tpot_exported_pipeline.py'
# (presumably the best pipeline found, written as Python source -- confirm).
pipeline_optimizer.export('tpot_exported_pipeline.py')





if TEST_FEATURE_SELECTION:
    # Optional exploratory pass over the feature selector `fs` (created
    # elsewhere in this file). Each step runs one identify_* check, reads the
    # flagged feature names back from fs.ops, and draws the matching plot.
    #
    # The intermediate results are printed explicitly: a bare expression
    # statement inside an `if` body is evaluated and then discarded, so
    # without print() nothing would ever be shown.

    # --- 1. Missing values (threshold 0.6) ---
    fs.identify_missing(missing_threshold=0.6)
    missing_features = fs.ops['missing']
    print(missing_features[:10])

    fs.plot_missing()

    print(fs.missing_stats.head(10))

    # --- 2. Features with a single unique value ---
    fs.identify_single_unique()
    single_unique = fs.ops['single_unique']
    print(single_unique)
    fs.plot_unique()

    # --- 3. Collinear features (correlation threshold 0.975) ---
    fs.identify_collinear(correlation_threshold=0.975)
    correlated_features = fs.ops['collinear']
    print(correlated_features[:5])
    fs.plot_collinear()
Ejemplo n.º 2
0
# In pandas, row labels are called `index` and column labels are called `columns`.
# A typical example: df = pd.DataFrame(np.random.randn(5,3),index = list('abcde'),columns = ['one','two','three'])

#Create the Instance
# `train` and `train_labels` are defined outside this excerpt.
fs = FeatureSelector(data=train, labels=train_labels)

#   1   Missing Values

fs.identify_missing(missing_threshold=0.6)

#The features identified for removal can be accessed through the ops dictionary of the FeatureSelector object.
missing_features = fs.ops['missing']
print(missing_features[:20])

fs.plot_missing()  # Adding plt.show() after each plotting call is enough to display the figure
plt.show()
print(fs.missing_stats.head(20))

#   2   Single Unique Value

fs.identify_single_unique()

single_unique = fs.ops['single_unique']
print(single_unique)

fs.plot_unique()  # Original author's note: the plotting does not work well
plt.show()
# NOTE(review): assuming unique_stats is a pandas DataFrame, sample(5) draws
# rows at random (output varies between runs) and raises if there are fewer
# than 5 rows -- confirm.
print(fs.unique_stats.sample(5))

#   3   Collinear (highly correlated) Feature