コード例 #1
0
                              ada_grid,
                              X_train,
                              y_trans,
                              scoring=scoring)
# Plot the AdaBoost selector's feature importances (third argument 50 is
# presumably the number of features to display -- confirm against the helper)
# and collect the features that selector considers important.
plot_feature_importances(ada_selector, X_train, 50)
important_features_ada = get_important_features(ada_selector, X_train)

# Union of the features chosen by the three selectors (lasso, rf, ada).
important_features = set(important_features_lasso) | set(
    important_features_rf) | set(important_features_ada)
print(len(important_features))

# Pick the columns in X_train's own column order instead of set iteration
# order: for string names, set order changes between interpreter runs (hash
# randomization), which would shuffle X_train1's columns and make every
# downstream fit and plot non-reproducible.
# Assumes X_train is a pandas DataFrame (it is already indexed by a list of
# column names in the original statement).
X_train1 = X_train[[
    column for column in X_train.columns if column in important_features
]]

# Run the PCA helper on the selected features, passing the column count of
# X_train1 as its second argument.
pca_data = feature_reduction_pca(X_train1, X_train1.shape[1])
# Run the t-SNE helper (second argument 2) on the same features and plot the
# result against y_train.
tsne_data = feature_reduction_tsne(X_train1, 2)
plot_data_3d_regression(tsne_data, y_train)

# Scorer wrapping log_rmse; greater_is_better=False declares it a loss
# (assuming `metrics` is sklearn.metrics, the scorer then sign-flips it so
# that model selection can still maximise the score).
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

# Gradient boosting regressor searched over tree count, learning rate and
# tree depth, fitted on the selected features against y_trans.
gb_estimator = ensemble.GradientBoostingRegressor()
gb_grid = {
    'n_estimators': [100, 300, 500],
    'learning_rate': [0.1, 1.0],
    'max_depth': [1, 3, 5],
}
gb_model = get_best_model(
    gb_estimator, gb_grid, X_train1, y_trans, scoring=scoring)
コード例 #2
0
                                               X_train,
                                               y_train,
                                               scoring=scoring)
# Show the fitted model's learned coefficients and intercept.
print(final_svm_model.coef_)
print(final_svm_model.intercept_)
# Plot the model against the training data, then evaluate it on the
# held-out test split.
rutils.plot_model_2d_regression(final_svm_model, X_train, y_train)
rutils.regression_performance(final_svm_model, X_test, y_test)

# Linear pattern in 3D: 200 synthetic samples with two features, both
# informative, and noise=10.
X, y = rutils.generate_linear_synthetic_data_regression(
    n_samples=200, n_features=2, n_informative=2, noise=10)

# Hold out 10% of the samples for testing; the fixed random_state keeps the
# split repeatable.
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, random_state=1, test_size=0.1)
rutils.plot_data_3d_regression(X_train, y_train)

# Ordinary least squares model, grid-searched over the `normalize` flag.
# NOTE(review): `normalize` was removed from LinearRegression in
# scikit-learn 1.2, so this grid only works on older releases -- confirm the
# scikit-learn version this script is pinned to.
linear_estimator = linear_model.LinearRegression()
linear_grid = {'normalize': [True, False]}
final_linear_model = utils.grid_search_best_model(
    linear_estimator, linear_grid, X_train, y_train, scoring=scoring)

# Inspect the fitted plane, plot it over the training data and evaluate it
# on the held-out test split.
print(final_linear_model.coef_)
print(final_linear_model.intercept_)
rutils.plot_model_3d_regression(final_linear_model, X_train, y_train)
rutils.regression_performance(final_linear_model, X_test, y_test)

# Linear support vector regressor and the candidate values of its C
# parameter to search over.
svm_estimator = svm.LinearSVR()
svm_grid = {'C': [0.1, 0.3, 0.5, 0.7, 1, 10]}
コード例 #3
0
# Fit the lasso selector on the training data, print its coefficients and
# plot the feature importances (third argument 40 is presumably the number
# of features to display -- confirm against the helper).
lasso_selector.fit(X_train, y_train)
print(lasso_selector.coef_)
utils.plot_feature_importances(lasso_selector, X_train, 40)

# Reduce X_train to the features chosen by the fitted selector.
X_train1 = utils.select_features(lasso_selector, X_train)

# Correlation heatmap of the selected features.
utils.corr_heatmap(X_train1)

# PCA keeping as many components as needed to explain 95% of the variance;
# print the cumulative explained-variance ratio and the reduced shape.
lpca = decomposition.PCA(n_components=0.95)
lpca.fit(X_train1)
print(lpca.explained_variance_ratio_.cumsum())
pca_data = lpca.transform(X_train1)
print(pca_data.shape)

# Two-component t-SNE embedding of the PCA output, plotted against y_train.
tsne = manifold.TSNE(n_components=2)
tsne_data = tsne.fit_transform(pca_data)
rutils.plot_data_3d_regression(tsne_data, y_train)

# Scorer built from log_rmse; greater_is_better=False marks it as a loss.
scoring = metrics.make_scorer(log_rmse, greater_is_better=False)

# Plot the target's distribution before and after the log1p transform.
# NOTE(review): sns.distplot is deprecated since seaborn 0.11 (histplot /
# displot are the replacements) -- confirm the seaborn version in use.
sns.distplot(y_train)
y_trans = np.log1p(y_train)
sns.distplot(y_trans)

# k-nearest-neighbours regressor searched over n_neighbors = 5..14, fitted
# on the PCA-reduced features against the log1p-transformed target.
knn_estimator = neighbors.KNeighborsRegressor()
knn_grid = {'n_neighbors': [*range(5, 15)]}
final_model = utils.grid_search_best_model(
    knn_estimator, knn_grid, pca_data, y_trans, scoring=scoring)
コード例 #4
0
# Build the preprocessing pipeline for all features: split the columns of
# house_train1 into non-continuous and continuous groups and route each
# group through its own sub-pipeline.
cat_features = utils.get_non_continuous_features(house_train1)
num_features = utils.get_continuous_features(house_train1)

preprocess_pipeline = compose.ColumnTransformer(
    transformers=[
        ('cat', categorical_pipeline, cat_features),
        ('num', numerical_pipeline, num_features),
    ])

# Visualisation pipeline: preprocess, keep the principal components that
# explain 95% of the variance, then embed them in two dimensions with t-SNE.
viz_pipeline = pipeline.Pipeline(steps=[
    ('preprocess', preprocess_pipeline),
    ('pca', decomposition.PCA(n_components=0.95)),
    ('tsne', manifold.TSNE(n_components=2)),
])

# Plot the 2-D embedding against the sale price.
tsne_data = viz_pipeline.fit_transform(house_train1)
rutils.plot_data_3d_regression(tsne_data, house_train['SalePrice'])

# Build the feature-selection pipeline: concatenate the PCA components with
# the features kept by a tree-based importance selector.
# The selector uses ExtraTreesRegressor because this is a regression task
# (the target plotted above is house_train['SalePrice'] and the final
# estimator is LinearSVR). A classifier would treat every distinct price as
# its own class, giving importances for the wrong problem, and would fail
# outright on a non-integer target.
features_pipeline = pipeline.FeatureUnion([
    ('pca_selector', decomposition.PCA()),
    ('et_selector',
     feature_selection.SelectFromModel(ensemble.ExtraTreesRegressor()))
])

# Linear support vector regressor with default hyperparameters; presumably
# consumed by the pipeline assembled below -- its use is outside this view.
regressor = svm.LinearSVR()
#build complete pipeline with feature selection and ml algorithms
complete_pipeline = pipeline.Pipeline([
    ('preprocess', preprocess_pipeline),
    ('zv_filter', feature_selection.VarianceThreshold()),
    ('features', features_pipeline),
    ('tregressor',