Example #1
import sys
path = 'I://New Folder//utils'
sys.path.append(path)
import common_utils as utils
import regression_utils as rutils
from sklearn import metrics, linear_model, svm, model_selection

scoring = metrics.make_scorer(rutils.rmse, greater_is_better=False)
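
# rutils.rmse is not shown in this excerpt; a minimal sketch of such a metric
# (hypothetical name), assuming it is plain root-mean-squared error:
import numpy as np

def rmse_sketch(y_true, y_pred):
    # square root of scikit-learn's mean squared error
    return np.sqrt(metrics.mean_squared_error(y_true, y_pred))

# greater_is_better=False makes make_scorer negate the value, so grid search
# keeps maximizing while lower RMSE remains better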

#linear pattern in 2d
X, y = rutils.generate_linear_synthetic_data_regression(n_samples=100,
                                                        n_features=1,
                                                        n_informative=1,
                                                        noise=200)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=1)
rutils.plot_data_2d_regression(X_train, y_train)

linear_estimator = linear_model.LinearRegression()
# note: 'normalize' was removed from LinearRegression in scikit-learn 1.2;
# on newer versions drop this key (or scale the features separately)
linear_grid = {'normalize': [False]}
final_linear_model = utils.grid_search_best_model(linear_estimator,
                                                  linear_grid,
                                                  X_train,
                                                  y_train,
                                                  scoring=scoring)
print(final_linear_model.coef_)
print(final_linear_model.intercept_)
rutils.plot_model_2d_regression(final_linear_model, X_train, y_train)
rutils.regression_performance(final_linear_model, X_test, y_test)
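
# utils.grid_search_best_model is not shown in this excerpt; a minimal sketch
# of such a helper (hypothetical name, cv value assumed), wrapping GridSearchCV
# and returning the refit best estimator:
def grid_search_best_model_sketch(estimator, grid, X, y, scoring=None):
    gs = model_selection.GridSearchCV(estimator, grid, scoring=scoring, cv=10)
    gs.fit(X, y)
    print(gs.best_params_, gs.best_score_)
    return gs.best_estimator_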

lasso_estimator = linear_model.Lasso(max_iter=5000)
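# the lasso search below is a sketch mirroring the linear-model pattern above;
# the alpha grid is an assumption, not taken from the original
lasso_grid = {'alpha': [0.001, 0.01, 0.1, 1.0, 10.0]}
final_lasso_model = utils.grid_search_best_model(lasso_estimator,
                                                 lasso_grid,
                                                 X_train,
                                                 y_train,
                                                 scoring=scoring)
print(final_lasso_model.coef_)
print(final_lasso_model.intercept_)
rutils.plot_model_2d_regression(final_lasso_model, X_train, y_train)
rutils.regression_performance(final_lasso_model, X_test, y_test)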

Example #2
import sys
path = 'E://utils'
sys.path.append(path)

import common_utils as utils
import regression_utils as rutils
from sklearn import metrics, linear_model, model_selection
import numpy as np

scoring = metrics.make_scorer(rutils.rmse, greater_is_better=False)

#outliers in linear pattern
X, y = rutils.generate_linear_synthetic_data_regression(n_samples=1000,
                                                        n_features=1,
                                                        n_informative=1,
                                                        noise=10)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    X, y, test_size=0.1, random_state=1)
rutils.plot_data_2d_regression(X_train, y_train)

#add outliers in features: force every 10th training sample's feature to 4
X_train[::10] = 4
#add outliers in target: force every 10th training target to 250
y_train[::10] = 250
rutils.plot_data_2d_regression(X_train, y_train)

# Fit linear model
lr_estimator = linear_model.LinearRegression()
# note: 'normalize' was removed from LinearRegression in scikit-learn 1.2;
# on newer versions drop this key (or scale the features separately)
lr_grid = {'normalize': [True, False]}
lr_model = utils.grid_search_best_model(lr_estimator,
                                        lr_grid,
                                        X_train,
                                        y_train,
                                        scoring=scoring)
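
# evaluation mirroring Example #1 (these exact calls are assumed, not taken
# from the original): visualize the fit and score it on the held-out data to
# see how the injected outliers pull the least-squares line
rutils.plot_model_2d_regression(lr_model, X_train, y_train)
rutils.regression_performance(lr_model, X_test, y_test)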

Example #3
# the bare feature_reduction_* and plot_data_2d helpers below are assumed to be
# imported (e.g. via a star import) from the author's utils modules; the
# data-generation step producing this first X is not shown in this excerpt
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2)
plot_data_2d(X_isomap, new_window=True)

X, y = generate_nonlinear_synthetic_data_classification2(n_samples=1000)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2, 100)
plot_data_2d(X_isomap, new_window=True)

X, y = generate_linear_synthetic_data_regression(n_samples=100,
                                                 n_features=2,
                                                 n_informative=2,
                                                 noise=0)
plot_data_2d(X)
X_lpca = feature_reduction_linear_pca(X, 2)
plot_data_2d(X_lpca, new_window=True)
X_kpca = feature_reduction_kernel_pca(X, 2, 'rbf', 15)
plot_data_2d(X_kpca, new_window=True)
X_tsne = feature_reduction_tsne(X, 2)
plot_data_2d(X_tsne, new_window=True)
X_isomap = feature_reduction_isomap(X, 2)
plot_data_2d(X_isomap, new_window=True)
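
# The feature_reduction_* and plot_data_2d helpers are not shown in this
# excerpt; a minimal sketch of what they are assumed to wrap (hypothetical
# names with a _sketch suffix; signatures inferred from the calls above):
import matplotlib.pyplot as plt
from sklearn import decomposition, manifold

def feature_reduction_linear_pca_sketch(X, n_components):
    # linear PCA projection onto the top principal components
    return decomposition.PCA(n_components=n_components).fit_transform(X)

def feature_reduction_kernel_pca_sketch(X, n_components, kernel='linear', gamma=None):
    # kernel PCA; e.g. kernel='rbf', gamma=15 as used above for nonlinear data
    return decomposition.KernelPCA(n_components=n_components,
                                   kernel=kernel,
                                   gamma=gamma).fit_transform(X)

def feature_reduction_tsne_sketch(X, n_components):
    # t-SNE embedding for visualization
    return manifold.TSNE(n_components=n_components).fit_transform(X)

def feature_reduction_isomap_sketch(X, n_components, n_neighbors=5):
    # Isomap embedding; e.g. n_neighbors=100 as used above
    return manifold.Isomap(n_components=n_components,
                           n_neighbors=n_neighbors).fit_transform(X)

def plot_data_2d_sketch(X, new_window=False):
    # scatter the first two columns; new_window is assumed to open a fresh figure
    if new_window:
        plt.figure()
    plt.scatter(X[:, 0], X[:, 1])
    plt.show()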