# %%
from sklearn.preprocessing import KBinsDiscretizer

# Discretize the single input feature into 8 bins, then fit a linear model
# on the resulting bin encoding.
binned_regression = make_pipeline(KBinsDiscretizer(n_bins=8), LinearRegression())
binned_regression.fit(data, target)
target_predicted = binned_regression.predict(data)
mse = mean_squared_error(target, target_predicted)

# Overlay the model's predictions on a scatter plot of the raw data.
ax = sns.scatterplot(data=full_data, x="input_feature", y="target")
ax.plot(data, target_predicted, color="tab:orange")
title = f"Mean squared error = {mse:.2f}"
_ = ax.set_title(title)

# %%
from sklearn.kernel_approximation import Nystroem

# Approximate a kernel feature map with 5 Nystroem components, then fit a
# linear model in that feature space.
nystroem_regression = make_pipeline(Nystroem(n_components=5), LinearRegression())
nystroem_regression.fit(data, target)
target_predicted = nystroem_regression.predict(data)
mse = mean_squared_error(target, target_predicted)

# Same visual comparison for the kernel-approximation pipeline.
ax = sns.scatterplot(data=full_data, x="input_feature", y="target")
ax.plot(data, target_predicted, color="tab:orange")
title = f"Mean squared error = {mse:.2f}"
_ = ax.set_title(title)
# full dataset classification
X_data = images / 255.0  # scale pixel intensities to [0, 1]
Y = targets

# split data to train and test
# FIX: sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X_data, Y, test_size=0.15, random_state=42
)

# Create a classifier: a support vector classifier
kernel_svm = svm.SVC(gamma=0.2)
linear_svm = svm.LinearSVC()

# create pipeline from kernel approximation and linear svm
feature_map_fourier = RBFSampler(gamma=0.2, random_state=1)
feature_map_nystroem = Nystroem(gamma=0.2, random_state=1)
fourier_approx_svm = pipeline.Pipeline(
    [("feature_map", feature_map_fourier), ("svm", svm.LinearSVC())]
)
nystroem_approx_svm = pipeline.Pipeline(
    [("feature_map", feature_map_nystroem), ("svm", svm.LinearSVC())]
)

# fit and predict using linear and kernel svm:
import datetime as dt

# We learn the digits on train part, timing the exact-kernel fit so it can
# be compared against the approximate pipelines later.
kernel_svm_start_time = dt.datetime.now()
print('Start kernel svm learning at {}'.format(str(kernel_svm_start_time)))
kernel_svm.fit(X_train, y_train)
# Fragment of a name -> transformer-instance registry; the dict's opening
# (and any earlier entries) lies outside this view.
# Preprocessing / scaling transformers:
'Normalizer': Normalizer(),
'PolynomialFeatures': PolynomialFeatures(),
'RobustScaler': RobustScaler(),
'StandardScaler': StandardScaler(),
# Decomposition / kernel-approximation transformers:
'FastICA': FastICA(),
'PCA': PCA(),
'RBFSampler': RBFSampler(),
'Nystroem': Nystroem(),
'FeatureAgglomeration': FeatureAgglomeration(),
# Feature-selection transformers:
'SelectFwe': SelectFwe(),
'SelectPercentile': SelectPercentile(),
'VarianceThreshold': VarianceThreshold(),
# Model-based selectors share a fixed random_state so results are reproducible.
'SelectFromModel': SelectFromModel(
    estimator=ExtraTreesClassifier(n_estimators=100, random_state=324089)),
'RFE': RFE(estimator=ExtraTreesClassifier(n_estimators=100, random_state=324089)),
}
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE', sep='COLUMN_SEPARATOR', dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
    train_test_split(features, tpot_data['target'].values, random_state=94)

# Average CV score on the training set was: 0.7179130434782609
# Pipeline: pick a feature subset, project it through a linear Nystroem map,
# then classify with a random forest.
steps = [
    DatasetSelector(sel_subset=4, subset_list="module23.csv"),
    Nystroem(gamma=0.75, kernel="linear", n_components=4),
    RandomForestClassifier(
        bootstrap=True,
        criterion="gini",
        max_features=1.0,
        min_samples_leaf=19,
        min_samples_split=20,
        n_estimators=100,
    ),
]
exported_pipeline = make_pipeline(*steps)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
# Benchmark fragment: times fit/predict of an exact-kernel OCSVM pipeline and
# of an online (SGD + Nystroem) OCSVM pipeline, accumulating ROC curves.
# NOTE(review): `pipe_libsvm`, `std`, the `*_time_*` / `tpr_*` accumulators,
# `gamma`, `nu`, `random_state`, and `x_axis` are defined outside this view —
# presumably this runs inside a loop over repetitions; confirm against caller.
tstart = time()
pipe_libsvm.fit(X_train)
fit_time_libsvm += time() - tstart  # accumulate wall-clock fit time
tstart = time()
# scoring such that the lower, the more normal
scoring = -pipe_libsvm.decision_function(X_test)
predict_time_libsvm += time() - tstart  # accumulate wall-clock scoring time
fpr_libsvm_, tpr_libsvm_, _ = roc_curve(y_test, scoring)
# Interpolate the ROC curve onto the common `x_axis` grid so curves from
# different repetitions can be summed/averaged.
f_libsvm = interp1d(fpr_libsvm_, tpr_libsvm_)
tpr_libsvm += f_libsvm(x_axis)

print("----------- Online OCSVM ------------")
# Approximate the kernel with a Nystroem map so SGDOneClassSVM can learn
# a linear model in the feature space.
nystroem = Nystroem(gamma=gamma, random_state=random_state)
online_ocsvm = SGDOneClassSVM(nu=nu, random_state=random_state)
pipe_online = make_pipeline(std, nystroem, online_ocsvm)
tstart = time()
pipe_online.fit(X_train)
fit_time_online += time() - tstart  # accumulate wall-clock fit time
tstart = time()
# scoring such that the lower, the more normal
scoring = -pipe_online.decision_function(X_test)
predict_time_online += time() - tstart  # accumulate wall-clock scoring time
fpr_online_, tpr_online_, _ = roc_curve(y_test, scoring)
# Same interpolation onto the shared grid for the online pipeline.
f_online = interp1d(fpr_online_, tpr_online_)
tpr_online += f_online(x_axis)