コード例 #1
0
# %%
from sklearn.preprocessing import KBinsDiscretizer

# Discretize the single input feature into 8 bins, then fit a linear model
# on the resulting encoding: the prediction becomes piecewise over the bins.
binned_regression = make_pipeline(
    KBinsDiscretizer(n_bins=8),
    LinearRegression(),
)
binned_regression.fit(data, target)

# Evaluate on the training data itself and report the mean squared error.
target_predicted = binned_regression.predict(data)
mse = mean_squared_error(target, target_predicted)

# Overlay the model's prediction curve on the raw scatter plot.
ax = sns.scatterplot(data=full_data, x="input_feature", y="target")
ax.plot(data, target_predicted, color="tab:orange")
_ = ax.set_title(f"Mean squared error = {mse:.2f}")

# %%
from sklearn.kernel_approximation import Nystroem

# Approximate a kernel feature map with 5 Nystroem components, then fit a
# linear model in that feature space (a cheap non-linear regression).
nystroem_regression = make_pipeline(
    Nystroem(n_components=5),
    LinearRegression(),
)
nystroem_regression.fit(data, target)

# Score on the training data and plot predictions over the scatter of targets.
target_predicted = nystroem_regression.predict(data)
mse = mean_squared_error(target, target_predicted)

ax = sns.scatterplot(data=full_data, x="input_feature", y="target")
ax.plot(data, target_predicted, color="tab:orange")
_ = ax.set_title(f"Mean squared error = {mse:.2f}")
コード例 #2
0
# Full-dataset classification: scale pixel values to [0, 1].
X_data = images/255.0
Y = targets

# Split data into train and test sets.
# FIX: `sklearn.cross_validation` was deprecated in 0.18 and removed in 0.20;
# `train_test_split` now lives in `sklearn.model_selection`.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_data, Y, test_size=0.15, random_state=42)

# Create the classifiers: an exact (RBF) kernel SVC and a linear SVC baseline.
kernel_svm = svm.SVC(gamma=.2)
linear_svm = svm.LinearSVC()

# Create pipelines that replace the exact kernel with an explicit
# approximate feature map (Fourier / Nystroem) followed by a linear SVM.
feature_map_fourier = RBFSampler(gamma=.2, random_state=1)
feature_map_nystroem = Nystroem(gamma=.2, random_state=1)

fourier_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fourier),
                                        ("svm", svm.LinearSVC())])

nystroem_approx_svm = pipeline.Pipeline([("feature_map", feature_map_nystroem),
                                        ("svm", svm.LinearSVC())])

# Fit and predict using linear and kernel SVM:

import datetime as dt

# Learn the digits on the train part, timing the exact-kernel fit.
kernel_svm_start_time = dt.datetime.now()
print('Start kernel svm learning at {}'.format(str(kernel_svm_start_time)))
kernel_svm.fit(X_train, y_train)
コード例 #3
0
    'Normalizer':
    Normalizer(),
    'PolynomialFeatures':
    PolynomialFeatures(),
    'RobustScaler':
    RobustScaler(),
    'StandardScaler':
    StandardScaler(),
    'FastICA':
    FastICA(),
    'PCA':
    PCA(),
    'RBFSampler':
    RBFSampler(),
    'Nystroem':
    Nystroem(),
    'FeatureAgglomeration':
    FeatureAgglomeration(),
    'SelectFwe':
    SelectFwe(),
    'SelectPercentile':
    SelectPercentile(),
    'VarianceThreshold':
    VarianceThreshold(),
    'SelectFromModel':
    SelectFromModel(
        estimator=ExtraTreesClassifier(n_estimators=100, random_state=324089)),
    'RFE':
    RFE(estimator=ExtraTreesClassifier(n_estimators=100, random_state=324089)),
}
コード例 #4
0
ファイル: RNASeq_MDD94.py プロジェクト: wxyz/tpot-fss
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.kernel_approximation import Nystroem
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from tpot.builtins import DatasetSelector

# NOTE: Make sure that the class is labeled 'target' in the data file
tpot_data = pd.read_csv('PATH/TO/DATA/FILE',
                        sep='COLUMN_SEPARATOR',
                        dtype=np.float64)
features = tpot_data.drop('target', axis=1).values
training_features, testing_features, training_target, testing_target = \
            train_test_split(features, tpot_data['target'].values, random_state=94)

# Average CV score on the training set was: 0.7179130434782609
# Pipeline: pick a feature subset, map it through an approximate kernel,
# then classify with a random forest.
subset_selector = DatasetSelector(sel_subset=4, subset_list="module23.csv")
kernel_map = Nystroem(gamma=0.75, kernel="linear", n_components=4)
forest = RandomForestClassifier(bootstrap=True,
                                criterion="gini",
                                max_features=1.0,
                                min_samples_leaf=19,
                                min_samples_split=20,
                                n_estimators=100)
exported_pipeline = make_pipeline(subset_selector, kernel_map, forest)

exported_pipeline.fit(training_features, training_target)
results = exported_pipeline.predict(testing_features)
コード例 #5
0
        # --- Exact LibSVM-based OCSVM: time the fit on this training fold ---
        tstart = time()
        pipe_libsvm.fit(X_train)
        fit_time_libsvm += time() - tstart

        tstart = time()
        # scoring such that the lower, the more normal
        scoring = -pipe_libsvm.decision_function(X_test)
        predict_time_libsvm += time() - tstart
        fpr_libsvm_, tpr_libsvm_, _ = roc_curve(y_test, scoring)

        # Interpolate this fold's ROC onto the common FPR grid so the TPR
        # curves can be accumulated (presumably averaged later — outside view).
        f_libsvm = interp1d(fpr_libsvm_, tpr_libsvm_)
        tpr_libsvm += f_libsvm(x_axis)

        print("----------- Online OCSVM ------------")
        # Online variant: Nystroem kernel approximation feeding an SGD-based
        # one-class SVM, chained after the same scaler `std`.
        nystroem = Nystroem(gamma=gamma, random_state=random_state)
        online_ocsvm = SGDOneClassSVM(nu=nu, random_state=random_state)
        pipe_online = make_pipeline(std, nystroem, online_ocsvm)

        tstart = time()
        pipe_online.fit(X_train)
        fit_time_online += time() - tstart

        tstart = time()
        # scoring such that the lower, the more normal
        scoring = -pipe_online.decision_function(X_test)
        predict_time_online += time() - tstart
        fpr_online_, tpr_online_, _ = roc_curve(y_test, scoring)

        # Same ROC interpolation/accumulation as for the LibSVM variant above.
        f_online = interp1d(fpr_online_, tpr_online_)
        tpr_online += f_online(x_axis)