# http://scikit-learn.org/0.18/modules/cross_validation.html
# https://mail.google.com/mail/u/0/#sent/QgrcJHsbjCZNCXqKkMlpLbTXWjKWfzHljSl
# https://mail.google.com/mail/u/0/#sent/RdDgqcJHpWcvcDjPgjkjXHLgLnDfdlQzrnZXHZlrxmfB
#
# n_splits = 2, 5
# https://datachemeng.com/doublecrossvalidation/
# http://univprof.com/archives/16-06-12-3889388.html
# n_splits = 2, 5, 10
# https://datachemeng.com/modelvalidation/
# Hyperparameter search for the estimator `mod` over `param_grid`.
# Cross-validation: 5 random shuffle splits, 20% of the samples held out in
# each split, with a fixed seed (random_state=0).
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)

# Scoring uses negated mean absolute error. Note from the reference below:
# for comparing estimates across different datasets MAE is OK, R^2 is not (NG).
# http://univprof.com/archives/16-07-04-4453136.html
rgr = GridSearchCV(mod, param_grid, cv=cv, scoring='neg_mean_absolute_error')
rgr.fit(X_train, y_train)
print_gscv_score(rgr)  # helper defined outside this section; gets the fitted search

# Predict on the training data itself and report the score.
y_pred = rgr.predict(X_train)
# No trailing newline; presumably print_score's output continues on this line.
print('train data: ', end="")
print_score(y_train, y_pred)

# step 3. test
# Predict on X_test; this overwrites the training-set predictions in y_pred.
y_pred = rgr.predict(X_test)
print('test  data: ', end="")
print_score(y_test, y_pred)
# Elapsed time relative to `start`, which is set outside this section.
print('{:.2f} seconds '.format(time() - start))

#%%

# step 4. visualize outputs
# yy-plot (train)
('scaler', scaler),
('model', model)
])

# Hyperparameter grid for the 'model' step of `pipe` (RBF kernel only).
# The `model__` prefix addresses parameters of that pipeline step.
param_grid = [
    {
        'model__kernel': ['rbf'],
        'model__gamma': range_g,
        'model__C': range_c,
        'model__epsilon': range_e,
    },
]
n_splits = 5
# Shuffled K-fold cross-validation. No random_state is passed, so the fold
# assignment is not fixed between runs.
cv = KFold(n_splits=n_splits, shuffle=True)
score = 'neg_mean_absolute_error'

# Grid search over the whole pipeline, scored by negated mean absolute error.
gscv = GridSearchCV(pipe, param_grid, cv=cv, scoring=score)
gscv.fit(X_train, y_train)
print_gscv_score(gscv)

# Predict on the training data itself and report the score.
y_pred = gscv.predict(X_train)
print('train data: ',end="")
print_score(y_train, y_pred)
# visualize
fig = yyplot(y_train, y_pred)

#%%
# Novelty detection by One Class SVM with optimized hyperparameter
# (kernel and gamma are taken from the best grid-search result above).
# NOTE(review): gamma was tuned for the 'model' step, which sits behind the
# 'scaler' step of `pipe`, while `clf` is fit on X_train directly - confirm
# that reusing gamma on unscaled data is intended.
clf = OneClassSVM(
    nu=0.003,
    kernel=gscv.best_params_['model__kernel'],
    gamma=gscv.best_params_['model__gamma'],
)
clf.fit(X_train)

y_pred = gscv.predict(X_test)    # predicted y
reliability = clf.predict(X_test) # outliers = -1
# Example #3
# 0
# Hyperparameter grid for the estimator `mod`: RBF kernel only, searching
# over gamma and C.
param_grid = [
    {
        'kernel': ['rbf'],
        'gamma': range_g,
        'C': range_c
    },
]
score = 'accuracy'
print("# Tuning hyper-parameters for {}".format(score))
print()

# Cross-validation: 5 random shuffle splits, 20% held out in each split,
# with a fixed seed.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
# Pass `score` rather than repeating the literal, so the metric announced
# above is always the one that is actually optimized.
clf = GridSearchCV(mod, param_grid, cv=cv, scoring=score)
clf.fit(X_train, y_train)
print_gscv_score(clf)

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
# Evaluate on the test data; both reports use the same (y_true, y_pred) pair.
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))
print()

# visualize
# ref: https://pythondatascience.plavox.info/matplotlib/散布図
# http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
#