# Cross-validation setup for the hyperparameter search.
# References collected by the author (kept in their original order):
#   http://scikit-learn.org/0.18/modules/cross_validation.html
#   https://mail.google.com/mail/u/0/#sent/QgrcJHsbjCZNCXqKkMlpLbTXWjKWfzHljSl
#   https://mail.google.com/mail/u/0/#sent/RdDgqcJHpWcvcDjPgjkjXHLgLnDfdlQzrnZXHZlrxmfB
#   (the two mail links point into the author's own mailbox)
#
# n_splits = 2, 5
#   https://datachemeng.com/doublecrossvalidation/
#   http://univprof.com/archives/16-06-12-3889388.html
# n_splits = 2, 5, 10
#   https://datachemeng.com/modelvalidation/
cv = ShuffleSplit(
    n_splits=5,
    test_size=0.2,
    random_state=0,  # fixed seed so the five splits are reproducible
)

# Author's note on the metric -- estimation for different datasets:
# OK: MAE, NG: R^2
#   http://univprof.com/archives/16-07-04-4453136.html
rgr = GridSearchCV(
    estimator=mod,
    param_grid=param_grid,
    cv=cv,
    scoring='neg_mean_absolute_error',
)
rgr.fit(X_train, y_train)
print_gscv_score(rgr)

# Score the fitted search on the data it was trained on.
y_pred = rgr.predict(X_train)
print('train data: ', end='')
print_score(y_train, y_pred)

# step 3. test
# y_pred is rebound here: from this point on it holds the test predictions.
y_pred = rgr.predict(X_test)
print('test data: ', end='')
print_score(y_test, y_pred)

# Elapsed time relative to `start`, which is set before this chunk.
print('{:.2f} seconds '.format(time() - start))

#%%
# step 4. visualize outputs
# yy-plot (train)
('scaler', scaler), ('model', model) ]) param_grid = [ {'model__kernel': ['rbf'], 'model__gamma': range_g, 'model__C': range_c,'model__epsilon': range_e}, ] n_splits = 5 cv = ShuffleSplit(n_splits=n_splits, test_size=0.2) cv = KFold(n_splits=n_splits, shuffle=True) score='neg_mean_absolute_error' gscv = GridSearchCV(pipe, param_grid, cv=cv, scoring=score) gscv.fit(X_train, y_train) print_gscv_score(gscv) y_pred = gscv.predict(X_train) print('train data: ',end="") print_score(y_train, y_pred) # visualize fig = yyplot(y_train, y_pred) #%% # Novelty detection by One Class SVM with optimized hyperparameter clf = OneClassSVM(nu=0.003, kernel=gscv.best_params_['model__kernel'], gamma=gscv.best_params_['model__gamma']) clf.fit(X_train) y_pred = gscv.predict(X_test) # predicted y reliability = clf.predict(X_test) # outliers = -1
# Search grid restricted to the RBF kernel; gamma and C candidates come
# from range_g and range_c.
param_grid = [
    {
        'kernel': ['rbf'],
        'gamma': range_g,
        'C': range_c,
    },
]

# Single source of truth for the selection metric: the same value is
# printed in the banner and handed to GridSearchCV, so they cannot diverge.
score = 'accuracy'
print("# Tuning hyper-parameters for {}".format(score))
print()

# Five shuffled 80/20 splits with a fixed seed for reproducible folds.
cv = ShuffleSplit(n_splits=5, test_size=0.2, random_state=0)
clf = GridSearchCV(mod, param_grid, cv=cv, scoring=score)
clf.fit(X_train, y_train)
print_gscv_score(clf)

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()

# Evaluate on the held-out test set; both reports use the same
# (y_true, y_pred) pair.
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
print(confusion_matrix(y_true, y_pred))
print()

# visualize
# ref: https://pythondatascience.plavox.info/matplotlib/散布図
# http://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
#