svr.fit(X[:train_size], y[:train_size])
svr_fit = time.time() - t0
print("SVR complexity and bandwidth selected and model fitted in %.3f s"
      % svr_fit)

t0 = time.time()
kr.fit(X[:train_size], y[:train_size])
kr_fit = time.time() - t0
print("KRR complexity and bandwidth selected and model fitted in %.3f s"
      % kr_fit)

sv_ratio = svr.best_estimator_.support_.shape[0] / train_size
print("Support vector ratio: %.3f" % sv_ratio)

t0 = time.time()
y_svr = svr.predict(X_plot)
svr_predict = time.time() - t0
print("SVR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], svr_predict))

t0 = time.time()
y_kr = kr.predict(X_plot)
kr_predict = time.time() - t0
print("KRR prediction for %d inputs in %.3f s"
      % (X_plot.shape[0], kr_predict))

# #############################################################################
# Look at the results
sv_ind = svr.best_estimator_.support_
plt.scatter(X[sv_ind], y[sv_ind], c='r', s=50,
            label='SVR support vectors', zorder=2,
            edgecolors=(0, 0, 0))
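# #############################################################################
# A minimal sketch of the setup the timing code above assumes (X, y,
# train_size, X_plot, svr, kr, t0). The data generation and parameter grids
# follow the usual pattern for this comparison, but the exact values here are
# assumptions, not the original script's.
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR

rng = np.random.RandomState(0)
X = 5 * rng.rand(10000, 1)
y = np.sin(X).ravel()
y[::5] += 3 * (0.5 - rng.rand(X.shape[0] // 5))  # noise on every 5th target

train_size = 100
X_plot = np.linspace(0, 5, 100000)[:, None]

svr = GridSearchCV(SVR(kernel='rbf', gamma=0.1),
                   param_grid={"C": [1e0, 1e1, 1e2, 1e3],
                               "gamma": np.logspace(-2, 2, 5)})
kr = GridSearchCV(KernelRidge(kernel='rbf', gamma=0.1),
                  param_grid={"alpha": [1e0, 0.1, 1e-2, 1e-3],
                              "gamma": np.logspace(-2, 2, 5)})
t0 = time.time()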
clf = GridSearchCV(
    SVC(), tuned_parameters, scoring='%s_macro' % score
)
clf.fit(X_train, y_train)

print("Best parameters set found on development set:")
print()
print(clf.best_params_)
print()
print("Grid scores on development set:")
print()
means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
print()

print("Detailed classification report:")
print()
print("The model is trained on the full development set.")
print("The scores are computed on the full evaluation set.")
print()
y_true, y_pred = y_test, clf.predict(X_test)
print(classification_report(y_true, y_pred))
print()

# Note the problem is too easy: the hyperparameter plateau is too flat and the
# output model is the same for precision and recall with ties in quality.
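# #############################################################################
# A sketch of the names the grid search above assumes (tuned_parameters,
# score, and the train/test split). The grid values are illustrative guesses;
# in the usual form of this example the block runs once per scoring metric.
from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC

digits = datasets.load_digits()
X = digits.images.reshape((len(digits.images), -1))
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=0)

tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-4],
                     'C': [1, 10, 100, 1000]},
                    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
score = 'precision'  # '%s_macro' % score -> 'precision_macro'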
kr = GridSearchCV(KernelRidge(), param_grid=param_grid)
stime = time.time()
kr.fit(X, y)
print("Time for KRR fitting: %.3f" % (time.time() - stime))

gp_kernel = ExpSineSquared(1.0, 5.0, periodicity_bounds=(1e-2, 1e1)) \
    + WhiteKernel(1e-1)
gpr = GaussianProcessRegressor(kernel=gp_kernel)
stime = time.time()
gpr.fit(X, y)
print("Time for GPR fitting: %.3f" % (time.time() - stime))

# Predict using kernel ridge
X_plot = np.linspace(0, 20, 10000)[:, None]
stime = time.time()
y_kr = kr.predict(X_plot)
print("Time for KRR prediction: %.3f" % (time.time() - stime))

# Predict using Gaussian process regressor
stime = time.time()
y_gpr = gpr.predict(X_plot, return_std=False)
print("Time for GPR prediction: %.3f" % (time.time() - stime))

stime = time.time()
y_gpr, y_std = gpr.predict(X_plot, return_std=True)
print("Time for GPR prediction with standard-deviation: %.3f"
      % (time.time() - stime))

# Plot results
plt.figure(figsize=(10, 5))
lw = 2
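# #############################################################################
# A sketch of the context the KRR/GPR comparison above assumes (X, y,
# param_grid, and the imports). The noisy periodic data and the ExpSineSquared
# grid follow the usual pattern, but the exact values are assumptions.
import time

import numpy as np
import matplotlib.pyplot as plt
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import ExpSineSquared, WhiteKernel
from sklearn.kernel_ridge import KernelRidge
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = 15 * rng.rand(100, 1)
y = np.sin(X).ravel() + 0.5 * (0.5 - rng.rand(X.shape[0]))  # noisy sine

param_grid = {"alpha": [1e0, 1e-1, 1e-2, 1e-3],
              "kernel": [ExpSineSquared(ls, p)
                         for ls in np.logspace(-2, 2, 10)
                         for p in np.logspace(0, 2, 10)]}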
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
clf = GridSearchCV(
    SVC(kernel='rbf', class_weight='balanced'), param_grid
)
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)


# #############################################################################
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)
print("done in %0.3fs" % (time() - t0))

print(classification_report(y_test, y_pred, target_names=target_names))
print(confusion_matrix(y_test, y_pred, labels=range(n_classes)))


# #############################################################################
# Qualitative evaluation of the predictions using matplotlib

def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Helper function to plot a gallery of portraits"""
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        plt.xticks(())
        plt.yticks(())
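# #############################################################################
# A minimal sketch of the PCA step that produces the X_train_pca / X_test_pca
# features used above; n_components and the whiten flag are assumptions.
from time import time

from sklearn.decomposition import PCA

n_components = 150  # assumed number of eigenfaces to keep
pca = PCA(n_components=n_components, svd_solver='randomized',
          whiten=True).fit(X_train)
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)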
parameters = {
    'vect__ngram_range': [(1, 1), (1, 2)],
}
grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1)
grid_search.fit(docs_train, y_train)

# TASK: print the mean and std for each candidate along with the parameter
# settings for all the candidates explored by grid search.
n_candidates = len(grid_search.cv_results_['params'])
for i in range(n_candidates):
    print(i, 'params - %s; mean - %0.2f; std - %0.2f'
          % (grid_search.cv_results_['params'][i],
             grid_search.cv_results_['mean_test_score'][i],
             grid_search.cv_results_['std_test_score'][i]))

# TASK: Predict the outcome on the testing set and store it in a variable
# named y_predicted
y_predicted = grid_search.predict(docs_test)

# Print the classification report
print(metrics.classification_report(y_test, y_predicted,
                                    target_names=dataset.target_names))

# Print and plot the confusion matrix
cm = metrics.confusion_matrix(y_test, y_predicted)
print(cm)

# import matplotlib.pyplot as plt
# plt.matshow(cm)
# plt.show()
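# #############################################################################
# A plausible pipeline definition for the grid search above; the 'vect__'
# prefix in `parameters` implies a step named 'vect', but the exact
# vectorizer/classifier pairing is an assumption.
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

pipeline = Pipeline([
    ('vect', TfidfVectorizer(min_df=3, max_df=0.95)),
    ('clf', LinearSVC(C=1000)),
])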