from sklearn.cross_validation import GridSearchCV #from sklearn.cross_validation import train_test_split parameters = [{ 'C': [1, 10, 100, 1000], 'kernel': ['linear'] }, { 'C': [1, 10, 100, 1000], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] }] grid_search = GridSearchCV(estimator=classifier, param_grid=parameters, scoring='accuracy', cv=10, n_jobs=-1) grid_search = grid_search.fit(X_train, y_train) best_accuracy = grid_search.best_score_ best_parameters = grid_search.best_params_ # Visualising the Training set results from matplotlib.colors import ListedColormap X_set, y_set = X_train, y_train X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)) plt.contourf(X1, X2,
'vect__use_idf':[False], 'vect__norm':[None], 'clf__penalty': ['l1', 'l2'], 'clf__C': [1.0, 10.0, 100.0]}, ] lr_tfidf = Pipeline([('vect', tfidf), ('clf', LogisticRegression(random_state=0))]) gs_lr_tfidf = GridSearchCV(lr_tfidf, param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1) gs_lr_tfidf.fit(X_train, y_train) print('Best parameter set: %s ' % gs_lr_tfidf.best_params_) print('CV Accuracy: %.3f' % gs_lr_tfidf.best_score_) clf = gs_lr_tfidf.best_estimator_ print('Test Accuracy: %.3f' % clf.score(X_test, y_test)) ############################################################################# print(50 * '=') print('Section: Working with bigger data - online' ' algorithms and out-of-core learning') print(50 * '-')
s="Petal length [standardized]", ha="center", va="center", fontsize=12, rotation=90) plt.show() print(mv_clf.get_params()) params = {'decisiontreeclassifier__max_depth': [1, 2], 'pipeline-1__clf__C': [0.001, 0.1, 100.0]} grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring="roc_auc") grid.fit(X_train, y_train) if Version(sklearn_version) < "0.18": for params, mean_score, scores in grid.grid_scores_: print("%0.3f +/- %0.2f %r" % (mean_score, scores.std() / 2.0, params)) else: cv_keys = ("mean_test_score", "std_test_score", "params") for r, _ in enumerate(grid.cv_results_["mean_test_score"]): print("%0.3f +/- %0.2f %r" % (grid.cv_results_[cv_keys[0]][r], grid.cv_results_[cv_keys[1]][r] / 2.0, grid.cv_results_[cv_keys[2]][r]))
'clf__penalty': ['l1', 'l2'], 'clf__C': [1.0, 10.0, 100.0] }, ] lr_tfidf = Pipeline([('vect', tfidf), ('clf', LogisticRegression(random_state=0))]) gs_lr_tfidf = GridSearchCV(lr_tfidf, param_grid, scoring='accuracy', cv=5, verbose=1, n_jobs=-1) gs_lr_tfidf.fit(X_train, y_train) print('Best parameter set: %s ' % gs_lr_tfidf.best_params_) print('CV Accuracy: %.3f' % gs_lr_tfidf.best_score_) clf = gs_lr_tfidf.best_estimator_ print('Test Accuracy: %.3f' % clf.score(X_test, y_test)) ############################################################################# print(50 * '=') print('Section: Working with bigger data - online' ' algorithms and out-of-core learning') print(50 * '-') stop = stopwords.words('english')
axi.set(xticks=[], yticks=[], xlabel=faces.target_names[faces.target[i]]) from sklearn.svm import SVC from sklearn.decomposition import RandomizedPCA from sklearn.pipeline import make_pipeline pca = RandomizedPCA(n_components=150, whiten=True, random_state=42) svc = SVC(kernel='rbf', class_weight='balanced') model = make_pipeline(pca, svc) from sklearn.cross_validation import GridSearchCV param_grid = {'svc__C': [1, 5, 10, 50], 'svc__gamma': [0.0001, 0.0005, 0.001, 0.005]} grid = GridSearchCV(model, param_grid) %time grid.fit(Xtrain, ytrain) print(grid.best_params_) model = grid.best_estimator_ yfit = model.predict(Xtest) from sklearn.metrics import classification_report print(classification_report(ytest, yfit, target_names=faces.target_names)) from sklearn.metrics import confusion_matrix mat = confusion_matrix(ytest, yfit) sns.heatmap(mat.T, squart=True, annot=True, fmt='d', cbar=False, xticklabels=faces.target_names, yticklabels=faces.target_names) plt.xlabel('true label') plt.ylabel('predicted label')
fontsize=12, rotation=90) # plt.tight_layout() # plt.savefig('./figures/voting_panel', bbox_inches='tight', dpi=300) plt.show() print(mv_clf.get_params()) params = {'decisiontreeclassifier__max_depth': [1, 2], 'pipeline-1__clf__C': [0.001, 0.1, 100.0]} grid = GridSearchCV(estimator=mv_clf, param_grid=params, cv=10, scoring='roc_auc') grid.fit(X_train, y_train) if Version(sklearn_version) < '0.18': for params, mean_score, scores in grid.grid_scores_: print("%0.3f +/- %0.2f %r" % (mean_score, scores.std() / 2.0, params)) else: cv_keys = ('mean_test_score', 'std_test_score', 'params') for r, _ in enumerate(grid.cv_results_['mean_test_score']): print("%0.3f +/- %0.2f %r" % (grid.cv_results_[cv_keys[0]][r], grid.cv_results_[cv_keys[1]][r] / 2.0, grid.cv_results_[cv_keys[2]][r]))