best_model = grid_search.best_estimator_ # Time fitting best model start = timeit.default_timer() best_model.fit(x_train, y_train) end = timeit.default_timer() print('Time to fit:', end - start) helpers.log_fit_time('CENSUS_KNN', end - start) # Plot the learning curve vs train size after finding the best model helpers.plot_learning_curve_vs_train_size( best_model, df, feature_cols, 'income_num', output_location= 'census_output/knn_%s_best_model_num_samples_learning_curve.png' % weighting, ) # Predict income with the trained best model y_pred = best_model.predict(x_test) helpers.produce_model_performance_summary( best_model, x_test, y_test, y_pred, output_location='census_output/KNN_%s_summary.txt' % weighting, cv=kfold, scoring='accuracy')
# NOTE(review): tail of a census-income SVM experiment script. This fragment
# begins inside a GridSearchCV(...) call whose opening (estimator and
# param_grid) appears earlier in the file — do not reformat without it.
cv=5
)
grid_search.fit(x_train, y_train)
print(grid_search.best_score_)
print(grid_search.best_params_)
# train the best model found by the grid search
best_model = grid_search.best_estimator_
# Time fitting best model
start = timeit.default_timer()
best_model.fit(x_train, y_train)
end = timeit.default_timer()
print('Time to fit:', end-start)
helpers.log_fit_time('CENSUS_SVM', end-start)
# Predict income with the trained best model
y_pred = best_model.predict(x_test)
# Write accuracy / CV summary (kfold defined earlier — TODO confirm) to census_output/
helpers.produce_model_performance_summary(
    best_model, x_test, y_test, y_pred,
    output_location='census_output/svm_summary.txt',
    cv=kfold, scoring='accuracy',
    grid_search=grid_search
)
# NOTE(review): tail of a wine-quality neural-net experiment script. The
# fragment begins inside the hidden_layer_sizes list of a GridSearchCV
# param_grid whose opening appears earlier in the file.
(20, 5)]
}, cv=3)
grid_search.fit(x_train, y_train)
print(grid_search.best_score_)
print(grid_search.best_params_)
# train the best model found by the grid search
best_model = grid_search.best_estimator_
# Time fitting best model
start = timeit.default_timer()
best_model.fit(x_train, y_train)
end = timeit.default_timer()
print('Time to fit:', end - start)
helpers.log_fit_time('WINE_NN', end - start)
# Predict quality with the trained best model
y_pred = best_model.predict(x_test)
# Write accuracy / 3-fold CV summary to wine_output/
helpers.produce_model_performance_summary(
    best_model, x_test, y_test, y_pred,
    grid_search=grid_search,
    output_location='wine_output/neural_net_summary.txt',
    cv=3, scoring='accuracy')
# NOTE(review): tail of a wine-quality boosting experiment script. The
# fragment begins inside a GridSearchCV param_grid (sweeping n_estimators
# from 10 to 100) whose opening appears earlier in the file.
'n_estimators': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
}, cv=3)
grid_search.fit(x_train, y_train)
print(grid_search.best_score_)
print(grid_search.best_params_)
# train the best model found by the grid search
best_model = grid_search.best_estimator_
# Time fitting best model
start = timeit.default_timer()
best_model.fit(x_train, y_train)
end = timeit.default_timer()
print('Time to fit:', end - start)
helpers.log_fit_time('WINE_BOOST', end - start)
# Predict quality with the trained best model
y_pred = best_model.predict(x_test)
# Write accuracy / 3-fold CV summary to wine_output/
helpers.produce_model_performance_summary(
    best_model, x_test, y_test, y_pred,
    output_location='wine_output/boost_summary.txt',
    cv=3, scoring='accuracy')
helpers.log_fit_time('WINE_DT', end - start) # Export decision tree to graphviz png helpers.export_decision_tree_to_file( best_model, feature_names=feature_cols, class_names=['Low Quality', 'High Quality'], output_location=r'wine_output/decision_tree', format='png') # Plot the learning curve vs train size after finding the best model helpers.plot_learning_curve_vs_train_size( best_model, df, feature_cols, 'quality_num', output_location='wine_output/best_model_num_samples_learning_curve.png') # Predict income with the trained best model y_pred = best_model.predict(x_test) helpers.produce_model_performance_summary( best_model, x_test, y_test, y_pred, grid_search=grid_search, output_location='wine_output/decision_tree_summary.txt', cv=kfold, scoring='accuracy')