################ Cross Validation Hyperparameter Tuning ###############################
print("\nHYPERPARAMETRE TUNING")

# LinearSVC search space. Only the 'l2' penalty is listed because it is the
# only penalty compatible with both 'hinge' and 'squared_hinge' losses.
hyperparams = {'C': [0.1, 1, 100, 1000],
               'penalty': ['l2'],
               'loss': ['hinge', 'squared_hinge'],
               'max_iter': [1000, 2000]}

# 3-fold grid search over the SVM estimator.
# BUG FIX: error_score was 1 — a parameter combination whose fit *failed*
# would be scored 1.0 and win the ranking. np.nan (sklearn's default)
# excludes failed fits from the ranking instead.
optimized_model = GridSearchCV(estimator=support_vector_classifier,
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=np.nan)
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Rank all tried parameter combinations and persist the table.
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
saveDataframe(results_dataframe, "IMDB Support Vector Classifier")

# Evaluate the tuned model (refit on the whole training set) on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print("Using our training-dataset optimized Support Vector Machine model on the testing dataset for evaluating")
print("score = %f" % score)

################ Graph Reporting ###############################
# plot the confusion matrix
# BUG FIX: was `predictions` (never defined) -> NameError; the variable
# computed above is `prediction`.
skplt.metrics.plot_confusion_matrix(y_test, prediction, normalize=False, figsize=(12, 8))
plt.show()
param_grid=hyperparams, n_jobs=1, cv=3, verbose=1, error_score=1) optimized_model.fit(X_train, y_train) print(">>>>> Optimized params") print(optimized_model.best_params_) cv_results = optimized_model.cv_results_ print(">>>>>> Display the top results of grid search cv") results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score') display(results_dataframe.head()) saveDataframe(results_dataframe, "IMDB AdaBoost") prediction = optimized_model.predict(X_test) score = np.mean(prediction == y_test) print( "Using our training-dataset optimized adaboost model on the testing dataset for evaluating" ) print("score = %f" % score) ################ Graph Reporting ############################### # plot the confusion matrix skplt.metrics.plot_confusion_matrix(y_test, predictions, normalize=False, figsize=(12, 8))
param_grid=hyperparams, n_jobs=1, cv=3, verbose=1, error_score=1) optimized_model.fit(X_train, y_train) print(">>>>> Optimized params") print(optimized_model.best_params_) cv_results = optimized_model.cv_results_ print(">>>>>> Display the top results of grid search cv") results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score') display(results_dataframe.head()) saveDataframe(results_dataframe, "IMDB Logistic Regression") prediction = optimized_model.predict(X_test) score = np.mean(prediction == y_test) print( "Using our training-dataset optimized logistic regression model on the testing dataset for evaluating" ) print("score = %f" % score) ################ Graph Reporting ############################### # plot the confusion matrix skplt.metrics.plot_confusion_matrix(y_test, predictions, normalize=False, figsize=(12, 8))
param_grid=hyperparams, n_jobs=1, cv=3, verbose=1, error_score=1) optimized_model.fit(X_train, y_train) print(">>>>> Optimized params") print(optimized_model.best_params_) cv_results = optimized_model.cv_results_ print(">>>>>> Display the top results of grid search cv") results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score') display(results_dataframe.head()) saveDataframe(results_dataframe, "IMDB Multinomial Naive Bayes") prediction = optimized_model.predict(X_test) score = np.mean(prediction == y_test) print( "Using our training-dataset optimized multinomial naive bayes model on the testing dataset for evaluating" ) print("score = %f" % score) ################ Graph Reporting ############################### # plot the confusion matrix skplt.metrics.plot_confusion_matrix(y_test, predictions, normalize=False, figsize=(12, 8))
param_grid=hyperparams, n_jobs=1, cv=3, verbose=1, error_score=1) optimized_model.fit(X_train, y_train) print(">>>>> Optimized params") print(optimized_model.best_params_) cv_results = optimized_model.cv_results_ print(">>>>>> Display the top results of grid search cv") results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score') display(results_dataframe.head()) saveDataframe(results_dataframe, "IMDB Decision Tree") prediction = optimized_model.predict(X_test) score = np.mean(prediction == y_test) print( "Using our training-dataset optimized decision tree model on the testing dataset for evaluating" ) print("score = %f" % score) ################ Graph Reporting ############################### # plot the confusion matrix skplt.metrics.plot_confusion_matrix(y_test, predictions, normalize=False, figsize=(12, 8))
param_grid=hyperparams, n_jobs=1, cv=3, verbose=1, error_score=1) optimized_model.fit(X_train, y_train) print(">>>>> Optimized params") print(optimized_model.best_params_) cv_results = optimized_model.cv_results_ print(">>>>>> Display the top results of grid search cv") results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score') display(results_dataframe.head()) saveDataframe(results_dataframe, "IMDB Random Forest") prediction = optimized_model.predict(X_test) score = np.mean(prediction == y_test) print( "Using our training-dataset optimized Random Forest model on the testing dataset for evaluating" ) print("score = %f" % score) ################ Graph Reporting ############################### # plot the confusion matrix skplt.metrics.plot_confusion_matrix(y_test, predictions, normalize=False, figsize=(12, 8))