################ Cross Validation Hyperparameter Tuning ###############################

# Exhaustive grid search over the support vector classifier's settings,
# followed by an evaluation of the best configuration on the held-out test set.
print("\nHYPERPARAMETRE TUNING")

# 4 (C) x 1 (penalty) x 2 (loss) x 2 (max_iter) = 16 candidate configurations.
# NOTE(review): the 'penalty'/'loss' keys suggest the estimator is a LinearSVC
# -- confirm against where `support_vector_classifier` is created.
hyperparams = {
    'C': [0.1, 1, 100, 1000],
    'penalty': ['l2'],
    'loss': ['hinge', 'squared_hinge'],
    'max_iter': [1000, 2000],
}

# error_score is the score assigned to a candidate whose fit raises.  It was
# previously 1, i.e. a *perfect* accuracy, which would rank a crashing
# configuration above every working one.  0 is the worst possible accuracy, so
# failed candidates now sort to the bottom of the ranking instead.
optimized_model = GridSearchCV(
    estimator=support_vector_classifier,
    param_grid=hyperparams,
    n_jobs=1,
    cv=3,  # 3-fold cross-validation on the training data
    verbose=1,
    error_score=0,
)

optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB Support Vector Classifier")

# Accuracy of the refitted best estimator on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print("Using our training-dataset optimized Support Vector Machine model on the testing dataset for evaluating")
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test, prediction, normalize=False, figsize=(12, 8))
plt.show()
Example #2
0
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

# Run the grid search on the training data.
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB AdaBoost")

# Accuracy of the tuned model on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print(
    "Using our training-dataset optimized adaboost model on the testing dataset for evaluating"
)
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test,
                                    prediction,
                                    normalize=False,
                                    figsize=(12, 8))
Example #3
0
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

# Run the grid search on the training data.
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB Logistic Regression")

# Accuracy of the tuned model on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print(
    "Using our training-dataset optimized logistic regression model on the testing dataset for evaluating"
)
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test,
                                    prediction,
                                    normalize=False,
                                    figsize=(12, 8))
Example #4
0
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

# Run the grid search on the training data.
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB Multinomial Naive Bayes")

# Accuracy of the tuned model on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print(
    "Using our training-dataset optimized multinomial naive bayes model on the testing dataset for evaluating"
)
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test,
                                    prediction,
                                    normalize=False,
                                    figsize=(12, 8))
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

# Run the grid search on the training data.
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB Decision Tree")

# Accuracy of the tuned model on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print(
    "Using our training-dataset optimized decision tree model on the testing dataset for evaluating"
)
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test,
                                    prediction,
                                    normalize=False,
                                    figsize=(12, 8))
                               param_grid=hyperparams,
                               n_jobs=1,
                               cv=3,
                               verbose=1,
                               error_score=1)

# Run the grid search on the training data.
optimized_model.fit(X_train, y_train)

print(">>>>> Optimized params")
print(optimized_model.best_params_)

# Tabulate every candidate, best first (rank_test_score == 1 is the winner).
cv_results = optimized_model.cv_results_
print(">>>>>> Display the top results of grid search cv")
results_dataframe = pd.DataFrame(cv_results).sort_values(by='rank_test_score')
display(results_dataframe.head())
# saveDataframe is a project helper defined elsewhere; presumably it persists
# the table under the given name -- verify against its definition.
saveDataframe(results_dataframe, "IMDB Random Forest")

# Accuracy of the tuned model on the held-out test set.
prediction = optimized_model.predict(X_test)
score = np.mean(prediction == y_test)
print(
    "Using our training-dataset optimized Random Forest model on the testing dataset for evaluating"
)
print("score = %f" % score)

################ Graph Reporting ###############################

# Plot the confusion matrix for the tuned model.  This must use `prediction`
# (computed just above); the previous code passed `predictions`, a different
# name that is never assigned in this section.
skplt.metrics.plot_confusion_matrix(y_test,
                                    prediction,
                                    normalize=False,
                                    figsize=(12, 8))