def get_training_testing_prediction_stats(self):
    print_to_consol(
        'Getting basic stats for training set and cross-validation')

    training_stats, y_train_pred, y_train_pred_proba = training_cv_stats_multiclass(
        self.model, self.X_train_scaled, self.y_train, self.cv)

    logging.info(
        f'Basic stats achieved for training set and {self.cv}-fold CV \n'
        f'Accuracy for each individual fold of {self.cv} CV folds: {training_stats["acc_cv"]} \n'
        f'Accuracy across all {self.cv} CV-folds: {training_stats["acc"]} \n'
        f'Recall across all {self.cv} CV-folds: {training_stats["recall"]} \n'
        f'Precision across all {self.cv} CV-folds: {training_stats["precision"]} \n'
        f'F1 score across all {self.cv} CV-folds: {training_stats["f1-score"]} \n'
        f'Storing cross-validated y_train classes in y_train_pred \n'
        f'Storing cross-validated y_train probabilities in y_train_pred_proba \n')

    print_to_consol(
        'Getting class predictions and probabilities for test set')

    test_stats, self.y_pred, self.y_pred_proba = testing_predict_stats_multiclass(
        self.model, self.X_test_scaled, self.y_test)

    # Persist the uncalibrated test-set predictions and probabilities
    y_pred_out = os.path.join(self.directory,
                              "y_pred_before_calibration.csv")
    np.savetxt(y_pred_out, self.y_pred, delimiter=",")
    y_pred_proba_out = os.path.join(self.directory,
                                    "y_pred_proba_before_calibration.csv")
    np.savetxt(y_pred_proba_out, self.y_pred_proba, delimiter=",")

    logging.info('Writing y_pred and y_pred_proba before calibration to disk. \n')

    # Distance from the decision boundary serves as a per-sample confidence
    # measure; requires a model that implements decision_function
    confidence_train = self.model.decision_function(self.X_train_scaled)
    confidence_test = self.model.decision_function(self.X_test_scaled)

    logging.info(
        f'Predicting on the test set. \n'
        f'Storing classes in y_pred and probabilities in y_pred_proba \n'
        f'Prediction confidence for train set: {confidence_train} \n'
        f'Prediction confidence for test set: {confidence_test} \n')

    print_to_consol(
        'Calculate prediction stats for y_pred and y_pred_proba of test set')

    logging.info(
        f'Basic stats on the test set. \n'
        f'Prediction accuracy on the test set: {test_stats["predict_acc"]} \n'
        f'Class distribution in the test set: {test_stats["class_distribution"]} \n'
        f'Matthews Correlation Coefficient: {test_stats["mcc"]} \n')
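# ---------------------------------------------------------------------------
# Illustrative sketch only, assuming scikit-learn: one plausible shape for the
# training_cv_stats_multiclass helper used above. The repository's real
# implementation lives elsewhere and may differ; the _sketch suffix marks this
# function as hypothetical.
# ---------------------------------------------------------------------------
def training_cv_stats_multiclass_sketch(model, X_train, y_train, cv):
    from sklearn.model_selection import cross_val_predict, cross_val_score
    from sklearn.metrics import precision_score, recall_score, f1_score

    # Accuracy per fold; its mean is the cross-validated accuracy
    acc_cv = cross_val_score(model, X_train, y_train, cv=cv,
                             scoring='accuracy')
    # Out-of-fold class labels and probabilities for every training sample
    y_train_pred = cross_val_predict(model, X_train, y_train, cv=cv)
    y_train_pred_proba = cross_val_predict(model, X_train, y_train, cv=cv,
                                           method='predict_proba')
    stats = {
        'acc_cv': acc_cv,
        'acc': acc_cv.mean(),
        # Weighted averaging accounts for class imbalance in multiclass data
        'recall': recall_score(y_train, y_train_pred, average='weighted'),
        'precision': precision_score(y_train, y_train_pred, average='weighted'),
        'f1-score': f1_score(y_train, y_train_pred, average='weighted'),
    }
    return stats, y_train_pred, y_train_pred_proba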
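# ---------------------------------------------------------------------------
# Illustrative sketch only, assuming scikit-learn: a plausible counterpart for
# testing_predict_stats_multiclass. Again hypothetical, not the repository's
# actual implementation.
# ---------------------------------------------------------------------------
def testing_predict_stats_multiclass_sketch(model, X_test, y_test):
    from collections import Counter
    from sklearn.metrics import accuracy_score, matthews_corrcoef

    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)
    stats = {
        'predict_acc': accuracy_score(y_test, y_pred),
        # How many test samples fall into each class
        'class_distribution': Counter(y_test),
        # MCC is robust to class imbalance and defined for multiclass problems
        'mcc': matthews_corrcoef(y_test, y_pred),
    }
    return stats, y_pred, y_pred_proba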
def detailed_analysis(self):
    print_to_consol(
        'Making a confusion matrix for test set classification outcomes')

    matrix_stats, report = confusion_matrix_and_stats_multiclass(
        self.y_test, self.y_pred, 'before_cal', self.directory)

    logging.info(f'Detailed analysis of confusion matrix for test set. \n'
                 f'True positives: {matrix_stats["TP"]} \n'
                 f'True negatives: {matrix_stats["TN"]} \n'
                 f'False positives: {matrix_stats["FP"]} \n'
                 f'False negatives: {matrix_stats["FN"]} \n'
                 f'Classification accuracy: {matrix_stats["acc"]} \n'
                 f'Classification error: {matrix_stats["err"]} \n'
                 f'Sensitivity: {matrix_stats["sensitivity"]} \n'
                 f'Specificity: {matrix_stats["specificity"]} \n'
                 f'False positive rate: {matrix_stats["FP-rate"]} \n'
                 f'False negative rate: {matrix_stats["FN-rate"]} \n'
                 f'Precision: {matrix_stats["precision"]} \n'
                 f'F1-score: {matrix_stats["F1-score"]} \n')

    logging.info(
        f'Classification report on test set before calibration. \n'
        f'{report} \n')

    print_to_consol('Make a radar plot for performance metrics')

    # No single ROC AUC exists for a multiclass problem, hence None
    radar_dict = {
        'Classification accuracy': matrix_stats["acc"],
        'Classification error': matrix_stats["err"],
        'Sensitivity': matrix_stats["sensitivity"],
        'Specificity': matrix_stats["specificity"],
        'False positive rate': matrix_stats["FP-rate"],
        'False negative rate': matrix_stats["FN-rate"],
        'Precision': matrix_stats["precision"],
        'F1-score': matrix_stats["F1-score"],
        'ROC AUC': None}

    plot_radar_chart(radar_dict, self.directory)

    print_to_consol(
        'Calibrating classifier and writing to disk; getting new accuracy')

    self.calibrated_clf, clf_acc = calibrate_classifier(
        self.model, self.X_cal_scaled, self.y_cal)

    date = datetime.now().strftime('%Y%m%d_%H%M')
    joblib.dump(
        self.calibrated_clf,
        os.path.join(self.directory,
                     'best_calibrated_predictor_' + date + '.pkl'))

    logging.info(
        f'Calibrated the best classifier with X_cal and y_cal and new accuracy {clf_acc} \n'
        f'Writing file to disk in {self.directory} \n')

    print_to_consol(
        'Getting 95% confidence interval for calibrated classifier')

    alpha, upper, lower = get_confidence_interval(
        self.X_train_scaled, self.y_train, self.X_test_scaled, self.y_test,
        self.calibrated_clf, self.directory, self.bootiter, 'calibrated')

    logging.info(f'{alpha}% confidence interval {upper}% and {lower}% \n'
                 f'for calibrated classifier. \n')

    print_to_consol('Running prediction for calibrated classifier')

    print_to_consol(
        'Getting class predictions and probabilities for test set '
        'with calibrated classifier')

    test_stats_cal, self.y_pred_cal, self.y_pred_proba_cal = testing_predict_stats_multiclass(
        self.calibrated_clf, self.X_test_scaled, self.y_test)

    # Persist the calibrated test-set predictions and probabilities
    y_pred_cal_out = os.path.join(self.directory,
                                  "y_pred_after_calibration.csv")
    np.savetxt(y_pred_cal_out, self.y_pred_cal, delimiter=",")
    y_pred_proba_cal_out = os.path.join(
        self.directory, "y_pred_proba_after_calibration.csv")
    np.savetxt(y_pred_proba_cal_out, self.y_pred_proba_cal, delimiter=",")

    logging.info(
        'Writing y_pred and y_pred_proba after calibration to disk. \n'
        'Predicting on the test set with calibrated classifier. \n'
        'Storing classes for calibrated classifier in y_pred and '
        'probabilities in y_pred_proba. \n')

    print_to_consol(
        'Calculate prediction stats for y_pred and y_pred_proba of test set '
        'with calibrated classifier')

    logging.info(
        f'Basic stats on the test set with calibrated classifier. \n'
        f'Prediction accuracy on the test set: {test_stats_cal["predict_acc"]} \n'
        f'Class distribution in the test set: {test_stats_cal["class_distribution"]} \n'
        f'Matthews Correlation Coefficient: {test_stats_cal["mcc"]} \n')

    print_to_consol(
        'Making a confusion matrix for test set classification outcomes '
        'with calibrated classifier')

    matrix_stats_cal, report_cal = confusion_matrix_and_stats_multiclass(
        self.y_test, self.y_pred_cal, 'after_cal', self.directory)

    logging.info(
        f'Detailed analysis of confusion matrix for test set with calibrated classifier. \n'
        f'True positives: {matrix_stats_cal["TP"]} \n'
        f'True negatives: {matrix_stats_cal["TN"]} \n'
        f'False positives: {matrix_stats_cal["FP"]} \n'
        f'False negatives: {matrix_stats_cal["FN"]} \n'
        f'Classification accuracy: {matrix_stats_cal["acc"]} \n'
        f'Classification error: {matrix_stats_cal["err"]} \n'
        f'Sensitivity: {matrix_stats_cal["sensitivity"]} \n'
        f'Specificity: {matrix_stats_cal["specificity"]} \n'
        f'False positive rate: {matrix_stats_cal["FP-rate"]} \n'
        f'False negative rate: {matrix_stats_cal["FN-rate"]} \n'
        f'Precision: {matrix_stats_cal["precision"]} \n'
        f'F1-score: {matrix_stats_cal["F1-score"]} \n')

    logging.info(
        f'Classification report on test set after calibration. \n'
        f'{report_cal} \n')

    print_to_consol(
        'Make a radar plot for performance metrics with calibrated classifier')

    radar_dict_cal = {
        'Classification accuracy': matrix_stats_cal["acc"],
        'Classification error': matrix_stats_cal["err"],
        'Sensitivity': matrix_stats_cal["sensitivity"],
        'Specificity': matrix_stats_cal["specificity"],
        'False positive rate': matrix_stats_cal["FP-rate"],
        'False negative rate': matrix_stats_cal["FN-rate"],
        'Precision': matrix_stats_cal["precision"],
        'F1-score': matrix_stats_cal["F1-score"],
        'ROC AUC': None}

    plot_radar_chart(radar_dict_cal, self.directory)

    end = datetime.now()
    duration = end - self.start

    logging.info(f'Training lasted for {duration} \n')
    logging.info('Training completed \n')

    print_to_consol('Training completed')
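# ---------------------------------------------------------------------------
# Illustrative sketches only, assuming scikit-learn and NumPy. These are NOT
# the repository's implementations of calibrate_classifier and
# get_confidence_interval; they outline one plausible shape for each helper,
# with _sketch suffixes marking them as hypothetical. Signatures are
# simplified relative to the calls above.
# ---------------------------------------------------------------------------
def calibrate_classifier_sketch(fitted_model, X_cal, y_cal):
    from sklearn.calibration import CalibratedClassifierCV

    # cv='prefit' (deprecated in recent scikit-learn releases in favour of
    # FrozenEstimator) reuses the already-trained model and fits only the
    # sigmoid calibration mapping on the held-out calibration split
    calibrated_clf = CalibratedClassifierCV(fitted_model, method='sigmoid',
                                            cv='prefit')
    calibrated_clf.fit(X_cal, y_cal)
    clf_acc = calibrated_clf.score(X_cal, y_cal)
    return calibrated_clf, clf_acc


def get_confidence_interval_sketch(X_test, y_test, clf, bootiter=1000,
                                   alpha=0.95):
    import numpy as np

    # Bootstrap the test set: resample with replacement, score each resample,
    # then read the interval bounds off the percentiles of the resulting
    # score distribution
    X, y = np.asarray(X_test), np.asarray(y_test)
    rng = np.random.default_rng()
    scores = []
    for _ in range(bootiter):
        idx = rng.integers(0, len(y), len(y))
        scores.append(clf.score(X[idx], y[idx]))
    lower = np.percentile(scores, 100 * (1 - alpha) / 2)
    upper = np.percentile(scores, 100 * (1 + alpha) / 2)
    return 100 * alpha, upper, lower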