def get_training_testing_prediction_stats(self):
    """Log cross-validation stats for the training set and prediction stats for the test set.

    Calls ``training_cv_stats_multiclass`` with ``self.model``,
    ``self.X_train_scaled``, ``self.y_train`` and ``self.cv``, then
    ``testing_predict_stats_multiclass`` with ``self.model``,
    ``self.X_test_scaled`` and ``self.y_test``.

    Side effects:
        * sets ``self.y_pred`` and ``self.y_pred_proba`` to the test-set
          predictions returned by ``testing_predict_stats_multiclass``;
        * writes both to ``y_pred_before_calibration.csv`` and
          ``y_pred_proba_before_calibration.csv`` inside ``self.directory``;
        * logs the collected stats and the output of
          ``self.model.decision_function`` for the train and test sets.

    Returns:
        None.
    """
    print_to_consol(
        'Getting basic stats for training set and cross-validation')

    # The helper also returns the cross-validated class predictions and
    # probabilities. Nothing in this method reads them, so they are bound to
    # underscore-prefixed names to mark them as intentionally unused.
    training_stats, _y_train_pred, _y_train_pred_proba = (
        training_cv_stats_multiclass(
            self.model, self.X_train_scaled, self.y_train, self.cv))

    # NOTE(review): the message text hard-codes 3 folds while the actual
    # splitting is controlled by self.cv -- confirm the two always agree.
    logging.info(
        f'Basic stats achieved for training set and 3-fold CV \n'
        f'Accuracy for each individual fold of 3 CV folds: {training_stats["acc_cv"]} \n'
        f'Accuracy across all 3 CV-folds: {training_stats["acc"]} \n'
        f'Recall across all 3 CV-folds: {training_stats["recall"]} \n'
        f'Precision across all 3 CV-folds: {training_stats["precision"]} \n'
        f'F1 score across all 3 CV-folds: {training_stats["f1-score"]} \n'
        f'Storing cross-validated y_train classes in y_train_pred \n'
        f'Storing cross-validated y_train probabilities in y_train_pred_proba \n'
    )

    print_to_consol(
        'Getting class predictions and probabilities for test set')

    test_stats, self.y_pred, self.y_pred_proba = (
        testing_predict_stats_multiclass(
            self.model, self.X_test_scaled, self.y_test))

    # Persist the pre-calibration test-set predictions next to the other
    # outputs in self.directory.
    y_pred_out = os.path.join(
        self.directory, "y_pred_before_calibration.csv")
    np.savetxt(y_pred_out, self.y_pred, delimiter=",")

    y_pred_proba_out = os.path.join(
        self.directory, "y_pred_proba_before_calibration.csv")
    np.savetxt(y_pred_proba_out, self.y_pred_proba, delimiter=",")

    logging.info(
        'Writing y_pred and y_pred_proba before calibration to disk. \n')

    # decision_function output is computed only so it can be logged below.
    confidence_train = self.model.decision_function(self.X_train_scaled)
    confidence_test = self.model.decision_function(self.X_test_scaled)

    logging.info(
        f'Predicting on the test set. \n'
        f'Storing classes in y_pred and probabilities in y_pred_proba \n'
        f'Prediction confidence for train set: {confidence_train} \n'
        f'Prediction confidence for test set: {confidence_test} \n')

    print_to_consol(
        'Calculate prediction stats for y_pred and y_pred_proba of test set'
    )

    logging.info(
        f'Basic stats on the test set. \n'
        f'Prediction accuracy on the test set: {test_stats["predict_acc"]} \n'
        f'Class distribution in the test set: {test_stats["class_distribution"]} \n'
        f'Matthews Correlation Coefficient: {test_stats["mcc"]} \n')
def get_training_testing_prediction_stats(self):
    """Log cross-validation stats for the training set and prediction stats for the test set.

    Calls ``training_cv_stats_multiclass`` with ``self.model``,
    ``self.X_train``, ``self.y_train`` and ``self.cv``, then
    ``testing_predict_stats_multiclass`` with ``self.model``,
    ``self.X_test`` and ``self.y_test``.

    Side effects:
        * sets ``self.y_pred`` and ``self.y_pred_proba`` to the test-set
          predictions returned by ``testing_predict_stats_multiclass``;
        * logs the collected training and test stats.

    Returns:
        None.
    """
    print_to_consol(
        'Getting basic stats for training set and cross-validation')

    # The helper also returns the cross-validated class predictions and
    # probabilities. Nothing in this method reads them, so they are bound to
    # underscore-prefixed names to mark them as intentionally unused.
    training_stats, _y_train_pred, _y_train_pred_proba = (
        training_cv_stats_multiclass(
            self.model, self.X_train, self.y_train, self.cv))

    # NOTE(review): the message text hard-codes 3 folds while the actual
    # splitting is controlled by self.cv -- confirm the two always agree.
    logging.info(
        f'Basic stats achieved for training set and 3-fold CV \n'
        f'Accuracy for each individual fold of 3 CV folds: {training_stats["acc_cv"]} \n'
        f'Accuracy across all 3 CV-folds: {training_stats["acc"]} \n'
        f'Recall across all 3 CV-folds: {training_stats["recall"]} \n'
        f'Precision across all 3 CV-folds: {training_stats["precision"]} \n'
        f'F1 score across all 3 CV-folds: {training_stats["f1-score"]} \n'
        f'Storing cross-validated y_train classes in y_train_pred \n'
        f'Storing cross-validated y_train probabilities in y_train_pred_proba \n'
    )

    print_to_consol(
        'Getting class predictions and probabilities for test set')

    test_stats, self.y_pred, self.y_pred_proba = (
        testing_predict_stats_multiclass(
            self.model, self.X_test, self.y_test))

    logging.info(
        'Predicting on the test set. \n'
        'Storing classes in y_pred and probabilities in y_pred_proba \n')

    print_to_consol(
        'Calculate prediction stats for y_pred and y_pred_proba of test set'
    )

    logging.info(
        f'Basic stats on the test set. \n'
        f'Prediction accuracy on the test set: {test_stats["predict_acc"]} \n'
        f'Class distribution in the test set: {test_stats["class_distribution"]} \n'
        f'Matthews Correlation Coefficient: {test_stats["mcc"]} \n')