def get_classification_reports(models, model_names, X_train, X_test, y_train, y_test):
    """Fit each model on the training set and print its classification report.

    Inputs: list of models, list of model names, and pre-processed
    (scaled etc.) train/test splits. Note: requires binarized (one-hot)
    target data — `y_train.shape[1]` is read to detect multiclass targets.

    For each model, prints precision, recall, F1-score and support for each
    class, as well as averages for these metrics, scored on the test set.
    """
    for name, model in zip(model_names, models):
        print(name)
        print('\n')
        # Accounting for one-vs-rest target data: wrap the estimator when the
        # target has more than one binarized column.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)
        # NOTE(review): the original `if model == xgb:` branch compared an
        # estimator instance against the xgboost *module* (always False), and
        # the `xgb_param` dict it built was never passed to fit() — dead code,
        # removed.
        model.fit(X_train, y_train)
        # Predicting on test set
        y_predict = model.predict(X_test)
        # Classification report for model: precision, recall, F1-score,
        # support for each class, as well as averages for these metrics.
        print(classification_report(y_test, y_predict))
        print('-' * 60)
        print('\n')
def get_ROC_curve(models, model_names, X_train, X_test, y_train, y_test,
                  class_names=('Bad', 'Good', 'Neutral')):
    """Fit each model and plot per-class ROC curves on the test set.

    Inputs: list of models, list of model names, and pre-processed
    (scaled etc.) train/test splits. Note: requires binarized (one-hot)
    target data — one ROC curve is drawn per target column.

    class_names: labels used in the plot legend, one per target column.
        Defaults to the previously hard-coded ('Bad', 'Good', 'Neutral').

    Outputs a graph of n ROC curves for each model, where n = num classes.
    """
    for model, name in zip(models, model_names):
        # Multiple classes? Wrap in one-vs-rest so predict_proba yields one
        # probability column per class.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)
        # NOTE(review): the original `if model == xgb:` branch compared an
        # estimator instance against the xgboost *module* (always False) and
        # its computed params were never used — dead code, removed, along
        # with an unused `y_pred = model.predict(X_test)` local.
        model.fit(X_train, y_train)
        # Class probabilities drive the ROC thresholds.
        probs = model.predict_proba(X_test)
        n_classes = y_train.shape[1]
        # One ROC curve (and AUC) per binarized target column.
        for i in range(n_classes):
            fpr, tpr, _ = roc_curve(y_test[:, i], probs[:, i])
            roc_auc = auc(fpr, tpr)
            plt.plot(fpr, tpr,
                     label="{}, area: {}".format(class_names[i], round(roc_auc, 2)))
        # Diagonal = random-chance baseline.
        plt.plot([0, 1], [0, 1], 'k--')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.0])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves by class for {}'.format(name))
        plt.legend()
        plt.show()
def get_scores(models, model_names, X_train, X_test, y_train, y_test):
    """Fit each model and print accuracy / weighted precision, recall, F1.

    Inputs: list of models, list of model names, and pre-processed
    train/test splits. Note: requires binarized (one-hot) target data.

    Returns a 4-tuple of lists (accuracies, precisions, recalls, f1s), one
    entry per model, in the same order as `models`.
    """
    # FIX(review): these accumulators were originally (re)initialized inside
    # the loop, so they were wiped on every iteration and never actually
    # collected scores "across models". Hoisted out of the loop.
    accuracies_across_models = []
    precisions_across_models = []
    recalls_across_models = []
    f1s_across_models = []
    for model, name in zip(models, model_names):
        print(name)
        print('\n')
        # Accounting for one-vs-rest target data.
        if y_train.shape[1] != 1:
            model = OneVsRestClassifier(model)
        # NOTE(review): removed the dead `if model == xgb:` branch — it
        # compared an estimator instance to the xgboost module (always
        # False) and its computed params were never used.
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)
        pre = precision_score(y_test, y_pred, average="weighted")
        rec = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")
        accuracies_across_models.append(acc)
        precisions_across_models.append(pre)
        recalls_across_models.append(rec)
        f1s_across_models.append(f1)
        # FIX(review): the original printed np.mean/np.std of a single
        # scalar (mean == the scalar, std == 0.0) with a malformed `:3f`
        # spec (missing dot). Print the per-model score directly.
        print(f'Accuracy: {acc:.3f}')
        print(f'Precision: {pre:.3f}')
        print(f'Recall: {rec:.3f}')
        print(f'f1-score: {f1:.3f}')
        print('-' * 60)
        print('\n')
    return (accuracies_across_models, precisions_across_models,
            recalls_across_models, f1s_across_models)