def test_get_area_under_roc_curve_no_nan(self):
    """Ensures correct output from get_area_under_roc_curve.

    In this case, input arrays do not contain NaN, so the resulting AUC is
    expected to be a real number (not NaN).
    """

    # Pass the arrays by keyword, so that the test does not silently depend
    # on the positional order of POFD and POD in the function signature.
    this_auc = model_eval.get_area_under_roc_curve(
        pofd_by_threshold=POFD_BY_THRESHOLD,
        pod_by_threshold=POD_BY_THRESHOLD)

    self.assertFalse(numpy.isnan(this_auc))
def test_get_area_under_roc_curve_all_nan(self):
    """Ensures correct output from get_area_under_roc_curve.

    In this case, input arrays are all NaN's, so the resulting AUC is expected
    to be NaN.
    """

    # Build two independent all-NaN arrays with the same length as the
    # reference POFD array.
    num_thresholds = len(POFD_BY_THRESHOLD)
    these_pofd_by_threshold = numpy.full(num_thresholds, numpy.nan)
    these_pod_by_threshold = numpy.full(num_thresholds, numpy.nan)

    # Pass the arrays by keyword, so that the test does not silently depend
    # on the positional order of POFD and POD in the function signature.
    this_auc = model_eval.get_area_under_roc_curve(
        pofd_by_threshold=these_pofd_by_threshold,
        pod_by_threshold=these_pod_by_threshold)

    self.assertTrue(numpy.isnan(this_auc))
def test_get_area_under_roc_curve_some_nan(self):
    """Ensures correct output from get_area_under_roc_curve.

    In this case, input arrays contain some NaN's (the first and last
    elements of both arrays are set to NaN).  The resulting AUC is expected to
    be a real number (not NaN).
    """

    # Adding zero yields new arrays, so the module-level constants are not
    # modified by the NaN assignment below.
    pod_values = POD_BY_THRESHOLD + 0.
    pofd_values = POFD_BY_THRESHOLD + 0.

    last_index = len(pod_values) - 1
    endpoint_indices = numpy.array([0, last_index], dtype=int)

    for this_array in (pofd_values, pod_values):
        this_array[endpoint_indices] = numpy.nan

    auc_value = model_eval.get_area_under_roc_curve(
        pofd_by_threshold=pofd_values, pod_by_threshold=pod_values)

    self.assertFalse(numpy.isnan(auc_value))
def _create_roc_curve(forecast_probabilities, observed_labels,
                      output_dir_name):
    """Creates ROC (receiver operating characteristic) curve.

    The area under the curve is computed twice (once with `model_eval`, once
    with scikit-learn's `roc_auc_score`), both values are printed and used as
    the figure title, and the figure is saved to
    "<output_dir_name>/roc_curve.jpg".

    N = number of forecast-observation pairs

    :param forecast_probabilities: See doc for `run_evaluation`.
    :param observed_labels: Same.
    :param output_dir_name: Same.
    :return: auc: Area under ROC curve, calculated by GewitterGefahr.
    :return: scikit_learn_auc: Area under ROC curve, calculated by
        scikit-learn.
    """

    pofd_by_threshold, pod_by_threshold = model_eval.get_points_in_roc_curve(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS)

    auc = model_eval.get_area_under_roc_curve(
        pofd_by_threshold=pofd_by_threshold,
        pod_by_threshold=pod_by_threshold)
    scikit_learn_auc = roc_auc_score(
        y_true=observed_labels, y_score=forecast_probabilities)

    # Single-argument, parenthesised print works identically as a Python 2
    # print statement and as the Python 3 print function.
    title_string = 'AUC = {0:.4f} ... scikit-learn AUC = {1:.4f}'.format(
        auc, scikit_learn_auc)
    print(title_string)

    figure_file_name = '{0:s}/roc_curve.jpg'.format(output_dir_name)
    print('Saving ROC curve to: "{0:s}"...\n'.format(figure_file_name))

    figure_object, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    # Work on the explicit figure/axes handles rather than pyplot's implicit
    # "current figure" state, and always close the figure (even if plotting
    # or saving fails) so that it does not stay open.
    try:
        model_eval_plotting.plot_roc_curve(
            axes_object=axes_object, pod_by_threshold=pod_by_threshold,
            pofd_by_threshold=pofd_by_threshold)

        axes_object.set_title(title_string)
        figure_object.savefig(figure_file_name, dpi=DOTS_PER_INCH)
    finally:
        pyplot.close(figure_object)

    return auc, scikit_learn_auc
def _plot_roc_curves(class_probability_matrix, observed_labels,
                     output_dir_name):
    """Plots one-versus-all ROC curve for each class.

    For each class k, the k-th column of `class_probability_matrix` is used
    as the forecast probability and `observed_labels == k` as the binary
    label.  The area under the curve is computed twice (once with
    `model_eval`, once with scikit-learn's `roc_auc_score`), and the figure is
    saved to "<output_dir_name>/roc_curve_class<k>.jpg".

    K = number of classes

    :param class_probability_matrix: See doc for `run_evaluation`.
    :param observed_labels: Same.
    :param output_dir_name: Same.
    :return: auc_by_class: length-K numpy array of AUC (area under ROC curve)
        values computed by GewitterGefahr.
    :return: sklearn_auc_by_class: length-K numpy array of AUC values computed
        by scikit-learn.
    """

    num_classes = class_probability_matrix.shape[1]
    auc_by_class = numpy.full(num_classes, numpy.nan)
    sklearn_auc_by_class = numpy.full(num_classes, numpy.nan)

    for k in range(num_classes):
        # Single-argument, parenthesised print works identically as a
        # Python 2 print statement and as the Python 3 print function.
        print('Creating ROC curve for class {0:d}...'.format(k))

        # One-versus-all inputs for class k, computed once and shared by both
        # AUC calculations.
        these_forecast_probs = class_probability_matrix[:, k]
        these_binary_labels = (observed_labels == k).astype(int)

        this_pofd_by_threshold, this_pod_by_threshold = (
            model_eval.get_points_in_roc_curve(
                forecast_probabilities=these_forecast_probs,
                observed_labels=these_binary_labels,
                threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
                unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS))

        auc_by_class[k] = model_eval.get_area_under_roc_curve(
            pofd_by_threshold=this_pofd_by_threshold,
            pod_by_threshold=this_pod_by_threshold)
        sklearn_auc_by_class[k] = roc_auc_score(
            y_true=these_binary_labels, y_score=these_forecast_probs)

        this_title_string = (
            'AUC = {0:.4f} ... scikit-learn AUC = {1:.4f}'
        ).format(auc_by_class[k], sklearn_auc_by_class[k])
        print(this_title_string)

        this_figure_file_name = '{0:s}/roc_curve_class{1:d}.jpg'.format(
            output_dir_name, k)
        print('Saving figure to: "{0:s}"...\n'.format(this_figure_file_name))

        this_figure_object, this_axes_object = pyplot.subplots(
            1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

        # Work on the explicit figure/axes handles rather than pyplot's
        # implicit "current figure" state, and always close the figure (even
        # if plotting or saving fails) so that open figures do not accumulate
        # across classes.
        try:
            model_eval_plotting.plot_roc_curve(
                axes_object=this_axes_object,
                pod_by_threshold=this_pod_by_threshold,
                pofd_by_threshold=this_pofd_by_threshold)

            this_axes_object.set_title(this_title_string)
            this_figure_object.savefig(
                this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        finally:
            pyplot.close(this_figure_object)

    return auc_by_class, sklearn_auc_by_class