예제 #1
0
    def test_get_area_under_roc_curve_no_nan(self):
        """Checks get_area_under_roc_curve on the unmodified fixture arrays.

        POFD_BY_THRESHOLD and POD_BY_THRESHOLD (defined elsewhere; expected to
        be NaN-free) are passed through as-is.  The test asserts only that the
        returned AUC is not NaN.
        """

        area_under_curve = model_eval.get_area_under_roc_curve(
            POFD_BY_THRESHOLD, POD_BY_THRESHOLD)

        auc_is_nan = numpy.isnan(area_under_curve)
        self.assertFalse(auc_is_nan)
예제 #2
0
    def test_get_area_under_roc_curve_all_nan(self):
        """Checks get_area_under_roc_curve when every input value is NaN.

        Both input arrays have the same length as POFD_BY_THRESHOLD and are
        filled entirely with NaN.  The test asserts that the returned AUC is
        NaN as well.
        """

        num_thresholds = len(POFD_BY_THRESHOLD)
        all_nan_pofd = numpy.full(num_thresholds, numpy.nan)
        # Independent copy, so the two arguments never share memory.
        all_nan_pod = copy.deepcopy(all_nan_pofd)

        area_under_curve = model_eval.get_area_under_roc_curve(
            all_nan_pofd, all_nan_pod)

        self.assertTrue(numpy.isnan(area_under_curve))
    def test_get_area_under_roc_curve_some_nan(self):
        """Checks get_area_under_roc_curve when only some inputs are NaN.

        The first and last elements of both input arrays are replaced with
        NaN.  The test asserts that the returned AUC is still not NaN.
        """

        # Adding 0. produces new arrays (presumably the fixtures are numpy
        # arrays), so the shared fixtures are not modified by this test.
        pofd_with_gaps = POFD_BY_THRESHOLD + 0.
        pod_with_gaps = POD_BY_THRESHOLD + 0.

        last_index = len(pod_with_gaps) - 1
        endpoint_indices = numpy.array([0, last_index], dtype=int)

        for this_array in (pofd_with_gaps, pod_with_gaps):
            this_array[endpoint_indices] = numpy.nan

        area_under_curve = model_eval.get_area_under_roc_curve(
            pofd_by_threshold=pofd_with_gaps, pod_by_threshold=pod_with_gaps)

        self.assertFalse(numpy.isnan(area_under_curve))
예제 #4
0
def _create_roc_curve(forecast_probabilities, observed_labels, output_dir_name):
    """Creates ROC (receiver operating characteristic) curve.

    The curve is computed with `model_eval.get_points_in_roc_curve`, its area
    is computed both with `model_eval.get_area_under_roc_curve` and with
    `roc_auc_score`, and the plot is saved to
    "<output_dir_name>/roc_curve.jpg" with both AUC values in the title.

    N = number of forecast-observation pairs

    :param forecast_probabilities: See doc for `run_evaluation`.
    :param observed_labels: Same.
    :param output_dir_name: Same.
    :return: auc: Area under ROC curve, calculated by GewitterGefahr.
    :return: scikit_learn_auc: Area under ROC curve, calculated by scikit-learn.
    """

    pofd_by_threshold, pod_by_threshold = model_eval.get_points_in_roc_curve(
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS)

    auc = model_eval.get_area_under_roc_curve(
        pofd_by_threshold=pofd_by_threshold,
        pod_by_threshold=pod_by_threshold)
    scikit_learn_auc = roc_auc_score(
        y_true=observed_labels, y_score=forecast_probabilities)

    title_string = 'AUC = {0:.4f} ... scikit-learn AUC = {1:.4f}'.format(
        auc, scikit_learn_auc)

    # Single-argument, parenthesised print works identically as a Python 2
    # print statement and as a Python 3 function call.
    print(title_string)

    figure_file_name = '{0:s}/roc_curve.jpg'.format(output_dir_name)
    print('Saving ROC curve to: "{0:s}"...\n'.format(figure_file_name))

    _, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))
    model_eval_plotting.plot_roc_curve(
        axes_object=axes_object, pod_by_threshold=pod_by_threshold,
        pofd_by_threshold=pofd_by_threshold)

    pyplot.title(title_string)
    pyplot.savefig(figure_file_name, dpi=DOTS_PER_INCH)
    # Close the figure so that repeated calls do not accumulate open figures.
    pyplot.close()

    return auc, scikit_learn_auc
예제 #5
0
def _plot_roc_curves(class_probability_matrix, observed_labels,
                     output_dir_name):
    """Plots one-versus-all ROC curve for each class.

    For class k, the forecast is column k of `class_probability_matrix` and
    the binary observation is 1 where `observed_labels == k`, else 0.  Each
    curve is saved to "<output_dir_name>/roc_curve_class<k>.jpg" with both AUC
    values in the title.

    K = number of classes

    :param class_probability_matrix: See doc for `run_evaluation`.
    :param observed_labels: Same.
    :param output_dir_name: Same.
    :return: auc_by_class: length-K numpy array of AUC (area under ROC curve)
        values computed by GewitterGefahr.
    :return: sklearn_auc_by_class: length-K numpy array of AUC values computed
        by scikit-learn.
    """

    num_classes = class_probability_matrix.shape[1]
    auc_by_class = numpy.full(num_classes, numpy.nan)
    sklearn_auc_by_class = numpy.full(num_classes, numpy.nan)

    for k in range(num_classes):
        # Single-argument, parenthesised print works identically as a Python 2
        # print statement and as a Python 3 function call.
        print('Creating ROC curve for class {0:d}...'.format(k))

        # One-versus-all inputs for class k, computed once and shared by both
        # AUC calculations.
        these_probabilities = class_probability_matrix[:, k]
        these_binary_labels = (observed_labels == k).astype(int)

        this_pofd_by_threshold, this_pod_by_threshold = (
            model_eval.get_points_in_roc_curve(
                forecast_probabilities=these_probabilities,
                observed_labels=these_binary_labels,
                threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
                unique_forecast_precision=FORECAST_PRECISION_FOR_THRESHOLDS))

        auc_by_class[k] = model_eval.get_area_under_roc_curve(
            pofd_by_threshold=this_pofd_by_threshold,
            pod_by_threshold=this_pod_by_threshold)

        sklearn_auc_by_class[k] = roc_auc_score(
            y_true=these_binary_labels, y_score=these_probabilities)

        _, this_axes_object = pyplot.subplots(
            1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

        model_eval_plotting.plot_roc_curve(
            axes_object=this_axes_object,
            pod_by_threshold=this_pod_by_threshold,
            pofd_by_threshold=this_pofd_by_threshold)

        this_title_string = (
            'AUC = {0:.4f} ... scikit-learn AUC = {1:.4f}'
        ).format(auc_by_class[k], sklearn_auc_by_class[k])

        print(this_title_string)
        pyplot.title(this_title_string)

        this_figure_file_name = '{0:s}/roc_curve_class{1:d}.jpg'.format(
            output_dir_name, k)

        print('Saving figure to: "{0:s}"...\n'.format(this_figure_file_name))
        pyplot.savefig(this_figure_file_name, dpi=FIGURE_RESOLUTION_DPI)
        # Close each figure before the next iteration so that open figures do
        # not accumulate across classes.
        pyplot.close()

    return auc_by_class, sklearn_auc_by_class