    def test_get_binarization_thresholds_bad_input(self):
        """Ensures correct output from get_binarization_thresholds.

        In this case, input `threshold_arg` is invalid.
        """

        with self.assertRaises(ValueError):
            model_eval.get_binarization_thresholds(
                threshold_arg=FAKE_THRESHOLD_ARG)
Example 2
    def test_get_binarization_thresholds_from_number(self):
        """Ensures correct output from get_binarization_thresholds.

        In this case, only number of thresholds is input directly.
        """

        these_thresholds = model_eval.get_binarization_thresholds(
            threshold_arg=NUM_THRESHOLDS_FOR_INPUT)
        self.assertTrue(
            numpy.allclose(these_thresholds,
                           THRESHOLDS_FROM_NUMBER,
                           atol=TOLERANCE))
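
For context, a plausible reading of the number-of-thresholds mode is an evenly spaced grid of probabilities on [0, 1]. The standalone sketch below only illustrates that idea with made-up values; it is an assumption about the expected output, not the library's implementation (constants such as THRESHOLDS_FROM_NUMBER are defined elsewhere in the test module).

import numpy

# Hypothetical sketch: an evenly spaced threshold grid, one plausible form for
# the output tested above.
example_num_thresholds = 11
example_thresholds = numpy.linspace(0., 1., num=example_num_thresholds)
print(example_thresholds)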
Example 3
    def test_get_binarization_thresholds_direct_input(self):
        """Ensures correct output from get_binarization_thresholds.

        In this case, desired thresholds are input directly.
        """

        these_thresholds = model_eval.get_binarization_thresholds(
            threshold_arg=THRESHOLDS_FROM_DIRECT_INPUT)
        self.assertTrue(
            numpy.allclose(these_thresholds,
                           THRESHOLDS_FROM_DIRECT_INPUT,
                           atol=TOLERANCE))
Example 4
    def test_get_binarization_thresholds_from_unique_forecasts(self):
        """Ensures correct output from get_binarization_thresholds.

        In this case, binarization thresholds are determined from unique
        forecasts.
        """

        these_thresholds = model_eval.get_binarization_thresholds(
            threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
            forecast_probabilities=FORECAST_PROBS_FOR_THRESHOLDS,
            forecast_precision=FORECAST_PRECISION)

        self.assertTrue(numpy.allclose(
            these_thresholds, THRESHOLDS_FROM_UNIQUE_FORECASTS, atol=TOLERANCE
        ))
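
The unique-forecasts mode presumably derives candidate thresholds by rounding the forecast probabilities to the given precision and keeping the unique values. The helper below is a minimal standalone sketch of that idea; its name and the made-up inputs are hypothetical, and it is not the library's implementation.

import numpy

def _thresholds_from_unique_forecasts(forecast_probabilities, precision):
    """Hypothetical sketch: candidate thresholds from unique rounded forecasts."""
    rounded_probs = numpy.round(forecast_probabilities / precision) * precision
    return numpy.unique(rounded_probs)

# Example usage with made-up probabilities and a precision of 0.01.
these_probs = numpy.array([0.123, 0.127, 0.52, 0.789])
print(_thresholds_from_unique_forecasts(these_probs, precision=0.01))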
Example 5
def find_best_binarization_threshold(
        class_probability_matrix, observed_labels, threshold_arg,
        criterion_function, optimization_direction=MAX_OPTIMIZATION_DIRECTION,
        forecast_precision_for_thresholds=DEFAULT_FORECAST_PRECISION):
    """Finds the best binarization threshold.

    A "binarization threshold" is used to determinize probabilistic (either
    binary or multi-class) predictions, using the following procedure.
    f* = binarization threshold, and f_0 is the forecast probability of class 0
    (no front).

    [1] If f_0 >= f*, predict no front.
    [2] If f_0 < f*, predict a front.  In multi-class problems, frontal type
        (warm or cold) is determined by whichever of the non-zero classes has
        the highest predicted probability.

    In the following definitions, P = number of evaluation pairs and K = number
    of classes.

    :param class_probability_matrix: See documentation for
        `check_evaluation_pairs`.
    :param observed_labels: See doc for `check_evaluation_pairs`.
    :param threshold_arg: See documentation for
        `model_evaluation.get_binarization_thresholds`.  Determines which
        thresholds will be tried.
    :param criterion_function: Criterion to be either minimized or maximized.
        This must be a function that takes input `contingency_table_as_matrix`
        and returns a single float.  See `get_gerrity_score` in this module for
        an example.
    :param optimization_direction: Direction in which criterion function is
        optimized.  Options are "min" and "max".
    :param forecast_precision_for_thresholds: See documentation for
        `model_evaluation.get_binarization_thresholds`.  Determines which
        thresholds will be tried.
    :return: best_threshold: Best binarization threshold.
    :return: best_criterion_value: Value of criterion function at said
        threshold.
    """

    check_evaluation_pairs(
        class_probability_matrix=class_probability_matrix,
        observed_labels=observed_labels)

    error_checking.assert_is_string(optimization_direction)
    if optimization_direction not in VALID_OPTIMIZATION_DIRECTIONS:
        error_string = (
            '\n\n{0:s}\nValid optimization directions (listed above) do not '
            'include "{1:s}".'
        ).format(str(VALID_OPTIMIZATION_DIRECTIONS), optimization_direction)
        raise ValueError(error_string)

    possible_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=threshold_arg,
        forecast_probabilities=class_probability_matrix[:, 0],
        unique_forecast_precision=forecast_precision_for_thresholds)

    num_thresholds = len(possible_thresholds)
    criterion_values = numpy.full(num_thresholds, numpy.nan)

    for i in range(num_thresholds):
        these_predicted_labels = determinize_probabilities(
            class_probability_matrix=class_probability_matrix,
            binarization_threshold=possible_thresholds[i])

        this_contingency_table_as_matrix = get_contingency_table(
            predicted_labels=these_predicted_labels,
            observed_labels=observed_labels,
            num_classes=class_probability_matrix.shape[1])

        criterion_values[i] = criterion_function(
            this_contingency_table_as_matrix)

    if optimization_direction == MAX_OPTIMIZATION_DIRECTION:
        best_criterion_value = numpy.nanmax(criterion_values)
        best_probability_threshold = possible_thresholds[
            numpy.nanargmax(criterion_values)]
    else:
        best_criterion_value = numpy.nanmin(criterion_values)
        best_probability_threshold = possible_thresholds[
            numpy.nanargmin(criterion_values)]

    return best_probability_threshold, best_criterion_value
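
To make the search concrete, here is a simplified standalone sketch of the same loop for a small made-up problem, using plain accuracy as the criterion instead of a module function such as get_gerrity_score.  The determinization rule follows items [1]-[2] in the docstring above, but everything else (names, data) is illustrative only, not the module's code.

import numpy

def _toy_threshold_search(class_probability_matrix, observed_labels,
                          thresholds):
    """Simplified sketch of the search in find_best_binarization_threshold
    (accuracy as the criterion, maximization only)."""

    best_threshold = numpy.nan
    best_accuracy = -numpy.inf

    for this_threshold in thresholds:
        # Rules [1]-[2]: predict class 0 ("no front") when its probability
        # meets the threshold; otherwise pick the most probable non-zero class.
        these_predicted_labels = numpy.where(
            class_probability_matrix[:, 0] >= this_threshold,
            0,
            1 + numpy.argmax(class_probability_matrix[:, 1:], axis=1)
        )

        this_accuracy = numpy.mean(these_predicted_labels == observed_labels)
        if this_accuracy > best_accuracy:
            best_accuracy = this_accuracy
            best_threshold = this_threshold

    return best_threshold, best_accuracy

# Made-up 3-class example (columns: no front, warm front, cold front).
this_probability_matrix = numpy.array([
    [0.7, 0.2, 0.1],
    [0.3, 0.5, 0.2],
    [0.1, 0.1, 0.8],
    [0.6, 0.3, 0.1]
])
these_observed_labels = numpy.array([0, 1, 2, 1], dtype=int)

print(_toy_threshold_search(
    this_probability_matrix, these_observed_labels,
    thresholds=numpy.linspace(0., 1., num=11)
))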
Example 6
def _compute_scores(forecast_probabilities,
                    observed_labels,
                    num_bootstrap_reps,
                    output_file_name,
                    best_prob_threshold=None,
                    downsampling_dict=None):
    """Computes evaluation scores.

    E = number of examples (storm objects)

    :param forecast_probabilities: length-E numpy array of forecast event
        probabilities.
    :param observed_labels: length-E numpy array of observations (1 for event,
        0 for non-event).
    :param num_bootstrap_reps: Number of bootstrap replicates.
    :param output_file_name: Path to output file (will be written by
        `model_evaluation.write_evaluation`).
    :param best_prob_threshold: Best probability threshold.  If None, will be
        determined on the fly.
    :param downsampling_dict: Dictionary with downsampling fractions.  See doc
        for `deep_learning_utils.sample_by_class`.  If this is None,
        downsampling will not be used.
    """

    num_examples = len(observed_labels)
    num_examples_by_class = numpy.unique(observed_labels,
                                         return_counts=True)[-1]

    print('Number of examples by class (no downsampling): {0:s}'.format(
        str(num_examples_by_class)))

    # Positive and negative examples are indexed separately so that the
    # bootstrap loop below can resample each class on its own.
    positive_example_indices = numpy.where(observed_labels == 1)[0]
    negative_example_indices = numpy.where(observed_labels == 0)[0]

    if downsampling_dict is None:
        these_indices = numpy.linspace(0,
                                       num_examples - 1,
                                       num=num_examples,
                                       dtype=int)
    else:
        these_indices = dl_utils.sample_by_class(
            sampling_fraction_by_class_dict=downsampling_dict,
            target_name=DUMMY_TARGET_NAME,
            target_values=observed_labels,
            num_examples_total=num_examples)

        this_num_ex_by_class = numpy.unique(observed_labels[these_indices],
                                            return_counts=True)[-1]

        print('Number of examples by class (after downsampling): {0:s}'.format(
            str(this_num_ex_by_class)))

    all_prob_thresholds = model_eval.get_binarization_thresholds(
        threshold_arg=model_eval.THRESHOLD_ARG_FOR_UNIQUE_FORECASTS,
        forecast_probabilities=forecast_probabilities[these_indices],
        forecast_precision=FORECAST_PRECISION)

    if best_prob_threshold is None:
        best_prob_threshold, best_csi = (
            model_eval.find_best_binarization_threshold(
                forecast_probabilities=forecast_probabilities[these_indices],
                observed_labels=observed_labels[these_indices],
                threshold_arg=all_prob_thresholds,
                criterion_function=model_eval.get_csi,
                optimization_direction=model_eval.MAX_OPTIMIZATION_STRING))
    else:
        these_forecast_labels = model_eval.binarize_forecast_probs(
            forecast_probabilities=forecast_probabilities[these_indices],
            binarization_threshold=best_prob_threshold)

        this_contingency_dict = model_eval.get_contingency_table(
            forecast_labels=these_forecast_labels,
            observed_labels=observed_labels[these_indices])

        best_csi = model_eval.get_csi(this_contingency_dict)

    print(
        ('Best probability threshold = {0:.4f} ... corresponding CSI = {1:.4f}'
         ).format(best_prob_threshold, best_csi))

    num_examples_by_forecast_bin = model_eval.get_points_in_reliability_curve(
        forecast_probabilities=forecast_probabilities[these_indices],
        observed_labels=observed_labels[these_indices],
        num_forecast_bins=model_eval.DEFAULT_NUM_RELIABILITY_BINS)[-1]

    list_of_evaluation_tables = []

    for i in range(num_bootstrap_reps):
        print(('Computing scores for bootstrap replicate {0:d} of {1:d}...'
               ).format(i + 1, num_bootstrap_reps))

        if num_bootstrap_reps == 1:
            if downsampling_dict is None:
                these_indices = numpy.linspace(0,
                                               num_examples - 1,
                                               num=num_examples,
                                               dtype=int)
            else:
                these_indices = dl_utils.sample_by_class(
                    sampling_fraction_by_class_dict=downsampling_dict,
                    target_name=DUMMY_TARGET_NAME,
                    target_values=observed_labels,
                    num_examples_total=num_examples)
        else:
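            # Stratified bootstrap: resample positive and negative examples
            # separately for this replicate.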
            if len(positive_example_indices) > 0:
                these_positive_indices = bootstrapping.draw_sample(
                    positive_example_indices)[0]
            else:
                these_positive_indices = numpy.array([], dtype=int)

            these_negative_indices = bootstrapping.draw_sample(
                negative_example_indices)[0]

            these_indices = numpy.concatenate(
                (these_positive_indices, these_negative_indices))

            if downsampling_dict is not None:
                these_subindices = dl_utils.sample_by_class(
                    sampling_fraction_by_class_dict=downsampling_dict,
                    target_name=DUMMY_TARGET_NAME,
                    target_values=observed_labels[these_indices],
                    num_examples_total=num_examples)

                these_indices = these_indices[these_subindices]

        if downsampling_dict is not None:
            this_num_ex_by_class = numpy.unique(observed_labels[these_indices],
                                                return_counts=True)[-1]

            print('Number of examples by class: {0:s}'.format(
                str(this_num_ex_by_class)))

        this_evaluation_table = model_eval.run_evaluation(
            forecast_probabilities=forecast_probabilities[these_indices],
            observed_labels=observed_labels[these_indices],
            best_prob_threshold=best_prob_threshold,
            all_prob_thresholds=all_prob_thresholds,
            climatology=numpy.mean(observed_labels[these_indices]))

        list_of_evaluation_tables.append(this_evaluation_table)

        if i == num_bootstrap_reps - 1:
            print(SEPARATOR_STRING)
        else:
            print(MINOR_SEPARATOR_STRING)

        if i == 0:
            continue

        list_of_evaluation_tables[-1] = list_of_evaluation_tables[-1].align(
            list_of_evaluation_tables[0], axis=1)[0]

    evaluation_table = pandas.concat(list_of_evaluation_tables,
                                     axis=0,
                                     ignore_index=True)

    print('Writing results to: "{0:s}"...'.format(output_file_name))

    model_eval.write_evaluation(
        pickle_file_name=output_file_name,
        forecast_probabilities=forecast_probabilities,
        observed_labels=observed_labels,
        best_prob_threshold=best_prob_threshold,
        all_prob_thresholds=all_prob_thresholds,
        num_examples_by_forecast_bin=num_examples_by_forecast_bin,
        downsampling_dict=downsampling_dict,
        evaluation_table=evaluation_table)
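
As a closing illustration, the stratified resampling used in the bootstrap loop above can be sketched with plain numpy.  The helper below is a hypothetical standalone stand-in for the combination of bootstrapping.draw_sample calls in _compute_scores, not the module's actual code.

import numpy

def _toy_stratified_bootstrap(observed_labels, random_seed=None):
    """Sketch of one bootstrap replicate: positive and negative examples are
    resampled separately, with replacement."""

    random_state = numpy.random.RandomState(random_seed)

    positive_indices = numpy.where(observed_labels == 1)[0]
    negative_indices = numpy.where(observed_labels == 0)[0]

    if len(positive_indices) > 0:
        these_positive_indices = random_state.choice(
            positive_indices, size=len(positive_indices), replace=True)
    else:
        these_positive_indices = numpy.array([], dtype=int)

    these_negative_indices = random_state.choice(
        negative_indices, size=len(negative_indices), replace=True)

    return numpy.concatenate((these_positive_indices, these_negative_indices))

# Example: one replicate drawn from made-up labels.
example_labels = numpy.array([0, 0, 0, 1, 0, 1, 0, 0, 1, 0], dtype=int)
print(_toy_stratified_bootstrap(example_labels, random_seed=0))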