def test_get_confidence_interval(self):
    """Ensures correct output from get_confidence_interval."""

    this_conf_interval_min, this_conf_interval_max = (
        bootstrapping.get_confidence_interval(
            STAT_VALUES, CONFIDENCE_LEVEL))

    self.assertTrue(numpy.isclose(
        this_conf_interval_min, CONFIDENCE_INTERVAL_MIN, atol=TOLERANCE))
    self.assertTrue(numpy.isclose(
        this_conf_interval_max, CONFIDENCE_INTERVAL_MAX, atol=TOLERANCE))
Example #2
def _plot_by_hour(evaluation_dir_name, num_hours_per_chunk, confidence_level,
                  output_dir_name):
    """Plots model evaluation by hour.

    :param evaluation_dir_name: See documentation at top of file.
    :param num_hours_per_chunk: Same.
    :param confidence_level: Same.
    :param output_dir_name: Same.
    :return: output_file_names: Paths to figures saved by this method.
    """

    chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
        num_hours_per_chunk=num_hours_per_chunk, verbose=False)

    num_bootstrap_reps = None
    num_chunks = len(chunk_to_hours_dict.keys())

    auc_matrix = numpy.full((num_chunks, 3), numpy.nan)
    pod_matrix = numpy.full((num_chunks, 3), numpy.nan)
    far_matrix = numpy.full((num_chunks, 3), numpy.nan)
    csi_matrix = numpy.full((num_chunks, 3), numpy.nan)
    num_examples_by_chunk = numpy.full(num_chunks, 0, dtype=int)
    num_positive_ex_by_chunk = numpy.full(num_chunks, 0, dtype=int)

    for i in range(num_chunks):
        this_eval_file_name = model_eval.find_file(
            directory_name=evaluation_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        if not os.path.isfile(this_eval_file_name):
            warning_string = (
                'Cannot find file (this may or may not be a problem).  Expected'
                ' at: "{0:s}"').format(this_eval_file_name)

            warnings.warn(warning_string)
            continue

        print('Reading data from: "{0:s}"...'.format(this_eval_file_name))
        this_evaluation_dict = model_eval.read_evaluation(this_eval_file_name)

        num_examples_by_chunk[i] = len(
            this_evaluation_dict[model_eval.OBSERVED_LABELS_KEY])
        num_positive_ex_by_chunk[i] = numpy.sum(
            this_evaluation_dict[model_eval.OBSERVED_LABELS_KEY])

        this_evaluation_table = this_evaluation_dict[
            model_eval.EVALUATION_TABLE_KEY]
        this_num_bootstrap_reps = len(this_evaluation_table.index)

        if num_bootstrap_reps is None:
            num_bootstrap_reps = this_num_bootstrap_reps
        assert num_bootstrap_reps == this_num_bootstrap_reps

        these_auc = this_evaluation_table[model_eval.AUC_KEY].values
        these_pod = this_evaluation_table[model_eval.POD_KEY].values
        these_far = (
            1. - this_evaluation_table[model_eval.SUCCESS_RATIO_KEY].values)
        these_csi = this_evaluation_table[model_eval.CSI_KEY].values

        auc_matrix[i, 1] = numpy.nanmean(these_auc)
        pod_matrix[i, 1] = numpy.nanmean(these_pod)
        far_matrix[i, 1] = numpy.nanmean(these_far)
        csi_matrix[i, 1] = numpy.nanmean(these_csi)

        auc_matrix[i, 0], auc_matrix[i, 2] = (
            bootstrapping.get_confidence_interval(
                stat_values=these_auc, confidence_level=confidence_level))
        pod_matrix[i, 0], pod_matrix[i, 2] = (
            bootstrapping.get_confidence_interval(
                stat_values=these_pod, confidence_level=confidence_level))
        far_matrix[i, 0], far_matrix[i, 2] = (
            bootstrapping.get_confidence_interval(
                stat_values=these_far, confidence_level=confidence_level))
        csi_matrix[i, 0], csi_matrix[i, 2] = (
            bootstrapping.get_confidence_interval(
                stat_values=these_csi, confidence_level=confidence_level))

    x_tick_labels = [None] * num_chunks
    x_tick_values = numpy.linspace(0,
                                   num_chunks - 1,
                                   num=num_chunks,
                                   dtype=float)

    for i in range(num_chunks):
        these_hours = chunk_to_hours_dict[i]

        if len(these_hours) == 1:
            x_tick_labels[i] = '{0:02d}'.format(these_hours[0])
        else:
            x_tick_labels[i] = '{0:02d}-{1:02d}'.format(
                numpy.min(these_hours), numpy.max(these_hours))

    figure_object, axes_object = _plot_auc_and_csi(
        auc_matrix=auc_matrix,
        csi_matrix=csi_matrix,
        num_examples_by_chunk=num_examples_by_chunk,
        num_bootstrap_reps=num_bootstrap_reps,
        plot_legend=False)

    axes_object.set_xticks(x_tick_values)
    axes_object.set_xticklabels(x_tick_labels, rotation=90.)
    axes_object.set_xlabel('Hour (UTC)')

    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(b)',
                              x_coord_normalized=-0.075,
                              y_coord_normalized=1.02)

    auc_csi_file_name = '{0:s}/hourly_auc_and_csi.jpg'.format(output_dir_name)
    print('Saving figure to: "{0:s}"...'.format(auc_csi_file_name))

    figure_object.savefig(auc_csi_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    figure_object, axes_object = _plot_pod_and_far(
        pod_matrix=pod_matrix,
        far_matrix=far_matrix,
        num_positive_ex_by_chunk=num_positive_ex_by_chunk,
        num_bootstrap_reps=num_bootstrap_reps,
        plot_legend=False)

    axes_object.set_xticks(x_tick_values)
    axes_object.set_xticklabels(x_tick_labels, rotation=90.)
    axes_object.set_xlabel('Hour (UTC)')

    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(d)',
                              x_coord_normalized=-0.075,
                              y_coord_normalized=1.02)

    pod_far_file_name = '{0:s}/hourly_pod_and_far.jpg'.format(output_dir_name)
    print('Saving figure to: "{0:s}"...'.format(pod_far_file_name))

    figure_object.savefig(pod_far_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    return [auc_csi_file_name, pod_far_file_name]
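
# Hedged usage sketch for _plot_by_hour; the argument values below are
# hypothetical and not taken from the original script.
# output_file_names = _plot_by_hour(
#     evaluation_dir_name='evaluation_by_hour', num_hours_per_chunk=3,
#     confidence_level=0.95, output_dir_name='hourly_figures')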
Example #3
def _get_ci_one_model(evaluation_table, for_roc_curve, confidence_level):
    """Returns confidence interval for one model.

    T = number of probability thresholds

    :param evaluation_table: Similar to pandas DataFrame created by
        `model_evaluation.run_evaluation`, except that this table has multiple
        rows (one per bootstrap replicate).
    :param for_roc_curve: Boolean flag.  If True, will return confidence
        interval for ROC curve.  If False, for performance diagram.
    :param confidence_level: Confidence level (in range 0...1).
    :return: ci_bottom_dict: Dictionary with the following keys (for bottom of
        confidence interval).
    ci_bottom_dict["pod_by_threshold"]: length-T numpy array of POD values.
    ci_bottom_dict["pofd_by_threshold"]: length-T numpy array of POFD values.
        If `for_roc_curve == False`, this key is missing.
    ci_bottom_dict["success_ratio_by_threshold"]: length-T numpy array of success
        ratios.  If `for_roc_curve == True`, this key is missing.

    :return: ci_mean_dict: Same but for the mean over bootstrap replicates.
    :return: ci_top_dict: Same but for top of confidence interval.
    """

    pod_matrix = numpy.vstack(
        tuple(
            evaluation_table[model_eval.POD_BY_THRESHOLD_KEY].values.tolist()))

    num_thresholds = pod_matrix.shape[1]

    if for_roc_curve:
        pofd_matrix = numpy.vstack(
            tuple(evaluation_table[
                model_eval.POFD_BY_THRESHOLD_KEY].values.tolist()))

        ci_bottom_dict = {
            model_eval.POD_BY_THRESHOLD_KEY:
            numpy.full(num_thresholds, numpy.nan),
            model_eval.POFD_BY_THRESHOLD_KEY:
            numpy.full(num_thresholds, numpy.nan)
        }
    else:
        success_ratio_matrix = numpy.vstack(
            tuple(evaluation_table[
                model_eval.SR_BY_THRESHOLD_KEY].values.tolist()))

        ci_bottom_dict = {
            model_eval.POD_BY_THRESHOLD_KEY:
            numpy.full(num_thresholds, numpy.nan),
            model_eval.SR_BY_THRESHOLD_KEY: numpy.full(num_thresholds,
                                                       numpy.nan)
        }

    ci_mean_dict = copy.deepcopy(ci_bottom_dict)
    ci_top_dict = copy.deepcopy(ci_bottom_dict)

    for j in range(num_thresholds):
        this_min_pod, this_max_pod = bootstrapping.get_confidence_interval(
            stat_values=pod_matrix[:, j], confidence_level=confidence_level)

        ci_bottom_dict[model_eval.POD_BY_THRESHOLD_KEY][j] = this_min_pod
        ci_top_dict[model_eval.POD_BY_THRESHOLD_KEY][j] = this_max_pod
        ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][j] = numpy.nanmean(
            pod_matrix[:, j])

        if for_roc_curve:
            this_min_pofd, this_max_pofd = (
                bootstrapping.get_confidence_interval(
                    stat_values=pofd_matrix[:, j],
                    confidence_level=confidence_level))

            ci_bottom_dict[model_eval.POFD_BY_THRESHOLD_KEY][j] = this_min_pofd
            ci_top_dict[model_eval.POFD_BY_THRESHOLD_KEY][j] = this_max_pofd
            ci_mean_dict[model_eval.POFD_BY_THRESHOLD_KEY][j] = numpy.nanmean(
                pofd_matrix[:, j])

            continue

        this_min_success_ratio, this_max_success_ratio = (
            bootstrapping.get_confidence_interval(
                stat_values=success_ratio_matrix[:, j],
                confidence_level=confidence_level))

        ci_bottom_dict[
            model_eval.SR_BY_THRESHOLD_KEY][j] = this_min_success_ratio
        ci_top_dict[model_eval.SR_BY_THRESHOLD_KEY][j] = this_max_success_ratio
        ci_mean_dict[model_eval.SR_BY_THRESHOLD_KEY][j] = numpy.nanmean(
            success_ratio_matrix[:, j])

    return ci_bottom_dict, ci_mean_dict, ci_top_dict
Example #4
def run_permutation_test(model_object,
                         list_of_input_matrices,
                         predictor_names_by_matrix,
                         target_values,
                         prediction_function,
                         cost_function,
                         num_bootstrap_iters=DEFAULT_NUM_BOOTSTRAP_ITERS,
                         bootstrap_confidence_level=DEFAULT_CONFIDENCE_LEVEL):
    """Runs the permutation test.

    N = number of input matrices
    E = number of examples
    C_j = number of channels (predictors) in the [j]th matrix
    K = number of target classes

    :param model_object: Trained instance of `keras.models.Model` or
        `keras.models.Sequential`.
    :param list_of_input_matrices: length-N list of matrices (numpy arrays), in
        the order that they were fed to the model for training.  In other words,
        if the order of training matrices was [radar images, soundings], the
        order of these matrices must be [radar images, soundings].  The first
        axis of each matrix should have length E, and the last axis of the [j]th
        matrix should have length C_j.
    :param predictor_names_by_matrix: length-N list of lists.  The [j]th list
        should be a list of predictor variables in the [j]th matrix, with length
        C_j.
    :param target_values: length-E numpy array of target values (integer class
        labels).
    :param prediction_function: Function used to generate predictions from the
        model.  Should have the following inputs and outputs.
    Input: model_object: Same as input to this method.
    Input: list_of_input_matrices: Same as input to this method, except maybe
        with permuted values.
    Output: class_probability_matrix: E-by-K numpy array, where
        class_probability_matrix[i, k] is the probability that the [i]th example
        belongs to the [k]th class.

    :param cost_function: Function used to evaluate predictions from the model.
        Should have the following inputs and outputs.  This method will assume
        that lower values are better.  In other words, the cost function must be
        negatively oriented.
    Input: target_values: Same as input to this method.
    Input: class_probability_matrix: Output from `prediction_function`.
    Output: cost: Scalar value.

    :param num_bootstrap_iters: Number of bootstrapping iterations (used to
        compute the cost function after each permutation).  If
        `num_bootstrap_iters <= 1`, bootstrapping will not be used.
    :param bootstrap_confidence_level: Confidence level for bootstrapping.  This
        method will return the q-percent confidence interval for each cost,
        where q = 100 * `bootstrap_confidence_level`.

    :return: result_dict: Dictionary with the following keys.
        S = number of steps (loops through predictors) taken by algorithm
        P = number of predictors

    result_dict['num_bootstrap_iters']: See input.
    result_dict['bootstrap_confidence_level']: See input.
    result_dict['selected_predictor_name_by_step']: length-S list with names of
        selected predictors.
    result_dict['highest_cost_by_step_bs_matrix']: S-by-3 numpy array, where
        highest_cost_by_step_bs_matrix[i, 0] = minimum of confidence interval
        for highest cost at [i]th step; highest_cost_by_step_bs_matrix[i, 1] =
        mean; and highest_cost_by_step_bs_matrix[i, 2] = max.
    result_dict['original_cost_bs_array']: length-3 numpy array, where
        original_cost_bs_array[0] = minimum of confidence interval for cost
        without permutation; original_cost_bs_array[1] = mean; and
        original_cost_bs_array[2] = max.
    result_dict['step1_predictor_names']: length-P list of predictor names.
    result_dict['step1_cost_bs_matrix']: P-by-3 numpy array, where
        step1_cost_bs_matrix[i, 0] = minimum of confidence interval for cost
        after permuting only step1_predictor_names[i];
        step1_cost_bs_matrix[i, 1] = mean; and step1_cost_bs_matrix[i, 2] = max.

    :raises: ValueError: if length of `list_of_input_matrices` != length of
        `predictor_names_by_matrix`.
    :raises: ValueError: if any input matrix has < 3 dimensions.
    """

    # Check input args.
    error_checking.assert_is_integer_numpy_array(target_values)
    error_checking.assert_is_geq_numpy_array(target_values, 0)

    if len(list_of_input_matrices) != len(predictor_names_by_matrix):
        error_string = (
            'Number of input matrices ({0:d}) should equal number of predictor-'
            'name lists ({1:d}).').format(len(list_of_input_matrices),
                                          len(predictor_names_by_matrix))

        raise ValueError(error_string)

    num_input_matrices = len(list_of_input_matrices)
    num_examples = len(target_values)

    for j in range(num_input_matrices):
        error_checking.assert_is_numpy_array_without_nan(
            list_of_input_matrices[j])

        this_num_dimensions = len(list_of_input_matrices[j].shape)
        if this_num_dimensions < 3:
            error_string = (
                '{0:d}th input matrix has {1:d} dimensions.  Should have at '
                'least 3.').format(j + 1, this_num_dimensions)

            raise ValueError(error_string)

        error_checking.assert_is_string_list(predictor_names_by_matrix[j])
        this_num_predictors = len(predictor_names_by_matrix[j])

        these_expected_dimensions = ((num_examples, ) +
                                     list_of_input_matrices[j].shape[1:-1] +
                                     (this_num_predictors, ))
        these_expected_dimensions = numpy.array(these_expected_dimensions,
                                                dtype=int)

        error_checking.assert_is_numpy_array(
            list_of_input_matrices[j],
            exact_dimensions=these_expected_dimensions)

    error_checking.assert_is_integer(num_bootstrap_iters)
    num_bootstrap_iters = max([num_bootstrap_iters, 1])
    error_checking.assert_is_greater(bootstrap_confidence_level, 0.)
    error_checking.assert_is_less_than(bootstrap_confidence_level, 1.)

    # Get original cost (with no permutation).
    class_probability_matrix = prediction_function(model_object,
                                                   list_of_input_matrices)

    all_original_costs = numpy.full(num_bootstrap_iters, numpy.nan)

    for k in range(num_bootstrap_iters):
        _, these_indices = bootstrapping.draw_sample(target_values)

        all_original_costs[k] = cost_function(
            target_values[these_indices],
            class_probability_matrix[these_indices, ...])

    min_original_cost, max_original_cost = (
        bootstrapping.get_confidence_interval(
            stat_values=all_original_costs,
            confidence_level=bootstrap_confidence_level))

    original_cost_bs_array = numpy.array(
        [min_original_cost,
         numpy.mean(all_original_costs), max_original_cost])

    print('Original cost (no permutation): {0:s}'.format(
        str(original_cost_bs_array)))

    # Initialize output variables.
    remaining_predictor_names_by_matrix = copy.deepcopy(
        predictor_names_by_matrix)
    step_num = 0

    # Do dirty work.
    step1_predictor_names = []
    selected_predictor_name_by_step = []

    step1_cost_bs_matrix = None
    highest_cost_by_step_bs_matrix = None

    while True:
        print('\n')
        step_num += 1

        highest_cost_bs_matrix = numpy.full((1, 3), -numpy.inf)
        best_matrix_index = None
        best_predictor_name = None
        best_predictor_permuted_values = None

        stopping_criterion = True

        for j in range(num_input_matrices):
            if len(remaining_predictor_names_by_matrix[j]) == 0:
                continue

            for this_predictor_name in remaining_predictor_names_by_matrix[j]:
                stopping_criterion = False

                print((
                    'Trying predictor "{0:s}" at step {1:d} of permutation '
                    'test...').format(this_predictor_name, step_num))

                these_input_matrices = copy.deepcopy(list_of_input_matrices)
                this_predictor_index = predictor_names_by_matrix[j].index(
                    this_predictor_name)

                these_input_matrices[j][
                    ..., this_predictor_index] = numpy.take(
                        these_input_matrices[j][..., this_predictor_index],
                        indices=numpy.random.permutation(
                            these_input_matrices[j].shape[0]),
                        axis=0)

                this_probability_matrix = prediction_function(
                    model_object, these_input_matrices)

                all_these_costs = numpy.full(num_bootstrap_iters, numpy.nan)

                for k in range(num_bootstrap_iters):
                    _, these_indices = bootstrapping.draw_sample(target_values)

                    all_these_costs[k] = cost_function(
                        target_values[these_indices],
                        this_probability_matrix[these_indices, ...])

                this_min_cost, this_max_cost = (
                    bootstrapping.get_confidence_interval(
                        stat_values=all_these_costs,
                        confidence_level=bootstrap_confidence_level))

                this_cost_bs_array = numpy.array([
                    this_min_cost,
                    numpy.mean(all_these_costs), this_max_cost
                ])

                this_cost_bs_matrix = numpy.reshape(
                    this_cost_bs_array, (1, this_cost_bs_array.size))

                print('Resulting cost = {0:s}\n'.format(
                    str(this_cost_bs_matrix)))

                if step_num == 1:
                    step1_predictor_names.append(this_predictor_name)

                    if step1_cost_bs_matrix is None:
                        step1_cost_bs_matrix = this_cost_bs_matrix + 0.
                    else:
                        step1_cost_bs_matrix = numpy.concatenate(
                            (step1_cost_bs_matrix, this_cost_bs_matrix),
                            axis=0)

                if this_cost_bs_matrix[0, 1] < highest_cost_bs_matrix[0, 1]:
                    continue

                highest_cost_bs_matrix = this_cost_bs_matrix + 0.
                best_matrix_index = j + 0
                best_predictor_name = this_predictor_name + ''
                best_predictor_permuted_values = (
                    these_input_matrices[j][..., this_predictor_index] + 0.)

        if stopping_criterion:  # No more predictors to permute.
            break

        selected_predictor_name_by_step.append(best_predictor_name)

        if highest_cost_by_step_bs_matrix is None:
            highest_cost_by_step_bs_matrix = highest_cost_bs_matrix + 0.
        else:
            highest_cost_by_step_bs_matrix = numpy.concatenate(
                (highest_cost_by_step_bs_matrix, highest_cost_bs_matrix),
                axis=0)

        # Remove best predictor from list.
        remaining_predictor_names_by_matrix[best_matrix_index].remove(
            best_predictor_name)

        # Leave values of best predictor permuted.
        this_best_predictor_index = predictor_names_by_matrix[
            best_matrix_index].index(best_predictor_name)

        list_of_input_matrices[best_matrix_index][
            ...,
            this_best_predictor_index] = best_predictor_permuted_values + 0.

        print('Best predictor = "{0:s}" ... new cost = {1:s}'.format(
            best_predictor_name, str(highest_cost_bs_matrix)))

    return {
        NUM_BOOTSTRAP_ITERS_KEY: num_bootstrap_iters,
        CONFIDENCE_LEVEL_KEY: bootstrap_confidence_level,
        SELECTED_PREDICTORS_KEY: selected_predictor_name_by_step,
        HIGHEST_COSTS_KEY: highest_cost_by_step_bs_matrix,
        ORIGINAL_COST_KEY: original_cost_bs_array,
        STEP1_PREDICTORS_KEY: step1_predictor_names,
        STEP1_COSTS_KEY: step1_cost_bs_matrix
    }
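
# Hedged sketch (not part of the original source) of a prediction_function and
# a negatively oriented cost_function with the signatures documented above.
# The Brier score used here is an illustrative choice of cost.
def _example_prediction_function(model_object, list_of_input_matrices):
    """Returns E-by-K matrix of class probabilities from a Keras model."""
    return model_object.predict(list_of_input_matrices, batch_size=1000)


def _example_cost_function(target_values, class_probability_matrix):
    """Brier score (lower is better); assumes K = 2 with labels in {0, 1}."""
    return numpy.mean(
        (class_probability_matrix[:, 1] - target_values) ** 2)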
Example #5
def _plot_perf_diagrams(evaluation_tables,
                        model_names,
                        best_threshold_indices,
                        marker_indices_by_model,
                        output_file_name,
                        plot_best_thresholds,
                        confidence_level=None):
    """Plots performance diagrams (one for each model).

    :param evaluation_tables: See doc for `_plot_roc_curves`.
    :param model_names: Same.
    :param best_threshold_indices: Same.
    :param marker_indices_by_model: Same.
    :param output_file_name: Same.
    :param plot_best_thresholds: Same.
    :param confidence_level: Same.
    """

    num_models = len(evaluation_tables)
    pod_matrices = [None] * num_models
    success_ratio_matrices = [None] * num_models
    legend_strings = [None] * num_models

    num_bootstrap_reps = None

    for i in range(num_models):
        pod_matrices[i] = numpy.vstack(
            tuple(evaluation_tables[i][
                model_eval.POD_BY_THRESHOLD_KEY].values.tolist()))

        success_ratio_matrices[i] = numpy.vstack(
            tuple(evaluation_tables[i][
                model_eval.SR_BY_THRESHOLD_KEY].values.tolist()))

        if num_bootstrap_reps is None:
            num_bootstrap_reps = pod_matrices[i].shape[0]

        this_num_bootstrap_reps = pod_matrices[i].shape[0]
        # assert num_bootstrap_reps == this_num_bootstrap_reps

        if num_bootstrap_reps > 1:
            this_min_aupd, this_max_aupd = (
                bootstrapping.get_confidence_interval(
                    stat_values=evaluation_tables[i][
                        model_eval.AUPD_KEY].values,
                    confidence_level=confidence_level))

            legend_strings[i] = '{0:s}: AUPD = {1:.3f} to {2:.3f}'.format(
                model_names[i], this_min_aupd, this_max_aupd)
        else:
            this_aupd = evaluation_tables[i][model_eval.AUPD_KEY].values[0]
            legend_strings[i] = '{0:s}: AUPD = {1:.3f}'.format(
                model_names[i], this_aupd)

        print(legend_strings[i])

    figure_object, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    legend_handles = [None] * num_models
    num_colours = COLOUR_MATRIX.shape[0]

    for i in range(num_models):
        this_colour = COLOUR_MATRIX[numpy.mod(i, num_colours), ...]

        if num_bootstrap_reps == 1:
            legend_handles[i] = model_eval_plotting.plot_performance_diagram(
                axes_object=axes_object,
                pod_by_threshold=pod_matrices[i][0, :],
                success_ratio_by_threshold=success_ratio_matrices[i][0, :],
                line_colour=this_colour,
                plot_background=i == 0)

            this_x = success_ratio_matrices[i][0, best_threshold_indices[i]]
            this_y = pod_matrices[i][0, best_threshold_indices[i]]

            these_x = success_ratio_matrices[i][0, marker_indices_by_model[i]]
            these_y = pod_matrices[i][0, marker_indices_by_model[i]]
        else:
            this_ci_bottom_dict, this_ci_mean_dict, this_ci_top_dict = (
                _get_ci_one_model(evaluation_table=evaluation_tables[i],
                                  for_roc_curve=False,
                                  confidence_level=confidence_level))

            legend_handles[i] = (
                model_eval_plotting.plot_bootstrapped_performance_diagram(
                    axes_object=axes_object,
                    ci_bottom_dict=this_ci_bottom_dict,
                    ci_mean_dict=this_ci_mean_dict,
                    ci_top_dict=this_ci_top_dict,
                    line_colour=this_colour,
                    plot_background=i == 0))

            this_x = this_ci_mean_dict[model_eval.SR_BY_THRESHOLD_KEY][
                best_threshold_indices[i]]
            this_y = this_ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][
                best_threshold_indices[i]]

            these_x = this_ci_mean_dict[model_eval.SR_BY_THRESHOLD_KEY][
                marker_indices_by_model[i]]
            these_y = this_ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][
                marker_indices_by_model[i]]

        this_csi = model_eval.csi_from_sr_and_pod(
            success_ratio_array=numpy.array([this_x]),
            pod_array=numpy.array([this_y]))[0]

        print(('POD, success ratio, and CSI at best probability threshold = '
               '{0:.3f}, {1:.3f}, {2:.3f}').format(this_y, this_x, this_csi))

        if plot_best_thresholds:
            axes_object.plot(this_x,
                             this_y,
                             linestyle='None',
                             marker=MARKER_TYPE,
                             markersize=MARKER_SIZE,
                             markeredgewidth=MARKER_EDGE_WIDTH,
                             markerfacecolor=this_colour,
                             markeredgecolor=this_colour)

        # axes_object.plot(
        #     these_x, these_y, linestyle='None', marker='o',
        #     markersize=12, markeredgewidth=MARKER_EDGE_WIDTH,
        #     markerfacecolor=this_colour, markeredgecolor=this_colour
        # )

    main_legend_handle = axes_object.legend(legend_handles,
                                            legend_strings,
                                            loc='upper right',
                                            bbox_to_anchor=(1, 1),
                                            fancybox=True,
                                            shadow=False,
                                            framealpha=0.5,
                                            ncol=1)

    for this_object in main_legend_handle.legendHandles:
        this_object.set_linewidth(5.)

    axes_object.set_title('Performance diagram')
    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(b)',
                              y_coord_normalized=1.025)

    axes_object.set_aspect('equal')

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
Example #6
def _plot_roc_curves(evaluation_tables,
                     model_names,
                     best_threshold_indices,
                     marker_indices_by_model,
                     output_file_name,
                     plot_best_thresholds,
                     confidence_level=None):
    """Plots ROC curves (one for each model).

    M = number of models

    :param evaluation_tables: length-M list of pandas DataFrames.  See
        `model_evaluation.run_evaluation` for columns in each DataFrame.  The
        only difference is that each table here may have multiple rows (one per
        bootstrap replicate).
    :param model_names: length-M list of model names (will be used in legend).
    :param best_threshold_indices: length-M numpy array with index of best
        probability threshold for each model.
    :param marker_indices_by_model: length-M list of numpy arrays, each with
        indices of probability thresholds at which to plot markers.
    :param output_file_name: Path to output file (figure will be saved here).
    :param plot_best_thresholds: See documentation at top of file.
    :param confidence_level: Confidence level for bootstrapping.
    """

    num_models = len(evaluation_tables)
    pod_matrices = [None] * num_models
    pofd_matrices = [None] * num_models
    legend_strings = [None] * num_models

    num_bootstrap_reps = None

    for i in range(num_models):
        pod_matrices[i] = numpy.vstack(
            tuple(evaluation_tables[i][
                model_eval.POD_BY_THRESHOLD_KEY].values.tolist()))

        pofd_matrices[i] = numpy.vstack(
            tuple(evaluation_tables[i][
                model_eval.POFD_BY_THRESHOLD_KEY].values.tolist()))

        if num_bootstrap_reps is None:
            num_bootstrap_reps = pod_matrices[i].shape[0]

        this_num_bootstrap_reps = pod_matrices[i].shape[0]
        # assert num_bootstrap_reps == this_num_bootstrap_reps

        if num_bootstrap_reps > 1:
            this_min_auc, this_max_auc = bootstrapping.get_confidence_interval(
                stat_values=evaluation_tables[i][model_eval.AUC_KEY].values,
                confidence_level=confidence_level)

            legend_strings[i] = '{0:s}: AUC = {1:.3f} to {2:.3f}'.format(
                model_names[i], this_min_auc, this_max_auc)
        else:
            this_auc = evaluation_tables[i][model_eval.AUC_KEY].values[0]
            legend_strings[i] = '{0:s}: AUC = {1:.3f}'.format(
                model_names[i], this_auc)

        print(legend_strings[i])

    figure_object, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    legend_handles = [None] * num_models
    num_colours = COLOUR_MATRIX.shape[0]

    for i in range(num_models):
        this_colour = COLOUR_MATRIX[numpy.mod(i, num_colours), ...]

        if num_bootstrap_reps == 1:
            legend_handles[i] = model_eval_plotting.plot_roc_curve(
                axes_object=axes_object,
                pod_by_threshold=pod_matrices[i][0, :],
                pofd_by_threshold=pofd_matrices[i][0, :],
                line_colour=this_colour,
                plot_background=i == 0)

            this_x = pofd_matrices[i][0, best_threshold_indices[i]]
            this_y = pod_matrices[i][0, best_threshold_indices[i]]

            these_x = pofd_matrices[i][0, marker_indices_by_model[i]]
            these_y = pod_matrices[i][0, marker_indices_by_model[i]]
        else:
            this_ci_bottom_dict, this_ci_mean_dict, this_ci_top_dict = (
                _get_ci_one_model(evaluation_table=evaluation_tables[i],
                                  for_roc_curve=True,
                                  confidence_level=confidence_level))

            legend_handles[
                i] = model_eval_plotting.plot_bootstrapped_roc_curve(
                    axes_object=axes_object,
                    ci_bottom_dict=this_ci_bottom_dict,
                    ci_mean_dict=this_ci_mean_dict,
                    ci_top_dict=this_ci_top_dict,
                    line_colour=this_colour,
                    plot_background=i == 0)

            this_x = this_ci_mean_dict[model_eval.POFD_BY_THRESHOLD_KEY][
                best_threshold_indices[i]]
            this_y = this_ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][
                best_threshold_indices[i]]

            these_x = this_ci_mean_dict[model_eval.POFD_BY_THRESHOLD_KEY][
                marker_indices_by_model[i]]
            these_y = this_ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][
                marker_indices_by_model[i]]

        print(('POD and POFD at best probability threshold = {0:.3f}, {1:.3f}'
               ).format(this_y, this_x))

        if plot_best_thresholds:
            axes_object.plot(this_x,
                             this_y,
                             linestyle='None',
                             marker=MARKER_TYPE,
                             markersize=MARKER_SIZE,
                             markeredgewidth=MARKER_EDGE_WIDTH,
                             markerfacecolor=this_colour,
                             markeredgecolor=this_colour)

        # axes_object.plot(
        #     these_x, these_y, linestyle='None', marker='o',
        #     markersize=12, markeredgewidth=MARKER_EDGE_WIDTH,
        #     markerfacecolor=this_colour, markeredgecolor=this_colour
        # )

    main_legend_handle = axes_object.legend(legend_handles,
                                            legend_strings,
                                            loc='lower right',
                                            bbox_to_anchor=(1, 0),
                                            fancybox=True,
                                            shadow=False,
                                            framealpha=0.5,
                                            ncol=1)

    for this_object in main_legend_handle.legendHandles:
        this_object.set_linewidth(5.)

    axes_object.set_title('ROC curve')
    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(a)',
                              y_coord_normalized=1.025)

    axes_object.set_aspect('equal')

    print('Saving figure to: "{0:s}"...'.format(output_file_name))
    figure_object.savefig(output_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)
def _plot_roc_curve(evaluation_table,
                    best_threshold_index,
                    output_file_name,
                    confidence_level=None):
    """Plots ROC curve.

    :param evaluation_table: See doc for
        `model_evaluation.run_evaluation`.  The only difference is that
        this table may have multiple rows (one per bootstrap replicate).
    :param best_threshold_index: Array index of best probability threshold.
    :param output_file_name: Path to output file (figure will be saved here).
    :param confidence_level: Confidence level for bootstrapping.
    """

    pod_matrix = numpy.vstack(
        tuple(
            evaluation_table[model_eval.POD_BY_THRESHOLD_KEY].values.tolist()))
    pofd_matrix = numpy.vstack(
        tuple(evaluation_table[
            model_eval.POFD_BY_THRESHOLD_KEY].values.tolist()))

    num_bootstrap_reps = pod_matrix.shape[0]
    num_prob_thresholds = pod_matrix.shape[1]

    if num_bootstrap_reps > 1:
        min_auc, max_auc = bootstrapping.get_confidence_interval(
            stat_values=evaluation_table[model_eval.AUC_KEY].values,
            confidence_level=confidence_level)

        annotation_string = 'AUC = [{0:.3f}, {1:.3f}]'.format(min_auc, max_auc)
    else:
        mean_auc = numpy.nanmean(evaluation_table[model_eval.AUC_KEY].values)
        annotation_string = 'AUC = {0:.3f}'.format(mean_auc)

    print(annotation_string)

    _, axes_object = pyplot.subplots(1,
                                     1,
                                     figsize=(FIGURE_WIDTH_INCHES,
                                              FIGURE_HEIGHT_INCHES))

    if num_bootstrap_reps > 1:
        ci_bottom_dict = {
            model_eval.POD_BY_THRESHOLD_KEY:
            numpy.full(num_prob_thresholds, numpy.nan),
            model_eval.POFD_BY_THRESHOLD_KEY:
            numpy.full(num_prob_thresholds, numpy.nan)
        }

        ci_top_dict = copy.deepcopy(ci_bottom_dict)
        ci_mean_dict = copy.deepcopy(ci_bottom_dict)

        for j in range(num_prob_thresholds):
            (ci_bottom_dict[model_eval.POD_BY_THRESHOLD_KEY][j],
             ci_top_dict[model_eval.POD_BY_THRESHOLD_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=pod_matrix[:, j],
                 confidence_level=confidence_level)

            (ci_top_dict[model_eval.POFD_BY_THRESHOLD_KEY][j],
             ci_bottom_dict[model_eval.POFD_BY_THRESHOLD_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=pofd_matrix[:, j],
                 confidence_level=confidence_level)

            ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][j] = numpy.nanmean(
                pod_matrix[:, j])

            ci_mean_dict[model_eval.POFD_BY_THRESHOLD_KEY][j] = numpy.nanmean(
                pofd_matrix[:, j])

        model_eval_plotting.plot_bootstrapped_roc_curve(
            axes_object=axes_object,
            ci_bottom_dict=ci_bottom_dict,
            ci_mean_dict=ci_mean_dict,
            ci_top_dict=ci_top_dict)

        best_x = ci_mean_dict[
            model_eval.POFD_BY_THRESHOLD_KEY][best_threshold_index]
        best_y = ci_mean_dict[
            model_eval.POD_BY_THRESHOLD_KEY][best_threshold_index]
    else:
        model_eval_plotting.plot_roc_curve(axes_object=axes_object,
                                           pod_by_threshold=pod_matrix[0, :],
                                           pofd_by_threshold=pofd_matrix[0, :])

        best_x = pofd_matrix[0, best_threshold_index]
        best_y = pod_matrix[0, best_threshold_index]

    print(('POD and POFD at best probability threshold = {0:.3f}, {1:.3f}'
           ).format(best_y, best_x))

    marker_colour = model_eval_plotting.ROC_CURVE_COLOUR
    # axes_object.plot(
    #     best_x, best_y, linestyle='None', marker=MARKER_TYPE,
    #     markersize=MARKER_SIZE, markeredgewidth=MARKER_EDGE_WIDTH,
    #     markerfacecolor=marker_colour, markeredgecolor=marker_colour)

    axes_object.text(0.98,
                     0.02,
                     annotation_string,
                     bbox=BOUNDING_BOX_DICT,
                     color='k',
                     horizontalalignment='right',
                     verticalalignment='bottom',
                     transform=axes_object.transAxes)

    axes_object.set_title('ROC curve')
    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(a)',
                              y_coord_normalized=1.025)

    axes_object.set_aspect('equal')

    print('Saving ROC curve to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(output_file_name,
                   dpi=FIGURE_RESOLUTION_DPI,
                   pad_inches=0,
                   bbox_inches='tight')
    pyplot.close()
def _plot_attributes_diagram(evaluation_table,
                             num_examples_by_bin,
                             output_file_name,
                             confidence_level=None):
    """Plots attributes diagram.

    K = number of bins for forecast probability

    :param evaluation_table: See doc for `_plot_roc_curve`.
    :param num_examples_by_bin: length-K numpy array with number of examples in
        each bin.
    :param output_file_name: See doc for `_plot_roc_curve`.
    :param confidence_level: Same.
    """

    mean_forecast_prob_matrix = numpy.vstack(
        tuple(evaluation_table[
            model_eval.MEAN_FORECAST_BY_BIN_KEY].values.tolist()))
    event_frequency_matrix = numpy.vstack(
        tuple(evaluation_table[
            model_eval.EVENT_FREQ_BY_BIN_KEY].values.tolist()))

    mean_bss = numpy.nanmean(evaluation_table[model_eval.BSS_KEY].values)
    annotation_string = 'Brier skill score = {0:.3f}'.format(mean_bss)

    num_bootstrap_reps = mean_forecast_prob_matrix.shape[0]
    num_bins = mean_forecast_prob_matrix.shape[1]

    if num_bootstrap_reps > 1:
        min_bss, max_bss = bootstrapping.get_confidence_interval(
            stat_values=evaluation_table[model_eval.BSS_KEY].values,
            confidence_level=confidence_level)

        annotation_string = 'Brier skill score = [{0:.3f}, {1:.3f}]'.format(
            min_bss, max_bss)

    print(annotation_string)

    figure_object, axes_object = pyplot.subplots(
        1, 1, figsize=(FIGURE_WIDTH_INCHES, FIGURE_HEIGHT_INCHES))

    if num_bootstrap_reps > 1:
        ci_bottom_dict = {
            model_eval.MEAN_FORECAST_BY_BIN_KEY:
            numpy.full(num_bins, numpy.nan),
            model_eval.EVENT_FREQ_BY_BIN_KEY: numpy.full(num_bins, numpy.nan)
        }

        ci_top_dict = copy.deepcopy(ci_bottom_dict)
        ci_mean_dict = copy.deepcopy(ci_bottom_dict)

        for j in range(num_bins):
            (ci_top_dict[model_eval.MEAN_FORECAST_BY_BIN_KEY][j],
             ci_bottom_dict[model_eval.MEAN_FORECAST_BY_BIN_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=mean_forecast_prob_matrix[:, j],
                 confidence_level=confidence_level)

            (ci_bottom_dict[model_eval.EVENT_FREQ_BY_BIN_KEY][j],
             ci_top_dict[model_eval.EVENT_FREQ_BY_BIN_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=event_frequency_matrix[:, j],
                 confidence_level=confidence_level)

            ci_mean_dict[model_eval.MEAN_FORECAST_BY_BIN_KEY][j] = (
                numpy.nanmean(mean_forecast_prob_matrix[:, j]))

            ci_mean_dict[model_eval.EVENT_FREQ_BY_BIN_KEY][j] = numpy.nanmean(
                event_frequency_matrix[:, j])

        model_eval_plotting.plot_bootstrapped_attributes_diagram(
            figure_object=figure_object,
            axes_object=axes_object,
            ci_bottom_dict=ci_bottom_dict,
            ci_mean_dict=ci_mean_dict,
            ci_top_dict=ci_top_dict,
            num_examples_by_bin=num_examples_by_bin)
    else:
        model_eval_plotting.plot_attributes_diagram(
            figure_object=figure_object,
            axes_object=axes_object,
            mean_forecast_by_bin=mean_forecast_prob_matrix[0, :],
            event_frequency_by_bin=event_frequency_matrix[0, :],
            num_examples_by_bin=num_examples_by_bin)

    axes_object.text(0.02,
                     0.98,
                     annotation_string,
                     bbox=BOUNDING_BOX_DICT,
                     color='k',
                     horizontalalignment='left',
                     verticalalignment='top',
                     transform=axes_object.transAxes)

    axes_object.set_title('Attributes diagram')
    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(c)',
                              y_coord_normalized=1.025)

    axes_object.set_aspect('equal')

    print('Saving attributes diagram to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(output_file_name,
                   dpi=FIGURE_RESOLUTION_DPI,
                   pad_inches=0,
                   bbox_inches='tight')
    pyplot.close()
def _plot_performance_diagram(evaluation_table,
                              best_threshold_index,
                              output_file_name,
                              confidence_level=None):
    """Plots performance diagram.

    :param evaluation_table: See doc for `_plot_roc_curve`.
    :param best_threshold_index: Array index of best probability threshold.
    :param output_file_name: Same.
    :param confidence_level: Same.
    """

    pod_matrix = numpy.vstack(
        tuple(
            evaluation_table[model_eval.POD_BY_THRESHOLD_KEY].values.tolist()))
    success_ratio_matrix = numpy.vstack(
        tuple(
            evaluation_table[model_eval.SR_BY_THRESHOLD_KEY].values.tolist()))

    mean_aupd = numpy.nanmean(evaluation_table[model_eval.AUPD_KEY].values)
    annotation_string = 'AUPD = {0:.3f}'.format(mean_aupd)

    num_bootstrap_reps = pod_matrix.shape[0]
    num_prob_thresholds = pod_matrix.shape[1]

    if num_bootstrap_reps > 1:
        min_aupd, max_aupd = bootstrapping.get_confidence_interval(
            stat_values=evaluation_table[model_eval.AUPD_KEY].values,
            confidence_level=confidence_level)

        annotation_string = 'AUPD = [{0:.3f}, {1:.3f}]'.format(
            min_aupd, max_aupd)

    print(annotation_string)

    _, axes_object = pyplot.subplots(1,
                                     1,
                                     figsize=(FIGURE_WIDTH_INCHES,
                                              FIGURE_HEIGHT_INCHES))

    if num_bootstrap_reps > 1:
        ci_bottom_dict = {
            model_eval.POD_BY_THRESHOLD_KEY:
            numpy.full(num_prob_thresholds, numpy.nan),
            model_eval.SR_BY_THRESHOLD_KEY:
            numpy.full(num_prob_thresholds, numpy.nan)
        }

        ci_top_dict = copy.deepcopy(ci_bottom_dict)
        ci_mean_dict = copy.deepcopy(ci_bottom_dict)

        for j in range(num_prob_thresholds):
            (ci_bottom_dict[model_eval.POD_BY_THRESHOLD_KEY][j],
             ci_top_dict[model_eval.POD_BY_THRESHOLD_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=pod_matrix[:, j],
                 confidence_level=confidence_level)

            (ci_bottom_dict[model_eval.SR_BY_THRESHOLD_KEY][j],
             ci_top_dict[model_eval.SR_BY_THRESHOLD_KEY][j]
             ) = bootstrapping.get_confidence_interval(
                 stat_values=success_ratio_matrix[:, j],
                 confidence_level=confidence_level)

            ci_mean_dict[model_eval.POD_BY_THRESHOLD_KEY][j] = numpy.nanmean(
                pod_matrix[:, j])

            ci_mean_dict[model_eval.SR_BY_THRESHOLD_KEY][j] = numpy.nanmean(
                success_ratio_matrix[:, j])

        model_eval_plotting.plot_bootstrapped_performance_diagram(
            axes_object=axes_object,
            ci_bottom_dict=ci_bottom_dict,
            ci_mean_dict=ci_mean_dict,
            ci_top_dict=ci_top_dict)

        best_x = ci_mean_dict[
            model_eval.SR_BY_THRESHOLD_KEY][best_threshold_index]
        best_y = ci_mean_dict[
            model_eval.POD_BY_THRESHOLD_KEY][best_threshold_index]
    else:
        model_eval_plotting.plot_performance_diagram(
            axes_object=axes_object,
            pod_by_threshold=pod_matrix[0, :],
            success_ratio_by_threshold=success_ratio_matrix[0, :])

        best_x = success_ratio_matrix[0, best_threshold_index]
        best_y = pod_matrix[0, best_threshold_index]

    print((
        'POD and success ratio at best probability threshold = {0:.3f}, {1:.3f}'
    ).format(best_y, best_x))

    marker_colour = model_eval_plotting.PERF_DIAGRAM_COLOUR
    # axes_object.plot(
    #     best_x, best_y, linestyle='None', marker=MARKER_TYPE,
    #     markersize=MARKER_SIZE, markeredgewidth=MARKER_EDGE_WIDTH,
    #     markerfacecolor=marker_colour, markeredgecolor=marker_colour)

    axes_object.text(0.98,
                     0.98,
                     annotation_string,
                     bbox=BOUNDING_BOX_DICT,
                     color='k',
                     horizontalalignment='right',
                     verticalalignment='top',
                     transform=axes_object.transAxes)

    axes_object.set_title('Performance diagram')
    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(b)',
                              y_coord_normalized=1.025)

    axes_object.set_aspect('equal')

    print('Saving performance diagram to: "{0:s}"...'.format(output_file_name))
    pyplot.savefig(output_file_name,
                   dpi=FIGURE_RESOLUTION_DPI,
                   pad_inches=0,
                   bbox_inches='tight')
    pyplot.close()