def test_get_hourly_chunks_6(self):
        """Ensures correct output from get_hourly_chunks.

        In this case there are 6 hours per chunk.
        """

        chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
            num_hours_per_chunk=6, verbose=False)

        dicts_match = _compare_chunk_dicts(
            chunk_to_hours_dict, CHUNK_TO_HOURS_DICT_6EACH)
        self.assertTrue(dicts_match)
# Example #2 (0 votes on the original listing)
def _plot_by_hour(evaluation_dir_name, num_hours_per_chunk, confidence_level,
                  output_dir_name):
    """Plots model evaluation by hour.

    :param evaluation_dir_name: See documentation at top of file.
    :param num_hours_per_chunk: Same.
    :param confidence_level: Same.
    :param output_dir_name: Same.
    :return: output_file_names: Paths to figures saved by this method.
    """

    chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
        num_hours_per_chunk=num_hours_per_chunk, verbose=False)

    num_bootstrap_reps = None
    num_chunks = len(chunk_to_hours_dict)

    # Columns of each score matrix are [lower CI bound, mean, upper CI bound].
    # Chunks with no evaluation file stay NaN / zero.
    auc_matrix = numpy.full((num_chunks, 3), numpy.nan)
    pod_matrix = numpy.full((num_chunks, 3), numpy.nan)
    far_matrix = numpy.full((num_chunks, 3), numpy.nan)
    csi_matrix = numpy.full((num_chunks, 3), numpy.nan)
    num_examples_by_chunk = numpy.full(num_chunks, 0, dtype=int)
    num_positive_ex_by_chunk = numpy.full(num_chunks, 0, dtype=int)

    for i in range(num_chunks):
        this_eval_file_name = model_eval.find_file(
            directory_name=evaluation_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        # A missing file is tolerated (that hour chunk is left blank in the
        # plots), but the user is warned.
        if not os.path.isfile(this_eval_file_name):
            warning_string = (
                'Cannot find file (this may or may not be a problem).  Expected'
                ' at: "{0:s}"').format(this_eval_file_name)

            warnings.warn(warning_string)
            continue

        print('Reading data from: "{0:s}"...'.format(this_eval_file_name))
        this_evaluation_dict = model_eval.read_evaluation(this_eval_file_name)

        num_examples_by_chunk[i] = len(
            this_evaluation_dict[model_eval.OBSERVED_LABELS_KEY])
        num_positive_ex_by_chunk[i] = numpy.sum(
            this_evaluation_dict[model_eval.OBSERVED_LABELS_KEY])

        this_evaluation_table = this_evaluation_dict[
            model_eval.EVALUATION_TABLE_KEY]
        this_num_bootstrap_reps = len(this_evaluation_table.index)

        if num_bootstrap_reps is None:
            num_bootstrap_reps = this_num_bootstrap_reps

        # Every evaluation file must use the same number of bootstrap reps.
        assert num_bootstrap_reps == this_num_bootstrap_reps

        these_far = (
            1. - this_evaluation_table[model_eval.SUCCESS_RATIO_KEY].values)

        # Fill mean (column 1) and confidence bounds (columns 0 and 2) for
        # each score with one loop instead of four copy-pasted blocks.
        score_matrix_pairs = [
            (this_evaluation_table[model_eval.AUC_KEY].values, auc_matrix),
            (this_evaluation_table[model_eval.POD_KEY].values, pod_matrix),
            (these_far, far_matrix),
            (this_evaluation_table[model_eval.CSI_KEY].values, csi_matrix)
        ]

        for these_scores, this_matrix in score_matrix_pairs:
            this_matrix[i, 1] = numpy.nanmean(these_scores)
            this_matrix[i, 0], this_matrix[i, 2] = (
                bootstrapping.get_confidence_interval(
                    stat_values=these_scores,
                    confidence_level=confidence_level))

    # One tick per chunk; label is either "HH" or "HH-HH" (UTC).
    x_tick_values = numpy.arange(num_chunks, dtype=float)
    x_tick_labels = [None] * num_chunks

    for i in range(num_chunks):
        these_hours = chunk_to_hours_dict[i]

        if len(these_hours) == 1:
            x_tick_labels[i] = '{0:02d}'.format(these_hours[0])
        else:
            x_tick_labels[i] = '{0:02d}-{1:02d}'.format(
                numpy.min(these_hours), numpy.max(these_hours))

    # First figure: AUC and CSI.
    figure_object, axes_object = _plot_auc_and_csi(
        auc_matrix=auc_matrix,
        csi_matrix=csi_matrix,
        num_examples_by_chunk=num_examples_by_chunk,
        num_bootstrap_reps=num_bootstrap_reps,
        plot_legend=False)

    axes_object.set_xticks(x_tick_values)
    axes_object.set_xticklabels(x_tick_labels, rotation=90.)
    axes_object.set_xlabel('Hour (UTC)')

    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(b)',
                              x_coord_normalized=-0.075,
                              y_coord_normalized=1.02)

    auc_csi_file_name = '{0:s}/hourly_auc_and_csi.jpg'.format(output_dir_name)
    print('Saving figure to: "{0:s}"...'.format(auc_csi_file_name))

    figure_object.savefig(auc_csi_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    # Second figure: POD and FAR.
    figure_object, axes_object = _plot_pod_and_far(
        pod_matrix=pod_matrix,
        far_matrix=far_matrix,
        num_positive_ex_by_chunk=num_positive_ex_by_chunk,
        num_bootstrap_reps=num_bootstrap_reps,
        plot_legend=False)

    axes_object.set_xticks(x_tick_values)
    axes_object.set_xticklabels(x_tick_labels, rotation=90.)
    axes_object.set_xlabel('Hour (UTC)')

    plotting_utils.label_axes(axes_object=axes_object,
                              label_string='(d)',
                              x_coord_normalized=-0.075,
                              y_coord_normalized=1.02)

    pod_far_file_name = '{0:s}/hourly_pod_and_far.jpg'.format(output_dir_name)
    print('Saving figure to: "{0:s}"...'.format(pod_far_file_name))

    figure_object.savefig(pod_far_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    return [auc_csi_file_name, pod_far_file_name]
# Example #3 (0 votes on the original listing)
def _write_temporal_subset(prediction_dict, storm_indices, output_file_name):
    """Subsets predictions to the given storms and writes the subset to file.

    :param prediction_dict: Dictionary with ungridded predictions, in the
        format returned by `prediction_io.read_ungridded_predictions`.
    :param storm_indices: Indices (into `prediction_dict` arrays) of storms in
        this subset.
    :param output_file_name: Path to output (NetCDF) file.
    """

    this_prediction_dict = prediction_io.subset_ungridded_predictions(
        prediction_dict=prediction_dict,
        desired_storm_indices=storm_indices)

    print('Writing temporal subset to: "{0:s}"...'.format(output_file_name))

    prediction_io.write_ungridded_predictions(
        netcdf_file_name=output_file_name,
        class_probability_matrix=this_prediction_dict[
            prediction_io.PROBABILITY_MATRIX_KEY],
        storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
        storm_times_unix_sec=this_prediction_dict[
            prediction_io.STORM_TIMES_KEY],
        observed_labels=this_prediction_dict[
            prediction_io.OBSERVED_LABELS_KEY],
        target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
        model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])


def _run(input_file_name, num_months_per_chunk, num_hours_per_chunk,
         output_dir_name):
    """Subsets ungridded predictions by time.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_months_per_chunk: Same.  If <= 0, monthly subsetting is skipped.
    :param num_hours_per_chunk: Same.  If <= 0, hourly subsetting is skipped.
    :param output_dir_name: Same.
    """

    if num_months_per_chunk > 0:
        chunk_to_months_dict = temporal_subsetting.get_monthly_chunks(
            num_months_per_chunk=num_months_per_chunk, verbose=True)

        num_monthly_chunks = len(chunk_to_months_dict)
        print(SEPARATOR_STRING)
    else:
        num_monthly_chunks = 0

    if num_hours_per_chunk > 0:
        chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
            num_hours_per_chunk=num_hours_per_chunk, verbose=True)

        num_hourly_chunks = len(chunk_to_hours_dict)
        print(SEPARATOR_STRING)
    else:
        num_hourly_chunks = 0

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]

    # `get_events_in_months` returns the per-storm months on the first call;
    # caching them here avoids recomputation in later iterations.
    storm_months = None

    for i in range(num_monthly_chunks):
        these_storm_indices, storm_months = (
            temporal_subsetting.get_events_in_months(
                event_months=storm_months,
                event_times_unix_sec=storm_times_unix_sec,
                desired_months=chunk_to_months_dict[i],
                verbose=True))

        # NOTE(review): unlike the hourly loop below, empty chunks are NOT
        # skipped here, so an empty subset is still written -- confirm whether
        # this asymmetry is intentional.

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            months_in_subset=chunk_to_months_dict[i],
            raise_error_if_missing=False)

        _write_temporal_subset(
            prediction_dict=prediction_dict,
            storm_indices=these_storm_indices,
            output_file_name=this_output_file_name)

        print(SEPARATOR_STRING)

    # Same caching trick for per-storm hours.
    storm_hours = None

    for i in range(num_hourly_chunks):
        these_storm_indices, storm_hours = (
            temporal_subsetting.get_events_in_hours(
                event_hours=storm_hours,
                event_times_unix_sec=storm_times_unix_sec,
                desired_hours=chunk_to_hours_dict[i],
                verbose=True))

        if len(these_storm_indices) == 0:
            continue

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        _write_temporal_subset(
            prediction_dict=prediction_dict,
            storm_indices=these_storm_indices,
            output_file_name=this_output_file_name)

        # No separator after the final chunk.
        if i != num_hourly_chunks - 1:
            print(SEPARATOR_STRING)