def _run(input_file_name, top_tracking_dir_name, min_latitude_deg,
         max_latitude_deg, min_longitude_deg, max_longitude_deg,
         grid_spacing_metres, output_dir_name):
    """Subsets ungridded predictions by space.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param grid_spacing_metres: Same.
    :param output_dir_name: Same.
    """

    equidistant_grid_dict = grids.create_equidistant_grid(
        min_latitude_deg=min_latitude_deg, max_latitude_deg=max_latitude_deg,
        min_longitude_deg=min_longitude_deg,
        max_longitude_deg=max_longitude_deg,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    grid_metafile_name = grids.find_equidistant_metafile(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing metadata for equidistant grid to: "{0:s}"...'.format(
        grid_metafile_name
    ))

    grids.write_equidistant_metafile(grid_dict=equidistant_grid_dict,
                                     pickle_file_name=grid_metafile_name)

    grid_point_x_coords_metres = equidistant_grid_dict[grids.X_COORDS_KEY]
    grid_point_y_coords_metres = equidistant_grid_dict[grids.Y_COORDS_KEY]
    projection_object = equidistant_grid_dict[grids.PROJECTION_KEY]

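    # Convert grid-point centres to grid-cell edges: each point is offset by
    # half the grid spacing, with one extra edge appended on the far side.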
    grid_edge_x_coords_metres = numpy.append(
        grid_point_x_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_x_coords_metres[-1] + 0.5 * grid_spacing_metres
    )
    grid_edge_y_coords_metres = numpy.append(
        grid_point_y_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_y_coords_metres[-1] + 0.5 * grid_spacing_metres
    )

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    print(SEPARATOR_STRING)

    full_id_strings = prediction_dict[prediction_io.STORM_IDS_KEY]
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]
    unique_storm_times_unix_sec = numpy.unique(storm_times_unix_sec)

    num_storm_objects = len(storm_times_unix_sec)
    storm_latitudes_deg = numpy.full(num_storm_objects, numpy.nan)
    storm_longitudes_deg = numpy.full(num_storm_objects, numpy.nan)

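    # Read storm locations from tracking data, one valid time at a time.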
    for this_time_unix_sec in unique_storm_times_unix_sec:
        these_indices = numpy.where(
            storm_times_unix_sec == this_time_unix_sec
        )[0]
        these_full_id_strings = [full_id_strings[k] for k in these_indices]

        (storm_latitudes_deg[these_indices],
         storm_longitudes_deg[these_indices]
        ) = _read_storm_locations_one_time(
            top_tracking_dir_name=top_tracking_dir_name,
            valid_time_unix_sec=this_time_unix_sec,
            desired_full_id_strings=these_full_id_strings)

    print(SEPARATOR_STRING)

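    # Project storm locations from lat-long to x-y coordinates on the grid.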
    storm_x_coords_metres, storm_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=storm_latitudes_deg,
            longitudes_deg=storm_longitudes_deg,
            projection_object=projection_object)
    )

    num_grid_rows = len(grid_point_y_coords_metres)
    num_grid_columns = len(grid_point_x_coords_metres)

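    # For each grid cell, subset the predictions to storm objects inside the
    # cell and write them to a cell-specific file.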
    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=storm_x_coords_metres,
                event_y_coords_metres=storm_y_coords_metres,
                grid_edge_x_coords_metres=grid_edge_x_coords_metres,
                grid_edge_y_coords_metres=grid_edge_y_coords_metres,
                row_index=i, column_index=j, verbose=True)

            if len(these_indices) == 0:
                continue

            this_prediction_dict = prediction_io.subset_ungridded_predictions(
                prediction_dict=prediction_dict,
                desired_storm_indices=these_indices)

            this_output_file_name = prediction_io.find_ungridded_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False)

            print('Writing subset to: "{0:s}"...'.format(this_output_file_name))

            prediction_io.write_ungridded_predictions(
                netcdf_file_name=this_output_file_name,
                class_probability_matrix=this_prediction_dict[
                    prediction_io.PROBABILITY_MATRIX_KEY],
                storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
                storm_times_unix_sec=this_prediction_dict[
                    prediction_io.STORM_TIMES_KEY],
                observed_labels=this_prediction_dict[
                    prediction_io.OBSERVED_LABELS_KEY],
                target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
                model_file_name=this_prediction_dict[
                    prediction_io.MODEL_FILE_KEY]
            )

            print('\n')
def _run(prediction_file_names, top_match_dir_name, unique_storm_cells,
         num_hits, num_misses, num_false_alarms, num_correct_nulls,
         num_disagreements, output_dir_names):
    """Finds extreme examples vis-a-vis two models.

    This is effectively the main method.

    :param prediction_file_names: See documentation at top of file.
    :param top_match_dir_name: Same.
    :param unique_storm_cells: Same.
    :param num_hits: Same.
    :param num_misses: Same.
    :param num_false_alarms: Same.
    :param num_correct_nulls: Same.
    :param num_disagreements: Same.
    :param output_dir_names: Same.
    """

    # TODO(thunderhoser): Throw error if multiclass predictions are read.

    # Check input args.
    example_counts = numpy.array([
        num_hits, num_misses, num_false_alarms, num_correct_nulls,
        num_disagreements
    ], dtype=int)

    error_checking.assert_is_geq_numpy_array(example_counts, 0)

    first_output_dir_name = output_dir_names[0]
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=first_output_dir_name)

    second_output_dir_name = output_dir_names[1]
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=second_output_dir_name)

    # Match storm objects between the two prediction files.
    print('Reading data from: "{0:s}"...'.format(prediction_file_names[0]))
    first_prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_names[0])

    print('Reading data from: "{0:s}"...'.format(prediction_file_names[1]))
    second_prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_names[1])

    print(SEPARATOR_STRING)
    first_prediction_dict, second_prediction_dict = _match_storm_objects(
        first_prediction_dict=first_prediction_dict,
        second_prediction_dict=second_prediction_dict,
        top_match_dir_name=top_match_dir_name)
    print(SEPARATOR_STRING)

    observed_labels = first_prediction_dict[prediction_io.OBSERVED_LABELS_KEY]

    first_model_file_name = first_prediction_dict[prediction_io.MODEL_FILE_KEY]
    first_full_id_strings = first_prediction_dict[prediction_io.STORM_IDS_KEY]
    first_storm_times_unix_sec = first_prediction_dict[
        prediction_io.STORM_TIMES_KEY]
    first_probabilities = first_prediction_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][:, 1]

    second_model_file_name = second_prediction_dict[
        prediction_io.MODEL_FILE_KEY]
    second_full_id_strings = second_prediction_dict[
        prediction_io.STORM_IDS_KEY]
    second_storm_times_unix_sec = second_prediction_dict[
        prediction_io.STORM_TIMES_KEY]
    second_probabilities = second_prediction_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][:, 1]

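    # Find the storm objects on which the two models disagree most, in both
    # directions, and write each set to one activation file per model.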
    if num_disagreements > 0:
        second_high_indices, first_high_indices = (
            model_activation.get_hilo_activation_examples(
                storm_activations=second_probabilities - first_probabilities,
                num_low_activation_examples=num_disagreements,
                num_high_activation_examples=num_disagreements,
                unique_storm_cells=unique_storm_cells,
                full_storm_id_strings=first_full_id_strings))

        # Print summary to command window.
        this_mean_diff = numpy.mean(second_probabilities[second_high_indices] -
                                    first_probabilities[second_high_indices])

        print((
            'Average prob difference for {0:d} worst disagreements with second '
            'model higher: {1:.3f}').format(num_disagreements, this_mean_diff))

        this_mean_diff = numpy.mean(second_probabilities[first_high_indices] -
                                    first_probabilities[first_high_indices])

        print((
            'Average prob difference for {0:d} worst disagreements with first '
            'model higher: {1:.3f}').format(num_disagreements, this_mean_diff))

        # Write file.
        this_activation_file_name = '{0:s}/low_disagreement_examples.p'.format(
            first_output_dir_name)

        print(('Writing disagreements (second model higher) to: "{0:s}"...'
               ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[second_high_indices],
            (len(second_high_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in second_high_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                second_high_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        # Write file.
        this_activation_file_name = '{0:s}/high_disagreement_examples.p'.format(
            second_output_dir_name)

        print(('Writing disagreements (second model higher) to: "{0:s}"...'
               ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[second_high_indices],
            (len(second_high_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in second_high_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                second_high_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        # Write file.
        this_activation_file_name = '{0:s}/high_disagreement_examples.p'.format(
            first_output_dir_name)

        print(('Writing disagreements (first model higher) to: "{0:s}"...'
               ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            first_probabilities[first_high_indices],
            (len(first_high_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in first_high_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                first_high_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        # Write file.
        this_activation_file_name = '{0:s}/low_disagreement_examples.p'.format(
            second_output_dir_name)

        print(('Writing disagreements (first model higher) to: "{0:s}"...'
               ).format(this_activation_file_name))

        this_activation_matrix = numpy.reshape(
            second_probabilities[first_high_indices],
            (len(first_high_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in first_high_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                first_high_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

    if num_hits + num_misses + num_false_alarms + num_correct_nulls == 0:
        return

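    # Rank hits, misses, false alarms, and correct nulls by the mean
    # probability from the two models.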
    mean_probabilities = 0.5 * (first_probabilities + second_probabilities)

    ct_extreme_dict = model_activation.get_contingency_table_extremes(
        storm_activations=mean_probabilities,
        storm_target_values=observed_labels,
        num_hits=num_hits,
        num_misses=num_misses,
        num_false_alarms=num_false_alarms,
        num_correct_nulls=num_correct_nulls,
        unique_storm_cells=unique_storm_cells,
        full_storm_id_strings=first_full_id_strings)

    hit_indices = ct_extreme_dict[model_activation.HIT_INDICES_KEY]
    miss_indices = ct_extreme_dict[model_activation.MISS_INDICES_KEY]
    false_alarm_indices = ct_extreme_dict[
        model_activation.FALSE_ALARM_INDICES_KEY]
    correct_null_indices = ct_extreme_dict[
        model_activation.CORRECT_NULL_INDICES_KEY]

    if num_hits > 0:
        print((
            'Mean probability from first and second model for {0:d} best hits: '
            '{1:.3f}, {2:.3f}').format(
                num_hits, numpy.mean(first_probabilities[hit_indices]),
                numpy.mean(second_probabilities[hit_indices])))

        this_activation_file_name = '{0:s}/best_hits.p'.format(
            first_output_dir_name)

        print('Writing best hits to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            first_probabilities[hit_indices], (len(hit_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[first_full_id_strings[j] for j in hit_indices],
            storm_times_unix_sec=first_storm_times_unix_sec[hit_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        this_activation_file_name = '{0:s}/best_hits.p'.format(
            second_output_dir_name)

        print('Writing best hits to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            second_probabilities[hit_indices], (len(hit_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[second_full_id_strings[j] for j in hit_indices],
            storm_times_unix_sec=second_storm_times_unix_sec[hit_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

    if num_misses > 0:
        print(('Mean probability from first and second model for {0:d} worst '
               'misses: {1:.3f}, {2:.3f}').format(
                   num_misses, numpy.mean(first_probabilities[miss_indices]),
                   numpy.mean(second_probabilities[miss_indices])))

        this_activation_file_name = '{0:s}/worst_misses.p'.format(
            first_output_dir_name)

        print('Writing worst misses to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            first_probabilities[miss_indices], (len(miss_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[first_full_id_strings[j] for j in miss_indices],
            storm_times_unix_sec=first_storm_times_unix_sec[miss_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        this_activation_file_name = '{0:s}/worst_misses.p'.format(
            second_output_dir_name)

        print('Writing worst misses to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            second_probabilities[miss_indices], (len(miss_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[second_full_id_strings[j] for j in miss_indices],
            storm_times_unix_sec=second_storm_times_unix_sec[miss_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

    if num_false_alarms > 0:
        print(('Mean probability from first and second model for {0:d} worst '
               'false alarms: {1:.3f}, {2:.3f}').format(
                   num_false_alarms,
                   numpy.mean(first_probabilities[false_alarm_indices]),
                   numpy.mean(second_probabilities[false_alarm_indices])))

        this_activation_file_name = '{0:s}/worst_false_alarms.p'.format(
            first_output_dir_name)

        print('Writing worst false alarms to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            first_probabilities[false_alarm_indices],
            (len(false_alarm_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in false_alarm_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                false_alarm_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        this_activation_file_name = '{0:s}/worst_false_alarms.p'.format(
            second_output_dir_name)

        print('Writing worst false alarms to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            second_probabilities[false_alarm_indices],
            (len(false_alarm_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in false_alarm_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                false_alarm_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

    if num_correct_nulls > 0:
        print(('Mean probability from first and second model for {0:d} best '
               'correct nulls: {1:.3f}, {2:.3f}').format(
                   num_correct_nulls,
                   numpy.mean(first_probabilities[correct_null_indices]),
                   numpy.mean(second_probabilities[correct_null_indices])))

        this_activation_file_name = '{0:s}/best_correct_nulls.p'.format(
            first_output_dir_name)

        print('Writing best correct nulls to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            first_probabilities[correct_null_indices],
            (len(correct_null_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                first_full_id_strings[j] for j in correct_null_indices
            ],
            storm_times_unix_sec=first_storm_times_unix_sec[
                correct_null_indices],
            model_file_name=first_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)

        this_activation_file_name = '{0:s}/best_correct_nulls.p'.format(
            second_output_dir_name)

        print('Writing best correct nulls to: "{0:s}"...'.format(
            this_activation_file_name))
        this_activation_matrix = numpy.reshape(
            second_probabilities[correct_null_indices],
            (len(correct_null_indices), 1))

        model_activation.write_file(
            pickle_file_name=this_activation_file_name,
            activation_matrix=this_activation_matrix,
            full_id_strings=[
                second_full_id_strings[j] for j in correct_null_indices
            ],
            storm_times_unix_sec=second_storm_times_unix_sec[
                correct_null_indices],
            model_file_name=second_model_file_name,
            component_type_string=CLASS_COMPONENT_STRING,
            target_class=1)
def _run(prediction_file_name, best_prob_threshold, upgraded_min_ef_rating,
         top_target_dir_name, num_bootstrap_reps, downsampling_fractions,
         output_dir_name):
    """Evaluates CNN predictions.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param best_prob_threshold: Same.
    :param upgraded_min_ef_rating: Same.
    :param top_target_dir_name: Same.
    :param num_bootstrap_reps: Same.
    :param downsampling_fractions: Same.
    :param output_dir_name: Same.
    :raises: ValueError: if file contains no examples (storm objects).
    :raises: ValueError: if file contains multi-class predictions.
    :raises: ValueError: if you try to upgrade minimum EF rating but the
        original is non-zero.
    """

    # Verify and process input args.
    if upgraded_min_ef_rating <= 0:
        upgraded_min_ef_rating = None

    num_bootstrap_reps = max([num_bootstrap_reps, 1])
    if best_prob_threshold < 0:
        best_prob_threshold = None

    # Read predictions.
    print('Reading data from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    class_probability_matrix = (
        prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY])

    num_examples = len(observed_labels)
    num_classes = class_probability_matrix.shape[1]

    if num_examples == 0:
        raise ValueError('File contains no examples (storm objects).')

    if num_classes > 2:
        error_string = (
            'This script handles only binary, not {0:d}-class, classification.'
        ).format(num_classes)

        raise ValueError(error_string)

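    # For binary classification, the positive-class probability is the last
    # column of the probability matrix.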
    forecast_probabilities = class_probability_matrix[:, -1]

    # If necessary, upgrade minimum EF rating.
    if upgraded_min_ef_rating is not None:
        target_param_dict = target_val_utils.target_name_to_params(
            prediction_dict[prediction_io.TARGET_NAME_KEY])
        orig_min_ef_rating = (
            target_param_dict[target_val_utils.MIN_FUJITA_RATING_KEY])

        if orig_min_ef_rating != 0:
            error_string = (
                'Cannot upgrade minimum EF rating when original min rating is '
                'non-zero (in this case it is {0:d}).'
            ).format(orig_min_ef_rating)

            raise ValueError(error_string)

        new_target_name = target_val_utils.target_params_to_name(
            min_lead_time_sec=target_param_dict[
                target_val_utils.MIN_LEAD_TIME_KEY],
            max_lead_time_sec=target_param_dict[
                target_val_utils.MAX_LEAD_TIME_KEY],
            min_link_distance_metres=target_param_dict[
                target_val_utils.MIN_LINKAGE_DISTANCE_KEY],
            max_link_distance_metres=target_param_dict[
                target_val_utils.MAX_LINKAGE_DISTANCE_KEY],
            tornadogenesis_only=(
                target_param_dict[target_val_utils.EVENT_TYPE_KEY] ==
                linkage.TORNADOGENESIS_EVENT_STRING),
            min_fujita_rating=upgraded_min_ef_rating)

        print(SEPARATOR_STRING)

        observed_labels = _read_new_target_values(
            top_target_dir_name=top_target_dir_name,
            new_target_name=new_target_name,
            full_storm_id_strings=prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            orig_target_values=observed_labels)

        print(SEPARATOR_STRING)

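        # Keep only examples with non-negative labels under the new target.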
        good_indices = numpy.where(observed_labels >= 0)[0]
        observed_labels = observed_labels[good_indices]
        forecast_probabilities = forecast_probabilities[good_indices]

    # Do calculations.
    output_file_name = model_eval.find_file_from_prediction_file(
        input_prediction_file_name=prediction_file_name,
        output_dir_name=output_dir_name,
        raise_error_if_missing=False)
    file_system_utils.mkdir_recursive_if_necessary(file_name=output_file_name)

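    # Map each class (0 and 1) to its downsampling fraction, unless any
    # fraction is non-positive, in which case downsampling is skipped.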
    if numpy.any(downsampling_fractions <= 0):
        downsampling_dict = None
    else:
        downsampling_dict = {
            0: downsampling_fractions[0],
            1: downsampling_fractions[1]
        }

    _compute_scores(forecast_probabilities=forecast_probabilities,
                    observed_labels=observed_labels,
                    num_bootstrap_reps=num_bootstrap_reps,
                    best_prob_threshold=best_prob_threshold,
                    downsampling_dict=downsampling_dict,
                    output_file_name=output_file_name)
def _run(input_prediction_file_name, top_tracking_dir_name,
         tracking_scale_metres2, x_spacing_metres, y_spacing_metres,
         effective_radius_metres, smoothing_method_name,
         smoothing_cutoff_radius_metres, smoothing_efold_radius_metres,
         top_output_dir_name):
    """Projects CNN forecasts onto the RAP grid.

    This is effectively the main method.

    :param input_prediction_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param tracking_scale_metres2: Same.
    :param x_spacing_metres: Same.
    :param y_spacing_metres: Same.
    :param effective_radius_metres: Same.
    :param smoothing_method_name: Same.
    :param smoothing_cutoff_radius_metres: Same.
    :param smoothing_efold_radius_metres: Same.
    :param top_output_dir_name: Same.
    """

    print('Reading data from: "{0:s}"...'.format(input_prediction_file_name))
    ungridded_forecast_dict = prediction_io.read_ungridded_predictions(
        input_prediction_file_name)

    target_param_dict = target_val_utils.target_name_to_params(
        ungridded_forecast_dict[prediction_io.TARGET_NAME_KEY])

    min_buffer_dist_metres = target_param_dict[
        target_val_utils.MIN_LINKAGE_DISTANCE_KEY]

    # TODO(thunderhoser): This is HACKY.
    if min_buffer_dist_metres == 0:
        min_buffer_dist_metres = numpy.nan

    max_buffer_dist_metres = target_param_dict[
        target_val_utils.MAX_LINKAGE_DISTANCE_KEY]

    min_lead_time_seconds = target_param_dict[
        target_val_utils.MIN_LEAD_TIME_KEY]

    max_lead_time_seconds = target_param_dict[
        target_val_utils.MAX_LEAD_TIME_KEY]

    forecast_column_name = gridded_forecasts._buffer_to_column_name(
        min_buffer_dist_metres=min_buffer_dist_metres,
        max_buffer_dist_metres=max_buffer_dist_metres,
        column_type=gridded_forecasts.FORECAST_COLUMN_TYPE)

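    # Find one tracking file for each forecast-initialization time.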
    init_times_unix_sec = numpy.unique(
        ungridded_forecast_dict[prediction_io.STORM_TIMES_KEY])

    tracking_file_names = []

    for this_time_unix_sec in init_times_unix_sec:
        this_tracking_file_name = tracking_io.find_file(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=tracking_scale_metres2,
            source_name=tracking_utils.SEGMOTION_NAME,
            valid_time_unix_sec=this_time_unix_sec,
            spc_date_string=time_conversion.time_to_spc_date_string(
                this_time_unix_sec),
            raise_error_if_missing=True)

        tracking_file_names.append(this_tracking_file_name)

    storm_object_table = tracking_io.read_many_files(tracking_file_names)
    print(SEPARATOR_STRING)

    tracking_utils.find_storm_objects(
        all_id_strings=ungridded_forecast_dict[prediction_io.STORM_IDS_KEY],
        all_times_unix_sec=ungridded_forecast_dict[
            prediction_io.STORM_TIMES_KEY],
        id_strings_to_keep=storm_object_table[
            tracking_utils.FULL_ID_COLUMN].values.tolist(),
        times_to_keep_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values,
        allow_missing=False)

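    # Match storm objects in the tracking table to the ungridded predictions,
    # then attach the positive-class probability to each storm object.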
    sort_indices = tracking_utils.find_storm_objects(
        all_id_strings=storm_object_table[
            tracking_utils.FULL_ID_COLUMN].values.tolist(),
        all_times_unix_sec=storm_object_table[
            tracking_utils.VALID_TIME_COLUMN].values,
        id_strings_to_keep=ungridded_forecast_dict[
            prediction_io.STORM_IDS_KEY],
        times_to_keep_unix_sec=ungridded_forecast_dict[
            prediction_io.STORM_TIMES_KEY],
        allow_missing=False)

    forecast_probabilities = ungridded_forecast_dict[
        prediction_io.PROBABILITY_MATRIX_KEY][sort_indices, 1]

    storm_object_table = storm_object_table.assign(
        **{forecast_column_name: forecast_probabilities})

    gridded_forecast_dict = gridded_forecasts.create_forecast_grids(
        storm_object_table=storm_object_table,
        min_lead_time_sec=min_lead_time_seconds,
        max_lead_time_sec=max_lead_time_seconds,
        lead_time_resolution_sec=(
            gridded_forecasts.DEFAULT_LEAD_TIME_RES_SECONDS),
        grid_spacing_x_metres=x_spacing_metres,
        grid_spacing_y_metres=y_spacing_metres,
        interp_to_latlng_grid=False,
        prob_radius_for_grid_metres=effective_radius_metres,
        smoothing_method=smoothing_method_name,
        smoothing_e_folding_radius_metres=smoothing_efold_radius_metres,
        smoothing_cutoff_radius_metres=smoothing_cutoff_radius_metres)

    print(SEPARATOR_STRING)

    output_file_name = prediction_io.find_file(
        top_prediction_dir_name=top_output_dir_name,
        first_init_time_unix_sec=numpy.min(
            storm_object_table[tracking_utils.VALID_TIME_COLUMN].values),
        last_init_time_unix_sec=numpy.max(
            storm_object_table[tracking_utils.VALID_TIME_COLUMN].values),
        gridded=True,
        raise_error_if_missing=False)

    print(('Writing results (forecast grids for {0:d} initial times) to: '
           '"{1:s}"...').format(
               len(gridded_forecast_dict[prediction_io.INIT_TIMES_KEY]),
               output_file_name))

    prediction_io.write_gridded_predictions(
        gridded_forecast_dict=gridded_forecast_dict,
        pickle_file_name=output_file_name)
def _run(input_file_name, num_months_per_chunk, num_hours_per_chunk,
         output_dir_name):
    """Subsets ungridded predictions by time.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_months_per_chunk: Same.
    :param num_hours_per_chunk: Same.
    :param output_dir_name: Same.
    """

    if num_months_per_chunk > 0:
        chunk_to_months_dict = temporal_subsetting.get_monthly_chunks(
            num_months_per_chunk=num_months_per_chunk, verbose=True)

        num_monthly_chunks = len(chunk_to_months_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_monthly_chunks = 0

    if num_hours_per_chunk > 0:
        chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
            num_hours_per_chunk=num_hours_per_chunk, verbose=True)

        num_hourly_chunks = len(chunk_to_hours_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_hourly_chunks = 0

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]

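    # Subset predictions by monthly chunk.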
    storm_months = None

    for i in range(num_monthly_chunks):
        these_storm_indices, storm_months = (
            temporal_subsetting.get_events_in_months(
                event_months=storm_months,
                event_times_unix_sec=storm_times_unix_sec,
                desired_months=chunk_to_months_dict[i],
                verbose=True))

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            months_in_subset=chunk_to_months_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

        print(SEPARATOR_STRING)

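    # Subset predictions by hourly chunk.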
    storm_hours = None

    for i in range(num_hourly_chunks):
        these_storm_indices, storm_hours = (
            temporal_subsetting.get_events_in_hours(
                event_hours=storm_hours,
                event_times_unix_sec=storm_times_unix_sec,
                desired_hours=chunk_to_hours_dict[i],
                verbose=True))

        if len(these_storm_indices) == 0:
            continue

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

        if i != num_hourly_chunks - 1:
            print(SEPARATOR_STRING)
def _run(prediction_file_name, top_tracking_dir_name, prob_threshold,
         grid_spacing_metres, output_dir_name):
    """Plots spatial distribution of false alarms.

    This is effectively the main method.

    :param prediction_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param prob_threshold: Same.
    :param grid_spacing_metres: Same.
    :param output_dir_name: Same.
    """

    # Process input args.
    file_system_utils.mkdir_recursive_if_necessary(
        directory_name=output_dir_name)
    error_checking.assert_is_greater(prob_threshold, 0.)
    error_checking.assert_is_less_than(prob_threshold, 1.)

    grid_metadata_dict = grids.create_equidistant_grid(
        min_latitude_deg=MIN_LATITUDE_DEG,
        max_latitude_deg=MAX_LATITUDE_DEG,
        min_longitude_deg=MIN_LONGITUDE_DEG,
        max_longitude_deg=MAX_LONGITUDE_DEG,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres,
        azimuthal=False)

    # Read predictions and find positive forecasts and false alarms.
    print('Reading predictions from: "{0:s}"...'.format(prediction_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(
        prediction_file_name)

    observed_labels = prediction_dict[prediction_io.OBSERVED_LABELS_KEY]
    forecast_labels = (
        prediction_dict[prediction_io.PROBABILITY_MATRIX_KEY][:, -1] >=
        prob_threshold).astype(int)

    pos_forecast_indices = numpy.where(forecast_labels == 1)[0]
    false_alarm_indices = numpy.where(
        numpy.logical_and(observed_labels == 0, forecast_labels == 1))[0]

    num_examples = len(observed_labels)
    num_positive_forecasts = len(pos_forecast_indices)
    num_false_alarms = len(false_alarm_indices)

    print(('Probability threshold = {0:.3f} ... number of examples, positive '
           'forecasts, false alarms = {1:d}, {2:d}, {3:d}').format(
               prob_threshold, num_examples, num_positive_forecasts,
               num_false_alarms))

    # Find and read tracking files.
    pos_forecast_id_strings = [
        prediction_dict[prediction_io.STORM_IDS_KEY][k]
        for k in pos_forecast_indices
    ]
    pos_forecast_times_unix_sec = (
        prediction_dict[prediction_io.STORM_TIMES_KEY][pos_forecast_indices])

    file_times_unix_sec = numpy.unique(pos_forecast_times_unix_sec)
    num_files = len(file_times_unix_sec)
    storm_object_tables = [None] * num_files

    print(SEPARATOR_STRING)

    for i in range(num_files):
        this_tracking_file_name = tracking_io.find_file(
            top_tracking_dir_name=top_tracking_dir_name,
            tracking_scale_metres2=DUMMY_TRACKING_SCALE_METRES2,
            source_name=tracking_utils.SEGMOTION_NAME,
            valid_time_unix_sec=file_times_unix_sec[i],
            spc_date_string=time_conversion.time_to_spc_date_string(
                file_times_unix_sec[i]),
            raise_error_if_missing=True)

        print('Reading data from: "{0:s}"...'.format(this_tracking_file_name))
        this_table = tracking_io.read_file(this_tracking_file_name)
        storm_object_tables[i] = this_table.loc[this_table[
            tracking_utils.FULL_ID_COLUMN].isin(pos_forecast_id_strings)]

        if i == 0:
            continue

        storm_object_tables[i] = storm_object_tables[i].align(
            storm_object_tables[0], axis=1)[0]

    storm_object_table = pandas.concat(storm_object_tables,
                                       axis=0,
                                       ignore_index=True)
    print(SEPARATOR_STRING)

    # Find latitudes and longitudes of false alarms.
    all_id_strings = (
        storm_object_table[tracking_utils.FULL_ID_COLUMN].values.tolist())
    all_times_unix_sec = (
        storm_object_table[tracking_utils.VALID_TIME_COLUMN].values)
    good_indices = tracking_utils.find_storm_objects(
        all_id_strings=all_id_strings,
        all_times_unix_sec=all_times_unix_sec,
        id_strings_to_keep=pos_forecast_id_strings,
        times_to_keep_unix_sec=pos_forecast_times_unix_sec,
        allow_missing=False)

    pos_forecast_latitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values[good_indices]

    pos_forecast_longitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values[good_indices]

    false_alarm_id_strings = [
        prediction_dict[prediction_io.STORM_IDS_KEY][k]
        for k in false_alarm_indices
    ]
    false_alarm_times_unix_sec = (
        prediction_dict[prediction_io.STORM_TIMES_KEY][false_alarm_indices])
    good_indices = tracking_utils.find_storm_objects(
        all_id_strings=all_id_strings,
        all_times_unix_sec=all_times_unix_sec,
        id_strings_to_keep=false_alarm_id_strings,
        times_to_keep_unix_sec=false_alarm_times_unix_sec,
        allow_missing=False)

    false_alarm_latitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LATITUDE_COLUMN].values[good_indices]

    false_alarm_longitudes_deg = storm_object_table[
        tracking_utils.CENTROID_LONGITUDE_COLUMN].values[good_indices]

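    # Count positive forecasts and false alarms in each grid cell.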
    pos_forecast_x_coords_metres, pos_forecast_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=pos_forecast_latitudes_deg,
            longitudes_deg=pos_forecast_longitudes_deg,
            projection_object=grid_metadata_dict[grids.PROJECTION_KEY]))

    num_pos_forecasts_matrix = grids.count_events_on_equidistant_grid(
        event_x_coords_metres=pos_forecast_x_coords_metres,
        event_y_coords_metres=pos_forecast_y_coords_metres,
        grid_point_x_coords_metres=grid_metadata_dict[grids.X_COORDS_KEY],
        grid_point_y_coords_metres=grid_metadata_dict[grids.Y_COORDS_KEY])[0]
    print(SEPARATOR_STRING)

    false_alarm_x_coords_metres, false_alarm_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=false_alarm_latitudes_deg,
            longitudes_deg=false_alarm_longitudes_deg,
            projection_object=grid_metadata_dict[grids.PROJECTION_KEY]))

    num_false_alarms_matrix = grids.count_events_on_equidistant_grid(
        event_x_coords_metres=false_alarm_x_coords_metres,
        event_y_coords_metres=false_alarm_y_coords_metres,
        grid_point_x_coords_metres=grid_metadata_dict[grids.X_COORDS_KEY],
        grid_point_y_coords_metres=grid_metadata_dict[grids.Y_COORDS_KEY])[0]
    print(SEPARATOR_STRING)

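    # Set zero counts to NaN (so empty cells are not plotted), then compute
    # the false-alarm ratio in each grid cell.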
    num_pos_forecasts_matrix = num_pos_forecasts_matrix.astype(float)
    num_pos_forecasts_matrix[num_pos_forecasts_matrix == 0] = numpy.nan
    num_false_alarms_matrix = num_false_alarms_matrix.astype(float)
    num_false_alarms_matrix[num_false_alarms_matrix == 0] = numpy.nan
    far_matrix = num_false_alarms_matrix / num_pos_forecasts_matrix

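    # Plot the gridded counts and false-alarm ratio, capping each colour scale
    # at a percentile of the data.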
    this_max_value = numpy.nanpercentile(num_false_alarms_matrix,
                                         MAX_COUNT_PERCENTILE_TO_PLOT)
    if this_max_value < 10:
        this_max_value = numpy.nanmax(num_false_alarms_matrix)

    figure_object = plotter._plot_one_value(
        data_matrix=num_false_alarms_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_COUNTS,
        min_colour_value=0,
        max_colour_value=this_max_value,
        plot_cbar_min_arrow=False,
        plot_cbar_max_arrow=True)[0]

    num_false_alarms_file_name = '{0:s}/num_false_alarms.jpg'.format(
        output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(num_false_alarms_file_name))
    figure_object.savefig(num_false_alarms_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    this_max_value = numpy.nanpercentile(num_pos_forecasts_matrix,
                                         MAX_COUNT_PERCENTILE_TO_PLOT)
    if this_max_value < 10:
        this_max_value = numpy.nanmax(num_pos_forecasts_matrix)

    figure_object = plotter._plot_one_value(
        data_matrix=num_pos_forecasts_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_COUNTS,
        min_colour_value=0,
        max_colour_value=this_max_value,
        plot_cbar_min_arrow=False,
        plot_cbar_max_arrow=True)[0]

    num_pos_forecasts_file_name = '{0:s}/num_positive_forecasts.jpg'.format(
        output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(num_pos_forecasts_file_name))
    figure_object.savefig(num_pos_forecasts_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)

    this_max_value = numpy.nanpercentile(far_matrix,
                                         MAX_FAR_PERCENTILE_TO_PLOT)
    this_min_value = numpy.nanpercentile(far_matrix,
                                         100. - MAX_FAR_PERCENTILE_TO_PLOT)

    figure_object = plotter._plot_one_value(
        data_matrix=far_matrix,
        grid_metadata_dict=grid_metadata_dict,
        colour_map_object=CMAP_OBJECT_FOR_FAR,
        min_colour_value=this_min_value,
        max_colour_value=this_max_value,
        plot_cbar_min_arrow=this_min_value > 0.,
        plot_cbar_max_arrow=this_max_value < 1.)[0]

    far_file_name = '{0:s}/false_alarm_ratio.jpg'.format(output_dir_name)

    print('Saving figure to: "{0:s}"...'.format(far_file_name))
    figure_object.savefig(far_file_name,
                          dpi=FIGURE_RESOLUTION_DPI,
                          pad_inches=0,
                          bbox_inches='tight')
    pyplot.close(figure_object)