def _match_storm_objects(first_prediction_dict, second_prediction_dict,
                         top_match_dir_name):
    """Matches storm objects between first and second prediction files.

    :param first_prediction_dict: Dictionary returned by
        `prediction_io.read_ungridded_predictions` for first prediction file.
    :param second_prediction_dict: Same but for second prediction file.
    :param top_match_dir_name: See documentation at top of file.
    :return: first_prediction_dict: Same as input, but containing only storm
        objects matched with one in the second file.
    :return: second_prediction_dict: Same as input, but containing only storm
        objects matched with one in the first file.  Both dictionaries have
        storm objects in the same order.
    """

    first_storm_times_unix_sec = first_prediction_dict[
        prediction_io.STORM_TIMES_KEY]
    first_unique_times_unix_sec = numpy.unique(first_storm_times_unix_sec)

    first_indices = numpy.array([], dtype=int)
    second_indices = numpy.array([], dtype=int)

    for i in range(len(first_unique_times_unix_sec)):
        this_match_file_name = tracking_io.find_match_file(
            top_directory_name=top_match_dir_name,
            valid_time_unix_sec=first_unique_times_unix_sec[i],
            raise_error_if_missing=True)

        print('Reading data from: "{0:s}"...'.format(this_match_file_name))
        this_match_dict = tracking_io.read_matches(this_match_file_name)[0]

        these_first_indices, these_second_indices = (
            _match_storm_objects_one_time(
                first_prediction_dict=first_prediction_dict,
                second_prediction_dict=second_prediction_dict,
                match_dict=this_match_dict))

        first_indices = numpy.concatenate((first_indices, these_first_indices))
        second_indices = numpy.concatenate(
            (second_indices, these_second_indices))

    # Deduplicate so each storm object appears at most once in either array,
    # making the final matching one-to-one.
    _, unique_subindices = numpy.unique(first_indices, return_index=True)
    first_indices = first_indices[unique_subindices]
    second_indices = second_indices[unique_subindices]

    _, unique_subindices = numpy.unique(second_indices, return_index=True)
    first_indices = first_indices[unique_subindices]
    second_indices = second_indices[unique_subindices]

    first_prediction_dict = prediction_io.subset_ungridded_predictions(
        prediction_dict=first_prediction_dict,
        desired_storm_indices=first_indices)

    second_prediction_dict = prediction_io.subset_ungridded_predictions(
        prediction_dict=second_prediction_dict,
        desired_storm_indices=second_indices)

    return first_prediction_dict, second_prediction_dict
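

# A minimal, self-contained sketch of the deduplication trick used above in
# `_match_storm_objects`: running `numpy.unique` with `return_index=True` on
# each index array in turn keeps only the first occurrence of every storm
# object, so the final matching is one-to-one.  (Standalone demo with made-up
# numbers; not part of the original module.)

import numpy

first_indices = numpy.array([0, 1, 1, 2, 3], dtype=int)
second_indices = numpy.array([5, 6, 7, 6, 8], dtype=int)

# First pass: drop repeated entries of the same first-file storm object.
_, unique_subindices = numpy.unique(first_indices, return_index=True)
first_indices = first_indices[unique_subindices]    # [0 1 2 3]
second_indices = second_indices[unique_subindices]  # [5 6 6 8]

# Second pass: drop repeated entries of the same second-file storm object.
_, unique_subindices = numpy.unique(second_indices, return_index=True)
first_indices = first_indices[unique_subindices]    # [0 1 3]
second_indices = second_indices[unique_subindices]  # [5 6 8]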


import unittest


class PredictionIoTests(unittest.TestCase):
    """Unit tests for prediction_io.

    NOTE: the class wrapper and `unittest` import are assumptions added here;
    the method below was extracted without its original class context, and the
    fixture constants (e.g., FULL_PREDICTION_DICT_WITH_OBS) are defined
    elsewhere in that module.
    """

    def test_subset_ungridded_predictions_with_obs(self):
        """Ensures correct output from subset_ungridded_predictions.

        In this case, labels (observations) are included.
        """

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=FULL_PREDICTION_DICT_WITH_OBS,
            desired_storm_indices=DESIRED_INDICES)

        self.assertTrue(
            _compare_ungridded_predictions(this_prediction_dict,
                                           SMALL_PREDICTION_DICT_WITH_OBS))
def _run(input_file_name, top_tracking_dir_name, min_latitude_deg,
         max_latitude_deg, min_longitude_deg, max_longitude_deg,
         grid_spacing_metres, output_dir_name):
    """Subsets ungridded predictions by space.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param top_tracking_dir_name: Same.
    :param min_latitude_deg: Same.
    :param max_latitude_deg: Same.
    :param min_longitude_deg: Same.
    :param max_longitude_deg: Same.
    :param grid_spacing_metres: Same.
    :param output_dir_name: Same.
    """

    equidistant_grid_dict = grids.create_equidistant_grid(
        min_latitude_deg=min_latitude_deg, max_latitude_deg=max_latitude_deg,
        min_longitude_deg=min_longitude_deg,
        max_longitude_deg=max_longitude_deg,
        x_spacing_metres=grid_spacing_metres,
        y_spacing_metres=grid_spacing_metres, azimuthal=False)

    grid_metafile_name = grids.find_equidistant_metafile(
        directory_name=output_dir_name, raise_error_if_missing=False)

    print('Writing metadata for equidistant grid to: "{0:s}"...'.format(
        grid_metafile_name
    ))

    grids.write_equidistant_metafile(grid_dict=equidistant_grid_dict,
                                     pickle_file_name=grid_metafile_name)

    grid_point_x_coords_metres = equidistant_grid_dict[grids.X_COORDS_KEY]
    grid_point_y_coords_metres = equidistant_grid_dict[grids.Y_COORDS_KEY]
    projection_object = equidistant_grid_dict[grids.PROJECTION_KEY]

    # Cell edges lie half a grid spacing below each grid point, plus one more
    # edge above the last point (N points -> N + 1 edges per dimension).
    grid_edge_x_coords_metres = numpy.append(
        grid_point_x_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_x_coords_metres[-1] + 0.5 * grid_spacing_metres
    )
    grid_edge_y_coords_metres = numpy.append(
        grid_point_y_coords_metres - 0.5 * grid_spacing_metres,
        grid_point_y_coords_metres[-1] + 0.5 * grid_spacing_metres
    )

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    print(SEPARATOR_STRING)

    full_id_strings = prediction_dict[prediction_io.STORM_IDS_KEY]
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]
    unique_storm_times_unix_sec = numpy.unique(storm_times_unix_sec)

    num_storm_objects = len(storm_times_unix_sec)
    storm_latitudes_deg = numpy.full(num_storm_objects, numpy.nan)
    storm_longitudes_deg = numpy.full(num_storm_objects, numpy.nan)

    for this_time_unix_sec in unique_storm_times_unix_sec:
        these_indices = numpy.where(
            storm_times_unix_sec == this_time_unix_sec
        )[0]
        these_full_id_strings = [full_id_strings[k] for k in these_indices]

        (storm_latitudes_deg[these_indices],
         storm_longitudes_deg[these_indices]
        ) = _read_storm_locations_one_time(
            top_tracking_dir_name=top_tracking_dir_name,
            valid_time_unix_sec=this_time_unix_sec,
            desired_full_id_strings=these_full_id_strings)

    print(SEPARATOR_STRING)

    storm_x_coords_metres, storm_y_coords_metres = (
        projections.project_latlng_to_xy(
            latitudes_deg=storm_latitudes_deg,
            longitudes_deg=storm_longitudes_deg,
            projection_object=projection_object)
    )

    num_grid_rows = len(grid_point_y_coords_metres)
    num_grid_columns = len(grid_point_x_coords_metres)

    for i in range(num_grid_rows):
        for j in range(num_grid_columns):
            these_indices = grids.find_events_in_grid_cell(
                event_x_coords_metres=storm_x_coords_metres,
                event_y_coords_metres=storm_y_coords_metres,
                grid_edge_x_coords_metres=grid_edge_x_coords_metres,
                grid_edge_y_coords_metres=grid_edge_y_coords_metres,
                row_index=i, column_index=j, verbose=True)

            if len(these_indices) == 0:
                continue

            this_prediction_dict = prediction_io.subset_ungridded_predictions(
                prediction_dict=prediction_dict,
                desired_storm_indices=these_indices)

            this_output_file_name = prediction_io.find_ungridded_file(
                directory_name=output_dir_name, grid_row=i, grid_column=j,
                raise_error_if_missing=False)

            print('Writing subset to: "{0:s}"...'.format(this_output_file_name))

            prediction_io.write_ungridded_predictions(
                netcdf_file_name=this_output_file_name,
                class_probability_matrix=this_prediction_dict[
                    prediction_io.PROBABILITY_MATRIX_KEY],
                storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
                storm_times_unix_sec=this_prediction_dict[
                    prediction_io.STORM_TIMES_KEY],
                observed_labels=this_prediction_dict[
                    prediction_io.OBSERVED_LABELS_KEY],
                target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
                model_file_name=this_prediction_dict[
                    prediction_io.MODEL_FILE_KEY]
            )

            print('\n')
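

# A minimal, self-contained sketch of the gridding logic in `_run` above.  For
# N uniformly spaced grid-point coordinates, the N + 1 cell edges sit half a
# grid spacing below each point, plus one more edge above the last point; an
# event then falls in column j when its x-coordinate lies between edges j and
# j + 1 (and likewise for rows).  The membership test below mimics what
# `grids.find_events_in_grid_cell` is assumed to do; it is not the library
# implementation.

import numpy

grid_spacing_metres = 10000.
grid_point_x_coords_metres = numpy.array([0., 10000., 20000.])

grid_edge_x_coords_metres = numpy.append(
    grid_point_x_coords_metres - 0.5 * grid_spacing_metres,
    grid_point_x_coords_metres[-1] + 0.5 * grid_spacing_metres
)
# -> [-5000.  5000. 15000. 25000.]

event_x_coords_metres = numpy.array([-2000., 4000., 12000., 30000.])
column_index = 0

in_column_flags = numpy.logical_and(
    event_x_coords_metres >= grid_edge_x_coords_metres[column_index],
    event_x_coords_metres < grid_edge_x_coords_metres[column_index + 1]
)
print(numpy.where(in_column_flags)[0])  # -> [0 1]
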
def _run(input_file_name, num_months_per_chunk, num_hours_per_chunk,
         output_dir_name):
    """Subsets ungridded predictions by time.

    This is effectively the main method.

    :param input_file_name: See documentation at top of file.
    :param num_months_per_chunk: Same.
    :param num_hours_per_chunk: Same.
    :param output_dir_name: Same.
    """

    if num_months_per_chunk > 0:
        chunk_to_months_dict = temporal_subsetting.get_monthly_chunks(
            num_months_per_chunk=num_months_per_chunk, verbose=True)

        num_monthly_chunks = len(chunk_to_months_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_monthly_chunks = 0

    if num_hours_per_chunk > 0:
        chunk_to_hours_dict = temporal_subsetting.get_hourly_chunks(
            num_hours_per_chunk=num_hours_per_chunk, verbose=True)

        num_hourly_chunks = len(chunk_to_hours_dict.keys())
        print(SEPARATOR_STRING)
    else:
        num_hourly_chunks = 0

    print('Reading input data from: "{0:s}"...'.format(input_file_name))
    prediction_dict = prediction_io.read_ungridded_predictions(input_file_name)
    storm_times_unix_sec = prediction_dict[prediction_io.STORM_TIMES_KEY]

    # Event months are computed once by the first call to
    # `get_events_in_months`, then passed back in for later chunks.
    storm_months = None

    for i in range(num_monthly_chunks):
        these_storm_indices, storm_months = (
            temporal_subsetting.get_events_in_months(
                event_months=storm_months,
                event_times_unix_sec=storm_times_unix_sec,
                desired_months=chunk_to_months_dict[i],
                verbose=True))

        if len(these_storm_indices) == 0:
            continue

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            months_in_subset=chunk_to_months_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

        print(SEPARATOR_STRING)

    # Same caching pattern as `storm_months` above.
    storm_hours = None

    for i in range(num_hourly_chunks):
        these_storm_indices, storm_hours = (
            temporal_subsetting.get_events_in_hours(
                event_hours=storm_hours,
                event_times_unix_sec=storm_times_unix_sec,
                desired_hours=chunk_to_hours_dict[i],
                verbose=True))

        if len(these_storm_indices) == 0:
            continue

        this_prediction_dict = prediction_io.subset_ungridded_predictions(
            prediction_dict=prediction_dict,
            desired_storm_indices=these_storm_indices)

        this_output_file_name = prediction_io.find_ungridded_file(
            directory_name=output_dir_name,
            hours_in_subset=chunk_to_hours_dict[i],
            raise_error_if_missing=False)

        print('Writing temporal subset to: "{0:s}"...'.format(
            this_output_file_name))

        prediction_io.write_ungridded_predictions(
            netcdf_file_name=this_output_file_name,
            class_probability_matrix=this_prediction_dict[
                prediction_io.PROBABILITY_MATRIX_KEY],
            storm_ids=this_prediction_dict[prediction_io.STORM_IDS_KEY],
            storm_times_unix_sec=this_prediction_dict[
                prediction_io.STORM_TIMES_KEY],
            observed_labels=this_prediction_dict[
                prediction_io.OBSERVED_LABELS_KEY],
            target_name=this_prediction_dict[prediction_io.TARGET_NAME_KEY],
            model_file_name=this_prediction_dict[prediction_io.MODEL_FILE_KEY])

        if i != num_hourly_chunks - 1:
            print(SEPARATOR_STRING)
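

# A hedged sketch of the temporal-chunking inputs consumed by `_run` above.
# Each chunk dictionary is assumed to map a chunk index to a numpy array of
# months (1-12) or hours (0-23); the month-membership test mimics what
# `temporal_subsetting.get_events_in_months` is assumed to do and is not the
# library implementation.

import time

import numpy

# e.g., 3-month chunks -> 4 chunks (JFM, AMJ, JAS, OND).
chunk_to_months_dict = {
    0: numpy.array([1, 2, 3], dtype=int),
    1: numpy.array([4, 5, 6], dtype=int),
    2: numpy.array([7, 8, 9], dtype=int),
    3: numpy.array([10, 11, 12], dtype=int)
}

event_times_unix_sec = numpy.array([0, 86400 * 200], dtype=int)
event_months = numpy.array(
    [time.gmtime(int(t)).tm_mon for t in event_times_unix_sec], dtype=int
)
# -> [1 7]: first event in January 1970, second in July 1970.

these_event_indices = numpy.where(
    numpy.isin(event_months, chunk_to_months_dict[0])
)[0]
print(these_event_indices)  # -> [0]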